tigerbeetle-node 0.11.13 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  2. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  3. package/dist/bin/aarch64-macos/client.node +0 -0
  4. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  5. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  6. package/dist/bin/x86_64-macos/client.node +0 -0
  7. package/dist/index.js +33 -1
  8. package/dist/index.js.map +1 -1
  9. package/package-lock.json +66 -0
  10. package/package.json +6 -16
  11. package/src/index.ts +56 -1
  12. package/src/node.zig +9 -9
  13. package/dist/.client.node.sha256 +0 -1
  14. package/scripts/build_lib.sh +0 -61
  15. package/scripts/download_node_headers.sh +0 -32
  16. package/src/tigerbeetle/scripts/benchmark.bat +0 -55
  17. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  18. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  19. package/src/tigerbeetle/scripts/fail_on_diff.sh +0 -9
  20. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  21. package/src/tigerbeetle/scripts/fuzz_loop_hash_log.sh +0 -12
  22. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  23. package/src/tigerbeetle/scripts/install.bat +0 -7
  24. package/src/tigerbeetle/scripts/install.sh +0 -21
  25. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  26. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  27. package/src/tigerbeetle/scripts/lint.zig +0 -199
  28. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  29. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -55
  30. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  31. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  32. package/src/tigerbeetle/scripts/scripts/fail_on_diff.sh +0 -9
  33. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  34. package/src/tigerbeetle/scripts/scripts/fuzz_loop_hash_log.sh +0 -12
  35. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  36. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  37. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  38. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  39. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  40. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  41. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  42. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  43. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  44. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  45. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  46. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  47. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  48. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  49. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  50. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  51. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  52. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  53. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  54. package/src/tigerbeetle/src/benchmark.zig +0 -336
  55. package/src/tigerbeetle/src/config.zig +0 -233
  56. package/src/tigerbeetle/src/constants.zig +0 -428
  57. package/src/tigerbeetle/src/ewah.zig +0 -286
  58. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  59. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  60. package/src/tigerbeetle/src/fifo.zig +0 -120
  61. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  62. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  63. package/src/tigerbeetle/src/io/linux.zig +0 -1071
  64. package/src/tigerbeetle/src/io/test.zig +0 -643
  65. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  66. package/src/tigerbeetle/src/io.zig +0 -34
  67. package/src/tigerbeetle/src/iops.zig +0 -107
  68. package/src/tigerbeetle/src/lsm/README.md +0 -308
  69. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  70. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  71. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  72. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  73. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  74. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  75. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  76. package/src/tigerbeetle/src/lsm/forest.zig +0 -205
  77. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -450
  78. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  79. package/src/tigerbeetle/src/lsm/groove.zig +0 -1036
  80. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  81. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  82. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  83. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -878
  84. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  85. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  86. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  87. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  88. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -381
  89. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1329
  90. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  91. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  92. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  93. package/src/tigerbeetle/src/lsm/table.zig +0 -1009
  94. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -192
  95. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  96. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -203
  97. package/src/tigerbeetle/src/lsm/test.zig +0 -439
  98. package/src/tigerbeetle/src/lsm/tree.zig +0 -1169
  99. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -479
  100. package/src/tigerbeetle/src/message_bus.zig +0 -1013
  101. package/src/tigerbeetle/src/message_pool.zig +0 -156
  102. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  103. package/src/tigerbeetle/src/simulator.zig +0 -580
  104. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -578
  105. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  106. package/src/tigerbeetle/src/state_machine.zig +0 -2099
  107. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  108. package/src/tigerbeetle/src/stdx.zig +0 -171
  109. package/src/tigerbeetle/src/storage.zig +0 -393
  110. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  111. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  112. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  113. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  114. package/src/tigerbeetle/src/testing/cluster.zig +0 -444
  115. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  116. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  117. package/src/tigerbeetle/src/testing/id.zig +0 -99
  118. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -374
  119. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  120. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  121. package/src/tigerbeetle/src/testing/state_machine.zig +0 -250
  122. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  123. package/src/tigerbeetle/src/testing/table.zig +0 -247
  124. package/src/tigerbeetle/src/testing/time.zig +0 -84
  125. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  126. package/src/tigerbeetle/src/time.zig +0 -112
  127. package/src/tigerbeetle/src/tracer.zig +0 -529
  128. package/src/tigerbeetle/src/unit_tests.zig +0 -40
  129. package/src/tigerbeetle/src/vopr.zig +0 -495
  130. package/src/tigerbeetle/src/vsr/README.md +0 -209
  131. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  132. package/src/tigerbeetle/src/vsr/clock.zig +0 -855
  133. package/src/tigerbeetle/src/vsr/journal.zig +0 -2415
  134. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  135. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  136. package/src/tigerbeetle/src/vsr/replica.zig +0 -6616
  137. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  138. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  139. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  140. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  141. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  142. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  143. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  144. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  145. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  146. package/src/tigerbeetle/src/vsr.zig +0 -1425
@@ -1,855 +0,0 @@
1
- const std = @import("std");
2
- const assert = std.debug.assert;
3
- const log = std.log.scoped(.clock);
4
- const fmt = std.fmt;
5
-
6
- const constants = @import("../constants.zig");
7
-
8
- const clock_offset_tolerance_max: u64 = constants.clock_offset_tolerance_max_ms * std.time.ns_per_ms;
9
- const epoch_max: u64 = constants.clock_epoch_max_ms * std.time.ns_per_ms;
10
- const window_min: u64 = constants.clock_synchronization_window_min_ms * std.time.ns_per_ms;
11
- const window_max: u64 = constants.clock_synchronization_window_max_ms * std.time.ns_per_ms;
12
-
13
- const Marzullo = @import("marzullo.zig").Marzullo;
14
-
15
- pub fn Clock(comptime Time: type) type {
16
- return struct {
17
- const Self = @This();
18
-
19
- const Sample = struct {
20
- /// The relative difference between our wall clock reading and that of the remote clock source.
21
- clock_offset: i64,
22
- one_way_delay: u64,
23
- };
24
-
25
- const Epoch = struct {
26
- /// The best clock offset sample per remote clock source (with minimum one way delay) collected
27
- /// over the course of a window period of several seconds.
28
- sources: []?Sample,
29
-
30
- /// The total number of samples learned while synchronizing this epoch.
31
- samples: usize,
32
-
33
- /// The monotonic clock timestamp when this epoch began. We use this to measure elapsed time.
34
- monotonic: u64,
35
-
36
- /// The wall clock timestamp when this epoch began. We add the elapsed monotonic time to this
37
- /// plus the synchronized clock offset to arrive at a synchronized realtime timestamp. We
38
- /// capture this realtime when starting the epoch, before we take any samples, to guard against
39
- /// any jumps in the system's realtime clock from impacting our measurements.
40
- realtime: i64,
41
-
42
- /// Once we have enough source clock offset samples in agreement, the epoch is synchronized.
43
- /// We then have lower and upper bounds on the true cluster time, and can install this epoch for
44
- /// subsequent clock readings. This epoch is then valid for several seconds, while clock drift
45
- /// has not had enough time to accumulate into any significant clock skew, and while we collect
46
- /// samples for the next epoch to refresh and replace this one.
47
- synchronized: ?Marzullo.Interval,
48
-
49
- /// A guard to prevent synchronizing too often without having learned any new samples.
50
- learned: bool = false,
51
-
52
- fn elapsed(epoch: *Epoch, clock: *Self) u64 {
53
- return clock.monotonic() - epoch.monotonic;
54
- }
55
-
56
- fn reset(epoch: *Epoch, clock: *Self) void {
57
- std.mem.set(?Sample, epoch.sources, null);
58
- // A replica always has zero clock offset and network delay to its own system time reading:
59
- epoch.sources[clock.replica] = Sample{
60
- .clock_offset = 0,
61
- .one_way_delay = 0,
62
- };
63
- epoch.samples = 1;
64
- epoch.monotonic = clock.monotonic();
65
- epoch.realtime = clock.realtime();
66
- epoch.synchronized = null;
67
- epoch.learned = false;
68
- }
69
-
70
- fn sources_sampled(epoch: *Epoch) usize {
71
- var count: usize = 0;
72
- for (epoch.sources) |sampled| {
73
- if (sampled != null) count += 1;
74
- }
75
- return count;
76
- }
77
- };
78
-
79
- /// The index of the replica using this clock to provide synchronized time.
80
- replica: u8,
81
-
82
- /// The underlying time source for this clock (system time or deterministic time).
83
- time: *Time,
84
-
85
- /// An epoch from which the clock can read synchronized clock timestamps within safe bounds.
86
- /// At least `constants.clock_synchronization_window_min_ms` is needed for this to be ready
87
- /// to use.
88
- epoch: Epoch,
89
-
90
- /// The next epoch (collecting samples and being synchronized) to replace the current epoch.
91
- window: Epoch,
92
-
93
- /// A static allocation to convert window samples into tuple bounds for Marzullo's algorithm.
94
- marzullo_tuples: []Marzullo.Tuple,
95
-
96
- /// A kill switch to revert to unsynchronized realtime.
97
- synchronization_disabled: bool,
98
-
99
- pub fn init(
100
- allocator: std.mem.Allocator,
101
- /// The size of the cluster, i.e. the number of clock sources (including this replica).
102
- replica_count: u8,
103
- replica: u8,
104
- time: *Time,
105
- ) !Self {
106
- assert(replica_count > 0);
107
- assert(replica < replica_count);
108
-
109
- var epoch: Epoch = undefined;
110
- epoch.sources = try allocator.alloc(?Sample, replica_count);
111
- errdefer allocator.free(epoch.sources);
112
-
113
- var window: Epoch = undefined;
114
- window.sources = try allocator.alloc(?Sample, replica_count);
115
- errdefer allocator.free(window.sources);
116
-
117
- // There are two Marzullo tuple bounds (lower and upper) per source clock offset sample:
118
- var marzullo_tuples = try allocator.alloc(Marzullo.Tuple, replica_count * 2);
119
- errdefer allocator.free(marzullo_tuples);
120
-
121
- var self = Self{
122
- .replica = replica,
123
- .time = time,
124
- .epoch = epoch,
125
- .window = window,
126
- .marzullo_tuples = marzullo_tuples,
127
- .synchronization_disabled = replica_count == 1, // A cluster of one cannot synchronize.
128
- };
129
-
130
- // Reset the current epoch to be unsynchronized,
131
- self.epoch.reset(&self);
132
- // and open a new epoch window to start collecting samples...
133
- self.window.reset(&self);
134
-
135
- return self;
136
- }
137
-
138
- pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
139
- allocator.free(self.epoch.sources);
140
- allocator.free(self.window.sources);
141
- allocator.free(self.marzullo_tuples);
142
- }
143
-
144
- /// Called by `Replica.on_pong()` with:
145
- /// * the index of the `replica` that has replied to our ping with a pong,
146
- /// * our monotonic timestamp `m0` embedded in the ping we sent, carried over into this pong,
147
- /// * the remote replica's `realtime()` timestamp `t1`, and
148
- /// * our monotonic timestamp `m2` as captured by our `Replica.on_pong()` handler.
149
- pub fn learn(self: *Self, replica: u8, m0: u64, t1: i64, m2: u64) void {
150
- if (self.synchronization_disabled) return;
151
-
152
- // A network routing fault must have replayed one of our outbound messages back against us:
153
- if (replica == self.replica) {
154
- log.warn("{}: learn: replica == self.replica", .{self.replica});
155
- return;
156
- }
157
-
158
- // Our m0 and m2 readings should always be monotonically increasing if not equal.
159
- // Crucially, it is possible for a very fast network to have m0 == m2, especially where
160
- // `constants.tick_ms` is at a more coarse granularity. We must therefore tolerate RTT=0
161
- // or otherwise we would have a liveness bug simply because we would be throwing away
162
- // perfectly good clock samples.
163
- // This condition should never be true. Reject this as a bad sample:
164
- if (m0 > m2) {
165
- log.warn("{}: learn: m0={} > m2={}", .{ self.replica, m0, m2 });
166
- return;
167
- }
168
-
169
- // We may receive delayed packets after a reboot, in which case m0/m2 may be invalid:
170
- if (m0 < self.window.monotonic) {
171
- log.warn("{}: learn: m0={} < window.monotonic={}", .{
172
- self.replica,
173
- m0,
174
- self.window.monotonic,
175
- });
176
- return;
177
- }
178
-
179
- if (m2 < self.window.monotonic) {
180
- log.warn("{}: learn: m2={} < window.monotonic={}", .{
181
- self.replica,
182
- m2,
183
- self.window.monotonic,
184
- });
185
- return;
186
- }
187
-
188
- const elapsed: u64 = m2 - self.window.monotonic;
189
- if (elapsed > window_max) {
190
- log.warn("{}: learn: elapsed={} > window_max={}", .{
191
- self.replica,
192
- elapsed,
193
- window_max,
194
- });
195
- return;
196
- }
197
-
198
- const round_trip_time: u64 = m2 - m0;
199
- const one_way_delay: u64 = round_trip_time / 2;
200
- const t2: i64 = self.window.realtime + @intCast(i64, elapsed);
201
- const clock_offset: i64 = t1 + @intCast(i64, one_way_delay) - t2;
202
- const asymmetric_delay = self.estimate_asymmetric_delay(
203
- replica,
204
- one_way_delay,
205
- clock_offset,
206
- );
207
- const clock_offset_corrected = clock_offset + asymmetric_delay;
208
-
209
- log.debug("{}: learn: replica={} m0={} t1={} m2={} t2={} one_way_delay={} " ++
210
- "asymmetric_delay={} clock_offset={}", .{
211
- self.replica,
212
- replica,
213
- m0,
214
- t1,
215
- m2,
216
- t2,
217
- one_way_delay,
218
- asymmetric_delay,
219
- clock_offset_corrected,
220
- });
221
-
222
- // The less network delay, the more likely we have an accurate clock offset measurement:
223
- self.window.sources[replica] = minimum_one_way_delay(
224
- self.window.sources[replica],
225
- Sample{
226
- .clock_offset = clock_offset_corrected,
227
- .one_way_delay = one_way_delay,
228
- },
229
- );
230
-
231
- self.window.samples += 1;
232
-
233
- // We decouple calls to `synchronize()` so that it's not triggered by these network events.
234
- // Otherwise, excessive duplicate network packets would burn the CPU.
235
- self.window.learned = true;
236
- }
237
-
238
- /// Called by `Replica.on_ping_timeout()` to provide `m0` when we decide to send a ping.
239
- /// Called by `Replica.on_pong()` to provide `m2` when we receive a pong.
240
- /// Called by `Replica.on_commit_message_timeout()` to allow backups to discard
241
- /// duplicate/misdirected heartbeats.
242
- pub fn monotonic(self: *Self) u64 {
243
- return self.time.monotonic();
244
- }
245
-
246
- /// Called by `Replica.on_ping()` when responding to a ping with a pong.
247
- /// This should never be used by the state machine, only for measuring clock offsets.
248
- pub fn realtime(self: *Self) i64 {
249
- return self.time.realtime();
250
- }
251
-
252
- /// Called by `StateMachine.prepare_timestamp()` when the primary wants to timestamp a batch.
253
- /// If the primary's clock is not synchronized with the cluster, it must wait until it is.
254
- /// Returns the system time clamped to be within our synchronized lower and upper bounds.
255
- /// This is complementary to NTP and allows clusters with very accurate time to make use of it,
256
- /// while providing guard rails for when NTP is partitioned or unable to correct quickly enough.
257
- pub fn realtime_synchronized(self: *Self) ?i64 {
258
- if (self.synchronization_disabled) {
259
- return self.realtime();
260
- } else if (self.epoch.synchronized) |interval| {
261
- const elapsed = @intCast(i64, self.epoch.elapsed(self));
262
- return std.math.clamp(
263
- self.realtime(),
264
- self.epoch.realtime + elapsed + interval.lower_bound,
265
- self.epoch.realtime + elapsed + interval.upper_bound,
266
- );
267
- } else {
268
- return null;
269
- }
270
- }
271
-
272
- pub fn tick(self: *Self) void {
273
- self.time.tick();
274
-
275
- if (self.synchronization_disabled) return;
276
- self.synchronize();
277
- // Expire the current epoch if successive windows failed to synchronize:
278
- // Gradual clock drift prevents us from using an epoch for more than a few seconds.
279
- if (self.epoch.elapsed(self) >= epoch_max) {
280
- log.err(
281
- "{}: no agreement on cluster time (partitioned or too many clock faults)",
282
- .{self.replica},
283
- );
284
- self.epoch.reset(self);
285
- }
286
- }
287
-
288
- /// Estimates the asymmetric delay for a sample compared to the previous window, according to
289
- /// Algorithm 1 from Section 4.2, "A System for Clock Synchronization in an Internet of Things".
290
- fn estimate_asymmetric_delay(
291
- self: *Self,
292
- replica: u8,
293
- one_way_delay: u64,
294
- clock_offset: i64,
295
- ) i64 {
296
- // Note that `one_way_delay` may be 0 for very fast networks.
297
-
298
- const error_margin = 10 * std.time.ns_per_ms;
299
-
300
- if (self.epoch.sources[replica]) |epoch| {
301
- if (one_way_delay <= epoch.one_way_delay) {
302
- return 0;
303
- } else if (clock_offset > epoch.clock_offset + error_margin) {
304
- // The asymmetric error is on the forward network path.
305
- return 0 - @intCast(i64, one_way_delay - epoch.one_way_delay);
306
- } else if (clock_offset < epoch.clock_offset - error_margin) {
307
- // The asymmetric error is on the reverse network path.
308
- return 0 + @intCast(i64, one_way_delay - epoch.one_way_delay);
309
- } else {
310
- return 0;
311
- }
312
- } else {
313
- return 0;
314
- }
315
- }
316
-
317
- fn synchronize(self: *Self) void {
318
- assert(self.window.synchronized == null);
319
-
320
- // Wait until the window has enough accurate samples:
321
- const elapsed = self.window.elapsed(self);
322
- if (elapsed < window_min) return;
323
- if (elapsed >= window_max) {
324
- // We took too long to synchronize the window, expire stale samples...
325
- const sources_sampled = self.window.sources_sampled();
326
- if (sources_sampled <= @divTrunc(self.window.sources.len, 2)) {
327
- log.err("{}: synchronization failed, partitioned (sources={} samples={})", .{
328
- self.replica,
329
- sources_sampled,
330
- self.window.samples,
331
- });
332
- } else {
333
- log.err("{}: synchronization failed, no agreement (sources={} samples={})", .{
334
- self.replica,
335
- sources_sampled,
336
- self.window.samples,
337
- });
338
- }
339
- self.window.reset(self);
340
- return;
341
- }
342
-
343
- if (!self.window.learned) return;
344
- // Do not reset `learned` any earlier than this (before we have attempted to synchronize).
345
- self.window.learned = false;
346
-
347
- // Starting with the most clock offset tolerance, while we have a majority, find the best
348
- // smallest interval with the least clock offset tolerance, reducing tolerance at each step:
349
- var tolerance: u64 = clock_offset_tolerance_max;
350
- var terminate = false;
351
- var rounds: usize = 0;
352
- // Do at least one round if tolerance=0 and cap the number of rounds to avoid runaway loops.
353
- while (!terminate and rounds < 64) : (tolerance /= 2) {
354
- if (tolerance == 0) terminate = true;
355
- rounds += 1;
356
-
357
- const interval = Marzullo.smallest_interval(self.window_tuples(tolerance));
358
- const majority = interval.sources_true > @divTrunc(self.window.sources.len, 2);
359
- if (!majority) break;
360
-
361
- // The new interval may reduce the number of `sources_true` while also decreasing error.
362
- // In other words, provided we maintain a majority, we prefer tighter tolerance bounds.
363
- self.window.synchronized = interval;
364
- }
365
-
366
- // Wait for more accurate samples or until we timeout the window for lack of majority:
367
- if (self.window.synchronized == null) return;
368
-
369
- var new_window = self.epoch;
370
- new_window.reset(self);
371
- self.epoch = self.window;
372
- self.window = new_window;
373
-
374
- self.after_synchronization();
375
- }
376
-
377
- fn after_synchronization(self: *Self) void {
378
- const new_interval = self.epoch.synchronized.?;
379
-
380
- log.debug("{}: synchronized: truechimers={}/{} clock_offset={}..{} accuracy={}", .{
381
- self.replica,
382
- new_interval.sources_true,
383
- self.epoch.sources.len,
384
- fmt.fmtDurationSigned(new_interval.lower_bound),
385
- fmt.fmtDurationSigned(new_interval.upper_bound),
386
- fmt.fmtDurationSigned(new_interval.upper_bound - new_interval.lower_bound),
387
- });
388
-
389
- const elapsed = @intCast(i64, self.epoch.elapsed(self));
390
- const system = self.realtime();
391
- const lower = self.epoch.realtime + elapsed + new_interval.lower_bound;
392
- const upper = self.epoch.realtime + elapsed + new_interval.upper_bound;
393
- const cluster = std.math.clamp(system, lower, upper);
394
-
395
- if (system == cluster) {} else if (system < lower) {
396
- const delta = lower - system;
397
- if (delta < std.time.ns_per_ms) {
398
- log.info("{}: system time is {} behind", .{
399
- self.replica,
400
- fmt.fmtDurationSigned(delta),
401
- });
402
- } else {
403
- log.err("{}: system time is {} behind, clamping system time to cluster time", .{
404
- self.replica,
405
- fmt.fmtDurationSigned(delta),
406
- });
407
- }
408
- } else {
409
- const delta = system - upper;
410
- if (delta < std.time.ns_per_ms) {
411
- log.info("{}: system time is {} ahead", .{
412
- self.replica,
413
- fmt.fmtDurationSigned(delta),
414
- });
415
- } else {
416
- log.err("{}: system time is {} ahead, clamping system time to cluster time", .{
417
- self.replica,
418
- fmt.fmtDurationSigned(delta),
419
- });
420
- }
421
- }
422
- }
423
-
424
- fn window_tuples(self: *Self, tolerance: u64) []Marzullo.Tuple {
425
- assert(self.window.sources[self.replica].?.clock_offset == 0);
426
- assert(self.window.sources[self.replica].?.one_way_delay == 0);
427
- var count: usize = 0;
428
- for (self.window.sources) |sampled, source| {
429
- if (sampled) |sample| {
430
- self.marzullo_tuples[count] = Marzullo.Tuple{
431
- .source = @intCast(u8, source),
432
- .offset = sample.clock_offset - @intCast(i64, sample.one_way_delay + tolerance),
433
- .bound = .lower,
434
- };
435
- count += 1;
436
- self.marzullo_tuples[count] = Marzullo.Tuple{
437
- .source = @intCast(u8, source),
438
- .offset = sample.clock_offset + @intCast(i64, sample.one_way_delay + tolerance),
439
- .bound = .upper,
440
- };
441
- count += 1;
442
- }
443
- }
444
- return self.marzullo_tuples[0..count];
445
- }
446
-
447
- fn minimum_one_way_delay(a: ?Sample, b: ?Sample) ?Sample {
448
- if (a == null) return b;
449
- if (b == null) return a;
450
- if (a.?.one_way_delay < b.?.one_way_delay) return a;
451
- // Choose B if B's one way delay is less or the same (we assume B is the newer sample):
452
- return b;
453
- }
454
- };
455
- }
456
-
457
- const testing = std.testing;
458
- const OffsetType = @import("../testing/time.zig").OffsetType;
459
- const DeterministicTime = @import("../testing/time.zig").Time;
460
- const DeterministicClock = Clock(DeterministicTime);
461
-
462
- const ClockUnitTestContainer = struct {
463
- const Self = @This();
464
- time: DeterministicTime,
465
- clock: DeterministicClock,
466
- rtt: u64 = 300 * std.time.ns_per_ms,
467
- owd: u64 = 150 * std.time.ns_per_ms,
468
- learn_interval: u64 = 5,
469
-
470
- pub fn init(
471
- self: *Self,
472
- allocator: std.mem.Allocator,
473
- offset_type: OffsetType,
474
- offset_coefficient_A: i64,
475
- offset_coefficient_B: i64,
476
- ) !void {
477
- // TODO(Zig) Use @returnAddress() when available.
478
- self.* = .{
479
- .time = .{
480
- .resolution = std.time.ns_per_s / 2,
481
- .offset_type = offset_type,
482
- .offset_coefficient_A = offset_coefficient_A,
483
- .offset_coefficient_B = offset_coefficient_B,
484
- },
485
- .clock = try DeterministicClock.init(allocator, 3, 0, &self.time),
486
- };
487
- }
488
-
489
- pub fn run_till_tick(self: *Self, tick: u64) void {
490
- while (self.clock.time.ticks < tick) {
491
- self.clock.time.tick();
492
-
493
- if (@mod(self.clock.time.ticks, self.learn_interval) == 0) {
494
- const on_pong_time = self.clock.monotonic();
495
- const m0 = on_pong_time - self.rtt;
496
- const t1 = @intCast(i64, on_pong_time - self.owd);
497
-
498
- self.clock.learn(1, m0, t1, on_pong_time);
499
- self.clock.learn(2, m0, t1, on_pong_time);
500
- }
501
-
502
- self.clock.synchronize();
503
- }
504
- }
505
-
506
- const AssertionPoint = struct {
507
- tick: u64,
508
- expected_offset: i64,
509
- };
510
- pub fn ticks_to_perform_assertions(self: *Self) [3]AssertionPoint {
511
- var ret: [3]AssertionPoint = undefined;
512
- switch (self.clock.time.offset_type) {
513
- .linear => {
514
- // For the first (OWD/drift per tick) ticks, the offset < OWD. This means that the
515
- // Marzullo interval is [0,0] (the offset and OWD are 0 for a replica w.r.t. itself).
516
- // Therefore the offset of `clock.realtime_synchronized` will be the analytically prescribed
517
- // offset at the start of the window.
518
- // Beyond this, the offset > OWD and the Marzullo interval will be from replica 1 and
519
- // replica 2. The `clock.realtime_synchronized` will be clamped to the lower bound.
520
- // Therefore the `clock.realtime_synchronized` will be offset by the OWD.
521
- var threshold = self.owd / @intCast(u64, self.clock.time.offset_coefficient_A);
522
- ret[0] = .{
523
- .tick = threshold,
524
- .expected_offset = self.clock.time.offset(threshold - self.learn_interval),
525
- };
526
- ret[1] = .{
527
- .tick = threshold + 100,
528
- .expected_offset = @intCast(i64, self.owd),
529
- };
530
- ret[2] = .{
531
- .tick = threshold + 200,
532
- .expected_offset = @intCast(i64, self.owd),
533
- };
534
- },
535
- .periodic => {
536
- ret[0] = .{
537
- .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B, 4)),
538
- .expected_offset = @intCast(i64, self.owd),
539
- };
540
- ret[1] = .{
541
- .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B, 2)),
542
- .expected_offset = 0,
543
- };
544
- ret[2] = .{
545
- .tick = @intCast(u64, @divTrunc(self.clock.time.offset_coefficient_B * 3, 4)),
546
- .expected_offset = -@intCast(i64, self.owd),
547
- };
548
- },
549
- .step => {
550
- ret[0] = .{
551
- .tick = @intCast(u64, self.clock.time.offset_coefficient_B - 10),
552
- .expected_offset = 0,
553
- };
554
- ret[1] = .{
555
- .tick = @intCast(u64, self.clock.time.offset_coefficient_B + 10),
556
- .expected_offset = -@intCast(i64, self.owd),
557
- };
558
- ret[2] = .{
559
- .tick = @intCast(u64, self.clock.time.offset_coefficient_B + 10),
560
- .expected_offset = -@intCast(i64, self.owd),
561
- };
562
- },
563
- .non_ideal => unreachable, // use ideal clocks for the unit tests
564
- }
565
-
566
- return ret;
567
- }
568
- };
569
-
570
- test "ideal clocks get clamped to cluster time" {
571
- std.testing.log_level = .err;
572
- var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
573
- defer arena.deinit();
574
- const allocator = arena.allocator();
575
-
576
- var ideal_constant_drift_clock: ClockUnitTestContainer = undefined;
577
- try ideal_constant_drift_clock.init(
578
- allocator,
579
- OffsetType.linear,
580
- std.time.ns_per_ms, // loses 1ms per tick
581
- 0,
582
- );
583
- var linear_clock_assertion_points = ideal_constant_drift_clock.ticks_to_perform_assertions();
584
- for (linear_clock_assertion_points) |point| {
585
- ideal_constant_drift_clock.run_till_tick(point.tick);
586
- try testing.expectEqual(
587
- point.expected_offset,
588
- @intCast(i64, ideal_constant_drift_clock.clock.monotonic()) -
589
- ideal_constant_drift_clock.clock.realtime_synchronized().?,
590
- );
591
- }
592
-
593
- var ideal_periodic_drift_clock: ClockUnitTestContainer = undefined;
594
- try ideal_periodic_drift_clock.init(
595
- allocator,
596
- OffsetType.periodic,
597
- std.time.ns_per_s, // loses up to 1s
598
- 200, // period of 200 ticks
599
- );
600
- var ideal_periodic_drift_clock_assertion_points =
601
- ideal_periodic_drift_clock.ticks_to_perform_assertions();
602
- for (ideal_periodic_drift_clock_assertion_points) |point| {
603
- ideal_periodic_drift_clock.run_till_tick(point.tick);
604
- try testing.expectEqual(
605
- point.expected_offset,
606
- @intCast(i64, ideal_periodic_drift_clock.clock.monotonic()) -
607
- ideal_periodic_drift_clock.clock.realtime_synchronized().?,
608
- );
609
- }
610
-
611
- var ideal_jumping_clock: ClockUnitTestContainer = undefined;
612
- try ideal_jumping_clock.init(
613
- allocator,
614
- OffsetType.step,
615
- -5 * std.time.ns_per_day, // jumps 5 days ahead.
616
- 49, // after 49 ticks
617
- );
618
- var ideal_jumping_clock_assertion_points = ideal_jumping_clock.ticks_to_perform_assertions();
619
- for (ideal_jumping_clock_assertion_points) |point| {
620
- ideal_jumping_clock.run_till_tick(point.tick);
621
- try testing.expectEqual(
622
- point.expected_offset,
623
- @intCast(i64, ideal_jumping_clock.clock.monotonic()) -
624
- ideal_jumping_clock.clock.realtime_synchronized().?,
625
- );
626
- }
627
- }
628
-
629
- const PacketSimulatorOptions = @import("../testing/packet_simulator.zig").PacketSimulatorOptions;
630
- const PacketSimulatorType = @import("../testing/packet_simulator.zig").PacketSimulatorType;
631
- const Path = @import("../testing/packet_simulator.zig").Path;
632
- const Command = @import("../vsr.zig").Command;
633
- const ClockSimulator = struct {
634
- const Packet = struct {
635
- m0: u64,
636
- t1: ?i64,
637
- clock_simulator: *ClockSimulator,
638
-
639
- /// PacketSimulator requires this function, but we don't actually have anything to deinit.
640
- pub fn deinit(packet: *const Packet) void {
641
- _ = packet;
642
- }
643
- };
644
-
645
- const Options = struct {
646
- ping_timeout: u32,
647
- clock_count: u8,
648
- network_options: PacketSimulatorOptions,
649
- };
650
-
651
- allocator: std.mem.Allocator,
652
- options: Options,
653
- ticks: u64 = 0,
654
- network: PacketSimulatorType(Packet),
655
- times: []DeterministicTime,
656
- clocks: []DeterministicClock,
657
- prng: std.rand.DefaultPrng,
658
-
659
- pub fn init(allocator: std.mem.Allocator, options: Options) !ClockSimulator {
660
- var network = try PacketSimulatorType(Packet).init(allocator, options.network_options);
661
- errdefer network.deinit(allocator);
662
-
663
- var times = try allocator.alloc(DeterministicTime, options.clock_count);
664
- errdefer allocator.free(times);
665
-
666
- var clocks = try allocator.alloc(DeterministicClock, options.clock_count);
667
- errdefer allocator.free(clocks);
668
-
669
- var prng = std.rand.DefaultPrng.init(options.network_options.seed);
670
-
671
- for (clocks) |*clock, replica| {
672
- errdefer for (clocks[0..replica]) |*c| c.deinit(allocator);
673
-
674
- const amplitude = prng.random().intRangeAtMost(i64, -10, 10) * std.time.ns_per_s;
675
- const phase = prng.random().intRangeAtMost(i64, 100, 1000) +
676
- @floatToInt(i64, prng.random().floatNorm(f64) * 50);
677
- times[replica] = .{
678
- .resolution = std.time.ns_per_s / 2, // delta_t = 0.5s
679
- .offset_type = OffsetType.non_ideal,
680
- .offset_coefficient_A = amplitude,
681
- .offset_coefficient_B = phase,
682
- .offset_coefficient_C = 10,
683
- };
684
-
685
- clock.* = try DeterministicClock.init(
686
- allocator,
687
- options.clock_count,
688
- @intCast(u8, replica),
689
- &times[replica],
690
- );
691
- errdefer clock.deinit(allocator);
692
- }
693
- errdefer for (clocks) |*clock| clock.deinit(allocator);
694
-
695
- return ClockSimulator{
696
- .allocator = allocator,
697
- .options = options,
698
- .network = network,
699
- .times = times,
700
- .clocks = clocks,
701
- .prng = prng,
702
- };
703
- }
704
-
705
- pub fn deinit(self: *ClockSimulator) void {
706
- for (self.clocks) |*clock| clock.deinit(self.allocator);
707
- self.allocator.free(self.clocks);
708
- self.allocator.free(self.times);
709
- self.network.deinit(self.allocator);
710
- }
711
-
712
- pub fn tick(self: *ClockSimulator) void {
713
- self.ticks += 1;
714
- self.network.tick();
715
- for (self.clocks) |*clock| {
716
- clock.tick();
717
- }
718
-
719
- for (self.clocks) |*clock| {
720
- if (clock.time.ticks % self.options.ping_timeout == 0) {
721
- const m0 = clock.monotonic();
722
- for (self.clocks) |_, target| {
723
- if (target != clock.replica) {
724
- self.network.submit_packet(
725
- .{
726
- .m0 = m0,
727
- .t1 = null,
728
- .clock_simulator = self,
729
- },
730
- ClockSimulator.handle_packet,
731
- .{
732
- .source = clock.replica,
733
- .target = @intCast(u8, target),
734
- },
735
- );
736
- }
737
- }
738
- }
739
- }
740
- }
741
-
742
- fn handle_packet(packet: Packet, path: Path) void {
743
- const self = packet.clock_simulator;
744
- const target = &self.clocks[path.target];
745
-
746
- if (packet.t1) |t1| {
747
- target.learn(
748
- path.source,
749
- packet.m0,
750
- t1,
751
- target.monotonic(),
752
- );
753
- } else {
754
- self.network.submit_packet(
755
- .{
756
- .m0 = packet.m0,
757
- .t1 = target.realtime(),
758
- .clock_simulator = self,
759
- },
760
- ClockSimulator.handle_packet,
761
- .{
762
- // send the packet back to where it came from.
763
- .source = path.target,
764
- .target = path.source,
765
- },
766
- );
767
- }
768
- }
769
- };
770
-
771
- test "clock: fuzz test" {
772
- std.testing.log_level = .err; // silence all clock logs
773
-
774
- const ticks_max: u64 = 1_000_000;
775
- const clock_count: u8 = 3;
776
- const SystemTime = @import("../testing/time.zig").Time;
777
- var system_time = SystemTime{
778
- .resolution = constants.tick_ms * std.time.ns_per_ms,
779
- .offset_type = .linear,
780
- .offset_coefficient_A = 0,
781
- .offset_coefficient_B = 0,
782
- };
783
- var seed = @intCast(u64, system_time.realtime());
784
- var min_sync_error: u64 = 1_000_000_000;
785
- var max_sync_error: u64 = 0;
786
- var max_clock_offset: u64 = 0;
787
- var min_clock_offset: u64 = 1_000_000_000;
788
- var simulator = try ClockSimulator.init(std.testing.allocator, .{
789
- .network_options = .{
790
- .replica_count = 3,
791
- .client_count = 0,
792
- .seed = seed,
793
-
794
- .one_way_delay_mean = 25,
795
- .one_way_delay_min = 10,
796
- .packet_loss_probability = 10,
797
- .path_maximum_capacity = 20,
798
- .path_clog_duration_mean = 200,
799
- .path_clog_probability = 2,
800
- .packet_replay_probability = 2,
801
-
802
- .partition_mode = .isolate_single,
803
- .partition_probability = 25,
804
- .unpartition_probability = 5,
805
- .partition_stability = 100,
806
- .unpartition_stability = 10,
807
- },
808
- .clock_count = clock_count,
809
- .ping_timeout = 20,
810
- });
811
- defer simulator.deinit();
812
-
813
- var clock_ticks_without_synchronization = [_]u32{0} ** clock_count;
814
- while (simulator.ticks < ticks_max) {
815
- simulator.tick();
816
-
817
- for (simulator.clocks) |*clock, index| {
818
- var offset = clock.time.offset(simulator.ticks);
819
- var abs_offset = if (offset >= 0) @intCast(u64, offset) else @intCast(u64, -offset);
820
- max_clock_offset = if (abs_offset > max_clock_offset) abs_offset else max_clock_offset;
821
- min_clock_offset = if (abs_offset < min_clock_offset) abs_offset else min_clock_offset;
822
-
823
- var synced_time = clock.realtime_synchronized() orelse {
824
- clock_ticks_without_synchronization[index] += 1;
825
- continue;
826
- };
827
-
828
- for (simulator.clocks) |*other_clock, other_clock_index| {
829
- if (index == other_clock_index) continue;
830
- var other_clock_sync_time = other_clock.realtime_synchronized() orelse {
831
- continue;
832
- };
833
- var err: i64 = synced_time - other_clock_sync_time;
834
- var abs_err: u64 = if (err >= 0) @intCast(u64, err) else @intCast(u64, -err);
835
- max_sync_error = if (abs_err > max_sync_error) abs_err else max_sync_error;
836
- min_sync_error = if (abs_err < min_sync_error) abs_err else min_sync_error;
837
- }
838
- }
839
- }
840
-
841
- std.debug.print("seed={}, max ticks={}, clock count={}\n", .{
842
- seed,
843
- ticks_max,
844
- clock_count,
845
- });
846
- std.debug.print("absolute clock offsets with respect to test time:\n", .{});
847
- std.debug.print("maximum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, max_clock_offset))});
848
- std.debug.print("minimum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, min_clock_offset))});
849
- std.debug.print("\nabsolute synchronization errors between clocks:\n", .{});
850
- std.debug.print("maximum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, max_sync_error))});
851
- std.debug.print("minimum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, min_sync_error))});
852
- std.debug.print("clock ticks without synchronization={d}\n", .{
853
- clock_ticks_without_synchronization,
854
- });
855
- }