tigerbeetle-node 0.3.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/dist/benchmark.js +1 -1
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +22 -20
- package/dist/index.js +40 -18
- package/dist/index.js.map +1 -1
- package/dist/test.js +13 -1
- package/dist/test.js.map +1 -1
- package/package.json +12 -12
- package/scripts/postinstall.sh +2 -2
- package/src/benchmark.ts +4 -4
- package/src/index.ts +35 -9
- package/src/node.zig +139 -28
- package/src/test.ts +19 -5
- package/src/tigerbeetle/scripts/benchmark.sh +10 -3
- package/src/tigerbeetle/scripts/install.sh +2 -2
- package/src/tigerbeetle/scripts/install_zig.bat +109 -0
- package/src/tigerbeetle/scripts/install_zig.sh +21 -4
- package/src/tigerbeetle/scripts/vopr.bat +48 -0
- package/src/tigerbeetle/scripts/vopr.sh +33 -0
- package/src/tigerbeetle/src/benchmark.zig +74 -42
- package/src/tigerbeetle/src/cli.zig +136 -83
- package/src/tigerbeetle/src/config.zig +80 -26
- package/src/tigerbeetle/src/demo.zig +101 -78
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +2 -7
- package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +2 -7
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_04_create_transfers_two_phase_commit.zig +2 -5
- package/src/tigerbeetle/src/demo_05_accept_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_06_reject_transfers.zig +2 -7
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +8 -0
- package/src/tigerbeetle/src/fifo.zig +20 -11
- package/src/tigerbeetle/src/io.zig +35 -22
- package/src/tigerbeetle/src/io_darwin.zig +701 -0
- package/src/tigerbeetle/src/main.zig +72 -25
- package/src/tigerbeetle/src/message_bus.zig +379 -456
- package/src/tigerbeetle/src/message_pool.zig +3 -3
- package/src/tigerbeetle/src/ring_buffer.zig +192 -37
- package/src/tigerbeetle/src/simulator.zig +317 -0
- package/src/tigerbeetle/src/state_machine.zig +846 -38
- package/src/tigerbeetle/src/storage.zig +488 -90
- package/src/tigerbeetle/src/test/cluster.zig +221 -0
- package/src/tigerbeetle/src/test/message_bus.zig +92 -0
- package/src/tigerbeetle/src/test/network.zig +182 -0
- package/src/tigerbeetle/src/test/packet_simulator.zig +371 -0
- package/src/tigerbeetle/src/test/state_checker.zig +142 -0
- package/src/tigerbeetle/src/test/state_machine.zig +71 -0
- package/src/tigerbeetle/src/test/storage.zig +375 -0
- package/src/tigerbeetle/src/test/time.zig +84 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +6 -3
- package/src/tigerbeetle/src/time.zig +65 -0
- package/src/tigerbeetle/src/unit_tests.zig +14 -0
- package/src/tigerbeetle/src/vsr/client.zig +519 -0
- package/src/tigerbeetle/src/vsr/clock.zig +829 -0
- package/src/tigerbeetle/src/vsr/journal.zig +1368 -0
- package/src/tigerbeetle/src/vsr/marzullo.zig +306 -0
- package/src/tigerbeetle/src/vsr/replica.zig +4248 -0
- package/src/tigerbeetle/src/vsr.zig +601 -0
- package/src/tigerbeetle/LICENSE +0 -177
- package/src/tigerbeetle/README.md +0 -116
- package/src/tigerbeetle/src/client.zig +0 -319
- package/src/tigerbeetle/src/concurrent_ranges.zig +0 -162
- package/src/tigerbeetle/src/fixed_array_list.zig +0 -53
- package/src/tigerbeetle/src/io_async.zig +0 -600
- package/src/tigerbeetle/src/journal.zig +0 -567
- package/src/tigerbeetle/src/test_client.zig +0 -41
- package/src/tigerbeetle/src/test_main.zig +0 -118
- package/src/tigerbeetle/src/test_message_bus.zig +0 -132
- package/src/tigerbeetle/src/vr/journal.zig +0 -672
- package/src/tigerbeetle/src/vr/replica.zig +0 -3061
- package/src/tigerbeetle/src/vr.zig +0 -374
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const math = std.math;
|
|
4
|
+
const mem = std.mem;
|
|
5
|
+
|
|
6
|
+
const config = @import("../config.zig");
|
|
7
|
+
const vsr = @import("../vsr.zig");
|
|
8
|
+
|
|
9
|
+
const log = std.log.scoped(.storage);
|
|
10
|
+
|
|
11
|
+
// TODOs:
|
|
12
|
+
// less than a majority of replicas may have corruption
|
|
13
|
+
// have an option to enable/disable the following corruption types:
|
|
14
|
+
// bitrot
|
|
15
|
+
// misdirected read/write
|
|
16
|
+
// corrupt sector
|
|
17
|
+
// latent sector error
|
|
18
|
+
// - emulate by zeroing sector, as this is how we handle this in the real Storage implementation
|
|
19
|
+
// - likely that surrounding sectors also corrupt
|
|
20
|
+
// - likely that stuff written at the same time is also corrupt even if written to a far away sector
|
|
21
|
+
/// An in-memory fake of the production Storage, used by the simulator/fuzzer.
/// Reads and writes complete after a randomized number of ticks, and data that
/// falls inside this replica's designated "faulty area" may be corrupted with a
/// configurable probability.
pub const Storage = struct {
    /// Options for fault injection during fuzz testing
    pub const Options = struct {
        /// Seed for the storage PRNG
        seed: u64,

        /// Minimum number of ticks it may take to read data.
        read_latency_min: u64,
        /// Average number of ticks it may take to read data. Must be >= read_latency_min.
        read_latency_mean: u64,
        /// Minimum number of ticks it may take to write data.
        write_latency_min: u64,
        /// Average number of ticks it may take to write data. Must be >= write_latency_min.
        write_latency_mean: u64,

        /// Chance out of 100 that a read will return incorrect data, if the target memory is within
        /// the faulty area of this replica.
        read_fault_probability: u8,
        /// Chance out of 100 that a write will corrupt the data written, if the target memory is
        /// within the faulty area of this replica.
        write_fault_probability: u8,
    };

    /// See usage in Journal.write_sectors() for details.
    /// TODO: allow testing in both modes.
    pub const synchronicity: enum {
        always_synchronous,
        always_asynchronous,
    } = .always_asynchronous;

    /// An in-flight read. Completion is simulated by `tick()` once `done_at_tick` is reached.
    pub const Read = struct {
        callback: fn (read: *Storage.Read) void,
        buffer: []u8,
        offset: u64,
        /// Tick at which this read is considered "completed" and the callback should be called.
        done_at_tick: u64,

        // Orders reads by completion tick for the priority queue in Storage.reads.
        fn less_than(storage: *Read, other: *Read) math.Order {
            return math.order(storage.done_at_tick, other.done_at_tick);
        }
    };

    /// An in-flight write. Completion is simulated by `tick()` once `done_at_tick` is reached.
    pub const Write = struct {
        callback: fn (write: *Storage.Write) void,
        buffer: []const u8,
        offset: u64,
        /// Tick at which this write is considered "completed" and the callback should be called.
        done_at_tick: u64,

        // Orders writes by completion tick for the priority queue in Storage.writes.
        fn less_than(storage: *Write, other: *Write) math.Order {
            return math.order(storage.done_at_tick, other.done_at_tick);
        }
    };

    /// Faulty areas are always sized to message_size_max
    /// If the faulty areas of all replicas are superimposed, the padding between them is always message_size_max.
    /// For a single replica, the padding between faulty areas depends on the number of other replicas.
    pub const FaultyAreas = struct {
        first_offset: u64,
        period: u64,
    };

    /// The simulated disk contents, sector-aligned so direct-I/O alignment asserts hold.
    memory: []align(config.sector_size) u8,
    size: u64,

    options: Options,
    replica_index: u8,
    prng: std.rand.DefaultPrng,

    // We can't allow storage faults for the same message in a majority of
    // the replicas as that would make recovery impossible. Instead, we only
    // allow faults in certain areas which differ between replicas.
    faulty_areas: FaultyAreas,

    // Pending operations, ordered by completion tick (see Read.less_than/Write.less_than).
    reads: std.PriorityQueue(*Storage.Read),
    writes: std.PriorityQueue(*Storage.Write),

    ticks: u64 = 0,

    /// Allocates the simulated disk (zero-filled) and the pending-I/O queues.
    /// Queue capacity is pre-reserved so read_sectors()/write_sectors() cannot fail.
    pub fn init(
        allocator: *mem.Allocator,
        size: u64,
        options: Storage.Options,
        replica_index: u8,
        faulty_areas: FaultyAreas,
    ) !Storage {
        assert(options.write_latency_mean >= options.write_latency_min);
        assert(options.read_latency_mean >= options.read_latency_min);

        const memory = try allocator.allocAdvanced(u8, config.sector_size, size, .exact);
        errdefer allocator.free(memory);
        // TODO: random data
        mem.set(u8, memory, 0);

        var reads = std.PriorityQueue(*Storage.Read).init(allocator, Storage.Read.less_than);
        errdefer reads.deinit();
        try reads.ensureCapacity(config.io_depth_read);

        var writes = std.PriorityQueue(*Storage.Write).init(allocator, Storage.Write.less_than);
        errdefer writes.deinit();
        try writes.ensureCapacity(config.io_depth_write);

        return Storage{
            .memory = memory,
            .size = size,
            .options = options,
            .replica_index = replica_index,
            .prng = std.rand.DefaultPrng.init(options.seed),
            .faulty_areas = faulty_areas,
            .reads = reads,
            .writes = writes,
        };
    }

    /// Cancel any currently in progress reads/writes but leave the stored data untouched.
    // NOTE(review): this clears the queues by resetting their internal `len` directly,
    // without invoking the pending callbacks — callers must not expect completion.
    pub fn reset(storage: *Storage) void {
        storage.reads.len = 0;
        storage.writes.len = 0;
    }

    pub fn deinit(storage: *Storage, allocator: *mem.Allocator) void {
        allocator.free(storage.memory);
        storage.reads.deinit();
        storage.writes.deinit();
    }

    /// Advances simulated time by one tick and completes every read/write whose
    /// `done_at_tick` has been reached, invoking its callback.
    pub fn tick(storage: *Storage) void {
        storage.ticks += 1;

        while (storage.reads.peek()) |read| {
            if (read.done_at_tick > storage.ticks) break;
            _ = storage.reads.remove();
            storage.read_sectors_finish(read);
        }

        while (storage.writes.peek()) |write| {
            if (write.done_at_tick > storage.ticks) break;
            _ = storage.writes.remove();
            storage.write_sectors_finish(write);
        }
    }

    /// Queues a sector-aligned read; `callback` fires from tick() after a randomized latency.
    /// `read` is owned by the caller and must stay valid until the callback runs.
    pub fn read_sectors(
        storage: *Storage,
        callback: fn (read: *Storage.Read) void,
        read: *Storage.Read,
        buffer: []u8,
        offset: u64,
    ) void {
        storage.assert_bounds_and_alignment(buffer, offset);

        read.* = .{
            .callback = callback,
            .buffer = buffer,
            .offset = offset,
            .done_at_tick = storage.ticks + storage.read_latency(),
        };

        // We ensure the capacity is sufficient for config.io_depth_read in init()
        storage.reads.add(read) catch unreachable;
    }

    // Completes a read: possibly corrupts one faulty sector in `memory` first,
    // then copies the (possibly corrupted) data into the caller's buffer.
    fn read_sectors_finish(storage: *Storage, read: *Storage.Read) void {
        const faulty = storage.faulty_sectors(read.offset, read.buffer.len);
        if (faulty.len > 0 and storage.x_in_100(storage.options.read_fault_probability)) {
            // Randomly corrupt one of the faulty sectors the read targeted
            // TODO: inject more realistic and varied storage faults as described above.
            const sector_count = @divExact(faulty.len, config.sector_size);
            const faulty_sector = storage.prng.random.uintLessThan(u64, sector_count);
            const faulty_sector_offset = faulty_sector * config.sector_size;
            const faulty_sector_bytes = faulty[faulty_sector_offset..][0..config.sector_size];

            log.info("corrupting sector at offset {} during read by replica {}", .{
                faulty_sector_offset,
                storage.replica_index,
            });

            storage.prng.random.bytes(faulty_sector_bytes);
        }

        mem.copy(u8, read.buffer, storage.memory[read.offset..][0..read.buffer.len]);
        read.callback(read);
    }

    /// Queues a sector-aligned write; `callback` fires from tick() after a randomized latency.
    /// `write` is owned by the caller and must stay valid until the callback runs.
    pub fn write_sectors(
        storage: *Storage,
        callback: fn (write: *Storage.Write) void,
        write: *Storage.Write,
        buffer: []const u8,
        offset: u64,
    ) void {
        storage.assert_bounds_and_alignment(buffer, offset);

        write.* = .{
            .callback = callback,
            .buffer = buffer,
            .offset = offset,
            .done_at_tick = storage.ticks + storage.write_latency(),
        };

        // We ensure the capacity is sufficient for config.io_depth_write in init()
        storage.writes.add(write) catch unreachable;
    }

    // Completes a write: copies the caller's buffer into `memory` first, then
    // possibly corrupts one faulty sector of what was just written (torn/bad write).
    fn write_sectors_finish(storage: *Storage, write: *Storage.Write) void {
        mem.copy(u8, storage.memory[write.offset..][0..write.buffer.len], write.buffer);

        const faulty = storage.faulty_sectors(write.offset, write.buffer.len);
        if (faulty.len > 0 and storage.x_in_100(storage.options.write_fault_probability)) {
            // Randomly corrupt one of the faulty sectors the write targeted
            // TODO: inject more realistic and varied storage faults as described above.
            const sector_count = @divExact(faulty.len, config.sector_size);
            const faulty_sector = storage.prng.random.uintLessThan(u64, sector_count);
            const faulty_sector_offset = faulty_sector * config.sector_size;
            const faulty_sector_bytes = faulty[faulty_sector_offset..][0..config.sector_size];

            log.info("corrupting sector at offset {} during write by replica {}", .{
                faulty_sector_offset,
                storage.replica_index,
            });

            storage.prng.random.bytes(faulty_sector_bytes);
        }

        write.callback(write);
    }

    // Mirrors the preconditions of the real direct-I/O Storage so that tests
    // exercise the same alignment contract.
    fn assert_bounds_and_alignment(storage: *Storage, buffer: []const u8, offset: u64) void {
        assert(buffer.len > 0);
        assert(offset + buffer.len <= storage.size);

        // Ensure that the read or write is aligned correctly for Direct I/O:
        // If this is not the case, the underlying syscall will return EINVAL.
        assert(@mod(@ptrToInt(buffer.ptr), config.sector_size) == 0);
        assert(@mod(buffer.len, config.sector_size) == 0);
        assert(@mod(offset, config.sector_size) == 0);
    }

    fn read_latency(storage: *Storage) u64 {
        return storage.latency(storage.options.read_latency_min, storage.options.read_latency_mean);
    }

    fn write_latency(storage: *Storage) u64 {
        return storage.latency(storage.options.write_latency_min, storage.options.write_latency_mean);
    }

    // Exponentially distributed latency: floatExp() has a mean of 1, so the
    // expected value of the result is `mean`, with a hard floor of `min`.
    fn latency(storage: *Storage, min: u64, mean: u64) u64 {
        return min + @floatToInt(u64, @intToFloat(f64, mean - min) * storage.prng.random.floatExp(f64));
    }

    /// Return true with probability x/100.
    fn x_in_100(storage: *Storage, x: u8) bool {
        assert(x <= 100);
        return x > storage.prng.random.uintLessThan(u8, 100);
    }

    /// Assigns each replica a periodic faulty area of the disk such that no single
    /// message can straddle the faulty areas of a majority of replicas.
    /// The return value is a slice into the provided out array.
    pub fn generate_faulty_areas(
        prng: *std.rand.Random,
        size: u64,
        replica_count: u8,
        out: *[config.replicas_max]FaultyAreas,
    ) []FaultyAreas {
        comptime assert(config.message_size_max % config.sector_size == 0);
        const message_size_max = config.message_size_max;

        // We need to ensure there is message_size_max fault-free padding
        // between faulty areas of memory so that a single message
        // cannot straddle the corruptible areas of a majority of replicas.
        comptime assert(config.replicas_max == 6);
        switch (replica_count) {
            1 => {
                // If there is only one replica in the cluster, storage faults are not recoverable.
                // first_offset == size places the faulty area entirely past the end of the disk.
                out[0] = .{ .first_offset = size, .period = 1 };
            },
            2 => {
                // 0123456789
                // 0X X X
                // 1  X X X
                out[0] = .{ .first_offset = 0 * message_size_max, .period = 4 * message_size_max };
                out[1] = .{ .first_offset = 2 * message_size_max, .period = 4 * message_size_max };
            },
            3 => {
                // 0123456789
                // 0X     X
                // 1  X     X
                // 2    X     X
                out[0] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
                out[1] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
                out[2] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
            },
            4 => {
                // 0123456789
                // 0X   X   X
                // 1X   X   X
                // 2  X   X   X
                // 3  X   X   X
                out[0] = .{ .first_offset = 0 * message_size_max, .period = 4 * message_size_max };
                out[1] = .{ .first_offset = 0 * message_size_max, .period = 4 * message_size_max };
                out[2] = .{ .first_offset = 2 * message_size_max, .period = 4 * message_size_max };
                out[3] = .{ .first_offset = 2 * message_size_max, .period = 4 * message_size_max };
            },
            5 => {
                // 0123456789
                // 0X     X
                // 1X     X
                // 2  X     X
                // 3  X     X
                // 4    X     X
                out[0] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
                out[1] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
                out[2] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
                out[3] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
                out[4] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
            },
            6 => {
                // 0123456789
                // 0X     X
                // 1X     X
                // 2  X     X
                // 3  X     X
                // 4    X     X
                // 5    X     X
                out[0] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
                out[1] = .{ .first_offset = 0 * message_size_max, .period = 6 * message_size_max };
                out[2] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
                out[3] = .{ .first_offset = 2 * message_size_max, .period = 6 * message_size_max };
                out[4] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
                out[5] = .{ .first_offset = 4 * message_size_max, .period = 6 * message_size_max };
            },
            else => unreachable,
        }

        // Randomize which replica gets which faulty area.
        prng.shuffle(FaultyAreas, out[0..replica_count]);
        return out[0..replica_count];
    }

    /// Given an offset and size of a read/write, returns a slice into storage.memory of any
    /// faulty sectors touched by the read/write
    // NOTE(review): since size <= message_size_max and the period is a multiple of
    // message_size_max, a read/write can overlap at most one faulty area — confirm
    // against the layouts produced by generate_faulty_areas().
    fn faulty_sectors(storage: *Storage, offset: u64, size: u64) []align(config.sector_size) u8 {
        assert(size <= config.message_size_max);
        const message_size_max = config.message_size_max;
        const period = storage.faulty_areas.period;

        // Start of the faulty area within the same period-window as this offset.
        const faulty_offset = storage.faulty_areas.first_offset + (offset / period) * period;

        const start = std.math.max(offset, faulty_offset);
        const end = std.math.min(offset + size, faulty_offset + message_size_max);

        // The read/write does not touch any faulty sectors
        if (start >= end) return &[0]u8{};

        return storage.memory[start..end];
    }
};
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
|
|
4
|
+
pub const OffsetType = enum {
|
|
5
|
+
linear,
|
|
6
|
+
periodic,
|
|
7
|
+
step,
|
|
8
|
+
non_ideal,
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
/// A deterministic, tick-driven fake clock for simulation testing.
/// The monotonic clock advances by `resolution` nanoseconds per tick, while the
/// realtime clock is skewed by a configurable offset function (`offset_type`).
pub const Time = struct {
    const Self = @This();

    /// The duration of a single tick in nanoseconds.
    resolution: u64,

    // Which offset function skews the realtime clock (see offset()).
    offset_type: OffsetType,

    /// Coefficients to scale the offset according to the `offset_type`.
    /// Linear offset is described as A * x + B: A is the drift per tick and B the initial offset.
    /// Periodic is described as A * sin(x * 2 * pi / B): A controls the amplitude and B the
    /// period in terms of ticks.
    /// Step function represents a discontinuous jump in the wall-clock time. B is the period in
    /// which the jumps occur. A is the amplitude of the step.
    /// Non-ideal is similar to periodic except the phase is adjusted using a random number taken
    /// from a normal distribution with mean=0, stddev=10. Finally, a random offset (up to
    /// offset_coefficient_C) is added to the result.
    offset_coefficient_A: i64,
    offset_coefficient_B: i64,
    offset_coefficient_C: u32 = 0,

    // Fixed seed so the non_ideal jitter is reproducible across runs.
    prng: std.rand.DefaultPrng = std.rand.DefaultPrng.init(0),

    /// The number of ticks elapsed since initialization.
    ticks: u64 = 0,

    /// The instant in time chosen as the origin of this time source.
    epoch: i64 = 0,

    /// Simulated monotonic clock in nanoseconds: ticks elapsed times resolution.
    pub fn monotonic(self: *Self) u64 {
        return self.ticks * self.resolution;
    }

    /// Simulated wall-clock time: the monotonic time since epoch, skewed by offset().
    // NOTE(review): the offset is subtracted here — confirm the sign convention
    // matches how vsr/clock.zig interprets clock error.
    pub fn realtime(self: *Self) i64 {
        return self.epoch + @intCast(i64, self.monotonic()) - self.offset(self.ticks);
    }

    /// Computes the clock offset (in nanoseconds) at the given tick,
    /// according to `offset_type` and the coefficients documented above.
    pub fn offset(self: *Self, ticks: u64) i64 {
        switch (self.offset_type) {
            .linear => {
                const drift_per_tick = self.offset_coefficient_A;
                return @intCast(i64, ticks) * drift_per_tick + @intCast(
                    i64,
                    self.offset_coefficient_B,
                );
            },
            .periodic => {
                // One full sine period every offset_coefficient_B ticks.
                const unscaled = std.math.sin(@intToFloat(f64, ticks) * 2 * std.math.pi /
                    @intToFloat(f64, self.offset_coefficient_B));
                const scaled = @intToFloat(f64, self.offset_coefficient_A) * unscaled;
                return @floatToInt(i64, std.math.floor(scaled));
            },
            .step => {
                // Zero offset until tick B, then a constant jump of A.
                return if (ticks > self.offset_coefficient_B) self.offset_coefficient_A else 0;
            },
            .non_ideal => {
                // Periodic with a randomly perturbed period, plus uniform jitter in [-C, C].
                const phase: f64 = @intToFloat(f64, ticks) * 2 * std.math.pi /
                    (@intToFloat(f64, self.offset_coefficient_B) + self.prng.random.floatNorm(f64) * 10);
                const unscaled = std.math.sin(phase);
                const scaled = @intToFloat(f64, self.offset_coefficient_A) * unscaled;
                return @floatToInt(i64, std.math.floor(scaled)) +
                    self.prng.random.intRangeAtMost(
                    i64,
                    -@intCast(i64, self.offset_coefficient_C),
                    self.offset_coefficient_C,
                );
            },
        }
    }

    /// Advances simulated time by one tick.
    pub fn tick(self: *Self) void {
        self.ticks += 1;
    }
};
|
|
@@ -249,7 +249,6 @@ pub const CommitTransferResult = packed enum(u32) {
|
|
|
249
249
|
transfer_not_found,
|
|
250
250
|
transfer_not_two_phase_commit,
|
|
251
251
|
transfer_expired,
|
|
252
|
-
already_auto_committed,
|
|
253
252
|
already_committed,
|
|
254
253
|
already_committed_but_accepted,
|
|
255
254
|
already_committed_but_rejected,
|
|
@@ -325,10 +324,14 @@ pub const CommitTransfersResult = packed struct {
|
|
|
325
324
|
};
|
|
326
325
|
|
|
327
326
|
comptime {
|
|
328
|
-
|
|
327
|
+
const target = std.Target.current;
|
|
328
|
+
|
|
329
|
+
if (target.os.tag != .linux and !target.isDarwin()) {
|
|
330
|
+
@compileError("linux or macos required for io");
|
|
331
|
+
}
|
|
329
332
|
|
|
330
333
|
// We require little-endian architectures everywhere for efficient network deserialization:
|
|
331
|
-
if (
|
|
334
|
+
if (target.cpu.arch.endian() != std.builtin.Endian.Little) {
|
|
332
335
|
@compileError("big-endian systems not supported");
|
|
333
336
|
}
|
|
334
337
|
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const is_darwin = std.Target.current.isDarwin();
|
|
4
|
+
const config = @import("./config.zig");
|
|
5
|
+
|
|
6
|
+
/// The production time source: wraps the OS monotonic and realtime clocks
/// (Linux and macOS), with a guard against monotonic-clock regressions.
pub const Time = struct {
    const Self = @This();

    /// Hardware and/or software bugs can mean that the monotonic clock may regress.
    /// One example (of many): https://bugzilla.redhat.com/show_bug.cgi?id=448449
    /// We crash the process for safety if this ever happens, to protect against infinite loops.
    /// It's better to crash and come back with a valid monotonic clock than get stuck forever.
    monotonic_guard: u64 = 0,

    /// A timestamp to measure elapsed time, meaningful only on the same system, not across reboots.
    /// Always use a monotonic timestamp if the goal is to measure elapsed time.
    /// This clock is not affected by discontinuous jumps in the system time, for example if the
    /// system administrator manually changes the clock.
    pub fn monotonic(self: *Self) u64 {
        const m = blk: {
            // Uses mach_continuous_time() instead of mach_absolute_time() as it counts while suspended.
            // https://developer.apple.com/documentation/kernel/1646199-mach_continuous_time
            // https://opensource.apple.com/source/Libc/Libc-1158.1.2/gen/clock_gettime.c.auto.html
            if (is_darwin) {
                const darwin = struct {
                    const mach_timebase_info_t = std.os.darwin.mach_timebase_info_data;
                    extern "c" fn mach_timebase_info(info: *mach_timebase_info_t) std.os.darwin.kern_return_t;
                    extern "c" fn mach_continuous_time() u64;
                };

                const now = darwin.mach_continuous_time();
                var info: darwin.mach_timebase_info_t = undefined;
                if (darwin.mach_timebase_info(&info) != 0) @panic("mach_timebase_info() failed");
                // NOTE(review): `now * info.numer` may overflow u64 for very large
                // uptimes when numer > 1 — confirm this is acceptable here.
                return (now * info.numer) / info.denom;
            }

            // The true monotonic clock on Linux is not in fact CLOCK_MONOTONIC:
            // CLOCK_MONOTONIC excludes elapsed time while the system is suspended (e.g. VM migration).
            // CLOCK_BOOTTIME is the same as CLOCK_MONOTONIC but includes elapsed time during a suspend.
            // For more detail and why CLOCK_MONOTONIC_RAW is even worse than CLOCK_MONOTONIC,
            // see https://github.com/ziglang/zig/pull/933#discussion_r656021295.
            var ts: std.os.timespec = undefined;
            std.os.clock_gettime(std.os.CLOCK_BOOTTIME, &ts) catch @panic("CLOCK_BOOTTIME required");
            break :blk @intCast(u64, ts.tv_sec) * std.time.ns_per_s + @intCast(u64, ts.tv_nsec);
        };

        // "Oops!...I Did It Again"
        // Crash rather than ever let a regressed monotonic reading escape.
        if (m < self.monotonic_guard) @panic("a hardware/kernel bug regressed the monotonic clock");
        self.monotonic_guard = m;
        return m;
    }

    /// A timestamp to measure real (i.e. wall clock) time, meaningful across systems, and reboots.
    /// This clock is affected by discontinuous jumps in the system time.
    pub fn realtime(self: *Self) i64 {
        // macos has supported clock_gettime() since 10.12:
        // https://opensource.apple.com/source/Libc/Libc-1158.1.2/gen/clock_gettime.3.auto.html

        var ts: std.os.timespec = undefined;
        std.os.clock_gettime(std.os.CLOCK_REALTIME, &ts) catch unreachable;
        return @as(i64, ts.tv_sec) * std.time.ns_per_s + ts.tv_nsec;
    }

    /// No-op: real time advances on its own. Exists to match the interface of the
    /// simulated Time in test/time.zig, whose tick() drives the clock forward.
    pub fn tick(self: *Self) void {}
};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// Root test entry point: importing a module here pulls its `test` blocks into
// the build so `zig test` on this file runs the whole unit-test suite.
test {
    _ = @import("vsr.zig");
    _ = @import("vsr/journal.zig");
    _ = @import("vsr/marzullo.zig");
    // TODO: clean up logging of clock test and enable it here.
    //_ = @import("vsr/clock.zig");

    _ = @import("state_machine.zig");

    _ = @import("fifo.zig");
    _ = @import("ring_buffer.zig");

    _ = @import("io.zig");
}
|