tigerbeetle-node 0.4.2 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -5
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +18 -16
- package/dist/index.js +35 -13
- package/dist/index.js.map +1 -1
- package/dist/test.js +12 -0
- package/dist/test.js.map +1 -1
- package/package.json +2 -2
- package/scripts/postinstall.sh +2 -2
- package/src/benchmark.ts +2 -2
- package/src/index.ts +29 -4
- package/src/node.zig +120 -17
- package/src/test.ts +14 -0
- package/src/tigerbeetle/scripts/install.sh +1 -1
- package/src/tigerbeetle/scripts/install_zig.bat +109 -0
- package/src/tigerbeetle/scripts/install_zig.sh +4 -2
- package/src/tigerbeetle/scripts/lint.zig +8 -2
- package/src/tigerbeetle/scripts/vopr.bat +48 -0
- package/src/tigerbeetle/src/benchmark.zig +10 -8
- package/src/tigerbeetle/src/cli.zig +6 -4
- package/src/tigerbeetle/src/config.zig +2 -2
- package/src/tigerbeetle/src/demo.zig +119 -89
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +5 -3
- package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +2 -3
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +5 -3
- package/src/tigerbeetle/src/demo_04_create_transfers_two_phase_commit.zig +5 -3
- package/src/tigerbeetle/src/demo_05_accept_transfers.zig +5 -3
- package/src/tigerbeetle/src/demo_06_reject_transfers.zig +5 -3
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +7 -0
- package/src/tigerbeetle/src/io/benchmark.zig +238 -0
- package/src/tigerbeetle/src/{io_darwin.zig → io/darwin.zig} +89 -124
- package/src/tigerbeetle/src/io/linux.zig +933 -0
- package/src/tigerbeetle/src/io/test.zig +621 -0
- package/src/tigerbeetle/src/io.zig +7 -1328
- package/src/tigerbeetle/src/main.zig +18 -10
- package/src/tigerbeetle/src/message_bus.zig +43 -60
- package/src/tigerbeetle/src/message_pool.zig +3 -2
- package/src/tigerbeetle/src/ring_buffer.zig +135 -68
- package/src/tigerbeetle/src/simulator.zig +41 -37
- package/src/tigerbeetle/src/state_machine.zig +851 -26
- package/src/tigerbeetle/src/storage.zig +49 -46
- package/src/tigerbeetle/src/test/cluster.zig +2 -2
- package/src/tigerbeetle/src/test/message_bus.zig +6 -6
- package/src/tigerbeetle/src/test/network.zig +3 -3
- package/src/tigerbeetle/src/test/packet_simulator.zig +32 -29
- package/src/tigerbeetle/src/test/state_checker.zig +2 -2
- package/src/tigerbeetle/src/test/state_machine.zig +4 -0
- package/src/tigerbeetle/src/test/storage.zig +39 -19
- package/src/tigerbeetle/src/test/time.zig +2 -2
- package/src/tigerbeetle/src/tigerbeetle.zig +6 -129
- package/src/tigerbeetle/src/time.zig +6 -5
- package/src/tigerbeetle/src/vsr/client.zig +11 -11
- package/src/tigerbeetle/src/vsr/clock.zig +26 -43
- package/src/tigerbeetle/src/vsr/journal.zig +7 -6
- package/src/tigerbeetle/src/vsr/marzullo.zig +6 -3
- package/src/tigerbeetle/src/vsr/replica.zig +51 -48
- package/src/tigerbeetle/src/vsr.zig +24 -20
- package/src/translate.zig +55 -55
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const std = @import("std");
|
|
2
2
|
const assert = std.debug.assert;
|
|
3
3
|
const log = std.log.scoped(.clock);
|
|
4
|
+
const fmt = std.fmt;
|
|
4
5
|
|
|
5
6
|
const config = @import("../config.zig");
|
|
6
7
|
|
|
@@ -95,7 +96,7 @@ pub fn Clock(comptime Time: type) type {
|
|
|
95
96
|
synchronization_disabled: bool,
|
|
96
97
|
|
|
97
98
|
pub fn init(
|
|
98
|
-
allocator:
|
|
99
|
+
allocator: std.mem.Allocator,
|
|
99
100
|
/// The size of the cluster, i.e. the number of clock sources (including this replica).
|
|
100
101
|
replica_count: u8,
|
|
101
102
|
replica: u8,
|
|
@@ -133,7 +134,7 @@ pub fn Clock(comptime Time: type) type {
|
|
|
133
134
|
return self;
|
|
134
135
|
}
|
|
135
136
|
|
|
136
|
-
pub fn deinit(self: *Self, allocator:
|
|
137
|
+
pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
|
|
137
138
|
allocator.free(self.epoch.sources);
|
|
138
139
|
allocator.free(self.window.sources);
|
|
139
140
|
allocator.free(self.marzullo_tuples);
|
|
@@ -273,7 +274,7 @@ pub fn Clock(comptime Time: type) type {
|
|
|
273
274
|
// Expire the current epoch if successive windows failed to synchronize:
|
|
274
275
|
// Gradual clock drift prevents us from using an epoch for more than a few seconds.
|
|
275
276
|
if (self.epoch.elapsed(self) >= epoch_max) {
|
|
276
|
-
log.
|
|
277
|
+
log.err(
|
|
277
278
|
"{}: no agreement on cluster time (partitioned or too many clock faults)",
|
|
278
279
|
.{self.replica},
|
|
279
280
|
);
|
|
@@ -320,13 +321,13 @@ pub fn Clock(comptime Time: type) type {
|
|
|
320
321
|
// We took too long to synchronize the window, expire stale samples...
|
|
321
322
|
const sources_sampled = self.window.sources_sampled();
|
|
322
323
|
if (sources_sampled <= @divTrunc(self.window.sources.len, 2)) {
|
|
323
|
-
log.
|
|
324
|
+
log.err("{}: synchronization failed, partitioned (sources={} samples={})", .{
|
|
324
325
|
self.replica,
|
|
325
326
|
sources_sampled,
|
|
326
327
|
self.window.samples,
|
|
327
328
|
});
|
|
328
329
|
} else {
|
|
329
|
-
log.
|
|
330
|
+
log.err("{}: synchronization failed, no agreement (sources={} samples={})", .{
|
|
330
331
|
self.replica,
|
|
331
332
|
sources_sampled,
|
|
332
333
|
self.window.samples,
|
|
@@ -377,9 +378,9 @@ pub fn Clock(comptime Time: type) type {
|
|
|
377
378
|
self.replica,
|
|
378
379
|
new_interval.sources_true,
|
|
379
380
|
self.epoch.sources.len,
|
|
380
|
-
fmtDurationSigned(new_interval.lower_bound),
|
|
381
|
-
fmtDurationSigned(new_interval.upper_bound),
|
|
382
|
-
fmtDurationSigned(new_interval.upper_bound - new_interval.lower_bound),
|
|
381
|
+
fmt.fmtDurationSigned(new_interval.lower_bound),
|
|
382
|
+
fmt.fmtDurationSigned(new_interval.upper_bound),
|
|
383
|
+
fmt.fmtDurationSigned(new_interval.upper_bound - new_interval.lower_bound),
|
|
383
384
|
});
|
|
384
385
|
|
|
385
386
|
const elapsed = @intCast(i64, self.epoch.elapsed(self));
|
|
@@ -393,12 +394,12 @@ pub fn Clock(comptime Time: type) type {
|
|
|
393
394
|
if (delta < std.time.ns_per_ms) {
|
|
394
395
|
log.info("{}: system time is {} behind", .{
|
|
395
396
|
self.replica,
|
|
396
|
-
fmtDurationSigned(delta),
|
|
397
|
+
fmt.fmtDurationSigned(delta),
|
|
397
398
|
});
|
|
398
399
|
} else {
|
|
399
400
|
log.err("{}: system time is {} behind, clamping system time to cluster time", .{
|
|
400
401
|
self.replica,
|
|
401
|
-
fmtDurationSigned(delta),
|
|
402
|
+
fmt.fmtDurationSigned(delta),
|
|
402
403
|
});
|
|
403
404
|
}
|
|
404
405
|
} else {
|
|
@@ -406,12 +407,12 @@ pub fn Clock(comptime Time: type) type {
|
|
|
406
407
|
if (delta < std.time.ns_per_ms) {
|
|
407
408
|
log.info("{}: system time is {} ahead", .{
|
|
408
409
|
self.replica,
|
|
409
|
-
fmtDurationSigned(delta),
|
|
410
|
+
fmt.fmtDurationSigned(delta),
|
|
410
411
|
});
|
|
411
412
|
} else {
|
|
412
413
|
log.err("{}: system time is {} ahead, clamping system time to cluster time", .{
|
|
413
414
|
self.replica,
|
|
414
|
-
fmtDurationSigned(delta),
|
|
415
|
+
fmt.fmtDurationSigned(delta),
|
|
415
416
|
});
|
|
416
417
|
}
|
|
417
418
|
}
|
|
@@ -450,25 +451,6 @@ pub fn Clock(comptime Time: type) type {
|
|
|
450
451
|
};
|
|
451
452
|
}
|
|
452
453
|
|
|
453
|
-
/// Return a Formatter for a signed number of nanoseconds according to magnitude:
|
|
454
|
-
/// [#y][#w][#d][#h][#m]#[.###][n|u|m]s
|
|
455
|
-
pub fn fmtDurationSigned(ns: i64) std.fmt.Formatter(formatDurationSigned) {
|
|
456
|
-
return .{ .data = ns };
|
|
457
|
-
}
|
|
458
|
-
|
|
459
|
-
fn formatDurationSigned(
|
|
460
|
-
ns: i64,
|
|
461
|
-
comptime fmt: []const u8,
|
|
462
|
-
options: std.fmt.FormatOptions,
|
|
463
|
-
writer: anytype,
|
|
464
|
-
) !void {
|
|
465
|
-
if (ns < 0) {
|
|
466
|
-
try writer.print("-{}", .{std.fmt.fmtDuration(@intCast(u64, -ns))});
|
|
467
|
-
} else {
|
|
468
|
-
try writer.print("{}", .{std.fmt.fmtDuration(@intCast(u64, ns))});
|
|
469
|
-
}
|
|
470
|
-
}
|
|
471
|
-
|
|
472
454
|
const testing = std.testing;
|
|
473
455
|
const OffsetType = @import("../test/time.zig").OffsetType;
|
|
474
456
|
const DeterministicTime = @import("../test/time.zig").Time;
|
|
@@ -482,7 +464,7 @@ const ClockUnitTestContainer = struct {
|
|
|
482
464
|
learn_interval: u64 = 5,
|
|
483
465
|
|
|
484
466
|
pub fn init(
|
|
485
|
-
allocator:
|
|
467
|
+
allocator: std.mem.Allocator,
|
|
486
468
|
offset_type: OffsetType,
|
|
487
469
|
offset_coefficient_A: i64,
|
|
488
470
|
offset_coefficient_B: i64,
|
|
@@ -581,7 +563,7 @@ const ClockUnitTestContainer = struct {
|
|
|
581
563
|
};
|
|
582
564
|
|
|
583
565
|
test "ideal clocks get clamped to cluster time" {
|
|
584
|
-
std.testing.log_level = .
|
|
566
|
+
std.testing.log_level = .err;
|
|
585
567
|
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
|
586
568
|
defer arena.deinit();
|
|
587
569
|
const allocator = &arena.allocator;
|
|
@@ -646,7 +628,10 @@ const ClockSimulator = struct {
|
|
|
646
628
|
clock_simulator: *ClockSimulator,
|
|
647
629
|
|
|
648
630
|
/// PacketSimulator requires this function, but we don't actually have anything to deinit.
|
|
649
|
-
pub fn deinit(packet: *const Packet, path: Path) void {
|
|
631
|
+
pub fn deinit(packet: *const Packet, path: Path) void {
|
|
632
|
+
_ = packet;
|
|
633
|
+
_ = path;
|
|
634
|
+
}
|
|
650
635
|
};
|
|
651
636
|
|
|
652
637
|
const Options = struct {
|
|
@@ -655,14 +640,14 @@ const ClockSimulator = struct {
|
|
|
655
640
|
network_options: PacketSimulatorOptions,
|
|
656
641
|
};
|
|
657
642
|
|
|
658
|
-
allocator:
|
|
643
|
+
allocator: std.mem.Allocator,
|
|
659
644
|
options: Options,
|
|
660
645
|
ticks: u64 = 0,
|
|
661
646
|
network: PacketSimulator(Packet),
|
|
662
647
|
clocks: []DeterministicClock,
|
|
663
648
|
prng: std.rand.DefaultPrng,
|
|
664
649
|
|
|
665
|
-
pub fn init(allocator:
|
|
650
|
+
pub fn init(allocator: std.mem.Allocator, options: Options) !ClockSimulator {
|
|
666
651
|
var self = ClockSimulator{
|
|
667
652
|
.allocator = allocator,
|
|
668
653
|
.options = options,
|
|
@@ -753,13 +738,12 @@ const ClockSimulator = struct {
|
|
|
753
738
|
};
|
|
754
739
|
|
|
755
740
|
test "fuzz test" {
|
|
756
|
-
std.testing.log_level = .
|
|
741
|
+
std.testing.log_level = .err; // silence all clock logs
|
|
757
742
|
var arena_allocator = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
|
758
743
|
defer arena_allocator.deinit();
|
|
759
744
|
const allocator = &arena_allocator.allocator;
|
|
760
745
|
const ticks_max: u64 = 1_000_000;
|
|
761
746
|
const clock_count: u8 = 3;
|
|
762
|
-
const test_delta_time: u64 = std.time.ns_per_s / 2;
|
|
763
747
|
const SystemTime = @import("../time.zig").Time;
|
|
764
748
|
var system_time = SystemTime{};
|
|
765
749
|
var seed = @intCast(u64, system_time.realtime());
|
|
@@ -787,7 +771,6 @@ test "fuzz test" {
|
|
|
787
771
|
while (simulator.ticks < ticks_max) {
|
|
788
772
|
simulator.tick();
|
|
789
773
|
|
|
790
|
-
const test_time: u64 = simulator.ticks * test_delta_time;
|
|
791
774
|
for (simulator.clocks) |*clock, index| {
|
|
792
775
|
var offset = clock.time.offset(simulator.ticks);
|
|
793
776
|
var abs_offset = if (offset >= 0) @intCast(u64, offset) else @intCast(u64, -offset);
|
|
@@ -818,11 +801,11 @@ test "fuzz test" {
|
|
|
818
801
|
clock_count,
|
|
819
802
|
});
|
|
820
803
|
std.debug.print("absolute clock offsets with respect to test time:\n", .{});
|
|
821
|
-
std.debug.print("maximum={}\n", .{fmtDurationSigned(@intCast(i64, max_clock_offset))});
|
|
822
|
-
std.debug.print("minimum={}\n", .{fmtDurationSigned(@intCast(i64, min_clock_offset))});
|
|
804
|
+
std.debug.print("maximum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, max_clock_offset))});
|
|
805
|
+
std.debug.print("minimum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, min_clock_offset))});
|
|
823
806
|
std.debug.print("\nabsolute synchronization errors between clocks:\n", .{});
|
|
824
|
-
std.debug.print("maximum={}\n", .{fmtDurationSigned(@intCast(i64, max_sync_error))});
|
|
825
|
-
std.debug.print("minimum={}\n", .{fmtDurationSigned(@intCast(i64, min_sync_error))});
|
|
807
|
+
std.debug.print("maximum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, max_sync_error))});
|
|
808
|
+
std.debug.print("minimum={}\n", .{fmt.fmtDurationSigned(@intCast(i64, min_sync_error))});
|
|
826
809
|
std.debug.print("clock ticks without synchronization={d}\n", .{
|
|
827
810
|
clock_ticks_without_synchronization,
|
|
828
811
|
});
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
const std = @import("std");
|
|
2
|
+
const builtin = @import("builtin");
|
|
2
3
|
const Allocator = std.mem.Allocator;
|
|
3
4
|
const assert = std.debug.assert;
|
|
4
5
|
const math = std.math;
|
|
@@ -120,7 +121,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
|
|
|
120
121
|
recovering: bool = false,
|
|
121
122
|
|
|
122
123
|
pub fn init(
|
|
123
|
-
allocator:
|
|
124
|
+
allocator: Allocator,
|
|
124
125
|
storage: *Storage,
|
|
125
126
|
replica: u8,
|
|
126
127
|
size: u64,
|
|
@@ -201,7 +202,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
|
|
|
201
202
|
return self;
|
|
202
203
|
}
|
|
203
204
|
|
|
204
|
-
pub fn deinit(self: *Self, allocator:
|
|
205
|
+
pub fn deinit(self: *Self, allocator: Allocator) void {
|
|
205
206
|
const replica = @fieldParentPtr(Replica, "journal", self);
|
|
206
207
|
|
|
207
208
|
self.dirty.deinit(allocator);
|
|
@@ -278,7 +279,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
|
|
|
278
279
|
return self.entry_for_op(header.op + 1);
|
|
279
280
|
}
|
|
280
281
|
|
|
281
|
-
pub fn next_offset(
|
|
282
|
+
pub fn next_offset(header: *const Header) u64 {
|
|
282
283
|
// TODO Snapshots
|
|
283
284
|
assert(header.command == .prepare);
|
|
284
285
|
return header.offset + vsr.sector_ceil(header.size);
|
|
@@ -860,7 +861,7 @@ pub fn Journal(comptime Replica: type, comptime Storage: type) type {
|
|
|
860
861
|
const sectors = message.buffer[0..vsr.sector_ceil(message.header.size)];
|
|
861
862
|
assert(message.header.offset + sectors.len <= self.size_circular_buffer);
|
|
862
863
|
|
|
863
|
-
if (
|
|
864
|
+
if (builtin.mode == .Debug) {
|
|
864
865
|
// Assert that any sector padding has already been zeroed:
|
|
865
866
|
var sum_of_sector_padding_bytes: u32 = 0;
|
|
866
867
|
for (sectors[message.header.size..]) |byte| sum_of_sector_padding_bytes += byte;
|
|
@@ -1227,7 +1228,7 @@ pub const BitSet = struct {
|
|
|
1227
1228
|
/// The number of bits set (updated incrementally as bits are set or cleared):
|
|
1228
1229
|
len: u64 = 0,
|
|
1229
1230
|
|
|
1230
|
-
fn init(allocator:
|
|
1231
|
+
fn init(allocator: Allocator, count: u64) !BitSet {
|
|
1231
1232
|
const bits = try allocator.alloc(bool, count);
|
|
1232
1233
|
errdefer allocator.free(bits);
|
|
1233
1234
|
std.mem.set(bool, bits, false);
|
|
@@ -1235,7 +1236,7 @@ pub const BitSet = struct {
|
|
|
1235
1236
|
return BitSet{ .bits = bits };
|
|
1236
1237
|
}
|
|
1237
1238
|
|
|
1238
|
-
fn deinit(self: *BitSet, allocator:
|
|
1239
|
+
fn deinit(self: *BitSet, allocator: Allocator) void {
|
|
1239
1240
|
allocator.free(self.bits);
|
|
1240
1241
|
}
|
|
1241
1242
|
|
|
@@ -117,6 +117,8 @@ pub const Marzullo = struct {
|
|
|
117
117
|
/// upper bound. Alternatively, if such pathological overlaps are considered objectionable then
|
|
118
118
|
/// they can be avoided by sorting the upper bound before the lower bound.
|
|
119
119
|
fn less_than(context: void, a: Tuple, b: Tuple) bool {
|
|
120
|
+
_ = context;
|
|
121
|
+
|
|
120
122
|
if (a.offset < b.offset) return true;
|
|
121
123
|
if (b.offset < a.offset) return false;
|
|
122
124
|
if (a.bound == .lower and b.bound == .upper) return true;
|
|
@@ -130,9 +132,10 @@ pub const Marzullo = struct {
|
|
|
130
132
|
};
|
|
131
133
|
|
|
132
134
|
fn test_smallest_interval(bounds: []const i64, smallest_interval: Marzullo.Interval) !void {
|
|
133
|
-
var
|
|
134
|
-
defer
|
|
135
|
-
|
|
135
|
+
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
|
136
|
+
defer arena.deinit();
|
|
137
|
+
|
|
138
|
+
const allocator = arena.allocator();
|
|
136
139
|
|
|
137
140
|
var tuples = try allocator.alloc(Marzullo.Tuple, bounds.len);
|
|
138
141
|
for (bounds) |bound, i| {
|
|
@@ -197,7 +197,7 @@ pub fn Replica(
|
|
|
197
197
|
on_change_state: ?fn (replica: *Self) void = null,
|
|
198
198
|
|
|
199
199
|
pub fn init(
|
|
200
|
-
allocator:
|
|
200
|
+
allocator: Allocator,
|
|
201
201
|
cluster: u32,
|
|
202
202
|
replica_count: u8,
|
|
203
203
|
replica: u8,
|
|
@@ -239,7 +239,7 @@ pub fn Replica(
|
|
|
239
239
|
|
|
240
240
|
var client_table: ClientTable = .{};
|
|
241
241
|
errdefer client_table.deinit(allocator);
|
|
242
|
-
try client_table.
|
|
242
|
+
try client_table.ensureTotalCapacity(allocator, @intCast(u32, config.clients_max));
|
|
243
243
|
assert(client_table.capacity() >= config.clients_max);
|
|
244
244
|
|
|
245
245
|
var init_prepare = Header{
|
|
@@ -363,7 +363,7 @@ pub fn Replica(
|
|
|
363
363
|
|
|
364
364
|
/// Free all memory and unref all messages held by the replica
|
|
365
365
|
/// This does not deinitialize the StateMachine, MessageBus, Storage, or Time
|
|
366
|
-
pub fn deinit(self: *Self, allocator:
|
|
366
|
+
pub fn deinit(self: *Self, allocator: Allocator) void {
|
|
367
367
|
self.journal.deinit(allocator);
|
|
368
368
|
self.clock.deinit(allocator);
|
|
369
369
|
|
|
@@ -452,7 +452,7 @@ pub fn Replica(
|
|
|
452
452
|
});
|
|
453
453
|
|
|
454
454
|
if (message.header.invalid()) |reason| {
|
|
455
|
-
log.
|
|
455
|
+
log.err("{}: on_message: invalid ({s})", .{ self.replica, reason });
|
|
456
456
|
return;
|
|
457
457
|
}
|
|
458
458
|
|
|
@@ -506,7 +506,7 @@ pub fn Replica(
|
|
|
506
506
|
}
|
|
507
507
|
|
|
508
508
|
if (self.loopback_queue) |loopback_message| {
|
|
509
|
-
log.
|
|
509
|
+
log.err("{}: on_message: on_{s}() queued a {s} loopback message with no flush", .{
|
|
510
510
|
self.replica,
|
|
511
511
|
@tagName(message.header.command),
|
|
512
512
|
@tagName(loopback_message.header.command),
|
|
@@ -581,7 +581,7 @@ pub fn Replica(
|
|
|
581
581
|
assert(message.header.view <= self.view); // The client's view may be behind ours.
|
|
582
582
|
|
|
583
583
|
const realtime = self.clock.realtime_synchronized() orelse {
|
|
584
|
-
log.
|
|
584
|
+
log.err("{}: on_request: dropping (clock not synchronized)", .{self.replica});
|
|
585
585
|
return;
|
|
586
586
|
};
|
|
587
587
|
|
|
@@ -599,7 +599,7 @@ pub fn Replica(
|
|
|
599
599
|
message.header.view = self.view;
|
|
600
600
|
message.header.op = self.op + 1;
|
|
601
601
|
message.header.commit = self.commit_max;
|
|
602
|
-
message.header.offset =
|
|
602
|
+
message.header.offset = Journal.next_offset(latest_entry);
|
|
603
603
|
message.header.replica = self.replica;
|
|
604
604
|
message.header.command = .prepare;
|
|
605
605
|
|
|
@@ -885,7 +885,7 @@ pub fn Replica(
|
|
|
885
885
|
if (self.message_bus.get_message()) |available| {
|
|
886
886
|
self.message_bus.unref(available);
|
|
887
887
|
} else {
|
|
888
|
-
log.
|
|
888
|
+
log.err("{}: on_start_view_change: waiting for message for do_view_change", .{
|
|
889
889
|
self.replica,
|
|
890
890
|
});
|
|
891
891
|
return;
|
|
@@ -986,7 +986,7 @@ pub fn Replica(
|
|
|
986
986
|
assert(replica_view_normal < m.header.view);
|
|
987
987
|
|
|
988
988
|
var replica_latest = Header.reserved();
|
|
989
|
-
|
|
989
|
+
set_latest_op(self.message_body_as_headers(m), &replica_latest);
|
|
990
990
|
assert(replica_latest.op == m.header.op);
|
|
991
991
|
|
|
992
992
|
log.debug(
|
|
@@ -1062,7 +1062,7 @@ pub fn Replica(
|
|
|
1062
1062
|
assert(message.header.view == self.view);
|
|
1063
1063
|
|
|
1064
1064
|
var latest = Header.reserved();
|
|
1065
|
-
|
|
1065
|
+
set_latest_op(self.message_body_as_headers(message), &latest);
|
|
1066
1066
|
assert(latest.op == message.header.op);
|
|
1067
1067
|
|
|
1068
1068
|
self.set_latest_op_and_k(&latest, message.header.commit, "on_start_view");
|
|
@@ -1098,7 +1098,7 @@ pub fn Replica(
|
|
|
1098
1098
|
assert(self.leader());
|
|
1099
1099
|
|
|
1100
1100
|
const start_view = self.create_view_change_message(.start_view) orelse {
|
|
1101
|
-
log.
|
|
1101
|
+
log.err("{}: on_request_start_view: dropping start_view, no message available", .{
|
|
1102
1102
|
self.replica,
|
|
1103
1103
|
});
|
|
1104
1104
|
return;
|
|
@@ -1127,7 +1127,7 @@ pub fn Replica(
|
|
|
1127
1127
|
}
|
|
1128
1128
|
|
|
1129
1129
|
const response = self.message_bus.get_message() orelse {
|
|
1130
|
-
log.
|
|
1130
|
+
log.err("{}: on_recovery: ignoring (waiting for message)", .{self.replica});
|
|
1131
1131
|
return;
|
|
1132
1132
|
};
|
|
1133
1133
|
defer self.message_bus.unref(response);
|
|
@@ -1172,7 +1172,10 @@ pub fn Replica(
|
|
|
1172
1172
|
}
|
|
1173
1173
|
|
|
1174
1174
|
/// TODO This is a work in progress (out of scope for the bounty)
|
|
1175
|
-
fn on_recovery_response(self: *Self, message: *Message) void {
|
|
1175
|
+
fn on_recovery_response(self: *Self, message: *Message) void {
|
|
1176
|
+
_ = self;
|
|
1177
|
+
_ = message;
|
|
1178
|
+
}
|
|
1176
1179
|
|
|
1177
1180
|
fn on_request_prepare(self: *Self, message: *const Message) void {
|
|
1178
1181
|
if (self.ignore_repair_message(message)) return;
|
|
@@ -1274,7 +1277,7 @@ pub fn Replica(
|
|
|
1274
1277
|
assert(message.header.replica != self.replica);
|
|
1275
1278
|
|
|
1276
1279
|
const response = self.message_bus.get_message() orelse {
|
|
1277
|
-
log.
|
|
1280
|
+
log.err("{}: on_request_headers: ignoring (op={}..{}, no message available)", .{
|
|
1278
1281
|
self.replica,
|
|
1279
1282
|
message.header.commit,
|
|
1280
1283
|
message.header.op,
|
|
@@ -1467,7 +1470,7 @@ pub fn Replica(
|
|
|
1467
1470
|
assert(self.status == .normal);
|
|
1468
1471
|
assert(self.leader());
|
|
1469
1472
|
|
|
1470
|
-
const prepare = self.pipeline.
|
|
1473
|
+
const prepare = self.pipeline.head_ptr().?;
|
|
1471
1474
|
assert(prepare.message.header.command == .prepare);
|
|
1472
1475
|
|
|
1473
1476
|
if (prepare.ok_quorum_received) {
|
|
@@ -1513,7 +1516,7 @@ pub fn Replica(
|
|
|
1513
1516
|
return;
|
|
1514
1517
|
}
|
|
1515
1518
|
|
|
1516
|
-
self.prepare_timeout.backoff(
|
|
1519
|
+
self.prepare_timeout.backoff(self.prng.random());
|
|
1517
1520
|
|
|
1518
1521
|
assert(waiting_len <= self.replica_count);
|
|
1519
1522
|
for (waiting[0..waiting_len]) |replica| {
|
|
@@ -1682,7 +1685,6 @@ pub fn Replica(
|
|
|
1682
1685
|
|
|
1683
1686
|
/// Returns whether `b` succeeds `a` by having a newer view or same view and newer op.
|
|
1684
1687
|
fn ascending_viewstamps(
|
|
1685
|
-
self: *Self,
|
|
1686
1688
|
a: *const Header,
|
|
1687
1689
|
b: *const Header,
|
|
1688
1690
|
) bool {
|
|
@@ -1834,7 +1836,7 @@ pub fn Replica(
|
|
|
1834
1836
|
|
|
1835
1837
|
// TODO We can optimize this to commit into the client table reply if it exists.
|
|
1836
1838
|
const reply = self.message_bus.get_message() orelse {
|
|
1837
|
-
log.
|
|
1839
|
+
log.err("{}: commit_ops_commit: waiting for message", .{self.replica});
|
|
1838
1840
|
return;
|
|
1839
1841
|
};
|
|
1840
1842
|
defer self.message_bus.unref(reply);
|
|
@@ -1925,7 +1927,7 @@ pub fn Replica(
|
|
|
1925
1927
|
assert(self.leader());
|
|
1926
1928
|
assert(self.pipeline.count > 0);
|
|
1927
1929
|
|
|
1928
|
-
while (self.pipeline.
|
|
1930
|
+
while (self.pipeline.head_ptr()) |prepare| {
|
|
1929
1931
|
assert(self.pipeline.count > 0);
|
|
1930
1932
|
assert(self.commit_min == self.commit_max);
|
|
1931
1933
|
assert(self.commit_max + self.pipeline.count == self.op);
|
|
@@ -1947,7 +1949,7 @@ pub fn Replica(
|
|
|
1947
1949
|
// TODO We can optimize this to commit into the client table reply if it exists.
|
|
1948
1950
|
const reply = self.message_bus.get_message() orelse {
|
|
1949
1951
|
// Eventually handled by on_prepare_timeout().
|
|
1950
|
-
log.
|
|
1952
|
+
log.err("{}: commit_pipeline: waiting for message", .{self.replica});
|
|
1951
1953
|
return;
|
|
1952
1954
|
};
|
|
1953
1955
|
defer self.message_bus.unref(reply);
|
|
@@ -2028,13 +2030,14 @@ pub fn Replica(
|
|
|
2028
2030
|
assert(request == 0);
|
|
2029
2031
|
|
|
2030
2032
|
// For correctness, it's critical that all replicas evict deterministically:
|
|
2031
|
-
// We cannot depend on `HashMap.capacity()` since `HashMap.
|
|
2032
|
-
// across
|
|
2033
|
-
// which must be the same across all replicas, and must not change after
|
|
2034
|
-
//
|
|
2035
|
-
//
|
|
2033
|
+
// We cannot depend on `HashMap.capacity()` since `HashMap.ensureTotalCapacity()` may
|
|
2034
|
+
// change across versions of the Zig std lib. We therefore rely on `config.clients_max`,
|
|
2035
|
+
// which must be the same across all replicas, and must not change after initializing a
|
|
2036
|
+
// cluster.
|
|
2037
|
+
// We also do not depend on `HashMap.valueIterator()` being deterministic here. However,
|
|
2038
|
+
// we do require that all entries have different commit numbers and are iterated.
|
|
2036
2039
|
// This ensures that we will always pick the entry with the oldest commit number.
|
|
2037
|
-
// We also
|
|
2040
|
+
// We also check that a client has only one entry in the hash map (or it's buggy).
|
|
2038
2041
|
const clients = self.client_table.count();
|
|
2039
2042
|
assert(clients <= config.clients_max);
|
|
2040
2043
|
if (clients == config.clients_max) {
|
|
@@ -2059,7 +2062,7 @@ pub fn Replica(
|
|
|
2059
2062
|
}
|
|
2060
2063
|
}
|
|
2061
2064
|
assert(iterated == clients);
|
|
2062
|
-
log.
|
|
2065
|
+
log.err("{}: create_client_table_entry: clients={}/{} evicting client={}", .{
|
|
2063
2066
|
self.replica,
|
|
2064
2067
|
clients,
|
|
2065
2068
|
config.clients_max,
|
|
@@ -2453,11 +2456,11 @@ pub fn Replica(
|
|
|
2453
2456
|
// Fall through below to check if we should resend the .register session reply.
|
|
2454
2457
|
} else if (entry.session > message.header.context) {
|
|
2455
2458
|
// The client must not reuse the ephemeral client ID when registering a new session.
|
|
2456
|
-
log.
|
|
2459
|
+
log.err("{}: on_request: ignoring older session (client bug)", .{self.replica});
|
|
2457
2460
|
return true;
|
|
2458
2461
|
} else if (entry.session < message.header.context) {
|
|
2459
2462
|
// This cannot be because of a partition since we check the client's view number.
|
|
2460
|
-
log.
|
|
2463
|
+
log.err("{}: on_request: ignoring newer session (client bug)", .{self.replica});
|
|
2461
2464
|
return true;
|
|
2462
2465
|
}
|
|
2463
2466
|
|
|
@@ -2472,7 +2475,7 @@ pub fn Replica(
|
|
|
2472
2475
|
self.message_bus.send_message_to_client(message.header.client, entry.reply);
|
|
2473
2476
|
return true;
|
|
2474
2477
|
} else {
|
|
2475
|
-
log.
|
|
2478
|
+
log.err("{}: on_request: request collision (client bug)", .{self.replica});
|
|
2476
2479
|
return true;
|
|
2477
2480
|
}
|
|
2478
2481
|
} else if (entry.reply.header.request + 1 == message.header.request) {
|
|
@@ -2482,11 +2485,11 @@ pub fn Replica(
|
|
|
2482
2485
|
return false;
|
|
2483
2486
|
} else {
|
|
2484
2487
|
// The client may have only one request inflight at a time.
|
|
2485
|
-
log.
|
|
2488
|
+
log.err("{}: on_request: ignoring new request (client bug)", .{self.replica});
|
|
2486
2489
|
return true;
|
|
2487
2490
|
}
|
|
2488
2491
|
} else {
|
|
2489
|
-
log.
|
|
2492
|
+
log.err("{}: on_request: ignoring newer request (client bug)", .{self.replica});
|
|
2490
2493
|
return true;
|
|
2491
2494
|
}
|
|
2492
2495
|
} else if (message.header.operation == .register) {
|
|
@@ -2573,7 +2576,7 @@ pub fn Replica(
|
|
|
2573
2576
|
log.debug("{}: on_request: ignoring (already preparing)", .{self.replica});
|
|
2574
2577
|
return true;
|
|
2575
2578
|
} else {
|
|
2576
|
-
log.
|
|
2579
|
+
log.err("{}: on_request: ignoring (client forked)", .{self.replica});
|
|
2577
2580
|
return true;
|
|
2578
2581
|
}
|
|
2579
2582
|
}
|
|
@@ -2682,7 +2685,7 @@ pub fn Replica(
|
|
|
2682
2685
|
assert(self.op + 1 == header.op);
|
|
2683
2686
|
}
|
|
2684
2687
|
|
|
2685
|
-
fn message_body_as_headers(
|
|
2688
|
+
fn message_body_as_headers(_: *Self, message: *const Message) []Header {
|
|
2686
2689
|
// TODO Assert message commands that we expect this to be called for.
|
|
2687
2690
|
assert(message.header.size > @sizeOf(Header)); // Body must contain at least one header.
|
|
2688
2691
|
return std.mem.bytesAsSlice(Header, message.buffer[@sizeOf(Header)..message.header.size]);
|
|
@@ -2701,8 +2704,8 @@ pub fn Replica(
|
|
|
2701
2704
|
if (a.view == b.view and a.op + 1 == b.op and a.checksum != b.parent) {
|
|
2702
2705
|
assert(a.valid_checksum());
|
|
2703
2706
|
assert(b.valid_checksum());
|
|
2704
|
-
log.
|
|
2705
|
-
log.
|
|
2707
|
+
log.err("{}: panic_if_hash_chain_would_break: a: {}", .{ self.replica, a });
|
|
2708
|
+
log.err("{}: panic_if_hash_chain_would_break: b: {}", .{ self.replica, b });
|
|
2706
2709
|
@panic("hash chain would break");
|
|
2707
2710
|
}
|
|
2708
2711
|
}
|
|
@@ -3071,7 +3074,7 @@ pub fn Replica(
|
|
|
3071
3074
|
while (op > 0) {
|
|
3072
3075
|
op -= 1;
|
|
3073
3076
|
if (self.journal.entry_for_op(op)) |neighbor| {
|
|
3074
|
-
if (
|
|
3077
|
+
if (Journal.next_offset(neighbor) > header.offset) return true;
|
|
3075
3078
|
break;
|
|
3076
3079
|
}
|
|
3077
3080
|
}
|
|
@@ -3081,7 +3084,7 @@ pub fn Replica(
|
|
|
3081
3084
|
var op: u64 = header.op + 1;
|
|
3082
3085
|
while (op <= self.op) : (op += 1) {
|
|
3083
3086
|
if (self.journal.entry_for_op(op)) |neighbor| {
|
|
3084
|
-
if (
|
|
3087
|
+
if (Journal.next_offset(header) > neighbor.offset) return true;
|
|
3085
3088
|
break;
|
|
3086
3089
|
}
|
|
3087
3090
|
}
|
|
@@ -3581,7 +3584,7 @@ pub fn Replica(
|
|
|
3581
3584
|
assert(count_start_view_change >= self.quorum_view_change - 1);
|
|
3582
3585
|
|
|
3583
3586
|
const message = self.create_view_change_message(.do_view_change) orelse {
|
|
3584
|
-
log.
|
|
3587
|
+
log.err("{}: send_do_view_change: waiting for message", .{self.replica});
|
|
3585
3588
|
return;
|
|
3586
3589
|
};
|
|
3587
3590
|
defer self.message_bus.unref(message);
|
|
@@ -3600,7 +3603,7 @@ pub fn Replica(
|
|
|
3600
3603
|
assert(self.status == .normal);
|
|
3601
3604
|
assert(self.leader());
|
|
3602
3605
|
|
|
3603
|
-
log.
|
|
3606
|
+
log.err("{}: too many sessions, sending eviction message to client={}", .{
|
|
3604
3607
|
self.replica,
|
|
3605
3608
|
client,
|
|
3606
3609
|
});
|
|
@@ -3616,7 +3619,7 @@ pub fn Replica(
|
|
|
3616
3619
|
|
|
3617
3620
|
fn send_header_to_client(self: *Self, client: u128, header: Header) void {
|
|
3618
3621
|
const message = self.create_message_from_header(header) orelse {
|
|
3619
|
-
log.
|
|
3622
|
+
log.err("{}: no header-only message available, dropping message to client {}", .{
|
|
3620
3623
|
self.replica,
|
|
3621
3624
|
client,
|
|
3622
3625
|
});
|
|
@@ -3629,7 +3632,7 @@ pub fn Replica(
|
|
|
3629
3632
|
|
|
3630
3633
|
fn send_header_to_other_replicas(self: *Self, header: Header) void {
|
|
3631
3634
|
const message = self.create_message_from_header(header) orelse {
|
|
3632
|
-
log.
|
|
3635
|
+
log.err("{}: no header-only message available, dropping message to replicas", .{
|
|
3633
3636
|
self.replica,
|
|
3634
3637
|
});
|
|
3635
3638
|
return;
|
|
@@ -3646,7 +3649,7 @@ pub fn Replica(
|
|
|
3646
3649
|
|
|
3647
3650
|
fn send_header_to_replica(self: *Self, replica: u8, header: Header) void {
|
|
3648
3651
|
const message = self.create_message_from_header(header) orelse {
|
|
3649
|
-
log.
|
|
3652
|
+
log.err("{}: no header-only message available, dropping message to replica {}", .{
|
|
3650
3653
|
self.replica,
|
|
3651
3654
|
replica,
|
|
3652
3655
|
});
|
|
@@ -3675,7 +3678,7 @@ pub fn Replica(
|
|
|
3675
3678
|
});
|
|
3676
3679
|
|
|
3677
3680
|
if (message.header.invalid()) |reason| {
|
|
3678
|
-
log.
|
|
3681
|
+
log.err("{}: send_message_to_replica: invalid ({s})", .{ self.replica, reason });
|
|
3679
3682
|
@panic("send_message_to_replica: invalid message");
|
|
3680
3683
|
}
|
|
3681
3684
|
|
|
@@ -3764,7 +3767,7 @@ pub fn Replica(
|
|
|
3764
3767
|
assert(replica == self.leader_index(self.view));
|
|
3765
3768
|
},
|
|
3766
3769
|
else => {
|
|
3767
|
-
log.
|
|
3770
|
+
log.info("{}: send_message_to_replica: TODO {s}", .{
|
|
3768
3771
|
self.replica,
|
|
3769
3772
|
@tagName(message.header.command),
|
|
3770
3773
|
});
|
|
@@ -3781,7 +3784,7 @@ pub fn Replica(
|
|
|
3781
3784
|
|
|
3782
3785
|
/// Finds the header with the highest op number in a slice of headers from a replica.
|
|
3783
3786
|
/// Searches only by op number to find the highest `self.op for the replica.
|
|
3784
|
-
fn set_latest_op(
|
|
3787
|
+
fn set_latest_op(headers: []Header, latest: *Header) void {
|
|
3785
3788
|
switch (latest.command) {
|
|
3786
3789
|
.reserved, .prepare => assert(latest.valid_checksum()),
|
|
3787
3790
|
else => unreachable,
|
|
@@ -3910,7 +3913,7 @@ pub fn Replica(
|
|
|
3910
3913
|
assert(self.nack_prepare_op == null);
|
|
3911
3914
|
|
|
3912
3915
|
const start_view = self.create_view_change_message(.start_view) orelse {
|
|
3913
|
-
log.
|
|
3916
|
+
log.err("{}: start_view_as_the_new_leader: waiting for message", .{self.replica});
|
|
3914
3917
|
return;
|
|
3915
3918
|
};
|
|
3916
3919
|
defer self.message_bus.unref(start_view);
|
|
@@ -4112,7 +4115,7 @@ pub fn Replica(
|
|
|
4112
4115
|
if (self.journal.entry_for_op_exact(op)) |a| {
|
|
4113
4116
|
assert(a.op + 1 == b.op);
|
|
4114
4117
|
if (a.checksum == b.parent) {
|
|
4115
|
-
assert(
|
|
4118
|
+
assert(ascending_viewstamps(a, b));
|
|
4116
4119
|
b = a;
|
|
4117
4120
|
} else {
|
|
4118
4121
|
log.debug("{}: valid_hash_chain_between: break: A: {}", .{ self.replica, a });
|