tigerbeetle-node 0.11.11 → 0.11.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.client.node.sha256 +1 -1
- package/package.json +4 -7
- package/scripts/build_lib.sh +22 -2
- package/src/tigerbeetle/scripts/benchmark.bat +0 -3
- package/src/tigerbeetle/scripts/benchmark.sh +0 -3
- package/src/tigerbeetle/scripts/scripts/benchmark.bat +48 -0
- package/src/tigerbeetle/scripts/scripts/benchmark.sh +66 -0
- package/src/tigerbeetle/scripts/scripts/confirm_image.sh +44 -0
- package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +15 -0
- package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +7 -0
- package/src/tigerbeetle/scripts/scripts/install.bat +7 -0
- package/src/tigerbeetle/scripts/scripts/install.sh +21 -0
- package/src/tigerbeetle/scripts/scripts/install_zig.bat +113 -0
- package/src/tigerbeetle/scripts/scripts/install_zig.sh +90 -0
- package/src/tigerbeetle/scripts/scripts/lint.zig +199 -0
- package/src/tigerbeetle/scripts/scripts/pre-commit.sh +9 -0
- package/src/tigerbeetle/scripts/scripts/shellcheck.sh +5 -0
- package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +10 -0
- package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +14 -0
- package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +48 -0
- package/src/tigerbeetle/scripts/scripts/validate_docs.sh +23 -0
- package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +46 -0
- package/src/tigerbeetle/src/message_bus.zig +1 -1
- package/src/tigerbeetle/src/stdx.zig +23 -0
- package/src/tigerbeetle/src/testing/cluster/network.zig +6 -7
- package/src/tigerbeetle/src/testing/cluster.zig +5 -5
- package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -1
- package/src/tigerbeetle/src/vsr/README.md +209 -0
- package/src/tigerbeetle/src/vsr/replica.zig +195 -362
- package/src/tigerbeetle/src/vsr/superblock.zig +14 -17
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +5 -5
- package/src/tigerbeetle/src/vsr.zig +274 -14
|
@@ -285,8 +285,8 @@ pub const SuperBlockHeader = extern struct {
|
|
|
285
285
|
return true;
|
|
286
286
|
}
|
|
287
287
|
|
|
288
|
-
pub fn vsr_headers(superblock: *const SuperBlockHeader) vsr.
|
|
289
|
-
return vsr.
|
|
288
|
+
pub fn vsr_headers(superblock: *const SuperBlockHeader) vsr.Headers.ViewChangeSlice {
|
|
289
|
+
return vsr.Headers.ViewChangeSlice.init(
|
|
290
290
|
superblock.vsr_headers_all[0..superblock.vsr_headers_count],
|
|
291
291
|
);
|
|
292
292
|
}
|
|
@@ -441,7 +441,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
|
|
|
441
441
|
/// Used by format(), checkpoint(), and view_change().
|
|
442
442
|
vsr_state: ?SuperBlockHeader.VSRState = null,
|
|
443
443
|
/// Used by format() and view_change().
|
|
444
|
-
vsr_headers: ?vsr.
|
|
444
|
+
vsr_headers: ?vsr.Headers.ViewChangeArray = null,
|
|
445
445
|
repairs: ?Quorums.RepairIterator = null, // Used by open().
|
|
446
446
|
};
|
|
447
447
|
|
|
@@ -657,15 +657,12 @@ pub fn SuperBlockType(comptime Storage: type) type {
|
|
|
657
657
|
|
|
658
658
|
superblock.working.set_checksum();
|
|
659
659
|
|
|
660
|
-
var vsr_headers = vsr.ViewChangeHeaders.BoundedArray{ .buffer = undefined };
|
|
661
|
-
vsr_headers.appendAssumeCapacity(vsr.Header.root_prepare(options.cluster));
|
|
662
|
-
|
|
663
660
|
context.* = .{
|
|
664
661
|
.superblock = superblock,
|
|
665
662
|
.callback = callback,
|
|
666
663
|
.caller = .format,
|
|
667
664
|
.vsr_state = SuperBlockHeader.VSRState.root(options.cluster),
|
|
668
|
-
.vsr_headers =
|
|
665
|
+
.vsr_headers = vsr.Headers.ViewChangeArray.root(options.cluster),
|
|
669
666
|
};
|
|
670
667
|
|
|
671
668
|
// TODO At a higher layer, we must:
|
|
@@ -733,7 +730,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
|
|
|
733
730
|
commit_max: u64,
|
|
734
731
|
log_view: u32,
|
|
735
732
|
view: u32,
|
|
736
|
-
headers: vsr.
|
|
733
|
+
headers: *const vsr.Headers.ViewChangeArray,
|
|
737
734
|
};
|
|
738
735
|
|
|
739
736
|
/// The replica calls view_change() to persist its view/log_view — it cannot
|
|
@@ -747,14 +744,14 @@ pub fn SuperBlockType(comptime Storage: type) type {
|
|
|
747
744
|
update: UpdateViewChange,
|
|
748
745
|
) void {
|
|
749
746
|
assert(superblock.opened);
|
|
750
|
-
assert(superblock.staging.vsr_state.commit_min <= update.headers.get(0).op);
|
|
747
|
+
assert(superblock.staging.vsr_state.commit_min <= update.headers.array.get(0).op);
|
|
751
748
|
assert(superblock.staging.vsr_state.commit_max <= update.commit_max);
|
|
752
749
|
assert(superblock.staging.vsr_state.view <= update.view);
|
|
753
750
|
assert(superblock.staging.vsr_state.log_view <= update.log_view);
|
|
754
751
|
assert(superblock.staging.vsr_state.log_view < update.log_view or
|
|
755
752
|
superblock.staging.vsr_state.view < update.view);
|
|
756
753
|
|
|
757
|
-
vsr.
|
|
754
|
+
vsr.Headers.ViewChangeSlice.verify(update.headers.array.constSlice());
|
|
758
755
|
assert(update.view >= update.log_view);
|
|
759
756
|
|
|
760
757
|
const vsr_state = SuperBlockHeader.VSRState{
|
|
@@ -779,7 +776,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
|
|
|
779
776
|
update.view,
|
|
780
777
|
|
|
781
778
|
superblock.staging.vsr_headers().slice[0].checksum,
|
|
782
|
-
update.headers.get(0).checksum,
|
|
779
|
+
update.headers.array.get(0).checksum,
|
|
783
780
|
});
|
|
784
781
|
|
|
785
782
|
context.* = .{
|
|
@@ -787,7 +784,7 @@ pub fn SuperBlockType(comptime Storage: type) type {
|
|
|
787
784
|
.callback = callback,
|
|
788
785
|
.caller = .view_change,
|
|
789
786
|
.vsr_state = vsr_state,
|
|
790
|
-
.vsr_headers = update.headers
|
|
787
|
+
.vsr_headers = update.headers.*,
|
|
791
788
|
};
|
|
792
789
|
|
|
793
790
|
superblock.acquire(context);
|
|
@@ -823,19 +820,19 @@ pub fn SuperBlockType(comptime Storage: type) type {
|
|
|
823
820
|
superblock.staging.parent = superblock.staging.checksum;
|
|
824
821
|
superblock.staging.vsr_state = context.vsr_state.?;
|
|
825
822
|
|
|
826
|
-
if (context.vsr_headers)
|
|
823
|
+
if (context.vsr_headers) |*headers| {
|
|
827
824
|
assert(context.caller == .format or context.caller == .view_change);
|
|
828
825
|
|
|
829
|
-
superblock.staging.vsr_headers_count = @intCast(u32, headers.len);
|
|
826
|
+
superblock.staging.vsr_headers_count = @intCast(u32, headers.array.len);
|
|
830
827
|
stdx.copy_disjoint(
|
|
831
828
|
.exact,
|
|
832
829
|
vsr.Header,
|
|
833
|
-
superblock.staging.vsr_headers_all[0..headers.len],
|
|
834
|
-
headers.constSlice(),
|
|
830
|
+
superblock.staging.vsr_headers_all[0..headers.array.len],
|
|
831
|
+
headers.array.constSlice(),
|
|
835
832
|
);
|
|
836
833
|
std.mem.set(
|
|
837
834
|
vsr.Header,
|
|
838
|
-
superblock.staging.vsr_headers_all[headers.len..],
|
|
835
|
+
superblock.staging.vsr_headers_all[headers.array.len..],
|
|
839
836
|
std.mem.zeroes(vsr.Header),
|
|
840
837
|
);
|
|
841
838
|
} else {
|
|
@@ -151,7 +151,7 @@ const Environment = struct {
|
|
|
151
151
|
/// Indexed by sequence.
|
|
152
152
|
const SequenceStates = std.ArrayList(struct {
|
|
153
153
|
vsr_state: VSRState,
|
|
154
|
-
vsr_headers: vsr.
|
|
154
|
+
vsr_headers: vsr.Headers.Array,
|
|
155
155
|
/// Track the expected `checksum(free_set)`.
|
|
156
156
|
/// Note that this is a checksum of the decoded free set; it is not the same as
|
|
157
157
|
/// `SuperBlockHeader.free_set_checksum`.
|
|
@@ -269,7 +269,7 @@ const Environment = struct {
|
|
|
269
269
|
.replica = 0,
|
|
270
270
|
});
|
|
271
271
|
|
|
272
|
-
var vsr_headers = vsr.
|
|
272
|
+
var vsr_headers = vsr.Headers.Array{ .buffer = undefined };
|
|
273
273
|
vsr_headers.appendAssumeCapacity(vsr.Header.root_prepare(cluster));
|
|
274
274
|
|
|
275
275
|
assert(env.sequence_states.items.len == 0);
|
|
@@ -315,7 +315,7 @@ const Environment = struct {
|
|
|
315
315
|
.view = env.superblock.staging.vsr_state.view + 5,
|
|
316
316
|
};
|
|
317
317
|
|
|
318
|
-
var vsr_headers = vsr.
|
|
318
|
+
var vsr_headers = vsr.Headers.Array{ .buffer = undefined };
|
|
319
319
|
var vsr_head = std.mem.zeroInit(vsr.Header, .{
|
|
320
320
|
.command = .prepare,
|
|
321
321
|
.op = env.superblock.staging.vsr_state.commit_min,
|
|
@@ -336,7 +336,7 @@ const Environment = struct {
|
|
|
336
336
|
.commit_max = vsr_state.commit_max,
|
|
337
337
|
.log_view = vsr_state.log_view,
|
|
338
338
|
.view = vsr_state.view,
|
|
339
|
-
.headers = vsr_headers,
|
|
339
|
+
.headers = &.{ .array = vsr_headers },
|
|
340
340
|
});
|
|
341
341
|
}
|
|
342
342
|
|
|
@@ -361,7 +361,7 @@ const Environment = struct {
|
|
|
361
361
|
assert(env.sequence_states.items.len == env.superblock.staging.sequence + 1);
|
|
362
362
|
try env.sequence_states.append(.{
|
|
363
363
|
.vsr_state = vsr_state,
|
|
364
|
-
.vsr_headers = vsr.
|
|
364
|
+
.vsr_headers = vsr.Headers.Array.fromSlice(
|
|
365
365
|
env.superblock.staging.vsr_headers().slice,
|
|
366
366
|
) catch unreachable,
|
|
367
367
|
.free_set = checksum_free_set(env.superblock),
|
|
@@ -352,6 +352,11 @@ pub const Header = extern struct {
|
|
|
352
352
|
if (self.request != 0) return "request != 0";
|
|
353
353
|
if (self.commit != 0) return "commit != 0";
|
|
354
354
|
if (self.timestamp != 0) return "timestamp != 0";
|
|
355
|
+
if (self.view != 0) return "view != 0";
|
|
356
|
+
if (self.client != 0) {
|
|
357
|
+
if (self.replica != 0) return "replica != 0";
|
|
358
|
+
if (self.op != 0) return "op != 0";
|
|
359
|
+
}
|
|
355
360
|
if (self.operation != .reserved) return "operation != .reserved";
|
|
356
361
|
return null;
|
|
357
362
|
}
|
|
@@ -363,6 +368,9 @@ pub const Header = extern struct {
|
|
|
363
368
|
if (self.context != 0) return "context != 0";
|
|
364
369
|
if (self.request != 0) return "request != 0";
|
|
365
370
|
if (self.commit != 0) return "commit != 0";
|
|
371
|
+
if (self.timestamp > 0) {
|
|
372
|
+
if (self.view != 0) return "view != 0";
|
|
373
|
+
}
|
|
366
374
|
if (self.operation != .reserved) return "operation != .reserved";
|
|
367
375
|
return null;
|
|
368
376
|
}
|
|
@@ -999,26 +1007,30 @@ pub fn quorums(replica_count: u8) struct {
|
|
|
999
1007
|
};
|
|
1000
1008
|
}
|
|
1001
1009
|
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
///
|
|
1005
|
-
///
|
|
1006
|
-
///
|
|
1007
|
-
|
|
1010
|
+
pub const Headers = struct {
|
|
1011
|
+
pub const Array = std.BoundedArray(Header, constants.view_change_headers_max);
|
|
1012
|
+
/// The SuperBlock's persisted VSR headers.
|
|
1013
|
+
/// One of the following:
|
|
1014
|
+
///
|
|
1015
|
+
/// - SV headers (consecutive chain)
|
|
1016
|
+
/// - DVC headers (disjoint chain)
|
|
1017
|
+
pub const ViewChangeSlice = ViewChangeHeadersSlice;
|
|
1018
|
+
pub const ViewChangeArray = ViewChangeHeadersArray;
|
|
1019
|
+
};
|
|
1020
|
+
|
|
1021
|
+
const ViewChangeHeadersSlice = struct {
|
|
1008
1022
|
/// Headers are ordered from high-to-low op.
|
|
1009
1023
|
slice: []const Header,
|
|
1010
1024
|
|
|
1011
|
-
pub
|
|
1012
|
-
|
|
1013
|
-
pub fn init(slice: []const Header) ViewChangeHeaders {
|
|
1014
|
-
ViewChangeHeaders.verify(slice);
|
|
1025
|
+
pub fn init(slice: []const Header) ViewChangeHeadersSlice {
|
|
1026
|
+
ViewChangeHeadersSlice.verify(slice);
|
|
1015
1027
|
|
|
1016
1028
|
return .{ .slice = slice };
|
|
1017
1029
|
}
|
|
1018
1030
|
|
|
1019
1031
|
pub fn verify(slice: []const Header) void {
|
|
1020
1032
|
assert(slice.len > 0);
|
|
1021
|
-
assert(slice.len <= constants.
|
|
1033
|
+
assert(slice.len <= constants.view_change_headers_max);
|
|
1022
1034
|
|
|
1023
1035
|
var child: ?*const Header = null;
|
|
1024
1036
|
for (slice) |*header| {
|
|
@@ -1053,7 +1065,7 @@ pub const ViewChangeHeaders = struct {
|
|
|
1053
1065
|
/// - When these are SV headers for a log_view=V, we can continue to add to them (by preparing
|
|
1054
1066
|
/// more ops), but those ops will always be part of the log_view. If they were prepared during
|
|
1055
1067
|
/// a view prior to the log_view, they would already be part of the headers.
|
|
1056
|
-
pub fn view_for_op(headers:
|
|
1068
|
+
pub fn view_for_op(headers: ViewChangeHeadersSlice, op: u64, log_view: u32) ViewRange {
|
|
1057
1069
|
const header_newest = &headers.slice[0];
|
|
1058
1070
|
const header_oldest = &headers.slice[headers.slice.len - 1];
|
|
1059
1071
|
|
|
@@ -1074,13 +1086,13 @@ pub const ViewChangeHeaders = struct {
|
|
|
1074
1086
|
}
|
|
1075
1087
|
};
|
|
1076
1088
|
|
|
1077
|
-
test "
|
|
1089
|
+
test "Headers.ViewChangeSlice.view_for_op" {
|
|
1078
1090
|
var headers_array = [_]Header{
|
|
1079
1091
|
std.mem.zeroInit(Header, .{ .op = 9, .view = 10 }),
|
|
1080
1092
|
std.mem.zeroInit(Header, .{ .op = 6, .view = 7 }),
|
|
1081
1093
|
};
|
|
1082
1094
|
|
|
1083
|
-
const headers =
|
|
1095
|
+
const headers = Headers.ViewChangeSlice{ .slice = &headers_array };
|
|
1084
1096
|
try std.testing.expect(std.meta.eql(headers.view_for_op(11, 12), .{ .min = 12, .max = 12 }));
|
|
1085
1097
|
try std.testing.expect(std.meta.eql(headers.view_for_op(10, 12), .{ .min = 12, .max = 12 }));
|
|
1086
1098
|
try std.testing.expect(std.meta.eql(headers.view_for_op(9, 12), .{ .min = 10, .max = 10 }));
|
|
@@ -1090,3 +1102,251 @@ test "ViewChangeHeaders.view_for_op" {
|
|
|
1090
1102
|
try std.testing.expect(std.meta.eql(headers.view_for_op(5, 12), .{ .min = 0, .max = 7 }));
|
|
1091
1103
|
try std.testing.expect(std.meta.eql(headers.view_for_op(0, 12), .{ .min = 0, .max = 7 }));
|
|
1092
1104
|
}
|
|
1105
|
+
|
|
1106
|
+
/// The headers of a SV or DVC message.
|
|
1107
|
+
const ViewChangeHeadersArray = struct {
|
|
1108
|
+
array: Headers.Array,
|
|
1109
|
+
|
|
1110
|
+
pub fn root(cluster: u32) ViewChangeHeadersArray {
|
|
1111
|
+
var array = Headers.Array{ .buffer = undefined };
|
|
1112
|
+
array.appendAssumeCapacity(Header.root_prepare(cluster));
|
|
1113
|
+
return ViewChangeHeadersArray.init(array);
|
|
1114
|
+
}
|
|
1115
|
+
|
|
1116
|
+
fn init(array: Headers.Array) ViewChangeHeadersArray {
|
|
1117
|
+
Headers.ViewChangeSlice.verify(array.constSlice());
|
|
1118
|
+
return .{ .array = array };
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
/// This function generates either DVC headers or SV headers:
|
|
1122
|
+
/// - When `current.log_view < current.view`, generate headers for a DVC message.
|
|
1123
|
+
/// - When `current.log_view = current.view`, generate headers for a SV message.
|
|
1124
|
+
///
|
|
1125
|
+
/// Additionally, the current log_view/view/primary state informs the sort of "faults"
|
|
1126
|
+
/// (gaps/breaks/etc) that we expect to find in the journal headers (`current.headers`).
|
|
1127
|
+
/// For example, backups generating a DVC can safely skip over gaps (if the gap is after the DVC
|
|
1128
|
+
/// anchor).
|
|
1129
|
+
///
|
|
1130
|
+
/// Primaries and backups both generate DVCs and SVs.
|
|
1131
|
+
/// - However, SVs are broadcast only by the primary.
|
|
1132
|
+
/// - Backups generate a SV for persisting to the superblock.
|
|
1133
|
+
/// (For convenience/symmetry, not correctness).
|
|
1134
|
+
///
|
|
1135
|
+
/// DVCs and SVs have different invariants they must abide.
|
|
1136
|
+
/// - Read DVCQuorum's comments to understand DVC invariants.
|
|
1137
|
+
/// - SV headers are much simpler: no gaps or breaks, and all uncommitted ops must be included.
|
|
1138
|
+
pub fn build(
|
|
1139
|
+
results: *ViewChangeHeadersArray,
|
|
1140
|
+
options: struct {
|
|
1141
|
+
op_checkpoint: u64,
|
|
1142
|
+
/// The last view_change_headers_max headers of the journal, starting with the head op
|
|
1143
|
+
/// then descending, skipping over all gaps.
|
|
1144
|
+
current: struct {
|
|
1145
|
+
headers: *const Headers.Array,
|
|
1146
|
+
view: u32,
|
|
1147
|
+
log_view: u32,
|
|
1148
|
+
log_view_primary: bool,
|
|
1149
|
+
},
|
|
1150
|
+
// The vsr_headers from the working superblock.
|
|
1151
|
+
// The durable headers are useful (complementing `current.headers`) because:
|
|
1152
|
+
// - They simplify generation of DVCs in the case where we are recovering from a crash,
|
|
1153
|
+
// when we were generating the same DVC prior to the crash.
|
|
1154
|
+
// - They enable additional verification of header gaps/breaks based on the
|
|
1155
|
+
// gap's/break's position relative to the durable headers.
|
|
1156
|
+
durable: struct {
|
|
1157
|
+
headers: Headers.ViewChangeSlice,
|
|
1158
|
+
view: u32,
|
|
1159
|
+
log_view: u32,
|
|
1160
|
+
log_view_primary: bool,
|
|
1161
|
+
},
|
|
1162
|
+
},
|
|
1163
|
+
) void {
|
|
1164
|
+
defer Headers.ViewChangeSlice.verify(results.array.constSlice());
|
|
1165
|
+
|
|
1166
|
+
const headers = &results.array;
|
|
1167
|
+
const current = options.current;
|
|
1168
|
+
const durable = options.durable;
|
|
1169
|
+
|
|
1170
|
+
assert(headers.len == 0);
|
|
1171
|
+
assert(durable.headers.slice.len > 0);
|
|
1172
|
+
assert(current.headers.len > 0);
|
|
1173
|
+
for (current.headers.constSlice()[1..]) |*header, i| {
|
|
1174
|
+
assert(current.headers.get(i).op > header.op);
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
assert(current.view >= durable.view);
|
|
1178
|
+
assert(current.log_view >= durable.log_view);
|
|
1179
|
+
assert(current.view >= current.log_view);
|
|
1180
|
+
assert(durable.view >= durable.log_view);
|
|
1181
|
+
|
|
1182
|
+
const op_head_current = current.headers.get(0).op;
|
|
1183
|
+
const op_head_durable = durable.headers.slice[0].op;
|
|
1184
|
+
|
|
1185
|
+
// The rules for generating DVCs and SVs differ. We use the current view numbers to
|
|
1186
|
+
// determine which is being generated:
|
|
1187
|
+
// - When `log_view < view`, generate a DVC.
|
|
1188
|
+
// - When `log_view = view`, generate a SV.
|
|
1189
|
+
const command_current: enum { start_view, do_view_change } =
|
|
1190
|
+
if (current.log_view == current.view) .start_view else .do_view_change;
|
|
1191
|
+
// Likewise, the durable view numbers identify whether the durable headers were from a past
|
|
1192
|
+
// DVC or SV. The durable headers are only useful if they are from the same view as our
|
|
1193
|
+
// current headers, though.
|
|
1194
|
+
const command_durable: enum { start_view, do_view_change, outdated } = command: {
|
|
1195
|
+
if (durable.log_view == current.log_view) {
|
|
1196
|
+
if (durable.log_view == durable.view) {
|
|
1197
|
+
break :command .start_view;
|
|
1198
|
+
} else {
|
|
1199
|
+
break :command .do_view_change;
|
|
1200
|
+
}
|
|
1201
|
+
} else {
|
|
1202
|
+
break :command .outdated;
|
|
1203
|
+
}
|
|
1204
|
+
};
|
|
1205
|
+
|
|
1206
|
+
if (command_durable == .do_view_change and command_current == .do_view_change) {
|
|
1207
|
+
assert(op_head_durable == op_head_current);
|
|
1208
|
+
// Ensure that if we started a DVC before a crash, that we will resume sending the exact
|
|
1209
|
+
// same DVC after recovery. (An alternative implementation would be to load the
|
|
1210
|
+
// superblock's DVC headers (including gaps) into the journal during Replica.open(), but
|
|
1211
|
+
// that is more complicated to implement correctly).
|
|
1212
|
+
for (durable.headers.slice) |*header| headers.appendAssumeCapacity(header.*);
|
|
1213
|
+
return;
|
|
1214
|
+
}
|
|
1215
|
+
|
|
1216
|
+
// What is the relationship between two prepares?
|
|
1217
|
+
const Chain = enum {
|
|
1218
|
+
// The ops are sequential, and the hash-chain is valid.
|
|
1219
|
+
chain_sequence,
|
|
1220
|
+
// The ops are sequential, and the hash-chain is invalid.
|
|
1221
|
+
chain_break,
|
|
1222
|
+
// The ops are non-sequential, and belong to the same view.
|
|
1223
|
+
// This gap never hides a break.
|
|
1224
|
+
chain_view,
|
|
1225
|
+
// The ops are non-sequential, and belong to different views.
|
|
1226
|
+
// Depending on the replica state, this gap may hide a break.
|
|
1227
|
+
chain_gap,
|
|
1228
|
+
};
|
|
1229
|
+
|
|
1230
|
+
// The DVC anchor: Within the log suffix following the anchor, we have additional
|
|
1231
|
+
// guarantees about the state of the log headers which allow us to tolerate certain
|
|
1232
|
+
// gaps (by locally guaranteeing that the gap does not hide a break).
|
|
1233
|
+
const op_dvc_anchor = std.math.max(
|
|
1234
|
+
options.op_checkpoint,
|
|
1235
|
+
// +1: We may have a full pipeline, but not yet have performed any repair.
|
|
1236
|
+
// In such a case, we want to send those pipeline_prepare_queue_max headers in
|
|
1237
|
+
// the DVC, but not the preceding op (which may belong to a different chain).
|
|
1238
|
+
// This satisfies the DVC invariant because the first op in the pipeline is
|
|
1239
|
+
// "connected" to the canonical chain (via its "parent" checksum).
|
|
1240
|
+
1 + op_head_current -| constants.pipeline_prepare_queue_max,
|
|
1241
|
+
);
|
|
1242
|
+
|
|
1243
|
+
// Within the "suffix" we can make additional assumptions about gaps/etc.
|
|
1244
|
+
// After the suffix, we just add as many extra (valid) headers as we can fit.
|
|
1245
|
+
var suffix_done = false;
|
|
1246
|
+
|
|
1247
|
+
for (current.headers.constSlice()) |*header, i| {
|
|
1248
|
+
const op = header.op;
|
|
1249
|
+
const chain = chain: {
|
|
1250
|
+
// Always include the head message.
|
|
1251
|
+
if (i == 0) break :chain Chain.chain_sequence;
|
|
1252
|
+
|
|
1253
|
+
const child = headers.get(i - 1);
|
|
1254
|
+
if (child.op == header.op + 1) {
|
|
1255
|
+
break :chain if (child.parent == header.checksum) Chain.chain_sequence else Chain.chain_break;
|
|
1256
|
+
} else {
|
|
1257
|
+
break :chain if (child.view == header.view) Chain.chain_view else Chain.chain_gap;
|
|
1258
|
+
}
|
|
1259
|
+
};
|
|
1260
|
+
|
|
1261
|
+
if (command_current == .start_view) {
|
|
1262
|
+
// Primary: Collect headers for a start_view message.
|
|
1263
|
+
// Backup: these headers are stored in the superblock's vsr_headers.
|
|
1264
|
+
switch (chain) {
|
|
1265
|
+
.chain_sequence => {},
|
|
1266
|
+
// Gaps are due to either:
|
|
1267
|
+
// - entries before checkpoint, which are not repaired, or
|
|
1268
|
+
// - backup missed prepares and has not repaired headers. (Immediately after
|
|
1269
|
+
// receiving a start_view this is not a concern, but the view_durable_update()
|
|
1270
|
+
// may be delayed if another is in progress).
|
|
1271
|
+
.chain_view, .chain_gap => {
|
|
1272
|
+
assert(op <= options.op_checkpoint or !current.log_view_primary);
|
|
1273
|
+
break;
|
|
1274
|
+
},
|
|
1275
|
+
// Breaks are due to:
|
|
1276
|
+
// - entries before checkpoint, which are not repaired
|
|
1277
|
+
.chain_break => {
|
|
1278
|
+
assert(op <= options.op_checkpoint);
|
|
1279
|
+
break;
|
|
1280
|
+
},
|
|
1281
|
+
}
|
|
1282
|
+
} else if (suffix_done) {
|
|
1283
|
+
// Add extra headers to the DVC. These are not required for correctness or
|
|
1284
|
+
// availability, but including extra (correct) headers minimizes header repair at
|
|
1285
|
+
// the new primary.
|
|
1286
|
+
switch (chain) {
|
|
1287
|
+
.chain_sequence => {},
|
|
1288
|
+
.chain_view => {},
|
|
1289
|
+
// Outside of the log suffix, repair may not have been finished, so gaps and
|
|
1290
|
+
// breaks are possible. Non-same-view gaps may hide breaks.
|
|
1291
|
+
.chain_gap => break,
|
|
1292
|
+
.chain_break => break,
|
|
1293
|
+
}
|
|
1294
|
+
} else if (current.log_view_primary and command_durable == .start_view) {
|
|
1295
|
+
switch (chain) {
|
|
1296
|
+
.chain_sequence => {},
|
|
1297
|
+
// Gaps to the right of the (durable) SV originate from:
|
|
1298
|
+
// 1. The primary (durable SV: 1,2,3) prepares several ops (4,5,6).
|
|
1299
|
+
// 2. However, the WAL writes are reordered such that some later ops (5,6)
|
|
1300
|
+
// finish before an earlier op (4).
|
|
1301
|
+
// 3. Crash, recover. Start sending a DVC for the next view. Either:
|
|
1302
|
+
// - There is a gap in the WAL at op=4, but this is to the right of the
|
|
1303
|
+
// durable SV, so it may be safely skipped.
|
|
1304
|
+
// - Same as above, except op=4 was a torn write (or bit rot).
|
|
1305
|
+
.chain_view, .chain_gap => assert(op + 1 > op_head_durable),
|
|
1306
|
+
// Breaks are impossible to the right of the durable SV — journal recovery uses
|
|
1307
|
+
// the durable SV to prune bad headers by their view numbers.
|
|
1308
|
+
.chain_break => unreachable,
|
|
1309
|
+
}
|
|
1310
|
+
suffix_done = op <= op_head_durable;
|
|
1311
|
+
} else if (current.log_view_primary and command_durable != .start_view) {
|
|
1312
|
+
switch (chain) {
|
|
1313
|
+
.chain_sequence => {},
|
|
1314
|
+
.chain_view => {},
|
|
1315
|
+
// The retiring primary may have gap-breaks or breaks in its suffix iff:
|
|
1316
|
+
// - it didn't finish repairs before the second view-change, and
|
|
1317
|
+
// - some uncommitted ops were truncated during the first view-change.
|
|
1318
|
+
// (Truncation "moves" the suffix backwards).
|
|
1319
|
+
.chain_gap => break,
|
|
1320
|
+
.chain_break => break,
|
|
1321
|
+
}
|
|
1322
|
+
suffix_done = op <= op_dvc_anchor;
|
|
1323
|
+
} else if (!current.log_view_primary and command_durable == .start_view) {
|
|
1324
|
+
switch (chain) {
|
|
1325
|
+
.chain_sequence => {},
|
|
1326
|
+
// Backups load a full suffix of headers from the view's SV message. If there
|
|
1327
|
+
// is now a gap in the backup's suffix, this must be due to missed prepares.
|
|
1328
|
+
.chain_view, .chain_gap => assert(op + 1 > op_head_durable),
|
|
1329
|
+
// Breaks are impossible to the right of the durable SV — journal recovery uses
|
|
1330
|
+
// the durable SV to prune bad headers by their view numbers.
|
|
1331
|
+
.chain_break => unreachable,
|
|
1332
|
+
}
|
|
1333
|
+
suffix_done = op <= op_head_durable;
|
|
1334
|
+
} else if (!current.log_view_primary and command_durable != .start_view) {
|
|
1335
|
+
switch (chain) {
|
|
1336
|
+
.chain_sequence => {},
|
|
1337
|
+
.chain_view => {},
|
|
1338
|
+
// Backups load a full suffix of headers from the view's SV message.
|
|
1339
|
+
// That SV isn't durable, but it is part of the journal, so any gaps to its
|
|
1340
|
+
// right must be due to missed prepares.
|
|
1341
|
+
.chain_gap => {},
|
|
1342
|
+
// Breaks are impossible to the right of the ephemeral SV, since the log was
|
|
1343
|
+
// truncated when the SV was installed.
|
|
1344
|
+
.chain_break => unreachable,
|
|
1345
|
+
}
|
|
1346
|
+
suffix_done = op <= op_dvc_anchor;
|
|
1347
|
+
} else unreachable;
|
|
1348
|
+
|
|
1349
|
+
headers.appendAssumeCapacity(header.*);
|
|
1350
|
+
}
|
|
1351
|
+
}
|
|
1352
|
+
};
|