tigerbeetle-node 0.11.2 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.client.node.sha256 +1 -1
- package/package.json +1 -1
- package/src/node.zig +6 -1
- package/src/tigerbeetle/src/benchmark.zig +1 -1
- package/src/tigerbeetle/src/c/tb_client/context.zig +1 -1
- package/src/tigerbeetle/src/c/tb_client/echo_client.zig +1 -1
- package/src/tigerbeetle/src/c/tb_client/thread.zig +1 -1
- package/src/tigerbeetle/src/c/tb_client.h +97 -111
- package/src/tigerbeetle/src/c/tb_client.zig +29 -18
- package/src/tigerbeetle/src/c/tb_client_header.zig +218 -0
- package/src/tigerbeetle/src/c/test.zig +7 -7
- package/src/tigerbeetle/src/cli.zig +4 -4
- package/src/tigerbeetle/src/config.zig +184 -374
- package/src/tigerbeetle/src/constants.zig +394 -0
- package/src/tigerbeetle/src/demo.zig +1 -1
- package/src/tigerbeetle/src/ewah.zig +18 -29
- package/src/tigerbeetle/src/ewah_fuzz.zig +130 -0
- package/src/tigerbeetle/src/io/darwin.zig +1 -1
- package/src/tigerbeetle/src/io/linux.zig +2 -2
- package/src/tigerbeetle/src/io/windows.zig +1 -1
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +1 -1
- package/src/tigerbeetle/src/lsm/compaction.zig +55 -2
- package/src/tigerbeetle/src/lsm/forest.zig +1 -1
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +15 -7
- package/src/tigerbeetle/src/lsm/grid.zig +1 -1
- package/src/tigerbeetle/src/lsm/groove.zig +5 -39
- package/src/tigerbeetle/src/lsm/level_iterator.zig +1 -1
- package/src/tigerbeetle/src/lsm/manifest.zig +1 -6
- package/src/tigerbeetle/src/lsm/manifest_level.zig +1 -1
- package/src/tigerbeetle/src/lsm/manifest_log.zig +1 -1
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +14 -9
- package/src/tigerbeetle/src/lsm/posted_groove.zig +2 -13
- package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +1 -1
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +1 -1
- package/src/tigerbeetle/src/lsm/table.zig +25 -17
- package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
- package/src/tigerbeetle/src/lsm/table_iterator.zig +1 -1
- package/src/tigerbeetle/src/lsm/table_mutable.zig +1 -1
- package/src/tigerbeetle/src/lsm/test.zig +1 -1
- package/src/tigerbeetle/src/lsm/tree.zig +47 -5
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +22 -18
- package/src/tigerbeetle/src/main.zig +22 -16
- package/src/tigerbeetle/src/message_bus.zig +1 -1
- package/src/tigerbeetle/src/message_pool.zig +2 -2
- package/src/tigerbeetle/src/simulator.zig +3 -10
- package/src/tigerbeetle/src/state_machine.zig +627 -1806
- package/src/tigerbeetle/src/storage.zig +1 -1
- package/src/tigerbeetle/src/test/accounting/auditor.zig +1 -1
- package/src/tigerbeetle/src/test/accounting/workload.zig +1 -1
- package/src/tigerbeetle/src/test/cluster.zig +1 -1
- package/src/tigerbeetle/src/test/conductor.zig +1 -1
- package/src/tigerbeetle/src/test/fuzz.zig +19 -0
- package/src/tigerbeetle/src/test/message_bus.zig +1 -1
- package/src/tigerbeetle/src/test/network.zig +1 -1
- package/src/tigerbeetle/src/test/state_checker.zig +2 -2
- package/src/tigerbeetle/src/test/storage.zig +14 -5
- package/src/tigerbeetle/src/test/storage_checker.zig +1 -1
- package/src/tigerbeetle/src/test/table.zig +226 -0
- package/src/tigerbeetle/src/time.zig +1 -1
- package/src/tigerbeetle/src/tracer.zig +507 -0
- package/src/tigerbeetle/src/unit_tests.zig +2 -0
- package/src/tigerbeetle/src/vsr/client.zig +1 -1
- package/src/tigerbeetle/src/vsr/clock.zig +1 -1
- package/src/tigerbeetle/src/vsr/journal.zig +46 -115
- package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +111 -0
- package/src/tigerbeetle/src/vsr/replica.zig +50 -159
- package/src/tigerbeetle/src/vsr/replica_format.zig +216 -0
- package/src/tigerbeetle/src/vsr/superblock.zig +1 -1
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +1 -1
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +3 -2
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +2 -0
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +8 -5
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +1 -1
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +2 -0
- package/src/tigerbeetle/src/vsr.zig +2 -2
|
@@ -2,7 +2,7 @@ const std = @import("std");
|
|
|
2
2
|
const Allocator = std.mem.Allocator;
|
|
3
3
|
const assert = std.debug.assert;
|
|
4
4
|
|
|
5
|
-
const config = @import("../
|
|
5
|
+
const config = @import("../constants.zig");
|
|
6
6
|
|
|
7
7
|
const StaticAllocator = @import("../static_allocator.zig");
|
|
8
8
|
const GridType = @import("../lsm/grid.zig").GridType;
|
|
@@ -10,8 +10,6 @@ const MessagePool = @import("../message_pool.zig").MessagePool;
|
|
|
10
10
|
const Message = @import("../message_pool.zig").MessagePool.Message;
|
|
11
11
|
const RingBuffer = @import("../ring_buffer.zig").RingBuffer;
|
|
12
12
|
const ClientTable = @import("superblock_client_table.zig").ClientTable;
|
|
13
|
-
const format_wal_headers = @import("./journal.zig").format_wal_headers;
|
|
14
|
-
const format_wal_prepares = @import("./journal.zig").format_wal_prepares;
|
|
15
13
|
|
|
16
14
|
const vsr = @import("../vsr.zig");
|
|
17
15
|
const Header = vsr.Header;
|
|
@@ -21,13 +19,14 @@ const Version = vsr.Version;
|
|
|
21
19
|
const VSRState = vsr.VSRState;
|
|
22
20
|
|
|
23
21
|
const log = std.log.scoped(.replica);
|
|
22
|
+
const tracer = @import("../tracer.zig");
|
|
24
23
|
|
|
25
24
|
pub const Status = enum {
|
|
26
25
|
normal,
|
|
27
26
|
view_change,
|
|
28
27
|
// Recovery (for replica_count > 1):
|
|
29
28
|
//
|
|
30
|
-
// 1. At replica start: `status=recovering` and `journal.
|
|
29
|
+
// 1. At replica start: `status=recovering` and `journal.status=recovering`
|
|
31
30
|
// 2. Load the WAL. Mark questionable entries as faulty.
|
|
32
31
|
// 3. If the WAL has no entries (besides the initial commit), skip to step 5 with view 0.
|
|
33
32
|
// 4. Run VSR recovery protocol:
|
|
@@ -286,6 +285,9 @@ pub fn ReplicaType(
|
|
|
286
285
|
/// The prepare message being committed.
|
|
287
286
|
commit_prepare: ?*Message = null,
|
|
288
287
|
|
|
288
|
+
tracer_slot_commit: ?tracer.SpanStart = null,
|
|
289
|
+
tracer_slot_checkpoint: ?tracer.SpanStart = null,
|
|
290
|
+
|
|
289
291
|
const OpenOptions = struct {
|
|
290
292
|
replica_count: u8,
|
|
291
293
|
storage: *Storage,
|
|
@@ -401,7 +403,7 @@ pub fn ReplicaType(
|
|
|
401
403
|
// The view change quorum may be more expensive to make the replication quorum cheaper.
|
|
402
404
|
// The insight is that the replication phase is by far more common than the view change.
|
|
403
405
|
// This trade-off allows us to optimize for the common case.
|
|
404
|
-
// See the comments in `
|
|
406
|
+
// See the comments in `constants.zig` for further explanation.
|
|
405
407
|
assert(quorum_view_change >= majority);
|
|
406
408
|
|
|
407
409
|
if (replica_count <= 2) {
|
|
@@ -544,6 +546,9 @@ pub fn ReplicaType(
|
|
|
544
546
|
/// Free all memory and unref all messages held by the replica
|
|
545
547
|
/// This does not deinitialize the StateMachine, MessageBus, Storage, or Time
|
|
546
548
|
pub fn deinit(self: *Self, allocator: Allocator) void {
|
|
549
|
+
assert(self.tracer_slot_checkpoint == null);
|
|
550
|
+
assert(self.tracer_slot_commit == null);
|
|
551
|
+
|
|
547
552
|
self.static_allocator.transition_from_static_to_deinit();
|
|
548
553
|
|
|
549
554
|
self.journal.deinit(allocator);
|
|
@@ -602,11 +607,10 @@ pub fn ReplicaType(
|
|
|
602
607
|
self.grid.tick();
|
|
603
608
|
self.message_bus.tick();
|
|
604
609
|
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
return
|
|
608
|
-
|
|
609
|
-
assert(!self.journal.recovering);
|
|
610
|
+
switch (self.journal.status) {
|
|
611
|
+
.init => return self.journal.recover(),
|
|
612
|
+
.recovering => return,
|
|
613
|
+
.recovered => {},
|
|
610
614
|
}
|
|
611
615
|
|
|
612
616
|
if (self.status == .recovering) {
|
|
@@ -700,11 +704,9 @@ pub fn ReplicaType(
|
|
|
700
704
|
return;
|
|
701
705
|
}
|
|
702
706
|
|
|
703
|
-
if (
|
|
707
|
+
if (self.journal.status != .recovered) {
|
|
704
708
|
log.debug("{}: on_message: waiting for journal to recover", .{self.replica});
|
|
705
709
|
return;
|
|
706
|
-
} else {
|
|
707
|
-
assert(!self.journal.recovering);
|
|
708
710
|
}
|
|
709
711
|
|
|
710
712
|
assert(message.header.replica < self.replica_count);
|
|
@@ -748,6 +750,9 @@ pub fn ReplicaType(
|
|
|
748
750
|
|
|
749
751
|
// Any message handlers that loopback must take responsibility for the flush.
|
|
750
752
|
assert(self.loopback_queue == null);
|
|
753
|
+
|
|
754
|
+
// We have to regularly flush the tracer to get output from short benchmarks.
|
|
755
|
+
tracer.flush();
|
|
751
756
|
}
|
|
752
757
|
|
|
753
758
|
fn on_ping(self: *Self, message: *const Message) void {
|
|
@@ -1443,7 +1448,7 @@ pub fn ReplicaType(
|
|
|
1443
1448
|
}
|
|
1444
1449
|
|
|
1445
1450
|
// Recovery messages with our nonce are not sent until after the journal is recovered.
|
|
1446
|
-
assert(self.journal.recovered);
|
|
1451
|
+
assert(self.journal.status == .recovered);
|
|
1447
1452
|
|
|
1448
1453
|
var responses: *QuorumMessages = &self.recovery_response_from_other_replicas;
|
|
1449
1454
|
if (responses[message.header.replica]) |existing| {
|
|
@@ -2010,7 +2015,7 @@ pub fn ReplicaType(
|
|
|
2010
2015
|
// We may be slow and waiting for the write to complete.
|
|
2011
2016
|
//
|
|
2012
2017
|
// We may even have maxed out our IO depth and been unable to initiate the write,
|
|
2013
|
-
// which can happen if `config.pipeline_max` exceeds `config.
|
|
2018
|
+
// which can happen if `config.pipeline_max` exceeds `config.journal_iops_write_max`.
|
|
2014
2019
|
// This can lead to deadlock for a cluster of one or two (if we do not retry here),
|
|
2015
2020
|
// since there is no other way for the leader to repair the dirty op because no
|
|
2016
2021
|
// other replica has it.
|
|
@@ -2515,6 +2520,13 @@ pub fn ReplicaType(
|
|
|
2515
2520
|
assert(prepare.header.op == self.commit_min + 1);
|
|
2516
2521
|
assert(prepare.header.op <= self.op);
|
|
2517
2522
|
|
|
2523
|
+
tracer.start(
|
|
2524
|
+
&self.tracer_slot_commit,
|
|
2525
|
+
.main,
|
|
2526
|
+
.{ .commit = .{ .op = prepare.header.op } },
|
|
2527
|
+
@src(),
|
|
2528
|
+
);
|
|
2529
|
+
|
|
2518
2530
|
self.commit_prepare = prepare.ref();
|
|
2519
2531
|
self.commit_callback = callback;
|
|
2520
2532
|
self.state_machine.prefetch(
|
|
@@ -2593,6 +2605,12 @@ pub fn ReplicaType(
|
|
|
2593
2605
|
self.op_checkpoint,
|
|
2594
2606
|
self.op_checkpoint_next(),
|
|
2595
2607
|
});
|
|
2608
|
+
tracer.start(
|
|
2609
|
+
&self.tracer_slot_checkpoint,
|
|
2610
|
+
.main,
|
|
2611
|
+
.checkpoint,
|
|
2612
|
+
@src(),
|
|
2613
|
+
);
|
|
2596
2614
|
self.state_machine.checkpoint(commit_op_checkpoint_state_machine_callback);
|
|
2597
2615
|
} else {
|
|
2598
2616
|
self.commit_op_done();
|
|
@@ -2651,6 +2669,11 @@ pub fn ReplicaType(
|
|
|
2651
2669
|
self.op,
|
|
2652
2670
|
self.op_checkpoint,
|
|
2653
2671
|
});
|
|
2672
|
+
tracer.end(
|
|
2673
|
+
&self.tracer_slot_checkpoint,
|
|
2674
|
+
.main,
|
|
2675
|
+
.checkpoint,
|
|
2676
|
+
);
|
|
2654
2677
|
|
|
2655
2678
|
if (self.on_checkpoint) |on_checkpoint| on_checkpoint(self);
|
|
2656
2679
|
self.commit_op_done();
|
|
@@ -2662,9 +2685,18 @@ pub fn ReplicaType(
|
|
|
2662
2685
|
assert(self.commit_prepare.?.header.op == self.commit_min);
|
|
2663
2686
|
assert(self.commit_prepare.?.header.op < self.op_checkpoint_trigger());
|
|
2664
2687
|
|
|
2688
|
+
const op = self.commit_prepare.?.header.op;
|
|
2689
|
+
|
|
2665
2690
|
self.message_bus.unref(self.commit_prepare.?);
|
|
2666
2691
|
self.commit_prepare = null;
|
|
2667
2692
|
self.commit_callback = null;
|
|
2693
|
+
|
|
2694
|
+
tracer.end(
|
|
2695
|
+
&self.tracer_slot_commit,
|
|
2696
|
+
.main,
|
|
2697
|
+
.{ .commit = .{ .op = op } },
|
|
2698
|
+
);
|
|
2699
|
+
|
|
2668
2700
|
callback(self);
|
|
2669
2701
|
}
|
|
2670
2702
|
|
|
@@ -3634,7 +3666,7 @@ pub fn ReplicaType(
|
|
|
3634
3666
|
// learn the op.
|
|
3635
3667
|
fn op_certain(self: *const Self) bool {
|
|
3636
3668
|
assert(self.status == .recovering);
|
|
3637
|
-
assert(self.journal.recovered);
|
|
3669
|
+
assert(self.journal.status == .recovered);
|
|
3638
3670
|
assert(self.op_checkpoint <= self.op);
|
|
3639
3671
|
|
|
3640
3672
|
const slot_op_checkpoint = self.journal.slot_for_op(self.op_checkpoint).index;
|
|
@@ -3849,7 +3881,7 @@ pub fn ReplicaType(
|
|
|
3849
3881
|
fn recover(self: *Self) void {
|
|
3850
3882
|
assert(self.status == .recovering);
|
|
3851
3883
|
assert(self.replica_count > 1);
|
|
3852
|
-
assert(self.journal.recovered);
|
|
3884
|
+
assert(self.journal.status == .recovered);
|
|
3853
3885
|
|
|
3854
3886
|
log.debug("{}: recover: sending recovery messages nonce={}", .{
|
|
3855
3887
|
self.replica,
|
|
@@ -5089,7 +5121,7 @@ pub fn ReplicaType(
|
|
|
5089
5121
|
|
|
5090
5122
|
fn set_op_and_commit_max(self: *Self, op: u64, commit_max: u64, method: []const u8) void {
|
|
5091
5123
|
assert(self.status == .view_change or self.status == .recovering);
|
|
5092
|
-
assert(self.journal.recovered);
|
|
5124
|
+
assert(self.journal.status == .recovered);
|
|
5093
5125
|
|
|
5094
5126
|
switch (self.status) {
|
|
5095
5127
|
.normal => unreachable,
|
|
@@ -5961,144 +5993,3 @@ pub fn ReplicaType(
|
|
|
5961
5993
|
}
|
|
5962
5994
|
};
|
|
5963
5995
|
}
|
|
5964
|
-
|
|
5965
|
-
/// Initialize the TigerBeetle replica's data file.
|
|
5966
|
-
pub fn format(
|
|
5967
|
-
comptime Storage: type,
|
|
5968
|
-
allocator: std.mem.Allocator,
|
|
5969
|
-
cluster: u32,
|
|
5970
|
-
replica: u8,
|
|
5971
|
-
storage: *Storage,
|
|
5972
|
-
superblock: *vsr.SuperBlockType(Storage),
|
|
5973
|
-
) !void {
|
|
5974
|
-
const ReplicaFormat = ReplicaFormatType(Storage);
|
|
5975
|
-
var replica_format = ReplicaFormat{};
|
|
5976
|
-
|
|
5977
|
-
try replica_format.format_wal(allocator, cluster, storage);
|
|
5978
|
-
assert(!replica_format.formatting);
|
|
5979
|
-
|
|
5980
|
-
superblock.format(
|
|
5981
|
-
ReplicaFormat.format_superblock_callback,
|
|
5982
|
-
&replica_format.superblock_context,
|
|
5983
|
-
.{
|
|
5984
|
-
.cluster = cluster,
|
|
5985
|
-
.replica = replica,
|
|
5986
|
-
.size_max = config.size_max, // This can later become a runtime arg, to cap storage.
|
|
5987
|
-
},
|
|
5988
|
-
);
|
|
5989
|
-
|
|
5990
|
-
replica_format.formatting = true;
|
|
5991
|
-
while (replica_format.formatting) storage.tick();
|
|
5992
|
-
}
|
|
5993
|
-
|
|
5994
|
-
fn ReplicaFormatType(comptime Storage: type) type {
|
|
5995
|
-
const SuperBlock = vsr.SuperBlockType(Storage);
|
|
5996
|
-
return struct {
|
|
5997
|
-
const Self = @This();
|
|
5998
|
-
|
|
5999
|
-
formatting: bool = false,
|
|
6000
|
-
superblock_context: SuperBlock.Context = undefined,
|
|
6001
|
-
wal_write: Storage.Write = undefined,
|
|
6002
|
-
|
|
6003
|
-
fn format_wal(
|
|
6004
|
-
self: *Self,
|
|
6005
|
-
allocator: std.mem.Allocator,
|
|
6006
|
-
cluster: u32,
|
|
6007
|
-
storage: *Storage,
|
|
6008
|
-
) !void {
|
|
6009
|
-
const header_zeroes = [_]u8{0} ** @sizeOf(Header);
|
|
6010
|
-
const wal_write_size_max = 4 * 1024 * 1024;
|
|
6011
|
-
assert(wal_write_size_max % config.sector_size == 0);
|
|
6012
|
-
|
|
6013
|
-
// Direct I/O requires the buffer to be sector-aligned.
|
|
6014
|
-
var wal_buffer = try allocator.allocAdvanced(
|
|
6015
|
-
u8,
|
|
6016
|
-
config.sector_size,
|
|
6017
|
-
wal_write_size_max,
|
|
6018
|
-
.exact,
|
|
6019
|
-
);
|
|
6020
|
-
errdefer allocator.free(wal_buffer);
|
|
6021
|
-
|
|
6022
|
-
// The logical offset *within the Zone*.
|
|
6023
|
-
// Even though the prepare zone follows the redundant header zone, write the prepares
|
|
6024
|
-
// first. This allows the test Storage to check the invariant "never write the redundant
|
|
6025
|
-
// header before the prepare".
|
|
6026
|
-
var wal_offset: u64 = 0;
|
|
6027
|
-
while (wal_offset < config.journal_size_prepares) {
|
|
6028
|
-
const size = format_wal_prepares(cluster, wal_offset, wal_buffer);
|
|
6029
|
-
assert(size > 0);
|
|
6030
|
-
|
|
6031
|
-
for (std.mem.bytesAsSlice(Header, wal_buffer[0..size])) |*header| {
|
|
6032
|
-
if (std.mem.eql(u8, std.mem.asBytes(header), &header_zeroes)) {
|
|
6033
|
-
// This is the (empty) body of a reserved or root Prepare.
|
|
6034
|
-
} else {
|
|
6035
|
-
// This is a Prepare's header.
|
|
6036
|
-
assert(header.valid_checksum());
|
|
6037
|
-
if (header.op == 0) {
|
|
6038
|
-
assert(header.command == .prepare);
|
|
6039
|
-
assert(header.operation == .root);
|
|
6040
|
-
} else {
|
|
6041
|
-
assert(header.command == .reserved);
|
|
6042
|
-
assert(header.operation == .reserved);
|
|
6043
|
-
}
|
|
6044
|
-
}
|
|
6045
|
-
}
|
|
6046
|
-
|
|
6047
|
-
storage.write_sectors(
|
|
6048
|
-
format_wal_sectors_callback,
|
|
6049
|
-
&self.wal_write,
|
|
6050
|
-
wal_buffer[0..size],
|
|
6051
|
-
.wal_prepares,
|
|
6052
|
-
wal_offset,
|
|
6053
|
-
);
|
|
6054
|
-
self.formatting = true;
|
|
6055
|
-
while (self.formatting) storage.tick();
|
|
6056
|
-
wal_offset += size;
|
|
6057
|
-
}
|
|
6058
|
-
// There are no prepares left to write.
|
|
6059
|
-
assert(format_wal_prepares(cluster, wal_offset, wal_buffer) == 0);
|
|
6060
|
-
|
|
6061
|
-
wal_offset = 0;
|
|
6062
|
-
while (wal_offset < config.journal_size_headers) {
|
|
6063
|
-
const size = format_wal_headers(cluster, wal_offset, wal_buffer);
|
|
6064
|
-
assert(size > 0);
|
|
6065
|
-
|
|
6066
|
-
for (std.mem.bytesAsSlice(Header, wal_buffer[0..size])) |*header| {
|
|
6067
|
-
assert(header.valid_checksum());
|
|
6068
|
-
if (header.op == 0) {
|
|
6069
|
-
assert(header.command == .prepare);
|
|
6070
|
-
assert(header.operation == .root);
|
|
6071
|
-
} else {
|
|
6072
|
-
assert(header.command == .reserved);
|
|
6073
|
-
assert(header.operation == .reserved);
|
|
6074
|
-
}
|
|
6075
|
-
}
|
|
6076
|
-
|
|
6077
|
-
storage.write_sectors(
|
|
6078
|
-
format_wal_sectors_callback,
|
|
6079
|
-
&self.wal_write,
|
|
6080
|
-
wal_buffer[0..size],
|
|
6081
|
-
.wal_headers,
|
|
6082
|
-
wal_offset,
|
|
6083
|
-
);
|
|
6084
|
-
self.formatting = true;
|
|
6085
|
-
while (self.formatting) storage.tick();
|
|
6086
|
-
wal_offset += size;
|
|
6087
|
-
}
|
|
6088
|
-
// There are no headers left to write.
|
|
6089
|
-
assert(format_wal_headers(cluster, wal_offset, wal_buffer) == 0);
|
|
6090
|
-
}
|
|
6091
|
-
|
|
6092
|
-
fn format_wal_sectors_callback(write: *Storage.Write) void {
|
|
6093
|
-
const self = @fieldParentPtr(Self, "wal_write", write);
|
|
6094
|
-
assert(self.formatting);
|
|
6095
|
-
self.formatting = false;
|
|
6096
|
-
}
|
|
6097
|
-
|
|
6098
|
-
fn format_superblock_callback(superblock_context: *SuperBlock.Context) void {
|
|
6099
|
-
const self = @fieldParentPtr(Self, "superblock_context", superblock_context);
|
|
6100
|
-
assert(self.formatting);
|
|
6101
|
-
self.formatting = false;
|
|
6102
|
-
}
|
|
6103
|
-
};
|
|
6104
|
-
}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
|
|
4
|
+
const config = @import("../constants.zig");
|
|
5
|
+
const vsr = @import("../vsr.zig");
|
|
6
|
+
const Header = vsr.Header;
|
|
7
|
+
const format_wal_headers = @import("./journal.zig").format_wal_headers;
|
|
8
|
+
const format_wal_prepares = @import("./journal.zig").format_wal_prepares;
|
|
9
|
+
|
|
10
|
+
/// Initialize the TigerBeetle replica's data file.
|
|
11
|
+
pub fn format(
|
|
12
|
+
comptime Storage: type,
|
|
13
|
+
allocator: std.mem.Allocator,
|
|
14
|
+
cluster: u32,
|
|
15
|
+
replica: u8,
|
|
16
|
+
storage: *Storage,
|
|
17
|
+
superblock: *vsr.SuperBlockType(Storage),
|
|
18
|
+
) !void {
|
|
19
|
+
const ReplicaFormat = ReplicaFormatType(Storage);
|
|
20
|
+
var replica_format = ReplicaFormat{};
|
|
21
|
+
|
|
22
|
+
try replica_format.format_wal(allocator, cluster, storage);
|
|
23
|
+
assert(!replica_format.formatting);
|
|
24
|
+
|
|
25
|
+
superblock.format(
|
|
26
|
+
ReplicaFormat.format_superblock_callback,
|
|
27
|
+
&replica_format.superblock_context,
|
|
28
|
+
.{
|
|
29
|
+
.cluster = cluster,
|
|
30
|
+
.replica = replica,
|
|
31
|
+
// TODO Convert this to a runtime arg, to cap storage.
|
|
32
|
+
.size_max = config.size_max,
|
|
33
|
+
},
|
|
34
|
+
);
|
|
35
|
+
|
|
36
|
+
replica_format.formatting = true;
|
|
37
|
+
while (replica_format.formatting) storage.tick();
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
fn ReplicaFormatType(comptime Storage: type) type {
|
|
41
|
+
const SuperBlock = vsr.SuperBlockType(Storage);
|
|
42
|
+
return struct {
|
|
43
|
+
const Self = @This();
|
|
44
|
+
|
|
45
|
+
formatting: bool = false,
|
|
46
|
+
superblock_context: SuperBlock.Context = undefined,
|
|
47
|
+
wal_write: Storage.Write = undefined,
|
|
48
|
+
|
|
49
|
+
fn format_wal(
|
|
50
|
+
self: *Self,
|
|
51
|
+
allocator: std.mem.Allocator,
|
|
52
|
+
cluster: u32,
|
|
53
|
+
storage: *Storage,
|
|
54
|
+
) !void {
|
|
55
|
+
const header_zeroes = [_]u8{0} ** @sizeOf(Header);
|
|
56
|
+
const wal_write_size_max = 4 * 1024 * 1024;
|
|
57
|
+
assert(wal_write_size_max % config.sector_size == 0);
|
|
58
|
+
|
|
59
|
+
// Direct I/O requires the buffer to be sector-aligned.
|
|
60
|
+
var wal_buffer = try allocator.allocAdvanced(
|
|
61
|
+
u8,
|
|
62
|
+
config.sector_size,
|
|
63
|
+
wal_write_size_max,
|
|
64
|
+
.exact,
|
|
65
|
+
);
|
|
66
|
+
defer allocator.free(wal_buffer);
|
|
67
|
+
|
|
68
|
+
// The logical offset *within the Zone*.
|
|
69
|
+
// Even though the prepare zone follows the redundant header zone, write the prepares
|
|
70
|
+
// first. This allows the test Storage to check the invariant "never write the redundant
|
|
71
|
+
// header before the prepare".
|
|
72
|
+
var wal_offset: u64 = 0;
|
|
73
|
+
while (wal_offset < config.journal_size_prepares) {
|
|
74
|
+
const size = format_wal_prepares(cluster, wal_offset, wal_buffer);
|
|
75
|
+
assert(size > 0);
|
|
76
|
+
|
|
77
|
+
for (std.mem.bytesAsSlice(Header, wal_buffer[0..size])) |*header| {
|
|
78
|
+
if (std.mem.eql(u8, std.mem.asBytes(header), &header_zeroes)) {
|
|
79
|
+
// This is the (empty) body of a reserved or root Prepare.
|
|
80
|
+
} else {
|
|
81
|
+
// This is a Prepare's header.
|
|
82
|
+
assert(header.valid_checksum());
|
|
83
|
+
if (header.op == 0) {
|
|
84
|
+
assert(header.command == .prepare);
|
|
85
|
+
assert(header.operation == .root);
|
|
86
|
+
} else {
|
|
87
|
+
assert(header.command == .reserved);
|
|
88
|
+
assert(header.operation == .reserved);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
storage.write_sectors(
|
|
94
|
+
format_wal_sectors_callback,
|
|
95
|
+
&self.wal_write,
|
|
96
|
+
wal_buffer[0..size],
|
|
97
|
+
.wal_prepares,
|
|
98
|
+
wal_offset,
|
|
99
|
+
);
|
|
100
|
+
self.formatting = true;
|
|
101
|
+
while (self.formatting) storage.tick();
|
|
102
|
+
wal_offset += size;
|
|
103
|
+
}
|
|
104
|
+
// There are no prepares left to write.
|
|
105
|
+
assert(format_wal_prepares(cluster, wal_offset, wal_buffer) == 0);
|
|
106
|
+
|
|
107
|
+
wal_offset = 0;
|
|
108
|
+
while (wal_offset < config.journal_size_headers) {
|
|
109
|
+
const size = format_wal_headers(cluster, wal_offset, wal_buffer);
|
|
110
|
+
assert(size > 0);
|
|
111
|
+
|
|
112
|
+
for (std.mem.bytesAsSlice(Header, wal_buffer[0..size])) |*header| {
|
|
113
|
+
assert(header.valid_checksum());
|
|
114
|
+
if (header.op == 0) {
|
|
115
|
+
assert(header.command == .prepare);
|
|
116
|
+
assert(header.operation == .root);
|
|
117
|
+
} else {
|
|
118
|
+
assert(header.command == .reserved);
|
|
119
|
+
assert(header.operation == .reserved);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
storage.write_sectors(
|
|
124
|
+
format_wal_sectors_callback,
|
|
125
|
+
&self.wal_write,
|
|
126
|
+
wal_buffer[0..size],
|
|
127
|
+
.wal_headers,
|
|
128
|
+
wal_offset,
|
|
129
|
+
);
|
|
130
|
+
self.formatting = true;
|
|
131
|
+
while (self.formatting) storage.tick();
|
|
132
|
+
wal_offset += size;
|
|
133
|
+
}
|
|
134
|
+
// There are no headers left to write.
|
|
135
|
+
assert(format_wal_headers(cluster, wal_offset, wal_buffer) == 0);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
fn format_wal_sectors_callback(write: *Storage.Write) void {
|
|
139
|
+
const self = @fieldParentPtr(Self, "wal_write", write);
|
|
140
|
+
assert(self.formatting);
|
|
141
|
+
self.formatting = false;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
fn format_superblock_callback(superblock_context: *SuperBlock.Context) void {
|
|
145
|
+
const self = @fieldParentPtr(Self, "superblock_context", superblock_context);
|
|
146
|
+
assert(self.formatting);
|
|
147
|
+
self.formatting = false;
|
|
148
|
+
}
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
test "format" {
|
|
153
|
+
const superblock_zone_size = @import("./superblock.zig").superblock_zone_size;
|
|
154
|
+
const MessagePool = @import("../message_pool.zig").MessagePool;
|
|
155
|
+
const Storage = @import("../test/storage.zig").Storage;
|
|
156
|
+
const SuperBlock = vsr.SuperBlockType(Storage);
|
|
157
|
+
const allocator = std.testing.allocator;
|
|
158
|
+
const cluster = 0;
|
|
159
|
+
const replica = 1;
|
|
160
|
+
|
|
161
|
+
var storage = try Storage.init(
|
|
162
|
+
allocator,
|
|
163
|
+
superblock_zone_size + config.journal_size_headers + config.journal_size_prepares,
|
|
164
|
+
.{
|
|
165
|
+
.read_latency_min = 0,
|
|
166
|
+
.read_latency_mean = 0,
|
|
167
|
+
.write_latency_min = 0,
|
|
168
|
+
.write_latency_mean = 0,
|
|
169
|
+
},
|
|
170
|
+
);
|
|
171
|
+
defer storage.deinit(allocator);
|
|
172
|
+
|
|
173
|
+
var message_pool = try MessagePool.init(allocator, .replica);
|
|
174
|
+
defer message_pool.deinit(allocator);
|
|
175
|
+
|
|
176
|
+
var superblock = try SuperBlock.init(allocator, &storage, &message_pool);
|
|
177
|
+
defer superblock.deinit(allocator);
|
|
178
|
+
|
|
179
|
+
try format(Storage, allocator, cluster, replica, &storage, &superblock);
|
|
180
|
+
|
|
181
|
+
// Verify the superblock sectors.
|
|
182
|
+
var copy: u8 = 0;
|
|
183
|
+
while (copy < config.superblock_copies) : (copy += 1) {
|
|
184
|
+
const sector = storage.superblock_sector(copy);
|
|
185
|
+
|
|
186
|
+
try std.testing.expectEqual(sector.copy, copy);
|
|
187
|
+
try std.testing.expectEqual(sector.replica, replica);
|
|
188
|
+
try std.testing.expectEqual(sector.cluster, cluster);
|
|
189
|
+
try std.testing.expectEqual(sector.size, storage.size);
|
|
190
|
+
try std.testing.expectEqual(sector.sequence, 1);
|
|
191
|
+
try std.testing.expectEqual(sector.vsr_state.commit_min, 0);
|
|
192
|
+
try std.testing.expectEqual(sector.vsr_state.commit_max, 0);
|
|
193
|
+
try std.testing.expectEqual(sector.vsr_state.view, 0);
|
|
194
|
+
try std.testing.expectEqual(sector.vsr_state.view_normal, 0);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
// Verify the WAL headers and prepares zones.
|
|
198
|
+
assert(storage.wal_headers().len == storage.wal_headers().len);
|
|
199
|
+
for (storage.wal_headers()) |header, slot| {
|
|
200
|
+
const message = storage.wal_prepares()[slot];
|
|
201
|
+
try std.testing.expect(std.meta.eql(header, message.header));
|
|
202
|
+
|
|
203
|
+
try std.testing.expect(header.valid_checksum());
|
|
204
|
+
try std.testing.expect(header.valid_checksum_body(&[0]u8{}));
|
|
205
|
+
try std.testing.expectEqual(header.invalid(), null);
|
|
206
|
+
try std.testing.expectEqual(header.cluster, cluster);
|
|
207
|
+
try std.testing.expectEqual(header.op, slot);
|
|
208
|
+
try std.testing.expectEqual(header.size, @sizeOf(vsr.Header));
|
|
209
|
+
if (slot == 0) {
|
|
210
|
+
try std.testing.expectEqual(header.command, .prepare);
|
|
211
|
+
try std.testing.expectEqual(header.operation, .root);
|
|
212
|
+
} else {
|
|
213
|
+
try std.testing.expectEqual(header.command, .reserved);
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
@@ -16,7 +16,7 @@ const mem = std.mem;
|
|
|
16
16
|
const meta = std.meta;
|
|
17
17
|
const os = std.os;
|
|
18
18
|
|
|
19
|
-
const config = @import("../
|
|
19
|
+
const config = @import("../constants.zig");
|
|
20
20
|
const div_ceil = @import("../util.zig").div_ceil;
|
|
21
21
|
const vsr = @import("../vsr.zig");
|
|
22
22
|
const log = std.log.scoped(.superblock);
|
|
@@ -5,7 +5,7 @@ const mem = std.mem;
|
|
|
5
5
|
const DynamicBitSetUnmanaged = std.bit_set.DynamicBitSetUnmanaged;
|
|
6
6
|
const MaskInt = DynamicBitSetUnmanaged.MaskInt;
|
|
7
7
|
|
|
8
|
-
const config = @import("../
|
|
8
|
+
const config = @import("../constants.zig");
|
|
9
9
|
|
|
10
10
|
const ewah = @import("../ewah.zig").ewah(usize);
|
|
11
11
|
const div_ceil = @import("../util.zig").div_ceil;
|
|
@@ -404,7 +404,8 @@ fn bit_set_masks(bit_set: DynamicBitSetUnmanaged) []usize {
|
|
|
404
404
|
}
|
|
405
405
|
|
|
406
406
|
test "FreeSet block shard count" {
|
|
407
|
-
|
|
407
|
+
if (config.block_size != 64 * 1024) return;
|
|
408
|
+
const blocks_in_tb = @divExact(1 << 40, config.block_size);
|
|
408
409
|
try test_block_shards_count(5120 * 8, 10 * blocks_in_tb);
|
|
409
410
|
try test_block_shards_count(5120 * 8 - 1, 10 * blocks_in_tb - FreeSet.shard_size);
|
|
410
411
|
try test_block_shards_count(1, FreeSet.shard_size); // Must be at least one index bit.
|
|
@@ -9,6 +9,8 @@ const FreeSet = @import("./superblock_free_set.zig").FreeSet;
|
|
|
9
9
|
const Reservation = @import("./superblock_free_set.zig").Reservation;
|
|
10
10
|
const fuzz = @import("../test/fuzz.zig");
|
|
11
11
|
|
|
12
|
+
pub const tigerbeetle_config = @import("../config.zig").configs.test_min;
|
|
13
|
+
|
|
12
14
|
pub fn main() !void {
|
|
13
15
|
const allocator = std.testing.allocator;
|
|
14
16
|
const args = try fuzz.parse_fuzz_args(allocator);
|
|
@@ -14,7 +14,7 @@ const std = @import("std");
|
|
|
14
14
|
const assert = std.debug.assert;
|
|
15
15
|
const log = std.log.scoped(.fuzz_vsr_superblock);
|
|
16
16
|
|
|
17
|
-
const config = @import("../
|
|
17
|
+
const config = @import("../constants.zig");
|
|
18
18
|
const util = @import("../util.zig");
|
|
19
19
|
const vsr = @import("../vsr.zig");
|
|
20
20
|
const Storage = @import("../test/storage.zig").Storage;
|
|
@@ -26,18 +26,21 @@ const SuperBlockType = @import("superblock.zig").SuperBlockType;
|
|
|
26
26
|
const SuperBlock = SuperBlockType(Storage);
|
|
27
27
|
const fuzz = @import("../test/fuzz.zig");
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
pub const tigerbeetle_config = @import("../config.zig").configs.test_min;
|
|
30
|
+
|
|
31
31
|
const cluster = 0;
|
|
32
32
|
|
|
33
33
|
pub fn main() !void {
|
|
34
34
|
const allocator = std.testing.allocator;
|
|
35
35
|
const args = try fuzz.parse_fuzz_args(allocator);
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
// Total calls to checkpoint() + view_change().
|
|
38
|
+
const transitions_count_total = args.events_max orelse 10;
|
|
39
|
+
|
|
40
|
+
try run_fuzz(allocator, args.seed, transitions_count_total);
|
|
38
41
|
}
|
|
39
42
|
|
|
40
|
-
fn run_fuzz(allocator: std.mem.Allocator, seed: u64) !void {
|
|
43
|
+
fn run_fuzz(allocator: std.mem.Allocator, seed: u64, transitions_count_total: usize) !void {
|
|
41
44
|
var prng = std.rand.DefaultPrng.init(seed);
|
|
42
45
|
const random = prng.random();
|
|
43
46
|
|
|
@@ -5,7 +5,7 @@ const assert = std.debug.assert;
|
|
|
5
5
|
const log = std.log.scoped(.superblock_manifest);
|
|
6
6
|
const mem = std.mem;
|
|
7
7
|
|
|
8
|
-
const config = @import("../
|
|
8
|
+
const config = @import("../constants.zig");
|
|
9
9
|
const util = @import("../util.zig");
|
|
10
10
|
|
|
11
11
|
// TODO Compute & use the upper bound of manifest blocks (per tree) to size the trailer zone.
|
|
@@ -10,6 +10,8 @@ const fuzz = @import("../test/fuzz.zig");
|
|
|
10
10
|
const superblock_quorums = @import("superblock_quorums.zig");
|
|
11
11
|
const QuorumsType = superblock_quorums.QuorumsType;
|
|
12
12
|
|
|
13
|
+
pub const tigerbeetle_config = @import("../config.zig").configs.test_min;
|
|
14
|
+
|
|
13
15
|
pub fn main() !void {
|
|
14
16
|
const fuzz_args = try fuzz.parse_fuzz_args(std.testing.allocator);
|
|
15
17
|
var prng = std.rand.DefaultPrng.init(fuzz_args.seed);
|
|
@@ -4,14 +4,14 @@ const Allocator = std.mem.Allocator;
|
|
|
4
4
|
const assert = std.debug.assert;
|
|
5
5
|
const log = std.log.scoped(.vsr);
|
|
6
6
|
|
|
7
|
-
const config = @import("
|
|
7
|
+
const config = @import("constants.zig");
|
|
8
8
|
|
|
9
9
|
/// The version of our Viewstamped Replication protocol in use, including customizations.
|
|
10
10
|
/// For backwards compatibility through breaking changes (e.g. upgrading checksums/ciphers).
|
|
11
11
|
pub const Version: u8 = 0;
|
|
12
12
|
|
|
13
13
|
pub const ReplicaType = @import("vsr/replica.zig").ReplicaType;
|
|
14
|
-
pub const format = @import("vsr/
|
|
14
|
+
pub const format = @import("vsr/replica_format.zig").format;
|
|
15
15
|
pub const Status = @import("vsr/replica.zig").Status;
|
|
16
16
|
pub const Client = @import("vsr/client.zig").Client;
|
|
17
17
|
pub const Clock = @import("vsr/clock.zig").Clock;
|