tigerbeetle-node 0.11.7 → 0.11.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.client.node.sha256 +1 -1
- package/package.json +4 -3
- package/scripts/build_lib.sh +29 -0
- package/src/node.zig +1 -1
- package/src/tigerbeetle/scripts/validate_docs.sh +7 -1
- package/src/tigerbeetle/src/benchmark.zig +3 -3
- package/src/tigerbeetle/src/config.zig +29 -16
- package/src/tigerbeetle/src/constants.zig +30 -9
- package/src/tigerbeetle/src/ewah.zig +5 -5
- package/src/tigerbeetle/src/ewah_fuzz.zig +1 -1
- package/src/tigerbeetle/src/lsm/binary_search.zig +1 -1
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +1 -1
- package/src/tigerbeetle/src/lsm/compaction.zig +34 -21
- package/src/tigerbeetle/src/lsm/forest_fuzz.zig +85 -103
- package/src/tigerbeetle/src/lsm/grid.zig +19 -13
- package/src/tigerbeetle/src/lsm/manifest_log.zig +8 -10
- package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +12 -8
- package/src/tigerbeetle/src/lsm/merge_iterator.zig +1 -1
- package/src/tigerbeetle/src/lsm/segmented_array.zig +17 -17
- package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +1 -1
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +1 -1
- package/src/tigerbeetle/src/lsm/table.zig +8 -20
- package/src/tigerbeetle/src/lsm/table_immutable.zig +1 -1
- package/src/tigerbeetle/src/lsm/table_iterator.zig +3 -3
- package/src/tigerbeetle/src/lsm/table_mutable.zig +14 -2
- package/src/tigerbeetle/src/lsm/tree.zig +31 -5
- package/src/tigerbeetle/src/lsm/tree_fuzz.zig +86 -114
- package/src/tigerbeetle/src/message_bus.zig +4 -4
- package/src/tigerbeetle/src/message_pool.zig +7 -10
- package/src/tigerbeetle/src/ring_buffer.zig +22 -12
- package/src/tigerbeetle/src/simulator.zig +360 -214
- package/src/tigerbeetle/src/state_machine/auditor.zig +5 -5
- package/src/tigerbeetle/src/state_machine/workload.zig +3 -3
- package/src/tigerbeetle/src/state_machine.zig +190 -178
- package/src/tigerbeetle/src/{util.zig → stdx.zig} +2 -0
- package/src/tigerbeetle/src/storage.zig +13 -6
- package/src/tigerbeetle/src/{test → testing/cluster}/message_bus.zig +3 -3
- package/src/tigerbeetle/src/{test → testing/cluster}/network.zig +46 -22
- package/src/tigerbeetle/src/testing/cluster/state_checker.zig +169 -0
- package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +202 -0
- package/src/tigerbeetle/src/testing/cluster.zig +537 -0
- package/src/tigerbeetle/src/{test → testing}/fuzz.zig +0 -0
- package/src/tigerbeetle/src/testing/hash_log.zig +66 -0
- package/src/tigerbeetle/src/{test → testing}/id.zig +0 -0
- package/src/tigerbeetle/src/testing/packet_simulator.zig +365 -0
- package/src/tigerbeetle/src/{test → testing}/priority_queue.zig +1 -1
- package/src/tigerbeetle/src/testing/reply_sequence.zig +139 -0
- package/src/tigerbeetle/src/{test → testing}/state_machine.zig +3 -1
- package/src/tigerbeetle/src/testing/storage.zig +754 -0
- package/src/tigerbeetle/src/{test → testing}/table.zig +21 -0
- package/src/tigerbeetle/src/{test → testing}/time.zig +0 -0
- package/src/tigerbeetle/src/tigerbeetle.zig +2 -0
- package/src/tigerbeetle/src/tracer.zig +3 -3
- package/src/tigerbeetle/src/unit_tests.zig +4 -4
- package/src/tigerbeetle/src/vopr.zig +2 -2
- package/src/tigerbeetle/src/vsr/client.zig +16 -9
- package/src/tigerbeetle/src/vsr/clock.zig +93 -53
- package/src/tigerbeetle/src/vsr/journal.zig +29 -14
- package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +2 -2
- package/src/tigerbeetle/src/vsr/replica.zig +1383 -774
- package/src/tigerbeetle/src/vsr/replica_format.zig +2 -2
- package/src/tigerbeetle/src/vsr/superblock.zig +59 -43
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +7 -7
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +1 -1
- package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +1 -1
- package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +15 -7
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +38 -19
- package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +1 -1
- package/src/tigerbeetle/src/vsr.zig +6 -4
- package/src/tigerbeetle/src/demo.zig +0 -132
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +0 -35
- package/src/tigerbeetle/src/demo_02_lookup_accounts.zig +0 -7
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +0 -37
- package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +0 -61
- package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +0 -37
- package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +0 -24
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +0 -7
- package/src/tigerbeetle/src/test/cluster.zig +0 -352
- package/src/tigerbeetle/src/test/conductor.zig +0 -366
- package/src/tigerbeetle/src/test/packet_simulator.zig +0 -398
- package/src/tigerbeetle/src/test/state_checker.zig +0 -169
- package/src/tigerbeetle/src/test/storage.zig +0 -864
- package/src/tigerbeetle/src/test/storage_checker.zig +0 -204
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
const std = @import("std");
|
|
2
|
-
const assert = std.debug.assert;
|
|
3
|
-
|
|
4
|
-
const constants = @import("constants.zig");
|
|
5
|
-
|
|
6
|
-
const tb = @import("tigerbeetle.zig");
|
|
7
|
-
const Account = tb.Account;
|
|
8
|
-
const Transfer = tb.Transfer;
|
|
9
|
-
|
|
10
|
-
const CreateAccountsResult = tb.CreateAccountsResult;
|
|
11
|
-
const CreateTransfersResult = tb.CreateTransfersResult;
|
|
12
|
-
|
|
13
|
-
const util = @import("util.zig");
|
|
14
|
-
const IO = @import("io.zig").IO;
|
|
15
|
-
const Storage = @import("storage.zig").Storage;
|
|
16
|
-
const MessagePool = @import("message_pool.zig").MessagePool;
|
|
17
|
-
const MessageBus = @import("message_bus.zig").MessageBusClient;
|
|
18
|
-
const StateMachine = @import("state_machine.zig").StateMachineType(Storage, .{
|
|
19
|
-
.message_body_size_max = constants.message_body_size_max,
|
|
20
|
-
});
|
|
21
|
-
|
|
22
|
-
const vsr = @import("vsr.zig");
|
|
23
|
-
const Header = vsr.Header;
|
|
24
|
-
const Client = vsr.Client(StateMachine, MessageBus);
|
|
25
|
-
|
|
26
|
-
pub const log_level: std.log.Level = .alert;
|
|
27
|
-
|
|
28
|
-
/// Sends one `operation` request whose body is the raw bytes of `batch`, then drives the client
/// until its request queue is empty.
///
/// The client is configured with a single address, 127.0.0.1:`constants.port`, and a randomly
/// generated client id. `on_reply` is passed to `client.request` as the completion callback.
/// The IO instance, message pool, client and message reference created here are all released
/// (via `defer`) before returning.
pub fn request(
    operation: StateMachine.Operation,
    batch: anytype,
    on_reply: fn (
        user_data: u128,
        operation: StateMachine.Operation,
        results: Client.Error![]const u8,
    ) void,
) !void {
    const gpa = std.heap.page_allocator;
    const id = std.crypto.random.int(u128);
    const cluster: u32 = 0;
    var replica_addresses = [_]std.net.Address{
        try std.net.Address.parseIp4("127.0.0.1", constants.port),
    };
    const replica_count = @intCast(u8, replica_addresses.len);

    var io = try IO.init(32, 0);
    defer io.deinit();

    var pool = try MessagePool.init(gpa, .client);
    defer pool.deinit(gpa);

    var client = try Client.init(gpa, id, cluster, replica_count, &pool, .{
        .configuration = &replica_addresses,
        .io = &io,
    });
    defer client.deinit(gpa);

    const message = client.get_message();
    defer client.unref(message);

    // The request body is placed directly after the message header.
    const payload = std.mem.asBytes(&batch);
    util.copy_disjoint(.inexact, u8, message.buffer[@sizeOf(Header)..], payload);

    // The leading 0 is presumably the `user_data` echoed back to `on_reply` — TODO confirm.
    client.request(0, on_reply, operation, message, payload.len);

    // Alternate between ticking the client and running IO for one tick interval until the
    // request is no longer queued.
    const tick_ns = constants.tick_ms * std.time.ns_per_ms;
    while (client.request_queue.count > 0) {
        client.tick();
        try io.run_for_ns(tick_ns);
    }
}
|
|
80
|
-
|
|
81
|
-
/// Reply callback that prints the reply body as a sequence of `CreateAccountsResult` values.
/// `user_data` and `operation` are ignored.
pub fn on_create_accounts(
    user_data: u128,
    operation: StateMachine.Operation,
    results: Client.Error![]const u8,
) void {
    _ = operation;
    _ = user_data;
    print_results(CreateAccountsResult, results);
}
|
|
91
|
-
|
|
92
|
-
/// Reply callback that prints the reply body as a sequence of `Account` values.
/// `user_data` and `operation` are ignored.
pub fn on_lookup_accounts(
    user_data: u128,
    operation: StateMachine.Operation,
    results: Client.Error![]const u8,
) void {
    _ = operation;
    _ = user_data;
    print_results(Account, results);
}
|
|
102
|
-
|
|
103
|
-
/// Reply callback that prints the reply body as a sequence of `Transfer` values.
/// `user_data` and `operation` are ignored.
pub fn on_lookup_transfers(
    user_data: u128,
    operation: StateMachine.Operation,
    results: Client.Error![]const u8,
) void {
    _ = operation;
    _ = user_data;
    print_results(Transfer, results);
}
|
|
113
|
-
|
|
114
|
-
/// Reply callback that prints the reply body as a sequence of `CreateTransfersResult` values.
/// `user_data` and `operation` are ignored.
pub fn on_create_transfers(
    user_data: u128,
    operation: StateMachine.Operation,
    results: Client.Error![]const u8,
) void {
    _ = operation;
    _ = user_data;
    print_results(CreateTransfersResult, results);
}
|
|
124
|
-
|
|
125
|
-
/// Prints every `Results` value found in a reply body, one per line, using `std.debug.print`.
/// Prints "OK" when the body contains no values.
///
/// A failed request (`results` holds an error) is fatal: panic with the error name. The previous
/// `catch unreachable` was undefined behavior in unchecked build modes and gave no hint about
/// which error occurred.
fn print_results(comptime Results: type, results: Client.Error![]const u8) void {
    const body = results catch |err|
        std.debug.panic("request failed: {s}", .{@errorName(err)});

    // Reinterpret the raw reply bytes as a slice of `Results`.
    const slice = std.mem.bytesAsSlice(Results, body);
    for (slice) |result| {
        std.debug.print("{}\n", .{result});
    }
    if (slice.len == 0) std.debug.print("OK\n", .{});
}
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
const tb = @import("tigerbeetle.zig");
|
|
2
|
-
const demo = @import("demo.zig");
|
|
3
|
-
|
|
4
|
-
const Account = tb.Account;
|
|
5
|
-
|
|
6
|
-
/// Submits one `create_accounts` request containing two accounts (ids 1 and 2) on ledger 710.
pub fn main() !void {
    // ISO-4217 code number for ZAR.
    const ledger_zar = 710;

    const accounts = [_]Account{
        // Chart-of-accounts code 1000 describes this as a clearing account.
        .{
            .id = 1,
            .user_data = 0,
            .reserved = [_]u8{0} ** 48,
            .ledger = ledger_zar,
            .code = 1000,
            .flags = .{ .debits_must_not_exceed_credits = true },
            .debits_pending = 0,
            .debits_posted = 0,
            .credits_pending = 0,
            .credits_posted = 0,
        },
        // Chart-of-accounts code 2000 describes this as a payable account.
        .{
            .id = 2,
            .user_data = 0,
            .reserved = [_]u8{0} ** 48,
            .ledger = ledger_zar,
            .code = 2000,
            .flags = .{},
            .debits_pending = 0,
            .debits_posted = 0,
            .credits_pending = 0,
            .credits_posted = 0,
        },
    };

    try demo.request(.create_accounts, accounts, demo.on_create_accounts);
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
const tb = @import("tigerbeetle.zig");
|
|
2
|
-
const demo = @import("demo.zig");
|
|
3
|
-
|
|
4
|
-
const Transfer = tb.Transfer;
|
|
5
|
-
|
|
6
|
-
/// Submits one `create_transfers` request containing two single-phase transfers on ledger 710.
pub fn main() !void {
    // ISO-4217 code number for ZAR.
    const ledger_zar = 710;

    const transfers = [_]Transfer{
        // Start with some liquidity in account 1 (credited from account 2).
        .{
            .id = 1,
            .debit_account_id = 2,
            .credit_account_id = 1,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 0,
            .timeout = 0,
            .ledger = ledger_zar,
            .code = 1,
            .flags = .{},
            .amount = 10000,
        },
        // Move 1000 from account 1 to account 2.
        .{
            .id = 2,
            .debit_account_id = 1,
            .credit_account_id = 2,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 0,
            .timeout = 0,
            .ledger = ledger_zar,
            .code = 1,
            .flags = .{},
            .amount = 1000,
        },
    };

    try demo.request(.create_transfers, transfers, demo.on_create_transfers);
}
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
const std = @import("std");
|
|
2
|
-
|
|
3
|
-
const tb = @import("tigerbeetle.zig");
|
|
4
|
-
const demo = @import("demo.zig");
|
|
5
|
-
|
|
6
|
-
const Transfer = tb.Transfer;
|
|
7
|
-
|
|
8
|
-
/// Submits one `create_transfers` request containing three pending (two-phase) transfers
/// from account 1 to account 2. Transfers 1002 and 1003 form a linked chain.
pub fn main() !void {
    const timeout = std.time.ns_per_hour;
    const ledger = 710;

    const transfers = [_]Transfer{
        // A standalone two-phase transfer.
        .{
            .id = 1001,
            .debit_account_id = 1,
            .credit_account_id = 2,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 0,
            .timeout = timeout,
            .ledger = ledger,
            .code = 1,
            .flags = .{ .pending = true },
            .amount = 8000,
        },
        // A two-phase transfer linked with the next transfer, 1003.
        .{
            .id = 1002,
            .debit_account_id = 1,
            .credit_account_id = 2,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 0,
            .timeout = timeout,
            .ledger = ledger,
            .code = 1,
            .flags = .{ .pending = true, .linked = true },
            .amount = 500,
        },
        // A two-phase transfer that closes the chain: the last transfer in a linked chain
        // leaves `.linked` false. It succeeds or fails together with transfer 1002.
        .{
            .id = 1003,
            .debit_account_id = 1,
            .credit_account_id = 2,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 0,
            .timeout = timeout,
            .ledger = ledger,
            .code = 1,
            .flags = .{ .pending = true },
            .amount = 500,
        },
    };

    try demo.request(.create_transfers, transfers, demo.on_create_transfers);
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
const tb = @import("tigerbeetle.zig");
|
|
2
|
-
const demo = @import("demo.zig");
|
|
3
|
-
|
|
4
|
-
const Transfer = tb.Transfer;
|
|
5
|
-
|
|
6
|
-
/// Submits one `create_transfers` request that posts pending transfers 1001 and 1002.
///
/// Both posting transfers leave `ledger`, `code` and `amount` at 0 so that the values of the
/// original pending transfer are honored/inherited.
pub fn main() !void {
    const posts = [_]Transfer{
        // Post the pending two-phase transfer 1001.
        .{
            .id = 2001,
            .debit_account_id = 1,
            .credit_account_id = 2,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 1001,
            .timeout = 0,
            .ledger = 0,
            .code = 0,
            .flags = .{ .post_pending_transfer = true },
            .amount = 0,
        },
        // Post the pending two-phase transfer 1002.
        .{
            .id = 2002,
            .debit_account_id = 1,
            .credit_account_id = 2,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 1002,
            .timeout = 0,
            .ledger = 0,
            .code = 0,
            .flags = .{ .post_pending_transfer = true },
            .amount = 0,
        },
    };

    try demo.request(.create_transfers, posts, demo.on_create_transfers);
}
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
const tb = @import("tigerbeetle.zig");
|
|
2
|
-
const demo = @import("demo.zig");
|
|
3
|
-
|
|
4
|
-
const Transfer = tb.Transfer;
|
|
5
|
-
|
|
6
|
-
/// Submits one `create_transfers` request that voids pending transfer 1003.
pub fn main() !void {
    const voids = [_]Transfer{
        .{
            .id = 2003,
            .debit_account_id = 1,
            .credit_account_id = 2,
            .user_data = 0,
            .reserved = 0,
            .pending_id = 1003,
            .timeout = 0,
            .ledger = 0,
            .code = 0,
            .flags = .{ .void_pending_transfer = true },
            .amount = 0,
        },
    };

    try demo.request(.create_transfers, voids, demo.on_create_transfers);
}
|
|
@@ -1,352 +0,0 @@
|
|
|
1
|
-
const std = @import("std");
|
|
2
|
-
const assert = std.debug.assert;
|
|
3
|
-
const mem = std.mem;
|
|
4
|
-
|
|
5
|
-
const constants = @import("../constants.zig");
|
|
6
|
-
|
|
7
|
-
const message_pool = @import("../message_pool.zig");
|
|
8
|
-
const MessagePool = message_pool.MessagePool;
|
|
9
|
-
const Message = MessagePool.Message;
|
|
10
|
-
|
|
11
|
-
const Network = @import("network.zig").Network;
|
|
12
|
-
const NetworkOptions = @import("network.zig").NetworkOptions;
|
|
13
|
-
|
|
14
|
-
pub const StateMachine = constants.StateMachineType(Storage, .{
|
|
15
|
-
.message_body_size_max = constants.message_body_size_max,
|
|
16
|
-
});
|
|
17
|
-
const MessageBus = @import("message_bus.zig").MessageBus;
|
|
18
|
-
const Storage = @import("storage.zig").Storage;
|
|
19
|
-
const Time = @import("time.zig").Time;
|
|
20
|
-
|
|
21
|
-
const vsr = @import("../vsr.zig");
|
|
22
|
-
pub const Replica = vsr.ReplicaType(StateMachine, MessageBus, Storage, Time);
|
|
23
|
-
pub const ReplicaFormat = vsr.ReplicaFormatType(Storage);
|
|
24
|
-
pub const Client = vsr.Client(StateMachine, MessageBus);
|
|
25
|
-
const SuperBlock = vsr.SuperBlockType(Storage);
|
|
26
|
-
const superblock_zone_size = @import("../vsr/superblock.zig").superblock_zone_size;
|
|
27
|
-
|
|
28
|
-
/// Configuration consumed by `Cluster.create` and kept on the cluster as `Cluster.options`.
pub const ClusterOptions = struct {
    /// Cluster id: passed to `vsr.format` and asserted to match each opened replica's `cluster`.
    cluster: u32,
    /// Number of replicas; one storage, message pool, replica and health slot is allocated each.
    /// Must be greater than zero.
    replica_count: u8,
    /// Number of clients, forwarded to `Network.init`. Must be greater than zero.
    client_count: u8,
    /// Must be a multiple of `constants.sector_size` and at most `constants.storage_size_max`.
    storage_size_limit: u64,

    /// NOTE(review): not read by `Cluster` in this file — presumably used by the caller; confirm.
    seed: u64,
    /// Callbacks installed on every replica each time it is opened (see `open_replica`).
    on_change_state: fn (replica: *const Replica) void,
    on_compact: fn (replica: *const Replica) void,
    on_checkpoint: fn (replica: *const Replica) void,

    /// Forwarded to `Network.init`.
    network_options: NetworkOptions,
    /// Base options for every `Storage.init`; `replica_index` and `faulty_wal_areas` are
    /// overwritten per replica.
    storage_options: Storage.Options,
    /// Crash/restart parameters; both probabilities must lie in [0.0, 1.0).
    health_options: HealthOptions,
    /// Passed to each replica when it is opened.
    state_machine_options: StateMachine.Options,
};
|
|
44
|
-
|
|
45
|
-
/// Parameters governing simulated replica crashes and restarts.
pub const HealthOptions = struct {
    /// Probability per tick that a crash will occur.
    crash_probability: f64,
    /// Minimum duration of a crash.
    crash_stability: u32,
    /// Probability per tick that a crashed replica will recover.
    restart_probability: f64,
    /// Minimum time a replica is up until it is crashed again.
    restart_stability: u32,
};
|
|
55
|
-
|
|
56
|
-
/// Per-replica up/down state tracked by `Cluster.health`.
/// Every replica starts as `.up = 0`; `Cluster.crash_replica` switches a replica to
/// `.down = crash_stability`.
pub const ReplicaHealth = union(enum) {
    /// When >0, the replica cannot crash.
    /// When =0, the replica may crash.
    up: u32,
    /// When >0, this is the ticks remaining until recovery is possible.
    /// When =0, the replica may recover.
    down: u32,
};
|
|
64
|
-
|
|
65
|
-
/// A simulated cluster: one `Storage`, `MessagePool`, `Replica` and `ReplicaHealth` per replica
/// (all indexed by replica index), connected through a single simulated `Network`.
pub const Cluster = struct {
    allocator: mem.Allocator,
    options: ClusterOptions,

    storages: []Storage,
    pools: []MessagePool,
    replicas: []Replica,
    health: []ReplicaHealth,

    network: Network,

    /// Allocates a cluster, formats every replica's storage and opens every replica.
    /// On error, everything initialized so far is released again.
    /// The caller owns the returned cluster and must release it with `destroy`.
    pub fn create(allocator: mem.Allocator, prng: std.rand.Random, options: ClusterOptions) !*Cluster {
        assert(options.replica_count > 0);
        assert(options.client_count > 0);
        assert(options.storage_size_limit % constants.sector_size == 0);
        assert(options.storage_size_limit <= constants.storage_size_max);
        assert(options.health_options.crash_probability < 1.0);
        assert(options.health_options.crash_probability >= 0.0);
        assert(options.health_options.restart_probability < 1.0);
        assert(options.health_options.restart_probability >= 0.0);

        const cluster = try allocator.create(Cluster);
        errdefer allocator.destroy(cluster);

        const storages = try allocator.alloc(Storage, options.replica_count);
        errdefer allocator.free(storages);

        var pools = try allocator.alloc(MessagePool, options.replica_count);
        errdefer allocator.free(pools);

        for (pools) |*pool, i| {
            // If this pool fails to initialize, release the pools initialized before it.
            errdefer for (pools[0..i]) |*p| p.deinit(allocator);
            pool.* = try MessagePool.init(allocator, .replica);
        }
        errdefer for (pools) |*pool| pool.deinit(allocator);

        const replicas = try allocator.alloc(Replica, options.replica_count);
        errdefer allocator.free(replicas);

        const health = try allocator.alloc(ReplicaHealth, options.replica_count);
        errdefer allocator.free(health);
        mem.set(ReplicaHealth, health, .{ .up = 0 });

        var network = try Network.init(
            allocator,
            options.replica_count,
            options.client_count,
            options.network_options,
        );
        errdefer network.deinit();

        cluster.* = .{
            .allocator = allocator,
            .options = options,
            .storages = storages,
            .pools = pools,
            .replicas = replicas,
            .health = health,
            .network = network,
        };

        var buffer: [constants.replicas_max]Storage.FaultyAreas = undefined;
        const faulty_wal_areas = Storage.generate_faulty_wal_areas(
            prng,
            constants.journal_size_max,
            options.replica_count,
            &buffer,
        );
        assert(faulty_wal_areas.len == options.replica_count);

        for (cluster.storages) |*storage, replica_index| {
            // If this storage fails to initialize, release the storages initialized before it
            // (mirrors the message pool loop above).
            errdefer for (cluster.storages[0..replica_index]) |*s| s.deinit(allocator);

            var storage_options = options.storage_options;
            storage_options.replica_index = @intCast(u8, replica_index);
            storage_options.faulty_wal_areas = faulty_wal_areas[replica_index];
            storage.* = try Storage.init(allocator, options.storage_size_limit, storage_options);
            // Disable most faults at startup, so that the replicas don't get stuck in recovery mode.
            storage.faulty = replica_index >= vsr.quorums(options.replica_count).view_change;
        }
        errdefer for (cluster.storages) |*storage| storage.deinit(allocator);

        // Format each replica's storage (equivalent to "tigerbeetle format ...").
        for (cluster.storages) |*storage, replica_index| {
            var superblock = try SuperBlock.init(allocator, .{
                .storage = storage,
                .message_pool = &cluster.pools[replica_index],
                .storage_size_limit = options.storage_size_limit,
            });
            defer superblock.deinit(allocator);

            try vsr.format(
                Storage,
                allocator,
                options.cluster,
                @intCast(u8, replica_index),
                storage,
                &superblock,
            );
        }

        for (cluster.replicas) |_, replica_index| {
            // If this replica fails to open, release the replicas opened before it.
            errdefer for (cluster.replicas[0..replica_index]) |*r| r.deinit(allocator);

            try cluster.open_replica(@intCast(u8, replica_index), .{
                .resolution = constants.tick_ms * std.time.ns_per_ms,
                .offset_type = .linear,
                .offset_coefficient_A = 0,
                .offset_coefficient_B = 0,
            });
        }

        return cluster;
    }

    /// Releases everything owned by the cluster, then the cluster itself.
    pub fn destroy(cluster: *Cluster) void {
        cluster.network.deinit();

        for (cluster.replicas) |*replica| replica.deinit(cluster.allocator);
        cluster.allocator.free(cluster.replicas);
        cluster.allocator.free(cluster.health);
        for (cluster.pools) |*pool| pool.deinit(cluster.allocator);
        cluster.allocator.free(cluster.pools);

        for (cluster.storages) |*storage| storage.deinit(cluster.allocator);
        cluster.allocator.free(cluster.storages);

        cluster.allocator.destroy(cluster);
    }

    /// Reset a replica to its initial state, simulating a random crash/panic.
    /// Leave the persistent storage untouched, and leave any currently
    /// inflight messages to/from the replica in the network.
    ///
    /// Returns whether the replica was crashed.
    pub fn crash_replica(cluster: *Cluster, replica_index: u8) !bool {
        const replica = &cluster.replicas[replica_index];
        if (replica.op == 0) {
            // Only crash when `replica.op > 0` — an empty WAL would skip recovery after a crash.
            return false;
        }

        // TODO Remove this workaround when VSR recovery protocol is disabled.
        for (replica.journal.prepare_inhabited) |inhabited, i| {
            if (i == 0) {
                // Ignore the root header.
            } else {
                if (inhabited) break;
            }
        } else {
            // Only crash when at least one header has been written to the WAL.
            // An empty WAL would skip recovery after a crash.
            return false;
        }

        // Ensure that the cluster can eventually recover without this replica.
        // Verify that each op is recoverable by the current healthy cluster (minus the replica we
        // are trying to crash).
        // TODO Remove this workaround when VSR recovery protocol is disabled.
        if (cluster.options.replica_count != 1) {
            // Checksum expected for the op currently being examined; starts at the head op of
            // the most advanced healthy replica and then follows each header's `parent`.
            var parent: u128 = undefined;
            const cluster_op_max = op_max: {
                var v: ?u32 = null;
                var op_max: ?u64 = null;
                for (cluster.replicas) |other_replica, i| {
                    if (cluster.health[i] == .down) continue;
                    if (other_replica.status == .recovering) continue;

                    // Pick the replica with the highest `view_normal`, breaking ties by `op`.
                    if (v == null or other_replica.view_normal > v.? or
                        (other_replica.view_normal == v.? and other_replica.op > op_max.?))
                    {
                        v = other_replica.view_normal;
                        op_max = other_replica.op;
                        parent = other_replica.journal.header_with_op(op_max.?).?.checksum;
                    }
                }
                break :op_max op_max.?;
            };

            // This whole workaround doesn't handle log wrapping correctly.
            // If the log has wrapped, don't crash the replica.
            if (cluster_op_max >= constants.journal_slot_count) {
                return false;
            }

            // Walk the ops from `cluster_op_max` down to 0.
            var op: u64 = cluster_op_max + 1;
            while (op > 0) {
                op -= 1;

                var cluster_op_known: bool = false;
                for (cluster.replicas) |other_replica, i| {
                    // Ignore replicas that are ineligible to assist recovery.
                    if (replica_index == i) continue;
                    if (cluster.health[i] == .down) continue;
                    if (other_replica.status == .recovering) continue;

                    if (other_replica.journal.header_with_op_and_checksum(op, parent)) |header| {
                        parent = header.parent;
                        if (!other_replica.journal.dirty.bit(.{ .index = op })) {
                            // The op is recoverable if this replica crashes.
                            break;
                        }
                        cluster_op_known = true;
                    }
                } else {
                    if (op == cluster_op_max and !cluster_op_known) {
                        // The replica can crash; it will be able to truncate the last op.
                    } else {
                        // The op isn't recoverable if this replica is crashed.
                        return false;
                    }
                }
            }

            // We can't crash this replica because without it we won't be able to repair a broken
            // hash chain.
            if (parent != 0) return false;
        }

        cluster.health[replica_index] = .{ .down = cluster.options.health_options.crash_stability };

        // Reset the storage before the replica so that pending writes can (partially) finish.
        cluster.storages[replica_index].reset();
        const replica_time = replica.time;
        replica.deinit(cluster.allocator);

        // Ensure that none of the replica's messages leaked when it was deinitialized.
        var messages_in_pool: usize = 0;
        const message_bus = cluster.network.get_message_bus(.{ .replica = replica_index });
        {
            var it = message_bus.pool.free_list;
            while (it) |message| : (it = message.next) messages_in_pool += 1;
        }
        assert(messages_in_pool == message_pool.messages_max_replica);

        // Logically it would make more sense to run this during restart, not immediately following
        // the crash. But having it here allows the replica's MessageBus to initialize and begin
        // queueing packets.
        //
        // Pass the old replica's Time through to the new replica. It will continue to be ticked
        // while the replica is crashed, to ensure the clocks don't desynchronize too far to recover.
        try cluster.open_replica(replica_index, replica_time);

        return true;
    }

    /// Returns the number of replicas capable of helping a crashed node recover (i.e. with
    /// replica.status=normal).
    pub fn replica_normal_count(cluster: *const Cluster) u8 {
        var count: u8 = 0;
        for (cluster.replicas) |*replica| {
            if (replica.status == .normal) count += 1;
        }
        return count;
    }

    /// Returns the number of replicas whose health is currently `.up`.
    pub fn replica_up_count(cluster: *const Cluster) u8 {
        var count: u8 = 0;
        for (cluster.health) |health| {
            if (health == .up) {
                count += 1;
            }
        }
        return count;
    }

    /// Opens the replica at `replica_index` on its existing storage and message pool, installs
    /// the cluster's callbacks on it, and links its message bus into the network.
    fn open_replica(cluster: *Cluster, replica_index: u8, time: Time) !void {
        var replica = &cluster.replicas[replica_index];
        try replica.open(
            cluster.allocator,
            .{
                .replica_count = @intCast(u8, cluster.replicas.len),
                .storage = &cluster.storages[replica_index],
                // TODO Test restarting with a higher storage limit.
                .storage_size_limit = cluster.options.storage_size_limit,
                .message_pool = &cluster.pools[replica_index],
                .time = time,
                .state_machine_options = cluster.options.state_machine_options,
                .message_bus_options = .{ .network = &cluster.network },
            },
        );
        assert(replica.cluster == cluster.options.cluster);
        assert(replica.replica == replica_index);
        assert(replica.replica_count == cluster.replicas.len);

        replica.on_change_state = cluster.options.on_change_state;
        replica.on_compact = cluster.options.on_compact;
        replica.on_checkpoint = cluster.options.on_checkpoint;
        cluster.network.link(replica.message_bus.process, &replica.message_bus);
    }
};
|