tigerbeetle-node 0.5.2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -78
- package/dist/benchmark.js +96 -94
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +82 -82
- package/dist/index.js +74 -93
- package/dist/index.js.map +1 -1
- package/dist/test.js +134 -111
- package/dist/test.js.map +1 -1
- package/package.json +3 -2
- package/scripts/download_node_headers.sh +3 -1
- package/src/benchmark.ts +114 -118
- package/src/index.ts +102 -111
- package/src/node.zig +55 -63
- package/src/test.ts +146 -125
- package/src/tigerbeetle/scripts/benchmark.bat +46 -0
- package/src/tigerbeetle/scripts/benchmark.sh +5 -0
- package/src/tigerbeetle/scripts/install_zig.bat +109 -109
- package/src/tigerbeetle/scripts/install_zig.sh +8 -4
- package/src/tigerbeetle/scripts/vopr.bat +47 -47
- package/src/tigerbeetle/scripts/vopr.sh +2 -2
- package/src/tigerbeetle/src/benchmark.zig +65 -102
- package/src/tigerbeetle/src/cli.zig +39 -18
- package/src/tigerbeetle/src/config.zig +44 -25
- package/src/tigerbeetle/src/demo.zig +2 -15
- package/src/tigerbeetle/src/demo_01_create_accounts.zig +10 -10
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +5 -3
- package/src/tigerbeetle/src/{demo_04_create_transfers_two_phase_commit.zig → demo_04_create_pending_transfers.zig} +18 -12
- package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +37 -0
- package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +24 -0
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +1 -1
- package/src/tigerbeetle/src/io/benchmark.zig +24 -49
- package/src/tigerbeetle/src/io/darwin.zig +175 -44
- package/src/tigerbeetle/src/io/linux.zig +177 -72
- package/src/tigerbeetle/src/io/test.zig +61 -39
- package/src/tigerbeetle/src/io/windows.zig +1161 -0
- package/src/tigerbeetle/src/io.zig +2 -0
- package/src/tigerbeetle/src/main.zig +31 -10
- package/src/tigerbeetle/src/message_bus.zig +49 -61
- package/src/tigerbeetle/src/message_pool.zig +66 -57
- package/src/tigerbeetle/src/ring_buffer.zig +55 -3
- package/src/tigerbeetle/src/simulator.zig +108 -12
- package/src/tigerbeetle/src/state_machine.zig +1813 -816
- package/src/tigerbeetle/src/storage.zig +0 -230
- package/src/tigerbeetle/src/test/cluster.zig +168 -38
- package/src/tigerbeetle/src/test/message_bus.zig +4 -3
- package/src/tigerbeetle/src/test/network.zig +13 -16
- package/src/tigerbeetle/src/test/packet_simulator.zig +14 -1
- package/src/tigerbeetle/src/test/state_checker.zig +6 -3
- package/src/tigerbeetle/src/test/state_machine.zig +8 -7
- package/src/tigerbeetle/src/test/storage.zig +99 -40
- package/src/tigerbeetle/src/tigerbeetle.zig +108 -101
- package/src/tigerbeetle/src/time.zig +58 -11
- package/src/tigerbeetle/src/vsr/client.zig +18 -32
- package/src/tigerbeetle/src/vsr/clock.zig +1 -1
- package/src/tigerbeetle/src/vsr/journal.zig +1388 -464
- package/src/tigerbeetle/src/vsr/replica.zig +1340 -576
- package/src/tigerbeetle/src/vsr.zig +452 -40
- package/src/translate.zig +10 -0
- package/src/tigerbeetle/src/demo_05_accept_transfers.zig +0 -23
- package/src/tigerbeetle/src/demo_06_reject_transfers.zig +0 -17
- package/src/tigerbeetle/src/format_test.zig +0 -69
|
@@ -12,42 +12,48 @@ pub fn main() !void {
|
|
|
12
12
|
.debit_account_id = 1,
|
|
13
13
|
.credit_account_id = 2,
|
|
14
14
|
.user_data = 0,
|
|
15
|
-
.reserved =
|
|
15
|
+
.reserved = 0,
|
|
16
|
+
.pending_id = 0,
|
|
16
17
|
.timeout = std.time.ns_per_hour,
|
|
17
|
-
.
|
|
18
|
+
.ledger = 710,
|
|
19
|
+
.code = 1,
|
|
18
20
|
.flags = .{
|
|
19
|
-
.
|
|
21
|
+
.pending = true, // Set this transfer to be two-phase.
|
|
20
22
|
},
|
|
21
|
-
.amount =
|
|
23
|
+
.amount = 8000,
|
|
22
24
|
},
|
|
23
25
|
Transfer{
|
|
24
26
|
.id = 1002,
|
|
25
27
|
.debit_account_id = 1,
|
|
26
28
|
.credit_account_id = 2,
|
|
27
29
|
.user_data = 0,
|
|
28
|
-
.reserved =
|
|
30
|
+
.reserved = 0,
|
|
31
|
+
.pending_id = 0,
|
|
29
32
|
.timeout = std.time.ns_per_hour,
|
|
30
|
-
.
|
|
33
|
+
.ledger = 710,
|
|
34
|
+
.code = 1,
|
|
31
35
|
.flags = .{
|
|
32
|
-
.
|
|
36
|
+
.pending = true, // Set this transfer to be two-phase.
|
|
33
37
|
.linked = true, // Link this transfer with the next transfer 1003.
|
|
34
38
|
},
|
|
35
|
-
.amount =
|
|
39
|
+
.amount = 500,
|
|
36
40
|
},
|
|
37
41
|
Transfer{
|
|
38
42
|
.id = 1003,
|
|
39
43
|
.debit_account_id = 1,
|
|
40
44
|
.credit_account_id = 2,
|
|
41
45
|
.user_data = 0,
|
|
42
|
-
.reserved =
|
|
46
|
+
.reserved = 0,
|
|
47
|
+
.pending_id = 0,
|
|
43
48
|
.timeout = std.time.ns_per_hour,
|
|
44
|
-
.
|
|
49
|
+
.ledger = 710,
|
|
50
|
+
.code = 1,
|
|
45
51
|
.flags = .{
|
|
46
|
-
.
|
|
52
|
+
.pending = true, // Set this transfer to be two-phase.
|
|
47
53
|
// The last transfer in a linked chain has .linked set to false to close the chain.
|
|
48
54
|
// This transfer will succeed or fail together with transfer 1002 above.
|
|
49
55
|
},
|
|
50
|
-
.amount =
|
|
56
|
+
.amount = 500,
|
|
51
57
|
},
|
|
52
58
|
};
|
|
53
59
|
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
const tb = @import("tigerbeetle.zig");
|
|
2
|
+
const demo = @import("demo.zig");
|
|
3
|
+
|
|
4
|
+
const Transfer = tb.Transfer;
|
|
5
|
+
|
|
6
|
+
pub fn main() !void {
|
|
7
|
+
const commits = [_]Transfer{
|
|
8
|
+
Transfer{
|
|
9
|
+
.id = 2001,
|
|
10
|
+
.debit_account_id = 1,
|
|
11
|
+
.credit_account_id = 2,
|
|
12
|
+
.user_data = 0,
|
|
13
|
+
.reserved = 0,
|
|
14
|
+
.pending_id = 1001,
|
|
15
|
+
.timeout = 0,
|
|
16
|
+
.ledger = 0,// Honor original Transfer ledger.
|
|
17
|
+
.code = 0,// Honor original Transfer code.
|
|
18
|
+
.flags = .{ .post_pending_transfer = true }, // Post the pending two-phase transfer.
|
|
19
|
+
.amount = 0, // Inherit the amount from the pending transfer.
|
|
20
|
+
},
|
|
21
|
+
Transfer{
|
|
22
|
+
.id = 2002,
|
|
23
|
+
.debit_account_id = 1,
|
|
24
|
+
.credit_account_id = 2,
|
|
25
|
+
.user_data = 0,
|
|
26
|
+
.reserved = 0,
|
|
27
|
+
.pending_id = 1002,
|
|
28
|
+
.timeout = 0,
|
|
29
|
+
.ledger = 0,
|
|
30
|
+
.code = 0,
|
|
31
|
+
.flags = .{ .post_pending_transfer = true }, // Post the pending two-phase transfer.
|
|
32
|
+
.amount = 0, // Inherit the amount from the pending transfer.
|
|
33
|
+
},
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
try demo.request(.create_transfers, commits, demo.on_create_transfers);
|
|
37
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
const tb = @import("tigerbeetle.zig");
|
|
2
|
+
const demo = @import("demo.zig");
|
|
3
|
+
|
|
4
|
+
const Transfer = tb.Transfer;
|
|
5
|
+
|
|
6
|
+
pub fn main() !void {
|
|
7
|
+
const commits = [_]Transfer{
|
|
8
|
+
Transfer{
|
|
9
|
+
.id = 2003,
|
|
10
|
+
.debit_account_id = 1,
|
|
11
|
+
.credit_account_id = 2,
|
|
12
|
+
.user_data = 0,
|
|
13
|
+
.reserved = 0,
|
|
14
|
+
.pending_id = 1003,
|
|
15
|
+
.timeout = 0,
|
|
16
|
+
.ledger = 0,
|
|
17
|
+
.code = 0,
|
|
18
|
+
.flags = .{ .void_pending_transfer = true },
|
|
19
|
+
.amount = 0,
|
|
20
|
+
},
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
try demo.request(.create_transfers, commits, demo.on_create_transfers);
|
|
24
|
+
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const std = @import("std");
|
|
2
2
|
const os = std.os;
|
|
3
3
|
const assert = std.debug.assert;
|
|
4
|
+
const log = std.log.scoped(.io_benchmark);
|
|
4
5
|
|
|
5
6
|
const Time = @import("../time.zig").Time;
|
|
6
7
|
const IO = @import("../io.zig").IO;
|
|
@@ -14,7 +15,7 @@ const run_duration = 1 * std.time.ns_per_s;
|
|
|
14
15
|
|
|
15
16
|
pub fn main() !void {
|
|
16
17
|
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
|
17
|
-
const allocator =
|
|
18
|
+
const allocator = gpa.allocator();
|
|
18
19
|
defer {
|
|
19
20
|
const leaks = gpa.deinit();
|
|
20
21
|
assert(!leaks);
|
|
@@ -24,37 +25,34 @@ pub fn main() !void {
|
|
|
24
25
|
defer allocator.free(buffer);
|
|
25
26
|
std.mem.set(u8, buffer, 0);
|
|
26
27
|
|
|
27
|
-
var timer = Time{};
|
|
28
|
-
const started = timer.monotonic();
|
|
29
28
|
var self = Context{
|
|
30
29
|
.io = try IO.init(32, 0),
|
|
31
|
-
.timer = &timer,
|
|
32
|
-
.started = started,
|
|
33
|
-
.current = started,
|
|
34
30
|
.tx = .{ .buffer = buffer[0 * buffer_size ..][0..buffer_size] },
|
|
35
31
|
.rx = .{ .buffer = buffer[1 * buffer_size ..][0..buffer_size] },
|
|
36
32
|
};
|
|
33
|
+
defer self.io.deinit();
|
|
37
34
|
|
|
35
|
+
var timer = Time{};
|
|
36
|
+
const started = timer.monotonic();
|
|
38
37
|
defer {
|
|
39
|
-
|
|
40
|
-
const elapsed_ns = self.current - started;
|
|
38
|
+
const elapsed_ns = timer.monotonic() - started;
|
|
41
39
|
const transferred_mb = @intToFloat(f64, self.transferred) / 1024 / 1024;
|
|
42
40
|
|
|
43
|
-
|
|
41
|
+
log.info("took {}ms @ {d:.2} MB/s\n", .{
|
|
44
42
|
elapsed_ns / std.time.ns_per_ms,
|
|
45
43
|
transferred_mb / (@intToFloat(f64, elapsed_ns) / std.time.ns_per_s),
|
|
46
44
|
});
|
|
47
45
|
}
|
|
48
46
|
|
|
49
47
|
// Setup the server socket
|
|
50
|
-
self.server.fd = try
|
|
48
|
+
self.server.fd = try self.io.open_socket(os.AF.INET, os.SOCK.STREAM, os.IPPROTO.TCP);
|
|
51
49
|
defer os.closeSocket(self.server.fd);
|
|
52
50
|
|
|
53
51
|
const address = try std.net.Address.parseIp4("127.0.0.1", 3131);
|
|
54
52
|
try os.setsockopt(
|
|
55
53
|
self.server.fd,
|
|
56
|
-
os.
|
|
57
|
-
os.
|
|
54
|
+
os.SOL.SOCKET,
|
|
55
|
+
os.SO.REUSEADDR,
|
|
58
56
|
&std.mem.toBytes(@as(c_int, 1)),
|
|
59
57
|
);
|
|
60
58
|
try os.bind(self.server.fd, &address.any, address.getOsSockLen());
|
|
@@ -70,7 +68,7 @@ pub fn main() !void {
|
|
|
70
68
|
);
|
|
71
69
|
|
|
72
70
|
// Setup the client connection
|
|
73
|
-
self.tx.socket.fd = try
|
|
71
|
+
self.tx.socket.fd = try self.io.open_socket(os.AF.INET, os.SOCK.STREAM, os.IPPROTO.TCP);
|
|
74
72
|
defer os.closeSocket(self.tx.socket.fd);
|
|
75
73
|
|
|
76
74
|
self.io.connect(
|
|
@@ -82,22 +80,14 @@ pub fn main() !void {
|
|
|
82
80
|
address,
|
|
83
81
|
);
|
|
84
82
|
|
|
85
|
-
// Run the IO loop
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
while (self.is_running()) : (tick +%= 1) {
|
|
89
|
-
if (tick % 61 == 0) {
|
|
90
|
-
const timeout_ns = tick % (10 * std.time.ns_per_ms);
|
|
91
|
-
try self.io.run_for_ns(@intCast(u63, timeout_ns));
|
|
92
|
-
} else {
|
|
93
|
-
try self.io.tick();
|
|
94
|
-
}
|
|
95
|
-
}
|
|
83
|
+
// Run the IO loop for the duration of the benchmark
|
|
84
|
+
log.info("running for {}", .{std.fmt.fmtDuration(run_duration)});
|
|
85
|
+
try self.io.run_for_ns(run_duration);
|
|
96
86
|
|
|
97
87
|
// Assert that everything is connected
|
|
98
|
-
assert(self.server.fd !=
|
|
99
|
-
assert(self.tx.socket.fd !=
|
|
100
|
-
assert(self.rx.socket.fd !=
|
|
88
|
+
assert(self.server.fd != IO.INVALID_SOCKET);
|
|
89
|
+
assert(self.tx.socket.fd != IO.INVALID_SOCKET);
|
|
90
|
+
assert(self.rx.socket.fd != IO.INVALID_SOCKET);
|
|
101
91
|
|
|
102
92
|
// Close the accepted client socket.
|
|
103
93
|
// The actual client socket + server socket are closed by defer
|
|
@@ -108,14 +98,11 @@ const Context = struct {
|
|
|
108
98
|
io: IO,
|
|
109
99
|
tx: Pipe,
|
|
110
100
|
rx: Pipe,
|
|
111
|
-
timer: *Time,
|
|
112
|
-
started: u64,
|
|
113
|
-
current: u64,
|
|
114
101
|
server: Socket = .{},
|
|
115
102
|
transferred: u64 = 0,
|
|
116
103
|
|
|
117
104
|
const Socket = struct {
|
|
118
|
-
fd: os.socket_t =
|
|
105
|
+
fd: os.socket_t = IO.INVALID_SOCKET,
|
|
119
106
|
completion: IO.Completion = undefined,
|
|
120
107
|
};
|
|
121
108
|
const Pipe = struct {
|
|
@@ -124,21 +111,12 @@ const Context = struct {
|
|
|
124
111
|
transferred: usize = 0,
|
|
125
112
|
};
|
|
126
113
|
|
|
127
|
-
fn is_running(self: Context) bool {
|
|
128
|
-
// Make sure that we're connected
|
|
129
|
-
if (self.rx.socket.fd == -1) return true;
|
|
130
|
-
|
|
131
|
-
// Make sure that we haven't run too long as configured
|
|
132
|
-
const elapsed = self.current - self.started;
|
|
133
|
-
return elapsed < run_duration;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
114
|
fn on_accept(
|
|
137
115
|
self: *Context,
|
|
138
116
|
completion: *IO.Completion,
|
|
139
117
|
result: IO.AcceptError!os.socket_t,
|
|
140
118
|
) void {
|
|
141
|
-
assert(self.rx.socket.fd ==
|
|
119
|
+
assert(self.rx.socket.fd == IO.INVALID_SOCKET);
|
|
142
120
|
assert(&self.server.completion == completion);
|
|
143
121
|
self.rx.socket.fd = result catch |err| std.debug.panic("accept error {}", .{err});
|
|
144
122
|
|
|
@@ -152,7 +130,9 @@ const Context = struct {
|
|
|
152
130
|
completion: *IO.Completion,
|
|
153
131
|
result: IO.ConnectError!void,
|
|
154
132
|
) void {
|
|
155
|
-
|
|
133
|
+
_ = result catch unreachable;
|
|
134
|
+
|
|
135
|
+
assert(self.tx.socket.fd != IO.INVALID_SOCKET);
|
|
156
136
|
assert(&self.tx.socket.completion == completion);
|
|
157
137
|
|
|
158
138
|
// Start sending data to the server's accepted client
|
|
@@ -161,8 +141,8 @@ const Context = struct {
|
|
|
161
141
|
}
|
|
162
142
|
|
|
163
143
|
const TransferType = enum {
|
|
164
|
-
read
|
|
165
|
-
write
|
|
144
|
+
read,
|
|
145
|
+
write,
|
|
166
146
|
};
|
|
167
147
|
|
|
168
148
|
fn do_transfer(
|
|
@@ -188,11 +168,6 @@ const Context = struct {
|
|
|
188
168
|
assert(bytes <= buffer_size);
|
|
189
169
|
self.transferred += bytes;
|
|
190
170
|
|
|
191
|
-
// Check in with the benchmark timer to stop sending/receiving data
|
|
192
|
-
self.current = self.timer.monotonic();
|
|
193
|
-
if (!self.is_running())
|
|
194
|
-
return;
|
|
195
|
-
|
|
196
171
|
// Select which connection (tx or rx) depending on the type of transfer
|
|
197
172
|
const pipe = &@field(self, pipe_name);
|
|
198
173
|
pipe.transferred += bytes;
|
|
@@ -2,7 +2,9 @@ const std = @import("std");
|
|
|
2
2
|
const os = std.os;
|
|
3
3
|
const mem = std.mem;
|
|
4
4
|
const assert = std.debug.assert;
|
|
5
|
+
const log = std.log.scoped(.io);
|
|
5
6
|
|
|
7
|
+
const config = @import("../config.zig");
|
|
6
8
|
const FIFO = @import("../fifo.zig").FIFO;
|
|
7
9
|
const Time = @import("../time.zig").Time;
|
|
8
10
|
const buffer_limit = @import("../io.zig").buffer_limit;
|
|
@@ -204,9 +206,6 @@ pub const IO = struct {
|
|
|
204
206
|
address: std.net.Address,
|
|
205
207
|
initiated: bool,
|
|
206
208
|
},
|
|
207
|
-
fsync: struct {
|
|
208
|
-
fd: os.fd_t,
|
|
209
|
-
},
|
|
210
209
|
read: struct {
|
|
211
210
|
fd: os.fd_t,
|
|
212
211
|
buf: [*]u8,
|
|
@@ -248,7 +247,7 @@ pub const IO = struct {
|
|
|
248
247
|
fn onComplete(io: *IO, _completion: *Completion) void {
|
|
249
248
|
// Perform the actual operation
|
|
250
249
|
const op_data = &@field(_completion.operation, @tagName(operation_tag));
|
|
251
|
-
const result = OperationImpl.
|
|
250
|
+
const result = OperationImpl.do_operation(op_data);
|
|
252
251
|
|
|
253
252
|
// Requeue onto io_pending if error.WouldBlock
|
|
254
253
|
switch (operation_tag) {
|
|
@@ -310,7 +309,7 @@ pub const IO = struct {
|
|
|
310
309
|
.socket = socket,
|
|
311
310
|
},
|
|
312
311
|
struct {
|
|
313
|
-
fn
|
|
312
|
+
fn do_operation(op: anytype) AcceptError!os.socket_t {
|
|
314
313
|
const fd = try os.accept(
|
|
315
314
|
op.socket,
|
|
316
315
|
null,
|
|
@@ -368,7 +367,7 @@ pub const IO = struct {
|
|
|
368
367
|
.fd = fd,
|
|
369
368
|
},
|
|
370
369
|
struct {
|
|
371
|
-
fn
|
|
370
|
+
fn do_operation(op: anytype) CloseError!void {
|
|
372
371
|
return switch (os.errno(os.system.close(op.fd))) {
|
|
373
372
|
.SUCCESS => {},
|
|
374
373
|
.BADF => error.FileDescriptorInvalid,
|
|
@@ -407,7 +406,7 @@ pub const IO = struct {
|
|
|
407
406
|
.initiated = false,
|
|
408
407
|
},
|
|
409
408
|
struct {
|
|
410
|
-
fn
|
|
409
|
+
fn do_operation(op: anytype) ConnectError!void {
|
|
411
410
|
// Don't call connect after being rescheduled by io_pending as it gives EISCONN.
|
|
412
411
|
// Instead, check the socket error to see if it has been connected successfully.
|
|
413
412
|
const result = switch (op.initiated) {
|
|
@@ -422,36 +421,6 @@ pub const IO = struct {
|
|
|
422
421
|
);
|
|
423
422
|
}
|
|
424
423
|
|
|
425
|
-
pub const FsyncError = os.SyncError;
|
|
426
|
-
|
|
427
|
-
pub fn fsync(
|
|
428
|
-
self: *IO,
|
|
429
|
-
comptime Context: type,
|
|
430
|
-
context: Context,
|
|
431
|
-
comptime callback: fn (
|
|
432
|
-
context: Context,
|
|
433
|
-
completion: *Completion,
|
|
434
|
-
result: FsyncError!void,
|
|
435
|
-
) void,
|
|
436
|
-
completion: *Completion,
|
|
437
|
-
fd: os.fd_t,
|
|
438
|
-
) void {
|
|
439
|
-
self.submit(
|
|
440
|
-
context,
|
|
441
|
-
callback,
|
|
442
|
-
completion,
|
|
443
|
-
.fsync,
|
|
444
|
-
.{
|
|
445
|
-
.fd = fd,
|
|
446
|
-
},
|
|
447
|
-
struct {
|
|
448
|
-
fn doOperation(op: anytype) FsyncError!void {
|
|
449
|
-
_ = os.fcntl(op.fd, os.F.FULLFSYNC, 1) catch return os.fsync(op.fd);
|
|
450
|
-
}
|
|
451
|
-
},
|
|
452
|
-
);
|
|
453
|
-
}
|
|
454
|
-
|
|
455
424
|
pub const ReadError = error{
|
|
456
425
|
WouldBlock,
|
|
457
426
|
NotOpenForReading,
|
|
@@ -489,7 +458,7 @@ pub const IO = struct {
|
|
|
489
458
|
.offset = offset,
|
|
490
459
|
},
|
|
491
460
|
struct {
|
|
492
|
-
fn
|
|
461
|
+
fn do_operation(op: anytype) ReadError!usize {
|
|
493
462
|
while (true) {
|
|
494
463
|
const rc = os.system.pread(
|
|
495
464
|
op.fd,
|
|
@@ -546,7 +515,7 @@ pub const IO = struct {
|
|
|
546
515
|
.len = @intCast(u32, buffer_limit(buffer.len)),
|
|
547
516
|
},
|
|
548
517
|
struct {
|
|
549
|
-
fn
|
|
518
|
+
fn do_operation(op: anytype) RecvError!usize {
|
|
550
519
|
return os.recv(op.socket, op.buf[0..op.len], 0);
|
|
551
520
|
}
|
|
552
521
|
},
|
|
@@ -579,7 +548,7 @@ pub const IO = struct {
|
|
|
579
548
|
.len = @intCast(u32, buffer_limit(buffer.len)),
|
|
580
549
|
},
|
|
581
550
|
struct {
|
|
582
|
-
fn
|
|
551
|
+
fn do_operation(op: anytype) SendError!usize {
|
|
583
552
|
return os.send(op.socket, op.buf[0..op.len], 0);
|
|
584
553
|
}
|
|
585
554
|
},
|
|
@@ -609,7 +578,7 @@ pub const IO = struct {
|
|
|
609
578
|
.expires = self.time.monotonic() + nanoseconds,
|
|
610
579
|
},
|
|
611
580
|
struct {
|
|
612
|
-
fn
|
|
581
|
+
fn do_operation(_: anytype) TimeoutError!void {
|
|
613
582
|
return; // timeouts don't have errors for now
|
|
614
583
|
}
|
|
615
584
|
},
|
|
@@ -644,19 +613,181 @@ pub const IO = struct {
|
|
|
644
613
|
.offset = offset,
|
|
645
614
|
},
|
|
646
615
|
struct {
|
|
647
|
-
fn
|
|
616
|
+
fn do_operation(op: anytype) WriteError!usize {
|
|
648
617
|
return os.pwrite(op.fd, op.buf[0..op.len], op.offset);
|
|
649
618
|
}
|
|
650
619
|
},
|
|
651
620
|
);
|
|
652
621
|
}
|
|
653
622
|
|
|
654
|
-
pub
|
|
623
|
+
pub const INVALID_SOCKET = -1;
|
|
624
|
+
|
|
625
|
+
/// Creates a socket that can be used for async operations with the IO instance.
|
|
626
|
+
pub fn open_socket(self: *IO, family: u32, sock_type: u32, protocol: u32) !os.socket_t {
|
|
627
|
+
_ = self;
|
|
628
|
+
|
|
655
629
|
const fd = try os.socket(family, sock_type | os.SOCK.NONBLOCK, protocol);
|
|
656
|
-
errdefer os.
|
|
630
|
+
errdefer os.closeSocket(fd);
|
|
657
631
|
|
|
658
632
|
// darwin doesn't support os.MSG_NOSIGNAL, but instead provides a socket option to avoid SIGPIPE.
|
|
659
633
|
try os.setsockopt(fd, os.SOL.SOCKET, os.SO.NOSIGPIPE, &mem.toBytes(@as(c_int, 1)));
|
|
660
634
|
return fd;
|
|
661
635
|
}
|
|
636
|
+
|
|
637
|
+
/// Opens a directory with read only access.
|
|
638
|
+
pub fn open_dir(dir_path: [:0]const u8) !os.fd_t {
|
|
639
|
+
return os.openZ(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0);
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
/// Opens or creates a journal file:
|
|
643
|
+
/// - For reading and writing.
|
|
644
|
+
/// - For Direct I/O (required on darwin).
|
|
645
|
+
/// - Obtains an advisory exclusive lock to the file descriptor.
|
|
646
|
+
/// - Allocates the file contiguously on disk if this is supported by the file system.
|
|
647
|
+
/// - Ensures that the file data (and file inode in the parent directory) is durable on disk.
|
|
648
|
+
/// The caller is responsible for ensuring that the parent directory inode is durable.
|
|
649
|
+
/// - Verifies that the file size matches the expected file size before returning.
|
|
650
|
+
pub fn open_file(
|
|
651
|
+
self: *IO,
|
|
652
|
+
dir_fd: os.fd_t,
|
|
653
|
+
relative_path: [:0]const u8,
|
|
654
|
+
size: u64,
|
|
655
|
+
must_create: bool,
|
|
656
|
+
) !os.fd_t {
|
|
657
|
+
_ = self;
|
|
658
|
+
|
|
659
|
+
assert(relative_path.len > 0);
|
|
660
|
+
assert(size >= config.sector_size);
|
|
661
|
+
assert(size % config.sector_size == 0);
|
|
662
|
+
|
|
663
|
+
// TODO Use O_EXCL when opening as a block device to obtain a mandatory exclusive lock.
|
|
664
|
+
// This is much stronger than an advisory exclusive lock, and is required on some platforms.
|
|
665
|
+
|
|
666
|
+
// Opening with O_DSYNC is essential for both durability and correctness.
|
|
667
|
+
// O_DSYNC enables us to omit fsync() calls in the data plane, since we sync to the disk on every write.
|
|
668
|
+
var flags: u32 = os.O.CLOEXEC | os.O.RDWR | os.O.DSYNC;
|
|
669
|
+
var mode: os.mode_t = 0;
|
|
670
|
+
|
|
671
|
+
// TODO Document this and investigate whether this is in fact correct to set here.
|
|
672
|
+
if (@hasDecl(os.O, "LARGEFILE")) flags |= os.O.LARGEFILE;
|
|
673
|
+
|
|
674
|
+
if (must_create) {
|
|
675
|
+
log.info("creating \"{s}\"...", .{relative_path});
|
|
676
|
+
flags |= os.O.CREAT;
|
|
677
|
+
flags |= os.O.EXCL;
|
|
678
|
+
mode = 0o666;
|
|
679
|
+
} else {
|
|
680
|
+
log.info("opening \"{s}\"...", .{relative_path});
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
// This is critical as we rely on O_DSYNC for fsync() whenever we write to the file:
|
|
684
|
+
assert((flags & os.O.DSYNC) > 0);
|
|
685
|
+
|
|
686
|
+
// Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
|
|
687
|
+
assert(!std.fs.path.isAbsolute(relative_path));
|
|
688
|
+
const fd = try os.openatZ(dir_fd, relative_path, flags, mode);
|
|
689
|
+
// TODO Return a proper error message when the path exists or does not exist (init/start).
|
|
690
|
+
errdefer os.close(fd);
|
|
691
|
+
|
|
692
|
+
// TODO Check that the file is actually a file.
|
|
693
|
+
|
|
694
|
+
// On darwin assume that Direct I/O is always supported.
|
|
695
|
+
// Use F_NOCACHE to disable the page cache as O_DIRECT doesn't exist.
|
|
696
|
+
if (config.direct_io) {
|
|
697
|
+
_ = try os.fcntl(fd, os.F.NOCACHE, 1);
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
// Obtain an advisory exclusive lock that works only if all processes actually use flock().
|
|
701
|
+
// LOCK_NB means that we want to fail the lock without waiting if another process has it.
|
|
702
|
+
os.flock(fd, os.LOCK.EX | os.LOCK.NB) catch |err| switch (err) {
|
|
703
|
+
error.WouldBlock => @panic("another process holds the data file lock"),
|
|
704
|
+
else => return err,
|
|
705
|
+
};
|
|
706
|
+
|
|
707
|
+
// Ask the file system to allocate contiguous sectors for the file (if possible):
|
|
708
|
+
// If the file system does not support `fallocate()`, then this could mean more seeks or a
|
|
709
|
+
// panic if we run out of disk space (ENOSPC).
|
|
710
|
+
if (must_create) try fs_allocate(fd, size);
|
|
711
|
+
|
|
712
|
+
// The best fsync strategy is always to fsync before reading because this prevents us from
|
|
713
|
+
// making decisions on data that was never durably written by a previously crashed process.
|
|
714
|
+
// We therefore always fsync when we open the path, also to wait for any pending O_DSYNC.
|
|
715
|
+
// Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out.
|
|
716
|
+
try fs_sync(fd);
|
|
717
|
+
|
|
718
|
+
// We fsync the parent directory to ensure that the file inode is durably written.
|
|
719
|
+
// The caller is responsible for the parent directory inode stored under the grandparent.
|
|
720
|
+
// We always do this when opening because we don't know if this was done before crashing.
|
|
721
|
+
try fs_sync(dir_fd);
|
|
722
|
+
|
|
723
|
+
const stat = try os.fstat(fd);
|
|
724
|
+
if (stat.size != size) @panic("data file inode size was truncated or corrupted");
|
|
725
|
+
|
|
726
|
+
return fd;
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
/// Darwin's fsync() syscall does not flush past the disk cache. We must use F_FULLFSYNC instead.
|
|
730
|
+
/// https://twitter.com/TigerBeetleDB/status/1422491736224436225
|
|
731
|
+
fn fs_sync(fd: os.fd_t) !void {
|
|
732
|
+
_ = os.fcntl(fd, os.F.FULLFSYNC, 1) catch return os.fsync(fd);
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
/// Allocates a file contiguously using fallocate() if supported.
|
|
736
|
+
/// Alternatively, writes to the last sector so that at least the file size is correct.
|
|
737
|
+
fn fs_allocate(fd: os.fd_t, size: u64) !void {
|
|
738
|
+
log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)});
|
|
739
|
+
|
|
740
|
+
// Darwin doesn't have fallocate() but we can simulate it using fcntl()s.
|
|
741
|
+
//
|
|
742
|
+
// https://stackoverflow.com/a/11497568
|
|
743
|
+
// https://api.kde.org/frameworks/kcoreaddons/html/posix__fallocate__mac_8h_source.html
|
|
744
|
+
// http://hg.mozilla.org/mozilla-central/file/3d846420a907/xpcom/glue/FileUtils.cpp#l61
|
|
745
|
+
|
|
746
|
+
const F_ALLOCATECONTIG = 0x2; // Allocate contiguous space.
|
|
747
|
+
const F_ALLOCATEALL = 0x4; // Allocate all or nothing.
|
|
748
|
+
const F_PEOFPOSMODE = 3; // Use relative offset from the seek pos mode.
|
|
749
|
+
const fstore_t = extern struct {
|
|
750
|
+
fst_flags: c_uint,
|
|
751
|
+
fst_posmode: c_int,
|
|
752
|
+
fst_offset: os.off_t,
|
|
753
|
+
fst_length: os.off_t,
|
|
754
|
+
fst_bytesalloc: os.off_t,
|
|
755
|
+
};
|
|
756
|
+
|
|
757
|
+
var store = fstore_t{
|
|
758
|
+
.fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL,
|
|
759
|
+
.fst_posmode = F_PEOFPOSMODE,
|
|
760
|
+
.fst_offset = 0,
|
|
761
|
+
.fst_length = @intCast(os.off_t, size),
|
|
762
|
+
.fst_bytesalloc = 0,
|
|
763
|
+
};
|
|
764
|
+
|
|
765
|
+
// Try to pre-allocate contiguous space and fall back to default non-contiguous.
|
|
766
|
+
var res = os.system.fcntl(fd, os.F.PREALLOCATE, @ptrToInt(&store));
|
|
767
|
+
if (os.errno(res) != .SUCCESS) {
|
|
768
|
+
store.fst_flags = F_ALLOCATEALL;
|
|
769
|
+
res = os.system.fcntl(fd, os.F.PREALLOCATE, @ptrToInt(&store));
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
switch (os.errno(res)) {
|
|
773
|
+
.SUCCESS => {},
|
|
774
|
+
.ACCES => unreachable, // F_SETLK or F_SETSIZE of F_WRITEBOOTSTRAP
|
|
775
|
+
.BADF => return error.FileDescriptorInvalid,
|
|
776
|
+
.DEADLK => unreachable, // F_SETLKW
|
|
777
|
+
.INTR => unreachable, // F_SETLKW
|
|
778
|
+
.INVAL => return error.ArgumentsInvalid, // for F_PREALLOCATE (offset invalid)
|
|
779
|
+
.MFILE => unreachable, // F_DUPFD or F_DUPED
|
|
780
|
+
.NOLCK => unreachable, // F_SETLK or F_SETLKW
|
|
781
|
+
.OVERFLOW => return error.FileTooBig,
|
|
782
|
+
.SRCH => unreachable, // F_SETOWN
|
|
783
|
+
.OPNOTSUPP => return error.OperationNotSupported, // not reported but need same error union
|
|
784
|
+
else => |errno| return os.unexpectedErrno(errno),
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
// Now actually perform the allocation.
|
|
788
|
+
return os.ftruncate(fd, size) catch |err| switch (err) {
|
|
789
|
+
error.AccessDenied => error.PermissionDenied,
|
|
790
|
+
else => |e| e,
|
|
791
|
+
};
|
|
792
|
+
}
|
|
662
793
|
};
|