tigerbeetle-node 0.11.12 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/README.md +212 -196
  2. package/dist/bin/aarch64-linux-gnu/client.node +0 -0
  3. package/dist/bin/aarch64-linux-musl/client.node +0 -0
  4. package/dist/bin/aarch64-macos/client.node +0 -0
  5. package/dist/bin/x86_64-linux-gnu/client.node +0 -0
  6. package/dist/bin/x86_64-linux-musl/client.node +0 -0
  7. package/dist/bin/x86_64-macos/client.node +0 -0
  8. package/dist/index.js +33 -1
  9. package/dist/index.js.map +1 -1
  10. package/package-lock.json +66 -0
  11. package/package.json +8 -17
  12. package/src/index.ts +56 -1
  13. package/src/node.zig +10 -9
  14. package/dist/.client.node.sha256 +0 -1
  15. package/scripts/build_lib.sh +0 -61
  16. package/scripts/download_node_headers.sh +0 -32
  17. package/src/tigerbeetle/scripts/benchmark.bat +0 -48
  18. package/src/tigerbeetle/scripts/benchmark.sh +0 -66
  19. package/src/tigerbeetle/scripts/confirm_image.sh +0 -44
  20. package/src/tigerbeetle/scripts/fuzz_loop.sh +0 -15
  21. package/src/tigerbeetle/scripts/fuzz_unique_errors.sh +0 -7
  22. package/src/tigerbeetle/scripts/install.bat +0 -7
  23. package/src/tigerbeetle/scripts/install.sh +0 -21
  24. package/src/tigerbeetle/scripts/install_zig.bat +0 -113
  25. package/src/tigerbeetle/scripts/install_zig.sh +0 -90
  26. package/src/tigerbeetle/scripts/lint.zig +0 -199
  27. package/src/tigerbeetle/scripts/pre-commit.sh +0 -9
  28. package/src/tigerbeetle/scripts/scripts/benchmark.bat +0 -48
  29. package/src/tigerbeetle/scripts/scripts/benchmark.sh +0 -66
  30. package/src/tigerbeetle/scripts/scripts/confirm_image.sh +0 -44
  31. package/src/tigerbeetle/scripts/scripts/fuzz_loop.sh +0 -15
  32. package/src/tigerbeetle/scripts/scripts/fuzz_unique_errors.sh +0 -7
  33. package/src/tigerbeetle/scripts/scripts/install.bat +0 -7
  34. package/src/tigerbeetle/scripts/scripts/install.sh +0 -21
  35. package/src/tigerbeetle/scripts/scripts/install_zig.bat +0 -113
  36. package/src/tigerbeetle/scripts/scripts/install_zig.sh +0 -90
  37. package/src/tigerbeetle/scripts/scripts/lint.zig +0 -199
  38. package/src/tigerbeetle/scripts/scripts/pre-commit.sh +0 -9
  39. package/src/tigerbeetle/scripts/scripts/shellcheck.sh +0 -5
  40. package/src/tigerbeetle/scripts/scripts/tests_on_alpine.sh +0 -10
  41. package/src/tigerbeetle/scripts/scripts/tests_on_ubuntu.sh +0 -14
  42. package/src/tigerbeetle/scripts/scripts/upgrade_ubuntu_kernel.sh +0 -48
  43. package/src/tigerbeetle/scripts/scripts/validate_docs.sh +0 -23
  44. package/src/tigerbeetle/scripts/scripts/vr_state_enumerate +0 -46
  45. package/src/tigerbeetle/scripts/shellcheck.sh +0 -5
  46. package/src/tigerbeetle/scripts/tests_on_alpine.sh +0 -10
  47. package/src/tigerbeetle/scripts/tests_on_ubuntu.sh +0 -14
  48. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +0 -48
  49. package/src/tigerbeetle/scripts/validate_docs.sh +0 -23
  50. package/src/tigerbeetle/scripts/vr_state_enumerate +0 -46
  51. package/src/tigerbeetle/src/benchmark.zig +0 -314
  52. package/src/tigerbeetle/src/config.zig +0 -234
  53. package/src/tigerbeetle/src/constants.zig +0 -436
  54. package/src/tigerbeetle/src/ewah.zig +0 -286
  55. package/src/tigerbeetle/src/ewah_benchmark.zig +0 -120
  56. package/src/tigerbeetle/src/ewah_fuzz.zig +0 -130
  57. package/src/tigerbeetle/src/fifo.zig +0 -120
  58. package/src/tigerbeetle/src/io/benchmark.zig +0 -213
  59. package/src/tigerbeetle/src/io/darwin.zig +0 -814
  60. package/src/tigerbeetle/src/io/linux.zig +0 -1062
  61. package/src/tigerbeetle/src/io/test.zig +0 -643
  62. package/src/tigerbeetle/src/io/windows.zig +0 -1183
  63. package/src/tigerbeetle/src/io.zig +0 -34
  64. package/src/tigerbeetle/src/iops.zig +0 -107
  65. package/src/tigerbeetle/src/lsm/README.md +0 -308
  66. package/src/tigerbeetle/src/lsm/binary_search.zig +0 -341
  67. package/src/tigerbeetle/src/lsm/bloom_filter.zig +0 -125
  68. package/src/tigerbeetle/src/lsm/compaction.zig +0 -603
  69. package/src/tigerbeetle/src/lsm/composite_key.zig +0 -77
  70. package/src/tigerbeetle/src/lsm/direction.zig +0 -11
  71. package/src/tigerbeetle/src/lsm/eytzinger.zig +0 -587
  72. package/src/tigerbeetle/src/lsm/eytzinger_benchmark.zig +0 -330
  73. package/src/tigerbeetle/src/lsm/forest.zig +0 -204
  74. package/src/tigerbeetle/src/lsm/forest_fuzz.zig +0 -401
  75. package/src/tigerbeetle/src/lsm/grid.zig +0 -573
  76. package/src/tigerbeetle/src/lsm/groove.zig +0 -972
  77. package/src/tigerbeetle/src/lsm/k_way_merge.zig +0 -474
  78. package/src/tigerbeetle/src/lsm/level_iterator.zig +0 -332
  79. package/src/tigerbeetle/src/lsm/manifest.zig +0 -617
  80. package/src/tigerbeetle/src/lsm/manifest_level.zig +0 -877
  81. package/src/tigerbeetle/src/lsm/manifest_log.zig +0 -789
  82. package/src/tigerbeetle/src/lsm/manifest_log_fuzz.zig +0 -691
  83. package/src/tigerbeetle/src/lsm/merge_iterator.zig +0 -106
  84. package/src/tigerbeetle/src/lsm/node_pool.zig +0 -235
  85. package/src/tigerbeetle/src/lsm/posted_groove.zig +0 -378
  86. package/src/tigerbeetle/src/lsm/segmented_array.zig +0 -1328
  87. package/src/tigerbeetle/src/lsm/segmented_array_benchmark.zig +0 -148
  88. package/src/tigerbeetle/src/lsm/segmented_array_fuzz.zig +0 -9
  89. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +0 -850
  90. package/src/tigerbeetle/src/lsm/table.zig +0 -1031
  91. package/src/tigerbeetle/src/lsm/table_immutable.zig +0 -203
  92. package/src/tigerbeetle/src/lsm/table_iterator.zig +0 -340
  93. package/src/tigerbeetle/src/lsm/table_mutable.zig +0 -220
  94. package/src/tigerbeetle/src/lsm/test.zig +0 -438
  95. package/src/tigerbeetle/src/lsm/tree.zig +0 -1193
  96. package/src/tigerbeetle/src/lsm/tree_fuzz.zig +0 -474
  97. package/src/tigerbeetle/src/message_bus.zig +0 -1012
  98. package/src/tigerbeetle/src/message_pool.zig +0 -156
  99. package/src/tigerbeetle/src/ring_buffer.zig +0 -399
  100. package/src/tigerbeetle/src/simulator.zig +0 -569
  101. package/src/tigerbeetle/src/state_machine/auditor.zig +0 -577
  102. package/src/tigerbeetle/src/state_machine/workload.zig +0 -883
  103. package/src/tigerbeetle/src/state_machine.zig +0 -1881
  104. package/src/tigerbeetle/src/static_allocator.zig +0 -65
  105. package/src/tigerbeetle/src/stdx.zig +0 -162
  106. package/src/tigerbeetle/src/storage.zig +0 -393
  107. package/src/tigerbeetle/src/testing/cluster/message_bus.zig +0 -82
  108. package/src/tigerbeetle/src/testing/cluster/network.zig +0 -237
  109. package/src/tigerbeetle/src/testing/cluster/state_checker.zig +0 -169
  110. package/src/tigerbeetle/src/testing/cluster/storage_checker.zig +0 -202
  111. package/src/tigerbeetle/src/testing/cluster.zig +0 -443
  112. package/src/tigerbeetle/src/testing/fuzz.zig +0 -140
  113. package/src/tigerbeetle/src/testing/hash_log.zig +0 -66
  114. package/src/tigerbeetle/src/testing/id.zig +0 -99
  115. package/src/tigerbeetle/src/testing/packet_simulator.zig +0 -364
  116. package/src/tigerbeetle/src/testing/priority_queue.zig +0 -645
  117. package/src/tigerbeetle/src/testing/reply_sequence.zig +0 -139
  118. package/src/tigerbeetle/src/testing/state_machine.zig +0 -249
  119. package/src/tigerbeetle/src/testing/storage.zig +0 -757
  120. package/src/tigerbeetle/src/testing/table.zig +0 -247
  121. package/src/tigerbeetle/src/testing/time.zig +0 -84
  122. package/src/tigerbeetle/src/tigerbeetle.zig +0 -227
  123. package/src/tigerbeetle/src/time.zig +0 -112
  124. package/src/tigerbeetle/src/tracer.zig +0 -529
  125. package/src/tigerbeetle/src/unit_tests.zig +0 -42
  126. package/src/tigerbeetle/src/vopr.zig +0 -495
  127. package/src/tigerbeetle/src/vsr/README.md +0 -209
  128. package/src/tigerbeetle/src/vsr/client.zig +0 -544
  129. package/src/tigerbeetle/src/vsr/clock.zig +0 -853
  130. package/src/tigerbeetle/src/vsr/journal.zig +0 -2413
  131. package/src/tigerbeetle/src/vsr/journal_format_fuzz.zig +0 -111
  132. package/src/tigerbeetle/src/vsr/marzullo.zig +0 -309
  133. package/src/tigerbeetle/src/vsr/replica.zig +0 -6381
  134. package/src/tigerbeetle/src/vsr/replica_format.zig +0 -219
  135. package/src/tigerbeetle/src/vsr/superblock.zig +0 -1631
  136. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +0 -256
  137. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +0 -929
  138. package/src/tigerbeetle/src/vsr/superblock_free_set_fuzz.zig +0 -334
  139. package/src/tigerbeetle/src/vsr/superblock_fuzz.zig +0 -390
  140. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +0 -615
  141. package/src/tigerbeetle/src/vsr/superblock_quorums.zig +0 -394
  142. package/src/tigerbeetle/src/vsr/superblock_quorums_fuzz.zig +0 -314
  143. package/src/tigerbeetle/src/vsr.zig +0 -1352
@@ -1,814 +0,0 @@
1
- const std = @import("std");
2
- const os = std.os;
3
- const mem = std.mem;
4
- const assert = std.debug.assert;
5
- const log = std.log.scoped(.io);
6
-
7
- const constants = @import("../constants.zig");
8
- const FIFO = @import("../fifo.zig").FIFO;
9
- const Time = @import("../time.zig").Time;
10
- const buffer_limit = @import("../io.zig").buffer_limit;
11
-
12
- pub const IO = struct {
13
- kq: os.fd_t,
14
- time: Time = .{},
15
- io_inflight: usize = 0,
16
- timeouts: FIFO(Completion) = .{},
17
- completed: FIFO(Completion) = .{},
18
- io_pending: FIFO(Completion) = .{},
19
-
20
- pub fn init(entries: u12, flags: u32) !IO {
21
- _ = entries;
22
- _ = flags;
23
-
24
- const kq = try os.kqueue();
25
- assert(kq > -1);
26
- return IO{ .kq = kq };
27
- }
28
-
29
- pub fn deinit(self: *IO) void {
30
- assert(self.kq > -1);
31
- os.close(self.kq);
32
- self.kq = -1;
33
- }
34
-
35
- /// Pass all queued submissions to the kernel and peek for completions.
36
- pub fn tick(self: *IO) !void {
37
- return self.flush(false);
38
- }
39
-
40
- /// Pass all queued submissions to the kernel and run for `nanoseconds`.
41
- /// The `nanoseconds` argument is a u63 to allow coercion to the i64 used
42
- /// in the __kernel_timespec struct.
43
- pub fn run_for_ns(self: *IO, nanoseconds: u63) !void {
44
- var timed_out = false;
45
- var completion: Completion = undefined;
46
- const on_timeout = struct {
47
- fn callback(
48
- timed_out_ptr: *bool,
49
- _completion: *Completion,
50
- result: TimeoutError!void,
51
- ) void {
52
- _ = _completion;
53
- _ = result catch unreachable;
54
-
55
- timed_out_ptr.* = true;
56
- }
57
- }.callback;
58
-
59
- // Submit a timeout which sets the timed_out value to true to terminate the loop below.
60
- self.timeout(
61
- *bool,
62
- &timed_out,
63
- on_timeout,
64
- &completion,
65
- nanoseconds,
66
- );
67
-
68
- // Loop until our timeout completion is processed above, which sets timed_out to true.
69
- // LLVM shouldn't be able to cache timed_out's value here since its address escapes above.
70
- while (!timed_out) {
71
- try self.flush(true);
72
- }
73
- }
74
-
75
- fn flush(self: *IO, wait_for_completions: bool) !void {
76
- var io_pending = self.io_pending.peek();
77
- var events: [256]os.Kevent = undefined;
78
-
79
- // Check timeouts and fill events with completions in io_pending
80
- // (they will be submitted through kevent).
81
- // Timeouts are expired here and possibly pushed to the completed queue.
82
- const next_timeout = self.flush_timeouts();
83
- const change_events = self.flush_io(&events, &io_pending);
84
-
85
- // Only call kevent() if we need to submit io events or if we need to wait for completions.
86
- if (change_events > 0 or self.completed.empty()) {
87
- // Zero timeouts for kevent() implies a non-blocking poll
88
- var ts = std.mem.zeroes(os.timespec);
89
-
90
- // We need to wait (not poll) on kevent if there's nothing to submit or complete.
91
- // We should never wait indefinitely (timeout_ptr = null for kevent) given:
92
- // - tick() is non-blocking (wait_for_completions = false)
93
- // - run_for_ns() always submits a timeout
94
- if (change_events == 0 and self.completed.empty()) {
95
- if (wait_for_completions) {
96
- const timeout_ns = next_timeout orelse @panic("kevent() blocking forever");
97
- ts.tv_nsec = @intCast(@TypeOf(ts.tv_nsec), timeout_ns % std.time.ns_per_s);
98
- ts.tv_sec = @intCast(@TypeOf(ts.tv_sec), timeout_ns / std.time.ns_per_s);
99
- } else if (self.io_inflight == 0) {
100
- return;
101
- }
102
- }
103
-
104
- const new_events = try os.kevent(
105
- self.kq,
106
- events[0..change_events],
107
- events[0..events.len],
108
- &ts,
109
- );
110
-
111
- // Mark the io events submitted only after kevent() successfully processed them
112
- self.io_pending.out = io_pending;
113
- if (io_pending == null) {
114
- self.io_pending.in = null;
115
- }
116
-
117
- self.io_inflight += change_events;
118
- self.io_inflight -= new_events;
119
-
120
- for (events[0..new_events]) |event| {
121
- const completion = @intToPtr(*Completion, event.udata);
122
- completion.next = null;
123
- self.completed.push(completion);
124
- }
125
- }
126
-
127
- var completed = self.completed;
128
- self.completed = .{};
129
- while (completed.pop()) |completion| {
130
- (completion.callback)(self, completion);
131
- }
132
- }
133
-
134
- fn flush_io(_: *IO, events: []os.Kevent, io_pending_top: *?*Completion) usize {
135
- for (events) |*event, flushed| {
136
- const completion = io_pending_top.* orelse return flushed;
137
- io_pending_top.* = completion.next;
138
-
139
- const event_info = switch (completion.operation) {
140
- .accept => |op| [2]c_int{ op.socket, os.system.EVFILT_READ },
141
- .connect => |op| [2]c_int{ op.socket, os.system.EVFILT_WRITE },
142
- .read => |op| [2]c_int{ op.fd, os.system.EVFILT_READ },
143
- .write => |op| [2]c_int{ op.fd, os.system.EVFILT_WRITE },
144
- .recv => |op| [2]c_int{ op.socket, os.system.EVFILT_READ },
145
- .send => |op| [2]c_int{ op.socket, os.system.EVFILT_WRITE },
146
- else => @panic("invalid completion operation queued for io"),
147
- };
148
-
149
- event.* = .{
150
- .ident = @intCast(u32, event_info[0]),
151
- .filter = @intCast(i16, event_info[1]),
152
- .flags = os.system.EV_ADD | os.system.EV_ENABLE | os.system.EV_ONESHOT,
153
- .fflags = 0,
154
- .data = 0,
155
- .udata = @ptrToInt(completion),
156
- };
157
- }
158
- return events.len;
159
- }
160
-
161
- fn flush_timeouts(self: *IO) ?u64 {
162
- var min_timeout: ?u64 = null;
163
- var timeouts: ?*Completion = self.timeouts.peek();
164
- while (timeouts) |completion| {
165
- timeouts = completion.next;
166
-
167
- // NOTE: We could cache `now` above the loop but monotonic() should be cheap to call.
168
- const now = self.time.monotonic();
169
- const expires = completion.operation.timeout.expires;
170
-
171
- // NOTE: remove() could be O(1) here with a doubly-linked-list
172
- // since we know the previous Completion.
173
- if (now >= expires) {
174
- self.timeouts.remove(completion);
175
- self.completed.push(completion);
176
- continue;
177
- }
178
-
179
- const timeout_ns = expires - now;
180
- if (min_timeout) |min_ns| {
181
- min_timeout = std.math.min(min_ns, timeout_ns);
182
- } else {
183
- min_timeout = timeout_ns;
184
- }
185
- }
186
- return min_timeout;
187
- }
188
-
189
- /// This struct holds the data needed for a single IO operation
190
- pub const Completion = struct {
191
- next: ?*Completion,
192
- context: ?*anyopaque,
193
- callback: fn (*IO, *Completion) void,
194
- operation: Operation,
195
- };
196
-
197
- const Operation = union(enum) {
198
- accept: struct {
199
- socket: os.socket_t,
200
- },
201
- close: struct {
202
- fd: os.fd_t,
203
- },
204
- connect: struct {
205
- socket: os.socket_t,
206
- address: std.net.Address,
207
- initiated: bool,
208
- },
209
- read: struct {
210
- fd: os.fd_t,
211
- buf: [*]u8,
212
- len: u32,
213
- offset: u64,
214
- },
215
- recv: struct {
216
- socket: os.socket_t,
217
- buf: [*]u8,
218
- len: u32,
219
- },
220
- send: struct {
221
- socket: os.socket_t,
222
- buf: [*]const u8,
223
- len: u32,
224
- },
225
- timeout: struct {
226
- expires: u64,
227
- },
228
- write: struct {
229
- fd: os.fd_t,
230
- buf: [*]const u8,
231
- len: u32,
232
- offset: u64,
233
- },
234
- };
235
-
236
- fn submit(
237
- self: *IO,
238
- context: anytype,
239
- comptime callback: anytype,
240
- completion: *Completion,
241
- comptime operation_tag: std.meta.Tag(Operation),
242
- operation_data: anytype,
243
- comptime OperationImpl: type,
244
- ) void {
245
- const Context = @TypeOf(context);
246
- const onCompleteFn = struct {
247
- fn onComplete(io: *IO, _completion: *Completion) void {
248
- // Perform the actual operation
249
- const op_data = &@field(_completion.operation, @tagName(operation_tag));
250
- const result = OperationImpl.do_operation(op_data);
251
-
252
- // Requeue onto io_pending if error.WouldBlock
253
- switch (operation_tag) {
254
- .accept, .connect, .read, .write, .send, .recv => {
255
- _ = result catch |err| switch (err) {
256
- error.WouldBlock => {
257
- _completion.next = null;
258
- io.io_pending.push(_completion);
259
- return;
260
- },
261
- else => {},
262
- };
263
- },
264
- else => {},
265
- }
266
-
267
- // Complete the Completion
268
- return callback(
269
- @intToPtr(Context, @ptrToInt(_completion.context)),
270
- _completion,
271
- result,
272
- );
273
- }
274
- }.onComplete;
275
-
276
- completion.* = .{
277
- .next = null,
278
- .context = context,
279
- .callback = onCompleteFn,
280
- .operation = @unionInit(Operation, @tagName(operation_tag), operation_data),
281
- };
282
-
283
- switch (operation_tag) {
284
- .timeout => self.timeouts.push(completion),
285
- else => self.completed.push(completion),
286
- }
287
- }
288
-
289
- pub const AcceptError = os.AcceptError || os.SetSockOptError;
290
-
291
- pub fn accept(
292
- self: *IO,
293
- comptime Context: type,
294
- context: Context,
295
- comptime callback: fn (
296
- context: Context,
297
- completion: *Completion,
298
- result: AcceptError!os.socket_t,
299
- ) void,
300
- completion: *Completion,
301
- socket: os.socket_t,
302
- ) void {
303
- self.submit(
304
- context,
305
- callback,
306
- completion,
307
- .accept,
308
- .{
309
- .socket = socket,
310
- },
311
- struct {
312
- fn do_operation(op: anytype) AcceptError!os.socket_t {
313
- const fd = try os.accept(
314
- op.socket,
315
- null,
316
- null,
317
- os.SOCK.NONBLOCK | os.SOCK.CLOEXEC,
318
- );
319
- errdefer os.close(fd);
320
-
321
- // Darwin doesn't support os.MSG_NOSIGNAL to avoid getting SIGPIPE on socket send().
322
- // Instead, it uses the SO_NOSIGPIPE socket option which does the same for all send()s.
323
- os.setsockopt(
324
- fd,
325
- os.SOL.SOCKET,
326
- os.SO.NOSIGPIPE,
327
- &mem.toBytes(@as(c_int, 1)),
328
- ) catch |err| return switch (err) {
329
- error.TimeoutTooBig => unreachable,
330
- error.PermissionDenied => error.NetworkSubsystemFailed,
331
- error.AlreadyConnected => error.NetworkSubsystemFailed,
332
- error.InvalidProtocolOption => error.ProtocolFailure,
333
- else => |e| e,
334
- };
335
-
336
- return fd;
337
- }
338
- },
339
- );
340
- }
341
-
342
- pub const CloseError = error{
343
- FileDescriptorInvalid,
344
- DiskQuota,
345
- InputOutput,
346
- NoSpaceLeft,
347
- } || os.UnexpectedError;
348
-
349
- pub fn close(
350
- self: *IO,
351
- comptime Context: type,
352
- context: Context,
353
- comptime callback: fn (
354
- context: Context,
355
- completion: *Completion,
356
- result: CloseError!void,
357
- ) void,
358
- completion: *Completion,
359
- fd: os.fd_t,
360
- ) void {
361
- self.submit(
362
- context,
363
- callback,
364
- completion,
365
- .close,
366
- .{
367
- .fd = fd,
368
- },
369
- struct {
370
- fn do_operation(op: anytype) CloseError!void {
371
- return switch (os.errno(os.system.close(op.fd))) {
372
- .SUCCESS => {},
373
- .BADF => error.FileDescriptorInvalid,
374
- .INTR => {}, // A success, see https://github.com/ziglang/zig/issues/2425
375
- .IO => error.InputOutput,
376
- else => |errno| os.unexpectedErrno(errno),
377
- };
378
- }
379
- },
380
- );
381
- }
382
-
383
- pub const ConnectError = os.ConnectError;
384
-
385
- pub fn connect(
386
- self: *IO,
387
- comptime Context: type,
388
- context: Context,
389
- comptime callback: fn (
390
- context: Context,
391
- completion: *Completion,
392
- result: ConnectError!void,
393
- ) void,
394
- completion: *Completion,
395
- socket: os.socket_t,
396
- address: std.net.Address,
397
- ) void {
398
- self.submit(
399
- context,
400
- callback,
401
- completion,
402
- .connect,
403
- .{
404
- .socket = socket,
405
- .address = address,
406
- .initiated = false,
407
- },
408
- struct {
409
- fn do_operation(op: anytype) ConnectError!void {
410
- // Don't call connect after being rescheduled by io_pending as it gives EISCONN.
411
- // Instead, check the socket error to see if it has been connected successfully.
412
- const result = switch (op.initiated) {
413
- true => os.getsockoptError(op.socket),
414
- else => os.connect(op.socket, &op.address.any, op.address.getOsSockLen()),
415
- };
416
-
417
- op.initiated = true;
418
- return result;
419
- }
420
- },
421
- );
422
- }
423
-
424
- pub const ReadError = error{
425
- WouldBlock,
426
- NotOpenForReading,
427
- ConnectionResetByPeer,
428
- Alignment,
429
- InputOutput,
430
- IsDir,
431
- SystemResources,
432
- Unseekable,
433
- ConnectionTimedOut,
434
- } || os.UnexpectedError;
435
-
436
- pub fn read(
437
- self: *IO,
438
- comptime Context: type,
439
- context: Context,
440
- comptime callback: fn (
441
- context: Context,
442
- completion: *Completion,
443
- result: ReadError!usize,
444
- ) void,
445
- completion: *Completion,
446
- fd: os.fd_t,
447
- buffer: []u8,
448
- offset: u64,
449
- ) void {
450
- self.submit(
451
- context,
452
- callback,
453
- completion,
454
- .read,
455
- .{
456
- .fd = fd,
457
- .buf = buffer.ptr,
458
- .len = @intCast(u32, buffer_limit(buffer.len)),
459
- .offset = offset,
460
- },
461
- struct {
462
- fn do_operation(op: anytype) ReadError!usize {
463
- while (true) {
464
- const rc = os.system.pread(
465
- op.fd,
466
- op.buf,
467
- op.len,
468
- @bitCast(isize, op.offset),
469
- );
470
- return switch (os.errno(rc)) {
471
- .SUCCESS => @intCast(usize, rc),
472
- .INTR => continue,
473
- .AGAIN => error.WouldBlock,
474
- .BADF => error.NotOpenForReading,
475
- .CONNRESET => error.ConnectionResetByPeer,
476
- .FAULT => unreachable,
477
- .INVAL => error.Alignment,
478
- .IO => error.InputOutput,
479
- .ISDIR => error.IsDir,
480
- .NOBUFS => error.SystemResources,
481
- .NOMEM => error.SystemResources,
482
- .NXIO => error.Unseekable,
483
- .OVERFLOW => error.Unseekable,
484
- .SPIPE => error.Unseekable,
485
- .TIMEDOUT => error.ConnectionTimedOut,
486
- else => |err| os.unexpectedErrno(err),
487
- };
488
- }
489
- }
490
- },
491
- );
492
- }
493
-
494
- pub const RecvError = os.RecvFromError;
495
-
496
- pub fn recv(
497
- self: *IO,
498
- comptime Context: type,
499
- context: Context,
500
- comptime callback: fn (
501
- context: Context,
502
- completion: *Completion,
503
- result: RecvError!usize,
504
- ) void,
505
- completion: *Completion,
506
- socket: os.socket_t,
507
- buffer: []u8,
508
- ) void {
509
- self.submit(
510
- context,
511
- callback,
512
- completion,
513
- .recv,
514
- .{
515
- .socket = socket,
516
- .buf = buffer.ptr,
517
- .len = @intCast(u32, buffer_limit(buffer.len)),
518
- },
519
- struct {
520
- fn do_operation(op: anytype) RecvError!usize {
521
- return os.recv(op.socket, op.buf[0..op.len], 0);
522
- }
523
- },
524
- );
525
- }
526
-
527
- pub const SendError = os.SendError;
528
-
529
- pub fn send(
530
- self: *IO,
531
- comptime Context: type,
532
- context: Context,
533
- comptime callback: fn (
534
- context: Context,
535
- completion: *Completion,
536
- result: SendError!usize,
537
- ) void,
538
- completion: *Completion,
539
- socket: os.socket_t,
540
- buffer: []const u8,
541
- ) void {
542
- self.submit(
543
- context,
544
- callback,
545
- completion,
546
- .send,
547
- .{
548
- .socket = socket,
549
- .buf = buffer.ptr,
550
- .len = @intCast(u32, buffer_limit(buffer.len)),
551
- },
552
- struct {
553
- fn do_operation(op: anytype) SendError!usize {
554
- return os.send(op.socket, op.buf[0..op.len], 0);
555
- }
556
- },
557
- );
558
- }
559
-
560
- pub const TimeoutError = error{Canceled} || os.UnexpectedError;
561
-
562
- pub fn timeout(
563
- self: *IO,
564
- comptime Context: type,
565
- context: Context,
566
- comptime callback: fn (
567
- context: Context,
568
- completion: *Completion,
569
- result: TimeoutError!void,
570
- ) void,
571
- completion: *Completion,
572
- nanoseconds: u63,
573
- ) void {
574
- // Special case a zero timeout as a yield.
575
- if (nanoseconds == 0) {
576
- completion.* = .{
577
- .next = null,
578
- .context = context,
579
- .operation = undefined,
580
- .callback = struct {
581
- fn on_complete(_io: *IO, _completion: *Completion) void {
582
- _ = _io;
583
- const _context = @intToPtr(Context, @ptrToInt(_completion.context));
584
- callback(_context, _completion, {});
585
- }
586
- }.on_complete,
587
- };
588
-
589
- self.completed.push(completion);
590
- return;
591
- }
592
-
593
- self.submit(
594
- context,
595
- callback,
596
- completion,
597
- .timeout,
598
- .{
599
- .expires = self.time.monotonic() + nanoseconds,
600
- },
601
- struct {
602
- fn do_operation(_: anytype) TimeoutError!void {
603
- return; // timeouts don't have errors for now
604
- }
605
- },
606
- );
607
- }
608
-
609
- pub const WriteError = os.PWriteError;
610
-
611
- pub fn write(
612
- self: *IO,
613
- comptime Context: type,
614
- context: Context,
615
- comptime callback: fn (
616
- context: Context,
617
- completion: *Completion,
618
- result: WriteError!usize,
619
- ) void,
620
- completion: *Completion,
621
- fd: os.fd_t,
622
- buffer: []const u8,
623
- offset: u64,
624
- ) void {
625
- self.submit(
626
- context,
627
- callback,
628
- completion,
629
- .write,
630
- .{
631
- .fd = fd,
632
- .buf = buffer.ptr,
633
- .len = @intCast(u32, buffer_limit(buffer.len)),
634
- .offset = offset,
635
- },
636
- struct {
637
- fn do_operation(op: anytype) WriteError!usize {
638
- return os.pwrite(op.fd, op.buf[0..op.len], op.offset);
639
- }
640
- },
641
- );
642
- }
643
-
644
- pub const INVALID_SOCKET = -1;
645
-
646
- /// Creates a socket that can be used for async operations with the IO instance.
647
- pub fn open_socket(self: *IO, family: u32, sock_type: u32, protocol: u32) !os.socket_t {
648
- _ = self;
649
-
650
- const fd = try os.socket(family, sock_type | os.SOCK.NONBLOCK, protocol);
651
- errdefer os.closeSocket(fd);
652
-
653
- // darwin doesn't support os.MSG_NOSIGNAL, but instead a socket option to avoid SIGPIPE.
654
- try os.setsockopt(fd, os.SOL.SOCKET, os.SO.NOSIGPIPE, &mem.toBytes(@as(c_int, 1)));
655
- return fd;
656
- }
657
-
658
- /// Opens a directory with read only access.
659
- pub fn open_dir(dir_path: []const u8) !os.fd_t {
660
- return os.open(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0);
661
- }
662
-
663
- pub const INVALID_FILE: os.fd_t = -1;
664
-
665
- /// Opens or creates a journal file:
666
- /// - For reading and writing.
667
- /// - For Direct I/O (required on darwin).
668
- /// - Obtains an advisory exclusive lock to the file descriptor.
669
- /// - Allocates the file contiguously on disk if this is supported by the file system.
670
- /// - Ensures that the file data (and file inode in the parent directory) is durable on disk.
671
- /// The caller is responsible for ensuring that the parent directory inode is durable.
672
- /// - Verifies that the file size is at least the expected file size before returning.
673
- pub fn open_file(
674
- dir_fd: os.fd_t,
675
- relative_path: []const u8,
676
- size: u64,
677
- must_create: bool,
678
- ) !os.fd_t {
679
- assert(relative_path.len > 0);
680
- assert(size >= constants.sector_size);
681
- assert(size % constants.sector_size == 0);
682
-
683
- // TODO Use O_EXCL when opening as a block device to obtain a mandatory exclusive lock.
684
- // This is much stronger than an advisory exclusive lock, and is required on some platforms.
685
-
686
- // Opening with O_DSYNC is essential for both durability and correctness.
687
- // O_DSYNC enables us to omit fsync() calls in the data plane, since we sync to the disk on every write.
688
- var flags: u32 = os.O.CLOEXEC | os.O.RDWR | os.O.DSYNC;
689
- var mode: os.mode_t = 0;
690
-
691
- // TODO Document this and investigate whether this is in fact correct to set here.
692
- if (@hasDecl(os.O, "LARGEFILE")) flags |= os.O.LARGEFILE;
693
-
694
- if (must_create) {
695
- log.info("creating \"{s}\"...", .{relative_path});
696
- flags |= os.O.CREAT;
697
- flags |= os.O.EXCL;
698
- mode = 0o666;
699
- } else {
700
- log.info("opening \"{s}\"...", .{relative_path});
701
- }
702
-
703
- // This is critical as we rely on O_DSYNC for fsync() whenever we write to the file:
704
- assert((flags & os.O.DSYNC) > 0);
705
-
706
- // Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
707
- assert(!std.fs.path.isAbsolute(relative_path));
708
- const fd = try os.openat(dir_fd, relative_path, flags, mode);
709
- // TODO Return a proper error message when the path exists or does not exist (init/start).
710
- errdefer os.close(fd);
711
-
712
- // TODO Check that the file is actually a file.
713
-
714
- // On darwin assume that Direct I/O is always supported.
715
- // Use F_NOCACHE to disable the page cache as O_DIRECT doesn't exist.
716
- if (constants.direct_io) {
717
- _ = try os.fcntl(fd, os.F.NOCACHE, 1);
718
- }
719
-
720
- // Obtain an advisory exclusive lock that works only if all processes actually use flock().
721
- // LOCK_NB means that we want to fail the lock without waiting if another process has it.
722
- os.flock(fd, os.LOCK.EX | os.LOCK.NB) catch |err| switch (err) {
723
- error.WouldBlock => @panic("another process holds the data file lock"),
724
- else => return err,
725
- };
726
-
727
- // Ask the file system to allocate contiguous sectors for the file (if possible):
728
- // If the file system does not support `fallocate()`, then this could mean more seeks or a
729
- // panic if we run out of disk space (ENOSPC).
730
- if (must_create) try fs_allocate(fd, size);
731
-
732
- // The best fsync strategy is always to fsync before reading because this prevents us from
733
- // making decisions on data that was never durably written by a previously crashed process.
734
- // We therefore always fsync when we open the path, also to wait for any pending O_DSYNC.
735
- // Thanks to Alex Miller from FoundationDB for diving into our source and pointing this out.
736
- try fs_sync(fd);
737
-
738
- // We fsync the parent directory to ensure that the file inode is durably written.
739
- // The caller is responsible for the parent directory inode stored under the grandparent.
740
- // We always do this when opening because we don't know if this was done before crashing.
741
- try fs_sync(dir_fd);
742
-
743
- // TODO Document that `size` is now `data_file_size_min` from `main.zig`.
744
- const stat = try os.fstat(fd);
745
- if (stat.size < size) @panic("data file inode size was truncated or corrupted");
746
-
747
- return fd;
748
- }
749
-
750
- /// Darwin's fsync() syscall does not flush past the disk cache. We must use F_FULLFSYNC instead.
751
- /// https://twitter.com/TigerBeetleDB/status/1422491736224436225
752
- fn fs_sync(fd: os.fd_t) !void {
753
- _ = os.fcntl(fd, os.F.FULLFSYNC, 1) catch return os.fsync(fd);
754
- }
755
-
756
- /// Allocates a file contiguously using fallocate() if supported.
757
- /// Alternatively, writes to the last sector so that at least the file size is correct.
758
- fn fs_allocate(fd: os.fd_t, size: u64) !void {
759
- log.info("allocating {}...", .{std.fmt.fmtIntSizeBin(size)});
760
-
761
- // Darwin doesn't have fallocate() but we can simulate it using fcntl()s.
762
- //
763
- // https://stackoverflow.com/a/11497568
764
- // https://api.kde.org/frameworks/kcoreaddons/html/posix__fallocate__mac_8h_source.html
765
- // http://hg.mozilla.org/mozilla-central/file/3d846420a907/xpcom/glue/FileUtils.cpp#l61
766
-
767
- const F_ALLOCATECONTIG = 0x2; // Allocate contiguous space.
768
- const F_ALLOCATEALL = 0x4; // Allocate all or nothing.
769
- const F_PEOFPOSMODE = 3; // Use relative offset from the seek pos mode.
770
- const fstore_t = extern struct {
771
- fst_flags: c_uint,
772
- fst_posmode: c_int,
773
- fst_offset: os.off_t,
774
- fst_length: os.off_t,
775
- fst_bytesalloc: os.off_t,
776
- };
777
-
778
- var store = fstore_t{
779
- .fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL,
780
- .fst_posmode = F_PEOFPOSMODE,
781
- .fst_offset = 0,
782
- .fst_length = @intCast(os.off_t, size),
783
- .fst_bytesalloc = 0,
784
- };
785
-
786
- // Try to pre-allocate contiguous space and fall back to default non-contiguous.
787
- var res = os.system.fcntl(fd, os.F.PREALLOCATE, @ptrToInt(&store));
788
- if (os.errno(res) != .SUCCESS) {
789
- store.fst_flags = F_ALLOCATEALL;
790
- res = os.system.fcntl(fd, os.F.PREALLOCATE, @ptrToInt(&store));
791
- }
792
-
793
- switch (os.errno(res)) {
794
- .SUCCESS => {},
795
- .ACCES => unreachable, // F_SETLK or F_SETSIZE or F_WRITEBOOTSTRAP
796
- .BADF => return error.FileDescriptorInvalid,
797
- .DEADLK => unreachable, // F_SETLKW
798
- .INTR => unreachable, // F_SETLKW
799
- .INVAL => return error.ArgumentsInvalid, // for F_PREALLOCATE (offset invalid)
800
- .MFILE => unreachable, // F_DUPFD or F_DUPED
801
- .NOLCK => unreachable, // F_SETLK or F_SETLKW
802
- .OVERFLOW => return error.FileTooBig,
803
- .SRCH => unreachable, // F_SETOWN
804
- .OPNOTSUPP => return error.OperationNotSupported, // not reported but need same error union
805
- else => |errno| return os.unexpectedErrno(errno),
806
- }
807
-
808
- // Now actually perform the allocation.
809
- return os.ftruncate(fd, size) catch |err| switch (err) {
810
- error.AccessDenied => error.PermissionDenied,
811
- else => |e| e,
812
- };
813
- }
814
- };