tigerbeetle-node 0.8.0 → 0.10.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (99)
  1. package/README.md +47 -47
  2. package/dist/benchmark.js +15 -15
  3. package/dist/benchmark.js.map +1 -1
  4. package/dist/index.d.ts +66 -61
  5. package/dist/index.js +66 -61
  6. package/dist/index.js.map +1 -1
  7. package/dist/test.js +1 -1
  8. package/dist/test.js.map +1 -1
  9. package/package.json +14 -16
  10. package/scripts/download_node_headers.sh +3 -1
  11. package/src/index.ts +5 -0
  12. package/src/node.zig +18 -19
  13. package/src/tigerbeetle/scripts/benchmark.bat +47 -46
  14. package/src/tigerbeetle/scripts/benchmark.sh +25 -10
  15. package/src/tigerbeetle/scripts/install.sh +2 -1
  16. package/src/tigerbeetle/scripts/install_zig.bat +109 -109
  17. package/src/tigerbeetle/scripts/install_zig.sh +18 -18
  18. package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +12 -3
  19. package/src/tigerbeetle/scripts/vopr.bat +47 -47
  20. package/src/tigerbeetle/scripts/vopr.sh +5 -5
  21. package/src/tigerbeetle/src/benchmark.zig +17 -9
  22. package/src/tigerbeetle/src/benchmark_array_search.zig +317 -0
  23. package/src/tigerbeetle/src/benchmarks/perf.zig +299 -0
  24. package/src/tigerbeetle/src/c/tb_client/context.zig +103 -0
  25. package/src/tigerbeetle/src/c/tb_client/packet.zig +80 -0
  26. package/src/tigerbeetle/src/c/tb_client/signal.zig +288 -0
  27. package/src/tigerbeetle/src/c/tb_client/thread.zig +329 -0
  28. package/src/tigerbeetle/src/c/tb_client.h +201 -0
  29. package/src/tigerbeetle/src/c/tb_client.zig +101 -0
  30. package/src/tigerbeetle/src/c/test.zig +1 -0
  31. package/src/tigerbeetle/src/cli.zig +142 -83
  32. package/src/tigerbeetle/src/config.zig +136 -23
  33. package/src/tigerbeetle/src/demo.zig +12 -8
  34. package/src/tigerbeetle/src/demo_03_create_transfers.zig +3 -3
  35. package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +10 -10
  36. package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +7 -7
  37. package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +3 -3
  38. package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +1 -1
  39. package/src/tigerbeetle/src/ewah.zig +318 -0
  40. package/src/tigerbeetle/src/ewah_benchmark.zig +121 -0
  41. package/src/tigerbeetle/src/eytzinger_benchmark.zig +317 -0
  42. package/src/tigerbeetle/src/fifo.zig +17 -1
  43. package/src/tigerbeetle/src/io/darwin.zig +12 -10
  44. package/src/tigerbeetle/src/io/linux.zig +25 -9
  45. package/src/tigerbeetle/src/io/windows.zig +13 -9
  46. package/src/tigerbeetle/src/iops.zig +101 -0
  47. package/src/tigerbeetle/src/lsm/binary_search.zig +214 -0
  48. package/src/tigerbeetle/src/lsm/bloom_filter.zig +82 -0
  49. package/src/tigerbeetle/src/lsm/compaction.zig +603 -0
  50. package/src/tigerbeetle/src/lsm/composite_key.zig +75 -0
  51. package/src/tigerbeetle/src/lsm/direction.zig +11 -0
  52. package/src/tigerbeetle/src/lsm/eytzinger.zig +587 -0
  53. package/src/tigerbeetle/src/lsm/forest.zig +630 -0
  54. package/src/tigerbeetle/src/lsm/grid.zig +473 -0
  55. package/src/tigerbeetle/src/lsm/groove.zig +939 -0
  56. package/src/tigerbeetle/src/lsm/k_way_merge.zig +452 -0
  57. package/src/tigerbeetle/src/lsm/level_iterator.zig +296 -0
  58. package/src/tigerbeetle/src/lsm/manifest.zig +680 -0
  59. package/src/tigerbeetle/src/lsm/manifest_level.zig +1169 -0
  60. package/src/tigerbeetle/src/lsm/manifest_log.zig +904 -0
  61. package/src/tigerbeetle/src/lsm/node_pool.zig +231 -0
  62. package/src/tigerbeetle/src/lsm/posted_groove.zig +399 -0
  63. package/src/tigerbeetle/src/lsm/segmented_array.zig +998 -0
  64. package/src/tigerbeetle/src/lsm/set_associative_cache.zig +844 -0
  65. package/src/tigerbeetle/src/lsm/table.zig +932 -0
  66. package/src/tigerbeetle/src/lsm/table_immutable.zig +196 -0
  67. package/src/tigerbeetle/src/lsm/table_iterator.zig +295 -0
  68. package/src/tigerbeetle/src/lsm/table_mutable.zig +123 -0
  69. package/src/tigerbeetle/src/lsm/test.zig +429 -0
  70. package/src/tigerbeetle/src/lsm/tree.zig +1085 -0
  71. package/src/tigerbeetle/src/main.zig +121 -95
  72. package/src/tigerbeetle/src/message_bus.zig +49 -48
  73. package/src/tigerbeetle/src/message_pool.zig +19 -3
  74. package/src/tigerbeetle/src/ring_buffer.zig +172 -31
  75. package/src/tigerbeetle/src/simulator.zig +171 -43
  76. package/src/tigerbeetle/src/state_machine.zig +1026 -599
  77. package/src/tigerbeetle/src/storage.zig +46 -16
  78. package/src/tigerbeetle/src/test/cluster.zig +257 -78
  79. package/src/tigerbeetle/src/test/message_bus.zig +15 -24
  80. package/src/tigerbeetle/src/test/network.zig +26 -17
  81. package/src/tigerbeetle/src/test/packet_simulator.zig +14 -1
  82. package/src/tigerbeetle/src/test/state_checker.zig +10 -6
  83. package/src/tigerbeetle/src/test/state_machine.zig +159 -68
  84. package/src/tigerbeetle/src/test/storage.zig +137 -49
  85. package/src/tigerbeetle/src/tigerbeetle.zig +5 -0
  86. package/src/tigerbeetle/src/unit_tests.zig +8 -0
  87. package/src/tigerbeetle/src/util.zig +51 -0
  88. package/src/tigerbeetle/src/vsr/client.zig +21 -7
  89. package/src/tigerbeetle/src/vsr/journal.zig +1429 -514
  90. package/src/tigerbeetle/src/vsr/replica.zig +1855 -550
  91. package/src/tigerbeetle/src/vsr/superblock.zig +1743 -0
  92. package/src/tigerbeetle/src/vsr/superblock_client_table.zig +258 -0
  93. package/src/tigerbeetle/src/vsr/superblock_free_set.zig +644 -0
  94. package/src/tigerbeetle/src/vsr/superblock_manifest.zig +546 -0
  95. package/src/tigerbeetle/src/vsr.zig +134 -52
  96. package/.yarn/releases/yarn-berry.cjs +0 -55
  97. package/.yarnrc.yml +0 -1
  98. package/scripts/postinstall.sh +0 -6
  99. package/yarn.lock +0 -42
@@ -0,0 +1,317 @@
1
+ const std = @import("std");
2
+ const assert = std.debug.assert;
3
+ const math = std.math;
4
+
5
+ const binary_search = @import("./binary_search.zig").binary_search;
6
+ const eytzinger = @import("./eytzinger.zig").eytzinger;
7
+ const perf = @import("./benchmarks/perf.zig");
8
+
9
/// Bytes in one gibibyte.
const GiB = 1 << 30;
/// Number of lookups timed per benchmark configuration.
const searches = 500_000;

/// Key and value sizes (in bytes) that are benchmarked.
const kv_types = .{
    .{ .key_size = 8, .value_size = 128 },
    .{ .key_size = 8, .value_size = 64 },
    .{ .key_size = 16, .value_size = 16 },
    .{ .key_size = 32, .value_size = 32 },
};

// keys_per_summary = values_per_page / summary_fraction
const summary_fractions = .{ 4, 8, 16, 32 };
const values_per_page = .{ 128, 256, 512, 1024, 2048, 4096, 8192 };

/// Format of one result row; the fields are documented by the legend logged in `main`.
const body_fmt = "{:_>2}B/{:_>3}B {:_>4}/{:_>4} {s}{s}: WT={:_>6}ns UT={:_>6}ns" ++
    " CY={:_>6} IN={:_>6} CR={:_>5} CM={:_>5} BM={}\n";

/// `summary_sizes[v]` holds, in ascending order, the summary key counts to try for
/// `values_per_page[v]` (one entry per summary fraction).
const summary_sizes = blk: {
    var table: [values_per_page.len][summary_fractions.len]usize = undefined;
    for (values_per_page) |values_count, row| {
        for (summary_fractions) |fraction, column| {
            // The fractions ascend, so the quotients descend: fill each row from the back
            // so that the stored sizes ascend.
            const slot = summary_fractions.len - column - 1;
            table[row][slot] = values_count / fraction;
        }
    }
    break :blk table;
};
35
+
36
/// Entry point: logs the sample count and column legend, seeds a PRNG from the OS,
/// allocates the shared buffer once, and runs the benchmark for every combination of
/// key/value size, values per page and summary size.
pub fn main() !void {
    std.log.info("Samples: {}", .{searches});

    // One legend line per column of the result rows.
    const legend = .{
        "WT: Wall time/search",
        "UT: utime time/search",
        "CY: CPU cycles/search",
        "IN: instructions/search",
        "CR: cache references/search",
        "CM: cache misses/search",
        "BM: branch misses/search",
    };
    inline for (legend) |line| std.log.info(line, .{});

    var seed: u64 = undefined;
    try std.os.getrandom(std.mem.asBytes(&seed));
    var prng = std.rand.DefaultPrng.init(seed);

    // A single heap allocation is made up front; every benchmark run reuses this buffer
    // for its pages, which keeps the total run time down.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    const blob_size = GiB;
    var blob = try arena.allocator.alloc(u8, blob_size);

    inline for (kv_types) |kv| {
        inline for (values_per_page) |values_count, page_size_index| {
            inline for (summary_sizes[page_size_index]) |keys_count| {
                try run_benchmark(.{
                    .blob_size = blob_size,
                    .key_size = kv.key_size,
                    .value_size = kv.value_size,
                    .keys_count = keys_count,
                    .values_count = values_count,
                    .searches = searches,
                }, blob, &prng.random);
            }
        }
    }
}
73
+
74
/// Times `layout.searches` lookups over pages carved out of `blob` and prints one result
/// row per search strategy:
///   - "EB": Eytzinger summary search, then a binary search within the returned bounds.
///   - "_B": a binary search over all values of the page.
/// `blob.len` must equal `layout.blob_size`.
fn run_benchmark(comptime layout: Layout, blob: []u8, random: *std.rand.Random) !void {
    assert(blob.len == layout.blob_size);

    const V = Value(layout);
    const K = V.Key;
    const Eytzinger = eytzinger(layout.keys_count - 1, layout.values_count);
    const Page = struct {
        keys: [layout.keys_count]K,
        values: [layout.values_count]V,
    };
    const page_count = layout.blob_size / @sizeOf(Page);

    // Pages are visited, and keys looked up, in random order.
    var page_order = shuffled_index(page_count, random);
    var target_order = shuffled_index(layout.values_count, random);

    // Carve the blob into as many pages as fit. Value bodies stay random; every page's
    // values get the keys 0, 1, 2, … from which the summary keys are laid out.
    var blob_alloc = std.heap.FixedBufferAllocator.init(blob);
    var pages = try blob_alloc.allocator.alloc(Page, page_count);
    random.bytes(std.mem.sliceAsBytes(pages));
    for (pages) |*page| {
        for (page.values) |*value, key| value.key = key;
        Eytzinger.layout_from_keys_or_values(K, V, V.key_from_value, V.max_key, &page.values, &page.keys);
    }

    const stdout = std.io.getStdOut().writer();

    // Strategy "EB": summary search to narrow the range, then binary search within it.
    {
        var benchmark = try Benchmark.begin();
        var search: usize = 0;
        var round: usize = 0;
        while (search < layout.searches) : (search += 1) {
            const page = &pages[page_order[search % page_order.len]];
            const target = target_order[round % target_order.len];
            const bounds = Eytzinger.search_values(K, V, V.key_compare, &page.keys, &page.values, target);
            const hit = bounds[binary_search(K, V, V.key_from_value, V.key_compare, bounds, target)];

            assert(hit.key == target);
            // Advance to the next target each time the search counter is a multiple of the page count.
            if (search % pages.len == 0) round += 1;
        }

        const result = try benchmark.end(layout.searches);
        try print_row(stdout, layout, "E", result);
    }

    // Strategy "_B": binary search over the whole page, without the summary.
    {
        var benchmark = try Benchmark.begin();
        var search: usize = 0;
        var round: usize = 0;
        while (search < layout.searches) : (search += 1) {
            const page = &pages[page_order[search % page_order.len]];
            const target = target_order[round % target_order.len];
            const hit = page.values[binary_search(K, V, V.key_from_value, V.key_compare, page.values[0..], target)];

            assert(hit.key == target);
            if (search % pages.len == 0) round += 1;
        }

        const result = try benchmark.end(layout.searches);
        try print_row(stdout, layout, "_", result);
    }
}

/// Prints one `body_fmt` row for `layout`; `summary_tag` is "E" when the Eytzinger
/// summary was used and "_" when it was not.
fn print_row(
    writer: anytype,
    comptime layout: Layout,
    summary_tag: []const u8,
    result: BenchmarkResult,
) !void {
    try writer.print(body_fmt, .{
        layout.key_size,
        layout.value_size,
        layout.keys_count,
        layout.values_count,
        summary_tag,
        "B",
        result.wall_time,
        result.utime,
        result.cpu_cycles,
        result.instructions,
        result.cache_references,
        result.cache_misses,
        result.branch_misses,
    });
}
162
+
163
/// Compile-time description of one benchmark configuration.
const Layout = struct {
    /// Bytes allocated for all pages.
    blob_size: usize,
    /// Bytes per key.
    key_size: usize,
    /// Bytes per value.
    value_size: usize,
    /// Keys per page (in the summary).
    keys_count: usize,
    /// Values per page.
    values_count: usize,
    /// Number of lookups to time.
    searches: usize,
};
171
+
172
/// Returns the value type for `layout`: a key of exactly `layout.key_size` bytes followed
/// by padding so that the whole value occupies `layout.value_size` bytes.
fn Value(comptime layout: Layout) type {
    return struct {
        /// Largest key representable in `layout.key_size` bytes (all bits set).
        /// The shift is parenthesized because `-` binds tighter than `<<` in Zig:
        /// without the parentheses this evaluates to `1 << (bits - 1)` instead.
        pub const max_key = (1 << (8 * layout.key_size)) - 1;
        /// Unsigned integer type just wide enough to hold `max_key`.
        pub const Key = math.IntFittingRange(0, max_key);
        const Self = @This();
        key: Key,
        body: [layout.value_size - layout.key_size]u8,

        comptime {
            // The in-memory sizes must match the sizes requested by the layout.
            assert(@sizeOf(Key) == layout.key_size);
            assert(@sizeOf(Self) == layout.value_size);
        }

        /// Extracts the key of a value.
        inline fn key_from_value(self: Self) Key {
            return self.key;
        }

        /// Identity projection, for searching an array of bare keys.
        inline fn key_from_key(x: Key) Key {
            return x;
        }

        /// Orders two keys numerically.
        inline fn key_compare(a: Key, b: Key) math.Order {
            return math.order(a, b);
        }
    };
}
198
+
199
/// Per-search averages measured by one `Benchmark` run.
const BenchmarkResult = struct {
    /// Wall-clock time, in nanoseconds.
    wall_time: u64,
    /// User CPU time, in nanoseconds.
    utime: u64,
    cpu_cycles: usize,
    instructions: usize,
    cache_references: usize,
    cache_misses: usize,
    branch_misses: usize,
};
208
+
209
const PERF = perf.PERF;
const perf_event_attr = perf.perf_event_attr;
const perf_event_open = perf.perf_event_open;

/// Hardware counters sampled for each benchmark, in the order in which
/// `Benchmark.end` reads them back.
const perf_counters = [_]PERF.COUNT.HW{
    .CPU_CYCLES,
    .INSTRUCTIONS,
    .CACHE_REFERENCES,
    .CACHE_MISSES,
    .BRANCH_MISSES,
};
219
+
220
/// Measures wall time, user CPU time and the hardware counters in `perf_counters`
/// between `begin()` and `end()`.
const Benchmark = struct {
    timer: std.time.Timer,
    rusage: std.os.rusage,
    perf_fds: [perf_counters.len]std.os.fd_t,

    /// Opens and enables one perf event per counter, then starts the clocks.
    /// Returns `error.Unexpected` if the counters cannot be enabled.
    fn begin() !Benchmark {
        var perf_fds = [1]std.os.fd_t{-1} ** perf_counters.len;
        // Do not leak the counters that were already opened if a later step fails.
        errdefer {
            for (perf_fds) |fd| {
                if (fd != -1) std.os.close(fd);
            }
        }

        for (perf_counters) |counter, i| {
            var attr: perf_event_attr = .{
                .type = PERF.TYPE.HARDWARE,
                .config = @enumToInt(counter),
                .flags = .{
                    .disabled = true,
                    .exclude_kernel = true,
                    .exclude_hv = true,
                },
            };
            // perf_fds[0] is still -1 while the first counter is opened and is that
            // counter's fd afterwards — presumably the group_fd argument, making the
            // first counter the group leader (confirm against perf.zig).
            perf_fds[i] = try perf_event_open(&attr, 0, -1, perf_fds[0], PERF.FLAG.FD_CLOEXEC);
        }

        // std.os.linux.ioctl returns the raw syscall result as a usize: failures are
        // encoded as a negative errno, so the sign must be checked after a bit-cast.
        const rc = std.os.linux.ioctl(perf_fds[0], PERF.EVENT_IOC.ENABLE, PERF.IOC_FLAG_GROUP);
        if (@bitCast(isize, rc) < 0) return error.Unexpected;

        // Start the wall clock after perf, since setup is slow.
        const timer = try std.time.Timer.start();
        return Benchmark{
            .timer = timer,
            // TODO pass std.os.linux.rusage.SELF once Zig is upgraded
            .rusage = std.os.getrusage(0),
            .perf_fds = perf_fds,
        };
    }

    /// Stops the counters and returns every measurement divided by `samples`.
    /// The perf file descriptors are always closed, even when an error is returned.
    fn end(self: *Benchmark, samples: usize) !BenchmarkResult {
        defer {
            for (perf_counters) |_, i| {
                std.os.close(self.perf_fds[i]);
                self.perf_fds[i] = -1;
            }
        }

        const rusage = std.os.getrusage(0);
        const rc = std.os.linux.ioctl(self.perf_fds[0], PERF.EVENT_IOC.DISABLE, PERF.IOC_FLAG_GROUP);
        if (@bitCast(isize, rc) < 0) return error.Unexpected;

        return BenchmarkResult{
            .wall_time = self.timer.read() / samples,
            .utime = (timeval_to_ns(rusage.utime) - timeval_to_ns(self.rusage.utime)) / samples,
            .cpu_cycles = (try readPerfFd(self.perf_fds[0])) / samples,
            .instructions = (try readPerfFd(self.perf_fds[1])) / samples,
            .cache_references = (try readPerfFd(self.perf_fds[2])) / samples,
            .cache_misses = (try readPerfFd(self.perf_fds[3])) / samples,
            .branch_misses = (try readPerfFd(self.perf_fds[4])) / samples,
        };
    }
};
275
+
276
/// Returns the indices 0, 1, …, n-1 in an order shuffled by `rand`.
fn shuffled_index(comptime n: usize, rand: *std.rand.Random) [n]usize {
    var order: [n]usize = undefined;
    var next: usize = 0;
    while (next < n) : (next += 1) {
        order[next] = next;
    }
    rand.shuffle(usize, &order);
    return order;
}
283
+
284
/// Converts a `timeval` (seconds plus microseconds) into nanoseconds.
/// Both fields are reinterpreted as unsigned 64-bit integers.
fn timeval_to_ns(tv: std.os.timeval) u64 {
    const seconds = @bitCast(u64, tv.tv_sec);
    const microseconds = @bitCast(u64, tv.tv_usec);
    return seconds * std.time.ns_per_s + microseconds * std.time.ns_per_us;
}
289
+
290
/// Reads one `usize`-sized counter value from `fd`.
/// Asserts that the read returned exactly `@sizeOf(usize)` bytes.
fn readPerfFd(fd: std.os.fd_t) !usize {
    var counter: usize = 0;
    const bytes_read = try std.os.read(fd, std.mem.asBytes(&counter));
    assert(bytes_read == @sizeOf(usize));
    return counter;
}
297
+
298
/// Binary-searches the summary `keys` for `key` and returns the slice of `values` selected
/// by the resulting index, where each summary key spans
/// `layout.values_count / layout.keys_count` values.
/// `keys` and `values` must have exactly the lengths given by `layout`.
fn binary_search_keys(
    comptime layout: Layout,
    comptime Key: type,
    comptime V: type,
    comptime compare_keys: fn (Key, Key) math.Order,
    keys: []const Key,
    values: []const V,
    key: Key,
) []const V {
    assert(keys.len == layout.keys_count);
    assert(values.len == layout.values_count);

    const values_per_key = layout.values_count / layout.keys_count;
    const summary_index = binary_search(Key, Key, V.key_from_key, compare_keys, keys, key);
    const upper = summary_index * values_per_key;

    const exact_match = summary_index < keys.len and keys[summary_index] == key;
    if (!exact_match) {
        // NOTE(review): if the search returns index 0 without an exact match, `upper` is 0
        // and this subtraction underflows — confirm that callers never hit this case.
        return values[upper - values_per_key .. upper];
    }
    if (upper == 0) return values[0..1];
    return values[upper - 1 .. upper];
}
@@ -34,6 +34,10 @@ pub fn FIFO(comptime T: type) type {
34
34
  return self.out;
35
35
  }
36
36
 
37
/// Returns true when `peek()` yields null, i.e. when there is no element to pop.
pub fn empty(self: Self) bool {
    const head = self.peek();
    return head == null;
}
40
+
37
41
  /// Remove an element from the FIFO. Asserts that the element is
38
42
  /// in the FIFO. This operation is O(N), if this is done often you
39
43
  /// probably want a different data structure.
@@ -55,7 +59,7 @@ pub fn FIFO(comptime T: type) type {
55
59
  };
56
60
  }
57
61
 
58
- test "push/pop/peek/remove" {
62
+ test "push/pop/peek/remove/empty" {
59
63
  const testing = @import("std").testing;
60
64
 
61
65
  const Foo = struct { next: ?*@This() = null };
@@ -65,34 +69,45 @@ test "push/pop/peek/remove" {
65
69
  var three: Foo = .{};
66
70
 
67
71
  var fifo: FIFO(Foo) = .{};
72
+ try testing.expect(fifo.empty());
68
73
 
69
74
  fifo.push(&one);
75
+ try testing.expect(!fifo.empty());
70
76
  try testing.expectEqual(@as(?*Foo, &one), fifo.peek());
71
77
 
72
78
  fifo.push(&two);
73
79
  fifo.push(&three);
80
+ try testing.expect(!fifo.empty());
74
81
  try testing.expectEqual(@as(?*Foo, &one), fifo.peek());
75
82
 
76
83
  fifo.remove(&one);
84
+ try testing.expect(!fifo.empty());
77
85
  try testing.expectEqual(@as(?*Foo, &two), fifo.pop());
78
86
  try testing.expectEqual(@as(?*Foo, &three), fifo.pop());
79
87
  try testing.expectEqual(@as(?*Foo, null), fifo.pop());
88
+ try testing.expect(fifo.empty());
80
89
 
81
90
  fifo.push(&one);
82
91
  fifo.push(&two);
83
92
  fifo.push(&three);
84
93
  fifo.remove(&two);
94
+ try testing.expect(!fifo.empty());
85
95
  try testing.expectEqual(@as(?*Foo, &one), fifo.pop());
86
96
  try testing.expectEqual(@as(?*Foo, &three), fifo.pop());
87
97
  try testing.expectEqual(@as(?*Foo, null), fifo.pop());
98
+ try testing.expect(fifo.empty());
88
99
 
89
100
  fifo.push(&one);
90
101
  fifo.push(&two);
91
102
  fifo.push(&three);
92
103
  fifo.remove(&three);
104
+ try testing.expect(!fifo.empty());
93
105
  try testing.expectEqual(@as(?*Foo, &one), fifo.pop());
106
+ try testing.expect(!fifo.empty());
94
107
  try testing.expectEqual(@as(?*Foo, &two), fifo.pop());
108
+ try testing.expect(fifo.empty());
95
109
  try testing.expectEqual(@as(?*Foo, null), fifo.pop());
110
+ try testing.expect(fifo.empty());
96
111
 
97
112
  fifo.push(&one);
98
113
  fifo.push(&two);
@@ -101,4 +116,5 @@ test "push/pop/peek/remove" {
101
116
  try testing.expectEqual(@as(?*Foo, &one), fifo.pop());
102
117
  try testing.expectEqual(@as(?*Foo, &three), fifo.pop());
103
118
  try testing.expectEqual(@as(?*Foo, null), fifo.pop());
119
+ try testing.expect(fifo.empty());
104
120
  }
@@ -83,7 +83,7 @@ pub const IO = struct {
83
83
  const change_events = self.flush_io(&events, &io_pending);
84
84
 
85
85
  // Only call kevent() if we need to submit io events or if we need to wait for completions.
86
- if (change_events > 0 or self.completed.peek() == null) {
86
+ if (change_events > 0 or self.completed.empty()) {
87
87
  // Zero timeouts for kevent() implies a non-blocking poll
88
88
  var ts = std.mem.zeroes(os.timespec);
89
89
 
@@ -91,7 +91,7 @@ pub const IO = struct {
91
91
  // We should never wait indefinitely (timeout_ptr = null for kevent) given:
92
92
  // - tick() is non-blocking (wait_for_completions = false)
93
93
  // - run_for_ns() always submits a timeout
94
- if (change_events == 0 and self.completed.peek() == null) {
94
+ if (change_events == 0 and self.completed.empty()) {
95
95
  if (wait_for_completions) {
96
96
  const timeout_ns = next_timeout orelse @panic("kevent() blocking forever");
97
97
  ts.tv_nsec = @intCast(@TypeOf(ts.tv_nsec), timeout_ns % std.time.ns_per_s);
@@ -430,6 +430,7 @@ pub const IO = struct {
430
430
  IsDir,
431
431
  SystemResources,
432
432
  Unseekable,
433
+ ConnectionTimedOut,
433
434
  } || os.UnexpectedError;
434
435
 
435
436
  pub fn read(
@@ -481,6 +482,7 @@ pub const IO = struct {
481
482
  .NXIO => error.Unseekable,
482
483
  .OVERFLOW => error.Unseekable,
483
484
  .SPIPE => error.Unseekable,
485
+ .TIMEDOUT => error.ConnectionTimedOut,
484
486
  else => |err| os.unexpectedErrno(err),
485
487
  };
486
488
  }
@@ -635,10 +637,12 @@ pub const IO = struct {
635
637
  }
636
638
 
637
639
  /// Opens a directory with read only access.
638
- pub fn open_dir(dir_path: [:0]const u8) !os.fd_t {
639
- return os.openZ(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0);
640
+ pub fn open_dir(dir_path: []const u8) !os.fd_t {
641
+ return os.open(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0);
640
642
  }
641
643
 
644
+ pub const INVALID_FILE: os.fd_t = -1;
645
+
642
646
  /// Opens or creates a journal file:
643
647
  /// - For reading and writing.
644
648
  /// - For Direct I/O (required on darwin).
@@ -648,14 +652,11 @@ pub const IO = struct {
648
652
  /// The caller is responsible for ensuring that the parent directory inode is durable.
649
653
  /// - Verifies that the file size matches the expected file size before returning.
650
654
  pub fn open_file(
651
- self: *IO,
652
655
  dir_fd: os.fd_t,
653
- relative_path: [:0]const u8,
656
+ relative_path: []const u8,
654
657
  size: u64,
655
658
  must_create: bool,
656
659
  ) !os.fd_t {
657
- _ = self;
658
-
659
660
  assert(relative_path.len > 0);
660
661
  assert(size >= config.sector_size);
661
662
  assert(size % config.sector_size == 0);
@@ -685,7 +686,7 @@ pub const IO = struct {
685
686
 
686
687
  // Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
687
688
  assert(!std.fs.path.isAbsolute(relative_path));
688
- const fd = try os.openatZ(dir_fd, relative_path, flags, mode);
689
+ const fd = try os.openat(dir_fd, relative_path, flags, mode);
689
690
  // TODO Return a proper error message when the path exists or does not exist (init/start).
690
691
  errdefer os.close(fd);
691
692
 
@@ -720,8 +721,9 @@ pub const IO = struct {
720
721
  // We always do this when opening because we don't know if this was done before crashing.
721
722
  try fs_sync(dir_fd);
722
723
 
724
+ // TODO Document that `size` is now `data_file_size_min` from `main.zig`.
723
725
  const stat = try os.fstat(fd);
724
- if (stat.size != size) @panic("data file inode size was truncated or corrupted");
726
+ if (stat.size < size) @panic("data file inode size was truncated or corrupted");
725
727
 
726
728
  return fd;
727
729
  }
@@ -22,6 +22,14 @@ pub const IO = struct {
22
22
  completed: FIFO(Completion) = .{},
23
23
 
24
24
  pub fn init(entries: u12, flags: u32) !IO {
25
+ // Detect the linux version to ensure that we support all io_uring ops used.
26
+ const uts = std.os.uname();
27
+ const release = std.mem.sliceTo(&uts.release, 0);
28
+ const version = try std.builtin.Version.parse(release);
29
+ if (version.major < 5 or version.minor < 5) {
30
+ @panic("Linux kernel 5.5 or greater is required for io_uring OP_ACCEPT");
31
+ }
32
+
25
33
  return IO{ .ring = try IO_Uring.init(entries, flags) };
26
34
  }
27
35
 
@@ -334,6 +342,7 @@ pub const IO = struct {
334
342
  .NXIO => error.Unseekable,
335
343
  .OVERFLOW => error.Unseekable,
336
344
  .SPIPE => error.Unseekable,
345
+ .TIMEDOUT => error.ConnectionTimedOut,
337
346
  else => |errno| os.unexpectedErrno(errno),
338
347
  };
339
348
  break :blk err;
@@ -360,6 +369,8 @@ pub const IO = struct {
360
369
  .NOTCONN => error.SocketNotConnected,
361
370
  .NOTSOCK => error.FileDescriptorNotASocket,
362
371
  .CONNRESET => error.ConnectionResetByPeer,
372
+ .TIMEDOUT => error.ConnectionTimedOut,
373
+ .OPNOTSUPP => error.OperationNotSupported,
363
374
  else => |errno| os.unexpectedErrno(errno),
364
375
  };
365
376
  break :blk err;
@@ -394,6 +405,7 @@ pub const IO = struct {
394
405
  .NOTSOCK => error.FileDescriptorNotASocket,
395
406
  .OPNOTSUPP => error.OperationNotSupported,
396
407
  .PIPE => error.BrokenPipe,
408
+ .TIMEDOUT => error.ConnectionTimedOut,
397
409
  else => |errno| os.unexpectedErrno(errno),
398
410
  };
399
411
  break :blk err;
@@ -591,6 +603,7 @@ pub const IO = struct {
591
603
  PermissionDenied,
592
604
  ProtocolNotSupported,
593
605
  ConnectionTimedOut,
606
+ SystemResources,
594
607
  } || os.UnexpectedError;
595
608
 
596
609
  pub fn connect(
@@ -637,6 +650,7 @@ pub const IO = struct {
637
650
  IsDir,
638
651
  SystemResources,
639
652
  Unseekable,
653
+ ConnectionTimedOut,
640
654
  } || os.UnexpectedError;
641
655
 
642
656
  pub fn read(
@@ -683,6 +697,8 @@ pub const IO = struct {
683
697
  SystemResources,
684
698
  SocketNotConnected,
685
699
  FileDescriptorNotASocket,
700
+ ConnectionTimedOut,
701
+ OperationNotSupported,
686
702
  } || os.UnexpectedError;
687
703
 
688
704
  pub fn recv(
@@ -733,6 +749,7 @@ pub const IO = struct {
733
749
  FileDescriptorNotASocket,
734
750
  OperationNotSupported,
735
751
  BrokenPipe,
752
+ ConnectionTimedOut,
736
753
  } || os.UnexpectedError;
737
754
 
738
755
  pub fn send(
@@ -867,10 +884,12 @@ pub const IO = struct {
867
884
  }
868
885
 
869
886
  /// Opens a directory with read only access.
870
- pub fn open_dir(dir_path: [:0]const u8) !os.fd_t {
871
- return os.openZ(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0);
887
+ pub fn open_dir(dir_path: []const u8) !os.fd_t {
888
+ return os.open(dir_path, os.O.CLOEXEC | os.O.RDONLY, 0);
872
889
  }
873
890
 
891
+ pub const INVALID_FILE: os.fd_t = -1;
892
+
874
893
  /// Opens or creates a journal file:
875
894
  /// - For reading and writing.
876
895
  /// - For Direct I/O (if possible in development mode, but required in production mode).
@@ -880,14 +899,11 @@ pub const IO = struct {
880
899
  /// The caller is responsible for ensuring that the parent directory inode is durable.
881
900
  /// - Verifies that the file size matches the expected file size before returning.
882
901
  pub fn open_file(
883
- self: *IO,
884
902
  dir_fd: os.fd_t,
885
- relative_path: [:0]const u8,
903
+ relative_path: []const u8,
886
904
  size: u64,
887
905
  must_create: bool,
888
906
  ) !os.fd_t {
889
- _ = self;
890
-
891
907
  assert(relative_path.len > 0);
892
908
  assert(size >= config.sector_size);
893
909
  assert(size % config.sector_size == 0);
@@ -929,7 +945,7 @@ pub const IO = struct {
929
945
 
930
946
  // Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
931
947
  assert(!std.fs.path.isAbsolute(relative_path));
932
- const fd = try os.openatZ(dir_fd, relative_path, flags, mode);
948
+ const fd = try os.openat(dir_fd, relative_path, flags, mode);
933
949
  // TODO Return a proper error message when the path exists or does not exist (init/start).
934
950
  errdefer os.close(fd);
935
951
 
@@ -978,7 +994,7 @@ pub const IO = struct {
978
994
  try os.fsync(dir_fd);
979
995
 
980
996
  const stat = try os.fstat(fd);
981
- if (stat.size != size) @panic("data file inode size was truncated or corrupted");
997
+ if (stat.size < size) @panic("data file inode size was truncated or corrupted");
982
998
 
983
999
  return fd;
984
1000
  }
@@ -995,7 +1011,7 @@ pub const IO = struct {
995
1011
  defer dir.deleteFile(path) catch {};
996
1012
 
997
1013
  while (true) {
998
- const res = os.system.openat(dir_fd, path, os.O.CLOEXEC | os.O.RDONLY | os.O.DIRECT, 0);
1014
+ const res = os.linux.openat(dir_fd, path, os.O.CLOEXEC | os.O.RDONLY | os.O.DIRECT, 0);
999
1015
  switch (os.linux.getErrno(res)) {
1000
1016
  .SUCCESS => {
1001
1017
  os.close(@intCast(os.fd_t, res));
@@ -737,6 +737,7 @@ pub const IO = struct {
737
737
  IsDir,
738
738
  SystemResources,
739
739
  Unseekable,
740
+ ConnectionTimedOut,
740
741
  } || os.UnexpectedError;
741
742
 
742
743
  pub fn read(
@@ -918,11 +919,13 @@ pub const IO = struct {
918
919
  }
919
920
 
920
921
  /// Opens a directory with read only access.
921
- pub fn open_dir(dir_path: [:0]const u8) !os.fd_t {
922
- const dir = try std.fs.cwd().openDirZ(dir_path, .{});
922
+ pub fn open_dir(dir_path: []const u8) !os.fd_t {
923
+ const dir = try std.fs.cwd().openDir(dir_path, .{});
923
924
  return dir.fd;
924
925
  }
925
926
 
927
+ pub const INVALID_FILE = os.windows.INVALID_HANDLE_VALUE;
928
+
926
929
  /// Opens or creates a journal file:
927
930
  /// - For reading and writing.
928
931
  /// - For Direct I/O (required on windows).
@@ -932,14 +935,11 @@ pub const IO = struct {
932
935
  /// The caller is responsible for ensuring that the parent directory inode is durable.
933
936
  /// - Verifies that the file size matches the expected file size before returning.
934
937
  pub fn open_file(
935
- self: *IO,
936
938
  dir_handle: os.fd_t,
937
- relative_path: [:0]const u8,
939
+ relative_path: []const u8,
938
940
  size: u64,
939
941
  must_create: bool,
940
942
  ) !os.fd_t {
941
- _ = self;
942
-
943
943
  assert(relative_path.len > 0);
944
944
  assert(size >= config.sector_size);
945
945
  assert(size % config.sector_size == 0);
@@ -988,8 +988,12 @@ pub const IO = struct {
988
988
 
989
989
  if (handle == os.windows.INVALID_HANDLE_VALUE) {
990
990
  return switch (os.windows.kernel32.GetLastError()) {
991
- .ACCESS_DENIED => error.AccessDenied,
992
- else => |err| os.windows.unexpectedError(err),
991
+ .FILE_NOT_FOUND => error.FileNotFound,
992
+ .SHARING_VIOLATION, .ACCESS_DENIED => error.AccessDenied,
993
+ else => |err| {
994
+ log.warn("CreateFileW(): {}", .{err});
995
+ return os.windows.unexpectedError(err);
996
+ },
993
997
  };
994
998
  }
995
999
 
@@ -1034,7 +1038,7 @@ pub const IO = struct {
1034
1038
  _ = dir_handle;
1035
1039
 
1036
1040
  const file_size = try os.windows.GetFileSizeEx(handle);
1037
- if (file_size != size) @panic("data file inode size was truncated or corrupted");
1041
+ if (file_size < size) @panic("data file inode size was truncated or corrupted");
1038
1042
 
1039
1043
  return handle;
1040
1044
  }