tigerbeetle-node 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -47
- package/dist/benchmark.js +15 -15
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +66 -61
- package/dist/index.js +66 -61
- package/dist/index.js.map +1 -1
- package/dist/test.js +1 -1
- package/dist/test.js.map +1 -1
- package/package.json +14 -16
- package/scripts/download_node_headers.sh +3 -1
- package/src/index.ts +5 -0
- package/src/node.zig +18 -19
- package/src/tigerbeetle/scripts/benchmark.bat +47 -46
- package/src/tigerbeetle/scripts/benchmark.sh +25 -10
- package/src/tigerbeetle/scripts/install.sh +2 -1
- package/src/tigerbeetle/scripts/install_zig.bat +109 -109
- package/src/tigerbeetle/scripts/install_zig.sh +18 -18
- package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +12 -3
- package/src/tigerbeetle/scripts/vopr.bat +47 -47
- package/src/tigerbeetle/scripts/vopr.sh +5 -5
- package/src/tigerbeetle/src/benchmark.zig +17 -9
- package/src/tigerbeetle/src/benchmark_array_search.zig +317 -0
- package/src/tigerbeetle/src/benchmarks/perf.zig +299 -0
- package/src/tigerbeetle/src/c/tb_client/context.zig +103 -0
- package/src/tigerbeetle/src/c/tb_client/packet.zig +80 -0
- package/src/tigerbeetle/src/c/tb_client/signal.zig +288 -0
- package/src/tigerbeetle/src/c/tb_client/thread.zig +329 -0
- package/src/tigerbeetle/src/c/tb_client.h +201 -0
- package/src/tigerbeetle/src/c/tb_client.zig +101 -0
- package/src/tigerbeetle/src/c/test.zig +1 -0
- package/src/tigerbeetle/src/cli.zig +142 -83
- package/src/tigerbeetle/src/config.zig +136 -23
- package/src/tigerbeetle/src/demo.zig +12 -8
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +3 -3
- package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +10 -10
- package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +7 -7
- package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +3 -3
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +1 -1
- package/src/tigerbeetle/src/ewah.zig +318 -0
- package/src/tigerbeetle/src/ewah_benchmark.zig +121 -0
- package/src/tigerbeetle/src/eytzinger_benchmark.zig +317 -0
- package/src/tigerbeetle/src/fifo.zig +17 -1
- package/src/tigerbeetle/src/io/darwin.zig +12 -10
- package/src/tigerbeetle/src/io/linux.zig +25 -9
- package/src/tigerbeetle/src/io/windows.zig +13 -9
- package/src/tigerbeetle/src/iops.zig +101 -0
- package/src/tigerbeetle/src/lsm/binary_search.zig +214 -0
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +82 -0
- package/src/tigerbeetle/src/lsm/compaction.zig +603 -0
- package/src/tigerbeetle/src/lsm/composite_key.zig +75 -0
- package/src/tigerbeetle/src/lsm/direction.zig +11 -0
- package/src/tigerbeetle/src/lsm/eytzinger.zig +587 -0
- package/src/tigerbeetle/src/lsm/forest.zig +630 -0
- package/src/tigerbeetle/src/lsm/grid.zig +473 -0
- package/src/tigerbeetle/src/lsm/groove.zig +939 -0
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +452 -0
- package/src/tigerbeetle/src/lsm/level_iterator.zig +296 -0
- package/src/tigerbeetle/src/lsm/manifest.zig +680 -0
- package/src/tigerbeetle/src/lsm/manifest_level.zig +1169 -0
- package/src/tigerbeetle/src/lsm/manifest_log.zig +904 -0
- package/src/tigerbeetle/src/lsm/node_pool.zig +231 -0
- package/src/tigerbeetle/src/lsm/posted_groove.zig +399 -0
- package/src/tigerbeetle/src/lsm/segmented_array.zig +998 -0
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +844 -0
- package/src/tigerbeetle/src/lsm/table.zig +932 -0
- package/src/tigerbeetle/src/lsm/table_immutable.zig +196 -0
- package/src/tigerbeetle/src/lsm/table_iterator.zig +295 -0
- package/src/tigerbeetle/src/lsm/table_mutable.zig +123 -0
- package/src/tigerbeetle/src/lsm/test.zig +429 -0
- package/src/tigerbeetle/src/lsm/tree.zig +1085 -0
- package/src/tigerbeetle/src/main.zig +121 -95
- package/src/tigerbeetle/src/message_bus.zig +49 -48
- package/src/tigerbeetle/src/message_pool.zig +19 -3
- package/src/tigerbeetle/src/ring_buffer.zig +172 -31
- package/src/tigerbeetle/src/simulator.zig +171 -43
- package/src/tigerbeetle/src/state_machine.zig +1026 -599
- package/src/tigerbeetle/src/storage.zig +46 -16
- package/src/tigerbeetle/src/test/cluster.zig +257 -78
- package/src/tigerbeetle/src/test/message_bus.zig +15 -24
- package/src/tigerbeetle/src/test/network.zig +26 -17
- package/src/tigerbeetle/src/test/packet_simulator.zig +14 -1
- package/src/tigerbeetle/src/test/state_checker.zig +10 -6
- package/src/tigerbeetle/src/test/state_machine.zig +159 -68
- package/src/tigerbeetle/src/test/storage.zig +137 -49
- package/src/tigerbeetle/src/tigerbeetle.zig +5 -0
- package/src/tigerbeetle/src/unit_tests.zig +8 -0
- package/src/tigerbeetle/src/util.zig +51 -0
- package/src/tigerbeetle/src/vsr/client.zig +21 -7
- package/src/tigerbeetle/src/vsr/journal.zig +1429 -514
- package/src/tigerbeetle/src/vsr/replica.zig +1855 -550
- package/src/tigerbeetle/src/vsr/superblock.zig +1743 -0
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +258 -0
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +644 -0
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +546 -0
- package/src/tigerbeetle/src/vsr.zig +134 -52
- package/.yarn/releases/yarn-berry.cjs +0 -55
- package/.yarnrc.yml +0 -1
- package/scripts/postinstall.sh +0 -6
- package/yarn.lock +0 -42
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const math = std.math;
|
|
4
|
+
|
|
5
|
+
const binary_search = @import("./binary_search.zig").binary_search;
|
|
6
|
+
const eytzinger = @import("./eytzinger.zig").eytzinger;
|
|
7
|
+
const perf = @import("./benchmarks/perf.zig");
|
|
8
|
+
|
|
9
|
+
/// Number of bytes in one gibibyte; `main` uses it as the size of the shared page buffer.
const GiB = 1 << 30;
/// Number of searches performed (and averaged over) by every benchmark run.
const searches = 500_000;

/// Key and value sizes, in bytes, of each record shape to benchmark.
const kv_types = .{
    .{ .key_size = 8, .value_size = 128 },
    .{ .key_size = 8, .value_size = 64 },
    .{ .key_size = 16, .value_size = 16 },
    .{ .key_size = 32, .value_size = 32 },
};

/// Divisors applied to each entry of `values_per_page` to derive the summary key counts:
/// keys_per_summary = values_per_page / summary_fraction
const summary_fractions = .{ 4, 8, 16, 32 };
/// Number of values stored in a page, for each page shape to benchmark.
const values_per_page = .{ 128, 256, 512, 1024, 2048, 4096, 8192 };
/// Format of one result row: key size, value size, keys per page, values per page, two
/// single-character tags, then the per-search metrics.
const body_fmt = "{:_>2}B/{:_>3}B {:_>4}/{:_>4} {s}{s}: WT={:_>6}ns UT={:_>6}ns" ++
    " CY={:_>6} IN={:_>6} CR={:_>5} CM={:_>5} BM={}\n";
|
|
24
|
+
|
|
25
|
+
/// For every entry of `values_per_page`, the candidate summary key counts
/// (`values_count / fraction` for each of `summary_fractions`), stored smallest first.
const summary_sizes = blk: {
    var table: [values_per_page.len][summary_fractions.len]usize = undefined;
    for (values_per_page) |values_count, row| {
        for (summary_fractions) |fraction, column| {
            // The fractions ascend, so the quotients descend: mirror the column index so that
            // each row ends up in ascending order.
            const mirrored = summary_fractions.len - 1 - column;
            table[row][mirrored] = values_count / fraction;
        }
    }
    break :blk table;
};
|
|
35
|
+
|
|
36
|
+
/// Runs the search benchmark for every combination of record shape (`kv_types`), page size
/// (`values_per_page`) and summary size (`summary_sizes`), printing one pair of result rows
/// per combination.
pub fn main() !void {
    // Legend for the abbreviations used in each result row.
    std.log.info("Samples: {}", .{searches});
    const legend = .{
        "WT: Wall time/search",
        "UT: utime time/search",
        "CY: CPU cycles/search",
        "IN: instructions/search",
        "CR: cache references/search",
        "CM: cache misses/search",
        "BM: branch misses/search",
    };
    inline for (legend) |line| std.log.info(line, .{});

    // Seed the PRNG from the operating system.
    var seed: u64 = undefined;
    try std.os.getrandom(std.mem.asBytes(&seed));
    var prng = std.rand.DefaultPrng.init(seed);

    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    // A single heap allocation, made up front: every layout below carves its pages out of
    // this same buffer, which keeps the total run time down.
    const blob_bytes = GiB;
    const blob = try arena.allocator.alloc(u8, blob_bytes);

    inline for (kv_types) |kv| {
        inline for (values_per_page) |values_count, page_shape| {
            inline for (summary_sizes[page_shape]) |keys_count| {
                try run_benchmark(.{
                    .blob_size = blob_bytes,
                    .key_size = kv.key_size,
                    .value_size = kv.value_size,
                    .keys_count = keys_count,
                    .values_count = values_count,
                    .searches = searches,
                }, blob, &prng.random);
            }
        }
    }
}
|
|
73
|
+
|
|
74
|
+
/// Prints one result row in `body_fmt` layout.
/// `summary_tag` fills the first of the two tag columns; the second is always "B".
fn print_benchmark_result(
    comptime layout: Layout,
    summary_tag: []const u8,
    result: BenchmarkResult,
) !void {
    const stdout = std.io.getStdOut().writer();
    try stdout.print(body_fmt, .{
        layout.key_size,
        layout.value_size,
        layout.keys_count,
        layout.values_count,
        summary_tag,
        "B",
        result.wall_time,
        result.utime,
        result.cpu_cycles,
        result.instructions,
        result.cache_references,
        result.cache_misses,
        result.branch_misses,
    });
}

/// Benchmarks `layout.searches` point lookups over pages carved out of `blob`, twice:
/// first narrowing the range with the Eytzinger key summary before binary searching (row
/// tagged "EB"), then binary searching the whole page directly (row tagged "_B").
///
/// `blob` must be exactly `layout.blob_size` bytes; its previous contents are overwritten.
/// `random` drives both the page filler bytes and the order in which pages and keys are visited.
/// Returns any error from allocating within `blob`, starting/stopping the measurement, or
/// writing to stdout.
fn run_benchmark(comptime layout: Layout, blob: []u8, random: *std.rand.Random) !void {
    assert(blob.len == layout.blob_size);
    const Eytzinger = eytzinger(layout.keys_count - 1, layout.values_count);
    const V = Value(layout);
    const K = V.Key;
    const Page = struct {
        keys: [layout.keys_count]K,
        values: [layout.values_count]V,
    };
    const page_count = layout.blob_size / @sizeOf(Page);

    // Search pages and keys in random order.
    const page_picker = shuffled_index(page_count, random);
    const value_picker = shuffled_index(layout.values_count, random);

    // Fill the blob with as many pages as fit (the page size depends on the layout).
    var blob_alloc = std.heap.FixedBufferAllocator.init(blob);
    const pages = try blob_alloc.allocator.alloc(Page, page_count);
    random.bytes(std.mem.sliceAsBytes(pages));
    for (pages) |*page| {
        // Keys are 0..values_count-1 in ascending order; the rest of each value stays random.
        for (page.values) |*value, i| value.key = i;
        Eytzinger.layout_from_keys_or_values(K, V, V.key_from_value, V.max_key, &page.values, &page.keys);
    }

    // Eytzinger summary search followed by a binary search within the returned bounds.
    {
        var benchmark = try Benchmark.begin();
        var i: usize = 0;
        var v: usize = 0;
        while (i < layout.searches) : (i += 1) {
            const page_index = page_picker[i % page_picker.len];
            const target = value_picker[v % value_picker.len];
            const page = &pages[page_index];
            const bounds = Eytzinger.search_values(K, V, V.key_compare, &page.keys, &page.values, target);
            const hit = bounds[binary_search(K, V, V.key_from_value, V.key_compare, bounds, target)];

            assert(hit.key == target);
            // Move on to the next target key each time the page index wraps around.
            if (i % pages.len == 0) v += 1;
        }

        const result = try benchmark.end(layout.searches);
        try print_benchmark_result(layout, "E", result);
    }

    // Plain binary search over all values of the page.
    {
        var benchmark = try Benchmark.begin();
        var i: usize = 0;
        var v: usize = 0;
        while (i < layout.searches) : (i += 1) {
            const target = value_picker[v % value_picker.len];
            const page = &pages[page_picker[i % page_picker.len]];
            const hit = page.values[binary_search(K, V, V.key_from_value, V.key_compare, page.values[0..], target)];

            assert(hit.key == target);
            if (i % pages.len == 0) v += 1;
        }
        const result = try benchmark.end(layout.searches);
        try print_benchmark_result(layout, "_", result);
    }
}
|
|
162
|
+
|
|
163
|
+
/// Compile-time description of one benchmark configuration.
const Layout = struct {
    /// Bytes allocated for all pages.
    blob_size: usize,
    /// Bytes per key.
    key_size: usize,
    /// Bytes per value.
    value_size: usize,
    /// Keys per page (in the summary).
    keys_count: usize,
    /// Values per page.
    values_count: usize,
    /// Number of searches performed per benchmark run.
    searches: usize,
};
|
|
171
|
+
|
|
172
|
+
/// Returns the record type stored in benchmark pages for `layout`: an unsigned integer `key`
/// of `layout.key_size` bytes plus a filler `body`, `layout.value_size` bytes in total
/// (both sizes are checked at compile time).
fn Value(comptime layout: Layout) type {
    return struct {
        /// Largest key representable in `layout.key_size` bytes (all bits set).
        /// The shift must be parenthesized: `-` binds tighter than `<<` in Zig, so
        /// `1 << (8 * n) - 1` parses as `1 << ((8 * n) - 1)`, which sets only the top bit.
        pub const max_key = (1 << (8 * layout.key_size)) - 1;
        /// Smallest unsigned integer type able to hold every key in `0..max_key`.
        pub const Key = math.IntFittingRange(0, max_key);
        const Self = @This();
        key: Key,
        body: [layout.value_size - layout.key_size]u8,

        comptime {
            assert(@sizeOf(Key) == layout.key_size);
            assert(@sizeOf(Self) == layout.value_size);
        }

        /// Extracts the key from a value.
        inline fn key_from_value(self: Self) Key {
            return self.key;
        }

        /// Identity mapping, for searching a slice that holds keys rather than values.
        inline fn key_from_key(x: Key) Key {
            return x;
        }

        /// Orders two keys numerically.
        inline fn key_compare(a: Key, b: Key) math.Order {
            return math.order(a, b);
        }
    };
}
|
|
198
|
+
|
|
199
|
+
/// Measurements of one benchmark run, as produced by `Benchmark.end`.
const BenchmarkResult = struct {
    /// Wall-clock time, in nanoseconds.
    wall_time: u64,
    /// User CPU time, in nanoseconds.
    utime: u64,
    cpu_cycles: usize,
    instructions: usize,
    cache_references: usize,
    cache_misses: usize,
    branch_misses: usize,
};
|
|
208
|
+
|
|
209
|
+
// Short aliases for the perf bindings in ./benchmarks/perf.zig.
const PERF = perf.PERF;
const perf_event_attr = perf.perf_event_attr;
const perf_event_open = perf.perf_event_open;

/// Hardware counters sampled for every benchmark run.
/// The order matters: `Benchmark.end` reads the counters back by position.
const perf_counters = [_]PERF.COUNT.HW{
    .CPU_CYCLES,
    .INSTRUCTIONS,
    .CACHE_REFERENCES,
    .CACHE_MISSES,
    .BRANCH_MISSES,
};
|
|
219
|
+
|
|
220
|
+
/// Measures one benchmark run: wall time, user CPU time and the hardware counters listed in
/// `perf_counters`. Call `begin()` right before the measured work and `end()` right after it.
const Benchmark = struct {
    timer: std.time.Timer,
    rusage: std.os.rusage,
    perf_fds: [perf_counters.len]std.os.fd_t,

    /// Reports whether the return value of a raw Linux syscall wrapper denotes an error.
    /// These wrappers return the negated errno reinterpreted as `usize`, not -1.
    fn syscall_failed(rc: usize) bool {
        const signed = @bitCast(isize, rc);
        return signed > -4096 and signed < 0;
    }

    /// Opens and enables one perf event per entry of `perf_counters`, then starts the clocks.
    /// Returns an error if a counter cannot be opened or enabled, or if the timer cannot be
    /// started; counters that were already opened are closed again in that case.
    fn begin() !Benchmark {
        var perf_fds = [1]std.os.fd_t{-1} ** perf_counters.len;
        errdefer {
            for (perf_fds) |fd| {
                if (fd != -1) std.os.close(fd);
            }
        }
        for (perf_counters) |counter, i| {
            var attr: perf_event_attr = .{
                .type = PERF.TYPE.HARDWARE,
                .config = @enumToInt(counter),
                .flags = .{
                    .disabled = true,
                    .exclude_kernel = true,
                    .exclude_hv = true,
                },
            };
            // `perf_fds[0]` is still -1 while opening the first counter and is passed as the
            // group fd for all subsequent counters.
            perf_fds[i] = try perf_event_open(&attr, 0, -1, perf_fds[0], PERF.FLAG.FD_CLOEXEC);
        }
        const rc = std.os.linux.ioctl(perf_fds[0], PERF.EVENT_IOC.ENABLE, PERF.IOC_FLAG_GROUP);
        if (syscall_failed(rc)) return error.Unexpected;

        // Start the wall clock after perf, since setup is slow.
        const timer = try std.time.Timer.start();
        return Benchmark{
            .timer = timer,
            // TODO pass std.os.linux.rusage.SELF once Zig is upgraded
            .rusage = std.os.getrusage(0),
            .perf_fds = perf_fds,
        };
    }

    /// Stops the measurement and returns every metric divided by `samples`.
    /// The perf file descriptors are closed (and reset to -1) whether or not an error is returned.
    fn end(self: *Benchmark, samples: usize) !BenchmarkResult {
        defer {
            for (perf_counters) |_, i| {
                std.os.close(self.perf_fds[i]);
                self.perf_fds[i] = -1;
            }
        }

        const rusage = std.os.getrusage(0);
        const rc = std.os.linux.ioctl(self.perf_fds[0], PERF.EVENT_IOC.DISABLE, PERF.IOC_FLAG_GROUP);
        if (syscall_failed(rc)) return error.Unexpected;
        // The fd indices below follow the order of `perf_counters`.
        return BenchmarkResult{
            .wall_time = self.timer.read() / samples,
            .utime = (timeval_to_ns(rusage.utime) - timeval_to_ns(self.rusage.utime)) / samples,
            .cpu_cycles = (try readPerfFd(self.perf_fds[0])) / samples,
            .instructions = (try readPerfFd(self.perf_fds[1])) / samples,
            .cache_references = (try readPerfFd(self.perf_fds[2])) / samples,
            .cache_misses = (try readPerfFd(self.perf_fds[3])) / samples,
            .branch_misses = (try readPerfFd(self.perf_fds[4])) / samples,
        };
    }
};
|
|
275
|
+
|
|
276
|
+
/// Returns the indices 0, 1, …, n-1 in an order shuffled by `rand`: shuffle([0,1,…,n-1]).
fn shuffled_index(comptime n: usize, rand: *std.rand.Random) [n]usize {
    var order: [n]usize = undefined;
    var position: usize = 0;
    while (position < n) : (position += 1) {
        order[position] = position;
    }
    rand.shuffle(usize, &order);
    return order;
}
|
|
283
|
+
|
|
284
|
+
/// Converts a `timeval` (seconds plus microseconds) into a total number of nanoseconds.
/// Both fields are reinterpreted as unsigned 64-bit integers.
fn timeval_to_ns(tv: std.os.timeval) u64 {
    const ns_per_us = std.time.ns_per_s / std.time.us_per_s;
    const seconds = @bitCast(u64, tv.tv_sec);
    const microseconds = @bitCast(u64, tv.tv_usec);
    return seconds * std.time.ns_per_s + microseconds * ns_per_us;
}
|
|
289
|
+
|
|
290
|
+
/// Reads one `usize`-sized counter value from `fd`.
/// Asserts that the read returned exactly `@sizeOf(usize)` bytes.
fn readPerfFd(fd: std.os.fd_t) !usize {
    var counter: usize = 0;
    const bytes_read = try std.os.read(fd, std.mem.asBytes(&counter));
    assert(bytes_read == @sizeOf(usize));
    return counter;
}
|
|
297
|
+
|
|
298
|
+
/// Binary searches the summary `keys` for `key` and returns the slice of `values` to search next.
/// Each summary key corresponds to a stride of `layout.values_count / layout.keys_count` values.
/// On an exact summary match a single-value slice is returned, otherwise the whole stride that
/// ends at the matched position.
fn binary_search_keys(
    comptime layout: Layout,
    comptime Key: type,
    comptime V: type,
    comptime compare_keys: fn (Key, Key) math.Order,
    keys: []const Key,
    values: []const V,
    key: Key,
) []const V {
    assert(keys.len == layout.keys_count);
    assert(values.len == layout.values_count);

    const stride = layout.values_count / layout.keys_count;
    const index = binary_search(Key, Key, V.key_from_key, compare_keys, keys, key);
    const upper = index * stride;

    const exact_match = index < keys.len and keys[index] == key;
    // NOTE(review): if `index` is 0 without an exact match, `upper - stride` underflows —
    // confirm whether `binary_search` can return 0 in that case.
    if (!exact_match) return values[upper - stride .. upper];
    if (upper == 0) return values[0..1];
    return values[upper - 1 .. upper];
}
|
|
@@ -34,6 +34,10 @@ pub fn FIFO(comptime T: type) type {
|
|
|
34
34
|
return self.out;
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
/// Returns true when `peek()` yields null, i.e. there is no element to take.
pub fn empty(self: Self) bool {
    const head = self.peek();
    return head == null;
}
|
|
40
|
+
|
|
37
41
|
/// Remove an element from the FIFO. Asserts that the element is
|
|
38
42
|
/// in the FIFO. This operation is O(N), if this is done often you
|
|
39
43
|
/// probably want a different data structure.
|
|
@@ -55,7 +59,7 @@ pub fn FIFO(comptime T: type) type {
|
|
|
55
59
|
};
|
|
56
60
|
}
|
|
57
61
|
|
|
58
|
-
test "push/pop/peek/remove" {
|
|
62
|
+
test "push/pop/peek/remove/empty" {
|
|
59
63
|
const testing = @import("std").testing;
|
|
60
64
|
|
|
61
65
|
const Foo = struct { next: ?*@This() = null };
|
|
@@ -65,34 +69,45 @@ test "push/pop/peek/remove" {
|
|
|
65
69
|
var three: Foo = .{};
|
|
66
70
|
|
|
67
71
|
var fifo: FIFO(Foo) = .{};
|
|
72
|
+
try testing.expect(fifo.empty());
|
|
68
73
|
|
|
69
74
|
fifo.push(&one);
|
|
75
|
+
try testing.expect(!fifo.empty());
|
|
70
76
|
try testing.expectEqual(@as(?*Foo, &one), fifo.peek());
|
|
71
77
|
|
|
72
78
|
fifo.push(&two);
|
|
73
79
|
fifo.push(&three);
|
|
80
|
+
try testing.expect(!fifo.empty());
|
|
74
81
|
try testing.expectEqual(@as(?*Foo, &one), fifo.peek());
|
|
75
82
|
|
|
76
83
|
fifo.remove(&one);
|
|
84
|
+
try testing.expect(!fifo.empty());
|
|
77
85
|
try testing.expectEqual(@as(?*Foo, &two), fifo.pop());
|
|
78
86
|
try testing.expectEqual(@as(?*Foo, &three), fifo.pop());
|
|
79
87
|
try testing.expectEqual(@as(?*Foo, null), fifo.pop());
|
|
88
|
+
try testing.expect(fifo.empty());
|
|
80
89
|
|
|
81
90
|
fifo.push(&one);
|
|
82
91
|
fifo.push(&two);
|
|
83
92
|
fifo.push(&three);
|
|
84
93
|
fifo.remove(&two);
|
|
94
|
+
try testing.expect(!fifo.empty());
|
|
85
95
|
try testing.expectEqual(@as(?*Foo, &one), fifo.pop());
|
|
86
96
|
try testing.expectEqual(@as(?*Foo, &three), fifo.pop());
|
|
87
97
|
try testing.expectEqual(@as(?*Foo, null), fifo.pop());
|
|
98
|
+
try testing.expect(fifo.empty());
|
|
88
99
|
|
|
89
100
|
fifo.push(&one);
|
|
90
101
|
fifo.push(&two);
|
|
91
102
|
fifo.push(&three);
|
|
92
103
|
fifo.remove(&three);
|
|
104
|
+
try testing.expect(!fifo.empty());
|
|
93
105
|
try testing.expectEqual(@as(?*Foo, &one), fifo.pop());
|
|
106
|
+
try testing.expect(!fifo.empty());
|
|
94
107
|
try testing.expectEqual(@as(?*Foo, &two), fifo.pop());
|
|
108
|
+
try testing.expect(fifo.empty());
|
|
95
109
|
try testing.expectEqual(@as(?*Foo, null), fifo.pop());
|
|
110
|
+
try testing.expect(fifo.empty());
|
|
96
111
|
|
|
97
112
|
fifo.push(&one);
|
|
98
113
|
fifo.push(&two);
|
|
@@ -101,4 +116,5 @@ test "push/pop/peek/remove" {
|
|
|
101
116
|
try testing.expectEqual(@as(?*Foo, &one), fifo.pop());
|
|
102
117
|
try testing.expectEqual(@as(?*Foo, &three), fifo.pop());
|
|
103
118
|
try testing.expectEqual(@as(?*Foo, null), fifo.pop());
|
|
119
|
+
try testing.expect(fifo.empty());
|
|
104
120
|
}
|
|
@@ -83,7 +83,7 @@ pub const IO = struct {
|
|
|
83
83
|
const change_events = self.flush_io(&events, &io_pending);
|
|
84
84
|
|
|
85
85
|
// Only call kevent() if we need to submit io events or if we need to wait for completions.
|
|
86
|
-
if (change_events > 0 or self.completed.
|
|
86
|
+
if (change_events > 0 or self.completed.empty()) {
|
|
87
87
|
// Zero timeouts for kevent() implies a non-blocking poll
|
|
88
88
|
var ts = std.mem.zeroes(os.timespec);
|
|
89
89
|
|
|
@@ -91,7 +91,7 @@ pub const IO = struct {
|
|
|
91
91
|
// We should never wait indefinitely (timeout_ptr = null for kevent) given:
|
|
92
92
|
// - tick() is non-blocking (wait_for_completions = false)
|
|
93
93
|
// - run_for_ns() always submits a timeout
|
|
94
|
-
if (change_events == 0 and self.completed.
|
|
94
|
+
if (change_events == 0 and self.completed.empty()) {
|
|
95
95
|
if (wait_for_completions) {
|
|
96
96
|
const timeout_ns = next_timeout orelse @panic("kevent() blocking forever");
|
|
97
97
|
ts.tv_nsec = @intCast(@TypeOf(ts.tv_nsec), timeout_ns % std.time.ns_per_s);
|
|
@@ -430,6 +430,7 @@ pub const IO = struct {
|
|
|
430
430
|
IsDir,
|
|
431
431
|
SystemResources,
|
|
432
432
|
Unseekable,
|
|
433
|
+
ConnectionTimedOut,
|
|
433
434
|
} || os.UnexpectedError;
|
|
434
435
|
|
|
435
436
|
pub fn read(
|
|
@@ -481,6 +482,7 @@ pub const IO = struct {
|
|
|
481
482
|
.NXIO => error.Unseekable,
|
|
482
483
|
.OVERFLOW => error.Unseekable,
|
|
483
484
|
.SPIPE => error.Unseekable,
|
|
485
|
+
.TIMEDOUT => error.ConnectionTimedOut,
|
|
484
486
|
else => |err| os.unexpectedErrno(err),
|
|
485
487
|
};
|
|
486
488
|
}
|
|
@@ -635,10 +637,12 @@ pub const IO = struct {
|
|
|
635
637
|
}
|
|
636
638
|
|
|
637
639
|
/// Opens a directory with read only access.
|
|
638
|
-
pub fn open_dir(dir_path: [
|
|
639
|
-
return os.
|
|
640
|
+
pub fn open_dir(dir_path: []const u8) !os.fd_t {
    // Read-only, and closed automatically on exec.
    const flags = os.O.CLOEXEC | os.O.RDONLY;
    return os.open(dir_path, flags, 0);
}
|
|
641
643
|
|
|
644
|
+
pub const INVALID_FILE: os.fd_t = -1;
|
|
645
|
+
|
|
642
646
|
/// Opens or creates a journal file:
|
|
643
647
|
/// - For reading and writing.
|
|
644
648
|
/// - For Direct I/O (required on darwin).
|
|
@@ -648,14 +652,11 @@ pub const IO = struct {
|
|
|
648
652
|
/// The caller is responsible for ensuring that the parent directory inode is durable.
|
|
649
653
|
/// - Verifies that the file size matches the expected file size before returning.
|
|
650
654
|
pub fn open_file(
|
|
651
|
-
self: *IO,
|
|
652
655
|
dir_fd: os.fd_t,
|
|
653
|
-
relative_path: [
|
|
656
|
+
relative_path: []const u8,
|
|
654
657
|
size: u64,
|
|
655
658
|
must_create: bool,
|
|
656
659
|
) !os.fd_t {
|
|
657
|
-
_ = self;
|
|
658
|
-
|
|
659
660
|
assert(relative_path.len > 0);
|
|
660
661
|
assert(size >= config.sector_size);
|
|
661
662
|
assert(size % config.sector_size == 0);
|
|
@@ -685,7 +686,7 @@ pub const IO = struct {
|
|
|
685
686
|
|
|
686
687
|
// Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
|
|
687
688
|
assert(!std.fs.path.isAbsolute(relative_path));
|
|
688
|
-
const fd = try os.
|
|
689
|
+
const fd = try os.openat(dir_fd, relative_path, flags, mode);
|
|
689
690
|
// TODO Return a proper error message when the path exists or does not exist (init/start).
|
|
690
691
|
errdefer os.close(fd);
|
|
691
692
|
|
|
@@ -720,8 +721,9 @@ pub const IO = struct {
|
|
|
720
721
|
// We always do this when opening because we don't know if this was done before crashing.
|
|
721
722
|
try fs_sync(dir_fd);
|
|
722
723
|
|
|
724
|
+
// TODO Document that `size` is now `data_file_size_min` from `main.zig`.
|
|
723
725
|
const stat = try os.fstat(fd);
|
|
724
|
-
if (stat.size
|
|
726
|
+
if (stat.size < size) @panic("data file inode size was truncated or corrupted");
|
|
725
727
|
|
|
726
728
|
return fd;
|
|
727
729
|
}
|
|
@@ -22,6 +22,14 @@ pub const IO = struct {
|
|
|
22
22
|
completed: FIFO(Completion) = .{},
|
|
23
23
|
|
|
24
24
|
/// Creates an `IO` backed by an io_uring with the given number of `entries` and setup `flags`.
/// Panics if the running kernel is older than Linux 5.5; returns an error if the kernel
/// release string cannot be parsed or the ring cannot be initialized.
pub fn init(entries: u12, flags: u32) !IO {
    // Detect the linux version to ensure that we support all io_uring ops used.
    const uts = std.os.uname();
    const release = std.mem.sliceTo(&uts.release, 0);
    const version = try std.builtin.Version.parse(release);
    // Compare (major, minor) lexicographically: the minor number only matters within the 5.x
    // series. Testing `minor < 5` on its own would also reject newer majors such as 6.1.
    const too_old = version.major < 5 or (version.major == 5 and version.minor < 5);
    if (too_old) {
        @panic("Linux kernel 5.5 or greater is required for io_uring OP_ACCEPT");
    }

    return IO{ .ring = try IO_Uring.init(entries, flags) };
}
|
|
27
35
|
|
|
@@ -334,6 +342,7 @@ pub const IO = struct {
|
|
|
334
342
|
.NXIO => error.Unseekable,
|
|
335
343
|
.OVERFLOW => error.Unseekable,
|
|
336
344
|
.SPIPE => error.Unseekable,
|
|
345
|
+
.TIMEDOUT => error.ConnectionTimedOut,
|
|
337
346
|
else => |errno| os.unexpectedErrno(errno),
|
|
338
347
|
};
|
|
339
348
|
break :blk err;
|
|
@@ -360,6 +369,8 @@ pub const IO = struct {
|
|
|
360
369
|
.NOTCONN => error.SocketNotConnected,
|
|
361
370
|
.NOTSOCK => error.FileDescriptorNotASocket,
|
|
362
371
|
.CONNRESET => error.ConnectionResetByPeer,
|
|
372
|
+
.TIMEDOUT => error.ConnectionTimedOut,
|
|
373
|
+
.OPNOTSUPP => error.OperationNotSupported,
|
|
363
374
|
else => |errno| os.unexpectedErrno(errno),
|
|
364
375
|
};
|
|
365
376
|
break :blk err;
|
|
@@ -394,6 +405,7 @@ pub const IO = struct {
|
|
|
394
405
|
.NOTSOCK => error.FileDescriptorNotASocket,
|
|
395
406
|
.OPNOTSUPP => error.OperationNotSupported,
|
|
396
407
|
.PIPE => error.BrokenPipe,
|
|
408
|
+
.TIMEDOUT => error.ConnectionTimedOut,
|
|
397
409
|
else => |errno| os.unexpectedErrno(errno),
|
|
398
410
|
};
|
|
399
411
|
break :blk err;
|
|
@@ -591,6 +603,7 @@ pub const IO = struct {
|
|
|
591
603
|
PermissionDenied,
|
|
592
604
|
ProtocolNotSupported,
|
|
593
605
|
ConnectionTimedOut,
|
|
606
|
+
SystemResources,
|
|
594
607
|
} || os.UnexpectedError;
|
|
595
608
|
|
|
596
609
|
pub fn connect(
|
|
@@ -637,6 +650,7 @@ pub const IO = struct {
|
|
|
637
650
|
IsDir,
|
|
638
651
|
SystemResources,
|
|
639
652
|
Unseekable,
|
|
653
|
+
ConnectionTimedOut,
|
|
640
654
|
} || os.UnexpectedError;
|
|
641
655
|
|
|
642
656
|
pub fn read(
|
|
@@ -683,6 +697,8 @@ pub const IO = struct {
|
|
|
683
697
|
SystemResources,
|
|
684
698
|
SocketNotConnected,
|
|
685
699
|
FileDescriptorNotASocket,
|
|
700
|
+
ConnectionTimedOut,
|
|
701
|
+
OperationNotSupported,
|
|
686
702
|
} || os.UnexpectedError;
|
|
687
703
|
|
|
688
704
|
pub fn recv(
|
|
@@ -733,6 +749,7 @@ pub const IO = struct {
|
|
|
733
749
|
FileDescriptorNotASocket,
|
|
734
750
|
OperationNotSupported,
|
|
735
751
|
BrokenPipe,
|
|
752
|
+
ConnectionTimedOut,
|
|
736
753
|
} || os.UnexpectedError;
|
|
737
754
|
|
|
738
755
|
pub fn send(
|
|
@@ -867,10 +884,12 @@ pub const IO = struct {
|
|
|
867
884
|
}
|
|
868
885
|
|
|
869
886
|
/// Opens a directory with read only access.
|
|
870
|
-
pub fn open_dir(dir_path: [
|
|
871
|
-
return os.
|
|
887
|
+
pub fn open_dir(dir_path: []const u8) !os.fd_t {
    // Read-only, and closed automatically on exec.
    const flags = os.O.CLOEXEC | os.O.RDONLY;
    return os.open(dir_path, flags, 0);
}
|
|
873
890
|
|
|
891
|
+
pub const INVALID_FILE: os.fd_t = -1;
|
|
892
|
+
|
|
874
893
|
/// Opens or creates a journal file:
|
|
875
894
|
/// - For reading and writing.
|
|
876
895
|
/// - For Direct I/O (if possible in development mode, but required in production mode).
|
|
@@ -880,14 +899,11 @@ pub const IO = struct {
|
|
|
880
899
|
/// The caller is responsible for ensuring that the parent directory inode is durable.
|
|
881
900
|
/// - Verifies that the file size matches the expected file size before returning.
|
|
882
901
|
pub fn open_file(
|
|
883
|
-
self: *IO,
|
|
884
902
|
dir_fd: os.fd_t,
|
|
885
|
-
relative_path: [
|
|
903
|
+
relative_path: []const u8,
|
|
886
904
|
size: u64,
|
|
887
905
|
must_create: bool,
|
|
888
906
|
) !os.fd_t {
|
|
889
|
-
_ = self;
|
|
890
|
-
|
|
891
907
|
assert(relative_path.len > 0);
|
|
892
908
|
assert(size >= config.sector_size);
|
|
893
909
|
assert(size % config.sector_size == 0);
|
|
@@ -929,7 +945,7 @@ pub const IO = struct {
|
|
|
929
945
|
|
|
930
946
|
// Be careful with openat(2): "If pathname is absolute, then dirfd is ignored." (man page)
|
|
931
947
|
assert(!std.fs.path.isAbsolute(relative_path));
|
|
932
|
-
const fd = try os.
|
|
948
|
+
const fd = try os.openat(dir_fd, relative_path, flags, mode);
|
|
933
949
|
// TODO Return a proper error message when the path exists or does not exist (init/start).
|
|
934
950
|
errdefer os.close(fd);
|
|
935
951
|
|
|
@@ -978,7 +994,7 @@ pub const IO = struct {
|
|
|
978
994
|
try os.fsync(dir_fd);
|
|
979
995
|
|
|
980
996
|
const stat = try os.fstat(fd);
|
|
981
|
-
if (stat.size
|
|
997
|
+
if (stat.size < size) @panic("data file inode size was truncated or corrupted");
|
|
982
998
|
|
|
983
999
|
return fd;
|
|
984
1000
|
}
|
|
@@ -995,7 +1011,7 @@ pub const IO = struct {
|
|
|
995
1011
|
defer dir.deleteFile(path) catch {};
|
|
996
1012
|
|
|
997
1013
|
while (true) {
|
|
998
|
-
const res = os.
|
|
1014
|
+
const res = os.linux.openat(dir_fd, path, os.O.CLOEXEC | os.O.RDONLY | os.O.DIRECT, 0);
|
|
999
1015
|
switch (os.linux.getErrno(res)) {
|
|
1000
1016
|
.SUCCESS => {
|
|
1001
1017
|
os.close(@intCast(os.fd_t, res));
|
|
@@ -737,6 +737,7 @@ pub const IO = struct {
|
|
|
737
737
|
IsDir,
|
|
738
738
|
SystemResources,
|
|
739
739
|
Unseekable,
|
|
740
|
+
ConnectionTimedOut,
|
|
740
741
|
} || os.UnexpectedError;
|
|
741
742
|
|
|
742
743
|
pub fn read(
|
|
@@ -918,11 +919,13 @@ pub const IO = struct {
|
|
|
918
919
|
}
|
|
919
920
|
|
|
920
921
|
/// Opens a directory with read only access.
|
|
921
|
-
pub fn open_dir(dir_path: [
|
|
922
|
-
const dir = try std.fs.cwd().
|
|
922
|
+
pub fn open_dir(dir_path: []const u8) !os.fd_t {
    // Open through the current working directory and return the directory's underlying handle.
    const cwd = std.fs.cwd();
    const opened = try cwd.openDir(dir_path, .{});
    return opened.fd;
}
|
|
925
926
|
|
|
927
|
+
pub const INVALID_FILE = os.windows.INVALID_HANDLE_VALUE;
|
|
928
|
+
|
|
926
929
|
/// Opens or creates a journal file:
|
|
927
930
|
/// - For reading and writing.
|
|
928
931
|
/// - For Direct I/O (required on windows).
|
|
@@ -932,14 +935,11 @@ pub const IO = struct {
|
|
|
932
935
|
/// The caller is responsible for ensuring that the parent directory inode is durable.
|
|
933
936
|
/// - Verifies that the file size matches the expected file size before returning.
|
|
934
937
|
pub fn open_file(
|
|
935
|
-
self: *IO,
|
|
936
938
|
dir_handle: os.fd_t,
|
|
937
|
-
relative_path: [
|
|
939
|
+
relative_path: []const u8,
|
|
938
940
|
size: u64,
|
|
939
941
|
must_create: bool,
|
|
940
942
|
) !os.fd_t {
|
|
941
|
-
_ = self;
|
|
942
|
-
|
|
943
943
|
assert(relative_path.len > 0);
|
|
944
944
|
assert(size >= config.sector_size);
|
|
945
945
|
assert(size % config.sector_size == 0);
|
|
@@ -988,8 +988,12 @@ pub const IO = struct {
|
|
|
988
988
|
|
|
989
989
|
if (handle == os.windows.INVALID_HANDLE_VALUE) {
|
|
990
990
|
return switch (os.windows.kernel32.GetLastError()) {
|
|
991
|
-
.
|
|
992
|
-
|
|
991
|
+
.FILE_NOT_FOUND => error.FileNotFound,
|
|
992
|
+
.SHARING_VIOLATION, .ACCESS_DENIED => error.AccessDenied,
|
|
993
|
+
else => |err| {
|
|
994
|
+
log.warn("CreateFileW(): {}", .{err});
|
|
995
|
+
return os.windows.unexpectedError(err);
|
|
996
|
+
},
|
|
993
997
|
};
|
|
994
998
|
}
|
|
995
999
|
|
|
@@ -1034,7 +1038,7 @@ pub const IO = struct {
|
|
|
1034
1038
|
_ = dir_handle;
|
|
1035
1039
|
|
|
1036
1040
|
const file_size = try os.windows.GetFileSizeEx(handle);
|
|
1037
|
-
if (file_size
|
|
1041
|
+
if (file_size < size) @panic("data file inode size was truncated or corrupted");
|
|
1038
1042
|
|
|
1039
1043
|
return handle;
|
|
1040
1044
|
}
|