tigerbeetle-node 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -47
- package/dist/benchmark.js +15 -15
- package/dist/benchmark.js.map +1 -1
- package/dist/index.d.ts +66 -61
- package/dist/index.js +66 -61
- package/dist/index.js.map +1 -1
- package/dist/test.js +1 -1
- package/dist/test.js.map +1 -1
- package/package.json +14 -16
- package/scripts/download_node_headers.sh +3 -1
- package/src/index.ts +5 -0
- package/src/node.zig +18 -19
- package/src/tigerbeetle/scripts/benchmark.bat +47 -46
- package/src/tigerbeetle/scripts/benchmark.sh +25 -10
- package/src/tigerbeetle/scripts/install.sh +2 -1
- package/src/tigerbeetle/scripts/install_zig.bat +109 -109
- package/src/tigerbeetle/scripts/install_zig.sh +18 -18
- package/src/tigerbeetle/scripts/upgrade_ubuntu_kernel.sh +12 -3
- package/src/tigerbeetle/scripts/vopr.bat +47 -47
- package/src/tigerbeetle/scripts/vopr.sh +5 -5
- package/src/tigerbeetle/src/benchmark.zig +17 -9
- package/src/tigerbeetle/src/benchmark_array_search.zig +317 -0
- package/src/tigerbeetle/src/benchmarks/perf.zig +299 -0
- package/src/tigerbeetle/src/c/tb_client/context.zig +103 -0
- package/src/tigerbeetle/src/c/tb_client/packet.zig +80 -0
- package/src/tigerbeetle/src/c/tb_client/signal.zig +288 -0
- package/src/tigerbeetle/src/c/tb_client/thread.zig +329 -0
- package/src/tigerbeetle/src/c/tb_client.h +201 -0
- package/src/tigerbeetle/src/c/tb_client.zig +101 -0
- package/src/tigerbeetle/src/c/test.zig +1 -0
- package/src/tigerbeetle/src/cli.zig +142 -83
- package/src/tigerbeetle/src/config.zig +136 -23
- package/src/tigerbeetle/src/demo.zig +12 -8
- package/src/tigerbeetle/src/demo_03_create_transfers.zig +3 -3
- package/src/tigerbeetle/src/demo_04_create_pending_transfers.zig +10 -10
- package/src/tigerbeetle/src/demo_05_post_pending_transfers.zig +7 -7
- package/src/tigerbeetle/src/demo_06_void_pending_transfers.zig +3 -3
- package/src/tigerbeetle/src/demo_07_lookup_transfers.zig +1 -1
- package/src/tigerbeetle/src/ewah.zig +318 -0
- package/src/tigerbeetle/src/ewah_benchmark.zig +121 -0
- package/src/tigerbeetle/src/eytzinger_benchmark.zig +317 -0
- package/src/tigerbeetle/src/fifo.zig +17 -1
- package/src/tigerbeetle/src/io/darwin.zig +12 -10
- package/src/tigerbeetle/src/io/linux.zig +25 -9
- package/src/tigerbeetle/src/io/windows.zig +13 -9
- package/src/tigerbeetle/src/iops.zig +101 -0
- package/src/tigerbeetle/src/lsm/binary_search.zig +214 -0
- package/src/tigerbeetle/src/lsm/bloom_filter.zig +82 -0
- package/src/tigerbeetle/src/lsm/compaction.zig +603 -0
- package/src/tigerbeetle/src/lsm/composite_key.zig +75 -0
- package/src/tigerbeetle/src/lsm/direction.zig +11 -0
- package/src/tigerbeetle/src/lsm/eytzinger.zig +587 -0
- package/src/tigerbeetle/src/lsm/forest.zig +630 -0
- package/src/tigerbeetle/src/lsm/grid.zig +473 -0
- package/src/tigerbeetle/src/lsm/groove.zig +939 -0
- package/src/tigerbeetle/src/lsm/k_way_merge.zig +452 -0
- package/src/tigerbeetle/src/lsm/level_iterator.zig +296 -0
- package/src/tigerbeetle/src/lsm/manifest.zig +680 -0
- package/src/tigerbeetle/src/lsm/manifest_level.zig +1169 -0
- package/src/tigerbeetle/src/lsm/manifest_log.zig +904 -0
- package/src/tigerbeetle/src/lsm/node_pool.zig +231 -0
- package/src/tigerbeetle/src/lsm/posted_groove.zig +399 -0
- package/src/tigerbeetle/src/lsm/segmented_array.zig +998 -0
- package/src/tigerbeetle/src/lsm/set_associative_cache.zig +844 -0
- package/src/tigerbeetle/src/lsm/table.zig +932 -0
- package/src/tigerbeetle/src/lsm/table_immutable.zig +196 -0
- package/src/tigerbeetle/src/lsm/table_iterator.zig +295 -0
- package/src/tigerbeetle/src/lsm/table_mutable.zig +123 -0
- package/src/tigerbeetle/src/lsm/test.zig +429 -0
- package/src/tigerbeetle/src/lsm/tree.zig +1085 -0
- package/src/tigerbeetle/src/main.zig +121 -95
- package/src/tigerbeetle/src/message_bus.zig +49 -48
- package/src/tigerbeetle/src/message_pool.zig +19 -3
- package/src/tigerbeetle/src/ring_buffer.zig +172 -31
- package/src/tigerbeetle/src/simulator.zig +171 -43
- package/src/tigerbeetle/src/state_machine.zig +1026 -599
- package/src/tigerbeetle/src/storage.zig +46 -16
- package/src/tigerbeetle/src/test/cluster.zig +257 -78
- package/src/tigerbeetle/src/test/message_bus.zig +15 -24
- package/src/tigerbeetle/src/test/network.zig +26 -17
- package/src/tigerbeetle/src/test/packet_simulator.zig +14 -1
- package/src/tigerbeetle/src/test/state_checker.zig +10 -6
- package/src/tigerbeetle/src/test/state_machine.zig +159 -68
- package/src/tigerbeetle/src/test/storage.zig +137 -49
- package/src/tigerbeetle/src/tigerbeetle.zig +5 -0
- package/src/tigerbeetle/src/unit_tests.zig +8 -0
- package/src/tigerbeetle/src/util.zig +51 -0
- package/src/tigerbeetle/src/vsr/client.zig +21 -7
- package/src/tigerbeetle/src/vsr/journal.zig +1429 -514
- package/src/tigerbeetle/src/vsr/replica.zig +1855 -550
- package/src/tigerbeetle/src/vsr/superblock.zig +1743 -0
- package/src/tigerbeetle/src/vsr/superblock_client_table.zig +258 -0
- package/src/tigerbeetle/src/vsr/superblock_free_set.zig +644 -0
- package/src/tigerbeetle/src/vsr/superblock_manifest.zig +546 -0
- package/src/tigerbeetle/src/vsr.zig +134 -52
- package/.yarn/releases/yarn-berry.cjs +0 -55
- package/.yarnrc.yml +0 -1
- package/scripts/postinstall.sh +0 -6
- package/yarn.lock +0 -42
|
@@ -9,8 +9,10 @@ pub const log_level: std.log.Level = .err;
|
|
|
9
9
|
const cli = @import("cli.zig");
|
|
10
10
|
const IO = @import("io.zig").IO;
|
|
11
11
|
|
|
12
|
+
const Storage = @import("storage.zig").Storage;
|
|
13
|
+
const MessagePool = @import("message_pool.zig").MessagePool;
|
|
12
14
|
const MessageBus = @import("message_bus.zig").MessageBusClient;
|
|
13
|
-
const StateMachine = @import("state_machine.zig").
|
|
15
|
+
const StateMachine = @import("state_machine.zig").StateMachineType(Storage);
|
|
14
16
|
const RingBuffer = @import("ring_buffer.zig").RingBuffer;
|
|
15
17
|
|
|
16
18
|
const vsr = @import("vsr.zig");
|
|
@@ -76,22 +78,28 @@ pub fn main() !void {
|
|
|
76
78
|
var address = [_]std.net.Address{try std.net.Address.parseIp4("127.0.0.1", config.port)};
|
|
77
79
|
|
|
78
80
|
var io = try IO.init(32, 0);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
+
defer io.deinit();
|
|
82
|
+
|
|
83
|
+
var message_pool = try MessagePool.init(allocator, .client);
|
|
84
|
+
defer message_pool.deinit(allocator);
|
|
81
85
|
|
|
82
86
|
var client = try Client.init(
|
|
83
87
|
allocator,
|
|
84
88
|
client_id,
|
|
85
89
|
cluster_id,
|
|
86
90
|
@intCast(u8, address.len),
|
|
87
|
-
&
|
|
91
|
+
&message_pool,
|
|
92
|
+
.{
|
|
93
|
+
.configuration = address[0..],
|
|
94
|
+
.io = &io,
|
|
95
|
+
},
|
|
88
96
|
);
|
|
89
|
-
defer client.deinit();
|
|
90
|
-
|
|
91
|
-
message_bus.set_on_message(*Client, &client, Client.on_message);
|
|
97
|
+
defer client.deinit(allocator);
|
|
92
98
|
|
|
93
99
|
// Pre-allocate a million transfers:
|
|
94
|
-
const transfers = try
|
|
100
|
+
const transfers = try allocator.alloc(tb.Transfer, transfers_max);
|
|
101
|
+
defer allocator.free(transfers);
|
|
102
|
+
|
|
95
103
|
for (transfers) |*transfer, index| {
|
|
96
104
|
transfer.* = .{
|
|
97
105
|
.id = index,
|
|
@@ -158,7 +166,7 @@ const TimedQueue = struct {
|
|
|
158
166
|
transfers_latency_max: i64,
|
|
159
167
|
client: *Client,
|
|
160
168
|
io: *IO,
|
|
161
|
-
batches: RingBuffer(Batch, batches_count),
|
|
169
|
+
batches: RingBuffer(Batch, batches_count, .array),
|
|
162
170
|
|
|
163
171
|
pub fn init(client: *Client, io: *IO) TimedQueue {
|
|
164
172
|
var self = TimedQueue{
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const assert = std.debug.assert;
|
|
3
|
+
const math = std.math;
|
|
4
|
+
|
|
5
|
+
const binary_search = @import("./binary_search.zig").binary_search;
|
|
6
|
+
const eytzinger = @import("./eytzinger.zig").eytzinger;
|
|
7
|
+
const perf = @import("./benchmarks/perf.zig");
|
|
8
|
+
|
|
9
|
+
/// Size in bytes of the single buffer that backs every page (1 GiB).
const GiB = 1 << 30;

/// Number of searches timed for each layout and search strategy.
const searches = 500_000;

/// Key/value sizes, in bytes, to benchmark.
const kv_types = .{
    .{ .key_size = 8, .value_size = 128 },
    .{ .key_size = 8, .value_size = 64 },
    .{ .key_size = 16, .value_size = 16 },
    .{ .key_size = 32, .value_size = 32 },
};

/// Divisors applied to the values-per-page count to get the summary key count:
/// keys_per_summary = values_per_page / summary_fraction
const summary_fractions = .{ 4, 8, 16, 32 };

/// Page sizes to benchmark, expressed as values per page.
const values_per_page = .{ 128, 256, 512, 1024, 2048, 4096, 8192 };

/// Format of one result row: key size, value size, keys per page, values per page,
/// two strategy tags, then the per-search measurements.
const body_fmt = "{:_>2}B/{:_>3}B {:_>4}/{:_>4} {s}{s}: WT={:_>6}ns UT={:_>6}ns" ++
    " CY={:_>6} IN={:_>6} CR={:_>5} CM={:_>5} BM={}\n";
|
|
24
|
+
|
|
25
|
+
/// Summary key counts for every entry of `values_per_page`, computed at compile time.
/// Row `v` holds `values_per_page[v] / fraction` for each of `summary_fractions`.
const summary_sizes = blk: {
    var table: [values_per_page.len][summary_fractions.len]usize = undefined;
    for (values_per_page) |page_values, row| {
        for (summary_fractions) |divisor, k| {
            // The fractions ascend, so the quotients descend: fill each row from the
            // back so that the summary sizes within a row ascend.
            const column = summary_fractions.len - 1 - k;
            table[row][column] = page_values / divisor;
        }
    }
    break :blk table;
};
|
|
35
|
+
|
|
36
|
+
/// Logs the legend for the result columns, then runs `run_benchmark` for every
/// combination of key/value size, values per page and summary size.
pub fn main() !void {
    std.log.info("Samples: {}", .{searches});
    // One legend line per result column abbreviation used in `body_fmt`.
    inline for (.{
        "WT: Wall time/search",
        "UT: utime time/search",
        "CY: CPU cycles/search",
        "IN: instructions/search",
        "CR: cache references/search",
        "CM: cache misses/search",
        "BM: branch misses/search",
    }) |legend| {
        std.log.info(legend, .{});
    }

    // Seed the PRNG from the operating system's random source.
    var seed: u64 = undefined;
    try std.os.getrandom(std.mem.asBytes(&seed));
    var prng = std.rand.DefaultPrng.init(seed);

    // A single heap allocation is made up front; every benchmark run carves its
    // pages out of this same buffer, which keeps the total run time down.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    const blob_size = GiB;
    var blob = try arena.allocator.alloc(u8, blob_size);

    inline for (kv_types) |kv| {
        inline for (values_per_page) |values_count, v| {
            inline for (summary_sizes[v]) |keys_count| {
                try run_benchmark(.{
                    .blob_size = blob_size,
                    .key_size = kv.key_size,
                    .value_size = kv.value_size,
                    .keys_count = keys_count,
                    .values_count = values_count,
                    .searches = searches,
                }, blob, &prng.random);
            }
        }
    }
}
|
|
73
|
+
|
|
74
|
+
/// Benchmarks key lookups for one compile-time `layout` and prints two result rows.
///
/// `blob` (which must be exactly `layout.blob_size` bytes) is carved into as many
/// pages as fit. `layout.searches` lookups are then timed twice:
/// - tagged "EB": `Eytzinger.search_values` narrows the page's values to a range,
///   which is then binary searched;
/// - tagged "_B": a binary search over all of the page's values.
/// `random` drives the order in which pages and target keys are visited.
fn run_benchmark(comptime layout: Layout, blob: []u8, random: *std.rand.Random) !void {
    assert(blob.len == layout.blob_size);

    const V = Value(layout);
    const K = V.Key;
    const Eytzinger = eytzinger(layout.keys_count - 1, layout.values_count);
    const Page = struct {
        keys: [layout.keys_count]K,
        values: [layout.values_count]V,
    };
    const page_count = layout.blob_size / @sizeOf(Page);

    // Visit pages and target keys in shuffled order.
    var page_order = shuffled_index(page_count, random);
    var target_order = shuffled_index(layout.values_count, random);

    // Fill the blob with pages (the page size depends on `layout`). Value bodies keep
    // their random bytes; each value's key is set to its index within the page, so
    // every page's values are sorted by key.
    var blob_alloc = std.heap.FixedBufferAllocator.init(blob);
    var pages = try blob_alloc.allocator.alloc(Page, page_count);
    random.bytes(std.mem.sliceAsBytes(pages));
    for (pages) |*page| {
        for (page.values) |*value, index| value.key = index;
        Eytzinger.layout_from_keys_or_values(K, V, V.key_from_value, V.max_key, &page.values, &page.keys);
    }

    const stdout = std.io.getStdOut().writer();

    // Strategy "EB": Eytzinger summary search, then binary search within the bounds.
    {
        var benchmark = try Benchmark.begin();
        var search: usize = 0;
        var round: usize = 0;
        while (search < layout.searches) : (search += 1) {
            const page_index = page_order[search % page_order.len];
            const target = target_order[round % target_order.len];
            const page = &pages[page_index];
            const bounds = Eytzinger.search_values(K, V, V.key_compare, &page.keys, &page.values, target);
            const hit = bounds[binary_search(K, V, V.key_from_value, V.key_compare, bounds, target)];

            assert(hit.key == target);
            // Move on to the next target key each time the page sequence wraps around.
            if (search % pages.len == 0) round += 1;
        }

        const result = try benchmark.end(layout.searches);
        try stdout.print(body_fmt, .{
            layout.key_size,
            layout.value_size,
            layout.keys_count,
            layout.values_count,
            "E",
            "B",
            result.wall_time,
            result.utime,
            result.cpu_cycles,
            result.instructions,
            result.cache_references,
            result.cache_misses,
            result.branch_misses,
        });
    }

    // Strategy "_B": binary search over all of the page's values.
    {
        var benchmark = try Benchmark.begin();
        var search: usize = 0;
        var round: usize = 0;
        while (search < layout.searches) : (search += 1) {
            const target = target_order[round % target_order.len];
            const page = &pages[page_order[search % page_order.len]];
            const hit = page.values[binary_search(K, V, V.key_from_value, V.key_compare, page.values[0..], target)];

            assert(hit.key == target);
            // Move on to the next target key each time the page sequence wraps around.
            if (search % pages.len == 0) round += 1;
        }
        const result = try benchmark.end(layout.searches);
        try stdout.print(body_fmt, .{
            layout.key_size,
            layout.value_size,
            layout.keys_count,
            layout.values_count,
            "_",
            "B",
            result.wall_time,
            result.utime,
            result.cpu_cycles,
            result.instructions,
            result.cache_references,
            result.cache_misses,
            result.branch_misses,
        });
    }
}
|
|
162
|
+
|
|
163
|
+
/// Compile-time parameters describing one benchmark configuration.
const Layout = struct {
    /// Bytes allocated for all pages.
    blob_size: usize,
    /// Bytes per key.
    key_size: usize,
    /// Bytes per value.
    value_size: usize,
    /// Keys per page (in the summary).
    keys_count: usize,
    /// Values per page.
    values_count: usize,
    /// Number of searches to time.
    searches: usize,
};
|
|
171
|
+
|
|
172
|
+
/// Returns the value type for `layout`: an unsigned integer key occupying
/// `layout.key_size` bytes, followed by an opaque body that pads the value out to
/// `layout.value_size` bytes. Both sizes are verified at compile time.
fn Value(comptime layout: Layout) type {
    return struct {
        /// Largest key representable in `layout.key_size` bytes (all bits set).
        /// The outer parentheses are required: in Zig `-` binds tighter than `<<`, so
        /// `1 << (8 * layout.key_size) - 1` would evaluate to `1 << (8 * key_size - 1)`,
        /// which is only half of the key range.
        pub const max_key = (1 << (8 * layout.key_size)) - 1;
        /// Smallest unsigned integer type able to hold every key in `0...max_key`.
        pub const Key = math.IntFittingRange(0, max_key);
        const Self = @This();
        key: Key,
        body: [layout.value_size - layout.key_size]u8,

        comptime {
            assert(@sizeOf(Key) == layout.key_size);
            assert(@sizeOf(Self) == layout.value_size);
        }

        /// Extracts the key from a value.
        inline fn key_from_value(self: Self) Key {
            return self.key;
        }

        /// Identity mapping, for searching a slice that already consists of keys.
        inline fn key_from_key(x: Key) Key {
            return x;
        }

        /// Orders two keys numerically.
        inline fn key_compare(a: Key, b: Key) math.Order {
            return math.order(a, b);
        }
    };
}
|
|
198
|
+
|
|
199
|
+
/// Per-search averages produced by `Benchmark.end`.
const BenchmarkResult = struct {
    /// Wall-clock time, in nanoseconds.
    wall_time: u64,
    /// User CPU time, in nanoseconds.
    utime: u64,
    cpu_cycles: usize,
    instructions: usize,
    cache_references: usize,
    cache_misses: usize,
    branch_misses: usize,
};
|
|
208
|
+
|
|
209
|
+
// Short aliases for the backported perf bindings.
const PERF = perf.PERF;
const perf_event_attr = perf.perf_event_attr;
const perf_event_open = perf.perf_event_open;

/// Hardware counters sampled for each benchmark.
/// `Benchmark.end` reads the counters by position, so the order here is significant.
const perf_counters = [_]PERF.COUNT.HW{
    .CPU_CYCLES,
    .INSTRUCTIONS,
    .CACHE_REFERENCES,
    .CACHE_MISSES,
    .BRANCH_MISSES,
};
|
|
219
|
+
|
|
220
|
+
/// Measures wall time, user CPU time and a group of hardware performance counters
/// between a call to `begin()` and the matching call to `end()`.
const Benchmark = struct {
    timer: std.time.Timer,
    rusage: std.os.rusage,
    /// One perf event fd per entry of `perf_counters`, in the same order.
    perf_fds: [perf_counters.len]std.os.fd_t,

    /// Opens and enables the perf counters, snapshots rusage and starts the wall clock.
    /// Returns `error.Unexpected` if a perf event cannot be opened or enabled; perf fds
    /// opened before a failure are closed again.
    fn begin() !Benchmark {
        var perf_fds = [1]std.os.fd_t{-1} ** perf_counters.len;
        // Do not leak already-opened perf fds if a later step fails.
        errdefer {
            for (perf_fds) |fd| {
                if (fd != -1) std.os.close(fd);
            }
        }

        for (perf_counters) |counter, i| {
            var attr: perf_event_attr = .{
                .type = PERF.TYPE.HARDWARE,
                .config = @enumToInt(counter),
                .flags = .{
                    .disabled = true,
                    .exclude_kernel = true,
                    .exclude_hv = true,
                },
            };
            // `perf_fds[0]` is still -1 for the first event, which makes it the group
            // leader; every later event joins that leader's group.
            perf_fds[i] = try perf_event_open(&attr, 0, -1, perf_fds[0], PERF.FLAG.FD_CLOEXEC);
        }

        // The raw syscall wrapper encodes failure in its return value rather than
        // returning -1, so decode it with `std.os.errno` (as `perf_event_open` does).
        const rc = std.os.linux.ioctl(perf_fds[0], PERF.EVENT_IOC.ENABLE, PERF.IOC_FLAG_GROUP);
        if (std.os.errno(rc) != 0) return error.Unexpected;

        // Start the wall clock after perf, since setup is slow.
        const timer = try std.time.Timer.start();
        return Benchmark{
            .timer = timer,
            // TODO pass std.os.linux.rusage.SELF once Zig is upgraded
            .rusage = std.os.getrusage(0),
            .perf_fds = perf_fds,
        };
    }

    /// Stops the counters and returns every measurement divided by `samples`.
    /// All perf fds are closed on return, whether or not an error occurred.
    fn end(self: *Benchmark, samples: usize) !BenchmarkResult {
        defer {
            for (perf_counters) |_, i| {
                std.os.close(self.perf_fds[i]);
                self.perf_fds[i] = -1;
            }
        }

        const rusage = std.os.getrusage(0);
        const rc = std.os.linux.ioctl(self.perf_fds[0], PERF.EVENT_IOC.DISABLE, PERF.IOC_FLAG_GROUP);
        if (std.os.errno(rc) != 0) return error.Unexpected;

        // The fd indexes below follow the order of `perf_counters`.
        return BenchmarkResult{
            .wall_time = self.timer.read() / samples,
            .utime = (timeval_to_ns(rusage.utime) - timeval_to_ns(self.rusage.utime)) / samples,
            .cpu_cycles = (try readPerfFd(self.perf_fds[0])) / samples,
            .instructions = (try readPerfFd(self.perf_fds[1])) / samples,
            .cache_references = (try readPerfFd(self.perf_fds[2])) / samples,
            .cache_misses = (try readPerfFd(self.perf_fds[3])) / samples,
            .branch_misses = (try readPerfFd(self.perf_fds[4])) / samples,
        };
    }
};
|
|
275
|
+
|
|
276
|
+
// Returns the indices 0..n-1 in random order, i.e. shuffle([0,1,…,n-1]).
fn shuffled_index(comptime n: usize, rand: *std.rand.Random) [n]usize {
    var permutation: [n]usize = undefined;
    var next: usize = 0;
    while (next < n) : (next += 1) {
        permutation[next] = next;
    }
    rand.shuffle(usize, &permutation);
    return permutation;
}
|
|
283
|
+
|
|
284
|
+
/// Converts a `timeval` (seconds plus microseconds) to a nanosecond count.
/// Both fields are reinterpreted bit-for-bit as `u64` before the arithmetic.
fn timeval_to_ns(tv: std.os.timeval) u64 {
    const seconds = @bitCast(u64, tv.tv_sec);
    const microseconds = @bitCast(u64, tv.tv_usec);
    const ns_per_us = std.time.ns_per_s / std.time.us_per_s;
    return seconds * std.time.ns_per_s + microseconds * ns_per_us;
}
|
|
289
|
+
|
|
290
|
+
/// Reads one `usize`-sized counter value from the perf event `fd`.
/// Asserts that the read filled the whole value.
fn readPerfFd(fd: std.os.fd_t) !usize {
    var counter: usize = 0;
    const bytes_read = try std.os.read(fd, std.mem.asBytes(&counter));
    assert(bytes_read == @sizeOf(usize));
    return counter;
}
|
|
297
|
+
|
|
298
|
+
/// Narrows `values` to a sub-slice selected by binary searching the summary `keys`
/// for `key`.
///
/// `keys` must hold `layout.keys_count` entries and `values` must hold
/// `layout.values_count` entries. The search result is scaled by
/// `values_count / keys_count` to obtain the upper bound `high`:
/// - on an exact key match, a single-element slice ending at `high` is returned
///   (or `values[0..1]` when `high` is 0);
/// - otherwise the `key_stride` values ending at `high` are returned.
///
/// NOTE(review): if `binary_search` can return index 0 without an exact match,
/// `high - key_stride` underflows — confirm against `binary_search`'s contract.
fn binary_search_keys(
    comptime layout: Layout,
    comptime Key: type,
    comptime V: type,
    comptime compare_keys: fn (Key, Key) math.Order,
    keys: []const Key,
    values: []const V,
    key: Key,
) []const V {
    assert(keys.len == layout.keys_count);
    assert(values.len == layout.values_count);

    const key_stride = layout.values_count / layout.keys_count;
    const key_index = binary_search(Key, Key, V.key_from_key, compare_keys, keys, key);
    const high = key_index * key_stride;

    const exact_match = key_index < keys.len and keys[key_index] == key;
    if (!exact_match) return values[high - key_stride .. high];
    if (high == 0) return values[0..1];
    return values[high - 1 .. high];
}
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
// Copyright (c) 2015-2021, Zig contributors
|
|
2
|
+
// Backported from https://github.com/ziglang/zig/blob/master/lib/std/os/linux.zig
|
|
3
|
+
// TODO Remove this file once we upgrade to Zig 0.9.0.
|
|
4
|
+
const std = @import("std");
|
|
5
|
+
const pid_t = std.os.linux.pid_t;
|
|
6
|
+
const fd_t = std.os.linux.fd_t;
|
|
7
|
+
|
|
8
|
+
/// Invokes the `perf_event_open` syscall and returns the resulting file descriptor.
/// On failure the errno is logged and `error.Unexpected` is returned.
pub fn perf_event_open(
    attr: *perf_event_attr,
    pid: pid_t,
    cpu: i32,
    group_fd: fd_t,
    flags: usize,
) !fd_t {
    const rc = perf_event_open_internal(attr, pid, cpu, group_fd, flags);
    const errno = std.os.errno(rc);
    if (errno == 0) return @intCast(fd_t, rc);

    std.log.err("perf_event_open_internal errno={}", .{errno});
    return error.Unexpected;
}
|
|
23
|
+
|
|
24
|
+
/// Issues the raw `perf_event_open` syscall and returns its undecoded result.
/// The signed arguments are sign-extended to `isize` and then reinterpreted as
/// `usize`, the argument type taken by `syscall5`.
fn perf_event_open_internal(
    attr: *perf_event_attr,
    pid: pid_t,
    cpu: i32,
    group_fd: fd_t,
    flags: usize,
) usize {
    const attr_arg = @ptrToInt(attr);
    const pid_arg = @bitCast(usize, @as(isize, pid));
    const cpu_arg = @bitCast(usize, @as(isize, cpu));
    const group_fd_arg = @bitCast(usize, @as(isize, group_fd));
    return std.os.linux.syscall5(.perf_event_open, attr_arg, pid_arg, cpu_arg, group_fd_arg, flags);
}
|
|
40
|
+
|
|
41
|
+
/// Attribute block passed to `perf_event_open`.
/// This is an `extern struct`: field order and types define the layout handed to the
/// kernel and must not be rearranged.
pub const perf_event_attr = extern struct {
    /// Major event type: hardware, software, tracepoint, etc.
    type: PERF.TYPE = undefined,
    /// Size of this structure, used for forward/backward compatibility.
    size: u32 = @sizeOf(perf_event_attr),
    /// Type-specific configuration.
    config: u64 = 0,

    sample_period_or_freq: u64 = 0,
    sample_type: u64 = 0,
    read_format: u64 = 0,

    flags: packed struct {
        /// Off by default.
        disabled: bool = false,
        /// Children inherit it.
        inherit: bool = false,
        /// Must always be on the PMU.
        pinned: bool = false,
        /// Only group on the PMU.
        exclusive: bool = false,
        /// Don't count user.
        exclude_user: bool = false,
        /// Don't count kernel.
        exclude_kernel: bool = false,
        /// Don't count hypervisor.
        exclude_hv: bool = false,
        /// Don't count when idle.
        exclude_idle: bool = false,
        /// Include mmap data.
        mmap: bool = false,
        /// Include comm data.
        comm: bool = false,
        /// Use frequency, not period.
        freq: bool = false,
        /// Per-task counts.
        inherit_stat: bool = false,
        /// Next exec enables.
        enable_on_exec: bool = false,
        /// Trace fork/exit.
        task: bool = false,
        /// wakeup_watermark
        watermark: bool = false,
        /// Skid constraint (see also PERF_RECORD_MISC_EXACT_IP):
        ///
        ///  0 - SAMPLE_IP can have arbitrary skid
        ///  1 - SAMPLE_IP must have constant skid
        ///  2 - SAMPLE_IP requested to have 0 skid
        ///  3 - SAMPLE_IP must have 0 skid
        precise_ip: u2 = 0,
        /// Non-exec mmap data.
        mmap_data: bool = false,
        /// sample_type applies to all events.
        sample_id_all: bool = false,

        /// Don't count in host.
        exclude_host: bool = false,
        /// Don't count in guest.
        exclude_guest: bool = false,

        /// Exclude kernel callchains.
        exclude_callchain_kernel: bool = false,
        /// Exclude user callchains.
        exclude_callchain_user: bool = false,
        /// Include mmap with inode data.
        mmap2: bool = false,
        /// Flag comm events that are due to an exec.
        comm_exec: bool = false,
        /// Use @clockid for time fields.
        use_clockid: bool = false,
        /// Context switch data.
        context_switch: bool = false,
        /// Write ring buffer from end to beginning.
        write_backward: bool = false,
        /// Include namespaces data.
        namespaces: bool = false,

        __reserved_1: u35 = 0,
    } = .{},
    /// Wake up every n events, or bytes before wakeup.
    wakeup_events_or_watermark: u32 = 0,

    bp_type: u32 = 0,

    /// Also used for:
    ///   bp_addr
    ///   kprobe_func for perf_kprobe
    ///   uprobe_path for perf_uprobe
    config1: u64 = 0,
    /// Also used for:
    ///   bp_len
    ///   kprobe_addr when kprobe_func == null
    ///   probe_offset for perf_[k,u]probe
    config2: u64 = 0,

    /// enum perf_branch_sample_type
    branch_sample_type: u64 = 0,

    /// Set of user registers to dump on samples.
    /// See asm/perf_regs.h for details.
    sample_regs_user: u64 = 0,

    /// Size of the user stack to dump on samples.
    sample_stack_user: u32 = 0,

    clockid: i32 = 0,
    /// Set of registers to dump for each sample; state captured on:
    ///  - precise = 0: PMU interrupt
    ///  - precise > 0: sampled instruction
    ///
    /// See asm/perf_regs.h for details.
    sample_regs_intr: u64 = 0,

    /// Wakeup watermark for the AUX area.
    aux_watermark: u32 = 0,
    sample_max_stack: u16 = 0,
    /// Padding that aligns the struct to u64.
    __reserved_2: u16 = 0,
};
|
|
165
|
+
|
|
166
|
+
/// Namespace of constants used with `perf_event_open` and the perf event ioctls.
pub const PERF = struct {
    /// Major event types, for `perf_event_attr.type`.
    pub const TYPE = enum(u32) {
        HARDWARE,
        SOFTWARE,
        TRACEPOINT,
        HW_CACHE,
        RAW,
        BREAKPOINT,
        MAX,
    };

    /// Event identifiers, grouped by kind.
    pub const COUNT = struct {
        /// Hardware event identifiers.
        pub const HW = enum(u32) {
            CPU_CYCLES,
            INSTRUCTIONS,
            CACHE_REFERENCES,
            CACHE_MISSES,
            BRANCH_INSTRUCTIONS,
            BRANCH_MISSES,
            BUS_CYCLES,
            STALLED_CYCLES_FRONTEND,
            STALLED_CYCLES_BACKEND,
            REF_CPU_CYCLES,
            MAX,

            /// Hardware cache identifiers.
            pub const CACHE = enum(u32) {
                L1D,
                L1I,
                LL,
                DTLB,
                ITLB,
                BPU,
                NODE,
                MAX,

                /// Cache operation identifiers.
                pub const OP = enum(u32) {
                    READ,
                    WRITE,
                    PREFETCH,
                    MAX,
                };

                /// Cache operation result identifiers.
                pub const RESULT = enum(u32) {
                    ACCESS,
                    MISS,
                    MAX,
                };
            };
        };

        /// Software event identifiers.
        pub const SW = enum(u32) {
            CPU_CLOCK,
            TASK_CLOCK,
            PAGE_FAULTS,
            CONTEXT_SWITCHES,
            CPU_MIGRATIONS,
            PAGE_FAULTS_MIN,
            PAGE_FAULTS_MAJ,
            ALIGNMENT_FAULTS,
            EMULATION_FAULTS,
            DUMMY,
            BPF_OUTPUT,
            MAX,
        };
    };

    /// Sample-type bit flags; each constant is a single bit.
    pub const SAMPLE = struct {
        pub const IP = 1 << 0;
        pub const TID = 1 << 1;
        pub const TIME = 1 << 2;
        pub const ADDR = 1 << 3;
        pub const READ = 1 << 4;
        pub const CALLCHAIN = 1 << 5;
        pub const ID = 1 << 6;
        pub const CPU = 1 << 7;
        pub const PERIOD = 1 << 8;
        pub const STREAM_ID = 1 << 9;
        pub const RAW = 1 << 10;
        pub const BRANCH_STACK = 1 << 11;
        pub const REGS_USER = 1 << 12;
        pub const STACK_USER = 1 << 13;
        pub const WEIGHT = 1 << 14;
        pub const DATA_SRC = 1 << 15;
        pub const IDENTIFIER = 1 << 16;
        pub const TRANSACTION = 1 << 17;
        pub const REGS_INTR = 1 << 18;
        pub const PHYS_ADDR = 1 << 19;
        pub const MAX = 1 << 20;

        /// Branch-sample bit flags.
        pub const BRANCH = struct {
            pub const USER = 1 << 0;
            pub const KERNEL = 1 << 1;
            pub const HV = 1 << 2;
            pub const ANY = 1 << 3;
            pub const ANY_CALL = 1 << 4;
            pub const ANY_RETURN = 1 << 5;
            pub const IND_CALL = 1 << 6;
            pub const ABORT_TX = 1 << 7;
            pub const IN_TX = 1 << 8;
            pub const NO_TX = 1 << 9;
            pub const COND = 1 << 10;
            pub const CALL_STACK = 1 << 11;
            pub const IND_JUMP = 1 << 12;
            pub const CALL = 1 << 13;
            pub const NO_FLAGS = 1 << 14;
            pub const NO_CYCLES = 1 << 15;
            pub const TYPE_SAVE = 1 << 16;
            pub const MAX = 1 << 17;
        };
    };

    /// Bit flags for the `flags` argument of `perf_event_open`.
    pub const FLAG = struct {
        pub const FD_NO_GROUP = 1 << 0;
        pub const FD_OUTPUT = 1 << 1;
        pub const PID_CGROUP = 1 << 2;
        pub const FD_CLOEXEC = 1 << 3;
    };

    /// ioctl request numbers for perf event file descriptors.
    pub const EVENT_IOC = struct {
        pub const ENABLE = 0x2400;
        pub const DISABLE = 0x2401;
        pub const REFRESH = 0x2402;
        pub const RESET = 0x2403;
        pub const PERIOD = 0x40082404;
        pub const SET_OUTPUT = 0x2405;
        pub const SET_FILTER = 0x40082406;
        pub const SET_BPF = 0x40042408;
        pub const PAUSE_OUTPUT = 0x40042409;
        pub const QUERY_BPF = 0xC008240A;
        pub const MODIFY_ATTRIBUTES = 0x4008240B;
    };

    /// ioctl argument that applies the request to the whole event group.
    pub const IOC_FLAG_GROUP = 1;
};
|