cirron 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/cirron.rb ADDED
@@ -0,0 +1,2 @@
1
+ require_relative 'tracer'
2
+ require_relative 'collector'
data/lib/cirronlib.cpp ADDED
@@ -0,0 +1,321 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include <sys/syscall.h>
4
+ #include <unistd.h>
5
+ #include <errno.h>
6
+ #include <sys/ioctl.h>
7
+ #include <stdint.h>
8
+ #include <stdlib.h>
9
+
10
+ #if defined(__linux__)
11
+ #include <linux/perf_event.h>
12
+ #elif defined(__APPLE__)
13
+ #include "apple_arm_events.h"
14
+ #endif
15
+
16
/*
 * Measurement result handed back to the caller of end().
 *
 * The layout is part of the FFI contract: collector.rb declares a struct with
 * the same four 64-bit fields in the same order. The three counter fields
 * follow the order of the entries in the Linux events[] table
 * (instructions, branch misses, page faults).
 */
struct counter {
    uint64_t time_enabled_ns;   /* taken from the perf "time_enabled" value on Linux; set to 0 on Apple */
    uint64_t instruction_count; /* instructions counted between start() and end() */
    uint64_t branch_misses;     /* branch misses counted between start() and end() */
    uint64_t page_faults;       /* page faults on Linux; set to 0 on Apple */
};
23
+
24
// Entry points exported with C linkage so they keep their plain names
// ("start" / "end") in the shared library, which is how collector.rb
// attaches to them.
extern "C" int start();
extern "C" int end(int fd, struct counter *out);
29
+
30
#if defined(__linux__)
// Shape of the data read() returns from the group leader. It corresponds to
// the read_format flags requested in start():
// PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING.
struct read_format {
    uint64_t nr;           // number of entries in values[]
    uint64_t time_enabled;
    uint64_t time_running;
    struct {
        uint64_t value;
    } values[];            // one entry per event, in the order the events were opened
};

// A (type, config) pair copied into perf_event_attr for each event.
struct perf_event_config {
    uint64_t type;
    uint64_t config;
};

// Events measured on Linux. The first entry becomes the group leader in
// start(), and end() reports the values in this same order.
//
// PERF_COUNT_SW_CONTEXT_SWITCHES and PERF_COUNT_SW_CPU_MIGRATIONS were tried
// as well, but always showed 0, so they are left out.
struct perf_event_config events[] = {
    {PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS},
    {PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES},
    {PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS},
};

// Number of entries in events[].
const int NUM_EVENTS = sizeof(events) / sizeof(events[0]);
#elif defined(__APPLE__)
// Thread counter snapshot taken by start(); end() subtracts it from its own snapshot.
u64 counters_0[KPC_MAX_COUNTERS] = {0};
// Filled by kpep_config_kpc_map() in start(); end() uses it to index the snapshots.
usize counter_map[KPC_MAX_COUNTERS] = {0};
#endif
61
+
62
+ int start()
63
+ {
64
+ #if defined(__linux__)
65
+ // Construct base perf_event_attr struct
66
+ struct perf_event_attr attr;
67
+ memset(&attr, 0, sizeof(attr));
68
+ attr.size = sizeof(attr);
69
+ attr.disabled = 1;
70
+ attr.exclude_kernel = 1;
71
+ attr.exclude_hv = 1;
72
+ attr.sample_period = 0;
73
+ attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
74
+
75
+ int group = -1;
76
+ int leader_fd;
77
+
78
+ // Enable every event in perf_event_config
79
+ for (int i = 0; i < NUM_EVENTS; i++)
80
+ {
81
+ attr.type = events[i].type;
82
+ attr.config = events[i].config;
83
+
84
+ int fd = syscall(SYS_perf_event_open, &attr, 0, -1, group, 0);
85
+ if (fd == -1)
86
+ {
87
+ fprintf(stderr, "Failed to open event %lu: %s.\n", events[i].config, strerror(errno));
88
+ return -1;
89
+ }
90
+
91
+ if (i == 0)
92
+ {
93
+ group = fd;
94
+ leader_fd = fd;
95
+ }
96
+ }
97
+
98
+ // Enable the event group
99
+ if (ioctl(leader_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1)
100
+ {
101
+ fprintf(stderr, "Failed to enable perf events: %s.\n", strerror(errno));
102
+ // Consider cleaning up previously opened file descriptors here
103
+ return -1;
104
+ }
105
+
106
+ return leader_fd;
107
+ #elif defined(__APPLE__)
108
+ // load dylib
109
+ if (!lib_init())
110
+ {
111
+ printf("Error: %s\n", lib_err_msg);
112
+ return 1;
113
+ }
114
+
115
+ // check permission
116
+ int force_ctrs = 0;
117
+ if (kpc_force_all_ctrs_get(&force_ctrs))
118
+ {
119
+ printf("Permission denied, xnu/kpc requires root privileges.\n");
120
+ return 1;
121
+ }
122
+
123
+ // load pmc db
124
+ int ret = 0;
125
+ kpep_db *db = NULL;
126
+ if ((ret = kpep_db_create("a9", &db)))
127
+ {
128
+ printf("Error: cannot load pmc database: %d.\n", ret);
129
+ return 1;
130
+ }
131
+
132
+ // create a config
133
+ kpep_config *cfg = NULL;
134
+ if ((ret = kpep_config_create(db, &cfg)))
135
+ {
136
+ printf("Failed to create kpep config: %d (%s).\n",
137
+ ret, kpep_config_error_desc(ret));
138
+ return 1;
139
+ }
140
+ if ((ret = kpep_config_force_counters(cfg)))
141
+ {
142
+ printf("Failed to force counters: %d (%s).\n",
143
+ ret, kpep_config_error_desc(ret));
144
+ return 1;
145
+ }
146
+
147
+ // get events
148
+ const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]);
149
+ kpep_event *ev_arr[ev_count] = {0};
150
+ for (usize i = 0; i < ev_count; i++)
151
+ {
152
+ const event_alias *alias = profile_events + i;
153
+ ev_arr[i] = get_event(db, alias);
154
+ if (!ev_arr[i])
155
+ {
156
+ printf("Cannot find event: %s.\n", alias->alias);
157
+ return 1;
158
+ }
159
+ }
160
+
161
+ // add event to config
162
+ for (usize i = 0; i < ev_count; i++)
163
+ {
164
+ kpep_event *ev = ev_arr[i];
165
+ if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL)))
166
+ {
167
+ printf("Failed to add event: %d (%s).\n",
168
+ ret, kpep_config_error_desc(ret));
169
+ return 1;
170
+ }
171
+ }
172
+
173
+ // prepare buffer and config
174
+ u32 classes = 0;
175
+ usize reg_count = 0;
176
+ kpc_config_t regs[KPC_MAX_COUNTERS] = {0};
177
+ if ((ret = kpep_config_kpc_classes(cfg, &classes)))
178
+ {
179
+ printf("Failed get kpc classes: %d (%s).\n",
180
+ ret, kpep_config_error_desc(ret));
181
+ return 1;
182
+ }
183
+ if ((ret = kpep_config_kpc_count(cfg, &reg_count)))
184
+ {
185
+ printf("Failed get kpc count: %d (%s).\n",
186
+ ret, kpep_config_error_desc(ret));
187
+ return 1;
188
+ }
189
+ if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map))))
190
+ {
191
+ printf("Failed get kpc map: %d (%s).\n",
192
+ ret, kpep_config_error_desc(ret));
193
+ return 1;
194
+ }
195
+ if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs))))
196
+ {
197
+ printf("Failed get kpc registers: %d (%s).\n",
198
+ ret, kpep_config_error_desc(ret));
199
+ return 1;
200
+ }
201
+
202
+ // set config to kernel
203
+ if ((ret = kpc_force_all_ctrs_set(1)))
204
+ {
205
+ printf("Failed force all ctrs: %d.\n", ret);
206
+ return 1;
207
+ }
208
+ if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count)
209
+ {
210
+ if ((ret = kpc_set_config(classes, regs)))
211
+ {
212
+ printf("Failed set kpc config: %d.\n", ret);
213
+ return 1;
214
+ }
215
+ }
216
+
217
+ // start counting
218
+ if ((ret = kpc_set_counting(classes)))
219
+ {
220
+ printf("Failed set counting: %d.\n", ret);
221
+ return 1;
222
+ }
223
+ if ((ret = kpc_set_thread_counting(classes)))
224
+ {
225
+ printf("Failed set thread counting: %d.\n", ret);
226
+ return 1;
227
+ }
228
+
229
+ // get counters before
230
+ if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0)))
231
+ {
232
+ printf("Failed get thread counters before: %d.\n", ret);
233
+ return 1;
234
+ }
235
+
236
+ return 0;
237
+ #else
238
+ printf("This systems seems to be neither Linux, nor ARM OSX, so I don't know how to proceeed.\nIf this is a mistake, please open an issue on the GitHub repository.\n");
239
+ return -1;
240
+ #endif
241
+ }
242
+
243
+ int end(int fd, struct counter *out)
244
+ {
245
+ #if defined(__linux__)
246
+ if (out == NULL)
247
+ {
248
+ fprintf(stderr, "Error: 'out' pointer is NULL in end().\n");
249
+ return -1;
250
+ }
251
+
252
+ // Disable the event group
253
+ if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1)
254
+ {
255
+ fprintf(stderr, "Error disabling perf event (fd: %d): %s\n", fd, strerror(errno));
256
+ return -1;
257
+ }
258
+
259
+ // Allocate buffer for reading results
260
+ int size = sizeof(struct read_format) + (sizeof(uint64_t) * NUM_EVENTS);
261
+ struct read_format *buffer = (struct read_format *)malloc(size);
262
+ if (!buffer)
263
+ {
264
+ fprintf(stderr, "Failed to allocate memory for read buffer.\n");
265
+ return -1;
266
+ }
267
+
268
+ // Read results
269
+ int ret_val = read(fd, buffer, size);
270
+ if (ret_val == -1)
271
+ {
272
+ fprintf(stderr, "Error reading perf event results: %s\n", strerror(errno));
273
+ free(buffer);
274
+ return -1;
275
+ }
276
+ else if (ret_val != size)
277
+ {
278
+ fprintf(stderr, "Error reading perf event results: read %d bytes, expected %d\n", ret_val, size);
279
+ free(buffer);
280
+ return -1;
281
+ }
282
+
283
+ // Assign time_enabled_ns
284
+ out->time_enabled_ns = buffer->time_enabled;
285
+
286
+ // Directly assign values to struct fields treating them as an array 8)
287
+ uint64_t *counter_ptr = (uint64_t *)out;
288
+ counter_ptr++; // Now points to instruction_count, the first counter field
289
+
290
+ for (int i = 0; i < NUM_EVENTS; i++)
291
+ {
292
+ counter_ptr[i] = buffer->values[i].value;
293
+ }
294
+
295
+ close(fd);
296
+ free(buffer);
297
+ return 0;
298
+ #elif defined(__APPLE__)
299
+ // get counters after
300
+ int ret = 0;
301
+ u64 counters_1[KPC_MAX_COUNTERS] = {0};
302
+ if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_1)))
303
+ {
304
+ printf("Failed get thread counters after: %d.\n", ret);
305
+ return 1;
306
+ }
307
+
308
+ kpc_set_counting(0);
309
+ kpc_set_thread_counting(0);
310
+ kpc_force_all_ctrs_set(0);
311
+
312
+ out->time_enabled_ns = 0;
313
+ out->instruction_count = counters_1[counter_map[1]] - counters_0[counter_map[1]];
314
+ out->page_faults = 0;
315
+ out->branch_misses = counters_1[counter_map[3]] - counters_0[counter_map[3]];
316
+ return 0;
317
+ #else
318
+ printf("This systems seems to be neither Linux, nor OSX, so I don't know how to proceeed.\nIf this is a mistake, please open an issue on the GitHub repository.\n");
319
+ return -1;
320
+ #endif
321
+ }
data/lib/collector.rb ADDED
@@ -0,0 +1,70 @@
1
+ require 'ffi'
2
+
3
# FFI bindings for the native collector in cirronlib.cpp.
#
# When this module is loaded and cirronlib.so does not yet exist next to this
# file, the library is compiled from cirronlib.cpp with the system `c++`.
# Raises RuntimeError if the compiler is missing or the build fails.
module CirronInterOp
  extend FFI::Library

  lib_path = File.join(__dir__, 'cirronlib.so')
  source_path = File.join(__dir__, 'cirronlib.cpp')

  unless File.exist?(lib_path)
    # Argument-list form: no shell is involved, so install paths containing
    # spaces or shell metacharacters are passed through unchanged.
    # `system` returns nil when the command cannot be run and false when it
    # exits with a non-zero status.
    compiled = system('c++', '-std=c++17', '-O3', '-shared', '-fPIC', '-o', lib_path, source_path)
    unless compiled
      raise "Failed to compile cirronlib.cpp, make sure you have 'c++' installed."
    end
  end

  ffi_lib lib_path
  attach_function :start, [], :int
  attach_function :end, [:int, :pointer], :int
end
20
+
21
# Ruby view of the native `struct counter`: four unsigned 64-bit fields in the
# same order as declared in cirronlib.cpp.
class Counter < FFI::Struct
  layout :time_enabled_ns, :uint64,
         :instruction_count, :uint64,
         :branch_misses, :uint64,
         :page_faults, :uint64

  # Defines a reader and a writer method for every field of the layout, so
  # fields can be used as `counter.page_faults` in addition to
  # `counter[:page_faults]`.
  def self.create_accessors
    layout.members.each do |member|
      define_method(member) { self[member] }
      define_method("#{member}=") { |new_value| self[member] = new_value }
    end
  end

  create_accessors

  # Same text as #inspect.
  def to_s
    inspect
  end

  # Returns "Counter(field: value, ...)" listing every field in layout order.
  def inspect
    pairs = self.class.layout.members.map { |member| "#{member}: #{self[member]}" }
    "Counter(#{pairs.join(', ')})"
  end
end
47
+
48
module Cirron
  # Starts the native collector.
  #
  # Returns the value produced by the native start(), which has to be handed
  # to Cirron.end. Raises RuntimeError when the native call returns -1.
  def self.start
    ret_val = CirronInterOp.start
    raise "Failed to start collector" if ret_val == -1

    ret_val
  end

  # Stops the collector identified by +fd+ and stores the results in
  # +counter+. Returns the native end() status.
  def self.end(fd, counter)
    CirronInterOp.end(fd, counter)
  end

  # Measures the given block and returns a Counter with the results.
  #
  # The collector is stopped even when the block raises, so the native
  # resources obtained by start are not left behind; the exception is then
  # propagated to the caller.
  def self.collector(&blk)
    counter = Counter.new
    fd = self.start

    begin
      yield
    ensure
      self.end(fd, counter)
    end

    counter
  end
end
data/lib/tracer.rb ADDED
@@ -0,0 +1,162 @@
1
+ require 'tempfile'
2
+ require 'json'
3
+
4
# A single system call event from an strace log.
#
# All attributes hold whatever the creator passes in; parse_strace passes the
# raw strings captured from the log line.
class Syscall
  attr_reader :name, :args, :retval, :duration, :timestamp, :pid

  def initialize(name:, args:, retval:, duration:, timestamp:, pid:)
    @name, @args, @retval = name, args, retval
    @duration, @timestamp, @pid = duration, timestamp, pid
  end

  # Renders the call as "name(args) = retval <duration s>".
  def to_s
    format('%s(%s) = %s <%ss>', name, args, retval, duration)
  end
end
20
+
21
# A signal delivery event from an strace log.
#
# All attributes hold whatever the creator passes in; parse_strace passes the
# raw strings captured from the log line.
class TraceSignal
  attr_reader :name, :details, :timestamp, :pid

  def initialize(name:, details:, timestamp:, pid:)
    @name, @details = name, details
    @timestamp, @pid = timestamp, pid
  end

  # Renders the signal as "NAME {details}".
  def to_s
    format('%s {%s}', name, details)
  end
end
35
+
36
# Parses strace output (as produced with -f -T -ttt) into a list of events.
#
# file - any object responding to each_line.
#
# Returns an Array of Syscall and TraceSignal objects in the order their lines
# complete. A call split by strace into an "<unfinished ...>" line and a
# "<... resumed>" line becomes one Syscall that carries the timestamp of the
# unfinished line and the arguments of both parts joined together. Lines that
# match none of the patterns, and resumed lines without a matching start, are
# reported on stdout and skipped.
def parse_strace(file)
  syscall_pattern = /^(\d+) +(\d+\.\d+) (\w+)\((.*?)\) += +(.*?) <(.*?)>$/
  signal_pattern = /^(\d+) +(\d+\.\d+) --- (\w+) {(.*)} ---$/
  unfinished_pattern = /^(\d+) +(\d+\.\d+) (\w+)\((.*?) +<unfinished \.\.\.>$/
  resumed_pattern = /^(\d+) +(\d+\.\d+) <\.\.\. (\w+) resumed>(.*?)?\) += +(.*?) <(.*?)>$/

  events = []
  # [pid, syscall name] => stack of [timestamp, args] for calls still waiting
  # for their "resumed" line
  pending = {}

  file.each_line do |line|
    if (m = syscall_pattern.match(line))
      pid, timestamp, name, args, retval, duration = m.captures
      events << Syscall.new(name: name, args: args, retval: retval, duration: duration, timestamp: timestamp, pid: pid)
    elsif (m = signal_pattern.match(line))
      pid, timestamp, name, details = m.captures
      events << TraceSignal.new(name: name, details: details, timestamp: timestamp, pid: pid)
    elsif (m = unfinished_pattern.match(line))
      pid, timestamp, name, args = m.captures
      (pending[[pid, name]] ||= []) << [timestamp, args]
    elsif (m = resumed_pattern.match(line))
      pid, _resume_timestamp, name, rest_args, retval, duration = m.captures
      stack = pending[[pid, name]]
      if stack.nil? || stack.empty?
        puts "Resumed syscall without a start: #{line}"
      else
        start_timestamp, first_args = stack.pop
        events << Syscall.new(name: name, args: "#{first_args}#{rest_args}", retval: retval, duration: duration, timestamp: start_timestamp, pid: pid)
      end
    else
      puts "Attempted to parse unrecognized strace line: #{line}"
    end
  end

  events
end
74
+
75
# Returns the part of +trace+ that lies strictly between the first and the
# last event whose arguments mention +marker_path+.
#
# Cirron.tracer stats a dummy path right before and right after running the
# traced block; those two calls are the markers this method looks for.
#
# trace       - Array of Syscall / TraceSignal events.
# marker_path - String to look for in the events' args.
#
# Events without an `args` attribute (TraceSignal) can never be markers and
# are skipped while searching. If no marker is found, a message is printed
# and the full trace is returned.
def filter_trace(trace, marker_path)
  is_marker = lambda do |event|
    event.respond_to?(:args) && event.args&.include?(marker_path)
  end

  start_index = trace.index(&is_marker)
  end_index = trace.rindex(&is_marker)

  if start_index && end_index
    trace[start_index + 1...end_index]
  else
    puts "Failed to find start and end markers for the trace, returning the full trace."
    trace
  end
end
86
+
87
module Cirron
  # Traces the system calls made while the given block runs, by attaching
  # `strace` to the current process (Linux only).
  #
  # timeout - seconds to wait for the trace file to appear before giving up.
  #
  # Returns an Array of Syscall / TraceSignal events recorded between the two
  # marker calls placed around the block (see filter_trace).
  # Raises Timeout::Error if the trace file does not appear within +timeout+.
  def self.tracer(timeout = 10, &block)
    require 'timeout' # provides Timeout::Error

    trace_file = Tempfile.new('cirron')
    trace_file.close
    marker_path = trace_file.path + ".dummy"

    begin
      # Argument-list form: no shell is involved, so the temp path is passed
      # through unchanged whatever characters it contains.
      strace_proc = spawn("strace", "--quiet=attach,exit", "-f", "-T", "-ttt",
                          "-o", trace_file.path, "-p", Process.pid.to_s,
                          :out => "/dev/null", :err => "/dev/null")

      begin
        # Wait for the trace file to be created
        deadline = Time.now + timeout
        until File.exist?(trace_file.path)
          if Time.now > deadline
            raise Timeout::Error, "Failed to start strace within #{timeout}s."
          end
          sleep 0.01
        end
        # :( fixed pause before the first marker; NOTE(review): presumably
        # there to give strace time to attach - confirm
        sleep 0.1

        # We use this dummy fstat to recognize when we start executing the block
        File.stat(marker_path) rescue nil

        yield if block_given?

        # Same here, to recognize when we're done executing the block
        File.stat(marker_path) rescue nil
      ensure
        Process.kill('INT', strace_proc) rescue nil
        Process.wait(strace_proc) rescue nil
      end

      # Parse the trace file into a list of events
      trace = File.open(trace_file.path, 'r') do |file|
        parse_strace(file)
      end

      filter_trace(trace, marker_path)
    ensure
      # Remove the temp file on every path, including errors
      trace_file.unlink
    end
  end

  # Converts parsed events into a Trace Event Format JSON string.
  #
  # parsed_events - Array of Syscall / TraceSignal objects.
  #
  # Syscalls become complete events (ph "X") with start and duration in
  # microseconds; signals become instant events (ph "i"). The pid string is
  # used for both pid and tid.
  def self.to_tef(parsed_events)
    events = parsed_events.map do |event|
      case event
      when Syscall
        {
          name: event.name,
          ph: "X",
          ts: event.timestamp.to_f * 1_000_000,
          dur: event.duration.to_f * 1_000_000,
          pid: event.pid.to_i,
          tid: event.pid.to_i,
          args: { args: event.args, retval: event.retval }
        }
      when TraceSignal
        {
          name: "Signal: #{event.name}",
          ph: "i",
          s: "g",
          ts: event.timestamp.to_f * 1_000_000,
          pid: event.pid.to_i,
          tid: event.pid.to_i,
          args: { details: event.details }
        }
      end
    end

    JSON.pretty_generate(events)
  end

  # Instance-level variant kept for code that mixes Cirron in; same result as
  # Cirron.to_tef.
  def to_tef(parsed_events)
    Cirron.to_tef(parsed_events)
  end
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cirron
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.2
5
+ platform: ruby
6
+ authors:
7
+ - Matt Stuchlik
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2024-07-06 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |
14
+ ==
15
+ # Cirron
16
+
17
+ Cirron measures a piece of Ruby code and reports back several performance counters: CPU instruction count, branch misses, page faults and time spent measuring. It uses the Linux perf events interface or @ibireme's [KPC demo](https://gist.github.com/ibireme/173517c208c7dc333ba962c1f0d67d12) on OSX.
18
+
19
+ It can also trace syscalls using `strace`, Linux only!
20
+
21
+ ## Prerequisites
22
+
23
+ - Linux with perf events support / Apple ARM OSX
24
+ - C++
25
+ - Ruby 3.x
26
+
27
+ ## Usage
28
+
29
+ ### Performance Counters
30
+ ```
31
+ from cirron import Collector
32
+
33
+ # Start collecting performance metrics
34
+ with Collector() as collector:
35
+ # Your code here
36
+ # ...
37
+
38
+ # Retrieve the metrics
39
+ print(collector.counters)
40
+ ```
41
+
42
+ ### Syscalls
43
+ ```
44
+ from cirron import Tracer, to_tef
45
+
46
+ with Tracer() as tracer:
47
+ # Your code here
48
+ # ...
49
+
50
+ # Stop collecting and retrieve the trace
51
+ print(tracer.trace)
52
+
53
+ # Save the trace for ingesting to Perfetto
54
+ open("/tmp/trace", "w").write(to_tef(trace))
55
+ ```
56
+ email:
57
+ - matej.stuchlik@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - LICENSE
63
+ - README.md
64
+ - cirron.gemspec
65
+ - lib/apple_arm_events.h
66
+ - lib/cirron.rb
67
+ - lib/cirronlib.cpp
68
+ - lib/collector.rb
69
+ - lib/tracer.rb
70
+ homepage: https://github.com/s7nfo/Cirron
71
+ licenses:
72
+ - MIT
73
+ metadata:
74
+ homepage_uri: https://github.com/s7nfo/Cirron
75
+ source_code_uri: https://github.com/s7nfo/Cirron
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: 3.0.0
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubygems_version: 3.5.10
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Cirron measures how many CPU instructions and system calls a piece of Ruby
95
+ code executes.
96
+ test_files: []