rperf 0.4.0 → 0.6.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -28
- data/docs/help.md +149 -7
- data/exe/rperf +33 -8
- data/ext/rperf/rperf.c +547 -264
- data/lib/rperf/active_job.rb +13 -0
- data/lib/rperf/middleware.rb +15 -0
- data/lib/rperf/sidekiq.rb +9 -0
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf.rb +145 -18
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 497392cfda8e82d1c37aadd0953b4c73b6bfb09870e6c612c1fd5fced0e3d24f
|
|
4
|
+
data.tar.gz: 6960be209fc3d4aac0f268378c5b7e1399027da0c5b7f498bcb4be0662012d62
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '09fc32b7577ac9544a846c86c37a7ad11e9de00a27bbb0bbd25cbc2fcabe04e74741c64f9fb3cfe1a9663145e058215272a247766c7b8106218eda80cbcd838f'
|
|
7
|
+
data.tar.gz: 9d13e685c5a293c4d9033376509bf4b5c762a5f5155a2d5dd6e838d5a55dc79b9ef7d9521a5bcf65eac88a683f0e20cd7e5dac2680134aa565c709eb48452e40
|
data/README.md
CHANGED
|
@@ -2,25 +2,66 @@
|
|
|
2
2
|
<img src="docs/logo.svg" alt="rperf logo" width="260">
|
|
3
3
|
</p>
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
<h1 align="center">rperf</h1>
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
<p align="center">
|
|
8
|
+
<strong>Know where your Ruby spends its time — accurately.</strong><br>
|
|
9
|
+
A sampling profiler that corrects safepoint bias using real time deltas.
|
|
10
|
+
</p>
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
<p align="center">
|
|
13
|
+
<a href="https://rubygems.org/gems/rperf"><img src="https://img.shields.io/gem/v/rperf.svg" alt="Gem Version"></a>
|
|
14
|
+
<img src="https://img.shields.io/badge/Ruby-%3E%3D%203.4.0-cc342d" alt="Ruby >= 3.4.0">
|
|
15
|
+
<a href="https://ko1.github.io/rperf/docs/manual/"><img src="https://img.shields.io/badge/docs-manual-blue" alt="Manual"></a>
|
|
16
|
+
<img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License">
|
|
17
|
+
</p>
|
|
13
18
|
|
|
14
|
-
|
|
19
|
+
<p align="center">
|
|
20
|
+
pprof / collapsed stacks / text report · CPU mode & wall mode (GVL + GC tracking)
|
|
21
|
+
</p>
|
|
22
|
+
|
|
23
|
+
<p align="center">
|
|
24
|
+
<a href='https://ko1.github.io/rperf/'>Web site</a>,
|
|
25
|
+
<a href='https://ko1.github.io/rperf/docs/manual/'>Online manual</a>,
|
|
26
|
+
<a href='https://github.com/ko1/rperf'>GitHub repository</a>
|
|
27
|
+
</p>
|
|
28
|
+
|
|
29
|
+
## See It in Action
|
|
15
30
|
|
|
16
31
|
```bash
|
|
17
|
-
gem install rperf
|
|
32
|
+
$ gem install rperf
|
|
33
|
+
$ rperf exec ruby fib.rb
|
|
18
34
|
|
|
35
|
+
Performance stats for 'ruby fib.rb':
|
|
36
|
+
|
|
37
|
+
2,326.0 ms user
|
|
38
|
+
64.5 ms sys
|
|
39
|
+
2,035.5 ms real
|
|
40
|
+
|
|
41
|
+
2,034.2 ms 100.0% CPU execution
|
|
42
|
+
1 [Ruby] detected threads
|
|
43
|
+
7.0 ms [Ruby] GC time (7 count: 5 minor, 2 major)
|
|
44
|
+
106,078 [Ruby] allocated objects
|
|
45
|
+
22 MB [OS] peak memory (maxrss)
|
|
46
|
+
|
|
47
|
+
Flat:
|
|
48
|
+
2,034.2 ms 100.0% Object#fibonacci (fib.rb)
|
|
49
|
+
|
|
50
|
+
Cumulative:
|
|
51
|
+
2,034.2 ms 100.0% Object#fibonacci (fib.rb)
|
|
52
|
+
2,034.2 ms 100.0% <main> (fib.rb)
|
|
53
|
+
|
|
54
|
+
2034 samples / 2034 triggers, 0.1% profiler overhead
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Quick Start
|
|
58
|
+
|
|
59
|
+
```bash
|
|
19
60
|
# Performance summary (wall mode, prints to stderr)
|
|
20
61
|
rperf stat ruby app.rb
|
|
21
62
|
|
|
22
|
-
#
|
|
23
|
-
rperf record ruby app.rb # → rperf.data (
|
|
63
|
+
# Record a pprof profile to file
|
|
64
|
+
rperf record ruby app.rb # → rperf.data (cpu mode)
|
|
24
65
|
rperf record -m wall -o profile.pb.gz ruby server.rb # wall mode, custom output
|
|
25
66
|
|
|
26
67
|
# View results (report/diff require Go: https://go.dev/dl/)
|
|
@@ -67,19 +108,20 @@ Inspired by Linux `perf` — familiar subcommand interface for profiling workflo
|
|
|
67
108
|
|---------|-------------|
|
|
68
109
|
| `rperf record` | Profile a command and save to file |
|
|
69
110
|
| `rperf stat` | Profile a command and print summary to stderr |
|
|
111
|
+
| `rperf exec` | Profile a command and print full report to stderr |
|
|
70
112
|
| `rperf report` | Open pprof profile with `go tool pprof` (requires Go) |
|
|
71
113
|
| `rperf diff` | Compare two pprof profiles (requires Go) |
|
|
72
114
|
| `rperf help` | Show full reference documentation |
|
|
73
115
|
|
|
74
116
|
## How It Works
|
|
75
117
|
|
|
76
|
-
### The
|
|
118
|
+
### The Challenge: Safepoint Sampling
|
|
77
119
|
|
|
78
|
-
Ruby
|
|
120
|
+
Most Ruby profilers (e.g., stackprof) use signal handlers to capture stack traces at the exact moment the timer fires. rperf takes a different approach — it samples at **safepoints** (VM checkpoints), which is safer (no async-signal-safety concerns, reliable access to VM state) but means the sample timing can be delayed. Without correction, this delay would skew the results.
|
|
79
121
|
|
|
80
|
-
### The
|
|
122
|
+
### The Fix: Weight = Real Time
|
|
81
123
|
|
|
82
|
-
rperf uses **time
|
|
124
|
+
rperf uses **actual elapsed time as sample weights** — so delayed samples carry proportionally more weight, and the profile matches reality:
|
|
83
125
|
|
|
84
126
|
```
|
|
85
127
|
Timer (signal or thread) VM thread (postponed job)
|
|
@@ -116,23 +158,22 @@ rperf hooks GVL and GC events to attribute non-CPU time:
|
|
|
116
158
|
| `[GC marking]` | Time in GC mark phase |
|
|
117
159
|
| `[GC sweeping]` | Time in GC sweep phase |
|
|
118
160
|
|
|
119
|
-
##
|
|
161
|
+
## Why rperf?
|
|
120
162
|
|
|
121
|
-
|
|
163
|
+
- **Accurate despite safepoints** — Safepoint sampling is *safer* (no async-signal-safety issues), but normally *inaccurate*. rperf compensates with real time-delta weights, so profiles faithfully reflect where time is actually spent.
|
|
164
|
+
- **See the whole picture** (wall mode) — GVL contention, off-GVL I/O, GC marking/sweeping — all attributed to the call stacks responsible, via synthetic frames.
|
|
165
|
+
- **Low overhead** — Signal-based timer on Linux (no extra thread). ~1–5 µs per sample.
|
|
166
|
+
- **pprof compatible** — Works with `go tool pprof`, speedscope, and other standard tools out of the box.
|
|
167
|
+
- **Zero code changes** — Profile any Ruby program via CLI or environment variables. Drop-in for Rails, too.
|
|
168
|
+
- **`perf`-like CLI** — `record`, `stat`, `report`, `diff` — if you know Linux perf, you already know rperf.
|
|
122
169
|
|
|
123
|
-
|
|
124
|
-
- **GVL & GC visibility** (wall mode): Attributes off-GVL time, GVL contention, and GC phases to the responsible call stacks with synthetic frames.
|
|
125
|
-
- **Low overhead**: No extra thread on Linux (signal-based timer). Sampling overhead is ~1-5 us per sample.
|
|
126
|
-
- **pprof compatible**: Output works with `go tool pprof`, speedscope, and other standard tools.
|
|
127
|
-
- **No code changes required**: Profile any Ruby program via CLI (`rperf stat ruby app.rb`) or environment variables (`RPERF_ENABLED=1`).
|
|
128
|
-
- **perf-like CLI**: Familiar subcommand interface — `record`, `stat`, `report`, `diff` — inspired by Linux perf.
|
|
170
|
+
### Limitations
|
|
129
171
|
|
|
130
|
-
|
|
172
|
+
- **Method-level only** — no line-level granularity.
|
|
173
|
+
- **Ruby >= 3.4.0** — uses recent VM internals (postponed jobs, thread event hooks).
|
|
174
|
+
- **POSIX only** — Linux, macOS. No Windows.
|
|
175
|
+
- **No fork support** — profiling does not follow fork(2) child processes.
|
|
131
176
|
|
|
132
|
-
- **Method-level only**: Profiles at the method level, not the line level. You can see which method is slow, but not which line within it.
|
|
133
|
-
- **Ruby >= 3.4.0**: Requires recent Ruby for the internal APIs used (postponed jobs, thread event hooks).
|
|
134
|
-
- **POSIX only**: Linux, macOS, etc. No Windows support.
|
|
135
|
-
- **Safepoint sampling**: Cannot sample inside C extensions or during long-running C calls that don't reach a safepoint. Time spent there is attributed to the next sample.
|
|
136
177
|
|
|
137
178
|
## Output Formats
|
|
138
179
|
|
|
@@ -146,4 +187,4 @@ Format is auto-detected from extension, or set explicitly with `--format`.
|
|
|
146
187
|
|
|
147
188
|
## License
|
|
148
189
|
|
|
149
|
-
MIT
|
|
190
|
+
MIT
|
data/docs/help.md
CHANGED
|
@@ -10,6 +10,7 @@ POSIX systems (Linux, macOS). Requires Ruby >= 3.4.0.
|
|
|
10
10
|
|
|
11
11
|
rperf record [options] command [args...]
|
|
12
12
|
rperf stat [options] command [args...]
|
|
13
|
+
rperf exec [options] command [args...]
|
|
13
14
|
rperf report [options] [file]
|
|
14
15
|
rperf help
|
|
15
16
|
|
|
@@ -41,6 +42,20 @@ Shows: user/sys/real time, time breakdown (CPU execution, GVL blocked,
|
|
|
41
42
|
GVL wait, GC marking, GC sweeping), GC/memory/OS stats, and profiler overhead.
|
|
42
43
|
Use --report to add flat and cumulative top-50 function tables.
|
|
43
44
|
|
|
45
|
+
### exec: Run command and print full profile report to stderr.
|
|
46
|
+
|
|
47
|
+
Like `stat --report`. Uses wall mode by default. No file output by default.
|
|
48
|
+
|
|
49
|
+
-o, --output PATH Also save profile to file (default: none)
|
|
50
|
+
-f, --frequency HZ Sampling frequency in Hz (default: 1000)
|
|
51
|
+
-m, --mode MODE cpu or wall (default: wall)
|
|
52
|
+
--signal VALUE Timer signal (Linux only): signal number, or 'false'
|
|
53
|
+
for nanosleep thread (default: auto)
|
|
54
|
+
-v, --verbose Print additional sampling statistics
|
|
55
|
+
|
|
56
|
+
Shows: user/sys/real time, time breakdown, GC/memory/OS stats, profiler overhead,
|
|
57
|
+
and flat/cumulative top-50 function tables.
|
|
58
|
+
|
|
44
59
|
### report: Open pprof profile with go tool pprof. Requires Go.
|
|
45
60
|
|
|
46
61
|
--top Print top functions by flat time
|
|
@@ -67,6 +82,8 @@ Default (no flag): opens diff in browser.
|
|
|
67
82
|
rperf stat ruby app.rb
|
|
68
83
|
rperf stat --report ruby app.rb
|
|
69
84
|
rperf stat -o profile.pb.gz ruby app.rb
|
|
85
|
+
rperf exec ruby app.rb
|
|
86
|
+
rperf exec -m cpu ruby app.rb
|
|
70
87
|
rperf report
|
|
71
88
|
rperf report --top profile.pb.gz
|
|
72
89
|
rperf diff before.pb.gz after.pb.gz
|
|
@@ -106,23 +123,139 @@ Rperf.save("profile.txt", data)
|
|
|
106
123
|
nil if profiler was not running; otherwise a Hash:
|
|
107
124
|
|
|
108
125
|
```ruby
|
|
109
|
-
{ mode: :cpu,
|
|
126
|
+
{ mode: :cpu, # or :wall
|
|
110
127
|
frequency: 500,
|
|
111
128
|
sampling_count: 1234,
|
|
112
129
|
sampling_time_ns: 56789,
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
130
|
+
detected_thread_count: 4, # threads seen during profiling
|
|
131
|
+
start_time_ns: 17740..., # CLOCK_REALTIME epoch nanos
|
|
132
|
+
duration_ns: 10000000, # profiling duration in nanos
|
|
133
|
+
aggregated_samples: [ # when aggregate: true (default)
|
|
134
|
+
[frames, weight, seq, label_set_id], # frames: [[path, label], ...] deepest-first
|
|
135
|
+
... # weight: Integer (nanoseconds, merged per unique stack)
|
|
136
|
+
], # seq: Integer (thread sequence, 1-based)
|
|
137
|
+
# label_set_id: Integer (0 = no labels)
|
|
138
|
+
label_sets: [{}, {request: "abc"}, ...], # label set table (index = label_set_id)
|
|
139
|
+
# --- OR ---
|
|
140
|
+
raw_samples: [ # when aggregate: false
|
|
141
|
+
[frames, weight, seq, label_set_id], # one entry per timer sample (not merged)
|
|
142
|
+
...
|
|
143
|
+
] }
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Rperf.snapshot(clear: false)
|
|
147
|
+
|
|
148
|
+
Returns a snapshot of the current profiling data without stopping.
|
|
149
|
+
Only works in aggregate mode (the default). Returns nil if not profiling.
|
|
150
|
+
|
|
151
|
+
When `clear: true` is given, resets aggregated data after taking the snapshot.
|
|
152
|
+
This enables interval-based profiling where each snapshot covers only the
|
|
153
|
+
period since the last clear.
|
|
154
|
+
|
|
155
|
+
```ruby
|
|
156
|
+
Rperf.start(frequency: 1000)
|
|
157
|
+
# ... work ...
|
|
158
|
+
snap = Rperf.snapshot # read data without stopping
|
|
159
|
+
Rperf.save("snap.pb.gz", snap)
|
|
160
|
+
# ... more work ...
|
|
161
|
+
data = Rperf.stop
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Interval-based usage:
|
|
165
|
+
|
|
166
|
+
```ruby
|
|
167
|
+
Rperf.start(frequency: 1000)
|
|
168
|
+
loop do
|
|
169
|
+
sleep 10
|
|
170
|
+
snap = Rperf.snapshot(clear: true) # each snapshot covers the last 10s
|
|
171
|
+
Rperf.save("profile-#{Time.now.to_i}.pb.gz", snap)
|
|
172
|
+
end
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Rperf.label(**labels, &block)
|
|
176
|
+
|
|
177
|
+
Attaches key-value labels to the current thread's samples. Labels appear
|
|
178
|
+
in pprof sample labels, enabling per-context filtering (e.g., per-request).
|
|
179
|
+
If profiling is not running, labels are silently ignored (no error).
|
|
180
|
+
|
|
181
|
+
```ruby
|
|
182
|
+
# Block form — labels are restored when the block exits
|
|
183
|
+
Rperf.label(request: "abc-123", endpoint: "/api/users") do
|
|
184
|
+
handle_request # samples inside get these labels
|
|
185
|
+
end
|
|
186
|
+
# labels are restored to previous state here
|
|
187
|
+
|
|
188
|
+
# Without block — labels persist until changed
|
|
189
|
+
Rperf.label(request: "abc-123")
|
|
190
|
+
|
|
191
|
+
# Merge — new labels merge with existing ones
|
|
192
|
+
Rperf.label(phase: "db") # adds phase, keeps request
|
|
193
|
+
|
|
194
|
+
# Delete a key — set value to nil
|
|
195
|
+
Rperf.label(request: nil) # removes request key
|
|
196
|
+
|
|
197
|
+
# Nested blocks — each block restores its entry state
|
|
198
|
+
Rperf.label(request: "abc") do
|
|
199
|
+
Rperf.label(phase: "db") do
|
|
200
|
+
Rperf.labels #=> {request: "abc", phase: "db"}
|
|
201
|
+
end
|
|
202
|
+
Rperf.labels #=> {request: "abc"}
|
|
203
|
+
end
|
|
204
|
+
Rperf.labels #=> {}
|
|
119
205
|
```
|
|
120
206
|
|
|
207
|
+
In pprof output, use labels for filtering and grouping:
|
|
208
|
+
|
|
209
|
+
go tool pprof -tagfocus=request=abc-123 profile.pb.gz
|
|
210
|
+
go tool pprof -tagroot=request profile.pb.gz
|
|
211
|
+
go tool pprof -tagleaf=request profile.pb.gz
|
|
212
|
+
|
|
213
|
+
### Rperf.labels
|
|
214
|
+
|
|
215
|
+
Returns the current thread's labels as a Hash. Empty hash if none set.
|
|
216
|
+
|
|
121
217
|
### Rperf.save(path, data, format: nil)
|
|
122
218
|
|
|
123
219
|
Writes data to path. format: :pprof, :collapsed, or :text.
|
|
124
220
|
nil auto-detects from extension.
|
|
125
221
|
|
|
222
|
+
### Rperf::Middleware (Rack)
|
|
223
|
+
|
|
224
|
+
Labels samples with the request endpoint. Requires `require "rperf/middleware"`.
|
|
225
|
+
|
|
226
|
+
```ruby
|
|
227
|
+
# Rails
|
|
228
|
+
Rails.application.config.middleware.use Rperf::Middleware
|
|
229
|
+
|
|
230
|
+
# Sinatra
|
|
231
|
+
use Rperf::Middleware
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
The middleware only sets labels — start profiling separately.
|
|
235
|
+
Option: `label_key:` (default: `:endpoint`).
|
|
236
|
+
|
|
237
|
+
### Rperf::ActiveJobMiddleware
|
|
238
|
+
|
|
239
|
+
Labels samples with the job class name. Requires `require "rperf/active_job"`.
|
|
240
|
+
|
|
241
|
+
```ruby
|
|
242
|
+
class ApplicationJob < ActiveJob::Base
|
|
243
|
+
include Rperf::ActiveJobMiddleware
|
|
244
|
+
end
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Rperf::SidekiqMiddleware
|
|
248
|
+
|
|
249
|
+
Labels samples with the worker class name. Requires `require "rperf/sidekiq"`.
|
|
250
|
+
|
|
251
|
+
```ruby
|
|
252
|
+
Sidekiq.configure_server do |config|
|
|
253
|
+
config.server_middleware do |chain|
|
|
254
|
+
chain.add Rperf::SidekiqMiddleware
|
|
255
|
+
end
|
|
256
|
+
end
|
|
257
|
+
```
|
|
258
|
+
|
|
126
259
|
## PROFILING MODES
|
|
127
260
|
|
|
128
261
|
- **cpu** — Measures per-thread CPU time via Linux thread clock.
|
|
@@ -152,11 +285,20 @@ Embedded metadata:
|
|
|
152
285
|
Sample labels:
|
|
153
286
|
|
|
154
287
|
thread_seq thread sequence number (1-based, assigned per profiling session)
|
|
288
|
+
<user labels> custom key-value labels set via Rperf.label()
|
|
155
289
|
|
|
156
290
|
View comments: `go tool pprof -comments profile.pb.gz`
|
|
157
291
|
|
|
158
292
|
Group by thread: `go tool pprof -tagroot=thread_seq profile.pb.gz`
|
|
159
293
|
|
|
294
|
+
Filter by label: `go tool pprof -tagfocus=request=abc-123 profile.pb.gz`
|
|
295
|
+
|
|
296
|
+
Group by label (root): `go tool pprof -tagroot=request profile.pb.gz`
|
|
297
|
+
|
|
298
|
+
Group by label (leaf): `go tool pprof -tagleaf=request profile.pb.gz`
|
|
299
|
+
|
|
300
|
+
Exclude by label: `go tool pprof -tagignore=request=healthcheck profile.pb.gz`
|
|
301
|
+
|
|
160
302
|
### collapsed
|
|
161
303
|
|
|
162
304
|
Plain text. One line per unique stack: `frame1;frame2;...;leaf weight`
|
data/exe/rperf
CHANGED
|
@@ -72,6 +72,7 @@ HELP_TEXT = File.read(File.expand_path("../docs/help.md", __dir__))
|
|
|
72
72
|
|
|
73
73
|
USAGE = "Usage: rperf record [options] command [args...]\n" \
|
|
74
74
|
" rperf stat [options] command [args...]\n" \
|
|
75
|
+
" rperf exec [options] command [args...]\n" \
|
|
75
76
|
" rperf report [options] [file]\n" \
|
|
76
77
|
" rperf diff [options] base.pb.gz target.pb.gz\n" \
|
|
77
78
|
" rperf help\n"
|
|
@@ -79,7 +80,7 @@ USAGE = "Usage: rperf record [options] command [args...]\n" \
|
|
|
79
80
|
# Handle top-level flags before subcommand parsing
|
|
80
81
|
case ARGV.first
|
|
81
82
|
when "-v", "--version"
|
|
82
|
-
|
|
83
|
+
require_relative "../lib/rperf"
|
|
83
84
|
puts "rperf #{Rperf::VERSION}"
|
|
84
85
|
exit
|
|
85
86
|
when "-h", "--help"
|
|
@@ -120,7 +121,7 @@ when "diff"
|
|
|
120
121
|
else exec("go", "tool", "pprof", "-http=localhost:#{find_available_port}", "-diff_base=#{base_file}", target_file)
|
|
121
122
|
end
|
|
122
123
|
end
|
|
123
|
-
when "record", "stat"
|
|
124
|
+
when "record", "stat", "exec"
|
|
124
125
|
# continue below
|
|
125
126
|
else
|
|
126
127
|
$stderr.puts "Unknown subcommand: #{subcommand.inspect}" if subcommand
|
|
@@ -128,22 +129,23 @@ else
|
|
|
128
129
|
exit 1
|
|
129
130
|
end
|
|
130
131
|
|
|
131
|
-
output = (subcommand == "
|
|
132
|
+
output = (subcommand == "record") ? "rperf.data" : nil
|
|
132
133
|
frequency = 1000
|
|
133
|
-
mode = (subcommand == "
|
|
134
|
+
mode = (subcommand == "record") ? "cpu" : "wall"
|
|
134
135
|
format = nil
|
|
135
136
|
signal = nil
|
|
136
137
|
verbose = false
|
|
137
138
|
aggregate = true
|
|
138
|
-
stat_report =
|
|
139
|
+
stat_report = (subcommand == "exec")
|
|
139
140
|
|
|
140
141
|
parser = OptionParser.new do |opts|
|
|
141
142
|
opts.banner = case subcommand
|
|
142
143
|
when "record" then "Usage: rperf record [options] command [args...]"
|
|
143
144
|
when "stat" then "Usage: rperf stat [options] command [args...]"
|
|
145
|
+
when "exec" then "Usage: rperf exec [options] command [args...]"
|
|
144
146
|
end
|
|
145
147
|
|
|
146
|
-
opts.on("-o", "--output PATH", "Output file#{subcommand == '
|
|
148
|
+
opts.on("-o", "--output PATH", "Output file#{subcommand == 'record' ? ' (default: rperf.data)' : ' (default: none)'}") do |v|
|
|
147
149
|
output = v
|
|
148
150
|
end
|
|
149
151
|
|
|
@@ -151,7 +153,7 @@ parser = OptionParser.new do |opts|
|
|
|
151
153
|
frequency = v
|
|
152
154
|
end
|
|
153
155
|
|
|
154
|
-
default_mode = (subcommand == "
|
|
156
|
+
default_mode = (subcommand == "record") ? "cpu" : "wall"
|
|
155
157
|
opts.on("-m", "--mode MODE", %w[cpu wall], "Profiling mode: cpu or wall (default: #{default_mode})") do |v|
|
|
156
158
|
mode = v
|
|
157
159
|
end
|
|
@@ -208,6 +210,29 @@ if ARGV.empty?
|
|
|
208
210
|
exit 1
|
|
209
211
|
end
|
|
210
212
|
|
|
213
|
+
if frequency <= 0
|
|
214
|
+
$stderr.puts "Error: frequency must be a positive integer (got #{frequency})"
|
|
215
|
+
exit 1
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
if frequency > 10_000
|
|
219
|
+
$stderr.puts "Error: frequency must be <= 10000 (10KHz), got #{frequency}"
|
|
220
|
+
exit 1
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
if signal && signal != "false"
|
|
224
|
+
unless RUBY_PLATFORM =~ /linux/
|
|
225
|
+
$stderr.puts "Error: signal mode is only supported on Linux"
|
|
226
|
+
exit 1
|
|
227
|
+
end
|
|
228
|
+
sig_num = signal.to_i
|
|
229
|
+
uncatchable = [Signal.list["KILL"], Signal.list["STOP"]].compact
|
|
230
|
+
if uncatchable.include?(sig_num)
|
|
231
|
+
$stderr.puts "Error: signal #{sig_num} (#{Signal.signame(sig_num)}) cannot be caught; use a different signal"
|
|
232
|
+
exit 1
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
211
236
|
# Add lib dir to RUBYLIB so -rrperf can find the extension
|
|
212
237
|
lib_dir = File.expand_path("../lib", __dir__)
|
|
213
238
|
ENV["RUBYLIB"] = [lib_dir, ENV["RUBYLIB"]].compact.join(File::PATH_SEPARATOR)
|
|
@@ -221,7 +246,7 @@ ENV["RPERF_VERBOSE"] = "1" if verbose
|
|
|
221
246
|
ENV["RPERF_SIGNAL"] = signal if signal
|
|
222
247
|
ENV["RPERF_AGGREGATE"] = "0" unless aggregate
|
|
223
248
|
|
|
224
|
-
if subcommand == "stat"
|
|
249
|
+
if subcommand == "stat" || subcommand == "exec"
|
|
225
250
|
ENV["RPERF_STAT"] = "1"
|
|
226
251
|
ENV["RPERF_STAT_COMMAND"] = ARGV.join(" ")
|
|
227
252
|
ENV["RPERF_STAT_REPORT"] = "1" if stat_report
|