rperf 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +12 -10
- data/docs/help.md +106 -9
- data/exe/rperf +35 -6
- data/ext/rperf/rperf.c +129 -43
- data/lib/rperf/active_job.rb +1 -0
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf/viewer.rb +66 -17
- data/lib/rperf.rb +514 -60
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7182c353301aa38afde2d65928219c46cee3e777b49842bf60331dd40e7b3ab2
|
|
4
|
+
data.tar.gz: ebb30b807d9b86a7ff48090bc5990d6bdcc0c9951f80b21758df2413bdbd39f2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d70dde1af1a4c3c9cec02e20981038c465facea4d3da32c32ce26e16cfc402de01f594c0b41386e3095f53603631f930fb2bd133d23b8a5757662d40f117d2a5
|
|
7
|
+
data.tar.gz: b7868f0237f84a47bda6286c99b7056f47738fb235ede0d21ac8f950feb9b891b1677dff38f25294393766cbe4f742b2eeffe73a43dded92562926db8d5bd392
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Koichi Sasada
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
CHANGED
|
@@ -69,7 +69,7 @@ rperf stat ruby app.rb
|
|
|
69
69
|
rperf record ruby app.rb # → rperf.json.gz (cpu mode, default)
|
|
70
70
|
rperf record -m wall ruby server.rb # wall mode
|
|
71
71
|
|
|
72
|
-
# View results in browser
|
|
72
|
+
# View results in browser
|
|
73
73
|
rperf report # open rperf.json.gz in viewer
|
|
74
74
|
rperf report --top profile.json.gz # print top functions to terminal
|
|
75
75
|
|
|
@@ -77,6 +77,8 @@ rperf report --top profile.json.gz # print top functions to terminal
|
|
|
77
77
|
rperf diff before.json.gz after.json.gz # open diff in browser
|
|
78
78
|
```
|
|
79
79
|
|
|
80
|
+
`rperf report` shows the profile result in a viewer like this example page: [rperf viewer](https://ko1.github.io/rperf/examples/cpu_intensive_profile.html)
|
|
81
|
+
|
|
80
82
|
### Ruby API
|
|
81
83
|
|
|
82
84
|
```ruby
|
|
@@ -156,7 +158,7 @@ Timer (signal or thread) VM thread (postponed job)
|
|
|
156
158
|
record(backtrace, weight)
|
|
157
159
|
```
|
|
158
160
|
|
|
159
|
-
On Linux, the timer uses `timer_create` + signal delivery
|
|
161
|
+
On Linux, the timer uses `timer_create` + signal delivery to a dedicated worker thread.
|
|
160
162
|
On other platforms, a dedicated pthread with `nanosleep` is used.
|
|
161
163
|
|
|
162
164
|
If a safepoint is delayed, the sample carries proportionally more weight. The total weight equals the total time, accurately distributed across call stacks.
|
|
@@ -170,16 +172,16 @@ If a safepoint is delayed, the sample carries proportionally more weight. The to
|
|
|
170
172
|
|
|
171
173
|
Use `cpu` to find what consumes CPU. Use `wall` to find what makes things slow (I/O, GVL contention, GC).
|
|
172
174
|
|
|
173
|
-
### GVL and GC Labels
|
|
175
|
+
### GVL and GC Labels
|
|
174
176
|
|
|
175
177
|
rperf hooks GVL and GC events to attribute non-CPU time. These are recorded as labels on samples rather than synthetic stack frames:
|
|
176
178
|
|
|
177
|
-
| Label | Meaning |
|
|
178
|
-
|
|
179
|
-
| `%GVL
|
|
180
|
-
| `%GVL
|
|
181
|
-
| `%GC
|
|
182
|
-
| `%GC
|
|
179
|
+
| Label (key=value) | Mode | Meaning |
|
|
180
|
+
|-------|------|---------|
|
|
181
|
+
| `%GVL=blocked` | wall only | Off-GVL time (I/O, sleep, C extension releasing GVL) |
|
|
182
|
+
| `%GVL=wait` | wall only | Waiting to reacquire the GVL (contention) |
|
|
183
|
+
| `%GC=mark` | cpu and wall | Time in GC mark phase (wall time) |
|
|
184
|
+
| `%GC=sweep` | cpu and wall | Time in GC sweep phase (wall time) |
|
|
183
185
|
|
|
184
186
|
## Why rperf?
|
|
185
187
|
|
|
@@ -189,13 +191,13 @@ rperf hooks GVL and GC events to attribute non-CPU time. These are recorded as l
|
|
|
189
191
|
- **Low overhead** — Signal-based timer on Linux (no extra thread). ~1–5 us per sample.
|
|
190
192
|
- **Zero code changes** — Profile any Ruby program via CLI or environment variables. Drop-in for Rails, too.
|
|
191
193
|
- **`perf`-like CLI** — `record`, `stat`, `report`, `diff` — if you know Linux perf, you already know rperf.
|
|
194
|
+
- **Multi-process** — automatically profiles forked/spawned Ruby child processes (e.g., Unicorn/Puma workers). Use `--no-inherit` to disable.
|
|
192
195
|
|
|
193
196
|
### Limitations
|
|
194
197
|
|
|
195
198
|
- **Method-level only** — no line-level granularity.
|
|
196
199
|
- **Ruby >= 3.4.0** — uses recent VM internals (postponed jobs, thread event hooks).
|
|
197
200
|
- **POSIX only** — Linux, macOS. No Windows.
|
|
198
|
-
- **No fork following** — profiling stops in fork(2) child processes (the child can start a new session).
|
|
199
201
|
|
|
200
202
|
|
|
201
203
|
## Output Formats
|
data/docs/help.md
CHANGED
|
@@ -12,7 +12,9 @@ POSIX systems (Linux, macOS). Requires Ruby >= 3.4.0.
|
|
|
12
12
|
rperf stat [options] command [args...]
|
|
13
13
|
rperf exec [options] command [args...]
|
|
14
14
|
rperf report [options] [file]
|
|
15
|
+
rperf diff [options] base target
|
|
15
16
|
rperf help
|
|
17
|
+
rperf -v / --version
|
|
16
18
|
|
|
17
19
|
### record: Profile and save to file.
|
|
18
20
|
|
|
@@ -24,6 +26,8 @@ POSIX systems (Linux, macOS). Requires Ruby >= 3.4.0.
|
|
|
24
26
|
(same as --format=text --output=/dev/stdout)
|
|
25
27
|
--signal VALUE Timer signal (Linux only): signal number, or 'false'
|
|
26
28
|
for nanosleep thread (default: auto)
|
|
29
|
+
--no-inherit Do not profile forked/spawned child processes
|
|
30
|
+
--no-aggregate Disable C-level sample aggregation (raw per-sample data)
|
|
27
31
|
-v, --verbose Print sampling statistics to stderr
|
|
28
32
|
|
|
29
33
|
### stat: Run command and print performance summary to stderr.
|
|
@@ -36,6 +40,8 @@ Uses wall mode by default. No file output by default.
|
|
|
36
40
|
--report Include flat/cumulative profile tables in output
|
|
37
41
|
--signal VALUE Timer signal (Linux only): signal number, or 'false'
|
|
38
42
|
for nanosleep thread (default: auto)
|
|
43
|
+
--no-inherit Do not profile forked/spawned child processes
|
|
44
|
+
--no-aggregate Disable C-level sample aggregation (raw per-sample data)
|
|
39
45
|
-v, --verbose Print additional sampling statistics
|
|
40
46
|
|
|
41
47
|
Shows: user/sys/real time, time breakdown (CPU execution, GVL blocked,
|
|
@@ -44,6 +50,10 @@ Lines are prefixed: `[Rperf]` for sampling-derived data, `[Ruby ]` for
|
|
|
44
50
|
runtime info, `[OS ]` for OS-level info.
|
|
45
51
|
Use --report to add flat and cumulative top-50 function tables.
|
|
46
52
|
|
|
53
|
+
When child processes are profiled (default), the stat output shows
|
|
54
|
+
aggregated data from all processes and includes a "Ruby processes profiled"
|
|
55
|
+
count. Use --no-inherit to disable child process tracking.
|
|
56
|
+
|
|
47
57
|
### exec: Run command and print full profile report to stderr.
|
|
48
58
|
|
|
49
59
|
Like `stat --report`. Uses wall mode by default. No file output by default.
|
|
@@ -53,6 +63,8 @@ Like `stat --report`. Uses wall mode by default. No file output by default.
|
|
|
53
63
|
-m, --mode MODE cpu or wall (default: wall)
|
|
54
64
|
--signal VALUE Timer signal (Linux only): signal number, or 'false'
|
|
55
65
|
for nanosleep thread (default: auto)
|
|
66
|
+
--no-inherit Do not profile forked/spawned child processes
|
|
67
|
+
--no-aggregate Disable C-level sample aggregation (raw per-sample data)
|
|
56
68
|
-v, --verbose Print additional sampling statistics
|
|
57
69
|
|
|
58
70
|
Shows: user/sys/real time, time breakdown, GC/memory/OS stats, profiler overhead,
|
|
@@ -62,17 +74,70 @@ and flat/cumulative top-50 function tables.
|
|
|
62
74
|
|
|
63
75
|
--top Print top functions by flat time
|
|
64
76
|
--text Print text report
|
|
77
|
+
--html Output static HTML viewer to stdout
|
|
65
78
|
|
|
66
79
|
Default (no flag): opens interactive web UI in browser.
|
|
67
80
|
Default file: rperf.json.gz
|
|
68
81
|
|
|
69
|
-
|
|
82
|
+
`--html` generates an HTML file with profile data embedded inline.
|
|
83
|
+
No server is needed — open it directly in a browser. d3 and
|
|
84
|
+
d3-flamegraph are loaded from CDN, so an internet connection is
|
|
85
|
+
required on first viewing. Useful for sharing or hosting on static
|
|
86
|
+
sites (e.g., GitHub Pages).
|
|
87
|
+
|
|
88
|
+
rperf report --html profile.json.gz > report.html
|
|
89
|
+
|
|
90
|
+
### diff: Compare two profiles (target - base). Requires Go.
|
|
91
|
+
|
|
92
|
+
Accepts `.json.gz` (auto-converted to pprof) or `.pb.gz` files.
|
|
70
93
|
|
|
71
94
|
--top Print top functions by diff
|
|
72
95
|
--text Print text diff report
|
|
73
96
|
|
|
74
97
|
Default (no flag): opens diff in browser.
|
|
75
98
|
|
|
99
|
+
### Multi-process profiling
|
|
100
|
+
|
|
101
|
+
By default, rperf profiles forked and spawned Ruby child processes.
|
|
102
|
+
Profiles from all processes are merged into a single output. Each child
|
|
103
|
+
process's samples are tagged with a `%pid` label for per-process filtering.
|
|
104
|
+
|
|
105
|
+
# Profile a preforking server (Unicorn, Puma, etc.)
|
|
106
|
+
rperf stat -m wall bundle exec unicorn
|
|
107
|
+
rperf record -m wall -o profile.json.gz bundle exec unicorn
|
|
108
|
+
|
|
109
|
+
# Profile with fork
|
|
110
|
+
rperf stat ruby -e '4.times { fork { work } }; Process.waitall'
|
|
111
|
+
|
|
112
|
+
# Disable child process tracking
|
|
113
|
+
rperf stat --no-inherit ruby app.rb
|
|
114
|
+
|
|
115
|
+
How it works:
|
|
116
|
+
|
|
117
|
+
- On fork: `Process._fork` hook restarts profiling in the child and sets
|
|
118
|
+
a `%pid` label. When the child exits, its profile is saved to a
|
|
119
|
+
temporary session directory.
|
|
120
|
+
- On spawn/system: The spawned Ruby process inherits `RUBYLIB` (pointing
|
|
121
|
+
to rperf's lib directory) and `RUBYOPT=-rrperf`, plus `RPERF_SESSION_DIR`.
|
|
122
|
+
It auto-starts profiling and writes its profile to the session directory.
|
|
123
|
+
- When the root process exits, it aggregates all profiles from the
|
|
124
|
+
session directory into a single output (stat report or file).
|
|
125
|
+
- The session directory is cleaned up after aggregation.
|
|
126
|
+
|
|
127
|
+
Limitations:
|
|
128
|
+
|
|
129
|
+
- Daemon children (Process.daemon) that outlive the parent will have
|
|
130
|
+
their profiles lost, since the parent aggregates and cleans up the
|
|
131
|
+
session directory at exit.
|
|
132
|
+
- Cross-process snapshots (Rperf.snapshot) are not supported; snapshots
|
|
133
|
+
only cover the current process.
|
|
134
|
+
- Only Ruby child processes are profiled; non-Ruby children (shell
|
|
135
|
+
scripts, Python, etc.) are not affected.
|
|
136
|
+
- Child processes that use rperf independently (Rperf.start in their
|
|
137
|
+
own code) will conflict with the inherited auto-start session.
|
|
138
|
+
Such programs should clear RPERF_ENABLED from their environment
|
|
139
|
+
before requiring rperf.
|
|
140
|
+
|
|
76
141
|
### Examples
|
|
77
142
|
|
|
78
143
|
rperf record ruby app.rb
|
|
@@ -84,6 +149,8 @@ Default (no flag): opens diff in browser.
|
|
|
84
149
|
rperf stat ruby app.rb
|
|
85
150
|
rperf stat --report ruby app.rb
|
|
86
151
|
rperf stat -o profile.pb.gz ruby app.rb
|
|
152
|
+
rperf stat -m wall bundle exec unicorn
|
|
153
|
+
rperf stat --no-inherit ruby app.rb
|
|
87
154
|
rperf exec ruby app.rb
|
|
88
155
|
rperf exec -m cpu ruby app.rb
|
|
89
156
|
rperf report
|
|
@@ -114,12 +181,17 @@ Rperf.save("profile.txt", data)
|
|
|
114
181
|
|
|
115
182
|
### Rperf.start parameters
|
|
116
183
|
|
|
117
|
-
frequency: Sampling frequency in Hz (Integer, default: 1000)
|
|
184
|
+
frequency: Sampling frequency in Hz (Integer, 1..10000, default: 1000)
|
|
118
185
|
mode: :cpu or :wall (Symbol, default: :cpu)
|
|
119
186
|
output: File path to write on stop (String or nil)
|
|
120
187
|
verbose: Print statistics to stderr (true/false, default: false)
|
|
121
188
|
format: :json, :pprof, :collapsed, :text, or nil for auto-detect (Symbol or nil)
|
|
122
189
|
defer: Start with timer paused; use Rperf.profile to activate (default: false)
|
|
190
|
+
inherit: Child process tracking: :fork (default), true (fork+spawn), false (none)
|
|
191
|
+
Note: CLI defaults to true (--no-inherit to disable)
|
|
192
|
+
signal: Timer signal (Linux only): nil (default, auto), false (use nanosleep),
|
|
193
|
+
or a signal number (Integer)
|
|
194
|
+
aggregate: Aggregate samples in C (default: true). false returns raw per-sample data
|
|
123
195
|
|
|
124
196
|
### Rperf.stop return value
|
|
125
197
|
|
|
@@ -128,7 +200,8 @@ nil if profiler was not running; otherwise a Hash:
|
|
|
128
200
|
```ruby
|
|
129
201
|
{ mode: :cpu, # or :wall
|
|
130
202
|
frequency: 500,
|
|
131
|
-
|
|
203
|
+
trigger_count: 1300, # number of timer triggers
|
|
204
|
+
sampling_count: 1234, # number of timer callbacks (may differ from trigger_count)
|
|
132
205
|
sampling_time_ns: 56789,
|
|
133
206
|
detected_thread_count: 4, # threads seen during profiling
|
|
134
207
|
start_time_ns: 17740..., # CLOCK_REALTIME epoch nanos
|
|
@@ -153,7 +226,9 @@ Only works in aggregate mode (the default). Returns nil if not profiling.
|
|
|
153
226
|
|
|
154
227
|
When `clear: true` is given, resets aggregated data after taking the snapshot.
|
|
155
228
|
This enables interval-based profiling where each snapshot covers only the
|
|
156
|
-
period since the last clear.
|
|
229
|
+
period since the last clear. Note: the frame table is intentionally retained
|
|
230
|
+
(frame IDs must stay stable for GC safety and thread data consistency), so
|
|
231
|
+
`unique_frames` may accumulate across intervals.
|
|
157
232
|
|
|
158
233
|
```ruby
|
|
159
234
|
Rperf.start(frequency: 1000)
|
|
@@ -250,6 +325,18 @@ running). Raises `RuntimeError` if not started, `ArgumentError` without block.
|
|
|
250
325
|
|
|
251
326
|
Returns the current thread's labels as a Hash. Empty hash if none set.
|
|
252
327
|
|
|
328
|
+
### Rperf.load(path)
|
|
329
|
+
|
|
330
|
+
Loads a `.json.gz` or `.json` profile file (saved by `rperf record` or `Rperf.save`)
|
|
331
|
+
and returns the parsed data hash (same format as `Rperf.stop` / `Rperf.snapshot`).
|
|
332
|
+
Gzip is auto-detected by magic bytes, so both compressed and plain files work.
|
|
333
|
+
Warns to stderr if the file was saved by a different rperf version.
|
|
334
|
+
|
|
335
|
+
```ruby
|
|
336
|
+
data = Rperf.load("rperf.json.gz") # gzip compressed
|
|
337
|
+
data = Rperf.load("profile.json") # plain text JSON
|
|
338
|
+
```
|
|
339
|
+
|
|
253
340
|
### Rperf.save(path, data, format: nil)
|
|
254
341
|
|
|
255
342
|
Writes data to path. format: :json, :pprof, :collapsed, or :text.
|
|
@@ -389,12 +476,12 @@ Tag keys are sorted alphabetically (`%`-prefixed VM state keys appear first).
|
|
|
389
476
|
|
|
390
477
|
### json (default) — rperf native format
|
|
391
478
|
|
|
392
|
-
|
|
479
|
+
JSON representation of the internal data hash
|
|
393
480
|
(the same hash returned by `Rperf.stop` / `Rperf.snapshot` — see
|
|
394
481
|
"Return value" above for the full structure).
|
|
395
482
|
Preserves all data including labels, VM state, thread info, and statistics.
|
|
396
483
|
Readable by non-Ruby tools (Python, jq, etc.).
|
|
397
|
-
Extension convention: `.json.gz`
|
|
484
|
+
Extension convention: `.json.gz` (gzip-compressed, default) or `.json` (plain text).
|
|
398
485
|
View with: `rperf report` (opens rperf viewer in browser, no Go required).
|
|
399
486
|
Load programmatically: `data = Rperf.load("rperf.json.gz")`
|
|
400
487
|
|
|
@@ -459,7 +546,8 @@ Example output:
|
|
|
459
546
|
|
|
460
547
|
Format is auto-detected from the output file extension:
|
|
461
548
|
|
|
462
|
-
.json.gz → json (rperf native, default)
|
|
549
|
+
.json.gz → json (rperf native, gzip compressed, default)
|
|
550
|
+
.json → json (plain text, readable by jq)
|
|
463
551
|
.pb.gz → pprof
|
|
464
552
|
.collapsed → collapsed
|
|
465
553
|
.txt → text
|
|
@@ -485,8 +573,8 @@ In both modes, GC state labels are recorded:
|
|
|
485
573
|
- **%GC=mark** — Time spent in GC marking phase (wall time).
|
|
486
574
|
- **%GC=sweep** — Time spent in GC sweeping phase (wall time).
|
|
487
575
|
|
|
488
|
-
These labels appear in `label_sets` (e.g., `{"%GVL" => "blocked"}`,
|
|
489
|
-
`{"%GC" => "mark"}`) and are written into pprof sample labels.
|
|
576
|
+
These labels appear in `label_sets` (e.g., `{:"%GVL" => "blocked"}`,
|
|
577
|
+
`{:"%GC" => "mark"}`) and are written into pprof sample labels.
|
|
490
578
|
|
|
491
579
|
To add VM state as frames in flamegraphs, use pprof tag options:
|
|
492
580
|
|
|
@@ -581,6 +669,15 @@ Used internally by the CLI to pass options to the auto-started profiler:
|
|
|
581
669
|
RPERF_SIGNAL=N|false Timer signal number or 'false' for nanosleep (Linux only)
|
|
582
670
|
RPERF_STAT=1 Enable stat mode (used by rperf stat)
|
|
583
671
|
RPERF_STAT_REPORT=1 Include profile tables in stat output
|
|
672
|
+
RPERF_AGGREGATE=0 Disable C-level sample aggregation (raw mode)
|
|
673
|
+
RPERF_DEFER=1 Start with timer paused; use Rperf.profile to activate
|
|
674
|
+
RPERF_TMPDIR=path Base directory for session directories (overrides default tmpdir)
|
|
675
|
+
|
|
676
|
+
Internal variables (set automatically by the CLI — not for manual use):
|
|
677
|
+
|
|
678
|
+
RPERF_SESSION_DIR=path Session directory for multi-process profiling
|
|
679
|
+
RPERF_ROOT_PROCESS=pid Marks the root aggregating process
|
|
680
|
+
RPERF_STAT_COMMAND=str Command string displayed in stat output
|
|
584
681
|
|
|
585
682
|
## TIPS
|
|
586
683
|
|
data/exe/rperf
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
require "optparse"
|
|
3
3
|
require "socket"
|
|
4
|
+
require "fileutils"
|
|
4
5
|
|
|
5
6
|
def find_available_port
|
|
6
7
|
server = TCPServer.new("localhost", 0)
|
|
@@ -23,6 +24,12 @@ def run_pprof_subcommand(name, banner, min_files:)
|
|
|
23
24
|
mode = :text
|
|
24
25
|
end
|
|
25
26
|
|
|
27
|
+
if min_files == 1
|
|
28
|
+
opts.on("--html", "Output static HTML viewer to stdout") do
|
|
29
|
+
mode = :html
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
26
33
|
opts.on("-h", "--help", "Show this help") do
|
|
27
34
|
puts opts
|
|
28
35
|
exit
|
|
@@ -31,7 +38,7 @@ def run_pprof_subcommand(name, banner, min_files:)
|
|
|
31
38
|
|
|
32
39
|
begin
|
|
33
40
|
parser.order!(ARGV)
|
|
34
|
-
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
|
|
41
|
+
rescue OptionParser::InvalidOption, OptionParser::MissingArgument, OptionParser::NeedlessArgument => e
|
|
35
42
|
$stderr.puts e.message
|
|
36
43
|
$stderr.puts parser
|
|
37
44
|
exit 1
|
|
@@ -63,13 +70,15 @@ def run_pprof_subcommand(name, banner, min_files:)
|
|
|
63
70
|
yield mode, files
|
|
64
71
|
end
|
|
65
72
|
|
|
66
|
-
|
|
73
|
+
def self.help_text
|
|
74
|
+
@help_text ||= File.read(File.expand_path("../docs/help.md", __dir__))
|
|
75
|
+
end
|
|
67
76
|
|
|
68
77
|
USAGE = "Usage: rperf record [options] command [args...]\n" \
|
|
69
78
|
" rperf stat [options] command [args...]\n" \
|
|
70
79
|
" rperf exec [options] command [args...]\n" \
|
|
71
80
|
" rperf report [options] [file]\n" \
|
|
72
|
-
" rperf diff [options] base
|
|
81
|
+
" rperf diff [options] base target\n" \
|
|
73
82
|
" rperf help\n"
|
|
74
83
|
|
|
75
84
|
# Handle top-level flags before subcommand parsing
|
|
@@ -89,7 +98,7 @@ subcommand = ARGV.shift
|
|
|
89
98
|
|
|
90
99
|
case subcommand
|
|
91
100
|
when "help"
|
|
92
|
-
puts
|
|
101
|
+
puts help_text
|
|
93
102
|
exit
|
|
94
103
|
when "report"
|
|
95
104
|
run_pprof_subcommand("report",
|
|
@@ -114,6 +123,9 @@ when "report"
|
|
|
114
123
|
when :text
|
|
115
124
|
$stdout.puts Rperf::Text.encode(data)
|
|
116
125
|
exit
|
|
126
|
+
when :html
|
|
127
|
+
$stdout.puts Rperf::Viewer.render_static_html(data)
|
|
128
|
+
exit
|
|
117
129
|
end
|
|
118
130
|
|
|
119
131
|
port = find_available_port
|
|
@@ -225,6 +237,7 @@ signal = nil
|
|
|
225
237
|
verbose = false
|
|
226
238
|
aggregate = true
|
|
227
239
|
stat_report = (subcommand == "exec")
|
|
240
|
+
inherit = true
|
|
228
241
|
|
|
229
242
|
parser = OptionParser.new do |opts|
|
|
230
243
|
opts.banner = case subcommand
|
|
@@ -279,6 +292,10 @@ parser = OptionParser.new do |opts|
|
|
|
279
292
|
end
|
|
280
293
|
end
|
|
281
294
|
|
|
295
|
+
opts.on("--no-inherit", "Do not profile forked/spawned child processes (default: inherit)") do
|
|
296
|
+
inherit = false
|
|
297
|
+
end
|
|
298
|
+
|
|
282
299
|
opts.on("-v", "--verbose", "Print sampling statistics to stderr") do
|
|
283
300
|
verbose = true
|
|
284
301
|
end
|
|
@@ -293,7 +310,7 @@ end
|
|
|
293
310
|
|
|
294
311
|
begin
|
|
295
312
|
parser.order!(ARGV)
|
|
296
|
-
rescue OptionParser::InvalidOption, OptionParser::MissingArgument => e
|
|
313
|
+
rescue OptionParser::InvalidOption, OptionParser::MissingArgument, OptionParser::NeedlessArgument => e
|
|
297
314
|
$stderr.puts e.message
|
|
298
315
|
$stderr.puts parser
|
|
299
316
|
exit 1
|
|
@@ -328,7 +345,9 @@ if signal && signal != "false"
|
|
|
328
345
|
end
|
|
329
346
|
end
|
|
330
347
|
|
|
331
|
-
# Add lib dir to RUBYLIB so -rrperf can find the
|
|
348
|
+
# Add lib dir to RUBYLIB so -rrperf can find the correct version.
|
|
349
|
+
# RUBYLIB handles spaces in paths safely (PATH_SEPARATOR delimited).
|
|
350
|
+
# RUBYOPT -r<path> does not support spaces, so we use RUBYLIB + -rrperf.
|
|
332
351
|
lib_dir = File.expand_path("../lib", __dir__)
|
|
333
352
|
ENV["RUBYLIB"] = [lib_dir, ENV["RUBYLIB"]].compact.join(File::PATH_SEPARATOR)
|
|
334
353
|
ENV["RUBYOPT"] = "-rrperf #{ENV['RUBYOPT']}".strip
|
|
@@ -347,4 +366,14 @@ if subcommand == "stat" || subcommand == "exec"
|
|
|
347
366
|
ENV["RPERF_STAT_REPORT"] = "1" if stat_report
|
|
348
367
|
end
|
|
349
368
|
|
|
369
|
+
# Multi-process (fork) support: create a session directory for aggregation
|
|
370
|
+
if inherit
|
|
371
|
+
require_relative "../lib/rperf"
|
|
372
|
+
session_dir = Rperf.send(:_create_session_dir, clean_stale: true)
|
|
373
|
+
if session_dir
|
|
374
|
+
ENV["RPERF_ROOT_PROCESS"] = Process.pid.to_s
|
|
375
|
+
ENV["RPERF_SESSION_DIR"] = session_dir
|
|
376
|
+
end
|
|
377
|
+
end
|
|
378
|
+
|
|
350
379
|
exec(*ARGV)
|