sperf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +125 -0
- data/exe/sperf +457 -0
- data/ext/sperf/extconf.rb +6 -0
- data/ext/sperf/sperf.c +708 -0
- data/lib/sperf.rb +598 -0
- metadata +79 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: aecc23432b8dba72018c524b1fb6653a0c83a44e9e8b2c9af17c87cde20b648b
|
|
4
|
+
data.tar.gz: fbdf1dddcb5b8fef86e358f315a801478bfcad7768d8f28cbcf82726c67d529c
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: ff265b78dd2e237dac4d07a32b7459e07e73d77504347556945878e4addc1ccf89c47c409824591ff800cbe5605a863ae536b200c7f7c5eca5d06bbbd4ca6c05
|
|
7
|
+
data.tar.gz: 950c3860737f37a9d57d15dae9db259635ebe94494096ad06c0489ab6c028827ea2b24d7e0046462e3b57ea546173993b2ed3e0c910bc677828b23b01ed718e9
|
data/README.md
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="docs/logo.svg" alt="sperf logo" width="260">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
# sperf
|
|
6
|
+
|
|
7
|
+
A safepoint-based sampling performance profiler for Ruby. Uses actual time deltas as sample weights to correct safepoint bias.
|
|
8
|
+
|
|
9
|
+
- Requires Ruby >= 3.4.0
|
|
10
|
+
- Output: pprof protobuf, collapsed stacks, or text report
|
|
11
|
+
- Modes: CPU time (per-thread) and wall time (with GVL/GC tracking)
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
gem install sperf
|
|
17
|
+
|
|
18
|
+
# Performance summary (wall mode, prints to stderr)
|
|
19
|
+
sperf stat ruby app.rb
|
|
20
|
+
|
|
21
|
+
# Profile to file
|
|
22
|
+
sperf record ruby app.rb # → sperf.data (pprof, cpu mode)
|
|
23
|
+
sperf record -m wall -o profile.pb.gz ruby server.rb # wall mode, custom output
|
|
24
|
+
|
|
25
|
+
# View results (report/diff require Go: https://go.dev/dl/)
|
|
26
|
+
sperf report # open sperf.data in browser
|
|
27
|
+
sperf report --top profile.pb.gz # print top functions to terminal
|
|
28
|
+
|
|
29
|
+
# Compare two profiles
|
|
30
|
+
sperf diff before.pb.gz after.pb.gz # open diff in browser
|
|
31
|
+
sperf diff --top before.pb.gz after.pb.gz # print diff to terminal
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Ruby API
|
|
35
|
+
|
|
36
|
+
```ruby
|
|
37
|
+
require "sperf"
|
|
38
|
+
|
|
39
|
+
# Block form — profiles and saves to file
|
|
40
|
+
Sperf.start(output: "profile.pb.gz", frequency: 500, mode: :cpu) do
|
|
41
|
+
# code to profile
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Manual start/stop
|
|
45
|
+
Sperf.start(frequency: 1000, mode: :wall)
|
|
46
|
+
# ...
|
|
47
|
+
data = Sperf.stop
|
|
48
|
+
Sperf.save("profile.pb.gz", data)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Environment Variables
|
|
52
|
+
|
|
53
|
+
Profile without code changes (e.g., Rails):
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
SPERF_ENABLED=1 SPERF_MODE=wall SPERF_OUTPUT=profile.pb.gz ruby app.rb
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Run `sperf help` for full documentation (all options, output interpretation, diagnostics guide).
|
|
60
|
+
|
|
61
|
+
## Subcommands
|
|
62
|
+
|
|
63
|
+
| Command | Description |
|
|
64
|
+
|---------|-------------|
|
|
65
|
+
| `sperf record` | Profile a command and save to file |
|
|
66
|
+
| `sperf stat` | Profile a command and print summary to stderr |
|
|
67
|
+
| `sperf report` | Open pprof profile with `go tool pprof` (requires Go) |
|
|
68
|
+
| `sperf diff` | Compare two pprof profiles (requires Go) |
|
|
69
|
+
| `sperf help` | Show full reference documentation |
|
|
70
|
+
|
|
71
|
+
## How It Works
|
|
72
|
+
|
|
73
|
+
### The Problem
|
|
74
|
+
|
|
75
|
+
Ruby's sampling profilers collect stack traces at **safepoints**, not at the exact timer tick. Traditional profilers assign equal weight to every sample, so if a safepoint is delayed 5ms, that delay is invisible.
|
|
76
|
+
|
|
77
|
+
### The Solution
|
|
78
|
+
|
|
79
|
+
sperf uses **time deltas as sample weights**:
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
Timer thread (pthread) VM thread (postponed job)
|
|
83
|
+
───────────────────── ────────────────────────
|
|
84
|
+
every 1/frequency sec: at next safepoint:
|
|
85
|
+
rb_postponed_job_trigger() → sperf_sample_job()
|
|
86
|
+
time_now = read_clock()
|
|
87
|
+
weight = time_now - prev_time
|
|
88
|
+
record(backtrace, weight)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
If a safepoint is delayed, the sample carries proportionally more weight. The total weight equals the total time, accurately distributed across call stacks.
|
|
92
|
+
|
|
93
|
+
### Modes
|
|
94
|
+
|
|
95
|
+
| Mode | Clock | What it measures |
|
|
96
|
+
|------|-------|------------------|
|
|
97
|
+
| `cpu` (default) | `CLOCK_THREAD_CPUTIME_ID` | CPU time consumed (excludes sleep/I/O) |
|
|
98
|
+
| `wall` | `CLOCK_MONOTONIC` | Real elapsed time (includes everything) |
|
|
99
|
+
|
|
100
|
+
Use `cpu` to find what consumes CPU. Use `wall` to find what makes things slow (I/O, GVL contention, GC).
|
|
101
|
+
|
|
102
|
+
### Synthetic Frames (wall mode)
|
|
103
|
+
|
|
104
|
+
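sperf hooks GVL and GC events to attribute non-CPU time. The GVL frames appear in wall mode only; per `sperf help`, the GC frames are tracked in both modes: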
|
|
105
|
+
|
|
106
|
+
| Frame | Meaning |
|
|
107
|
+
|-------|---------|
|
|
108
|
+
| `[GVL blocked]` | Off-GVL time (I/O, sleep, C extension releasing GVL) |
|
|
109
|
+
| `[GVL wait]` | Waiting to reacquire the GVL (contention) |
|
|
110
|
+
| `[GC marking]` | Time in GC mark phase |
|
|
111
|
+
| `[GC sweeping]` | Time in GC sweep phase |
|
|
112
|
+
|
|
113
|
+
## Output Formats
|
|
114
|
+
|
|
115
|
+
| Format | Extension | Use case |
|
|
116
|
+
|--------|-----------|----------|
|
|
117
|
+
| pprof (default) | `.pb.gz` | `sperf report`, `go tool pprof`, speedscope |
|
|
118
|
+
| collapsed | `.collapsed` | FlameGraph (`flamegraph.pl`), speedscope |
|
|
119
|
+
| text | `.txt` | Human/AI-readable flat + cumulative report |
|
|
120
|
+
|
|
121
|
+
Format is auto-detected from extension, or set explicitly with `--format`.
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
MIT
|
data/exe/sperf
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require "optparse"
|
|
3
|
+
|
|
4
|
+
HELP_TEXT = <<'HELP'
|
|
5
|
+
sperf - safepoint-based sampling performance profiler for Ruby
|
|
6
|
+
|
|
7
|
+
OVERVIEW
|
|
8
|
+
|
|
9
|
+
sperf profiles Ruby programs by sampling at safepoints and using actual
|
|
10
|
+
time deltas (nanoseconds) as weights to correct safepoint bias.
|
|
11
|
+
POSIX systems (Linux, macOS). Requires Ruby >= 3.4.0.
|
|
12
|
+
|
|
13
|
+
CLI USAGE
|
|
14
|
+
|
|
15
|
+
sperf record [options] command [args...]
|
|
16
|
+
sperf stat [options] command [args...]
|
|
17
|
+
sperf report [options] [file]
sperf diff [options] base.pb.gz target.pb.gz
|
|
18
|
+
sperf help
|
|
19
|
+
|
|
20
|
+
record: Profile and save to file.
|
|
21
|
+
-o, --output PATH Output file (default: sperf.data)
|
|
22
|
+
-f, --frequency HZ Sampling frequency in Hz (default: 1000)
|
|
23
|
+
-m, --mode MODE cpu or wall (default: cpu)
|
|
24
|
+
--format FORMAT pprof, collapsed, or text (default: auto from extension)
|
|
25
|
+
-v, --verbose Print sampling statistics to stderr
|
|
26
|
+
|
|
27
|
+
stat: Run command and print performance summary to stderr.
|
|
28
|
+
Always uses wall mode. No file output by default.
|
|
29
|
+
-o, --output PATH Also save profile to file (default: none)
|
|
30
|
+
-f, --frequency HZ Sampling frequency in Hz (default: 1000)
|
|
31
|
+
-v, --verbose Print additional sampling statistics
|
|
32
|
+
|
|
33
|
+
Shows: user/sys/real time, time breakdown (CPU execution, GVL blocked,
|
|
34
|
+
GVL wait, GC marking, GC sweeping), and top 5 hot functions.
|
|
35
|
+
|
|
36
|
+
report: Open pprof profile with go tool pprof. Requires Go.
|
|
37
|
+
--top Print top functions by flat time
|
|
38
|
+
--text Print text report
|
|
39
|
+
Default (no flag): opens interactive web UI in browser.
|
|
40
|
+
Default file: sperf.data
|
|
41
|
+
|
|
42
|
+
diff: Compare two pprof profiles (target - base). Requires Go.
|
|
43
|
+
--top Print top functions by diff
|
|
44
|
+
--text Print text diff report
|
|
45
|
+
Default (no flag): opens diff in browser.
|
|
46
|
+
|
|
47
|
+
Examples:
|
|
48
|
+
sperf record ruby app.rb
|
|
49
|
+
sperf record -o profile.pb.gz ruby app.rb
|
|
50
|
+
sperf record -m wall -f 500 -o profile.pb.gz ruby server.rb
|
|
51
|
+
sperf record -o profile.collapsed ruby app.rb
|
|
52
|
+
sperf record -o profile.txt ruby app.rb
|
|
53
|
+
sperf stat ruby app.rb
|
|
54
|
+
sperf stat -o profile.pb.gz ruby app.rb
|
|
55
|
+
sperf report
|
|
56
|
+
sperf report --top profile.pb.gz
|
|
57
|
+
sperf diff before.pb.gz after.pb.gz
|
|
58
|
+
sperf diff --top before.pb.gz after.pb.gz
|
|
59
|
+
|
|
60
|
+
RUBY API
|
|
61
|
+
|
|
62
|
+
require "sperf"
|
|
63
|
+
|
|
64
|
+
# Block form (recommended) — profiles the block and writes to file
|
|
65
|
+
Sperf.start(output: "profile.pb.gz", frequency: 500, mode: :cpu) do
|
|
66
|
+
# code to profile
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Manual start/stop — returns data hash for programmatic use
|
|
70
|
+
Sperf.start(frequency: 1000, mode: :wall)
|
|
71
|
+
# ... code to profile ...
|
|
72
|
+
data = Sperf.stop
|
|
73
|
+
|
|
74
|
+
# Save data to file later
|
|
75
|
+
Sperf.save("profile.pb.gz", data)
|
|
76
|
+
Sperf.save("profile.collapsed", data)
|
|
77
|
+
Sperf.save("profile.txt", data)
|
|
78
|
+
|
|
79
|
+
Sperf.start parameters:
|
|
80
|
+
frequency: Sampling frequency in Hz (Integer, default: 1000)
|
|
81
|
+
mode: :cpu or :wall (Symbol, default: :cpu)
|
|
82
|
+
output: File path to write on stop (String or nil)
|
|
83
|
+
verbose: Print statistics to stderr (true/false, default: false)
|
|
84
|
+
format: :pprof, :collapsed, :text, or nil for auto-detect (Symbol or nil)
|
|
85
|
+
|
|
86
|
+
Sperf.stop return value:
|
|
87
|
+
nil if profiler was not running; otherwise a Hash:
|
|
88
|
+
{ mode: :cpu, # or :wall
|
|
89
|
+
frequency: 500,
|
|
90
|
+
sampling_count: 1234,
|
|
91
|
+
sampling_time_ns: 56789,
|
|
92
|
+
samples: [ # Array of [frames, weight]
|
|
93
|
+
[frames, weight], # frames: [[path, label], ...] deepest-first
|
|
94
|
+
... # weight: Integer (nanoseconds)
|
|
95
|
+
] }
|
|
96
|
+
|
|
97
|
+
Sperf.save(path, data, format: nil)
|
|
98
|
+
Writes data to path. format: :pprof, :collapsed, or :text.
|
|
99
|
+
nil auto-detects from extension.
|
|
100
|
+
|
|
101
|
+
PROFILING MODES
|
|
102
|
+
|
|
103
|
+
cpu Measures per-thread CPU time via the per-thread CPU clock (CLOCK_THREAD_CPUTIME_ID).
|
|
104
|
+
Use for: finding functions that consume CPU cycles.
|
|
105
|
+
Ignores time spent sleeping, in I/O, or waiting for GVL.
|
|
106
|
+
|
|
107
|
+
wall Measures wall-clock time (CLOCK_MONOTONIC).
|
|
108
|
+
Use for: finding where wall time goes, including I/O, sleep, GVL
|
|
109
|
+
contention, and off-CPU waits.
|
|
110
|
+
Includes synthetic frames (see below).
|
|
111
|
+
|
|
112
|
+
OUTPUT FORMATS
|
|
113
|
+
|
|
114
|
+
pprof (default)
|
|
115
|
+
Gzip-compressed protobuf. Standard pprof format.
|
|
116
|
+
Extension convention: .pb.gz
|
|
117
|
+
View with: go tool pprof, pprof-rs, or speedscope (via import).
|
|
118
|
+
|
|
119
|
+
collapsed
|
|
120
|
+
Plain text. One line per unique stack: "frame1;frame2;...;leaf weight\n"
|
|
121
|
+
Frames are semicolon-separated, bottom-to-top. Weight in nanoseconds.
|
|
122
|
+
Extension convention: .collapsed
|
|
123
|
+
Compatible with: FlameGraph (flamegraph.pl), speedscope.
|
|
124
|
+
|
|
125
|
+
text
|
|
126
|
+
Human/AI-readable report. Shows total time, then flat and cumulative
|
|
127
|
+
top-N tables sorted by weight descending. No parsing needed.
|
|
128
|
+
Extension convention: .txt
|
|
129
|
+
Example output:
|
|
130
|
+
Total: 1523.4ms (cpu)
|
|
131
|
+
Samples: 4820, Frequency: 500Hz
|
|
132
|
+
|
|
133
|
+
Flat:
|
|
134
|
+
820.3ms 53.8% Array#each (app/models/user.rb)
|
|
135
|
+
312.1ms 20.5% JSON.parse (lib/json/parser.rb)
|
|
136
|
+
...
|
|
137
|
+
|
|
138
|
+
Cumulative:
|
|
139
|
+
1401.2ms 92.0% UsersController#index (app/controllers/users_controller.rb)
|
|
140
|
+
...
|
|
141
|
+
|
|
142
|
+
Format is auto-detected from the output file extension:
|
|
143
|
+
.collapsed → collapsed
|
|
144
|
+
.txt → text
|
|
145
|
+
anything else → pprof
|
|
146
|
+
The --format flag (CLI) or format: parameter (API) overrides auto-detect.
|
|
147
|
+
|
|
148
|
+
SYNTHETIC FRAMES
|
|
149
|
+
|
|
150
|
+
In wall mode, sperf adds synthetic frames that represent non-CPU time:
|
|
151
|
+
|
|
152
|
+
[GVL blocked] Time the thread spent off-GVL (I/O, sleep, C extension
|
|
153
|
+
releasing GVL). Attributed to the stack at SUSPENDED.
|
|
154
|
+
[GVL wait] Time the thread spent waiting to reacquire the GVL after
|
|
155
|
+
becoming ready. Indicates GVL contention. Same stack.
|
|
156
|
+
|
|
157
|
+
In both modes, GC time is tracked:
|
|
158
|
+
|
|
159
|
+
[GC marking] Time spent in GC marking phase (wall time).
|
|
160
|
+
[GC sweeping] Time spent in GC sweeping phase (wall time).
|
|
161
|
+
|
|
162
|
+
These always appear as the leaf (deepest) frame in a sample.
|
|
163
|
+
|
|
164
|
+
INTERPRETING RESULTS
|
|
165
|
+
|
|
166
|
+
Weight unit is always nanoseconds regardless of mode.
|
|
167
|
+
|
|
168
|
+
Flat time: weight attributed directly to a function (it was the leaf).
|
|
169
|
+
Cumulative time: weight for all samples where the function appears
|
|
170
|
+
anywhere in the stack.
|
|
171
|
+
|
|
172
|
+
High flat time → the function itself is expensive.
|
|
173
|
+
High cum but low flat → the function calls expensive children.
|
|
174
|
+
|
|
175
|
+
To convert: 1_000_000 ns = 1 ms, 1_000_000_000 ns = 1 s.
|
|
176
|
+
|
|
177
|
+
DIAGNOSING COMMON PERFORMANCE PROBLEMS
|
|
178
|
+
|
|
179
|
+
Problem: high CPU usage
|
|
180
|
+
Mode: cpu
|
|
181
|
+
Look for: functions with high flat cpu time.
|
|
182
|
+
Action: optimize the hot function or call it less.
|
|
183
|
+
|
|
184
|
+
Problem: slow request / high latency
|
|
185
|
+
Mode: wall
|
|
186
|
+
Look for: functions with high cum wall time.
|
|
187
|
+
If [GVL blocked] is dominant → I/O or sleep is the bottleneck.
|
|
188
|
+
If [GVL wait] is dominant → GVL contention; reduce GVL-holding work
|
|
189
|
+
or move work to Ractors / child processes.
|
|
190
|
+
|
|
191
|
+
Problem: GC pauses
|
|
192
|
+
Mode: cpu or wall
|
|
193
|
+
Look for: [GC marking] and [GC sweeping] samples.
|
|
194
|
+
High [GC marking] → too many live objects; reduce allocations.
|
|
195
|
+
High [GC sweeping] → too many short-lived objects; reuse or pool.
|
|
196
|
+
|
|
197
|
+
Problem: multithreaded app slower than expected
|
|
198
|
+
Mode: wall
|
|
199
|
+
Look for: [GVL wait] time across threads.
|
|
200
|
+
High [GVL wait] means threads are serialized on the GVL.
|
|
201
|
+
|
|
202
|
+
READING COLLAPSED STACKS PROGRAMMATICALLY
|
|
203
|
+
|
|
204
|
+
Each line: "bottom_frame;...;top_frame weight_ns"
|
|
205
|
+
Parse with:
|
|
206
|
+
File.readlines("profile.collapsed").each do |line|
|
|
207
|
+
stack, weight = line.rpartition(" ").then { |s, _, w| [s, w.to_i] }
|
|
208
|
+
frames = stack.split(";")
|
|
209
|
+
# frames[0] is bottom (main), frames[-1] is leaf (hot)
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
READING PPROF PROGRAMMATICALLY
|
|
213
|
+
|
|
214
|
+
Decompress + parse protobuf:
|
|
215
|
+
require "zlib"; require "stringio"
|
|
216
|
+
raw = Zlib::GzipReader.new(StringIO.new(File.binread("profile.pb.gz"))).read
|
|
217
|
+
# raw is a protobuf binary; use google-protobuf gem or pprof tooling.
|
|
218
|
+
|
|
219
|
+
Or convert to text with pprof CLI:
|
|
220
|
+
go tool pprof -text profile.pb.gz
|
|
221
|
+
go tool pprof -top profile.pb.gz
|
|
222
|
+
go tool pprof -http=:0 profile.pb.gz   # web UI, includes a flame graph view
|
|
223
|
+
|
|
224
|
+
ENVIRONMENT VARIABLES
|
|
225
|
+
|
|
226
|
+
Used by the CLI to pass options to the auto-started profiler; they can also be set by hand to profile a program without code changes:
|
|
227
|
+
SPERF_ENABLED=1 Enable auto-start on require
|
|
228
|
+
SPERF_OUTPUT=path Output file path
|
|
229
|
+
SPERF_FREQUENCY=hz Sampling frequency
|
|
230
|
+
SPERF_MODE=cpu|wall Profiling mode
|
|
231
|
+
SPERF_FORMAT=fmt pprof, collapsed, or text
|
|
232
|
+
SPERF_VERBOSE=1 Print statistics
|
|
233
|
+
|
|
234
|
+
TIPS
|
|
235
|
+
|
|
236
|
+
- Default frequency (1000 Hz) works well for most cases; overhead is < 0.2%.
|
|
237
|
+
- For long-running production profiling, lower frequency (100-500) reduces overhead further.
|
|
238
|
+
- Profile representative workloads, not micro-benchmarks.
|
|
239
|
+
- Compare cpu and wall profiles to distinguish CPU-bound from I/O-bound.
|
|
240
|
+
- The verbose flag (-v) shows sampling overhead and top functions on stderr.
|
|
241
|
+
HELP
|
|
242
|
+
|
|
243
|
+
# Short usage synopsis: printed for `-h`/`--help`, used as the banner of the
# record/stat option parser, and shown when the subcommand is unknown.
#
# Written as a squiggly heredoc so the continuation lines stay aligned under
# the first command (7 columns, the width of "Usage: "), and frozen so the
# shared constant cannot be mutated by accident.
USAGE = <<~TEXT.freeze
  Usage: sperf record [options] command [args...]
         sperf stat [options] command [args...]
         sperf report [options] [file]
         sperf diff [options] base.pb.gz target.pb.gz
         sperf help
TEXT
|
|
248
|
+
|
|
249
|
+
# Top-level flags are resolved here, ahead of subcommand dispatch, by looking
# only at the first command-line argument.
first_arg = ARGV.first

if ["-v", "--version"].include?(first_arg)
  # Sperf::VERSION comes from the library, which is loaded only on this path.
  require "sperf"
  puts "sperf #{Sperf::VERSION}"
  exit
elsif ["-h", "--help"].include?(first_arg)
  puts USAGE
  puts
  puts "Run 'sperf help' for full documentation"
  exit
end
|
|
261
|
+
|
|
262
|
+
# Dispatch on the subcommand (the first remaining argument).
#
# "help", "report" and "diff" are handled entirely inside this statement and
# never fall through: each path ends in `exit` or in `exec` of
# `go tool pprof`. "record" and "stat" continue with the code that follows.
# Anything else prints the usage synopsis to stderr and exits with status 1.
subcommand = ARGV.shift

case subcommand
when "help"
  puts HELP_TEXT
  exit
when "report"
  # sperf report: wrapper around go tool pprof
  report_mode = :http # default: open in browser

  report_parser = OptionParser.new do |opts|
    opts.banner = "Usage: sperf report [options] [file]\n" \
                  " Opens pprof profile in browser (default) or prints summary.\n" \
                  " Default file: sperf.data"

    opts.on("--top", "Print top functions by flat time") do
      report_mode = :top
    end

    opts.on("--text", "Print text report") do
      report_mode = :text
    end

    opts.on("-h", "--help", "Show this help") do
      puts opts
      exit
    end
  end

  begin
    report_parser.order!(ARGV)
  rescue OptionParser::ParseError => e
    # ParseError is the common base class of every option-parsing failure
    # (invalid, ambiguous, needless argument, ...), so no malformed command
    # line escapes as a backtrace.
    $stderr.puts e.message
    $stderr.puts report_parser
    exit 1
  end

  report_file = ARGV.shift || "sperf.data"

  unless File.exist?(report_file)
    $stderr.puts "File not found: #{report_file}"
    exit 1
  end

  # Probe for the Go toolchain up front so the user gets an install hint
  # instead of a failed exec.
  unless system("go", "version", out: File::NULL, err: File::NULL)
    $stderr.puts "'go' command not found. Install Go to use 'sperf report'."
    $stderr.puts " https://go.dev/dl/"
    exit 1
  end

  case report_mode
  when :top
    exec("go", "tool", "pprof", "-top", report_file)
  when :text
    exec("go", "tool", "pprof", "-text", report_file)
  else
    # -http=:0 lets pprof choose a free port for its web UI.
    exec("go", "tool", "pprof", "-http=:0", report_file)
  end
when "diff"
  # sperf diff: compare two pprof profiles via go tool pprof -diff_base
  diff_mode = :http
  diff_parser = OptionParser.new do |opts|
    opts.banner = "Usage: sperf diff [options] base.pb.gz target.pb.gz\n" \
                  " Compare two pprof profiles (shows target - base)."

    opts.on("--top", "Print top functions by diff") do
      diff_mode = :top
    end

    opts.on("--text", "Print text diff report") do
      diff_mode = :text
    end

    opts.on("-h", "--help", "Show this help") do
      puts opts
      exit
    end
  end

  begin
    diff_parser.order!(ARGV)
  rescue OptionParser::ParseError => e
    # Same policy as `report`: every parse failure becomes a short message
    # plus the subcommand help.
    $stderr.puts e.message
    $stderr.puts diff_parser
    exit 1
  end

  if ARGV.size < 2
    $stderr.puts "Two profile files required."
    $stderr.puts diff_parser
    exit 1
  end

  base_file, target_file = ARGV.shift(2)

  [base_file, target_file].each do |f|
    unless File.exist?(f)
      $stderr.puts "File not found: #{f}"
      exit 1
    end
  end

  unless system("go", "version", out: File::NULL, err: File::NULL)
    $stderr.puts "'go' command not found. Install Go to use 'sperf diff'."
    $stderr.puts " https://go.dev/dl/"
    exit 1
  end

  case diff_mode
  when :top
    exec("go", "tool", "pprof", "-top", "-diff_base=#{base_file}", target_file)
  when :text
    exec("go", "tool", "pprof", "-text", "-diff_base=#{base_file}", target_file)
  else
    exec("go", "tool", "pprof", "-http=:0", "-diff_base=#{base_file}", target_file)
  end
when "record", "stat"
  # continue below
else
  # A nil subcommand (no arguments at all) prints only the usage text.
  $stderr.puts "Unknown subcommand: #{subcommand.inspect}" if subcommand
  $stderr.puts USAGE
  exit 1
end
|
|
386
|
+
|
|
387
|
+
# Option defaults for `record` and `stat`. `stat` writes no file unless -o is
# given and always profiles in wall mode; `record` defaults to sperf.data in
# cpu mode.
output = (subcommand == "stat") ? nil : "sperf.data"
frequency = 1000
mode = (subcommand == "stat") ? "wall" : "cpu"
format = nil
verbose = false

parser = OptionParser.new do |opts|
  opts.banner = USAGE

  opts.on("-o", "--output PATH", "Output file#{subcommand == 'stat' ? ' (default: none)' : ' (default: sperf.data)'}") do |v|
    output = v
  end

  opts.on("-f", "--frequency HZ", Integer, "Sampling frequency in Hz (default: 1000)") do |v|
    frequency = v
  end

  # --mode and --format are only offered by `record`.
  if subcommand == "record"
    opts.on("-m", "--mode MODE", %w[cpu wall], "Profiling mode: cpu or wall (default: cpu)") do |v|
      mode = v
    end

    opts.on("--format FORMAT", %w[pprof collapsed text],
            "Output format: pprof, collapsed, or text (default: auto from extension)") do |v|
      format = v
    end
  end

  opts.on("-v", "--verbose", "Print sampling statistics to stderr") do
    verbose = true
  end

  opts.on("-h", "--help", "Show this help") do
    puts opts
    puts
    puts "Run 'sperf help' for full documentation (modes, formats, diagnostics guide, etc.)"
    exit
  end
end

begin
  # order! stops at the first non-option argument, so options that belong to
  # the profiled command are left untouched in ARGV.
  parser.order!(ARGV)
rescue OptionParser::ParseError => e
  # ParseError also covers InvalidArgument (`-m bogus`, `-f abc`) and
  # MissingArgument (a trailing `-o`), not only unknown options.
  $stderr.puts e.message
  $stderr.puts parser
  exit 1
end

# Integer coercion accepts zero and negative values; neither is a usable
# sampling rate.
unless frequency.positive?
  $stderr.puts "Invalid frequency: #{frequency} (must be a positive integer)"
  $stderr.puts parser
  exit 1
end

if ARGV.empty?
  $stderr.puts "No command specified."
  $stderr.puts parser
  exit 1
end
|
|
440
|
+
|
|
441
|
+
# Hand the parsed options to the child process through the environment, then
# replace this process with the command to profile.

# Add lib dir to RUBYLIB so -rsperf can find the extension
lib_dir = File.expand_path("../lib", __dir__)
ENV["RUBYLIB"] = [lib_dir, ENV["RUBYLIB"]].compact.join(File::PATH_SEPARATOR)
# -rsperf goes first so any RUBYOPT the user already set is preserved after it.
ENV["RUBYOPT"] = "-rsperf #{ENV['RUBYOPT']}".strip
ENV["SPERF_ENABLED"] = "1"
ENV["SPERF_OUTPUT"] = output if output
ENV["SPERF_FREQUENCY"] = frequency.to_s
ENV["SPERF_MODE"] = mode
ENV["SPERF_FORMAT"] = format if format
ENV["SPERF_VERBOSE"] = "1" if verbose

if subcommand == "stat"
  ENV["SPERF_STAT"] = "1"
  # NOTE(review): presumably read by the library to label the stat summary;
  # the consumer is not visible in this file.
  ENV["SPERF_STAT_COMMAND"] = ARGV.join(" ")
end

begin
  exec(*ARGV)
rescue SystemCallError => e
  # exec only returns control by raising (command missing, not executable,
  # ...). Report that as a one-line error with the conventional shell exit
  # statuses instead of a Ruby backtrace.
  $stderr.puts "sperf: #{e.message}"
  exit(e.is_a?(Errno::ENOENT) ? 127 : 126)
end
|