stackprof 0.2.12 → 0.2.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +1 -1
- data/CHANGELOG.md +13 -2
- data/README.md +66 -51
- data/Rakefile +11 -25
- data/bin/stackprof +14 -4
- data/ext/stackprof/extconf.rb +9 -0
- data/ext/stackprof/stackprof.c +788 -0
- data/lib/stackprof.rb +4 -0
- data/lib/stackprof/middleware.rb +8 -2
- data/lib/stackprof/report.rb +270 -9
- data/stackprof.gemspec +11 -2
- data/test/test_middleware.rb +6 -0
- data/test/test_stackprof.rb +112 -11
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +14 -12
- data/.travis.yml +0 -8
- data/Gemfile.lock +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b55691b8d1966ba4b2b2458a4908b2a2d5b65f2074dfe3b3b1b6350f752704ec
|
4
|
+
data.tar.gz: 79e2a0508a1c722f39cc61d39b0577cfb5520669a7a2db4cadac6c49dcb1267a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2fa22779f03c332a3680f526bf1df29553588773fabeb00da327af3525018e535e973bafd990254c6ad50516faf5e8b1d087bb7c208c99d0b512d99ccdef53bb
|
7
|
+
data.tar.gz: 73ba1328c793b0c0c4657e7826f4bf2cd52102c61a2ca2e3e0b1c5240ffe96ee0ec328ea831b1592c10b4e13c6aec2bb9d28fd05e93ddef999d5131e55124362
|
@@ -0,0 +1,43 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
rubies:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
strategy:
|
9
|
+
fail-fast: false
|
10
|
+
matrix:
|
11
|
+
ruby: [ ruby-head, '3.0', '2.7', '2.6', '2.5', '2.4', '2.3', '2.2' ]
|
12
|
+
steps:
|
13
|
+
- name: Checkout
|
14
|
+
uses: actions/checkout@v2
|
15
|
+
- name: Set up Ruby
|
16
|
+
uses: ruby/setup-ruby@v1
|
17
|
+
with:
|
18
|
+
ruby-version: ${{ matrix.ruby }}
|
19
|
+
- name: Install dependencies
|
20
|
+
run: bundle install
|
21
|
+
- name: Run test
|
22
|
+
run: rake
|
23
|
+
- name: Install gem
|
24
|
+
run: rake install
|
25
|
+
platforms:
|
26
|
+
strategy:
|
27
|
+
matrix:
|
28
|
+
os: [macos]
|
29
|
+
ruby: ['3.0']
|
30
|
+
runs-on: ${{ matrix.os }}-latest
|
31
|
+
steps:
|
32
|
+
- name: Checkout
|
33
|
+
uses: actions/checkout@v2
|
34
|
+
- name: Set up Ruby
|
35
|
+
uses: ruby/setup-ruby@v1
|
36
|
+
with:
|
37
|
+
ruby-version: ${{ matrix.ruby }}
|
38
|
+
- name: Install dependencies
|
39
|
+
run: bundle install
|
40
|
+
- name: Run test
|
41
|
+
run: rake
|
42
|
+
- name: Install gem
|
43
|
+
run: rake install
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
-
# 0.2.
|
1
|
+
# 0.2.16
|
2
2
|
|
3
|
-
*
|
3
|
+
* [flamegraph.pl] Update to latest version
|
4
|
+
* Add option to ignore GC frames
|
5
|
+
* Handle source code not being available
|
6
|
+
* Freeze strings in report.rb
|
7
|
+
* Use a cursor object instead of array slicing
|
8
|
+
* ArgumentError on interval <1 or >=1 million (microseconds)
|
9
|
+
* fix variable name.
|
10
|
+
* Fix default mode comment in readme
|
11
|
+
|
12
|
+
# 0.2.15
|
13
|
+
|
14
|
+
* Mark the metadata object before the GC is invoked to prevent it from being garbage collected.
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Inspired heavily by [gperftools](https://code.google.com/p/gperftools/), and wri
|
|
6
6
|
|
7
7
|
## Requirements
|
8
8
|
|
9
|
-
* Ruby 2.
|
9
|
+
* Ruby 2.2+
|
10
10
|
* Linux-based OS
|
11
11
|
|
12
12
|
## Getting Started
|
@@ -81,34 +81,42 @@ $ stackprof tmp/stackprof-cpu-*.dump --method 'Object#present?'
|
|
81
81
|
|
82
82
|
For an experimental version of WebUI reporting of stackprof, see [stackprof-webnav](https://github.com/alisnic/stackprof-webnav)
|
83
83
|
|
84
|
-
|
84
|
+
To generate flamegraphs with Stackprof, additional data must be collected using the `raw: true` flag. Once you've collected results with this flag enabled, generate a flamegraph with:
|
85
85
|
|
86
86
|
```
|
87
87
|
$ stackprof --flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph
|
88
88
|
```
|
89
89
|
|
90
|
-
|
90
|
+
After the flamegraph has been generated, you can generate a viewer command with:
|
91
91
|
|
92
92
|
```
|
93
93
|
$ stackprof --flamegraph-viewer=tmp/flamegraph
|
94
94
|
```
|
95
95
|
|
96
|
-
The `--flamegraph-viewer` command will output the exact shell command you need to run to open the `tmp/flamegraph` you generated with the built
|
96
|
+
The `--flamegraph-viewer` command will output the exact shell command you need to run in order to open the `tmp/flamegraph` you generated with the built-in stackprof flamegraph viewer:
|
97
97
|
|
98
|
-
![](http://i.imgur.com/EwndrgD.png)
|
98
|
+
![Flamegraph Viewer](http://i.imgur.com/EwndrgD.png)
|
99
|
+
|
100
|
+
Alternatively, you can generate a flamegraph that uses [d3-flame-graph](https://github.com/spiermar/d3-flame-graph):
|
101
|
+
|
102
|
+
```
|
103
|
+
$ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > flamegraph.html
|
104
|
+
```
|
105
|
+
|
106
|
+
Then open the resulting file in your browser.
|
99
107
|
|
100
108
|
## Sampling
|
101
109
|
|
102
|
-
|
110
|
+
Four sampling modes are supported:
|
103
111
|
|
104
|
-
-
|
105
|
-
-
|
106
|
-
-
|
107
|
-
-
|
112
|
+
- `:wall` (using `ITIMER_REAL` and `SIGALRM`) [default mode]
|
113
|
+
- `:cpu` (using `ITIMER_PROF` and `SIGPROF`)
|
114
|
+
- `:object` (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
|
115
|
+
- `:custom` (user-defined via `StackProf.sample`)
|
108
116
|
|
109
|
-
|
117
|
+
Samplers have a tuneable interval which can be used to reduce overhead or increase granularity:
|
110
118
|
|
111
|
-
-
|
119
|
+
- Wall time: sample every _interval_ microseconds of wallclock time (default: 1000)
|
112
120
|
|
113
121
|
```ruby
|
114
122
|
StackProf.run(mode: :wall, out: 'tmp/stackprof.dump', interval: 1000) do
|
@@ -116,7 +124,7 @@ StackProf.run(mode: :wall, out: 'tmp/stackprof.dump', interval: 1000) do
|
|
116
124
|
end
|
117
125
|
```
|
118
126
|
|
119
|
-
-
|
127
|
+
- CPU time: sample every _interval_ microseconds of CPU activity (default: 1000 = 1 millisecond)
|
120
128
|
|
121
129
|
```ruby
|
122
130
|
StackProf.run(mode: :cpu, out: 'tmp/stackprof.dump', interval: 1000) do
|
@@ -124,7 +132,7 @@ StackProf.run(mode: :cpu, out: 'tmp/stackprof.dump', interval: 1000) do
|
|
124
132
|
end
|
125
133
|
```
|
126
134
|
|
127
|
-
-
|
135
|
+
- Object allocation: sample every _interval_ allocations (default: 1)
|
128
136
|
|
129
137
|
|
130
138
|
```ruby
|
@@ -133,30 +141,36 @@ StackProf.run(mode: :object, out: 'tmp/stackprof.dump', interval: 1) do
|
|
133
141
|
end
|
134
142
|
```
|
135
143
|
|
136
|
-
samples
|
144
|
+
By default, samples taken during garbage collection will show as garbage collection frames
|
145
|
+
including both mark and sweep phases. For longer traces, these can leave gaps in a flamegraph
|
146
|
+
that are hard to follow. They can be disabled by setting the `ignore_gc` option to true.
|
147
|
+
Garbage collection time will still be present in the profile but not explicitly marked with
|
148
|
+
its own frame.
|
149
|
+
|
150
|
+
Samples are taken using a combination of three new C-APIs in Ruby 2.1:
|
137
151
|
|
138
|
-
-
|
152
|
+
- Signal handlers enqueue a sampling job using `rb_postponed_job_register_one`.
|
139
153
|
This ensures callstack samples can be taken safely, in case the VM is garbage collecting
|
140
154
|
or in some other inconsistent state during the interruption.
|
141
155
|
|
142
|
-
-
|
143
|
-
to the VM's call stack.
|
156
|
+
- Stack frames are collected via `rb_profile_frames`, which provides low-overhead C-API access
|
157
|
+
to the VM's call stack. No object allocations occur in this path, allowing stackprof to collect
|
144
158
|
callstacks in allocation mode.
|
145
159
|
|
146
|
-
-
|
160
|
+
- In allocation mode, samples are taken via `rb_tracepoint_new(RUBY_INTERNAL_EVENT_NEWOBJ)`,
|
147
161
|
which provides a notification every time the VM allocates a new object.
|
148
162
|
|
149
163
|
## Aggregation
|
150
164
|
|
151
|
-
|
152
|
-
|
165
|
+
Each sample consists of N stack frames, where a frame looks something like `MyClass#method` or `block in MySingleton.method`.
|
166
|
+
For each of these frames in the sample, the profiler collects a few pieces of metadata:
|
153
167
|
|
154
|
-
- samples
|
155
|
-
- total_samples
|
156
|
-
- lines
|
157
|
-
- edges
|
168
|
+
- `samples`: Number of samples where this was the topmost frame
|
169
|
+
- `total_samples`: Samples where this frame was in the stack
|
170
|
+
- `lines`: Samples per line number in this frame
|
171
|
+
- `edges`: Samples per callee frame (methods invoked by this frame)
|
158
172
|
|
159
|
-
|
173
|
+
The aggregation algorithm is roughly equivalent to the following pseudo code:
|
160
174
|
|
161
175
|
``` ruby
|
162
176
|
trap('PROF') do
|
@@ -175,16 +189,16 @@ trap('PROF') do
|
|
175
189
|
end
|
176
190
|
```
|
177
191
|
|
178
|
-
|
192
|
+
This technique builds up an incremental call graph from the samples. On any given frame,
|
179
193
|
the sum of the outbound edge weights is equal to total samples collected on that frame
|
180
194
|
(`frame.total_samples == frame.edges.values.sum`).
|
181
195
|
|
182
196
|
## Reporting
|
183
197
|
|
184
|
-
|
185
|
-
-
|
186
|
-
-
|
187
|
-
-
|
198
|
+
Multiple reporting modes are supported:
|
199
|
+
- Text
|
200
|
+
- Dotgraph
|
201
|
+
- Source annotation
|
188
202
|
|
189
203
|
### `StackProf::Report.new(data).print_text`
|
190
204
|
|
@@ -203,8 +217,6 @@ multiple reporting modes are supported:
|
|
203
217
|
|
204
218
|
### `StackProf::Report.new(data).print_graphviz`
|
205
219
|
|
206
|
-
![](http://cl.ly/image/2t3l2q0l0B0A/content)
|
207
|
-
|
208
220
|
```
|
209
221
|
digraph profile {
|
210
222
|
70346498324780 [size=23.5531914893617] [fontsize=23.5531914893617] [shape=box] [label="A#pow\n91 (48.4%)\r"];
|
@@ -251,8 +263,8 @@ block in A#math (/Users/tmm1/code/stackprof/sample.rb:21)
|
|
251
263
|
|
252
264
|
## Usage
|
253
265
|
|
254
|
-
|
255
|
-
|
266
|
+
The profiler is compiled as a C-extension and exposes a simple api: `StackProf.run(mode: [:cpu|:wall|:object])`.
|
267
|
+
The `run` method takes a block of code and returns a profile as a simple hash.
|
256
268
|
|
257
269
|
``` ruby
|
258
270
|
# sample after every 1ms of cpu activity
|
@@ -261,12 +273,12 @@ profile = StackProf.run(mode: :cpu, interval: 1000) do
|
|
261
273
|
end
|
262
274
|
```
|
263
275
|
|
264
|
-
|
265
|
-
(as json/marshal for example) for later processing.
|
276
|
+
This profile data structure is part of the public API, and is intended to be saved
|
277
|
+
(as json/marshal for example) for later processing. The reports above can be generated
|
266
278
|
by passing this structure into `StackProf::Report.new`.
|
267
279
|
|
268
|
-
|
269
|
-
identifying information such as its name, file and line.
|
280
|
+
The format itself is very simple. It contains a header and a list of frames. Each frame has a unique ID and
|
281
|
+
identifying information such as its name, file, and line. The frame also contains sampling data, including per-line
|
270
282
|
samples, and a list of relationships to other frames represented as weighted edges.
|
271
283
|
|
272
284
|
``` ruby
|
@@ -293,20 +305,21 @@ samples, and a list of relationships to other frames represented as weighted edg
|
|
293
305
|
:lines=>{8=>1}},
|
294
306
|
```
|
295
307
|
|
296
|
-
|
308
|
+
Above, `A#pow` was involved in 91 samples, and in all cases it was at the top of the stack on line 12.
|
297
309
|
|
298
|
-
`A#initialize` was in 185 samples, but it was at the top of the stack in only 1 sample.
|
299
|
-
divided up between its callee edges.
|
310
|
+
`A#initialize` was in 185 samples, but it was at the top of the stack in only 1 sample. The rest of the samples are
|
311
|
+
divided up between its callee edges. All 91 calls to `A#pow` came from `A#initialize`, as seen by the edge numbered
|
300
312
|
`70346498324780`.
|
301
313
|
|
302
314
|
## Advanced usage
|
303
315
|
|
304
|
-
|
305
|
-
multiple start
|
316
|
+
The profiler can be started and stopped manually. Results are accumulated until retrieval, across
|
317
|
+
multiple `start`/`stop` invocations.
|
306
318
|
|
307
319
|
``` ruby
|
308
|
-
StackProf.running?
|
320
|
+
StackProf.running? # => false
|
309
321
|
StackProf.start(mode: :cpu)
|
322
|
+
StackProf.running? # => true
|
310
323
|
StackProf.stop
|
311
324
|
StackProf.results('/tmp/some.file')
|
312
325
|
```
|
@@ -317,12 +330,14 @@ StackProf.results('/tmp/some.file')
|
|
317
330
|
|
318
331
|
Option | Meaning
|
319
332
|
------- | ---------
|
320
|
-
`mode` |
|
321
|
-
`out` |
|
322
|
-
`interval` |
|
323
|
-
`
|
324
|
-
`
|
325
|
-
`
|
333
|
+
`mode` | Mode of sampling: `:cpu`, `:wall`, `:object`, or `:custom` [cf.](#sampling)
|
334
|
+
`out` | The target file, which will be overwritten
|
335
|
+
`interval` | Mode-relative sample rate [cf.](#sampling)
|
336
|
+
`ignore_gc` | Ignore garbage collection frames
|
337
|
+
`aggregate` | Defaults to `true` - if `false`, disables [aggregation](#aggregation)
|
338
|
+
`raw` | Defaults to `false` - if `true`, collects the extra data required by the `--flamegraph` and `--stackcollapse` report types
|
339
|
+
`metadata` | Defaults to `{}`. Must be a `Hash`. Metadata associated with this profile
|
340
|
+
`save_every`| (Rack middleware only) write the target file after this many requests
|
326
341
|
|
327
342
|
## Todo
|
328
343
|
|
data/Rakefile
CHANGED
@@ -1,31 +1,17 @@
|
|
1
|
-
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
GEMSPEC = Gem::Specification::load('stackprof.gemspec')
|
8
|
-
|
9
|
-
require 'rubygems/package_task'
|
10
|
-
Gem::PackageTask.new(GEMSPEC) do |pkg|
|
4
|
+
Rake::TestTask.new(:test) do |t|
|
5
|
+
t.libs << "test"
|
6
|
+
t.libs << "lib"
|
7
|
+
t.test_files = FileList["test/**/test_*.rb"]
|
11
8
|
end
|
12
9
|
|
13
|
-
|
14
|
-
# Ruby Extension
|
15
|
-
# ==========================================================
|
10
|
+
require "rake/extensiontask"
|
16
11
|
|
17
|
-
|
18
|
-
|
19
|
-
ext.lib_dir =
|
12
|
+
Rake::ExtensionTask.new("stackprof") do |ext|
|
13
|
+
ext.ext_dir = "ext/stackprof"
|
14
|
+
ext.lib_dir = "lib/stackprof"
|
20
15
|
end
|
21
|
-
task :build => :compile
|
22
16
|
|
23
|
-
|
24
|
-
# Testing
|
25
|
-
# ==========================================================
|
26
|
-
|
27
|
-
require 'rake/testtask'
|
28
|
-
Rake::TestTask.new 'test' do |t|
|
29
|
-
t.test_files = FileList['test/test_*.rb']
|
30
|
-
end
|
31
|
-
task :test => :build
|
17
|
+
task default: %i(compile test)
|
data/bin/stackprof
CHANGED
@@ -8,6 +8,7 @@ parser = OptionParser.new(ARGV) do |o|
|
|
8
8
|
o.banner = "Usage: stackprof [file.dump]+ [--text|--method=NAME|--callgrind|--graphviz]"
|
9
9
|
|
10
10
|
o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
|
11
|
+
o.on('--json', 'JSON output (use with web viewers)'){ options[:format] = :json }
|
11
12
|
o.on('--files', 'List of files'){ |f| options[:format] = :files }
|
12
13
|
o.on('--limit [num]', Integer, 'Limit --text, --files, or --graphviz output to N entries'){ |n| options[:limit] = n }
|
13
14
|
o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
|
@@ -18,11 +19,14 @@ parser = OptionParser.new(ARGV) do |o|
|
|
18
19
|
o.on('--graphviz', "Graphviz output (use with dot)"){ options[:format] = :graphviz }
|
19
20
|
o.on('--node-fraction [frac]', OptionParser::DecimalNumeric, 'Drop nodes representing less than [frac] fraction of samples'){ |n| options[:node_fraction] = n }
|
20
21
|
o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with stackprof-flamegraph.pl)'){ options[:format] = :stackcollapse }
|
21
|
-
o.on('--flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :
|
22
|
-
o.on('--flamegraph
|
22
|
+
o.on('--timeline-flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :timeline_flamegraph }
|
23
|
+
o.on('--alphabetical-flamegraph', "alphabetical-flamegraph output (js)"){ options[:format] = :alphabetical_flamegraph }
|
24
|
+
o.on('--flamegraph', "alias to --timeline-flamegraph"){ options[:format] = :timeline_flamegraph }
|
25
|
+
o.on('--flamegraph-viewer [f.js]', String, "open html viewer for flamegraph output"){ |file|
|
23
26
|
puts("open file://#{File.expand_path('../../lib/stackprof/flamegraph/viewer.html', __FILE__)}?data=#{File.expand_path(file)}")
|
24
27
|
exit
|
25
28
|
}
|
29
|
+
o.on('--d3-flamegraph', "flamegraph output (html using d3-flame-graph)\n\n"){ options[:format] = :d3_flamegraph }
|
26
30
|
o.on('--select-files []', String, 'Show results of matching files'){ |path| (options[:select_files] ||= []) << File.expand_path(path) }
|
27
31
|
o.on('--reject-files []', String, 'Exclude results of matching files'){ |path| (options[:reject_files] ||= []) << File.expand_path(path) }
|
28
32
|
o.on('--select-names []', Regexp, 'Show results of matching method names'){ |regexp| (options[:select_names] ||= []) << regexp }
|
@@ -62,6 +66,8 @@ options.delete(:limit) if options[:limit] == 0
|
|
62
66
|
case options[:format]
|
63
67
|
when :text
|
64
68
|
report.print_text(options[:sort], options[:limit], options[:select_files], options[:reject_files], options[:select_names], options[:reject_names])
|
69
|
+
when :json
|
70
|
+
report.print_json
|
65
71
|
when :debug
|
66
72
|
report.print_debug
|
67
73
|
when :dump
|
@@ -72,8 +78,12 @@ when :graphviz
|
|
72
78
|
report.print_graphviz(options)
|
73
79
|
when :stackcollapse
|
74
80
|
report.print_stackcollapse
|
75
|
-
when :
|
76
|
-
report.
|
81
|
+
when :timeline_flamegraph
|
82
|
+
report.print_timeline_flamegraph
|
83
|
+
when :alphabetical_flamegraph
|
84
|
+
report.print_alphabetical_flamegraph
|
85
|
+
when :d3_flamegraph
|
86
|
+
report.print_d3_flamegraph
|
77
87
|
when :method
|
78
88
|
options[:walk] ? report.walk_method(options[:filter]) : report.print_method(options[:filter])
|
79
89
|
when :file
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'mkmf'
|
2
|
+
if have_func('rb_postponed_job_register_one') &&
|
3
|
+
have_func('rb_profile_frames') &&
|
4
|
+
have_func('rb_tracepoint_new') &&
|
5
|
+
have_const('RUBY_INTERNAL_EVENT_NEWOBJ')
|
6
|
+
create_makefile('stackprof/stackprof')
|
7
|
+
else
|
8
|
+
fail 'missing API: are you using ruby 2.1+?'
|
9
|
+
end
|
@@ -0,0 +1,788 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
|
3
|
+
stackprof.c - Sampling call-stack frame profiler for MRI.
|
4
|
+
|
5
|
+
vim: noexpandtab shiftwidth=4 tabstop=8 softtabstop=4
|
6
|
+
|
7
|
+
**********************************************************************/
|
8
|
+
|
9
|
+
#include <ruby/ruby.h>
|
10
|
+
#include <ruby/debug.h>
|
11
|
+
#include <ruby/st.h>
|
12
|
+
#include <ruby/io.h>
|
13
|
+
#include <ruby/intern.h>
|
14
|
+
#include <signal.h>
|
15
|
+
#include <sys/time.h>
|
16
|
+
#include <pthread.h>
|
17
|
+
|
18
|
+
#define BUF_SIZE 2048
|
19
|
+
#define MICROSECONDS_IN_SECOND 1000000
|
20
|
+
|
21
|
+
#define FAKE_FRAME_GC INT2FIX(0)
|
22
|
+
#define FAKE_FRAME_MARK INT2FIX(1)
|
23
|
+
#define FAKE_FRAME_SWEEP INT2FIX(2)
|
24
|
+
|
25
|
+
/*
|
26
|
+
* As of Ruby 3.0, it should be safe to read stack frames at any time
|
27
|
+
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
28
|
+
*/
|
29
|
+
#if RUBY_API_VERSION_MAJOR < 3
|
30
|
+
#define USE_POSTPONED_JOB
|
31
|
+
#endif
|
32
|
+
|
33
|
+
static const char *fake_frame_cstrs[] = {
|
34
|
+
"(garbage collection)",
|
35
|
+
"(marking)",
|
36
|
+
"(sweeping)",
|
37
|
+
};
|
38
|
+
|
39
|
+
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
40
|
+
|
41
|
+
typedef struct {
|
42
|
+
size_t total_samples;
|
43
|
+
size_t caller_samples;
|
44
|
+
size_t seen_at_sample_number;
|
45
|
+
st_table *edges;
|
46
|
+
st_table *lines;
|
47
|
+
} frame_data_t;
|
48
|
+
|
49
|
+
static struct {
|
50
|
+
int running;
|
51
|
+
int raw;
|
52
|
+
int aggregate;
|
53
|
+
|
54
|
+
VALUE mode;
|
55
|
+
VALUE interval;
|
56
|
+
VALUE out;
|
57
|
+
VALUE metadata;
|
58
|
+
int ignore_gc;
|
59
|
+
|
60
|
+
VALUE *raw_samples;
|
61
|
+
size_t raw_samples_len;
|
62
|
+
size_t raw_samples_capa;
|
63
|
+
size_t raw_sample_index;
|
64
|
+
|
65
|
+
struct timeval last_sample_at;
|
66
|
+
int *raw_timestamp_deltas;
|
67
|
+
size_t raw_timestamp_deltas_len;
|
68
|
+
size_t raw_timestamp_deltas_capa;
|
69
|
+
|
70
|
+
size_t overall_signals;
|
71
|
+
size_t overall_samples;
|
72
|
+
size_t during_gc;
|
73
|
+
size_t unrecorded_gc_samples;
|
74
|
+
size_t unrecorded_gc_marking_samples;
|
75
|
+
size_t unrecorded_gc_sweeping_samples;
|
76
|
+
st_table *frames;
|
77
|
+
|
78
|
+
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
79
|
+
VALUE empty_string;
|
80
|
+
VALUE frames_buffer[BUF_SIZE];
|
81
|
+
int lines_buffer[BUF_SIZE];
|
82
|
+
} _stackprof;
|
83
|
+
|
84
|
+
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
85
|
+
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
86
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
87
|
+
static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
88
|
+
static VALUE sym_gc_samples, objtracer;
|
89
|
+
static VALUE gc_hook;
|
90
|
+
static VALUE rb_mStackProf;
|
91
|
+
|
92
|
+
static void stackprof_newobj_handler(VALUE, void*);
|
93
|
+
static void stackprof_signal_handler(int sig, siginfo_t* sinfo, void* ucontext);
|
94
|
+
|
95
|
+
static VALUE
|
96
|
+
stackprof_start(int argc, VALUE *argv, VALUE self)
|
97
|
+
{
|
98
|
+
struct sigaction sa;
|
99
|
+
struct itimerval timer;
|
100
|
+
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
101
|
+
int ignore_gc = 0;
|
102
|
+
int raw = 0, aggregate = 1;
|
103
|
+
|
104
|
+
if (_stackprof.running)
|
105
|
+
return Qfalse;
|
106
|
+
|
107
|
+
rb_scan_args(argc, argv, "0:", &opts);
|
108
|
+
|
109
|
+
if (RTEST(opts)) {
|
110
|
+
mode = rb_hash_aref(opts, sym_mode);
|
111
|
+
interval = rb_hash_aref(opts, sym_interval);
|
112
|
+
out = rb_hash_aref(opts, sym_out);
|
113
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
114
|
+
ignore_gc = 1;
|
115
|
+
}
|
116
|
+
|
117
|
+
VALUE metadata_val = rb_hash_aref(opts, sym_metadata);
|
118
|
+
if (RTEST(metadata_val)) {
|
119
|
+
if (!RB_TYPE_P(metadata_val, T_HASH))
|
120
|
+
rb_raise(rb_eArgError, "metadata should be a hash");
|
121
|
+
|
122
|
+
metadata = metadata_val;
|
123
|
+
}
|
124
|
+
|
125
|
+
if (RTEST(rb_hash_aref(opts, sym_raw)))
|
126
|
+
raw = 1;
|
127
|
+
if (rb_hash_lookup2(opts, sym_aggregate, Qundef) == Qfalse)
|
128
|
+
aggregate = 0;
|
129
|
+
}
|
130
|
+
if (!RTEST(mode)) mode = sym_wall;
|
131
|
+
|
132
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
133
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
134
|
+
}
|
135
|
+
|
136
|
+
if (!_stackprof.frames) {
|
137
|
+
_stackprof.frames = st_init_numtable();
|
138
|
+
_stackprof.overall_signals = 0;
|
139
|
+
_stackprof.overall_samples = 0;
|
140
|
+
_stackprof.during_gc = 0;
|
141
|
+
}
|
142
|
+
|
143
|
+
if (mode == sym_object) {
|
144
|
+
if (!RTEST(interval)) interval = INT2FIX(1);
|
145
|
+
|
146
|
+
objtracer = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, stackprof_newobj_handler, 0);
|
147
|
+
rb_tracepoint_enable(objtracer);
|
148
|
+
} else if (mode == sym_wall || mode == sym_cpu) {
|
149
|
+
if (!RTEST(interval)) interval = INT2FIX(1000);
|
150
|
+
|
151
|
+
sa.sa_sigaction = stackprof_signal_handler;
|
152
|
+
sa.sa_flags = SA_RESTART | SA_SIGINFO;
|
153
|
+
sigemptyset(&sa.sa_mask);
|
154
|
+
sigaction(mode == sym_wall ? SIGALRM : SIGPROF, &sa, NULL);
|
155
|
+
|
156
|
+
timer.it_interval.tv_sec = 0;
|
157
|
+
timer.it_interval.tv_usec = NUM2LONG(interval);
|
158
|
+
timer.it_value = timer.it_interval;
|
159
|
+
setitimer(mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
160
|
+
} else if (mode == sym_custom) {
|
161
|
+
/* sampled manually */
|
162
|
+
interval = Qnil;
|
163
|
+
} else {
|
164
|
+
rb_raise(rb_eArgError, "unknown profiler mode");
|
165
|
+
}
|
166
|
+
|
167
|
+
_stackprof.running = 1;
|
168
|
+
_stackprof.raw = raw;
|
169
|
+
_stackprof.aggregate = aggregate;
|
170
|
+
_stackprof.mode = mode;
|
171
|
+
_stackprof.interval = interval;
|
172
|
+
_stackprof.ignore_gc = ignore_gc;
|
173
|
+
_stackprof.metadata = metadata;
|
174
|
+
_stackprof.out = out;
|
175
|
+
|
176
|
+
if (raw) {
|
177
|
+
gettimeofday(&_stackprof.last_sample_at, NULL);
|
178
|
+
}
|
179
|
+
|
180
|
+
return Qtrue;
|
181
|
+
}
|
182
|
+
|
183
|
+
static VALUE
|
184
|
+
stackprof_stop(VALUE self)
|
185
|
+
{
|
186
|
+
struct sigaction sa;
|
187
|
+
struct itimerval timer;
|
188
|
+
|
189
|
+
if (!_stackprof.running)
|
190
|
+
return Qfalse;
|
191
|
+
_stackprof.running = 0;
|
192
|
+
|
193
|
+
if (_stackprof.mode == sym_object) {
|
194
|
+
rb_tracepoint_disable(objtracer);
|
195
|
+
} else if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) {
|
196
|
+
memset(&timer, 0, sizeof(timer));
|
197
|
+
setitimer(_stackprof.mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
198
|
+
|
199
|
+
sa.sa_handler = SIG_IGN;
|
200
|
+
sa.sa_flags = SA_RESTART;
|
201
|
+
sigemptyset(&sa.sa_mask);
|
202
|
+
sigaction(_stackprof.mode == sym_wall ? SIGALRM : SIGPROF, &sa, NULL);
|
203
|
+
} else if (_stackprof.mode == sym_custom) {
|
204
|
+
/* sampled manually */
|
205
|
+
} else {
|
206
|
+
rb_raise(rb_eArgError, "unknown profiler mode");
|
207
|
+
}
|
208
|
+
|
209
|
+
return Qtrue;
|
210
|
+
}
|
211
|
+
|
212
|
+
static int
|
213
|
+
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
214
|
+
{
|
215
|
+
VALUE edges = (VALUE)arg;
|
216
|
+
|
217
|
+
intptr_t weight = (intptr_t)val;
|
218
|
+
rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
|
219
|
+
return ST_CONTINUE;
|
220
|
+
}
|
221
|
+
|
222
|
+
static int
|
223
|
+
frame_lines_i(st_data_t key, st_data_t val, st_data_t arg)
|
224
|
+
{
|
225
|
+
VALUE lines = (VALUE)arg;
|
226
|
+
|
227
|
+
size_t weight = (size_t)val;
|
228
|
+
size_t total = weight & (~(size_t)0 << (8*SIZEOF_SIZE_T/2));
|
229
|
+
weight -= total;
|
230
|
+
total = total >> (8*SIZEOF_SIZE_T/2);
|
231
|
+
rb_hash_aset(lines, INT2FIX(key), rb_ary_new3(2, ULONG2NUM(total), ULONG2NUM(weight)));
|
232
|
+
return ST_CONTINUE;
|
233
|
+
}
|
234
|
+
|
235
|
+
static int
|
236
|
+
frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
237
|
+
{
|
238
|
+
VALUE frame = (VALUE)key;
|
239
|
+
frame_data_t *frame_data = (frame_data_t *)val;
|
240
|
+
VALUE results = (VALUE)arg;
|
241
|
+
VALUE details = rb_hash_new();
|
242
|
+
VALUE name, file, edges, lines;
|
243
|
+
VALUE line;
|
244
|
+
|
245
|
+
rb_hash_aset(results, rb_obj_id(frame), details);
|
246
|
+
|
247
|
+
if (FIXNUM_P(frame)) {
|
248
|
+
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
249
|
+
file = _stackprof.empty_string;
|
250
|
+
line = INT2FIX(0);
|
251
|
+
} else {
|
252
|
+
name = rb_profile_frame_full_label(frame);
|
253
|
+
|
254
|
+
file = rb_profile_frame_absolute_path(frame);
|
255
|
+
if (NIL_P(file))
|
256
|
+
file = rb_profile_frame_path(frame);
|
257
|
+
line = rb_profile_frame_first_lineno(frame);
|
258
|
+
}
|
259
|
+
|
260
|
+
rb_hash_aset(details, sym_name, name);
|
261
|
+
rb_hash_aset(details, sym_file, file);
|
262
|
+
if (line != INT2FIX(0)) {
|
263
|
+
rb_hash_aset(details, sym_line, line);
|
264
|
+
}
|
265
|
+
|
266
|
+
rb_hash_aset(details, sym_total_samples, SIZET2NUM(frame_data->total_samples));
|
267
|
+
rb_hash_aset(details, sym_samples, SIZET2NUM(frame_data->caller_samples));
|
268
|
+
|
269
|
+
if (frame_data->edges) {
|
270
|
+
edges = rb_hash_new();
|
271
|
+
rb_hash_aset(details, sym_edges, edges);
|
272
|
+
st_foreach(frame_data->edges, frame_edges_i, (st_data_t)edges);
|
273
|
+
st_free_table(frame_data->edges);
|
274
|
+
frame_data->edges = NULL;
|
275
|
+
}
|
276
|
+
|
277
|
+
if (frame_data->lines) {
|
278
|
+
lines = rb_hash_new();
|
279
|
+
rb_hash_aset(details, sym_lines, lines);
|
280
|
+
st_foreach(frame_data->lines, frame_lines_i, (st_data_t)lines);
|
281
|
+
st_free_table(frame_data->lines);
|
282
|
+
frame_data->lines = NULL;
|
283
|
+
}
|
284
|
+
|
285
|
+
xfree(frame_data);
|
286
|
+
return ST_DELETE;
|
287
|
+
}
|
288
|
+
|
289
|
+
static VALUE
|
290
|
+
stackprof_results(int argc, VALUE *argv, VALUE self)
|
291
|
+
{
|
292
|
+
VALUE results, frames;
|
293
|
+
|
294
|
+
if (!_stackprof.frames || _stackprof.running)
|
295
|
+
return Qnil;
|
296
|
+
|
297
|
+
results = rb_hash_new();
|
298
|
+
rb_hash_aset(results, sym_version, DBL2NUM(1.2));
|
299
|
+
rb_hash_aset(results, sym_mode, _stackprof.mode);
|
300
|
+
rb_hash_aset(results, sym_interval, _stackprof.interval);
|
301
|
+
rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
|
302
|
+
rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
|
303
|
+
rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
|
304
|
+
rb_hash_aset(results, sym_metadata, _stackprof.metadata);
|
305
|
+
|
306
|
+
_stackprof.metadata = Qnil;
|
307
|
+
|
308
|
+
frames = rb_hash_new();
|
309
|
+
rb_hash_aset(results, sym_frames, frames);
|
310
|
+
st_foreach(_stackprof.frames, frame_i, (st_data_t)frames);
|
311
|
+
|
312
|
+
st_free_table(_stackprof.frames);
|
313
|
+
_stackprof.frames = NULL;
|
314
|
+
|
315
|
+
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
316
|
+
size_t len, n, o;
|
317
|
+
VALUE raw_timestamp_deltas;
|
318
|
+
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
319
|
+
|
320
|
+
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
321
|
+
len = (size_t)_stackprof.raw_samples[n];
|
322
|
+
rb_ary_push(raw_samples, SIZET2NUM(len));
|
323
|
+
|
324
|
+
for (o = 0, n++; o < len; n++, o++)
|
325
|
+
rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
|
326
|
+
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
327
|
+
}
|
328
|
+
|
329
|
+
free(_stackprof.raw_samples);
|
330
|
+
_stackprof.raw_samples = NULL;
|
331
|
+
_stackprof.raw_samples_len = 0;
|
332
|
+
_stackprof.raw_samples_capa = 0;
|
333
|
+
_stackprof.raw_sample_index = 0;
|
334
|
+
|
335
|
+
rb_hash_aset(results, sym_raw, raw_samples);
|
336
|
+
|
337
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
|
338
|
+
|
339
|
+
for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
|
340
|
+
rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
|
341
|
+
}
|
342
|
+
|
343
|
+
free(_stackprof.raw_timestamp_deltas);
|
344
|
+
_stackprof.raw_timestamp_deltas = NULL;
|
345
|
+
_stackprof.raw_timestamp_deltas_len = 0;
|
346
|
+
_stackprof.raw_timestamp_deltas_capa = 0;
|
347
|
+
|
348
|
+
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
|
349
|
+
|
350
|
+
_stackprof.raw = 0;
|
351
|
+
}
|
352
|
+
|
353
|
+
if (argc == 1)
|
354
|
+
_stackprof.out = argv[0];
|
355
|
+
|
356
|
+
if (RTEST(_stackprof.out)) {
|
357
|
+
VALUE file;
|
358
|
+
if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
|
359
|
+
file = rb_io_check_io(_stackprof.out);
|
360
|
+
} else {
|
361
|
+
file = rb_file_open_str(_stackprof.out, "w");
|
362
|
+
}
|
363
|
+
|
364
|
+
rb_marshal_dump(results, file);
|
365
|
+
rb_io_flush(file);
|
366
|
+
_stackprof.out = Qnil;
|
367
|
+
return file;
|
368
|
+
} else {
|
369
|
+
return results;
|
370
|
+
}
|
371
|
+
}
|
372
|
+
|
373
|
+
static VALUE
|
374
|
+
stackprof_run(int argc, VALUE *argv, VALUE self)
|
375
|
+
{
|
376
|
+
rb_need_block();
|
377
|
+
stackprof_start(argc, argv, self);
|
378
|
+
rb_ensure(rb_yield, Qundef, stackprof_stop, self);
|
379
|
+
return stackprof_results(0, 0, self);
|
380
|
+
}
|
381
|
+
|
382
|
+
static VALUE
|
383
|
+
stackprof_running_p(VALUE self)
|
384
|
+
{
|
385
|
+
return _stackprof.running ? Qtrue : Qfalse;
|
386
|
+
}
|
387
|
+
|
388
|
+
static inline frame_data_t *
|
389
|
+
sample_for(VALUE frame)
|
390
|
+
{
|
391
|
+
st_data_t key = (st_data_t)frame, val = 0;
|
392
|
+
frame_data_t *frame_data;
|
393
|
+
|
394
|
+
if (st_lookup(_stackprof.frames, key, &val)) {
|
395
|
+
frame_data = (frame_data_t *)val;
|
396
|
+
} else {
|
397
|
+
frame_data = ALLOC_N(frame_data_t, 1);
|
398
|
+
MEMZERO(frame_data, frame_data_t, 1);
|
399
|
+
val = (st_data_t)frame_data;
|
400
|
+
st_insert(_stackprof.frames, key, val);
|
401
|
+
}
|
402
|
+
|
403
|
+
return frame_data;
|
404
|
+
}
|
405
|
+
|
406
|
+
static int
|
407
|
+
numtable_increment_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
|
408
|
+
{
|
409
|
+
size_t *weight = (size_t *)value;
|
410
|
+
size_t increment = (size_t)arg;
|
411
|
+
|
412
|
+
if (existing)
|
413
|
+
(*weight) += increment;
|
414
|
+
else
|
415
|
+
*weight = increment;
|
416
|
+
|
417
|
+
return ST_CONTINUE;
|
418
|
+
}
|
419
|
+
|
420
|
+
void
|
421
|
+
st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
422
|
+
{
|
423
|
+
st_update(table, key, numtable_increment_callback, (st_data_t)increment);
|
424
|
+
}
|
425
|
+
|
426
|
+
void
|
427
|
+
stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
428
|
+
{
|
429
|
+
int i, n;
|
430
|
+
VALUE prev_frame = Qnil;
|
431
|
+
|
432
|
+
_stackprof.overall_samples++;
|
433
|
+
|
434
|
+
if (_stackprof.raw) {
|
435
|
+
int found = 0;
|
436
|
+
|
437
|
+
/* If there's no sample buffer allocated, then allocate one. The buffer
|
438
|
+
* format is the number of frames (num), then the list of frames (from
|
439
|
+
* `_stackprof.raw_samples`), followed by the number of times this
|
440
|
+
* particular stack has been seen in a row. Each "new" stack is added
|
441
|
+
* to the end of the buffer, but if the previous stack is the same as
|
442
|
+
* the current stack, the counter will be incremented. */
|
443
|
+
if (!_stackprof.raw_samples) {
|
444
|
+
_stackprof.raw_samples_capa = num * 100;
|
445
|
+
_stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
|
446
|
+
}
|
447
|
+
|
448
|
+
/* If we can't fit all the samples in the buffer, double the buffer size. */
|
449
|
+
while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
|
450
|
+
_stackprof.raw_samples_capa *= 2;
|
451
|
+
_stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
|
452
|
+
}
|
453
|
+
|
454
|
+
/* If we've seen this stack before in the last sample, then increment the "seen" count. */
|
455
|
+
if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
|
456
|
+
/* The number of samples could have been the same, but the stack
|
457
|
+
* might be different, so we need to check the stack here. Stacks
|
458
|
+
* in the raw buffer are stored in the opposite direction of stacks
|
459
|
+
* in the frames buffer that came from Ruby. */
|
460
|
+
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
461
|
+
VALUE frame = _stackprof.frames_buffer[i];
|
462
|
+
if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame)
|
463
|
+
break;
|
464
|
+
}
|
465
|
+
if (i == -1) {
|
466
|
+
_stackprof.raw_samples[_stackprof.raw_samples_len-1] += 1;
|
467
|
+
found = 1;
|
468
|
+
}
|
469
|
+
}
|
470
|
+
|
471
|
+
/* If we haven't seen the stack, then add it to the buffer along with
|
472
|
+
* the length of the stack and a 1 for the "seen" count */
|
473
|
+
if (!found) {
|
474
|
+
/* Bump the `raw_sample_index` up so that the next iteration can
|
475
|
+
* find the previously recorded stack size. */
|
476
|
+
_stackprof.raw_sample_index = _stackprof.raw_samples_len;
|
477
|
+
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
|
478
|
+
for (i = num-1; i >= 0; i--) {
|
479
|
+
VALUE frame = _stackprof.frames_buffer[i];
|
480
|
+
_stackprof.raw_samples[_stackprof.raw_samples_len++] = frame;
|
481
|
+
}
|
482
|
+
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
|
483
|
+
}
|
484
|
+
|
485
|
+
/* If there's no timestamp delta buffer, allocate one */
|
486
|
+
if (!_stackprof.raw_timestamp_deltas) {
|
487
|
+
_stackprof.raw_timestamp_deltas_capa = 100;
|
488
|
+
_stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
|
489
|
+
_stackprof.raw_timestamp_deltas_len = 0;
|
490
|
+
}
|
491
|
+
|
492
|
+
/* Double the buffer size if it's too small */
|
493
|
+
while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
|
494
|
+
_stackprof.raw_timestamp_deltas_capa *= 2;
|
495
|
+
_stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
|
496
|
+
}
|
497
|
+
|
498
|
+
/* Store the time delta (which is the amount of time between samples) */
|
499
|
+
_stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
|
500
|
+
}
|
501
|
+
|
502
|
+
for (i = 0; i < num; i++) {
|
503
|
+
int line = _stackprof.lines_buffer[i];
|
504
|
+
VALUE frame = _stackprof.frames_buffer[i];
|
505
|
+
frame_data_t *frame_data = sample_for(frame);
|
506
|
+
|
507
|
+
if (frame_data->seen_at_sample_number != _stackprof.overall_samples) {
|
508
|
+
frame_data->total_samples++;
|
509
|
+
}
|
510
|
+
frame_data->seen_at_sample_number = _stackprof.overall_samples;
|
511
|
+
|
512
|
+
if (i == 0) {
|
513
|
+
frame_data->caller_samples++;
|
514
|
+
} else if (_stackprof.aggregate) {
|
515
|
+
if (!frame_data->edges)
|
516
|
+
frame_data->edges = st_init_numtable();
|
517
|
+
st_numtable_increment(frame_data->edges, (st_data_t)prev_frame, 1);
|
518
|
+
}
|
519
|
+
|
520
|
+
if (_stackprof.aggregate && line > 0) {
|
521
|
+
size_t half = (size_t)1<<(8*SIZEOF_SIZE_T/2);
|
522
|
+
size_t increment = i == 0 ? half + 1 : half;
|
523
|
+
if (!frame_data->lines)
|
524
|
+
frame_data->lines = st_init_numtable();
|
525
|
+
st_numtable_increment(frame_data->lines, (st_data_t)line, increment);
|
526
|
+
}
|
527
|
+
|
528
|
+
prev_frame = frame;
|
529
|
+
}
|
530
|
+
|
531
|
+
if (_stackprof.raw) {
|
532
|
+
gettimeofday(&_stackprof.last_sample_at, NULL);
|
533
|
+
}
|
534
|
+
}
|
535
|
+
|
536
|
+
void
|
537
|
+
stackprof_record_sample()
|
538
|
+
{
|
539
|
+
int timestamp_delta = 0;
|
540
|
+
int num;
|
541
|
+
if (_stackprof.raw) {
|
542
|
+
struct timeval t;
|
543
|
+
struct timeval diff;
|
544
|
+
gettimeofday(&t, NULL);
|
545
|
+
timersub(&t, &_stackprof.last_sample_at, &diff);
|
546
|
+
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
547
|
+
}
|
548
|
+
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
549
|
+
stackprof_record_sample_for_stack(num, timestamp_delta);
|
550
|
+
}
|
551
|
+
|
552
|
+
void
|
553
|
+
stackprof_record_gc_samples()
|
554
|
+
{
|
555
|
+
int delta_to_first_unrecorded_gc_sample = 0;
|
556
|
+
int i;
|
557
|
+
if (_stackprof.raw) {
|
558
|
+
struct timeval t;
|
559
|
+
struct timeval diff;
|
560
|
+
gettimeofday(&t, NULL);
|
561
|
+
timersub(&t, &_stackprof.last_sample_at, &diff);
|
562
|
+
|
563
|
+
// We don't know when the GC samples were actually marked, so let's
|
564
|
+
// assume that they were marked at a perfectly regular interval.
|
565
|
+
delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
566
|
+
if (delta_to_first_unrecorded_gc_sample < 0) {
|
567
|
+
delta_to_first_unrecorded_gc_sample = 0;
|
568
|
+
}
|
569
|
+
}
|
570
|
+
|
571
|
+
|
572
|
+
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
573
|
+
int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
574
|
+
|
575
|
+
if (_stackprof.unrecorded_gc_marking_samples) {
|
576
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
577
|
+
_stackprof.lines_buffer[0] = 0;
|
578
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
579
|
+
_stackprof.lines_buffer[1] = 0;
|
580
|
+
_stackprof.unrecorded_gc_marking_samples--;
|
581
|
+
|
582
|
+
stackprof_record_sample_for_stack(2, timestamp_delta);
|
583
|
+
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
584
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
585
|
+
_stackprof.lines_buffer[0] = 0;
|
586
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
587
|
+
_stackprof.lines_buffer[1] = 0;
|
588
|
+
|
589
|
+
_stackprof.unrecorded_gc_sweeping_samples--;
|
590
|
+
|
591
|
+
stackprof_record_sample_for_stack(2, timestamp_delta);
|
592
|
+
} else {
|
593
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
594
|
+
_stackprof.lines_buffer[0] = 0;
|
595
|
+
stackprof_record_sample_for_stack(1, timestamp_delta);
|
596
|
+
}
|
597
|
+
}
|
598
|
+
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
599
|
+
_stackprof.unrecorded_gc_samples = 0;
|
600
|
+
_stackprof.unrecorded_gc_marking_samples = 0;
|
601
|
+
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
602
|
+
}
|
603
|
+
|
604
|
+
static void
|
605
|
+
stackprof_gc_job_handler(void *data)
|
606
|
+
{
|
607
|
+
if (!_stackprof.running) return;
|
608
|
+
|
609
|
+
stackprof_record_gc_samples();
|
610
|
+
}
|
611
|
+
|
612
|
+
static void
|
613
|
+
stackprof_job_handler(void *data)
|
614
|
+
{
|
615
|
+
if (!_stackprof.running) return;
|
616
|
+
|
617
|
+
stackprof_record_sample();
|
618
|
+
}
|
619
|
+
|
620
|
+
static void
|
621
|
+
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
622
|
+
{
|
623
|
+
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
624
|
+
|
625
|
+
_stackprof.overall_signals++;
|
626
|
+
|
627
|
+
if (!_stackprof.running) return;
|
628
|
+
if (!ruby_native_thread_p()) return;
|
629
|
+
if (pthread_mutex_trylock(&lock)) return;
|
630
|
+
|
631
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
632
|
+
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
633
|
+
if (mode == sym_marking) {
|
634
|
+
_stackprof.unrecorded_gc_marking_samples++;
|
635
|
+
} else if (mode == sym_sweeping) {
|
636
|
+
_stackprof.unrecorded_gc_sweeping_samples++;
|
637
|
+
}
|
638
|
+
_stackprof.unrecorded_gc_samples++;
|
639
|
+
rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
|
640
|
+
} else {
|
641
|
+
#ifdef USE_POSTPONED_JOB
|
642
|
+
rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0);
|
643
|
+
#else
|
644
|
+
stackprof_job_handler(0);
|
645
|
+
#endif
|
646
|
+
}
|
647
|
+
pthread_mutex_unlock(&lock);
|
648
|
+
}
|
649
|
+
|
650
|
+
static void
|
651
|
+
stackprof_newobj_handler(VALUE tpval, void *data)
|
652
|
+
{
|
653
|
+
_stackprof.overall_signals++;
|
654
|
+
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
|
655
|
+
return;
|
656
|
+
stackprof_job_handler(0);
|
657
|
+
}
|
658
|
+
|
659
|
+
static VALUE
|
660
|
+
stackprof_sample(VALUE self)
|
661
|
+
{
|
662
|
+
if (!_stackprof.running)
|
663
|
+
return Qfalse;
|
664
|
+
|
665
|
+
_stackprof.overall_signals++;
|
666
|
+
stackprof_job_handler(0);
|
667
|
+
return Qtrue;
|
668
|
+
}
|
669
|
+
|
670
|
+
static int
|
671
|
+
frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
672
|
+
{
|
673
|
+
VALUE frame = (VALUE)key;
|
674
|
+
rb_gc_mark(frame);
|
675
|
+
return ST_CONTINUE;
|
676
|
+
}
|
677
|
+
|
678
|
+
static void
|
679
|
+
stackprof_gc_mark(void *data)
|
680
|
+
{
|
681
|
+
if (RTEST(_stackprof.metadata))
|
682
|
+
rb_gc_mark(_stackprof.metadata);
|
683
|
+
|
684
|
+
if (RTEST(_stackprof.out))
|
685
|
+
rb_gc_mark(_stackprof.out);
|
686
|
+
|
687
|
+
if (_stackprof.frames)
|
688
|
+
st_foreach(_stackprof.frames, frame_mark_i, 0);
|
689
|
+
}
|
690
|
+
|
691
|
+
static void
|
692
|
+
stackprof_atfork_prepare(void)
|
693
|
+
{
|
694
|
+
struct itimerval timer;
|
695
|
+
if (_stackprof.running) {
|
696
|
+
if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) {
|
697
|
+
memset(&timer, 0, sizeof(timer));
|
698
|
+
setitimer(_stackprof.mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
699
|
+
}
|
700
|
+
}
|
701
|
+
}
|
702
|
+
|
703
|
+
static void
|
704
|
+
stackprof_atfork_parent(void)
|
705
|
+
{
|
706
|
+
struct itimerval timer;
|
707
|
+
if (_stackprof.running) {
|
708
|
+
if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) {
|
709
|
+
timer.it_interval.tv_sec = 0;
|
710
|
+
timer.it_interval.tv_usec = NUM2LONG(_stackprof.interval);
|
711
|
+
timer.it_value = timer.it_interval;
|
712
|
+
setitimer(_stackprof.mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
713
|
+
}
|
714
|
+
}
|
715
|
+
}
|
716
|
+
|
717
|
+
static void
|
718
|
+
stackprof_atfork_child(void)
|
719
|
+
{
|
720
|
+
stackprof_stop(rb_mStackProf);
|
721
|
+
}
|
722
|
+
|
723
|
+
void
|
724
|
+
Init_stackprof(void)
|
725
|
+
{
|
726
|
+
size_t i;
|
727
|
+
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
728
|
+
S(object);
|
729
|
+
S(custom);
|
730
|
+
S(wall);
|
731
|
+
S(cpu);
|
732
|
+
S(name);
|
733
|
+
S(file);
|
734
|
+
S(line);
|
735
|
+
S(total_samples);
|
736
|
+
S(gc_samples);
|
737
|
+
S(missed_samples);
|
738
|
+
S(samples);
|
739
|
+
S(edges);
|
740
|
+
S(lines);
|
741
|
+
S(version);
|
742
|
+
S(mode);
|
743
|
+
S(interval);
|
744
|
+
S(raw);
|
745
|
+
S(raw_timestamp_deltas);
|
746
|
+
S(out);
|
747
|
+
S(metadata);
|
748
|
+
S(ignore_gc);
|
749
|
+
S(frames);
|
750
|
+
S(aggregate);
|
751
|
+
S(state);
|
752
|
+
S(marking);
|
753
|
+
S(sweeping);
|
754
|
+
#undef S
|
755
|
+
|
756
|
+
/* Need to run this to warm the symbol table before we call this during GC */
|
757
|
+
rb_gc_latest_gc_info(sym_state);
|
758
|
+
|
759
|
+
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
|
760
|
+
rb_global_variable(&gc_hook);
|
761
|
+
|
762
|
+
_stackprof.raw_samples = NULL;
|
763
|
+
_stackprof.raw_samples_len = 0;
|
764
|
+
_stackprof.raw_samples_capa = 0;
|
765
|
+
_stackprof.raw_sample_index = 0;
|
766
|
+
|
767
|
+
_stackprof.raw_timestamp_deltas = NULL;
|
768
|
+
_stackprof.raw_timestamp_deltas_len = 0;
|
769
|
+
_stackprof.raw_timestamp_deltas_capa = 0;
|
770
|
+
|
771
|
+
_stackprof.empty_string = rb_str_new_cstr("");
|
772
|
+
rb_global_variable(&_stackprof.empty_string);
|
773
|
+
|
774
|
+
for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
|
775
|
+
_stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
|
776
|
+
rb_global_variable(&_stackprof.fake_frame_names[i]);
|
777
|
+
}
|
778
|
+
|
779
|
+
rb_mStackProf = rb_define_module("StackProf");
|
780
|
+
rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
|
781
|
+
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
|
782
|
+
rb_define_singleton_method(rb_mStackProf, "start", stackprof_start, -1);
|
783
|
+
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
784
|
+
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
785
|
+
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
786
|
+
|
787
|
+
pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
|
788
|
+
}
|