stackprof 0.2.11 → 0.2.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +2 -0
- data/.travis.yml +21 -8
- data/CHANGELOG.md +14 -0
- data/Dockerfile +21 -0
- data/README.md +39 -19
- data/bin/stackprof +14 -4
- data/ext/stackprof/stackprof.c +131 -34
- data/lib/stackprof.rb +4 -0
- data/lib/stackprof/middleware.rb +23 -7
- data/lib/stackprof/report.rb +271 -10
- data/stackprof.gemspec +11 -2
- data/test/test_middleware.rb +13 -7
- data/test/test_stackprof.rb +97 -2
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +15 -11
- data/Gemfile.lock +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 109d5dc07fefb68933ae164c88420aecc662593f9fa96e102a90e7c8d4c605a9
|
4
|
+
data.tar.gz: 7cbd4e6919a160f5e7b680bd0994e9e33097df03c7098ffda7317996a8afb1f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4c6894359a809ea8e504eca85506179eb598d2b0db6833b1661e08d982efb78ec0fa526e73ba30a7388c443b5b4eb16a466baa7dd3dcd4e139c0bec7b22ab5d
|
7
|
+
data.tar.gz: 040bc4d3c1ffb1f724bce5ca7db4ddeae7af7ecdc526f893f878049bf42c715983802def5a2e8905f6b1016401a478457a237ef0a76dc742e7070780940e762b
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,8 +1,21 @@
|
|
1
|
-
sudo:
|
2
|
-
|
3
|
-
|
4
|
-
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
1
|
+
sudo: required
|
2
|
+
|
3
|
+
services:
|
4
|
+
- docker
|
5
|
+
|
6
|
+
language: general
|
7
|
+
|
8
|
+
env:
|
9
|
+
matrix:
|
10
|
+
- RVM_RUBY_VERSION=2.2
|
11
|
+
- RVM_RUBY_VERSION=2.3
|
12
|
+
- RVM_RUBY_VERSION=2.4
|
13
|
+
- RVM_RUBY_VERSION=2.5
|
14
|
+
- RVM_RUBY_VERSION=2.6
|
15
|
+
- RVM_RUBY_VERSION=ruby-head
|
16
|
+
|
17
|
+
before_install:
|
18
|
+
- sudo docker build -t stackprof-$RVM_RUBY_VERSION --build-arg=RVM_RUBY_VERSION=$RVM_RUBY_VERSION .
|
19
|
+
|
20
|
+
script:
|
21
|
+
- sudo docker run --name stackprof-$RVM_RUBY_VERSION stackprof-$RVM_RUBY_VERSION
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# 0.2.16
|
2
|
+
|
3
|
+
* [flamegraph.pl] Update to latest version
|
4
|
+
* Add option to ignore GC frames
|
5
|
+
* Handle source code not being available
|
6
|
+
* Freeze strings in report.rb
|
7
|
+
* Use a cursor object instead of array slicing
|
8
|
+
* ArgumentError on interval <1 or >1m
|
9
|
+
* fix variable name.
|
10
|
+
* Fix default mode comment in readme
|
11
|
+
|
12
|
+
# 0.2.15
|
13
|
+
|
14
|
+
* Mark the metadata object before the GC is invoked to prevent it from being garbage collected.
|
data/Dockerfile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
FROM ubuntu:16.04
|
2
|
+
ARG DEBIAN_FRONTEND=noninteractive
|
3
|
+
RUN apt-get update -q && \
|
4
|
+
apt-get install -qy \
|
5
|
+
curl ca-certificates gnupg2 dirmngr build-essential \
|
6
|
+
gawk git autoconf automake pkg-config \
|
7
|
+
bison libffi-dev libgdbm-dev libncurses5-dev libsqlite3-dev libtool \
|
8
|
+
libyaml-dev sqlite3 zlib1g-dev libgmp-dev libreadline-dev libssl-dev \
|
9
|
+
ruby --no-install-recommends && \
|
10
|
+
apt-get clean
|
11
|
+
|
12
|
+
RUN gpg2 --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
|
13
|
+
RUN curl -sSL https://get.rvm.io | bash -s
|
14
|
+
ARG RVM_RUBY_VERSION=ruby-head
|
15
|
+
RUN /bin/bash -l -c "echo $RVM_RUBY_VERSION"
|
16
|
+
RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && rvm install $RVM_RUBY_VERSION --binary || rvm install $RVM_RUBY_VERSION"
|
17
|
+
ADD . /stackprof/
|
18
|
+
WORKDIR /stackprof/
|
19
|
+
RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && gem install bundler:1.16.0"
|
20
|
+
RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && bundle install"
|
21
|
+
CMD /bin/bash -l -c ". /etc/profile.d/rvm.sh && bundle exec rake"
|
data/README.md
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
-
|
1
|
+
# Stackprof
|
2
2
|
|
3
|
-
|
3
|
+
A sampling call-stack profiler for Ruby.
|
4
4
|
|
5
|
-
|
6
|
-
and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb)
|
5
|
+
Inspired heavily by [gperftools](https://code.google.com/p/gperftools/), and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb).
|
7
6
|
|
8
|
-
|
7
|
+
## Requirements
|
9
8
|
|
10
|
-
|
9
|
+
* Ruby 2.2+
|
10
|
+
* Linux-based OS
|
11
|
+
|
12
|
+
## Getting Started
|
13
|
+
|
14
|
+
### Install
|
11
15
|
|
12
16
|
In your Gemfile add:
|
13
17
|
|
@@ -18,7 +22,7 @@ gem 'stackprof'
|
|
18
22
|
Then run `$ bundle install`. Alternatively you can run `$ gem install stackprof`.
|
19
23
|
|
20
24
|
|
21
|
-
|
25
|
+
### Run
|
22
26
|
|
23
27
|
in ruby:
|
24
28
|
|
@@ -93,12 +97,20 @@ The `--flamegraph-viewer` command will output the exact shell command you need t
|
|
93
97
|
|
94
98
|
![](http://i.imgur.com/EwndrgD.png)
|
95
99
|
|
96
|
-
|
100
|
+
Alternatively, you can generate a flamegraph that uses [d3-flame-graph](https://github.com/spiermar/d3-flame-graph):
|
101
|
+
|
102
|
+
```
|
103
|
+
$ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > flamegraph.html
|
104
|
+
```
|
105
|
+
|
106
|
+
Then open the resulting file in your browser.
|
107
|
+
|
108
|
+
## Sampling
|
97
109
|
|
98
110
|
four sampling modes are supported:
|
99
111
|
|
100
|
-
- :wall (using `ITIMER_REAL` and `SIGALRM`)
|
101
|
-
- :cpu (using `ITIMER_PROF` and `SIGPROF`)
|
112
|
+
- :wall (using `ITIMER_REAL` and `SIGALRM`) [default mode]
|
113
|
+
- :cpu (using `ITIMER_PROF` and `SIGPROF`)
|
102
114
|
- :object (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
|
103
115
|
- :custom (user-defined via `StackProf.sample`)
|
104
116
|
|
@@ -129,6 +141,12 @@ StackProf.run(mode: :object, out: 'tmp/stackprof.dump', interval: 1) do
|
|
129
141
|
end
|
130
142
|
```
|
131
143
|
|
144
|
+
by default, samples taken during garbage collection will show as garbage collection frames
|
145
|
+
including both mark and sweep phases. for longer traces, these can leave gaps in a flamegraph
|
146
|
+
that are hard to follow and can be disabled by setting the `ignore_gc` option to true.
|
147
|
+
garbage collection time will still be present in the profile but not explicitly marked with
|
148
|
+
its own frame.
|
149
|
+
|
132
150
|
samples are taken using a combination of three new C-APIs in ruby 2.1:
|
133
151
|
|
134
152
|
- signal handlers enqueue a sampling job using `rb_postponed_job_register_one`.
|
@@ -142,7 +160,7 @@ samples are taken using a combination of three new C-APIs in ruby 2.1:
|
|
142
160
|
- in allocation mode, samples are taken via `rb_tracepoint_new(RUBY_INTERNAL_EVENT_NEWOBJ)`,
|
143
161
|
which provides a notification every time the VM allocates a new object.
|
144
162
|
|
145
|
-
|
163
|
+
## Aggregation
|
146
164
|
|
147
165
|
each sample consists of N stack frames, where a frame looks something like `MyClass#method` or `block in MySingleton.method`.
|
148
166
|
for each of these frames in the sample, the profiler collects a few pieces of metadata:
|
@@ -175,14 +193,14 @@ this technique builds up an incremental callgraph from the samples. on any given
|
|
175
193
|
the sum of the outbound edge weights is equal to total samples collected on that frame
|
176
194
|
(`frame.total_samples == frame.edges.values.sum`).
|
177
195
|
|
178
|
-
|
196
|
+
## Reporting
|
179
197
|
|
180
198
|
multiple reporting modes are supported:
|
181
199
|
- text
|
182
200
|
- dotgraph
|
183
201
|
- source annotation
|
184
202
|
|
185
|
-
|
203
|
+
### `StackProf::Report.new(data).print_text`
|
186
204
|
|
187
205
|
```
|
188
206
|
TOTAL (pct) SAMPLES (pct) FRAME
|
@@ -197,7 +215,7 @@ multiple reporting modes are supported:
|
|
197
215
|
188 (100.0%) 0 (0.0%) <main>
|
198
216
|
```
|
199
217
|
|
200
|
-
|
218
|
+
### `StackProf::Report.new(data).print_graphviz`
|
201
219
|
|
202
220
|
![](http://cl.ly/image/2t3l2q0l0B0A/content)
|
203
221
|
|
@@ -223,7 +241,7 @@ digraph profile {
|
|
223
241
|
}
|
224
242
|
```
|
225
243
|
|
226
|
-
|
244
|
+
### `StackProf::Report.new(data).print_method(/pow|newobj|math/)`
|
227
245
|
|
228
246
|
```
|
229
247
|
A#pow (/Users/tmm1/code/stackprof/sample.rb:11)
|
@@ -245,7 +263,7 @@ block in A#math (/Users/tmm1/code/stackprof/sample.rb:21)
|
|
245
263
|
| 23 | end
|
246
264
|
```
|
247
265
|
|
248
|
-
|
266
|
+
## Usage
|
249
267
|
|
250
268
|
the profiler is compiled as a C-extension and exposes a simple api: `StackProf.run(mode: [:cpu|:wall|:object])`.
|
251
269
|
the `run` method takes a block of code and returns a profile as a simple hash.
|
@@ -295,7 +313,7 @@ above, `A#pow` was involved in 91 samples, and in all cases it was at the top of
|
|
295
313
|
divided up between its callee edges. all 91 calls to `A#pow` came from `A#initialize`, as seen by the edge numbered
|
296
314
|
`70346498324780`.
|
297
315
|
|
298
|
-
|
316
|
+
## Advanced usage
|
299
317
|
|
300
318
|
the profiler can be started and stopped manually. results are accumulated until retrieval, across
|
301
319
|
multiple start/stop invocations.
|
@@ -307,7 +325,7 @@ StackProf.stop
|
|
307
325
|
StackProf.results('/tmp/some.file')
|
308
326
|
```
|
309
327
|
|
310
|
-
|
328
|
+
## All options
|
311
329
|
|
312
330
|
`StackProf.run` accepts an options hash. Currently, the following options are recognized:
|
313
331
|
|
@@ -316,11 +334,13 @@ Option | Meaning
|
|
316
334
|
`mode` | mode of sampling: `:cpu`, `:wall`, `:object`, or `:custom` [c.f.](#sampling)
|
317
335
|
`out` | the target file, which will be overwritten
|
318
336
|
`interval` | mode-relative sample rate [c.f.](#sampling)
|
337
|
+
`ignore_gc` | Ignore garbage collection frames
|
319
338
|
`aggregate` | defaults: `true` - if `false` disables [aggregation](#aggregation)
|
320
339
|
`raw` | defaults: `false` - if `true`, collects the extra data required by the `--flamegraph` and `--stackcollapse` report types
|
340
|
+
`metadata` | defaults to `{}`. Must be a `Hash`. Metadata associated with this profile
|
321
341
|
`save_every`| (rack middleware only) write the target file after this many requests
|
322
342
|
|
323
|
-
|
343
|
+
## Todo
|
324
344
|
|
325
345
|
* file/iseq blacklist
|
326
346
|
* restore signal handlers on stop
|
data/bin/stackprof
CHANGED
@@ -8,6 +8,7 @@ parser = OptionParser.new(ARGV) do |o|
|
|
8
8
|
o.banner = "Usage: stackprof [file.dump]+ [--text|--method=NAME|--callgrind|--graphviz]"
|
9
9
|
|
10
10
|
o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
|
11
|
+
o.on('--json', 'JSON output (use with web viewers)'){ options[:format] = :json }
|
11
12
|
o.on('--files', 'List of files'){ |f| options[:format] = :files }
|
12
13
|
o.on('--limit [num]', Integer, 'Limit --text, --files, or --graphviz output to N entries'){ |n| options[:limit] = n }
|
13
14
|
o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
|
@@ -18,11 +19,14 @@ parser = OptionParser.new(ARGV) do |o|
|
|
18
19
|
o.on('--graphviz', "Graphviz output (use with dot)"){ options[:format] = :graphviz }
|
19
20
|
o.on('--node-fraction [frac]', OptionParser::DecimalNumeric, 'Drop nodes representing less than [frac] fraction of samples'){ |n| options[:node_fraction] = n }
|
20
21
|
o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with stackprof-flamegraph.pl)'){ options[:format] = :stackcollapse }
|
21
|
-
o.on('--flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :
|
22
|
-
o.on('--flamegraph
|
22
|
+
o.on('--timeline-flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :timeline_flamegraph }
|
23
|
+
o.on('--alphabetical-flamegraph', "alphabetical-flamegraph output (js)"){ options[:format] = :alphabetical_flamegraph }
|
24
|
+
o.on('--flamegraph', "alias to --timeline-flamegraph"){ options[:format] = :timeline_flamegraph }
|
25
|
+
o.on('--flamegraph-viewer [f.js]', String, "open html viewer for flamegraph output"){ |file|
|
23
26
|
puts("open file://#{File.expand_path('../../lib/stackprof/flamegraph/viewer.html', __FILE__)}?data=#{File.expand_path(file)}")
|
24
27
|
exit
|
25
28
|
}
|
29
|
+
o.on('--d3-flamegraph', "flamegraph output (html using d3-flame-graph)\n\n"){ options[:format] = :d3_flamegraph }
|
26
30
|
o.on('--select-files []', String, 'Show results of matching files'){ |path| (options[:select_files] ||= []) << File.expand_path(path) }
|
27
31
|
o.on('--reject-files []', String, 'Exclude results of matching files'){ |path| (options[:reject_files] ||= []) << File.expand_path(path) }
|
28
32
|
o.on('--select-names []', Regexp, 'Show results of matching method names'){ |regexp| (options[:select_names] ||= []) << regexp }
|
@@ -62,6 +66,8 @@ options.delete(:limit) if options[:limit] == 0
|
|
62
66
|
case options[:format]
|
63
67
|
when :text
|
64
68
|
report.print_text(options[:sort], options[:limit], options[:select_files], options[:reject_files], options[:select_names], options[:reject_names])
|
69
|
+
when :json
|
70
|
+
report.print_json
|
65
71
|
when :debug
|
66
72
|
report.print_debug
|
67
73
|
when :dump
|
@@ -72,8 +78,12 @@ when :graphviz
|
|
72
78
|
report.print_graphviz(options)
|
73
79
|
when :stackcollapse
|
74
80
|
report.print_stackcollapse
|
75
|
-
when :
|
76
|
-
report.
|
81
|
+
when :timeline_flamegraph
|
82
|
+
report.print_timeline_flamegraph
|
83
|
+
when :alphabetical_flamegraph
|
84
|
+
report.print_alphabetical_flamegraph
|
85
|
+
when :d3_flamegraph
|
86
|
+
report.print_d3_flamegraph
|
77
87
|
when :method
|
78
88
|
options[:walk] ? report.walk_method(options[:filter]) : report.print_method(options[:filter])
|
79
89
|
when :file
|
data/ext/stackprof/stackprof.c
CHANGED
@@ -16,11 +16,24 @@
|
|
16
16
|
#include <pthread.h>
|
17
17
|
|
18
18
|
#define BUF_SIZE 2048
|
19
|
+
#define MICROSECONDS_IN_SECOND 1000000
|
20
|
+
|
21
|
+
#define FAKE_FRAME_GC INT2FIX(0)
|
22
|
+
#define FAKE_FRAME_MARK INT2FIX(1)
|
23
|
+
#define FAKE_FRAME_SWEEP INT2FIX(2)
|
24
|
+
|
25
|
+
static const char *fake_frame_cstrs[] = {
|
26
|
+
"(garbage collection)",
|
27
|
+
"(marking)",
|
28
|
+
"(sweeping)",
|
29
|
+
};
|
30
|
+
|
31
|
+
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
19
32
|
|
20
33
|
typedef struct {
|
21
34
|
size_t total_samples;
|
22
35
|
size_t caller_samples;
|
23
|
-
|
36
|
+
size_t seen_at_sample_number;
|
24
37
|
st_table *edges;
|
25
38
|
st_table *lines;
|
26
39
|
} frame_data_t;
|
@@ -33,6 +46,8 @@ static struct {
|
|
33
46
|
VALUE mode;
|
34
47
|
VALUE interval;
|
35
48
|
VALUE out;
|
49
|
+
VALUE metadata;
|
50
|
+
int ignore_gc;
|
36
51
|
|
37
52
|
VALUE *raw_samples;
|
38
53
|
size_t raw_samples_len;
|
@@ -48,10 +63,11 @@ static struct {
|
|
48
63
|
size_t overall_samples;
|
49
64
|
size_t during_gc;
|
50
65
|
size_t unrecorded_gc_samples;
|
66
|
+
size_t unrecorded_gc_marking_samples;
|
67
|
+
size_t unrecorded_gc_sweeping_samples;
|
51
68
|
st_table *frames;
|
52
69
|
|
53
|
-
VALUE
|
54
|
-
VALUE fake_gc_frame_name;
|
70
|
+
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
55
71
|
VALUE empty_string;
|
56
72
|
VALUE frames_buffer[BUF_SIZE];
|
57
73
|
int lines_buffer[BUF_SIZE];
|
@@ -59,7 +75,8 @@ static struct {
|
|
59
75
|
|
60
76
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
61
77
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
62
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_raw,
|
78
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
79
|
+
static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
63
80
|
static VALUE sym_gc_samples, objtracer;
|
64
81
|
static VALUE gc_hook;
|
65
82
|
static VALUE rb_mStackProf;
|
@@ -72,7 +89,8 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
72
89
|
{
|
73
90
|
struct sigaction sa;
|
74
91
|
struct itimerval timer;
|
75
|
-
VALUE opts = Qnil, mode = Qnil, interval = Qnil, out = Qfalse;
|
92
|
+
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
93
|
+
int ignore_gc = 0;
|
76
94
|
int raw = 0, aggregate = 1;
|
77
95
|
|
78
96
|
if (_stackprof.running)
|
@@ -84,6 +102,17 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
84
102
|
mode = rb_hash_aref(opts, sym_mode);
|
85
103
|
interval = rb_hash_aref(opts, sym_interval);
|
86
104
|
out = rb_hash_aref(opts, sym_out);
|
105
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
106
|
+
ignore_gc = 1;
|
107
|
+
}
|
108
|
+
|
109
|
+
VALUE metadata_val = rb_hash_aref(opts, sym_metadata);
|
110
|
+
if (RTEST(metadata_val)) {
|
111
|
+
if (!RB_TYPE_P(metadata_val, T_HASH))
|
112
|
+
rb_raise(rb_eArgError, "metadata should be a hash");
|
113
|
+
|
114
|
+
metadata = metadata_val;
|
115
|
+
}
|
87
116
|
|
88
117
|
if (RTEST(rb_hash_aref(opts, sym_raw)))
|
89
118
|
raw = 1;
|
@@ -92,6 +121,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
92
121
|
}
|
93
122
|
if (!RTEST(mode)) mode = sym_wall;
|
94
123
|
|
124
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
125
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
126
|
+
}
|
127
|
+
|
95
128
|
if (!_stackprof.frames) {
|
96
129
|
_stackprof.frames = st_init_numtable();
|
97
130
|
_stackprof.overall_signals = 0;
|
@@ -128,6 +161,8 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
128
161
|
_stackprof.aggregate = aggregate;
|
129
162
|
_stackprof.mode = mode;
|
130
163
|
_stackprof.interval = interval;
|
164
|
+
_stackprof.ignore_gc = ignore_gc;
|
165
|
+
_stackprof.metadata = metadata;
|
131
166
|
_stackprof.out = out;
|
132
167
|
|
133
168
|
if (raw) {
|
@@ -201,8 +236,8 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
201
236
|
|
202
237
|
rb_hash_aset(results, rb_obj_id(frame), details);
|
203
238
|
|
204
|
-
if (frame
|
205
|
-
name = _stackprof.
|
239
|
+
if (FIXNUM_P(frame)) {
|
240
|
+
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
206
241
|
file = _stackprof.empty_string;
|
207
242
|
line = INT2FIX(0);
|
208
243
|
} else {
|
@@ -258,6 +293,9 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
258
293
|
rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
|
259
294
|
rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
|
260
295
|
rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
|
296
|
+
rb_hash_aset(results, sym_metadata, _stackprof.metadata);
|
297
|
+
|
298
|
+
_stackprof.metadata = Qnil;
|
261
299
|
|
262
300
|
frames = rb_hash_new();
|
263
301
|
rb_hash_aset(results, sym_frames, frames);
|
@@ -268,6 +306,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
268
306
|
|
269
307
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
270
308
|
size_t len, n, o;
|
309
|
+
VALUE raw_timestamp_deltas;
|
271
310
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
272
311
|
|
273
312
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
@@ -287,7 +326,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
287
326
|
|
288
327
|
rb_hash_aset(results, sym_raw, raw_samples);
|
289
328
|
|
290
|
-
|
329
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
|
291
330
|
|
292
331
|
for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
|
293
332
|
rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
|
@@ -308,11 +347,12 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
308
347
|
|
309
348
|
if (RTEST(_stackprof.out)) {
|
310
349
|
VALUE file;
|
311
|
-
if (
|
312
|
-
file = rb_file_open_str(_stackprof.out, "w");
|
313
|
-
} else {
|
350
|
+
if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
|
314
351
|
file = rb_io_check_io(_stackprof.out);
|
352
|
+
} else {
|
353
|
+
file = rb_file_open_str(_stackprof.out, "w");
|
315
354
|
}
|
355
|
+
|
316
356
|
rb_marshal_dump(results, file);
|
317
357
|
rb_io_flush(file);
|
318
358
|
_stackprof.out = Qnil;
|
@@ -386,17 +426,29 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
386
426
|
if (_stackprof.raw) {
|
387
427
|
int found = 0;
|
388
428
|
|
429
|
+
/* If there's no sample buffer allocated, then allocate one. The buffer
|
430
|
+
* format is the number of frames (num), then the list of frames (from
|
431
|
+
* `_stackprof.raw_samples`), followed by the number of times this
|
432
|
+
* particular stack has been seen in a row. Each "new" stack is added
|
433
|
+
* to the end of the buffer, but if the previous stack is the same as
|
434
|
+
* the current stack, the counter will be incremented. */
|
389
435
|
if (!_stackprof.raw_samples) {
|
390
436
|
_stackprof.raw_samples_capa = num * 100;
|
391
437
|
_stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
|
392
438
|
}
|
393
439
|
|
440
|
+
/* If we can't fit all the samples in the buffer, double the buffer size. */
|
394
441
|
while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
|
395
442
|
_stackprof.raw_samples_capa *= 2;
|
396
443
|
_stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
|
397
444
|
}
|
398
445
|
|
446
|
+
/* If we've seen this stack before in the last sample, then increment the "seen" count. */
|
399
447
|
if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
|
448
|
+
/* The number of frames could have been the same, but the stack
|
449
|
+
* might be different, so we need to check the stack here. Stacks
|
450
|
+
* in the raw buffer are stored in the opposite direction of stacks
|
451
|
+
* in the frames buffer that came from Ruby. */
|
400
452
|
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
401
453
|
VALUE frame = _stackprof.frames_buffer[i];
|
402
454
|
if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame)
|
@@ -408,7 +460,11 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
408
460
|
}
|
409
461
|
}
|
410
462
|
|
463
|
+
/* If we haven't seen the stack, then add it to the buffer along with
|
464
|
+
* the length of the stack and a 1 for the "seen" count */
|
411
465
|
if (!found) {
|
466
|
+
/* Bump the `raw_sample_index` up so that the next iteration can
|
467
|
+
* find the previously recorded stack size. */
|
412
468
|
_stackprof.raw_sample_index = _stackprof.raw_samples_len;
|
413
469
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
|
414
470
|
for (i = num-1; i >= 0; i--) {
|
@@ -418,33 +474,32 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
418
474
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
|
419
475
|
}
|
420
476
|
|
477
|
+
/* If there's no timestamp delta buffer, allocate one */
|
421
478
|
if (!_stackprof.raw_timestamp_deltas) {
|
422
479
|
_stackprof.raw_timestamp_deltas_capa = 100;
|
423
480
|
_stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
|
424
481
|
_stackprof.raw_timestamp_deltas_len = 0;
|
425
482
|
}
|
426
483
|
|
484
|
+
/* Double the buffer size if it's too small */
|
427
485
|
while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
|
428
486
|
_stackprof.raw_timestamp_deltas_capa *= 2;
|
429
487
|
_stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
|
430
488
|
}
|
431
489
|
|
490
|
+
/* Store the time delta (which is the amount of time between samples) */
|
432
491
|
_stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
|
433
492
|
}
|
434
493
|
|
435
|
-
for (i = 0; i < num; i++) {
|
436
|
-
VALUE frame = _stackprof.frames_buffer[i];
|
437
|
-
sample_for(frame)->already_accounted_in_total = 0;
|
438
|
-
}
|
439
|
-
|
440
494
|
for (i = 0; i < num; i++) {
|
441
495
|
int line = _stackprof.lines_buffer[i];
|
442
496
|
VALUE frame = _stackprof.frames_buffer[i];
|
443
497
|
frame_data_t *frame_data = sample_for(frame);
|
444
498
|
|
445
|
-
if (
|
499
|
+
if (frame_data->seen_at_sample_number != _stackprof.overall_samples) {
|
446
500
|
frame_data->total_samples++;
|
447
|
-
|
501
|
+
}
|
502
|
+
frame_data->seen_at_sample_number = _stackprof.overall_samples;
|
448
503
|
|
449
504
|
if (i == 0) {
|
450
505
|
frame_data->caller_samples++;
|
@@ -455,10 +510,10 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
455
510
|
}
|
456
511
|
|
457
512
|
if (_stackprof.aggregate && line > 0) {
|
458
|
-
if (!frame_data->lines)
|
459
|
-
frame_data->lines = st_init_numtable();
|
460
513
|
size_t half = (size_t)1<<(8*SIZEOF_SIZE_T/2);
|
461
514
|
size_t increment = i == 0 ? half + 1 : half;
|
515
|
+
if (!frame_data->lines)
|
516
|
+
frame_data->lines = st_init_numtable();
|
462
517
|
st_numtable_increment(frame_data->lines, (st_data_t)line, increment);
|
463
518
|
}
|
464
519
|
|
@@ -474,14 +529,15 @@ void
|
|
474
529
|
stackprof_record_sample()
|
475
530
|
{
|
476
531
|
int timestamp_delta = 0;
|
532
|
+
int num;
|
477
533
|
if (_stackprof.raw) {
|
478
534
|
struct timeval t;
|
479
|
-
gettimeofday(&t, NULL);
|
480
535
|
struct timeval diff;
|
536
|
+
gettimeofday(&t, NULL);
|
481
537
|
timersub(&t, &_stackprof.last_sample_at, &diff);
|
482
538
|
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
483
539
|
}
|
484
|
-
|
540
|
+
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
485
541
|
stackprof_record_sample_for_stack(num, timestamp_delta);
|
486
542
|
}
|
487
543
|
|
@@ -489,31 +545,52 @@ void
|
|
489
545
|
stackprof_record_gc_samples()
|
490
546
|
{
|
491
547
|
int delta_to_first_unrecorded_gc_sample = 0;
|
548
|
+
int i;
|
492
549
|
if (_stackprof.raw) {
|
493
550
|
struct timeval t;
|
494
|
-
gettimeofday(&t, NULL);
|
495
551
|
struct timeval diff;
|
552
|
+
gettimeofday(&t, NULL);
|
496
553
|
timersub(&t, &_stackprof.last_sample_at, &diff);
|
497
554
|
|
498
555
|
// We don't know when the GC samples were actually marked, so let's
|
499
556
|
// assume that they were marked at a perfectly regular interval.
|
500
|
-
delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * _stackprof.interval;
|
557
|
+
delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
501
558
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
502
559
|
delta_to_first_unrecorded_gc_sample = 0;
|
503
560
|
}
|
504
561
|
}
|
505
562
|
|
506
|
-
int i;
|
507
|
-
|
508
|
-
_stackprof.frames_buffer[0] = _stackprof.fake_gc_frame;
|
509
|
-
_stackprof.lines_buffer[0] = 0;
|
510
563
|
|
511
564
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
512
|
-
int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : _stackprof.interval;
|
513
|
-
|
565
|
+
int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
566
|
+
|
567
|
+
if (_stackprof.unrecorded_gc_marking_samples) {
|
568
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
569
|
+
_stackprof.lines_buffer[0] = 0;
|
570
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
571
|
+
_stackprof.lines_buffer[1] = 0;
|
572
|
+
_stackprof.unrecorded_gc_marking_samples--;
|
573
|
+
|
574
|
+
stackprof_record_sample_for_stack(2, timestamp_delta);
|
575
|
+
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
576
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
577
|
+
_stackprof.lines_buffer[0] = 0;
|
578
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
579
|
+
_stackprof.lines_buffer[1] = 0;
|
580
|
+
|
581
|
+
_stackprof.unrecorded_gc_sweeping_samples--;
|
582
|
+
|
583
|
+
stackprof_record_sample_for_stack(2, timestamp_delta);
|
584
|
+
} else {
|
585
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
586
|
+
_stackprof.lines_buffer[0] = 0;
|
587
|
+
stackprof_record_sample_for_stack(1, timestamp_delta);
|
588
|
+
}
|
514
589
|
}
|
515
590
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
516
591
|
_stackprof.unrecorded_gc_samples = 0;
|
592
|
+
_stackprof.unrecorded_gc_marking_samples = 0;
|
593
|
+
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
517
594
|
}
|
518
595
|
|
519
596
|
static void
|
@@ -544,7 +621,13 @@ static void
|
|
544
621
|
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
545
622
|
{
|
546
623
|
_stackprof.overall_signals++;
|
547
|
-
if (rb_during_gc()) {
|
624
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
625
|
+
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
626
|
+
if (mode == sym_marking) {
|
627
|
+
_stackprof.unrecorded_gc_marking_samples++;
|
628
|
+
} else if (mode == sym_sweeping) {
|
629
|
+
_stackprof.unrecorded_gc_sweeping_samples++;
|
630
|
+
}
|
548
631
|
_stackprof.unrecorded_gc_samples++;
|
549
632
|
rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
|
550
633
|
} else {
|
@@ -583,6 +666,9 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
583
666
|
static void
|
584
667
|
stackprof_gc_mark(void *data)
|
585
668
|
{
|
669
|
+
if (RTEST(_stackprof.metadata))
|
670
|
+
rb_gc_mark(_stackprof.metadata);
|
671
|
+
|
586
672
|
if (RTEST(_stackprof.out))
|
587
673
|
rb_gc_mark(_stackprof.out);
|
588
674
|
|
@@ -625,6 +711,7 @@ stackprof_atfork_child(void)
|
|
625
711
|
void
|
626
712
|
Init_stackprof(void)
|
627
713
|
{
|
714
|
+
size_t i;
|
628
715
|
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
629
716
|
S(object);
|
630
717
|
S(custom);
|
@@ -645,10 +732,18 @@ Init_stackprof(void)
|
|
645
732
|
S(raw);
|
646
733
|
S(raw_timestamp_deltas);
|
647
734
|
S(out);
|
735
|
+
S(metadata);
|
736
|
+
S(ignore_gc);
|
648
737
|
S(frames);
|
649
738
|
S(aggregate);
|
739
|
+
S(state);
|
740
|
+
S(marking);
|
741
|
+
S(sweeping);
|
650
742
|
#undef S
|
651
743
|
|
744
|
+
/* Need to run this to warm the symbol table before we call this during GC */
|
745
|
+
rb_gc_latest_gc_info(sym_state);
|
746
|
+
|
652
747
|
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
|
653
748
|
rb_global_variable(&gc_hook);
|
654
749
|
|
@@ -661,12 +756,14 @@ Init_stackprof(void)
|
|
661
756
|
_stackprof.raw_timestamp_deltas_len = 0;
|
662
757
|
_stackprof.raw_timestamp_deltas_capa = 0;
|
663
758
|
|
664
|
-
_stackprof.fake_gc_frame = INT2FIX(0x9C);
|
665
759
|
_stackprof.empty_string = rb_str_new_cstr("");
|
666
|
-
_stackprof.fake_gc_frame_name = rb_str_new_cstr("(garbage collection)");
|
667
|
-
rb_global_variable(&_stackprof.fake_gc_frame_name);
|
668
760
|
rb_global_variable(&_stackprof.empty_string);
|
669
761
|
|
762
|
+
for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
|
763
|
+
_stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
|
764
|
+
rb_global_variable(&_stackprof.fake_frame_names[i]);
|
765
|
+
}
|
766
|
+
|
670
767
|
rb_mStackProf = rb_define_module("StackProf");
|
671
768
|
rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
|
672
769
|
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
|