stackprof 0.2.11 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -0
- data/.travis.yml +21 -8
- data/CHANGELOG.md +14 -0
- data/Dockerfile +21 -0
- data/README.md +39 -19
- data/bin/stackprof +14 -4
- data/ext/stackprof/stackprof.c +131 -34
- data/lib/stackprof.rb +4 -0
- data/lib/stackprof/middleware.rb +23 -7
- data/lib/stackprof/report.rb +271 -10
- data/stackprof.gemspec +11 -2
- data/test/test_middleware.rb +13 -7
- data/test/test_stackprof.rb +97 -2
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +15 -11
- data/Gemfile.lock +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 109d5dc07fefb68933ae164c88420aecc662593f9fa96e102a90e7c8d4c605a9
|
4
|
+
data.tar.gz: 7cbd4e6919a160f5e7b680bd0994e9e33097df03c7098ffda7317996a8afb1f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4c6894359a809ea8e504eca85506179eb598d2b0db6833b1661e08d982efb78ec0fa526e73ba30a7388c443b5b4eb16a466baa7dd3dcd4e139c0bec7b22ab5d
|
7
|
+
data.tar.gz: 040bc4d3c1ffb1f724bce5ca7db4ddeae7af7ecdc526f893f878049bf42c715983802def5a2e8905f6b1016401a478457a237ef0a76dc742e7070780940e762b
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,8 +1,21 @@
|
|
1
|
-
sudo:
|
2
|
-
|
3
|
-
|
4
|
-
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
1
|
+
sudo: required
|
2
|
+
|
3
|
+
services:
|
4
|
+
- docker
|
5
|
+
|
6
|
+
language: general
|
7
|
+
|
8
|
+
env:
|
9
|
+
matrix:
|
10
|
+
- RVM_RUBY_VERSION=2.2
|
11
|
+
- RVM_RUBY_VERSION=2.3
|
12
|
+
- RVM_RUBY_VERSION=2.4
|
13
|
+
- RVM_RUBY_VERSION=2.5
|
14
|
+
- RVM_RUBY_VERSION=2.6
|
15
|
+
- RVM_RUBY_VERSION=ruby-head
|
16
|
+
|
17
|
+
before_install:
|
18
|
+
- sudo docker build -t stackprof-$RVM_RUBY_VERSION --build-arg=RVM_RUBY_VERSION=$RVM_RUBY_VERSION .
|
19
|
+
|
20
|
+
script:
|
21
|
+
- sudo docker run --name stackprof-$RVM_RUBY_VERSION stackprof-$RVM_RUBY_VERSION
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# 0.2.16
|
2
|
+
|
3
|
+
* [flamegraph.pl] Update to latest version
|
4
|
+
* Add option to ignore GC frames
|
5
|
+
* Handle source code not being available
|
6
|
+
* Freeze strings in report.rb
|
7
|
+
* Use a cursor object instead of array slicing
|
8
|
+
* Raise ArgumentError when interval is <1 or >=1 million (microseconds)
|
9
|
+
* fix variable name.
|
10
|
+
* Fix default mode comment in readme
|
11
|
+
|
12
|
+
# 0.2.15
|
13
|
+
|
14
|
+
* Mark the metadata object before the GC is invoked to prevent it from being garbage collected.
|
data/Dockerfile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
FROM ubuntu:16.04
|
2
|
+
ARG DEBIAN_FRONTEND=noninteractive
|
3
|
+
RUN apt-get update -q && \
|
4
|
+
apt-get install -qy \
|
5
|
+
curl ca-certificates gnupg2 dirmngr build-essential \
|
6
|
+
gawk git autoconf automake pkg-config \
|
7
|
+
bison libffi-dev libgdbm-dev libncurses5-dev libsqlite3-dev libtool \
|
8
|
+
libyaml-dev sqlite3 zlib1g-dev libgmp-dev libreadline-dev libssl-dev \
|
9
|
+
ruby --no-install-recommends && \
|
10
|
+
apt-get clean
|
11
|
+
|
12
|
+
RUN gpg2 --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
|
13
|
+
RUN curl -sSL https://get.rvm.io | bash -s
|
14
|
+
ARG RVM_RUBY_VERSION=ruby-head
|
15
|
+
RUN /bin/bash -l -c "echo $RVM_RUBY_VERSION"
|
16
|
+
RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && rvm install $RVM_RUBY_VERSION --binary || rvm install $RVM_RUBY_VERSION"
|
17
|
+
ADD . /stackprof/
|
18
|
+
WORKDIR /stackprof/
|
19
|
+
RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && gem install bundler:1.16.0"
|
20
|
+
RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && bundle install"
|
21
|
+
CMD /bin/bash -l -c ". /etc/profile.d/rvm.sh && bundle exec rake"
|
data/README.md
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
-
|
1
|
+
# Stackprof
|
2
2
|
|
3
|
-
|
3
|
+
A sampling call-stack profiler for Ruby.
|
4
4
|
|
5
|
-
|
6
|
-
and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb)
|
5
|
+
Inspired heavily by [gperftools](https://code.google.com/p/gperftools/), and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb).
|
7
6
|
|
8
|
-
|
7
|
+
## Requirements
|
9
8
|
|
10
|
-
|
9
|
+
* Ruby 2.2+
|
10
|
+
* Linux-based OS
|
11
|
+
|
12
|
+
## Getting Started
|
13
|
+
|
14
|
+
### Install
|
11
15
|
|
12
16
|
In your Gemfile add:
|
13
17
|
|
@@ -18,7 +22,7 @@ gem 'stackprof'
|
|
18
22
|
Then run `$ bundle install`. Alternatively you can run `$ gem install stackprof`.
|
19
23
|
|
20
24
|
|
21
|
-
|
25
|
+
### Run
|
22
26
|
|
23
27
|
in ruby:
|
24
28
|
|
@@ -93,12 +97,20 @@ The `--flamegraph-viewer` command will output the exact shell command you need t
|
|
93
97
|
|
94
98
|

|
95
99
|
|
96
|
-
|
100
|
+
Alternatively, you can generate a flamegraph that uses [d3-flame-graph](https://github.com/spiermar/d3-flame-graph):
|
101
|
+
|
102
|
+
```
|
103
|
+
$ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > flamegraph.html
|
104
|
+
```
|
105
|
+
|
106
|
+
Then open the resulting file in your browser.
|
107
|
+
|
108
|
+
## Sampling
|
97
109
|
|
98
110
|
four sampling modes are supported:
|
99
111
|
|
100
|
-
- :wall (using `ITIMER_REAL` and `SIGALRM`)
|
101
|
-
- :cpu (using `ITIMER_PROF` and `SIGPROF`)
|
112
|
+
- :wall (using `ITIMER_REAL` and `SIGALRM`) [default mode]
|
113
|
+
- :cpu (using `ITIMER_PROF` and `SIGPROF`)
|
102
114
|
- :object (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
|
103
115
|
- :custom (user-defined via `StackProf.sample`)
|
104
116
|
|
@@ -129,6 +141,12 @@ StackProf.run(mode: :object, out: 'tmp/stackprof.dump', interval: 1) do
|
|
129
141
|
end
|
130
142
|
```
|
131
143
|
|
144
|
+
by default, samples taken during garbage collection will show as garbage collection frames
|
145
|
+
including both mark and sweep phases. for longer traces, these can leave gaps in a flamegraph
|
146
|
+
that are hard to follow and can be disabled by setting the `ignore_gc` option to true.
|
147
|
+
garbage collection time will still be present in the profile but not explicitly marked with
|
148
|
+
its own frame.
|
149
|
+
|
132
150
|
samples are taken using a combination of three new C-APIs in ruby 2.1:
|
133
151
|
|
134
152
|
- signal handlers enqueue a sampling job using `rb_postponed_job_register_one`.
|
@@ -142,7 +160,7 @@ samples are taken using a combination of three new C-APIs in ruby 2.1:
|
|
142
160
|
- in allocation mode, samples are taken via `rb_tracepoint_new(RUBY_INTERNAL_EVENT_NEWOBJ)`,
|
143
161
|
which provides a notification every time the VM allocates a new object.
|
144
162
|
|
145
|
-
|
163
|
+
## Aggregation
|
146
164
|
|
147
165
|
each sample consists of N stack frames, where a frame looks something like `MyClass#method` or `block in MySingleton.method`.
|
148
166
|
for each of these frames in the sample, the profiler collects a few pieces of metadata:
|
@@ -175,14 +193,14 @@ this technique builds up an incremental callgraph from the samples. on any given
|
|
175
193
|
the sum of the outbound edge weights is equal to total samples collected on that frame
|
176
194
|
(`frame.total_samples == frame.edges.values.sum`).
|
177
195
|
|
178
|
-
|
196
|
+
## Reporting
|
179
197
|
|
180
198
|
multiple reporting modes are supported:
|
181
199
|
- text
|
182
200
|
- dotgraph
|
183
201
|
- source annotation
|
184
202
|
|
185
|
-
|
203
|
+
### `StackProf::Report.new(data).print_text`
|
186
204
|
|
187
205
|
```
|
188
206
|
TOTAL (pct) SAMPLES (pct) FRAME
|
@@ -197,7 +215,7 @@ multiple reporting modes are supported:
|
|
197
215
|
188 (100.0%) 0 (0.0%) <main>
|
198
216
|
```
|
199
217
|
|
200
|
-
|
218
|
+
### `StackProf::Report.new(data).print_graphviz`
|
201
219
|
|
202
220
|

|
203
221
|
|
@@ -223,7 +241,7 @@ digraph profile {
|
|
223
241
|
}
|
224
242
|
```
|
225
243
|
|
226
|
-
|
244
|
+
### `StackProf::Report.new(data).print_method(/pow|newobj|math/)`
|
227
245
|
|
228
246
|
```
|
229
247
|
A#pow (/Users/tmm1/code/stackprof/sample.rb:11)
|
@@ -245,7 +263,7 @@ block in A#math (/Users/tmm1/code/stackprof/sample.rb:21)
|
|
245
263
|
| 23 | end
|
246
264
|
```
|
247
265
|
|
248
|
-
|
266
|
+
## Usage
|
249
267
|
|
250
268
|
the profiler is compiled as a C-extension and exposes a simple api: `StackProf.run(mode: [:cpu|:wall|:object])`.
|
251
269
|
the `run` method takes a block of code and returns a profile as a simple hash.
|
@@ -295,7 +313,7 @@ above, `A#pow` was involved in 91 samples, and in all cases it was at the top of
|
|
295
313
|
divided up between its callee edges. all 91 calls to `A#pow` came from `A#initialize`, as seen by the edge numbered
|
296
314
|
`70346498324780`.
|
297
315
|
|
298
|
-
|
316
|
+
## Advanced usage
|
299
317
|
|
300
318
|
the profiler can be started and stopped manually. results are accumulated until retrieval, across
|
301
319
|
multiple start/stop invocations.
|
@@ -307,7 +325,7 @@ StackProf.stop
|
|
307
325
|
StackProf.results('/tmp/some.file')
|
308
326
|
```
|
309
327
|
|
310
|
-
|
328
|
+
## All options
|
311
329
|
|
312
330
|
`StackProf.run` accepts an options hash. Currently, the following options are recognized:
|
313
331
|
|
@@ -316,11 +334,13 @@ Option | Meaning
|
|
316
334
|
`mode` | mode of sampling: `:cpu`, `:wall`, `:object`, or `:custom` [c.f.](#sampling)
|
317
335
|
`out` | the target file, which will be overwritten
|
318
336
|
`interval` | mode-relative sample rate [c.f.](#sampling)
|
337
|
+
`ignore_gc` | Ignore garbage collection frames
|
319
338
|
`aggregate` | defaults: `true` - if `false` disables [aggregation](#aggregation)
|
320
339
|
`raw` | defaults `false` - if `true` collects the extra data required by the `--flamegraph` and `--stackcollapse` report types
|
340
|
+
`metadata` | defaults to `{}`. Must be a `Hash`. Metadata to associate with this profile
|
321
341
|
`save_every`| (rack middleware only) write the target file after this many requests
|
322
342
|
|
323
|
-
|
343
|
+
## Todo
|
324
344
|
|
325
345
|
* file/iseq blacklist
|
326
346
|
* restore signal handlers on stop
|
data/bin/stackprof
CHANGED
@@ -8,6 +8,7 @@ parser = OptionParser.new(ARGV) do |o|
|
|
8
8
|
o.banner = "Usage: stackprof [file.dump]+ [--text|--method=NAME|--callgrind|--graphviz]"
|
9
9
|
|
10
10
|
o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
|
11
|
+
o.on('--json', 'JSON output (use with web viewers)'){ options[:format] = :json }
|
11
12
|
o.on('--files', 'List of files'){ |f| options[:format] = :files }
|
12
13
|
o.on('--limit [num]', Integer, 'Limit --text, --files, or --graphviz output to N entries'){ |n| options[:limit] = n }
|
13
14
|
o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
|
@@ -18,11 +19,14 @@ parser = OptionParser.new(ARGV) do |o|
|
|
18
19
|
o.on('--graphviz', "Graphviz output (use with dot)"){ options[:format] = :graphviz }
|
19
20
|
o.on('--node-fraction [frac]', OptionParser::DecimalNumeric, 'Drop nodes representing less than [frac] fraction of samples'){ |n| options[:node_fraction] = n }
|
20
21
|
o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with stackprof-flamegraph.pl)'){ options[:format] = :stackcollapse }
|
21
|
-
o.on('--flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :
|
22
|
-
o.on('--flamegraph
|
22
|
+
o.on('--timeline-flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :timeline_flamegraph }
|
23
|
+
o.on('--alphabetical-flamegraph', "alphabetical-flamegraph output (js)"){ options[:format] = :alphabetical_flamegraph }
|
24
|
+
o.on('--flamegraph', "alias to --timeline-flamegraph"){ options[:format] = :timeline_flamegraph }
|
25
|
+
o.on('--flamegraph-viewer [f.js]', String, "open html viewer for flamegraph output"){ |file|
|
23
26
|
puts("open file://#{File.expand_path('../../lib/stackprof/flamegraph/viewer.html', __FILE__)}?data=#{File.expand_path(file)}")
|
24
27
|
exit
|
25
28
|
}
|
29
|
+
o.on('--d3-flamegraph', "flamegraph output (html using d3-flame-graph)\n\n"){ options[:format] = :d3_flamegraph }
|
26
30
|
o.on('--select-files []', String, 'Show results of matching files'){ |path| (options[:select_files] ||= []) << File.expand_path(path) }
|
27
31
|
o.on('--reject-files []', String, 'Exclude results of matching files'){ |path| (options[:reject_files] ||= []) << File.expand_path(path) }
|
28
32
|
o.on('--select-names []', Regexp, 'Show results of matching method names'){ |regexp| (options[:select_names] ||= []) << regexp }
|
@@ -62,6 +66,8 @@ options.delete(:limit) if options[:limit] == 0
|
|
62
66
|
case options[:format]
|
63
67
|
when :text
|
64
68
|
report.print_text(options[:sort], options[:limit], options[:select_files], options[:reject_files], options[:select_names], options[:reject_names])
|
69
|
+
when :json
|
70
|
+
report.print_json
|
65
71
|
when :debug
|
66
72
|
report.print_debug
|
67
73
|
when :dump
|
@@ -72,8 +78,12 @@ when :graphviz
|
|
72
78
|
report.print_graphviz(options)
|
73
79
|
when :stackcollapse
|
74
80
|
report.print_stackcollapse
|
75
|
-
when :
|
76
|
-
report.
|
81
|
+
when :timeline_flamegraph
|
82
|
+
report.print_timeline_flamegraph
|
83
|
+
when :alphabetical_flamegraph
|
84
|
+
report.print_alphabetical_flamegraph
|
85
|
+
when :d3_flamegraph
|
86
|
+
report.print_d3_flamegraph
|
77
87
|
when :method
|
78
88
|
options[:walk] ? report.walk_method(options[:filter]) : report.print_method(options[:filter])
|
79
89
|
when :file
|
data/ext/stackprof/stackprof.c
CHANGED
@@ -16,11 +16,24 @@
|
|
16
16
|
#include <pthread.h>
|
17
17
|
|
18
18
|
#define BUF_SIZE 2048
|
19
|
+
#define MICROSECONDS_IN_SECOND 1000000
|
20
|
+
|
21
|
+
#define FAKE_FRAME_GC INT2FIX(0)
|
22
|
+
#define FAKE_FRAME_MARK INT2FIX(1)
|
23
|
+
#define FAKE_FRAME_SWEEP INT2FIX(2)
|
24
|
+
|
25
|
+
static const char *fake_frame_cstrs[] = {
|
26
|
+
"(garbage collection)",
|
27
|
+
"(marking)",
|
28
|
+
"(sweeping)",
|
29
|
+
};
|
30
|
+
|
31
|
+
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
19
32
|
|
20
33
|
typedef struct {
|
21
34
|
size_t total_samples;
|
22
35
|
size_t caller_samples;
|
23
|
-
|
36
|
+
size_t seen_at_sample_number;
|
24
37
|
st_table *edges;
|
25
38
|
st_table *lines;
|
26
39
|
} frame_data_t;
|
@@ -33,6 +46,8 @@ static struct {
|
|
33
46
|
VALUE mode;
|
34
47
|
VALUE interval;
|
35
48
|
VALUE out;
|
49
|
+
VALUE metadata;
|
50
|
+
int ignore_gc;
|
36
51
|
|
37
52
|
VALUE *raw_samples;
|
38
53
|
size_t raw_samples_len;
|
@@ -48,10 +63,11 @@ static struct {
|
|
48
63
|
size_t overall_samples;
|
49
64
|
size_t during_gc;
|
50
65
|
size_t unrecorded_gc_samples;
|
66
|
+
size_t unrecorded_gc_marking_samples;
|
67
|
+
size_t unrecorded_gc_sweeping_samples;
|
51
68
|
st_table *frames;
|
52
69
|
|
53
|
-
VALUE
|
54
|
-
VALUE fake_gc_frame_name;
|
70
|
+
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
55
71
|
VALUE empty_string;
|
56
72
|
VALUE frames_buffer[BUF_SIZE];
|
57
73
|
int lines_buffer[BUF_SIZE];
|
@@ -59,7 +75,8 @@ static struct {
|
|
59
75
|
|
60
76
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
61
77
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
62
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_raw,
|
78
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
79
|
+
static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
63
80
|
static VALUE sym_gc_samples, objtracer;
|
64
81
|
static VALUE gc_hook;
|
65
82
|
static VALUE rb_mStackProf;
|
@@ -72,7 +89,8 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
72
89
|
{
|
73
90
|
struct sigaction sa;
|
74
91
|
struct itimerval timer;
|
75
|
-
VALUE opts = Qnil, mode = Qnil, interval = Qnil, out = Qfalse;
|
92
|
+
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
93
|
+
int ignore_gc = 0;
|
76
94
|
int raw = 0, aggregate = 1;
|
77
95
|
|
78
96
|
if (_stackprof.running)
|
@@ -84,6 +102,17 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
84
102
|
mode = rb_hash_aref(opts, sym_mode);
|
85
103
|
interval = rb_hash_aref(opts, sym_interval);
|
86
104
|
out = rb_hash_aref(opts, sym_out);
|
105
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
106
|
+
ignore_gc = 1;
|
107
|
+
}
|
108
|
+
|
109
|
+
VALUE metadata_val = rb_hash_aref(opts, sym_metadata);
|
110
|
+
if (RTEST(metadata_val)) {
|
111
|
+
if (!RB_TYPE_P(metadata_val, T_HASH))
|
112
|
+
rb_raise(rb_eArgError, "metadata should be a hash");
|
113
|
+
|
114
|
+
metadata = metadata_val;
|
115
|
+
}
|
87
116
|
|
88
117
|
if (RTEST(rb_hash_aref(opts, sym_raw)))
|
89
118
|
raw = 1;
|
@@ -92,6 +121,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
92
121
|
}
|
93
122
|
if (!RTEST(mode)) mode = sym_wall;
|
94
123
|
|
124
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
125
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
126
|
+
}
|
127
|
+
|
95
128
|
if (!_stackprof.frames) {
|
96
129
|
_stackprof.frames = st_init_numtable();
|
97
130
|
_stackprof.overall_signals = 0;
|
@@ -128,6 +161,8 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
128
161
|
_stackprof.aggregate = aggregate;
|
129
162
|
_stackprof.mode = mode;
|
130
163
|
_stackprof.interval = interval;
|
164
|
+
_stackprof.ignore_gc = ignore_gc;
|
165
|
+
_stackprof.metadata = metadata;
|
131
166
|
_stackprof.out = out;
|
132
167
|
|
133
168
|
if (raw) {
|
@@ -201,8 +236,8 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
201
236
|
|
202
237
|
rb_hash_aset(results, rb_obj_id(frame), details);
|
203
238
|
|
204
|
-
if (frame
|
205
|
-
name = _stackprof.
|
239
|
+
if (FIXNUM_P(frame)) {
|
240
|
+
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
206
241
|
file = _stackprof.empty_string;
|
207
242
|
line = INT2FIX(0);
|
208
243
|
} else {
|
@@ -258,6 +293,9 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
258
293
|
rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
|
259
294
|
rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
|
260
295
|
rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
|
296
|
+
rb_hash_aset(results, sym_metadata, _stackprof.metadata);
|
297
|
+
|
298
|
+
_stackprof.metadata = Qnil;
|
261
299
|
|
262
300
|
frames = rb_hash_new();
|
263
301
|
rb_hash_aset(results, sym_frames, frames);
|
@@ -268,6 +306,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
268
306
|
|
269
307
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
270
308
|
size_t len, n, o;
|
309
|
+
VALUE raw_timestamp_deltas;
|
271
310
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
272
311
|
|
273
312
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
@@ -287,7 +326,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
287
326
|
|
288
327
|
rb_hash_aset(results, sym_raw, raw_samples);
|
289
328
|
|
290
|
-
|
329
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
|
291
330
|
|
292
331
|
for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
|
293
332
|
rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
|
@@ -308,11 +347,12 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
308
347
|
|
309
348
|
if (RTEST(_stackprof.out)) {
|
310
349
|
VALUE file;
|
311
|
-
if (
|
312
|
-
file = rb_file_open_str(_stackprof.out, "w");
|
313
|
-
} else {
|
350
|
+
if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
|
314
351
|
file = rb_io_check_io(_stackprof.out);
|
352
|
+
} else {
|
353
|
+
file = rb_file_open_str(_stackprof.out, "w");
|
315
354
|
}
|
355
|
+
|
316
356
|
rb_marshal_dump(results, file);
|
317
357
|
rb_io_flush(file);
|
318
358
|
_stackprof.out = Qnil;
|
@@ -386,17 +426,29 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
386
426
|
if (_stackprof.raw) {
|
387
427
|
int found = 0;
|
388
428
|
|
429
|
+
/* If there's no sample buffer allocated, then allocate one. The buffer
|
430
|
+
* format is the number of frames (num), then the list of frames (from
|
431
|
+
* `_stackprof.raw_samples`), followed by the number of times this
|
432
|
+
* particular stack has been seen in a row. Each "new" stack is added
|
433
|
+
* to the end of the buffer, but if the previous stack is the same as
|
434
|
+
* the current stack, the counter will be incremented. */
|
389
435
|
if (!_stackprof.raw_samples) {
|
390
436
|
_stackprof.raw_samples_capa = num * 100;
|
391
437
|
_stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
|
392
438
|
}
|
393
439
|
|
440
|
+
/* If we can't fit all the samples in the buffer, double the buffer size. */
|
394
441
|
while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
|
395
442
|
_stackprof.raw_samples_capa *= 2;
|
396
443
|
_stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
|
397
444
|
}
|
398
445
|
|
446
|
+
/* If we've seen this stack before in the last sample, then increment the "seen" count. */
|
399
447
|
if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
|
448
|
+
/* The number of samples could have been the same, but the stack
|
449
|
+
* might be different, so we need to check the stack here. Stacks
|
450
|
+
* in the raw buffer are stored in the opposite direction of stacks
|
451
|
+
* in the frames buffer that came from Ruby. */
|
400
452
|
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
401
453
|
VALUE frame = _stackprof.frames_buffer[i];
|
402
454
|
if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame)
|
@@ -408,7 +460,11 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
408
460
|
}
|
409
461
|
}
|
410
462
|
|
463
|
+
/* If we haven't seen the stack, then add it to the buffer along with
|
464
|
+
* the length of the stack and a 1 for the "seen" count */
|
411
465
|
if (!found) {
|
466
|
+
/* Bump the `raw_sample_index` up so that the next iteration can
|
467
|
+
* find the previously recorded stack size. */
|
412
468
|
_stackprof.raw_sample_index = _stackprof.raw_samples_len;
|
413
469
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
|
414
470
|
for (i = num-1; i >= 0; i--) {
|
@@ -418,33 +474,32 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
418
474
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
|
419
475
|
}
|
420
476
|
|
477
|
+
/* If there's no timestamp delta buffer, allocate one */
|
421
478
|
if (!_stackprof.raw_timestamp_deltas) {
|
422
479
|
_stackprof.raw_timestamp_deltas_capa = 100;
|
423
480
|
_stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
|
424
481
|
_stackprof.raw_timestamp_deltas_len = 0;
|
425
482
|
}
|
426
483
|
|
484
|
+
/* Double the buffer size if it's too small */
|
427
485
|
while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
|
428
486
|
_stackprof.raw_timestamp_deltas_capa *= 2;
|
429
487
|
_stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
|
430
488
|
}
|
431
489
|
|
490
|
+
/* Store the time delta (which is the amount of time between samples) */
|
432
491
|
_stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
|
433
492
|
}
|
434
493
|
|
435
|
-
for (i = 0; i < num; i++) {
|
436
|
-
VALUE frame = _stackprof.frames_buffer[i];
|
437
|
-
sample_for(frame)->already_accounted_in_total = 0;
|
438
|
-
}
|
439
|
-
|
440
494
|
for (i = 0; i < num; i++) {
|
441
495
|
int line = _stackprof.lines_buffer[i];
|
442
496
|
VALUE frame = _stackprof.frames_buffer[i];
|
443
497
|
frame_data_t *frame_data = sample_for(frame);
|
444
498
|
|
445
|
-
if (
|
499
|
+
if (frame_data->seen_at_sample_number != _stackprof.overall_samples) {
|
446
500
|
frame_data->total_samples++;
|
447
|
-
|
501
|
+
}
|
502
|
+
frame_data->seen_at_sample_number = _stackprof.overall_samples;
|
448
503
|
|
449
504
|
if (i == 0) {
|
450
505
|
frame_data->caller_samples++;
|
@@ -455,10 +510,10 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
455
510
|
}
|
456
511
|
|
457
512
|
if (_stackprof.aggregate && line > 0) {
|
458
|
-
if (!frame_data->lines)
|
459
|
-
frame_data->lines = st_init_numtable();
|
460
513
|
size_t half = (size_t)1<<(8*SIZEOF_SIZE_T/2);
|
461
514
|
size_t increment = i == 0 ? half + 1 : half;
|
515
|
+
if (!frame_data->lines)
|
516
|
+
frame_data->lines = st_init_numtable();
|
462
517
|
st_numtable_increment(frame_data->lines, (st_data_t)line, increment);
|
463
518
|
}
|
464
519
|
|
@@ -474,14 +529,15 @@ void
|
|
474
529
|
stackprof_record_sample()
|
475
530
|
{
|
476
531
|
int timestamp_delta = 0;
|
532
|
+
int num;
|
477
533
|
if (_stackprof.raw) {
|
478
534
|
struct timeval t;
|
479
|
-
gettimeofday(&t, NULL);
|
480
535
|
struct timeval diff;
|
536
|
+
gettimeofday(&t, NULL);
|
481
537
|
timersub(&t, &_stackprof.last_sample_at, &diff);
|
482
538
|
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
483
539
|
}
|
484
|
-
|
540
|
+
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
485
541
|
stackprof_record_sample_for_stack(num, timestamp_delta);
|
486
542
|
}
|
487
543
|
|
@@ -489,31 +545,52 @@ void
|
|
489
545
|
stackprof_record_gc_samples()
|
490
546
|
{
|
491
547
|
int delta_to_first_unrecorded_gc_sample = 0;
|
548
|
+
int i;
|
492
549
|
if (_stackprof.raw) {
|
493
550
|
struct timeval t;
|
494
|
-
gettimeofday(&t, NULL);
|
495
551
|
struct timeval diff;
|
552
|
+
gettimeofday(&t, NULL);
|
496
553
|
timersub(&t, &_stackprof.last_sample_at, &diff);
|
497
554
|
|
498
555
|
// We don't know when the GC samples were actually marked, so let's
|
499
556
|
// assume that they were marked at a perfectly regular interval.
|
500
|
-
delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * _stackprof.interval;
|
557
|
+
delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
501
558
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
502
559
|
delta_to_first_unrecorded_gc_sample = 0;
|
503
560
|
}
|
504
561
|
}
|
505
562
|
|
506
|
-
int i;
|
507
|
-
|
508
|
-
_stackprof.frames_buffer[0] = _stackprof.fake_gc_frame;
|
509
|
-
_stackprof.lines_buffer[0] = 0;
|
510
563
|
|
511
564
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
512
|
-
int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : _stackprof.interval;
|
513
|
-
|
565
|
+
int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
566
|
+
|
567
|
+
if (_stackprof.unrecorded_gc_marking_samples) {
|
568
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
569
|
+
_stackprof.lines_buffer[0] = 0;
|
570
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
571
|
+
_stackprof.lines_buffer[1] = 0;
|
572
|
+
_stackprof.unrecorded_gc_marking_samples--;
|
573
|
+
|
574
|
+
stackprof_record_sample_for_stack(2, timestamp_delta);
|
575
|
+
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
576
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
577
|
+
_stackprof.lines_buffer[0] = 0;
|
578
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
579
|
+
_stackprof.lines_buffer[1] = 0;
|
580
|
+
|
581
|
+
_stackprof.unrecorded_gc_sweeping_samples--;
|
582
|
+
|
583
|
+
stackprof_record_sample_for_stack(2, timestamp_delta);
|
584
|
+
} else {
|
585
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
586
|
+
_stackprof.lines_buffer[0] = 0;
|
587
|
+
stackprof_record_sample_for_stack(1, timestamp_delta);
|
588
|
+
}
|
514
589
|
}
|
515
590
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
516
591
|
_stackprof.unrecorded_gc_samples = 0;
|
592
|
+
_stackprof.unrecorded_gc_marking_samples = 0;
|
593
|
+
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
517
594
|
}
|
518
595
|
|
519
596
|
static void
|
@@ -544,7 +621,13 @@ static void
|
|
544
621
|
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
545
622
|
{
|
546
623
|
_stackprof.overall_signals++;
|
547
|
-
if (rb_during_gc()) {
|
624
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
625
|
+
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
626
|
+
if (mode == sym_marking) {
|
627
|
+
_stackprof.unrecorded_gc_marking_samples++;
|
628
|
+
} else if (mode == sym_sweeping) {
|
629
|
+
_stackprof.unrecorded_gc_sweeping_samples++;
|
630
|
+
}
|
548
631
|
_stackprof.unrecorded_gc_samples++;
|
549
632
|
rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
|
550
633
|
} else {
|
@@ -583,6 +666,9 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
583
666
|
static void
|
584
667
|
stackprof_gc_mark(void *data)
|
585
668
|
{
|
669
|
+
if (RTEST(_stackprof.metadata))
|
670
|
+
rb_gc_mark(_stackprof.metadata);
|
671
|
+
|
586
672
|
if (RTEST(_stackprof.out))
|
587
673
|
rb_gc_mark(_stackprof.out);
|
588
674
|
|
@@ -625,6 +711,7 @@ stackprof_atfork_child(void)
|
|
625
711
|
void
|
626
712
|
Init_stackprof(void)
|
627
713
|
{
|
714
|
+
size_t i;
|
628
715
|
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
629
716
|
S(object);
|
630
717
|
S(custom);
|
@@ -645,10 +732,18 @@ Init_stackprof(void)
|
|
645
732
|
S(raw);
|
646
733
|
S(raw_timestamp_deltas);
|
647
734
|
S(out);
|
735
|
+
S(metadata);
|
736
|
+
S(ignore_gc);
|
648
737
|
S(frames);
|
649
738
|
S(aggregate);
|
739
|
+
S(state);
|
740
|
+
S(marking);
|
741
|
+
S(sweeping);
|
650
742
|
#undef S
|
651
743
|
|
744
|
+
/* Need to run this to warm the symbol table before we call this during GC */
|
745
|
+
rb_gc_latest_gc_info(sym_state);
|
746
|
+
|
652
747
|
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
|
653
748
|
rb_global_variable(&gc_hook);
|
654
749
|
|
@@ -661,12 +756,14 @@ Init_stackprof(void)
|
|
661
756
|
_stackprof.raw_timestamp_deltas_len = 0;
|
662
757
|
_stackprof.raw_timestamp_deltas_capa = 0;
|
663
758
|
|
664
|
-
_stackprof.fake_gc_frame = INT2FIX(0x9C);
|
665
759
|
_stackprof.empty_string = rb_str_new_cstr("");
|
666
|
-
_stackprof.fake_gc_frame_name = rb_str_new_cstr("(garbage collection)");
|
667
|
-
rb_global_variable(&_stackprof.fake_gc_frame_name);
|
668
760
|
rb_global_variable(&_stackprof.empty_string);
|
669
761
|
|
762
|
+
for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
|
763
|
+
_stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
|
764
|
+
rb_global_variable(&_stackprof.fake_frame_names[i]);
|
765
|
+
}
|
766
|
+
|
670
767
|
rb_mStackProf = rb_define_module("StackProf");
|
671
768
|
rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
|
672
769
|
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
|