stackprof 0.2.11 → 0.2.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 88f62f89ebff2c249b7eaabcb330f12997f09120
4
- data.tar.gz: 629071b6584701d830b827e5d4b9b0951eaa0282
2
+ SHA256:
3
+ metadata.gz: 109d5dc07fefb68933ae164c88420aecc662593f9fa96e102a90e7c8d4c605a9
4
+ data.tar.gz: 7cbd4e6919a160f5e7b680bd0994e9e33097df03c7098ffda7317996a8afb1f6
5
5
  SHA512:
6
- metadata.gz: d02684e9bd77e2b561f626a69a897e1ec2c89b870b30ef54709165cc1debb966a56127f3053d7a27e81cc7c3c62fef50865ebd5bcbfd6d2b28630add6280e479
7
- data.tar.gz: 2d5d70aaa53080112d8f794d1d5412cd8225f451f43431f30209e42854b5de8246ff77347ef39496c2b55b447d6cf114116b81dc2eb458adea16a5d9ce03606e
6
+ metadata.gz: d4c6894359a809ea8e504eca85506179eb598d2b0db6833b1661e08d982efb78ec0fa526e73ba30a7388c443b5b4eb16a466baa7dd3dcd4e139c0bec7b22ab5d
7
+ data.tar.gz: 040bc4d3c1ffb1f724bce5ca7db4ddeae7af7ecdc526f893f878049bf42c715983802def5a2e8905f6b1016401a478457a237ef0a76dc742e7070780940e762b
data/.gitignore CHANGED
@@ -2,3 +2,5 @@
2
2
  /lib/stackprof/stackprof.bundle
3
3
  /lib/stackprof/stackprof.so
4
4
  *.sw?
5
+ /pkg
6
+ /Gemfile.lock
@@ -1,8 +1,21 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.1
5
- - 2.2
6
- - 2.3
7
- - 2.4
8
- - ruby-head
1
+ sudo: required
2
+
3
+ services:
4
+ - docker
5
+
6
+ language: general
7
+
8
+ env:
9
+ matrix:
10
+ - RVM_RUBY_VERSION=2.2
11
+ - RVM_RUBY_VERSION=2.3
12
+ - RVM_RUBY_VERSION=2.4
13
+ - RVM_RUBY_VERSION=2.5
14
+ - RVM_RUBY_VERSION=2.6
15
+ - RVM_RUBY_VERSION=ruby-head
16
+
17
+ before_install:
18
+ - sudo docker build -t stackprof-$RVM_RUBY_VERSION --build-arg=RVM_RUBY_VERSION=$RVM_RUBY_VERSION .
19
+
20
+ script:
21
+ - sudo docker run --name stackprof-$RVM_RUBY_VERSION stackprof-$RVM_RUBY_VERSION
@@ -0,0 +1,14 @@
1
+ # 0.2.16
2
+
3
+ * [flamegraph.pl] Update to latest version
4
+ * Add option to ignore GC frames
5
+ * Handle source code not being available
6
+ * Freeze strings in report.rb
7
+ * Use a cursor object instead of array slicing
8
+ * Raise ArgumentError when interval is <1 or >=1m microseconds
9
+ * fix variable name.
10
+ * Fix default mode comment in readme
11
+
12
+ # 0.2.15
13
+
14
+ * Mark the metadata object before the GC is invoked to prevent it from being garbage collected.
@@ -0,0 +1,21 @@
1
+ FROM ubuntu:16.04
2
+ ARG DEBIAN_FRONTEND=noninteractive
3
+ RUN apt-get update -q && \
4
+ apt-get install -qy \
5
+ curl ca-certificates gnupg2 dirmngr build-essential \
6
+ gawk git autoconf automake pkg-config \
7
+ bison libffi-dev libgdbm-dev libncurses5-dev libsqlite3-dev libtool \
8
+ libyaml-dev sqlite3 zlib1g-dev libgmp-dev libreadline-dev libssl-dev \
9
+ ruby --no-install-recommends && \
10
+ apt-get clean
11
+
12
+ RUN gpg2 --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
13
+ RUN curl -sSL https://get.rvm.io | bash -s
14
+ ARG RVM_RUBY_VERSION=ruby-head
15
+ RUN /bin/bash -l -c "echo $RVM_RUBY_VERSION"
16
+ RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && rvm install $RVM_RUBY_VERSION --binary || rvm install $RVM_RUBY_VERSION"
17
+ ADD . /stackprof/
18
+ WORKDIR /stackprof/
19
+ RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && gem install bundler:1.16.0"
20
+ RUN /bin/bash -l -c ". /etc/profile.d/rvm.sh && bundle install"
21
+ CMD /bin/bash -l -c ". /etc/profile.d/rvm.sh && bundle exec rake"
data/README.md CHANGED
@@ -1,13 +1,17 @@
1
- ## stackprof
1
+ # Stackprof
2
2
 
3
- a sampling call-stack profiler for ruby 2.1+
3
+ A sampling call-stack profiler for Ruby.
4
4
 
5
- inspired heavily by [gperftools](https://code.google.com/p/gperftools/),
6
- and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb)
5
+ Inspired heavily by [gperftools](https://code.google.com/p/gperftools/), and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb).
7
6
 
8
- ### getting started
7
+ ## Requirements
9
8
 
10
- #### Install
9
+ * Ruby 2.2+
10
+ * Linux-based OS
11
+
12
+ ## Getting Started
13
+
14
+ ### Install
11
15
 
12
16
  In your Gemfile add:
13
17
 
@@ -18,7 +22,7 @@ gem 'stackprof'
18
22
  Then run `$ bundle install`. Alternatively you can run `$ gem install stackprof`.
19
23
 
20
24
 
21
- #### Run
25
+ ### Run
22
26
 
23
27
  in ruby:
24
28
 
@@ -93,12 +97,20 @@ The `--flamegraph-viewer` command will output the exact shell command you need t
93
97
 
94
98
  ![](http://i.imgur.com/EwndrgD.png)
95
99
 
96
- ### sampling
100
+ Alternatively, you can generate a flamegraph that uses [d3-flame-graph](https://github.com/spiermar/d3-flame-graph):
101
+
102
+ ```
103
+ $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > flamegraph.html
104
+ ```
105
+
106
+ Then open the resulting file in your browser.
107
+
108
+ ## Sampling
97
109
 
98
110
  four sampling modes are supported:
99
111
 
100
- - :wall (using `ITIMER_REAL` and `SIGALRM`)
101
- - :cpu (using `ITIMER_PROF` and `SIGPROF`) [default mode]
112
+ - :wall (using `ITIMER_REAL` and `SIGALRM`) [default mode]
113
+ - :cpu (using `ITIMER_PROF` and `SIGPROF`)
102
114
  - :object (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
103
115
  - :custom (user-defined via `StackProf.sample`)
104
116
 
@@ -129,6 +141,12 @@ StackProf.run(mode: :object, out: 'tmp/stackprof.dump', interval: 1) do
129
141
  end
130
142
  ```
131
143
 
144
+ by default, samples taken during garbage collection are shown as garbage collection frames,
145
+ including both the mark and sweep phases. in longer traces, these frames can leave gaps in a
146
+ flamegraph that are hard to follow; they can be disabled by setting the `ignore_gc` option to true.
147
+ garbage collection time will still be present in the profile, but it will not be explicitly
148
+ marked with its own frame.
149
+
132
150
  samples are taken using a combination of three new C-APIs in ruby 2.1:
133
151
 
134
152
  - signal handlers enqueue a sampling job using `rb_postponed_job_register_one`.
@@ -142,7 +160,7 @@ samples are taken using a combination of three new C-APIs in ruby 2.1:
142
160
  - in allocation mode, samples are taken via `rb_tracepoint_new(RUBY_INTERNAL_EVENT_NEWOBJ)`,
143
161
  which provides a notification every time the VM allocates a new object.
144
162
 
145
- ### Aggregation
163
+ ## Aggregation
146
164
 
147
165
  each sample consists of N stack frames, where a frame looks something like `MyClass#method` or `block in MySingleton.method`.
148
166
  for each of these frames in the sample, the profiler collects a few pieces of metadata:
@@ -175,14 +193,14 @@ this technique builds up an incremental callgraph from the samples. on any given
175
193
  the sum of the outbound edge weights is equal to total samples collected on that frame
176
194
  (`frame.total_samples == frame.edges.values.sum`).
177
195
 
178
- ### reporting
196
+ ## Reporting
179
197
 
180
198
  multiple reporting modes are supported:
181
199
  - text
182
200
  - dotgraph
183
201
  - source annotation
184
202
 
185
- #### `StackProf::Report.new(data).print_text`
203
+ ### `StackProf::Report.new(data).print_text`
186
204
 
187
205
  ```
188
206
  TOTAL (pct) SAMPLES (pct) FRAME
@@ -197,7 +215,7 @@ multiple reporting modes are supported:
197
215
  188 (100.0%) 0 (0.0%) <main>
198
216
  ```
199
217
 
200
- #### `StackProf::Report.new(data).print_graphviz`
218
+ ### `StackProf::Report.new(data).print_graphviz`
201
219
 
202
220
  ![](http://cl.ly/image/2t3l2q0l0B0A/content)
203
221
 
@@ -223,7 +241,7 @@ digraph profile {
223
241
  }
224
242
  ```
225
243
 
226
- #### `StackProf::Report.new(data).print_method(/pow|newobj|math/)`
244
+ ### `StackProf::Report.new(data).print_method(/pow|newobj|math/)`
227
245
 
228
246
  ```
229
247
  A#pow (/Users/tmm1/code/stackprof/sample.rb:11)
@@ -245,7 +263,7 @@ block in A#math (/Users/tmm1/code/stackprof/sample.rb:21)
245
263
  | 23 | end
246
264
  ```
247
265
 
248
- ### usage
266
+ ## Usage
249
267
 
250
268
  the profiler is compiled as a C-extension and exposes a simple api: `StackProf.run(mode: [:cpu|:wall|:object])`.
251
269
  the `run` method takes a block of code and returns a profile as a simple hash.
@@ -295,7 +313,7 @@ above, `A#pow` was involved in 91 samples, and in all cases it was at the top of
295
313
  divided up between its callee edges. all 91 calls to `A#pow` came from `A#initialize`, as seen by the edge numbered
296
314
  `70346498324780`.
297
315
 
298
- ### advanced usage
316
+ ## Advanced usage
299
317
 
300
318
  the profiler can be started and stopped manually. results are accumulated until retrieval, across
301
319
  multiple start/stop invocations.
@@ -307,7 +325,7 @@ StackProf.stop
307
325
  StackProf.results('/tmp/some.file')
308
326
  ```
309
327
 
310
- ### all options
328
+ ## All options
311
329
 
312
330
  `StackProf.run` accepts an options hash. Currently, the following options are recognized:
313
331
 
@@ -316,11 +334,13 @@ Option | Meaning
316
334
  `mode` | mode of sampling: `:cpu`, `:wall`, `:object`, or `:custom` [c.f.](#sampling)
317
335
  `out` | the target file, which will be overwritten
318
336
  `interval` | mode-relative sample rate [c.f.](#sampling)
337
+ `ignore_gc` | Ignore garbage collection frames
319
338
  `aggregate` | defaults: `true` - if `false` disables [aggregation](#aggregation)
320
339
  `raw` | defaults `false` - if `true` collects the extra data required by the `--flamegraph` and `--stackcollapse` report types
340
+ `metadata` | defaults to `{}`. Must be a `Hash`. Metadata associated with this profile
321
341
  `save_every`| (rack middleware only) write the target file after this many requests
322
342
 
323
- ### todo
343
+ ## Todo
324
344
 
325
345
  * file/iseq blacklist
326
346
  * restore signal handlers on stop
@@ -8,6 +8,7 @@ parser = OptionParser.new(ARGV) do |o|
8
8
  o.banner = "Usage: stackprof [file.dump]+ [--text|--method=NAME|--callgrind|--graphviz]"
9
9
 
10
10
  o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
11
+ o.on('--json', 'JSON output (use with web viewers)'){ options[:format] = :json }
11
12
  o.on('--files', 'List of files'){ |f| options[:format] = :files }
12
13
  o.on('--limit [num]', Integer, 'Limit --text, --files, or --graphviz output to N entries'){ |n| options[:limit] = n }
13
14
  o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
@@ -18,11 +19,14 @@ parser = OptionParser.new(ARGV) do |o|
18
19
  o.on('--graphviz', "Graphviz output (use with dot)"){ options[:format] = :graphviz }
19
20
  o.on('--node-fraction [frac]', OptionParser::DecimalNumeric, 'Drop nodes representing less than [frac] fraction of samples'){ |n| options[:node_fraction] = n }
20
21
  o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with stackprof-flamegraph.pl)'){ options[:format] = :stackcollapse }
21
- o.on('--flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :flamegraph }
22
- o.on('--flamegraph-viewer [f.js]', String, "open html viewer for flamegraph output\n\n"){ |file|
22
+ o.on('--timeline-flamegraph', "timeline-flamegraph output (js)"){ options[:format] = :timeline_flamegraph }
23
+ o.on('--alphabetical-flamegraph', "alphabetical-flamegraph output (js)"){ options[:format] = :alphabetical_flamegraph }
24
+ o.on('--flamegraph', "alias to --timeline-flamegraph"){ options[:format] = :timeline_flamegraph }
25
+ o.on('--flamegraph-viewer [f.js]', String, "open html viewer for flamegraph output"){ |file|
23
26
  puts("open file://#{File.expand_path('../../lib/stackprof/flamegraph/viewer.html', __FILE__)}?data=#{File.expand_path(file)}")
24
27
  exit
25
28
  }
29
+ o.on('--d3-flamegraph', "flamegraph output (html using d3-flame-graph)\n\n"){ options[:format] = :d3_flamegraph }
26
30
  o.on('--select-files []', String, 'Show results of matching files'){ |path| (options[:select_files] ||= []) << File.expand_path(path) }
27
31
  o.on('--reject-files []', String, 'Exclude results of matching files'){ |path| (options[:reject_files] ||= []) << File.expand_path(path) }
28
32
  o.on('--select-names []', Regexp, 'Show results of matching method names'){ |regexp| (options[:select_names] ||= []) << regexp }
@@ -62,6 +66,8 @@ options.delete(:limit) if options[:limit] == 0
62
66
  case options[:format]
63
67
  when :text
64
68
  report.print_text(options[:sort], options[:limit], options[:select_files], options[:reject_files], options[:select_names], options[:reject_names])
69
+ when :json
70
+ report.print_json
65
71
  when :debug
66
72
  report.print_debug
67
73
  when :dump
@@ -72,8 +78,12 @@ when :graphviz
72
78
  report.print_graphviz(options)
73
79
  when :stackcollapse
74
80
  report.print_stackcollapse
75
- when :flamegraph
76
- report.print_flamegraph
81
+ when :timeline_flamegraph
82
+ report.print_timeline_flamegraph
83
+ when :alphabetical_flamegraph
84
+ report.print_alphabetical_flamegraph
85
+ when :d3_flamegraph
86
+ report.print_d3_flamegraph
77
87
  when :method
78
88
  options[:walk] ? report.walk_method(options[:filter]) : report.print_method(options[:filter])
79
89
  when :file
@@ -16,11 +16,24 @@
16
16
  #include <pthread.h>
17
17
 
18
18
  #define BUF_SIZE 2048
19
+ #define MICROSECONDS_IN_SECOND 1000000
20
+
21
+ #define FAKE_FRAME_GC INT2FIX(0)
22
+ #define FAKE_FRAME_MARK INT2FIX(1)
23
+ #define FAKE_FRAME_SWEEP INT2FIX(2)
24
+
25
+ static const char *fake_frame_cstrs[] = {
26
+ "(garbage collection)",
27
+ "(marking)",
28
+ "(sweeping)",
29
+ };
30
+
31
+ #define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
19
32
 
20
33
  typedef struct {
21
34
  size_t total_samples;
22
35
  size_t caller_samples;
23
- int already_accounted_in_total;
36
+ size_t seen_at_sample_number;
24
37
  st_table *edges;
25
38
  st_table *lines;
26
39
  } frame_data_t;
@@ -33,6 +46,8 @@ static struct {
33
46
  VALUE mode;
34
47
  VALUE interval;
35
48
  VALUE out;
49
+ VALUE metadata;
50
+ int ignore_gc;
36
51
 
37
52
  VALUE *raw_samples;
38
53
  size_t raw_samples_len;
@@ -48,10 +63,11 @@ static struct {
48
63
  size_t overall_samples;
49
64
  size_t during_gc;
50
65
  size_t unrecorded_gc_samples;
66
+ size_t unrecorded_gc_marking_samples;
67
+ size_t unrecorded_gc_sweeping_samples;
51
68
  st_table *frames;
52
69
 
53
- VALUE fake_gc_frame;
54
- VALUE fake_gc_frame_name;
70
+ VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
55
71
  VALUE empty_string;
56
72
  VALUE frames_buffer[BUF_SIZE];
57
73
  int lines_buffer[BUF_SIZE];
@@ -59,7 +75,8 @@ static struct {
59
75
 
60
76
  static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
61
77
  static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
62
- static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_frames, sym_out, sym_aggregate, sym_raw_timestamp_deltas;
78
+ static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
79
+ static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
63
80
  static VALUE sym_gc_samples, objtracer;
64
81
  static VALUE gc_hook;
65
82
  static VALUE rb_mStackProf;
@@ -72,7 +89,8 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
72
89
  {
73
90
  struct sigaction sa;
74
91
  struct itimerval timer;
75
- VALUE opts = Qnil, mode = Qnil, interval = Qnil, out = Qfalse;
92
+ VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
93
+ int ignore_gc = 0;
76
94
  int raw = 0, aggregate = 1;
77
95
 
78
96
  if (_stackprof.running)
@@ -84,6 +102,17 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
84
102
  mode = rb_hash_aref(opts, sym_mode);
85
103
  interval = rb_hash_aref(opts, sym_interval);
86
104
  out = rb_hash_aref(opts, sym_out);
105
+ if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
106
+ ignore_gc = 1;
107
+ }
108
+
109
+ VALUE metadata_val = rb_hash_aref(opts, sym_metadata);
110
+ if (RTEST(metadata_val)) {
111
+ if (!RB_TYPE_P(metadata_val, T_HASH))
112
+ rb_raise(rb_eArgError, "metadata should be a hash");
113
+
114
+ metadata = metadata_val;
115
+ }
87
116
 
88
117
  if (RTEST(rb_hash_aref(opts, sym_raw)))
89
118
  raw = 1;
@@ -92,6 +121,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
92
121
  }
93
122
  if (!RTEST(mode)) mode = sym_wall;
94
123
 
124
+ if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
125
+ rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
126
+ }
127
+
95
128
  if (!_stackprof.frames) {
96
129
  _stackprof.frames = st_init_numtable();
97
130
  _stackprof.overall_signals = 0;
@@ -128,6 +161,8 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
128
161
  _stackprof.aggregate = aggregate;
129
162
  _stackprof.mode = mode;
130
163
  _stackprof.interval = interval;
164
+ _stackprof.ignore_gc = ignore_gc;
165
+ _stackprof.metadata = metadata;
131
166
  _stackprof.out = out;
132
167
 
133
168
  if (raw) {
@@ -201,8 +236,8 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
201
236
 
202
237
  rb_hash_aset(results, rb_obj_id(frame), details);
203
238
 
204
- if (frame == _stackprof.fake_gc_frame) {
205
- name = _stackprof.fake_gc_frame_name;
239
+ if (FIXNUM_P(frame)) {
240
+ name = _stackprof.fake_frame_names[FIX2INT(frame)];
206
241
  file = _stackprof.empty_string;
207
242
  line = INT2FIX(0);
208
243
  } else {
@@ -258,6 +293,9 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
258
293
  rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
259
294
  rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
260
295
  rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
296
+ rb_hash_aset(results, sym_metadata, _stackprof.metadata);
297
+
298
+ _stackprof.metadata = Qnil;
261
299
 
262
300
  frames = rb_hash_new();
263
301
  rb_hash_aset(results, sym_frames, frames);
@@ -268,6 +306,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
268
306
 
269
307
  if (_stackprof.raw && _stackprof.raw_samples_len) {
270
308
  size_t len, n, o;
309
+ VALUE raw_timestamp_deltas;
271
310
  VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
272
311
 
273
312
  for (n = 0; n < _stackprof.raw_samples_len; n++) {
@@ -287,7 +326,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
287
326
 
288
327
  rb_hash_aset(results, sym_raw, raw_samples);
289
328
 
290
- VALUE raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
329
+ raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
291
330
 
292
331
  for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
293
332
  rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
@@ -308,11 +347,12 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
308
347
 
309
348
  if (RTEST(_stackprof.out)) {
310
349
  VALUE file;
311
- if (RB_TYPE_P(_stackprof.out, T_STRING)) {
312
- file = rb_file_open_str(_stackprof.out, "w");
313
- } else {
350
+ if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
314
351
  file = rb_io_check_io(_stackprof.out);
352
+ } else {
353
+ file = rb_file_open_str(_stackprof.out, "w");
315
354
  }
355
+
316
356
  rb_marshal_dump(results, file);
317
357
  rb_io_flush(file);
318
358
  _stackprof.out = Qnil;
@@ -386,17 +426,29 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
386
426
  if (_stackprof.raw) {
387
427
  int found = 0;
388
428
 
429
+ /* If there's no sample buffer allocated, then allocate one. The buffer
430
+ * format is the number of frames (num), then the list of frames (from
431
+ * `_stackprof.raw_samples`), followed by the number of times this
432
+ * particular stack has been seen in a row. Each "new" stack is added
433
+ * to the end of the buffer, but if the previous stack is the same as
434
+ * the current stack, the counter will be incremented. */
389
435
  if (!_stackprof.raw_samples) {
390
436
  _stackprof.raw_samples_capa = num * 100;
391
437
  _stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
392
438
  }
393
439
 
440
+ /* If we can't fit all the samples in the buffer, double the buffer size. */
394
441
  while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
395
442
  _stackprof.raw_samples_capa *= 2;
396
443
  _stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
397
444
  }
398
445
 
446
+ /* If we've seen this stack before in the last sample, then increment the "seen" count. */
399
447
  if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
448
+ /* The number of samples could have been the same, but the stack
449
+ * might be different, so we need to check the stack here. Stacks
450
+ * in the raw buffer are stored in the opposite direction of stacks
451
+ * in the frames buffer that came from Ruby. */
400
452
  for (i = num-1, n = 0; i >= 0; i--, n++) {
401
453
  VALUE frame = _stackprof.frames_buffer[i];
402
454
  if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame)
@@ -408,7 +460,11 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
408
460
  }
409
461
  }
410
462
 
463
+ /* If we haven't seen the stack, then add it to the buffer along with
464
+ * the length of the stack and a 1 for the "seen" count */
411
465
  if (!found) {
466
+ /* Bump the `raw_sample_index` up so that the next iteration can
467
+ * find the previously recorded stack size. */
412
468
  _stackprof.raw_sample_index = _stackprof.raw_samples_len;
413
469
  _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
414
470
  for (i = num-1; i >= 0; i--) {
@@ -418,33 +474,32 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
418
474
  _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
419
475
  }
420
476
 
477
+ /* If there's no timestamp delta buffer, allocate one */
421
478
  if (!_stackprof.raw_timestamp_deltas) {
422
479
  _stackprof.raw_timestamp_deltas_capa = 100;
423
480
  _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
424
481
  _stackprof.raw_timestamp_deltas_len = 0;
425
482
  }
426
483
 
484
+ /* Double the buffer size if it's too small */
427
485
  while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
428
486
  _stackprof.raw_timestamp_deltas_capa *= 2;
429
487
  _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
430
488
  }
431
489
 
490
+ /* Store the time delta (which is the amount of time between samples) */
432
491
  _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
433
492
  }
434
493
 
435
- for (i = 0; i < num; i++) {
436
- VALUE frame = _stackprof.frames_buffer[i];
437
- sample_for(frame)->already_accounted_in_total = 0;
438
- }
439
-
440
494
  for (i = 0; i < num; i++) {
441
495
  int line = _stackprof.lines_buffer[i];
442
496
  VALUE frame = _stackprof.frames_buffer[i];
443
497
  frame_data_t *frame_data = sample_for(frame);
444
498
 
445
- if (!frame_data->already_accounted_in_total)
499
+ if (frame_data->seen_at_sample_number != _stackprof.overall_samples) {
446
500
  frame_data->total_samples++;
447
- frame_data->already_accounted_in_total = 1;
501
+ }
502
+ frame_data->seen_at_sample_number = _stackprof.overall_samples;
448
503
 
449
504
  if (i == 0) {
450
505
  frame_data->caller_samples++;
@@ -455,10 +510,10 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
455
510
  }
456
511
 
457
512
  if (_stackprof.aggregate && line > 0) {
458
- if (!frame_data->lines)
459
- frame_data->lines = st_init_numtable();
460
513
  size_t half = (size_t)1<<(8*SIZEOF_SIZE_T/2);
461
514
  size_t increment = i == 0 ? half + 1 : half;
515
+ if (!frame_data->lines)
516
+ frame_data->lines = st_init_numtable();
462
517
  st_numtable_increment(frame_data->lines, (st_data_t)line, increment);
463
518
  }
464
519
 
@@ -474,14 +529,15 @@ void
474
529
  stackprof_record_sample()
475
530
  {
476
531
  int timestamp_delta = 0;
532
+ int num;
477
533
  if (_stackprof.raw) {
478
534
  struct timeval t;
479
- gettimeofday(&t, NULL);
480
535
  struct timeval diff;
536
+ gettimeofday(&t, NULL);
481
537
  timersub(&t, &_stackprof.last_sample_at, &diff);
482
538
  timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
483
539
  }
484
- int num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
540
+ num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
485
541
  stackprof_record_sample_for_stack(num, timestamp_delta);
486
542
  }
487
543
 
@@ -489,31 +545,52 @@ void
489
545
  stackprof_record_gc_samples()
490
546
  {
491
547
  int delta_to_first_unrecorded_gc_sample = 0;
548
+ int i;
492
549
  if (_stackprof.raw) {
493
550
  struct timeval t;
494
- gettimeofday(&t, NULL);
495
551
  struct timeval diff;
552
+ gettimeofday(&t, NULL);
496
553
  timersub(&t, &_stackprof.last_sample_at, &diff);
497
554
 
498
555
  // We don't know when the GC samples were actually marked, so let's
499
556
  // assume that they were marked at a perfectly regular interval.
500
- delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * _stackprof.interval;
557
+ delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
501
558
  if (delta_to_first_unrecorded_gc_sample < 0) {
502
559
  delta_to_first_unrecorded_gc_sample = 0;
503
560
  }
504
561
  }
505
562
 
506
- int i;
507
-
508
- _stackprof.frames_buffer[0] = _stackprof.fake_gc_frame;
509
- _stackprof.lines_buffer[0] = 0;
510
563
 
511
564
  for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
512
- int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : _stackprof.interval;
513
- stackprof_record_sample_for_stack(1, timestamp_delta);
565
+ int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
566
+
567
+ if (_stackprof.unrecorded_gc_marking_samples) {
568
+ _stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
569
+ _stackprof.lines_buffer[0] = 0;
570
+ _stackprof.frames_buffer[1] = FAKE_FRAME_GC;
571
+ _stackprof.lines_buffer[1] = 0;
572
+ _stackprof.unrecorded_gc_marking_samples--;
573
+
574
+ stackprof_record_sample_for_stack(2, timestamp_delta);
575
+ } else if (_stackprof.unrecorded_gc_sweeping_samples) {
576
+ _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
577
+ _stackprof.lines_buffer[0] = 0;
578
+ _stackprof.frames_buffer[1] = FAKE_FRAME_GC;
579
+ _stackprof.lines_buffer[1] = 0;
580
+
581
+ _stackprof.unrecorded_gc_sweeping_samples--;
582
+
583
+ stackprof_record_sample_for_stack(2, timestamp_delta);
584
+ } else {
585
+ _stackprof.frames_buffer[0] = FAKE_FRAME_GC;
586
+ _stackprof.lines_buffer[0] = 0;
587
+ stackprof_record_sample_for_stack(1, timestamp_delta);
588
+ }
514
589
  }
515
590
  _stackprof.during_gc += _stackprof.unrecorded_gc_samples;
516
591
  _stackprof.unrecorded_gc_samples = 0;
592
+ _stackprof.unrecorded_gc_marking_samples = 0;
593
+ _stackprof.unrecorded_gc_sweeping_samples = 0;
517
594
  }
518
595
 
519
596
  static void
@@ -544,7 +621,13 @@ static void
544
621
  stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
545
622
  {
546
623
  _stackprof.overall_signals++;
547
- if (rb_during_gc()) {
624
+ if (!_stackprof.ignore_gc && rb_during_gc()) {
625
+ VALUE mode = rb_gc_latest_gc_info(sym_state);
626
+ if (mode == sym_marking) {
627
+ _stackprof.unrecorded_gc_marking_samples++;
628
+ } else if (mode == sym_sweeping) {
629
+ _stackprof.unrecorded_gc_sweeping_samples++;
630
+ }
548
631
  _stackprof.unrecorded_gc_samples++;
549
632
  rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
550
633
  } else {
@@ -583,6 +666,9 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
583
666
  static void
584
667
  stackprof_gc_mark(void *data)
585
668
  {
669
+ if (RTEST(_stackprof.metadata))
670
+ rb_gc_mark(_stackprof.metadata);
671
+
586
672
  if (RTEST(_stackprof.out))
587
673
  rb_gc_mark(_stackprof.out);
588
674
 
@@ -625,6 +711,7 @@ stackprof_atfork_child(void)
625
711
  void
626
712
  Init_stackprof(void)
627
713
  {
714
+ size_t i;
628
715
  #define S(name) sym_##name = ID2SYM(rb_intern(#name));
629
716
  S(object);
630
717
  S(custom);
@@ -645,10 +732,18 @@ Init_stackprof(void)
645
732
  S(raw);
646
733
  S(raw_timestamp_deltas);
647
734
  S(out);
735
+ S(metadata);
736
+ S(ignore_gc);
648
737
  S(frames);
649
738
  S(aggregate);
739
+ S(state);
740
+ S(marking);
741
+ S(sweeping);
650
742
  #undef S
651
743
 
744
+ /* Need to run this to warm the symbol table before we call this during GC */
745
+ rb_gc_latest_gc_info(sym_state);
746
+
652
747
  gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
653
748
  rb_global_variable(&gc_hook);
654
749
 
@@ -661,12 +756,14 @@ Init_stackprof(void)
661
756
  _stackprof.raw_timestamp_deltas_len = 0;
662
757
  _stackprof.raw_timestamp_deltas_capa = 0;
663
758
 
664
- _stackprof.fake_gc_frame = INT2FIX(0x9C);
665
759
  _stackprof.empty_string = rb_str_new_cstr("");
666
- _stackprof.fake_gc_frame_name = rb_str_new_cstr("(garbage collection)");
667
- rb_global_variable(&_stackprof.fake_gc_frame_name);
668
760
  rb_global_variable(&_stackprof.empty_string);
669
761
 
762
+ for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
763
+ _stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
764
+ rb_global_variable(&_stackprof.fake_frame_names[i]);
765
+ }
766
+
670
767
  rb_mStackProf = rb_define_module("StackProf");
671
768
  rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
672
769
  rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);