stackprof 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -4
- data/bin/stackprof +15 -4
- data/bin/stackprof-flamegraph +2 -0
- data/bin/stackprof-gprof2dot +2 -0
- data/ext/stackprof.c +77 -14
- data/lib/stackprof/middleware.rb +4 -3
- data/lib/stackprof/report.rb +26 -1
- data/stackprof.gemspec +3 -1
- data/test/test_stackprof.rb +26 -0
- data/vendor/FlameGraph/README +134 -0
- data/vendor/FlameGraph/flamegraph.pl +494 -0
- data/vendor/gprof2dot/gprof2dot.py +3266 -0
- data/vendor/gprof2dot/hotshotmain.py +70 -0
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d8814b67fbdc027a47b2d087321b62ba3f2c5e0b
|
4
|
+
data.tar.gz: f5584eca1af8afa88afd407846b019dedbf33d3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 50d3070d8a4ae606cb90b8e94afdbe4a047abc449a77731ebb9e198dd7fbb4f5e86965bbdba8761f3fbebe01023fca08376b865f84c25dcc7f51d4e7a9752d4c
|
7
|
+
data.tar.gz: 4b0bdc52fdd32bb21b1a10a0061371e2d51ae126cd39c01c6898b2a405f6e2a13dd879e439b610d90fde8661d245f995bcf86063bb76570b2b8ddc793a2bdd2b
|
data/README.md
CHANGED
@@ -10,10 +10,9 @@ and written as a replacement for [perftools.rb](https://github.com/tmm1/perftool
|
|
10
10
|
in ruby:
|
11
11
|
|
12
12
|
``` ruby
|
13
|
-
|
13
|
+
StackProf.run(mode: :cpu, out: 'tmp/stackprof-cpu-myapp.dump') do
|
14
14
|
...
|
15
15
|
end
|
16
|
-
File.open('tmp/stackprof-cpu-myapp.dump', 'wb'){ |f| f.write Marshal.dump(profile) }
|
17
16
|
```
|
18
17
|
|
19
18
|
via rack:
|
@@ -63,12 +62,14 @@ $ stackprof tmp/stackprof-cpu-*.dump --method 'Object#present?'
|
|
63
62
|
| 22 | end
|
64
63
|
```
|
65
64
|
|
65
|
+
For an experimental web UI for browsing stackprof reports, see [stackprof-webnav](https://github.com/alisnic/stackprof-webnav).
|
66
|
+
|
66
67
|
### sampling
|
67
68
|
|
68
69
|
four sampling modes are supported:
|
69
70
|
|
70
|
-
- :wall (using `ITIMER_REAL` and `SIGALRM`)
|
71
|
-
- :cpu (using `ITIMER_PROF` and `SIGPROF`)
|
71
|
+
- :wall (using `ITIMER_REAL` and `SIGALRM`)
|
72
|
+
- :cpu (using `ITIMER_PROF` and `SIGPROF`) [default mode]
|
72
73
|
- :object (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
|
73
74
|
- :custom (user-defined via `StackProf.sample`)
|
74
75
|
|
data/bin/stackprof
CHANGED
@@ -13,12 +13,14 @@ parser = OptionParser.new(ARGV) do |o|
|
|
13
13
|
|
14
14
|
o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
|
15
15
|
o.on('--files', 'List of files'){ |f| options[:format] = :files }
|
16
|
-
o.on('--limit
|
16
|
+
o.on('--limit [num]', Integer, 'Limit --text or --files output to N lines'){ |n| options[:limit] = n }
|
17
17
|
o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
|
18
|
-
o.on('--method
|
19
|
-
o.on('--file
|
18
|
+
o.on('--method [grep]', 'Zoom into specified method'){ |f| options[:format] = :method; options[:filter] = f }
|
19
|
+
o.on('--file [grep]', 'Show annotated code for specified file'){ |f| options[:format] = :file; options[:filter] = f }
|
20
|
+
o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with flamegraph.pl)'){ options[:format] = :stackcollapse }
|
20
21
|
o.on('--callgrind', 'Callgrind output (use with kcachegrind, gprof2dot)'){ options[:format] = :callgrind }
|
21
22
|
o.on('--graphviz', "Graphviz output (use with dot)\n\n"){ options[:format] = :graphviz }
|
23
|
+
o.on('--dump', 'Print marshaled profile dump (combine multiple profiles)'){ options[:format] = :dump }
|
22
24
|
o.on('--debug', 'Pretty print raw profile data'){ options[:format] = :debug }
|
23
25
|
end
|
24
26
|
|
@@ -27,7 +29,12 @@ parser.abort(parser.help) if ARGV.empty?
|
|
27
29
|
|
28
30
|
reports = []
|
29
31
|
while ARGV.size > 0
|
30
|
-
|
32
|
+
begin
|
33
|
+
file = ARGV.pop
|
34
|
+
reports << StackProf::Report.new(Marshal.load(IO.binread(file)))
|
35
|
+
rescue TypeError => e
|
36
|
+
STDERR.puts "** error parsing #{file}: #{e.inspect}"
|
37
|
+
end
|
31
38
|
end
|
32
39
|
report = reports.inject(:+)
|
33
40
|
|
@@ -36,10 +43,14 @@ when :text
|
|
36
43
|
report.print_text(options[:sort], options[:limit])
|
37
44
|
when :debug
|
38
45
|
report.print_debug
|
46
|
+
when :dump
|
47
|
+
report.print_dump
|
39
48
|
when :callgrind
|
40
49
|
report.print_callgrind
|
41
50
|
when :graphviz
|
42
51
|
report.print_graphviz
|
52
|
+
when :stackcollapse
|
53
|
+
report.print_stackcollapse
|
43
54
|
when :method
|
44
55
|
report.print_method(options[:filter])
|
45
56
|
when :file
|
data/ext/stackprof.c
CHANGED
@@ -2,19 +2,15 @@
|
|
2
2
|
|
3
3
|
stackprof.c - Sampling call-stack frame profiler for MRI.
|
4
4
|
|
5
|
-
|
6
|
-
created at: Thu May 30 17:55:25 2013
|
7
|
-
|
8
|
-
NOTE: This extension library is not expected to exist except C Ruby.
|
9
|
-
|
10
|
-
All the files in this distribution are covered under the Ruby's
|
11
|
-
license (see the file COPYING).
|
5
|
+
vim: setl noexpandtab shiftwidth=4 tabstop=8 softtabstop=4
|
12
6
|
|
13
7
|
**********************************************************************/
|
14
8
|
|
15
9
|
#include <ruby/ruby.h>
|
16
10
|
#include <ruby/debug.h>
|
17
11
|
#include <ruby/st.h>
|
12
|
+
#include <ruby/io.h>
|
13
|
+
#include <ruby/intern.h>
|
18
14
|
#include <signal.h>
|
19
15
|
#include <sys/time.h>
|
20
16
|
#include <pthread.h>
|
@@ -32,6 +28,9 @@ static struct {
|
|
32
28
|
int running;
|
33
29
|
VALUE mode;
|
34
30
|
VALUE interval;
|
31
|
+
VALUE raw;
|
32
|
+
size_t raw_sample_index;
|
33
|
+
VALUE out;
|
35
34
|
|
36
35
|
size_t overall_signals;
|
37
36
|
size_t overall_samples;
|
@@ -44,7 +43,7 @@ static struct {
|
|
44
43
|
|
45
44
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
46
45
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
47
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_frames;
|
46
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_frames, sym_out;
|
48
47
|
static VALUE sym_gc_samples, objtracer;
|
49
48
|
static VALUE gc_hook;
|
50
49
|
static VALUE rb_mStackProf;
|
@@ -57,7 +56,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
57
56
|
{
|
58
57
|
struct sigaction sa;
|
59
58
|
struct itimerval timer;
|
60
|
-
VALUE opts = Qnil, mode = Qnil, interval = Qnil;
|
59
|
+
VALUE opts = Qnil, mode = Qnil, interval = Qnil, raw = Qfalse, out = Qfalse;
|
61
60
|
|
62
61
|
if (_stackprof.running)
|
63
62
|
return Qfalse;
|
@@ -67,6 +66,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
67
66
|
if (RTEST(opts)) {
|
68
67
|
mode = rb_hash_aref(opts, sym_mode);
|
69
68
|
interval = rb_hash_aref(opts, sym_interval);
|
69
|
+
out = rb_hash_aref(opts, sym_out);
|
70
|
+
|
71
|
+
if (RTEST(rb_hash_aref(opts, sym_raw)))
|
72
|
+
raw = rb_ary_new();
|
70
73
|
}
|
71
74
|
if (!RTEST(mode)) mode = sym_wall;
|
72
75
|
|
@@ -102,8 +105,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
102
105
|
}
|
103
106
|
|
104
107
|
_stackprof.running = 1;
|
108
|
+
_stackprof.raw = raw;
|
105
109
|
_stackprof.mode = mode;
|
106
110
|
_stackprof.interval = interval;
|
111
|
+
_stackprof.out = out;
|
107
112
|
|
108
113
|
return Qtrue;
|
109
114
|
}
|
@@ -207,7 +212,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
207
212
|
}
|
208
213
|
|
209
214
|
static VALUE
|
210
|
-
stackprof_results(VALUE self)
|
215
|
+
stackprof_results(int argc, VALUE *argv, VALUE self)
|
211
216
|
{
|
212
217
|
VALUE results, frames;
|
213
218
|
|
@@ -229,7 +234,28 @@ stackprof_results(VALUE self)
|
|
229
234
|
st_free_table(_stackprof.frames);
|
230
235
|
_stackprof.frames = NULL;
|
231
236
|
|
232
|
-
|
237
|
+
if (RTEST(_stackprof.raw)) {
|
238
|
+
rb_hash_aset(results, sym_raw, _stackprof.raw);
|
239
|
+
_stackprof.raw = Qfalse;
|
240
|
+
}
|
241
|
+
|
242
|
+
if (argc == 1)
|
243
|
+
_stackprof.out = argv[0];
|
244
|
+
|
245
|
+
if (RTEST(_stackprof.out)) {
|
246
|
+
VALUE file;
|
247
|
+
if (RB_TYPE_P(_stackprof.out, T_STRING)) {
|
248
|
+
file = rb_file_open_str(_stackprof.out, "w");
|
249
|
+
} else {
|
250
|
+
file = rb_io_check_io(_stackprof.out);
|
251
|
+
}
|
252
|
+
rb_marshal_dump(results, file);
|
253
|
+
rb_io_flush(file);
|
254
|
+
_stackprof.out = Qnil;
|
255
|
+
return file;
|
256
|
+
} else {
|
257
|
+
return results;
|
258
|
+
}
|
233
259
|
}
|
234
260
|
|
235
261
|
static VALUE
|
@@ -238,7 +264,7 @@ stackprof_run(int argc, VALUE *argv, VALUE self)
|
|
238
264
|
rb_need_block();
|
239
265
|
stackprof_start(argc, argv, self);
|
240
266
|
rb_ensure(rb_yield, Qundef, stackprof_stop, self);
|
241
|
-
return stackprof_results(self);
|
267
|
+
return stackprof_results(0, 0, self);
|
242
268
|
}
|
243
269
|
|
244
270
|
static VALUE
|
@@ -288,12 +314,41 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
288
314
|
void
|
289
315
|
stackprof_record_sample()
|
290
316
|
{
|
291
|
-
int num, i;
|
317
|
+
int num, i, n;
|
318
|
+
int raw_mode = RTEST(_stackprof.raw);
|
292
319
|
VALUE prev_frame = Qnil;
|
320
|
+
size_t raw_len;
|
293
321
|
|
294
322
|
_stackprof.overall_samples++;
|
295
323
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
296
324
|
|
325
|
+
if (raw_mode) {
|
326
|
+
int found = 0;
|
327
|
+
raw_len = RARRAY_LEN(_stackprof.raw);
|
328
|
+
|
329
|
+
if (RARRAY_LEN(_stackprof.raw) > 0 && RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index) == INT2FIX(num)) {
|
330
|
+
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
331
|
+
VALUE frame = _stackprof.frames_buffer[i];
|
332
|
+
if (RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index + 1 + n) != rb_obj_id(frame))
|
333
|
+
break;
|
334
|
+
}
|
335
|
+
if (i == -1) {
|
336
|
+
RARRAY_ASET(_stackprof.raw, raw_len-1, LONG2NUM(NUM2LONG(RARRAY_AREF(_stackprof.raw, raw_len-1))+1));
|
337
|
+
found = 1;
|
338
|
+
}
|
339
|
+
}
|
340
|
+
|
341
|
+
if (!found) {
|
342
|
+
_stackprof.raw_sample_index = raw_len;
|
343
|
+
rb_ary_push(_stackprof.raw, INT2FIX(num));
|
344
|
+
for (i = num-1; i >= 0; i--) {
|
345
|
+
VALUE frame = _stackprof.frames_buffer[i];
|
346
|
+
rb_ary_push(_stackprof.raw, rb_obj_id(frame));
|
347
|
+
}
|
348
|
+
rb_ary_push(_stackprof.raw, INT2FIX(1));
|
349
|
+
}
|
350
|
+
}
|
351
|
+
|
297
352
|
for (i = 0; i < num; i++) {
|
298
353
|
int line = _stackprof.lines_buffer[i];
|
299
354
|
VALUE frame = _stackprof.frames_buffer[i];
|
@@ -346,6 +401,7 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
|
346
401
|
static void
|
347
402
|
stackprof_newobj_handler(VALUE tpval, void *data)
|
348
403
|
{
|
404
|
+
/* TODO: implement interval */
|
349
405
|
_stackprof.overall_signals++;
|
350
406
|
stackprof_job_handler(0);
|
351
407
|
}
|
@@ -372,6 +428,11 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
372
428
|
static void
|
373
429
|
stackprof_gc_mark(void *data)
|
374
430
|
{
|
431
|
+
if (RTEST(_stackprof.raw))
|
432
|
+
rb_gc_mark(_stackprof.raw);
|
433
|
+
if (RTEST(_stackprof.out))
|
434
|
+
rb_gc_mark(_stackprof.out);
|
435
|
+
|
375
436
|
if (_stackprof.frames)
|
376
437
|
st_foreach(_stackprof.frames, frame_mark_i, 0);
|
377
438
|
}
|
@@ -427,6 +488,8 @@ Init_stackprof(void)
|
|
427
488
|
sym_version = ID2SYM(rb_intern("version"));
|
428
489
|
sym_mode = ID2SYM(rb_intern("mode"));
|
429
490
|
sym_interval = ID2SYM(rb_intern("interval"));
|
491
|
+
sym_raw = ID2SYM(rb_intern("raw"));
|
492
|
+
sym_out = ID2SYM(rb_intern("out"));
|
430
493
|
sym_frames = ID2SYM(rb_intern("frames"));
|
431
494
|
|
432
495
|
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, NULL);
|
@@ -437,7 +500,7 @@ Init_stackprof(void)
|
|
437
500
|
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
|
438
501
|
rb_define_singleton_method(rb_mStackProf, "start", stackprof_start, -1);
|
439
502
|
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
440
|
-
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results,
|
503
|
+
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
441
504
|
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
442
505
|
|
443
506
|
rb_autoload(rb_mStackProf, rb_intern_const("Report"), "stackprof/report.rb");
|
data/lib/stackprof/middleware.rb
CHANGED
@@ -31,13 +31,14 @@ module StackProf
|
|
31
31
|
attr_accessor :enabled, :mode, :interval, :path
|
32
32
|
alias enabled? enabled
|
33
33
|
|
34
|
-
def save
|
34
|
+
def save(filename = nil)
|
35
35
|
if results = StackProf.results
|
36
36
|
FileUtils.mkdir_p(Middleware.path)
|
37
|
-
filename
|
38
|
-
File.open(File.join(Middleware.path, filename), 'wb') do |f|
|
37
|
+
filename ||= "stackprof-#{results[:mode]}-#{Process.pid}-#{Time.now.to_i}.dump"
|
38
|
+
File.open(File.join(Middleware.path, filename), 'wb') do |f|
|
39
39
|
f.write Marshal.dump(results)
|
40
40
|
end
|
41
|
+
filename
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
data/lib/stackprof/report.rb
CHANGED
@@ -63,6 +63,22 @@ module StackProf
|
|
63
63
|
pp @data
|
64
64
|
end
|
65
65
|
|
66
|
+
def print_dump
|
67
|
+
puts Marshal.dump(@data.reject{|k,v| k == :files })
|
68
|
+
end
|
69
|
+
|
70
|
+
def print_stackcollapse
|
71
|
+
raise "profile does not include raw samples" unless raw = data[:raw]
|
72
|
+
|
73
|
+
while len = raw.shift
|
74
|
+
frames = raw.slice!(0, len)
|
75
|
+
weight = raw.shift
|
76
|
+
|
77
|
+
print frames.map{ |a| data[:frames][a][:name] }.join(';')
|
78
|
+
puts " #{weight}"
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
66
82
|
def print_graphviz(filter = nil, f = STDOUT)
|
67
83
|
if filter
|
68
84
|
mark_stack = []
|
@@ -161,7 +177,7 @@ module StackProf
|
|
161
177
|
f.printf "%s (%s:%d)\n", info[:name], file, line
|
162
178
|
f.printf " samples: % 5d self (%2.1f%%) / % 5d total (%2.1f%%)\n", info[:samples], 100.0*info[:samples]/overall_samples, info[:total_samples], 100.0*info[:total_samples]/overall_samples
|
163
179
|
|
164
|
-
if (callers =
|
180
|
+
if (callers = callers_for(frame)).any?
|
165
181
|
f.puts " callers:"
|
166
182
|
callers = callers.sort_by(&:last).reverse
|
167
183
|
callers.each do |name, weight|
|
@@ -203,6 +219,15 @@ module StackProf
|
|
203
219
|
|
204
220
|
private
|
205
221
|
|
222
|
+
def root_frames
|
223
|
+
frames.select{ |addr, frame| callers_for(addr).size == 0 }
|
224
|
+
end
|
225
|
+
|
226
|
+
def callers_for(addr)
|
227
|
+
@callers_for ||= {}
|
228
|
+
@callers_for[addr] ||= data[:frames].map{ |id, other| [other[:name], other[:edges][addr]] if other[:edges] && other[:edges].include?(addr) }.compact
|
229
|
+
end
|
230
|
+
|
206
231
|
def source_display(f, file, lines, range=nil)
|
207
232
|
File.readlines(file).each_with_index do |code, i|
|
208
233
|
next unless range.nil? || range.include?(i)
|
data/stackprof.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'stackprof'
|
3
|
-
s.version = '0.2.
|
3
|
+
s.version = '0.2.3'
|
4
4
|
s.homepage = 'http://github.com/tmm1/stackprof'
|
5
5
|
|
6
6
|
s.authors = 'Aman Gupta'
|
@@ -11,6 +11,8 @@ Gem::Specification.new do |s|
|
|
11
11
|
|
12
12
|
s.bindir = 'bin'
|
13
13
|
s.executables << 'stackprof'
|
14
|
+
s.executables << 'stackprof-flamegraph'
|
15
|
+
s.executables << 'stackprof-gprof2dot'
|
14
16
|
|
15
17
|
s.summary = 'sampling callstack-profiler for ruby 2.1+'
|
16
18
|
s.description = 'stackprof is a fast sampling profiler for ruby code, with cpu, wallclock and object allocation samplers.'
|
data/test/test_stackprof.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
$:.unshift File.expand_path('../../lib', __FILE__)
|
2
2
|
require 'stackprof'
|
3
3
|
require 'test/unit'
|
4
|
+
require 'tempfile'
|
4
5
|
|
5
6
|
class StackProfTest < Test::Unit::TestCase
|
6
7
|
def test_info
|
@@ -86,6 +87,19 @@ class StackProfTest < Test::Unit::TestCase
|
|
86
87
|
assert_equal [10, 10], frame[:lines][__LINE__-10]
|
87
88
|
end
|
88
89
|
|
90
|
+
def test_raw
|
91
|
+
profile = StackProf.run(mode: :custom, raw: true) do
|
92
|
+
10.times do
|
93
|
+
StackProf.sample
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
raw = profile[:raw]
|
98
|
+
assert_equal 10, raw[-1]
|
99
|
+
assert_equal raw[0] + 2, raw.size
|
100
|
+
assert_equal 'block (2 levels) in StackProfTest#test_raw', profile[:frames][raw[-2]][:name]
|
101
|
+
end
|
102
|
+
|
89
103
|
def test_fork
|
90
104
|
StackProf.run do
|
91
105
|
pid = fork do
|
@@ -109,6 +123,18 @@ class StackProfTest < Test::Unit::TestCase
|
|
109
123
|
assert_equal 0, profile[:missed_samples]
|
110
124
|
end
|
111
125
|
|
126
|
+
def test_out
|
127
|
+
tmpfile = Tempfile.new('stackprof-out')
|
128
|
+
ret = StackProf.run(mode: :custom, out: tmpfile) do
|
129
|
+
StackProf.sample
|
130
|
+
end
|
131
|
+
|
132
|
+
assert_equal tmpfile, ret
|
133
|
+
tmpfile.rewind
|
134
|
+
profile = Marshal.load(tmpfile.read)
|
135
|
+
assert_not_empty profile[:frames]
|
136
|
+
end
|
137
|
+
|
112
138
|
def math
|
113
139
|
250_000.times do
|
114
140
|
2 ** 10
|
@@ -0,0 +1,134 @@
|
|
1
|
+
Flame Graphs visualize profiled code-paths.
|
2
|
+
|
3
|
+
Website: http://www.brendangregg.com/flamegraphs.html
|
4
|
+
|
5
|
+
CPU profiling using DTrace, perf_events, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html
|
6
|
+
CPU profiling using XCode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/
|
7
|
+
CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/
|
8
|
+
Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html
|
9
|
+
|
10
|
+
These can be created in three steps:
|
11
|
+
|
12
|
+
1. Capture stacks
|
13
|
+
2. Fold stacks
|
14
|
+
3. flamegraph.pl
|
15
|
+
|
16
|
+
|
17
|
+
1. Capture stacks
|
18
|
+
=================
|
19
|
+
Stack samples can be captured using DTrace, perf_events or SystemTap.
|
20
|
+
|
21
|
+
Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz:
|
22
|
+
|
23
|
+
# dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks
|
24
|
+
|
25
|
+
Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz:
|
26
|
+
|
27
|
+
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
|
28
|
+
|
29
|
+
Using DTrace to capture 60 seconds of user-level stacks, including while time is spent in the kernel, for PID 12345 at 97 Hertz:
|
30
|
+
|
31
|
+
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
|
32
|
+
|
33
|
+
Switch ustack() for jstack() if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/). The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using jstack() as it performs additional work to translate frames.
|
34
|
+
|
35
|
+
2. Fold stacks
|
36
|
+
==============
|
37
|
+
Use the stackcollapse programs to fold stack samples into single lines. The programs provided are:
|
38
|
+
|
39
|
+
- stackcollapse.pl: for DTrace stacks
|
40
|
+
- stackcollapse-perf.pl: for perf_events "perf script" output
|
41
|
+
- stackcollapse-stap.pl: for SystemTap stacks
|
42
|
+
- stackcollapse-instruments.pl: for XCode Instruments
|
43
|
+
|
44
|
+
Usage example:
|
45
|
+
|
46
|
+
$ ./stackcollapse.pl out.kern_stacks > out.kern_folded
|
47
|
+
|
48
|
+
The output looks like this:
|
49
|
+
|
50
|
+
unix`_sys_sysenter_post_swapgs 1401
|
51
|
+
unix`_sys_sysenter_post_swapgs;genunix`close 5
|
52
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85
|
53
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26
|
54
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5
|
55
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6
|
56
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2
|
57
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48
|
58
|
+
[...]
|
59
|
+
|
60
|
+
3. flamegraph.pl
|
61
|
+
================
|
62
|
+
Use flamegraph.pl to render an SVG.
|
63
|
+
|
64
|
+
$ ./flamegraph.pl out.kern_folded > kernel.svg
|
65
|
+
|
66
|
+
An advantage of having the folded input file (and why this is separate to flamegraph.pl) is that you can use grep for functions of interest. Eg:
|
67
|
+
|
68
|
+
$ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg
|
69
|
+
|
70
|
+
|
71
|
+
Provided Example
|
72
|
+
================
|
73
|
+
An example output from DTrace is included, both the captured stacks and
|
74
|
+
the resulting Flame Graph. You can generate it yourself using:
|
75
|
+
|
76
|
+
$ ./stackcollapse.pl example-stacks.txt | ./flamegraph.pl > example.svg
|
77
|
+
|
78
|
+
This was from a particular performance investigation: the Flame Graph
|
79
|
+
identified that CPU time was spent in the lofs module, and quantified
|
80
|
+
that time.
|
81
|
+
|
82
|
+
|
83
|
+
Options
|
84
|
+
=======
|
85
|
+
See the USAGE message (--help) for options:
|
86
|
+
|
87
|
+
USAGE: ./flamegraph.pl [options] infile > outfile.svg
|
88
|
+
|
89
|
+
--titletext # change title text
|
90
|
+
--width # width of image (default 1200)
|
91
|
+
--height # height of each frame (default 16)
|
92
|
+
--minwidth # omit smaller functions (default 0.1 pixels)
|
93
|
+
--fonttype # font type (default "Verdana")
|
94
|
+
--fontsize # font size (default 12)
|
95
|
+
--countname # count type label (default "samples")
|
96
|
+
--nametype # name type label (default "Function:")
|
97
|
+
--colors # "hot", "mem", "io" palette (default "hot")
|
98
|
+
--hash # colors are keyed by function name hash
|
99
|
+
--cp # use consistent palette (palette.map)
|
100
|
+
eg,
|
101
|
+
./flamegraph.pl --titletext="Flame Graph: malloc()" trace.txt > graph.svg
|
102
|
+
|
103
|
+
As suggested in the example, flame graphs can process traces of any event,
|
104
|
+
such as malloc()s, provided stack traces are gathered.
|
105
|
+
|
106
|
+
|
107
|
+
Consistent Palette
|
108
|
+
==================
|
109
|
+
If you use the --cp option, it will use the $colors selection and randomly
|
110
|
+
generate the palette like normal. Any future flamegraphs created using the --cp
|
111
|
+
option will use the same palette map. Any new symbols from future flamegraphs
|
112
|
+
will have their colors randomly generated using the $colors selection.
|
113
|
+
|
114
|
+
If you don't like the palette, just delete the palette.map file.
|
115
|
+
|
116
|
+
This allows you to change your colorscheme between flamegraphs to make the
|
117
|
+
differences REALLY stand out.
|
118
|
+
|
119
|
+
Example:
|
120
|
+
|
121
|
+
Say we have 2 captures, one with a problem, and one when it was working
|
122
|
+
(whatever "it" is):
|
123
|
+
|
124
|
+
cat working.folded | ./flamegraph.pl --cp > working.svg
|
125
|
+
# this generates a palette.map, as per the normal random generated look.
|
126
|
+
|
127
|
+
cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg
|
128
|
+
# this svg will use the same palette.map for the same events, but a very
|
129
|
+
# different colorscheme for any new events.
|
130
|
+
|
131
|
+
Take a look at the demo directory for an example:
|
132
|
+
|
133
|
+
palette-example-working.svg
|
134
|
+
palette-example-broken.svg
|