stackprof 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -4
- data/bin/stackprof +15 -4
- data/bin/stackprof-flamegraph +2 -0
- data/bin/stackprof-gprof2dot +2 -0
- data/ext/stackprof.c +77 -14
- data/lib/stackprof/middleware.rb +4 -3
- data/lib/stackprof/report.rb +26 -1
- data/stackprof.gemspec +3 -1
- data/test/test_stackprof.rb +26 -0
- data/vendor/FlameGraph/README +134 -0
- data/vendor/FlameGraph/flamegraph.pl +494 -0
- data/vendor/gprof2dot/gprof2dot.py +3266 -0
- data/vendor/gprof2dot/hotshotmain.py +70 -0
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d8814b67fbdc027a47b2d087321b62ba3f2c5e0b
|
4
|
+
data.tar.gz: f5584eca1af8afa88afd407846b019dedbf33d3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 50d3070d8a4ae606cb90b8e94afdbe4a047abc449a77731ebb9e198dd7fbb4f5e86965bbdba8761f3fbebe01023fca08376b865f84c25dcc7f51d4e7a9752d4c
|
7
|
+
data.tar.gz: 4b0bdc52fdd32bb21b1a10a0061371e2d51ae126cd39c01c6898b2a405f6e2a13dd879e439b610d90fde8661d245f995bcf86063bb76570b2b8ddc793a2bdd2b
|
data/README.md
CHANGED
@@ -10,10 +10,9 @@ and written as a replacement for [perftools.rb](https://github.com/tmm1/perftool
|
|
10
10
|
in ruby:
|
11
11
|
|
12
12
|
``` ruby
|
13
|
-
|
13
|
+
StackProf.run(mode: :cpu, out: 'tmp/stackprof-cpu-myapp.dump') do
|
14
14
|
...
|
15
15
|
end
|
16
|
-
File.open('tmp/stackprof-cpu-myapp.dump', 'wb'){ |f| f.write Marshal.dump(profile) }
|
17
16
|
```
|
18
17
|
|
19
18
|
via rack:
|
@@ -63,12 +62,14 @@ $ stackprof tmp/stackprof-cpu-*.dump --method 'Object#present?'
|
|
63
62
|
| 22 | end
|
64
63
|
```
|
65
64
|
|
65
|
+
For an experimental version of WebUI reporting of stackprof, see [stackprof-webnav](https://github.com/alisnic/stackprof-webnav)
|
66
|
+
|
66
67
|
### sampling
|
67
68
|
|
68
69
|
four sampling modes are supported:
|
69
70
|
|
70
|
-
- :wall (using `ITIMER_REAL` and `SIGALRM`)
|
71
|
-
- :cpu (using `ITIMER_PROF` and `SIGPROF`)
|
71
|
+
- :wall (using `ITIMER_REAL` and `SIGALRM`)
|
72
|
+
- :cpu (using `ITIMER_PROF` and `SIGPROF`) [default mode]
|
72
73
|
- :object (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
|
73
74
|
- :custom (user-defined via `StackProf.sample`)
|
74
75
|
|
data/bin/stackprof
CHANGED
@@ -13,12 +13,14 @@ parser = OptionParser.new(ARGV) do |o|
|
|
13
13
|
|
14
14
|
o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
|
15
15
|
o.on('--files', 'List of files'){ |f| options[:format] = :files }
|
16
|
-
o.on('--limit
|
16
|
+
o.on('--limit [num]', Integer, 'Limit --text or --files output to N lines'){ |n| options[:limit] = n }
|
17
17
|
o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
|
18
|
-
o.on('--method
|
19
|
-
o.on('--file
|
18
|
+
o.on('--method [grep]', 'Zoom into specified method'){ |f| options[:format] = :method; options[:filter] = f }
|
19
|
+
o.on('--file [grep]', 'Show annotated code for specified file'){ |f| options[:format] = :file; options[:filter] = f }
|
20
|
+
o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with flamegraph.pl)'){ options[:format] = :stackcollapse }
|
20
21
|
o.on('--callgrind', 'Callgrind output (use with kcachegrind, gprof2dot)'){ options[:format] = :callgrind }
|
21
22
|
o.on('--graphviz', "Graphviz output (use with dot)\n\n"){ options[:format] = :graphviz }
|
23
|
+
o.on('--dump', 'Print marshaled profile dump (combine multiple profiles)'){ options[:format] = :dump }
|
22
24
|
o.on('--debug', 'Pretty print raw profile data'){ options[:format] = :debug }
|
23
25
|
end
|
24
26
|
|
@@ -27,7 +29,12 @@ parser.abort(parser.help) if ARGV.empty?
|
|
27
29
|
|
28
30
|
reports = []
|
29
31
|
while ARGV.size > 0
|
30
|
-
|
32
|
+
begin
|
33
|
+
file = ARGV.pop
|
34
|
+
reports << StackProf::Report.new(Marshal.load(IO.binread(file)))
|
35
|
+
rescue TypeError => e
|
36
|
+
STDERR.puts "** error parsing #{file}: #{e.inspect}"
|
37
|
+
end
|
31
38
|
end
|
32
39
|
report = reports.inject(:+)
|
33
40
|
|
@@ -36,10 +43,14 @@ when :text
|
|
36
43
|
report.print_text(options[:sort], options[:limit])
|
37
44
|
when :debug
|
38
45
|
report.print_debug
|
46
|
+
when :dump
|
47
|
+
report.print_dump
|
39
48
|
when :callgrind
|
40
49
|
report.print_callgrind
|
41
50
|
when :graphviz
|
42
51
|
report.print_graphviz
|
52
|
+
when :stackcollapse
|
53
|
+
report.print_stackcollapse
|
43
54
|
when :method
|
44
55
|
report.print_method(options[:filter])
|
45
56
|
when :file
|
data/ext/stackprof.c
CHANGED
@@ -2,19 +2,15 @@
|
|
2
2
|
|
3
3
|
stackprof.c - Sampling call-stack frame profiler for MRI.
|
4
4
|
|
5
|
-
|
6
|
-
created at: Thu May 30 17:55:25 2013
|
7
|
-
|
8
|
-
NOTE: This extension library is not expected to exist except C Ruby.
|
9
|
-
|
10
|
-
All the files in this distribution are covered under the Ruby's
|
11
|
-
license (see the file COPYING).
|
5
|
+
vim: setl noexpandtab shiftwidth=4 tabstop=8 softtabstop=4
|
12
6
|
|
13
7
|
**********************************************************************/
|
14
8
|
|
15
9
|
#include <ruby/ruby.h>
|
16
10
|
#include <ruby/debug.h>
|
17
11
|
#include <ruby/st.h>
|
12
|
+
#include <ruby/io.h>
|
13
|
+
#include <ruby/intern.h>
|
18
14
|
#include <signal.h>
|
19
15
|
#include <sys/time.h>
|
20
16
|
#include <pthread.h>
|
@@ -32,6 +28,9 @@ static struct {
|
|
32
28
|
int running;
|
33
29
|
VALUE mode;
|
34
30
|
VALUE interval;
|
31
|
+
VALUE raw;
|
32
|
+
size_t raw_sample_index;
|
33
|
+
VALUE out;
|
35
34
|
|
36
35
|
size_t overall_signals;
|
37
36
|
size_t overall_samples;
|
@@ -44,7 +43,7 @@ static struct {
|
|
44
43
|
|
45
44
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
46
45
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
47
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_frames;
|
46
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_frames, sym_out;
|
48
47
|
static VALUE sym_gc_samples, objtracer;
|
49
48
|
static VALUE gc_hook;
|
50
49
|
static VALUE rb_mStackProf;
|
@@ -57,7 +56,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
57
56
|
{
|
58
57
|
struct sigaction sa;
|
59
58
|
struct itimerval timer;
|
60
|
-
VALUE opts = Qnil, mode = Qnil, interval = Qnil;
|
59
|
+
VALUE opts = Qnil, mode = Qnil, interval = Qnil, raw = Qfalse, out = Qfalse;
|
61
60
|
|
62
61
|
if (_stackprof.running)
|
63
62
|
return Qfalse;
|
@@ -67,6 +66,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
67
66
|
if (RTEST(opts)) {
|
68
67
|
mode = rb_hash_aref(opts, sym_mode);
|
69
68
|
interval = rb_hash_aref(opts, sym_interval);
|
69
|
+
out = rb_hash_aref(opts, sym_out);
|
70
|
+
|
71
|
+
if (RTEST(rb_hash_aref(opts, sym_raw)))
|
72
|
+
raw = rb_ary_new();
|
70
73
|
}
|
71
74
|
if (!RTEST(mode)) mode = sym_wall;
|
72
75
|
|
@@ -102,8 +105,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
102
105
|
}
|
103
106
|
|
104
107
|
_stackprof.running = 1;
|
108
|
+
_stackprof.raw = raw;
|
105
109
|
_stackprof.mode = mode;
|
106
110
|
_stackprof.interval = interval;
|
111
|
+
_stackprof.out = out;
|
107
112
|
|
108
113
|
return Qtrue;
|
109
114
|
}
|
@@ -207,7 +212,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
207
212
|
}
|
208
213
|
|
209
214
|
static VALUE
|
210
|
-
stackprof_results(VALUE self)
|
215
|
+
stackprof_results(int argc, VALUE *argv, VALUE self)
|
211
216
|
{
|
212
217
|
VALUE results, frames;
|
213
218
|
|
@@ -229,7 +234,28 @@ stackprof_results(VALUE self)
|
|
229
234
|
st_free_table(_stackprof.frames);
|
230
235
|
_stackprof.frames = NULL;
|
231
236
|
|
232
|
-
|
237
|
+
if (RTEST(_stackprof.raw)) {
|
238
|
+
rb_hash_aset(results, sym_raw, _stackprof.raw);
|
239
|
+
_stackprof.raw = Qfalse;
|
240
|
+
}
|
241
|
+
|
242
|
+
if (argc == 1)
|
243
|
+
_stackprof.out = argv[0];
|
244
|
+
|
245
|
+
if (RTEST(_stackprof.out)) {
|
246
|
+
VALUE file;
|
247
|
+
if (RB_TYPE_P(_stackprof.out, T_STRING)) {
|
248
|
+
file = rb_file_open_str(_stackprof.out, "w");
|
249
|
+
} else {
|
250
|
+
file = rb_io_check_io(_stackprof.out);
|
251
|
+
}
|
252
|
+
rb_marshal_dump(results, file);
|
253
|
+
rb_io_flush(file);
|
254
|
+
_stackprof.out = Qnil;
|
255
|
+
return file;
|
256
|
+
} else {
|
257
|
+
return results;
|
258
|
+
}
|
233
259
|
}
|
234
260
|
|
235
261
|
static VALUE
|
@@ -238,7 +264,7 @@ stackprof_run(int argc, VALUE *argv, VALUE self)
|
|
238
264
|
rb_need_block();
|
239
265
|
stackprof_start(argc, argv, self);
|
240
266
|
rb_ensure(rb_yield, Qundef, stackprof_stop, self);
|
241
|
-
return stackprof_results(self);
|
267
|
+
return stackprof_results(0, 0, self);
|
242
268
|
}
|
243
269
|
|
244
270
|
static VALUE
|
@@ -288,12 +314,41 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
288
314
|
void
|
289
315
|
stackprof_record_sample()
|
290
316
|
{
|
291
|
-
int num, i;
|
317
|
+
int num, i, n;
|
318
|
+
int raw_mode = RTEST(_stackprof.raw);
|
292
319
|
VALUE prev_frame = Qnil;
|
320
|
+
size_t raw_len;
|
293
321
|
|
294
322
|
_stackprof.overall_samples++;
|
295
323
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
296
324
|
|
325
|
+
if (raw_mode) {
|
326
|
+
int found = 0;
|
327
|
+
raw_len = RARRAY_LEN(_stackprof.raw);
|
328
|
+
|
329
|
+
if (RARRAY_LEN(_stackprof.raw) > 0 && RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index) == INT2FIX(num)) {
|
330
|
+
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
331
|
+
VALUE frame = _stackprof.frames_buffer[i];
|
332
|
+
if (RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index + 1 + n) != rb_obj_id(frame))
|
333
|
+
break;
|
334
|
+
}
|
335
|
+
if (i == -1) {
|
336
|
+
RARRAY_ASET(_stackprof.raw, raw_len-1, LONG2NUM(NUM2LONG(RARRAY_AREF(_stackprof.raw, raw_len-1))+1));
|
337
|
+
found = 1;
|
338
|
+
}
|
339
|
+
}
|
340
|
+
|
341
|
+
if (!found) {
|
342
|
+
_stackprof.raw_sample_index = raw_len;
|
343
|
+
rb_ary_push(_stackprof.raw, INT2FIX(num));
|
344
|
+
for (i = num-1; i >= 0; i--) {
|
345
|
+
VALUE frame = _stackprof.frames_buffer[i];
|
346
|
+
rb_ary_push(_stackprof.raw, rb_obj_id(frame));
|
347
|
+
}
|
348
|
+
rb_ary_push(_stackprof.raw, INT2FIX(1));
|
349
|
+
}
|
350
|
+
}
|
351
|
+
|
297
352
|
for (i = 0; i < num; i++) {
|
298
353
|
int line = _stackprof.lines_buffer[i];
|
299
354
|
VALUE frame = _stackprof.frames_buffer[i];
|
@@ -346,6 +401,7 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
|
346
401
|
static void
|
347
402
|
stackprof_newobj_handler(VALUE tpval, void *data)
|
348
403
|
{
|
404
|
+
/* TODO: implement interval */
|
349
405
|
_stackprof.overall_signals++;
|
350
406
|
stackprof_job_handler(0);
|
351
407
|
}
|
@@ -372,6 +428,11 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
372
428
|
static void
|
373
429
|
stackprof_gc_mark(void *data)
|
374
430
|
{
|
431
|
+
if (RTEST(_stackprof.raw))
|
432
|
+
rb_gc_mark(_stackprof.raw);
|
433
|
+
if (RTEST(_stackprof.out))
|
434
|
+
rb_gc_mark(_stackprof.out);
|
435
|
+
|
375
436
|
if (_stackprof.frames)
|
376
437
|
st_foreach(_stackprof.frames, frame_mark_i, 0);
|
377
438
|
}
|
@@ -427,6 +488,8 @@ Init_stackprof(void)
|
|
427
488
|
sym_version = ID2SYM(rb_intern("version"));
|
428
489
|
sym_mode = ID2SYM(rb_intern("mode"));
|
429
490
|
sym_interval = ID2SYM(rb_intern("interval"));
|
491
|
+
sym_raw = ID2SYM(rb_intern("raw"));
|
492
|
+
sym_out = ID2SYM(rb_intern("out"));
|
430
493
|
sym_frames = ID2SYM(rb_intern("frames"));
|
431
494
|
|
432
495
|
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, NULL);
|
@@ -437,7 +500,7 @@ Init_stackprof(void)
|
|
437
500
|
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
|
438
501
|
rb_define_singleton_method(rb_mStackProf, "start", stackprof_start, -1);
|
439
502
|
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
440
|
-
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results,
|
503
|
+
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
441
504
|
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
442
505
|
|
443
506
|
rb_autoload(rb_mStackProf, rb_intern_const("Report"), "stackprof/report.rb");
|
data/lib/stackprof/middleware.rb
CHANGED
@@ -31,13 +31,14 @@ module StackProf
|
|
31
31
|
attr_accessor :enabled, :mode, :interval, :path
|
32
32
|
alias enabled? enabled
|
33
33
|
|
34
|
-
def save
|
34
|
+
def save(filename = nil)
|
35
35
|
if results = StackProf.results
|
36
36
|
FileUtils.mkdir_p(Middleware.path)
|
37
|
-
filename
|
38
|
-
File.open(File.join(Middleware.path, filename), 'wb') do |f|
|
37
|
+
filename ||= "stackprof-#{results[:mode]}-#{Process.pid}-#{Time.now.to_i}.dump"
|
38
|
+
File.open(File.join(Middleware.path, filename), 'wb') do |f|
|
39
39
|
f.write Marshal.dump(results)
|
40
40
|
end
|
41
|
+
filename
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
data/lib/stackprof/report.rb
CHANGED
@@ -63,6 +63,22 @@ module StackProf
|
|
63
63
|
pp @data
|
64
64
|
end
|
65
65
|
|
66
|
+
def print_dump
|
67
|
+
puts Marshal.dump(@data.reject{|k,v| k == :files })
|
68
|
+
end
|
69
|
+
|
70
|
+
def print_stackcollapse
|
71
|
+
raise "profile does not include raw samples" unless raw = data[:raw]
|
72
|
+
|
73
|
+
while len = raw.shift
|
74
|
+
frames = raw.slice!(0, len)
|
75
|
+
weight = raw.shift
|
76
|
+
|
77
|
+
print frames.map{ |a| data[:frames][a][:name] }.join(';')
|
78
|
+
puts " #{weight}"
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
66
82
|
def print_graphviz(filter = nil, f = STDOUT)
|
67
83
|
if filter
|
68
84
|
mark_stack = []
|
@@ -161,7 +177,7 @@ module StackProf
|
|
161
177
|
f.printf "%s (%s:%d)\n", info[:name], file, line
|
162
178
|
f.printf " samples: % 5d self (%2.1f%%) / % 5d total (%2.1f%%)\n", info[:samples], 100.0*info[:samples]/overall_samples, info[:total_samples], 100.0*info[:total_samples]/overall_samples
|
163
179
|
|
164
|
-
if (callers =
|
180
|
+
if (callers = callers_for(frame)).any?
|
165
181
|
f.puts " callers:"
|
166
182
|
callers = callers.sort_by(&:last).reverse
|
167
183
|
callers.each do |name, weight|
|
@@ -203,6 +219,15 @@ module StackProf
|
|
203
219
|
|
204
220
|
private
|
205
221
|
|
222
|
+
def root_frames
|
223
|
+
frames.select{ |addr, frame| callers_for(addr).size == 0 }
|
224
|
+
end
|
225
|
+
|
226
|
+
def callers_for(addr)
|
227
|
+
@callers_for ||= {}
|
228
|
+
@callers_for[addr] ||= data[:frames].map{ |id, other| [other[:name], other[:edges][addr]] if other[:edges] && other[:edges].include?(addr) }.compact
|
229
|
+
end
|
230
|
+
|
206
231
|
def source_display(f, file, lines, range=nil)
|
207
232
|
File.readlines(file).each_with_index do |code, i|
|
208
233
|
next unless range.nil? || range.include?(i)
|
data/stackprof.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'stackprof'
|
3
|
-
s.version = '0.2.
|
3
|
+
s.version = '0.2.3'
|
4
4
|
s.homepage = 'http://github.com/tmm1/stackprof'
|
5
5
|
|
6
6
|
s.authors = 'Aman Gupta'
|
@@ -11,6 +11,8 @@ Gem::Specification.new do |s|
|
|
11
11
|
|
12
12
|
s.bindir = 'bin'
|
13
13
|
s.executables << 'stackprof'
|
14
|
+
s.executables << 'stackprof-flamegraph'
|
15
|
+
s.executables << 'stackprof-gprof2dot'
|
14
16
|
|
15
17
|
s.summary = 'sampling callstack-profiler for ruby 2.1+'
|
16
18
|
s.description = 'stackprof is a fast sampling profiler for ruby code, with cpu, wallclock and object allocation samplers.'
|
data/test/test_stackprof.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
$:.unshift File.expand_path('../../lib', __FILE__)
|
2
2
|
require 'stackprof'
|
3
3
|
require 'test/unit'
|
4
|
+
require 'tempfile'
|
4
5
|
|
5
6
|
class StackProfTest < Test::Unit::TestCase
|
6
7
|
def test_info
|
@@ -86,6 +87,19 @@ class StackProfTest < Test::Unit::TestCase
|
|
86
87
|
assert_equal [10, 10], frame[:lines][__LINE__-10]
|
87
88
|
end
|
88
89
|
|
90
|
+
def test_raw
|
91
|
+
profile = StackProf.run(mode: :custom, raw: true) do
|
92
|
+
10.times do
|
93
|
+
StackProf.sample
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
raw = profile[:raw]
|
98
|
+
assert_equal 10, raw[-1]
|
99
|
+
assert_equal raw[0] + 2, raw.size
|
100
|
+
assert_equal 'block (2 levels) in StackProfTest#test_raw', profile[:frames][raw[-2]][:name]
|
101
|
+
end
|
102
|
+
|
89
103
|
def test_fork
|
90
104
|
StackProf.run do
|
91
105
|
pid = fork do
|
@@ -109,6 +123,18 @@ class StackProfTest < Test::Unit::TestCase
|
|
109
123
|
assert_equal 0, profile[:missed_samples]
|
110
124
|
end
|
111
125
|
|
126
|
+
def test_out
|
127
|
+
tmpfile = Tempfile.new('stackprof-out')
|
128
|
+
ret = StackProf.run(mode: :custom, out: tmpfile) do
|
129
|
+
StackProf.sample
|
130
|
+
end
|
131
|
+
|
132
|
+
assert_equal tmpfile, ret
|
133
|
+
tmpfile.rewind
|
134
|
+
profile = Marshal.load(tmpfile.read)
|
135
|
+
assert_not_empty profile[:frames]
|
136
|
+
end
|
137
|
+
|
112
138
|
def math
|
113
139
|
250_000.times do
|
114
140
|
2 ** 10
|
@@ -0,0 +1,134 @@
|
|
1
|
+
Flame Graphs visualize profiled code-paths.
|
2
|
+
|
3
|
+
Website: http://www.brendangregg.com/flamegraphs.html
|
4
|
+
|
5
|
+
CPU profiling using DTrace, perf_events, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html
|
6
|
+
CPU profiling using XCode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/
|
7
|
+
CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/
|
8
|
+
Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html
|
9
|
+
|
10
|
+
These can be created in three steps:
|
11
|
+
|
12
|
+
1. Capture stacks
|
13
|
+
2. Fold stacks
|
14
|
+
3. flamegraph.pl
|
15
|
+
|
16
|
+
|
17
|
+
1. Capture stacks
|
18
|
+
=================
|
19
|
+
Stack samples can be captured using DTrace, perf_events or SystemTap.
|
20
|
+
|
21
|
+
Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz:
|
22
|
+
|
23
|
+
# dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks
|
24
|
+
|
25
|
+
Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz:
|
26
|
+
|
27
|
+
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
|
28
|
+
|
29
|
+
Using DTrace to capture 60 seconds of user-level stacks, including while time is spent in the kernel, for PID 12345 at 97 Hertz:
|
30
|
+
|
31
|
+
# dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
|
32
|
+
|
33
|
+
Switch ustack() for jstack() if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/). The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using jstack() as it performs additional work to translate frames.
|
34
|
+
|
35
|
+
2. Fold stacks
|
36
|
+
==============
|
37
|
+
Use the stackcollapse programs to fold stack samples into single lines. The programs provided are:
|
38
|
+
|
39
|
+
- stackcollapse.pl: for DTrace stacks
|
40
|
+
- stackcollapse-perf.pl: for perf_events "perf script" output
|
41
|
+
- stackcollapse-stap.pl: for SystemTap stacks
|
42
|
+
- stackcollapse-instruments.pl: for XCode Instruments
|
43
|
+
|
44
|
+
Usage example:
|
45
|
+
|
46
|
+
$ ./stackcollapse.pl out.kern_stacks > out.kern_folded
|
47
|
+
|
48
|
+
The output looks like this:
|
49
|
+
|
50
|
+
unix`_sys_sysenter_post_swapgs 1401
|
51
|
+
unix`_sys_sysenter_post_swapgs;genunix`close 5
|
52
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85
|
53
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26
|
54
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5
|
55
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6
|
56
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2
|
57
|
+
unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48
|
58
|
+
[...]
|
59
|
+
|
60
|
+
3. flamegraph.pl
|
61
|
+
================
|
62
|
+
Use flamegraph.pl to render an SVG.
|
63
|
+
|
64
|
+
$ ./flamegraph.pl out.kern_folded > kernel.svg
|
65
|
+
|
66
|
+
An advantage of having the folded input file (and why this is separate to flamegraph.pl) is that you can use grep for functions of interest. Eg:
|
67
|
+
|
68
|
+
$ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg
|
69
|
+
|
70
|
+
|
71
|
+
Provided Example
|
72
|
+
================
|
73
|
+
An example output from DTrace is included, both the captured stacks and
|
74
|
+
the resulting Flame Graph. You can generate it yourself using:
|
75
|
+
|
76
|
+
$ ./stackcollapse.pl example-stacks.txt | ./flamegraph.pl > example.svg
|
77
|
+
|
78
|
+
This was from a particular performance investigation: the Flame Graph
|
79
|
+
identified that CPU time was spent in the lofs module, and quantified
|
80
|
+
that time.
|
81
|
+
|
82
|
+
|
83
|
+
Options
|
84
|
+
=======
|
85
|
+
See the USAGE message (--help) for options:
|
86
|
+
|
87
|
+
USAGE: ./flamegraph.pl [options] infile > outfile.svg
|
88
|
+
|
89
|
+
--titletext # change title text
|
90
|
+
--width # width of image (default 1200)
|
91
|
+
--height # height of each frame (default 16)
|
92
|
+
--minwidth # omit smaller functions (default 0.1 pixels)
|
93
|
+
--fonttype # font type (default "Verdana")
|
94
|
+
--fontsize # font size (default 12)
|
95
|
+
--countname # count type label (default "samples")
|
96
|
+
--nametype # name type label (default "Function:")
|
97
|
+
--colors # "hot", "mem", "io" palette (default "hot")
|
98
|
+
--hash # colors are keyed by function name hash
|
99
|
+
--cp # use consistent palette (palette.map)
|
100
|
+
eg,
|
101
|
+
./flamegraph.pl --titletext="Flame Graph: malloc()" trace.txt > graph.svg
|
102
|
+
|
103
|
+
As suggested in the example, flame graphs can process traces of any event,
|
104
|
+
such as malloc()s, provided stack traces are gathered.
|
105
|
+
|
106
|
+
|
107
|
+
Consistent Palette
|
108
|
+
==================
|
109
|
+
If you use the --cp option, it will use the $colors selection and randomly
|
110
|
+
generate the palette like normal. Any future flamegraphs created using the --cp
|
111
|
+
option will use the same palette map. Any new symbols from future flamegraphs
|
112
|
+
will have their colors randomly generated using the $colors selection.
|
113
|
+
|
114
|
+
If you don't like the palette, just delete the palette.map file.
|
115
|
+
|
116
|
+
This allows you to change your colorscheme between flamegraphs to make the
|
117
|
+
differences REALLY stand out.
|
118
|
+
|
119
|
+
Example:
|
120
|
+
|
121
|
+
Say we have 2 captures, one with a problem, and one when it was working
|
122
|
+
(whatever "it" is):
|
123
|
+
|
124
|
+
cat working.folded | ./flamegraph.pl --cp > working.svg
|
125
|
+
# this generates a palette.map, as per the normal randomly generated look.
|
126
|
+
|
127
|
+
cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg
|
128
|
+
# this svg will use the same palette.map for the same events, but a very
|
129
|
+
# different colorscheme for any new events.
|
130
|
+
|
131
|
+
Take a look at the demo directory for an example:
|
132
|
+
|
133
|
+
palette-example-working.svg
|
134
|
+
palette-example-broken.svg
|