stackprof 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a10d79a11e17b11b5b706c69c9c5b5546b7fabe9
4
- data.tar.gz: 41e9454828e293709c63bb96ecff91530f2fb8da
3
+ metadata.gz: d8814b67fbdc027a47b2d087321b62ba3f2c5e0b
4
+ data.tar.gz: f5584eca1af8afa88afd407846b019dedbf33d3d
5
5
  SHA512:
6
- metadata.gz: 0e3d5900059d2531409bba444a58679e50227d656ab450d5fc04bc8a8fcec9d0055224599369cab3304dbd8366c06957af06978b12f28cb359d8f83455c409ca
7
- data.tar.gz: 34c673bbe0c60795d7b97a39d2ab1bace18a6377dfd7c5f63dca31fc2c2e0447e8c420e5663e9b7c61cd11365d5cd4c2e2bc6d0f23f8b414ddb3c7083b82867a
6
+ metadata.gz: 50d3070d8a4ae606cb90b8e94afdbe4a047abc449a77731ebb9e198dd7fbb4f5e86965bbdba8761f3fbebe01023fca08376b865f84c25dcc7f51d4e7a9752d4c
7
+ data.tar.gz: 4b0bdc52fdd32bb21b1a10a0061371e2d51ae126cd39c01c6898b2a405f6e2a13dd879e439b610d90fde8661d245f995bcf86063bb76570b2b8ddc793a2bdd2b
data/README.md CHANGED
@@ -10,10 +10,9 @@ and written as a replacement for [perftools.rb](https://github.com/tmm1/perftool
10
10
  in ruby:
11
11
 
12
12
  ``` ruby
13
- profile = StackProf.run(mode: :cpu) do
13
+ StackProf.run(mode: :cpu, out: 'tmp/stackprof-cpu-myapp.dump') do
14
14
  ...
15
15
  end
16
- File.open('tmp/stackprof-cpu-myapp.dump', 'wb'){ |f| f.write Marshal.dump(profile) }
17
16
  ```
18
17
 
19
18
  via rack:
@@ -63,12 +62,14 @@ $ stackprof tmp/stackprof-cpu-*.dump --method 'Object#present?'
63
62
  | 22 | end
64
63
  ```
65
64
 
65
+ For an experimental version of WebUI reporting of stackprof, see [stackprof-webnav](https://github.com/alisnic/stackprof-webnav)
66
+
66
67
  ### sampling
67
68
 
68
69
  four sampling modes are supported:
69
70
 
70
- - :wall (using `ITIMER_REAL` and `SIGALRM`) [default mode]
71
- - :cpu (using `ITIMER_PROF` and `SIGPROF`)
71
+ - :wall (using `ITIMER_REAL` and `SIGALRM`)
72
+ - :cpu (using `ITIMER_PROF` and `SIGPROF`) [default mode]
72
73
  - :object (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
73
74
  - :custom (user-defined via `StackProf.sample`)
74
75
 
data/bin/stackprof CHANGED
@@ -13,12 +13,14 @@ parser = OptionParser.new(ARGV) do |o|
13
13
 
14
14
  o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
15
15
  o.on('--files', 'List of files'){ |f| options[:format] = :files }
16
- o.on('--limit=[num]', Integer, 'Limit --text or --files output to N lines'){ |n| options[:limit] = n }
16
+ o.on('--limit [num]', Integer, 'Limit --text or --files output to N lines'){ |n| options[:limit] = n }
17
17
  o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
18
- o.on('--method=[grep]', 'Zoom into specified method'){ |f| options[:format] = :method; options[:filter] = f }
19
- o.on('--file=[grep]', 'Show annotated code for specified file'){ |f| options[:format] = :file; options[:filter] = f }
18
+ o.on('--method [grep]', 'Zoom into specified method'){ |f| options[:format] = :method; options[:filter] = f }
19
+ o.on('--file [grep]', 'Show annotated code for specified file'){ |f| options[:format] = :file; options[:filter] = f }
20
+ o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with flamegraph.pl)'){ options[:format] = :stackcollapse }
20
21
  o.on('--callgrind', 'Callgrind output (use with kcachegrind, gprof2dot)'){ options[:format] = :callgrind }
21
22
  o.on('--graphviz', "Graphviz output (use with dot)\n\n"){ options[:format] = :graphviz }
23
+ o.on('--dump', 'Print marshaled profile dump (combine multiple profiles)'){ options[:format] = :dump }
22
24
  o.on('--debug', 'Pretty print raw profile data'){ options[:format] = :debug }
23
25
  end
24
26
 
@@ -27,7 +29,12 @@ parser.abort(parser.help) if ARGV.empty?
27
29
 
28
30
  reports = []
29
31
  while ARGV.size > 0
30
- reports << StackProf::Report.new(Marshal.load(IO.binread(ARGV.pop)))
32
+ begin
33
+ file = ARGV.pop
34
+ reports << StackProf::Report.new(Marshal.load(IO.binread(file)))
35
+ rescue TypeError => e
36
+ STDERR.puts "** error parsing #{file}: #{e.inspect}"
37
+ end
31
38
  end
32
39
  report = reports.inject(:+)
33
40
 
@@ -36,10 +43,14 @@ when :text
36
43
  report.print_text(options[:sort], options[:limit])
37
44
  when :debug
38
45
  report.print_debug
46
+ when :dump
47
+ report.print_dump
39
48
  when :callgrind
40
49
  report.print_callgrind
41
50
  when :graphviz
42
51
  report.print_graphviz
52
+ when :stackcollapse
53
+ report.print_stackcollapse
43
54
  when :method
44
55
  report.print_method(options[:filter])
45
56
  when :file
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ exec(File.expand_path("../../vendor/FlameGraph/flamegraph.pl", __FILE__), *ARGV)
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ exec(File.expand_path("../../vendor/gprof2dot/gprof2dot.py", __FILE__), *ARGV)
data/ext/stackprof.c CHANGED
@@ -2,19 +2,15 @@
2
2
 
3
3
  stackprof.c - Sampling call-stack frame profiler for MRI.
4
4
 
5
- $Author$
6
- created at: Thu May 30 17:55:25 2013
7
-
8
- NOTE: This extension library is not expected to exist except C Ruby.
9
-
10
- All the files in this distribution are covered under the Ruby's
11
- license (see the file COPYING).
5
+ vim: setl noexpandtab shiftwidth=4 tabstop=8 softtabstop=4
12
6
 
13
7
  **********************************************************************/
14
8
 
15
9
  #include <ruby/ruby.h>
16
10
  #include <ruby/debug.h>
17
11
  #include <ruby/st.h>
12
+ #include <ruby/io.h>
13
+ #include <ruby/intern.h>
18
14
  #include <signal.h>
19
15
  #include <sys/time.h>
20
16
  #include <pthread.h>
@@ -32,6 +28,9 @@ static struct {
32
28
  int running;
33
29
  VALUE mode;
34
30
  VALUE interval;
31
+ VALUE raw;
32
+ size_t raw_sample_index;
33
+ VALUE out;
35
34
 
36
35
  size_t overall_signals;
37
36
  size_t overall_samples;
@@ -44,7 +43,7 @@ static struct {
44
43
 
45
44
  static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
46
45
  static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
47
- static VALUE sym_version, sym_mode, sym_interval, sym_frames;
46
+ static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_frames, sym_out;
48
47
  static VALUE sym_gc_samples, objtracer;
49
48
  static VALUE gc_hook;
50
49
  static VALUE rb_mStackProf;
@@ -57,7 +56,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
57
56
  {
58
57
  struct sigaction sa;
59
58
  struct itimerval timer;
60
- VALUE opts = Qnil, mode = Qnil, interval = Qnil;
59
+ VALUE opts = Qnil, mode = Qnil, interval = Qnil, raw = Qfalse, out = Qfalse;
61
60
 
62
61
  if (_stackprof.running)
63
62
  return Qfalse;
@@ -67,6 +66,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
67
66
  if (RTEST(opts)) {
68
67
  mode = rb_hash_aref(opts, sym_mode);
69
68
  interval = rb_hash_aref(opts, sym_interval);
69
+ out = rb_hash_aref(opts, sym_out);
70
+
71
+ if (RTEST(rb_hash_aref(opts, sym_raw)))
72
+ raw = rb_ary_new();
70
73
  }
71
74
  if (!RTEST(mode)) mode = sym_wall;
72
75
 
@@ -102,8 +105,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
102
105
  }
103
106
 
104
107
  _stackprof.running = 1;
108
+ _stackprof.raw = raw;
105
109
  _stackprof.mode = mode;
106
110
  _stackprof.interval = interval;
111
+ _stackprof.out = out;
107
112
 
108
113
  return Qtrue;
109
114
  }
@@ -207,7 +212,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
207
212
  }
208
213
 
209
214
  static VALUE
210
- stackprof_results(VALUE self)
215
+ stackprof_results(int argc, VALUE *argv, VALUE self)
211
216
  {
212
217
  VALUE results, frames;
213
218
 
@@ -229,7 +234,28 @@ stackprof_results(VALUE self)
229
234
  st_free_table(_stackprof.frames);
230
235
  _stackprof.frames = NULL;
231
236
 
232
- return results;
237
+ if (RTEST(_stackprof.raw)) {
238
+ rb_hash_aset(results, sym_raw, _stackprof.raw);
239
+ _stackprof.raw = Qfalse;
240
+ }
241
+
242
+ if (argc == 1)
243
+ _stackprof.out = argv[0];
244
+
245
+ if (RTEST(_stackprof.out)) {
246
+ VALUE file;
247
+ if (RB_TYPE_P(_stackprof.out, T_STRING)) {
248
+ file = rb_file_open_str(_stackprof.out, "w");
249
+ } else {
250
+ file = rb_io_check_io(_stackprof.out);
251
+ }
252
+ rb_marshal_dump(results, file);
253
+ rb_io_flush(file);
254
+ _stackprof.out = Qnil;
255
+ return file;
256
+ } else {
257
+ return results;
258
+ }
233
259
  }
234
260
 
235
261
  static VALUE
@@ -238,7 +264,7 @@ stackprof_run(int argc, VALUE *argv, VALUE self)
238
264
  rb_need_block();
239
265
  stackprof_start(argc, argv, self);
240
266
  rb_ensure(rb_yield, Qundef, stackprof_stop, self);
241
- return stackprof_results(self);
267
+ return stackprof_results(0, 0, self);
242
268
  }
243
269
 
244
270
  static VALUE
@@ -288,12 +314,41 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
288
314
  void
289
315
  stackprof_record_sample()
290
316
  {
291
- int num, i;
317
+ int num, i, n;
318
+ int raw_mode = RTEST(_stackprof.raw);
292
319
  VALUE prev_frame = Qnil;
320
+ size_t raw_len;
293
321
 
294
322
  _stackprof.overall_samples++;
295
323
  num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer), _stackprof.frames_buffer, _stackprof.lines_buffer);
296
324
 
325
+ if (raw_mode) {
326
+ int found = 0;
327
+ raw_len = RARRAY_LEN(_stackprof.raw);
328
+
329
+ if (RARRAY_LEN(_stackprof.raw) > 0 && RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index) == INT2FIX(num)) {
330
+ for (i = num-1, n = 0; i >= 0; i--, n++) {
331
+ VALUE frame = _stackprof.frames_buffer[i];
332
+ if (RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index + 1 + n) != rb_obj_id(frame))
333
+ break;
334
+ }
335
+ if (i == -1) {
336
+ RARRAY_ASET(_stackprof.raw, raw_len-1, LONG2NUM(NUM2LONG(RARRAY_AREF(_stackprof.raw, raw_len-1))+1));
337
+ found = 1;
338
+ }
339
+ }
340
+
341
+ if (!found) {
342
+ _stackprof.raw_sample_index = raw_len;
343
+ rb_ary_push(_stackprof.raw, INT2FIX(num));
344
+ for (i = num-1; i >= 0; i--) {
345
+ VALUE frame = _stackprof.frames_buffer[i];
346
+ rb_ary_push(_stackprof.raw, rb_obj_id(frame));
347
+ }
348
+ rb_ary_push(_stackprof.raw, INT2FIX(1));
349
+ }
350
+ }
351
+
297
352
  for (i = 0; i < num; i++) {
298
353
  int line = _stackprof.lines_buffer[i];
299
354
  VALUE frame = _stackprof.frames_buffer[i];
@@ -346,6 +401,7 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
346
401
  static void
347
402
  stackprof_newobj_handler(VALUE tpval, void *data)
348
403
  {
404
+ /* TODO: implement interval */
349
405
  _stackprof.overall_signals++;
350
406
  stackprof_job_handler(0);
351
407
  }
@@ -372,6 +428,11 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
372
428
  static void
373
429
  stackprof_gc_mark(void *data)
374
430
  {
431
+ if (RTEST(_stackprof.raw))
432
+ rb_gc_mark(_stackprof.raw);
433
+ if (RTEST(_stackprof.out))
434
+ rb_gc_mark(_stackprof.out);
435
+
375
436
  if (_stackprof.frames)
376
437
  st_foreach(_stackprof.frames, frame_mark_i, 0);
377
438
  }
@@ -427,6 +488,8 @@ Init_stackprof(void)
427
488
  sym_version = ID2SYM(rb_intern("version"));
428
489
  sym_mode = ID2SYM(rb_intern("mode"));
429
490
  sym_interval = ID2SYM(rb_intern("interval"));
491
+ sym_raw = ID2SYM(rb_intern("raw"));
492
+ sym_out = ID2SYM(rb_intern("out"));
430
493
  sym_frames = ID2SYM(rb_intern("frames"));
431
494
 
432
495
  gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, NULL);
@@ -437,7 +500,7 @@ Init_stackprof(void)
437
500
  rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
438
501
  rb_define_singleton_method(rb_mStackProf, "start", stackprof_start, -1);
439
502
  rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
440
- rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, 0);
503
+ rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
441
504
  rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
442
505
 
443
506
  rb_autoload(rb_mStackProf, rb_intern_const("Report"), "stackprof/report.rb");
@@ -31,13 +31,14 @@ module StackProf
31
31
  attr_accessor :enabled, :mode, :interval, :path
32
32
  alias enabled? enabled
33
33
 
34
- def save
34
+ def save(filename = nil)
35
35
  if results = StackProf.results
36
36
  FileUtils.mkdir_p(Middleware.path)
37
- filename = "stackprof-#{results[:mode]}-#{Process.pid}-#{Time.now.to_i}.dump"
38
- File.open(File.join(Middleware.path, filename), 'wb') do |f|
37
+ filename ||= "stackprof-#{results[:mode]}-#{Process.pid}-#{Time.now.to_i}.dump"
38
+ File.open(File.join(Middleware.path, filename), 'wb') do |f|
39
39
  f.write Marshal.dump(results)
40
40
  end
41
+ filename
41
42
  end
42
43
  end
43
44
 
@@ -63,6 +63,22 @@ module StackProf
63
63
  pp @data
64
64
  end
65
65
 
66
+ def print_dump
67
+ puts Marshal.dump(@data.reject{|k,v| k == :files })
68
+ end
69
+
70
+ def print_stackcollapse
71
+ raise "profile does not include raw samples" unless raw = data[:raw]
72
+
73
+ while len = raw.shift
74
+ frames = raw.slice!(0, len)
75
+ weight = raw.shift
76
+
77
+ print frames.map{ |a| data[:frames][a][:name] }.join(';')
78
+ puts " #{weight}"
79
+ end
80
+ end
81
+
66
82
  def print_graphviz(filter = nil, f = STDOUT)
67
83
  if filter
68
84
  mark_stack = []
@@ -161,7 +177,7 @@ module StackProf
161
177
  f.printf "%s (%s:%d)\n", info[:name], file, line
162
178
  f.printf " samples: % 5d self (%2.1f%%) / % 5d total (%2.1f%%)\n", info[:samples], 100.0*info[:samples]/overall_samples, info[:total_samples], 100.0*info[:total_samples]/overall_samples
163
179
 
164
- if (callers = data[:frames].map{ |id, other| [other[:name], other[:edges][frame]] if other[:edges] && other[:edges].include?(frame) }.compact).any?
180
+ if (callers = callers_for(frame)).any?
165
181
  f.puts " callers:"
166
182
  callers = callers.sort_by(&:last).reverse
167
183
  callers.each do |name, weight|
@@ -203,6 +219,15 @@ module StackProf
203
219
 
204
220
  private
205
221
 
222
+ def root_frames
223
+ frames.select{ |addr, frame| callers_for(addr).size == 0 }
224
+ end
225
+
226
+ def callers_for(addr)
227
+ @callers_for ||= {}
228
+ @callers_for[addr] ||= data[:frames].map{ |id, other| [other[:name], other[:edges][addr]] if other[:edges] && other[:edges].include?(addr) }.compact
229
+ end
230
+
206
231
  def source_display(f, file, lines, range=nil)
207
232
  File.readlines(file).each_with_index do |code, i|
208
233
  next unless range.nil? || range.include?(i)
data/stackprof.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'stackprof'
3
- s.version = '0.2.2'
3
+ s.version = '0.2.3'
4
4
  s.homepage = 'http://github.com/tmm1/stackprof'
5
5
 
6
6
  s.authors = 'Aman Gupta'
@@ -11,6 +11,8 @@ Gem::Specification.new do |s|
11
11
 
12
12
  s.bindir = 'bin'
13
13
  s.executables << 'stackprof'
14
+ s.executables << 'stackprof-flamegraph'
15
+ s.executables << 'stackprof-gprof2dot'
14
16
 
15
17
  s.summary = 'sampling callstack-profiler for ruby 2.1+'
16
18
  s.description = 'stackprof is a fast sampling profiler for ruby code, with cpu, wallclock and object allocation samplers.'
@@ -1,6 +1,7 @@
1
1
  $:.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'stackprof'
3
3
  require 'test/unit'
4
+ require 'tempfile'
4
5
 
5
6
  class StackProfTest < Test::Unit::TestCase
6
7
  def test_info
@@ -86,6 +87,19 @@ class StackProfTest < Test::Unit::TestCase
86
87
  assert_equal [10, 10], frame[:lines][__LINE__-10]
87
88
  end
88
89
 
90
+ def test_raw
91
+ profile = StackProf.run(mode: :custom, raw: true) do
92
+ 10.times do
93
+ StackProf.sample
94
+ end
95
+ end
96
+
97
+ raw = profile[:raw]
98
+ assert_equal 10, raw[-1]
99
+ assert_equal raw[0] + 2, raw.size
100
+ assert_equal 'block (2 levels) in StackProfTest#test_raw', profile[:frames][raw[-2]][:name]
101
+ end
102
+
89
103
  def test_fork
90
104
  StackProf.run do
91
105
  pid = fork do
@@ -109,6 +123,18 @@ class StackProfTest < Test::Unit::TestCase
109
123
  assert_equal 0, profile[:missed_samples]
110
124
  end
111
125
 
126
+ def test_out
127
+ tmpfile = Tempfile.new('stackprof-out')
128
+ ret = StackProf.run(mode: :custom, out: tmpfile) do
129
+ StackProf.sample
130
+ end
131
+
132
+ assert_equal tmpfile, ret
133
+ tmpfile.rewind
134
+ profile = Marshal.load(tmpfile.read)
135
+ assert_not_empty profile[:frames]
136
+ end
137
+
112
138
  def math
113
139
  250_000.times do
114
140
  2 ** 10
@@ -0,0 +1,134 @@
1
+ Flame Graphs visualize profiled code-paths.
2
+
3
+ Website: http://www.brendangregg.com/flamegraphs.html
4
+
5
+ CPU profiling using DTrace, perf_events, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html
6
+ CPU profiling using XCode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/
7
+ CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/
8
+ Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html
9
+
10
+ These can be created in three steps:
11
+
12
+ 1. Capture stacks
13
+ 2. Fold stacks
14
+ 3. flamegraph.pl
15
+
16
+
17
+ 1. Capture stacks
18
+ =================
19
+ Stack samples can be captured using DTrace, perf_events or SystemTap.
20
+
21
+ Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz:
22
+
23
+ # dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks
24
+
25
+ Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz:
26
+
27
+ # dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
28
+
29
+ Using DTrace to capture 60 seconds of user-level stacks, including while time is spent in the kernel, for PID 12345 at 97 Hertz:
30
+
31
+ # dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
32
+
33
+ Switch ustack() for jstack() if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/). The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using jstack() as it performs additional work to translate frames.
34
+
35
+ 2. Fold stacks
36
+ ==============
37
+ Use the stackcollapse programs to fold stack samples into single lines. The programs provided are:
38
+
39
+ - stackcollapse.pl: for DTrace stacks
40
+ - stackcollapse-perf.pl: for perf_events "perf script" output
41
+ - stackcollapse-stap.pl: for SystemTap stacks
42
+ - stackcollapse-instruments.pl: for XCode Instruments
43
+
44
+ Usage example:
45
+
46
+ $ ./stackcollapse.pl out.kern_stacks > out.kern_folded
47
+
48
+ The output looks like this:
49
+
50
+ unix`_sys_sysenter_post_swapgs 1401
51
+ unix`_sys_sysenter_post_swapgs;genunix`close 5
52
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85
53
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26
54
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5
55
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6
56
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2
57
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48
58
+ [...]
59
+
60
+ 3. flamegraph.pl
61
+ ================
62
+ Use flamegraph.pl to render an SVG.
63
+
64
+ $ ./flamegraph.pl out.kern_folded > kernel.svg
65
+
66
+ An advantage of having the folded input file (and why this is separate to flamegraph.pl) is that you can use grep for functions of interest. Eg:
67
+
68
+ $ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg
69
+
70
+
71
+ Provided Example
72
+ ================
73
+ An example output from DTrace is included, both the captured stacks and
74
+ the resulting Flame Graph. You can generate it yourself using:
75
+
76
+ $ ./stackcollapse.pl example-stacks.txt | ./flamegraph.pl > example.svg
77
+
78
+ This was from a particular performance investigation: the Flame Graph
79
+ identified that CPU time was spent in the lofs module, and quantified
80
+ that time.
81
+
82
+
83
+ Options
84
+ =======
85
+ See the USAGE message (--help) for options:
86
+
87
+ USAGE: ./flamegraph.pl [options] infile > outfile.svg
88
+
89
+ --titletext # change title text
90
+ --width # width of image (default 1200)
91
+ --height # height of each frame (default 16)
92
+ --minwidth # omit smaller functions (default 0.1 pixels)
93
+ --fonttype # font type (default "Verdana")
94
+ --fontsize # font size (default 12)
95
+ --countname # count type label (default "samples")
96
+ --nametype # name type label (default "Function:")
97
+ --colors # "hot", "mem", "io" palette (default "hot")
98
+ --hash # colors are keyed by function name hash
99
+ --cp # use consistent palette (palette.map)
100
+ eg,
101
+ ./flamegraph.pl --titletext="Flame Graph: malloc()" trace.txt > graph.svg
102
+
103
+ As suggested in the example, flame graphs can process traces of any event,
104
+ such as malloc()s, provided stack traces are gathered.
105
+
106
+
107
+ Consistent Palette
108
+ ==================
109
+ If you use the --cp option, it will use the $colors selection and randomly
110
+ generate the palette like normal. Any future flamegraphs created using the --cp
111
+ option will use the same palette map. Any new symbols from future flamegraphs
112
+ will have their colors randomly generated using the $colors selection.
113
+
114
+ If you don't like the palette, just delete the palette.map file.
115
+
116
+ This allows you to change your colorscheme between flamegraphs to make the
117
+ differences REALLY stand out.
118
+
119
+ Example:
120
+
121
+ Say we have 2 captures, one with a problem, and one when it was working
122
+ (whatever "it" is):
123
+
124
+ cat working.folded | ./flamegraph.pl --cp > working.svg
125
+ # this generates a palette.map, as per the normal randomly generated look.
126
+
127
+ cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg
128
+ # this svg will use the same palette.map for the same events, but a very
129
+ # different colorscheme for any new events.
130
+
131
+ Take a look at the demo directory for an example:
132
+
133
+ palette-example-working.svg
134
+ palette-example-broken.svg