stackprof 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a10d79a11e17b11b5b706c69c9c5b5546b7fabe9
4
- data.tar.gz: 41e9454828e293709c63bb96ecff91530f2fb8da
3
+ metadata.gz: d8814b67fbdc027a47b2d087321b62ba3f2c5e0b
4
+ data.tar.gz: f5584eca1af8afa88afd407846b019dedbf33d3d
5
5
  SHA512:
6
- metadata.gz: 0e3d5900059d2531409bba444a58679e50227d656ab450d5fc04bc8a8fcec9d0055224599369cab3304dbd8366c06957af06978b12f28cb359d8f83455c409ca
7
- data.tar.gz: 34c673bbe0c60795d7b97a39d2ab1bace18a6377dfd7c5f63dca31fc2c2e0447e8c420e5663e9b7c61cd11365d5cd4c2e2bc6d0f23f8b414ddb3c7083b82867a
6
+ metadata.gz: 50d3070d8a4ae606cb90b8e94afdbe4a047abc449a77731ebb9e198dd7fbb4f5e86965bbdba8761f3fbebe01023fca08376b865f84c25dcc7f51d4e7a9752d4c
7
+ data.tar.gz: 4b0bdc52fdd32bb21b1a10a0061371e2d51ae126cd39c01c6898b2a405f6e2a13dd879e439b610d90fde8661d245f995bcf86063bb76570b2b8ddc793a2bdd2b
data/README.md CHANGED
@@ -10,10 +10,9 @@ and written as a replacement for [perftools.rb](https://github.com/tmm1/perftool
10
10
  in ruby:
11
11
 
12
12
  ``` ruby
13
- profile = StackProf.run(mode: :cpu) do
13
+ StackProf.run(mode: :cpu, out: 'tmp/stackprof-cpu-myapp.dump') do
14
14
  ...
15
15
  end
16
- File.open('tmp/stackprof-cpu-myapp.dump', 'wb'){ |f| f.write Marshal.dump(profile) }
17
16
  ```
18
17
 
19
18
  via rack:
@@ -63,12 +62,14 @@ $ stackprof tmp/stackprof-cpu-*.dump --method 'Object#present?'
63
62
  | 22 | end
64
63
  ```
65
64
 
65
+ For an experimental web UI for browsing stackprof reports, see [stackprof-webnav](https://github.com/alisnic/stackprof-webnav)
66
+
66
67
  ### sampling
67
68
 
68
69
  four sampling modes are supported:
69
70
 
70
- - :wall (using `ITIMER_REAL` and `SIGALRM`) [default mode]
71
- - :cpu (using `ITIMER_PROF` and `SIGPROF`)
71
+ - :wall (using `ITIMER_REAL` and `SIGALRM`)
72
+ - :cpu (using `ITIMER_PROF` and `SIGPROF`) [default mode]
72
73
  - :object (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
73
74
  - :custom (user-defined via `StackProf.sample`)
74
75
 
data/bin/stackprof CHANGED
@@ -13,12 +13,14 @@ parser = OptionParser.new(ARGV) do |o|
13
13
 
14
14
  o.on('--text', 'Text summary per method (default)'){ options[:format] = :text }
15
15
  o.on('--files', 'List of files'){ |f| options[:format] = :files }
16
- o.on('--limit=[num]', Integer, 'Limit --text or --files output to N lines'){ |n| options[:limit] = n }
16
+ o.on('--limit [num]', Integer, 'Limit --text or --files output to N lines'){ |n| options[:limit] = n }
17
17
  o.on('--sort-total', "Sort --text or --files output on total samples\n\n"){ options[:sort] = true }
18
- o.on('--method=[grep]', 'Zoom into specified method'){ |f| options[:format] = :method; options[:filter] = f }
19
- o.on('--file=[grep]', 'Show annotated code for specified file'){ |f| options[:format] = :file; options[:filter] = f }
18
+ o.on('--method [grep]', 'Zoom into specified method'){ |f| options[:format] = :method; options[:filter] = f }
19
+ o.on('--file [grep]', 'Show annotated code for specified file'){ |f| options[:format] = :file; options[:filter] = f }
20
+ o.on('--stackcollapse', 'stackcollapse.pl compatible output (use with flamegraph.pl)'){ options[:format] = :stackcollapse }
20
21
  o.on('--callgrind', 'Callgrind output (use with kcachegrind, gprof2dot)'){ options[:format] = :callgrind }
21
22
  o.on('--graphviz', "Graphviz output (use with dot)\n\n"){ options[:format] = :graphviz }
23
+ o.on('--dump', 'Print marshaled profile dump (combine multiple profiles)'){ options[:format] = :dump }
22
24
  o.on('--debug', 'Pretty print raw profile data'){ options[:format] = :debug }
23
25
  end
24
26
 
@@ -27,7 +29,12 @@ parser.abort(parser.help) if ARGV.empty?
27
29
 
28
30
  reports = []
29
31
  while ARGV.size > 0
30
- reports << StackProf::Report.new(Marshal.load(IO.binread(ARGV.pop)))
32
+ begin
33
+ file = ARGV.pop
34
+ reports << StackProf::Report.new(Marshal.load(IO.binread(file)))
35
+ rescue TypeError => e
36
+ STDERR.puts "** error parsing #{file}: #{e.inspect}"
37
+ end
31
38
  end
32
39
  report = reports.inject(:+)
33
40
 
@@ -36,10 +43,14 @@ when :text
36
43
  report.print_text(options[:sort], options[:limit])
37
44
  when :debug
38
45
  report.print_debug
46
+ when :dump
47
+ report.print_dump
39
48
  when :callgrind
40
49
  report.print_callgrind
41
50
  when :graphviz
42
51
  report.print_graphviz
52
+ when :stackcollapse
53
+ report.print_stackcollapse
43
54
  when :method
44
55
  report.print_method(options[:filter])
45
56
  when :file
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ exec(File.expand_path("../../vendor/FlameGraph/flamegraph.pl", __FILE__), *ARGV)
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ exec(File.expand_path("../../vendor/gprof2dot/gprof2dot.py", __FILE__), *ARGV)
data/ext/stackprof.c CHANGED
@@ -2,19 +2,15 @@
2
2
 
3
3
  stackprof.c - Sampling call-stack frame profiler for MRI.
4
4
 
5
- $Author$
6
- created at: Thu May 30 17:55:25 2013
7
-
8
- NOTE: This extension library is not expected to exist except C Ruby.
9
-
10
- All the files in this distribution are covered under the Ruby's
11
- license (see the file COPYING).
5
+ vim: setl noexpandtab shiftwidth=4 tabstop=8 softtabstop=4
12
6
 
13
7
  **********************************************************************/
14
8
 
15
9
  #include <ruby/ruby.h>
16
10
  #include <ruby/debug.h>
17
11
  #include <ruby/st.h>
12
+ #include <ruby/io.h>
13
+ #include <ruby/intern.h>
18
14
  #include <signal.h>
19
15
  #include <sys/time.h>
20
16
  #include <pthread.h>
@@ -32,6 +28,9 @@ static struct {
32
28
  int running;
33
29
  VALUE mode;
34
30
  VALUE interval;
31
+ VALUE raw;
32
+ size_t raw_sample_index;
33
+ VALUE out;
35
34
 
36
35
  size_t overall_signals;
37
36
  size_t overall_samples;
@@ -44,7 +43,7 @@ static struct {
44
43
 
45
44
  static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
46
45
  static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
47
- static VALUE sym_version, sym_mode, sym_interval, sym_frames;
46
+ static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_frames, sym_out;
48
47
  static VALUE sym_gc_samples, objtracer;
49
48
  static VALUE gc_hook;
50
49
  static VALUE rb_mStackProf;
@@ -57,7 +56,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
57
56
  {
58
57
  struct sigaction sa;
59
58
  struct itimerval timer;
60
- VALUE opts = Qnil, mode = Qnil, interval = Qnil;
59
+ VALUE opts = Qnil, mode = Qnil, interval = Qnil, raw = Qfalse, out = Qfalse;
61
60
 
62
61
  if (_stackprof.running)
63
62
  return Qfalse;
@@ -67,6 +66,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
67
66
  if (RTEST(opts)) {
68
67
  mode = rb_hash_aref(opts, sym_mode);
69
68
  interval = rb_hash_aref(opts, sym_interval);
69
+ out = rb_hash_aref(opts, sym_out);
70
+
71
+ if (RTEST(rb_hash_aref(opts, sym_raw)))
72
+ raw = rb_ary_new();
70
73
  }
71
74
  if (!RTEST(mode)) mode = sym_wall;
72
75
 
@@ -102,8 +105,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
102
105
  }
103
106
 
104
107
  _stackprof.running = 1;
108
+ _stackprof.raw = raw;
105
109
  _stackprof.mode = mode;
106
110
  _stackprof.interval = interval;
111
+ _stackprof.out = out;
107
112
 
108
113
  return Qtrue;
109
114
  }
@@ -207,7 +212,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
207
212
  }
208
213
 
209
214
  static VALUE
210
- stackprof_results(VALUE self)
215
+ stackprof_results(int argc, VALUE *argv, VALUE self)
211
216
  {
212
217
  VALUE results, frames;
213
218
 
@@ -229,7 +234,28 @@ stackprof_results(VALUE self)
229
234
  st_free_table(_stackprof.frames);
230
235
  _stackprof.frames = NULL;
231
236
 
232
- return results;
237
+ if (RTEST(_stackprof.raw)) {
238
+ rb_hash_aset(results, sym_raw, _stackprof.raw);
239
+ _stackprof.raw = Qfalse;
240
+ }
241
+
242
+ if (argc == 1)
243
+ _stackprof.out = argv[0];
244
+
245
+ if (RTEST(_stackprof.out)) {
246
+ VALUE file;
247
+ if (RB_TYPE_P(_stackprof.out, T_STRING)) {
248
+ file = rb_file_open_str(_stackprof.out, "w");
249
+ } else {
250
+ file = rb_io_check_io(_stackprof.out);
251
+ }
252
+ rb_marshal_dump(results, file);
253
+ rb_io_flush(file);
254
+ _stackprof.out = Qnil;
255
+ return file;
256
+ } else {
257
+ return results;
258
+ }
233
259
  }
234
260
 
235
261
  static VALUE
@@ -238,7 +264,7 @@ stackprof_run(int argc, VALUE *argv, VALUE self)
238
264
  rb_need_block();
239
265
  stackprof_start(argc, argv, self);
240
266
  rb_ensure(rb_yield, Qundef, stackprof_stop, self);
241
- return stackprof_results(self);
267
+ return stackprof_results(0, 0, self);
242
268
  }
243
269
 
244
270
  static VALUE
@@ -288,12 +314,41 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
288
314
  void
289
315
  stackprof_record_sample()
290
316
  {
291
- int num, i;
317
+ int num, i, n;
318
+ int raw_mode = RTEST(_stackprof.raw);
292
319
  VALUE prev_frame = Qnil;
320
+ size_t raw_len;
293
321
 
294
322
  _stackprof.overall_samples++;
295
323
  num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer), _stackprof.frames_buffer, _stackprof.lines_buffer);
296
324
 
325
+ if (raw_mode) {
326
+ int found = 0;
327
+ raw_len = RARRAY_LEN(_stackprof.raw);
328
+
329
+ if (RARRAY_LEN(_stackprof.raw) > 0 && RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index) == INT2FIX(num)) {
330
+ for (i = num-1, n = 0; i >= 0; i--, n++) {
331
+ VALUE frame = _stackprof.frames_buffer[i];
332
+ if (RARRAY_AREF(_stackprof.raw, _stackprof.raw_sample_index + 1 + n) != rb_obj_id(frame))
333
+ break;
334
+ }
335
+ if (i == -1) {
336
+ RARRAY_ASET(_stackprof.raw, raw_len-1, LONG2NUM(NUM2LONG(RARRAY_AREF(_stackprof.raw, raw_len-1))+1));
337
+ found = 1;
338
+ }
339
+ }
340
+
341
+ if (!found) {
342
+ _stackprof.raw_sample_index = raw_len;
343
+ rb_ary_push(_stackprof.raw, INT2FIX(num));
344
+ for (i = num-1; i >= 0; i--) {
345
+ VALUE frame = _stackprof.frames_buffer[i];
346
+ rb_ary_push(_stackprof.raw, rb_obj_id(frame));
347
+ }
348
+ rb_ary_push(_stackprof.raw, INT2FIX(1));
349
+ }
350
+ }
351
+
297
352
  for (i = 0; i < num; i++) {
298
353
  int line = _stackprof.lines_buffer[i];
299
354
  VALUE frame = _stackprof.frames_buffer[i];
@@ -346,6 +401,7 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
346
401
  static void
347
402
  stackprof_newobj_handler(VALUE tpval, void *data)
348
403
  {
404
+ /* TODO: implement interval */
349
405
  _stackprof.overall_signals++;
350
406
  stackprof_job_handler(0);
351
407
  }
@@ -372,6 +428,11 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
372
428
  static void
373
429
  stackprof_gc_mark(void *data)
374
430
  {
431
+ if (RTEST(_stackprof.raw))
432
+ rb_gc_mark(_stackprof.raw);
433
+ if (RTEST(_stackprof.out))
434
+ rb_gc_mark(_stackprof.out);
435
+
375
436
  if (_stackprof.frames)
376
437
  st_foreach(_stackprof.frames, frame_mark_i, 0);
377
438
  }
@@ -427,6 +488,8 @@ Init_stackprof(void)
427
488
  sym_version = ID2SYM(rb_intern("version"));
428
489
  sym_mode = ID2SYM(rb_intern("mode"));
429
490
  sym_interval = ID2SYM(rb_intern("interval"));
491
+ sym_raw = ID2SYM(rb_intern("raw"));
492
+ sym_out = ID2SYM(rb_intern("out"));
430
493
  sym_frames = ID2SYM(rb_intern("frames"));
431
494
 
432
495
  gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, NULL);
@@ -437,7 +500,7 @@ Init_stackprof(void)
437
500
  rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
438
501
  rb_define_singleton_method(rb_mStackProf, "start", stackprof_start, -1);
439
502
  rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
440
- rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, 0);
503
+ rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
441
504
  rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
442
505
 
443
506
  rb_autoload(rb_mStackProf, rb_intern_const("Report"), "stackprof/report.rb");
@@ -31,13 +31,14 @@ module StackProf
31
31
  attr_accessor :enabled, :mode, :interval, :path
32
32
  alias enabled? enabled
33
33
 
34
- def save
34
+ def save(filename = nil)
35
35
  if results = StackProf.results
36
36
  FileUtils.mkdir_p(Middleware.path)
37
- filename = "stackprof-#{results[:mode]}-#{Process.pid}-#{Time.now.to_i}.dump"
38
- File.open(File.join(Middleware.path, filename), 'wb') do |f|
37
+ filename ||= "stackprof-#{results[:mode]}-#{Process.pid}-#{Time.now.to_i}.dump"
38
+ File.open(File.join(Middleware.path, filename), 'wb') do |f|
39
39
  f.write Marshal.dump(results)
40
40
  end
41
+ filename
41
42
  end
42
43
  end
43
44
 
@@ -63,6 +63,22 @@ module StackProf
63
63
  pp @data
64
64
  end
65
65
 
66
+ def print_dump
67
+ puts Marshal.dump(@data.reject{|k,v| k == :files })
68
+ end
69
+
70
+ def print_stackcollapse
71
+ raise "profile does not include raw samples" unless raw = data[:raw]
72
+
73
+ while len = raw.shift
74
+ frames = raw.slice!(0, len)
75
+ weight = raw.shift
76
+
77
+ print frames.map{ |a| data[:frames][a][:name] }.join(';')
78
+ puts " #{weight}"
79
+ end
80
+ end
81
+
66
82
  def print_graphviz(filter = nil, f = STDOUT)
67
83
  if filter
68
84
  mark_stack = []
@@ -161,7 +177,7 @@ module StackProf
161
177
  f.printf "%s (%s:%d)\n", info[:name], file, line
162
178
  f.printf " samples: % 5d self (%2.1f%%) / % 5d total (%2.1f%%)\n", info[:samples], 100.0*info[:samples]/overall_samples, info[:total_samples], 100.0*info[:total_samples]/overall_samples
163
179
 
164
- if (callers = data[:frames].map{ |id, other| [other[:name], other[:edges][frame]] if other[:edges] && other[:edges].include?(frame) }.compact).any?
180
+ if (callers = callers_for(frame)).any?
165
181
  f.puts " callers:"
166
182
  callers = callers.sort_by(&:last).reverse
167
183
  callers.each do |name, weight|
@@ -203,6 +219,15 @@ module StackProf
203
219
 
204
220
  private
205
221
 
222
+ def root_frames
223
+ frames.select{ |addr, frame| callers_for(addr).size == 0 }
224
+ end
225
+
226
+ def callers_for(addr)
227
+ @callers_for ||= {}
228
+ @callers_for[addr] ||= data[:frames].map{ |id, other| [other[:name], other[:edges][addr]] if other[:edges] && other[:edges].include?(addr) }.compact
229
+ end
230
+
206
231
  def source_display(f, file, lines, range=nil)
207
232
  File.readlines(file).each_with_index do |code, i|
208
233
  next unless range.nil? || range.include?(i)
data/stackprof.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'stackprof'
3
- s.version = '0.2.2'
3
+ s.version = '0.2.3'
4
4
  s.homepage = 'http://github.com/tmm1/stackprof'
5
5
 
6
6
  s.authors = 'Aman Gupta'
@@ -11,6 +11,8 @@ Gem::Specification.new do |s|
11
11
 
12
12
  s.bindir = 'bin'
13
13
  s.executables << 'stackprof'
14
+ s.executables << 'stackprof-flamegraph'
15
+ s.executables << 'stackprof-gprof2dot'
14
16
 
15
17
  s.summary = 'sampling callstack-profiler for ruby 2.1+'
16
18
  s.description = 'stackprof is a fast sampling profiler for ruby code, with cpu, wallclock and object allocation samplers.'
@@ -1,6 +1,7 @@
1
1
  $:.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'stackprof'
3
3
  require 'test/unit'
4
+ require 'tempfile'
4
5
 
5
6
  class StackProfTest < Test::Unit::TestCase
6
7
  def test_info
@@ -86,6 +87,19 @@ class StackProfTest < Test::Unit::TestCase
86
87
  assert_equal [10, 10], frame[:lines][__LINE__-10]
87
88
  end
88
89
 
90
+ def test_raw
91
+ profile = StackProf.run(mode: :custom, raw: true) do
92
+ 10.times do
93
+ StackProf.sample
94
+ end
95
+ end
96
+
97
+ raw = profile[:raw]
98
+ assert_equal 10, raw[-1]
99
+ assert_equal raw[0] + 2, raw.size
100
+ assert_equal 'block (2 levels) in StackProfTest#test_raw', profile[:frames][raw[-2]][:name]
101
+ end
102
+
89
103
  def test_fork
90
104
  StackProf.run do
91
105
  pid = fork do
@@ -109,6 +123,18 @@ class StackProfTest < Test::Unit::TestCase
109
123
  assert_equal 0, profile[:missed_samples]
110
124
  end
111
125
 
126
+ def test_out
127
+ tmpfile = Tempfile.new('stackprof-out')
128
+ ret = StackProf.run(mode: :custom, out: tmpfile) do
129
+ StackProf.sample
130
+ end
131
+
132
+ assert_equal tmpfile, ret
133
+ tmpfile.rewind
134
+ profile = Marshal.load(tmpfile.read)
135
+ assert_not_empty profile[:frames]
136
+ end
137
+
112
138
  def math
113
139
  250_000.times do
114
140
  2 ** 10
@@ -0,0 +1,134 @@
1
+ Flame Graphs visualize profiled code-paths.
2
+
3
+ Website: http://www.brendangregg.com/flamegraphs.html
4
+
5
+ CPU profiling using DTrace, perf_events, SystemTap, or ktap: http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html
6
+ CPU profiling using XCode Instruments: http://schani.wordpress.com/2012/11/16/flame-graphs-for-instruments/
7
+ CPU profiling using Xperf.exe: http://randomascii.wordpress.com/2013/03/26/summarizing-xperf-cpu-usage-with-flame-graphs/
8
+ Memory profiling: http://www.brendangregg.com/FlameGraphs/memoryflamegraphs.html
9
+
10
+ These can be created in three steps:
11
+
12
+ 1. Capture stacks
13
+ 2. Fold stacks
14
+ 3. flamegraph.pl
15
+
16
+
17
+ 1. Capture stacks
18
+ =================
19
+ Stack samples can be captured using DTrace, perf_events or SystemTap.
20
+
21
+ Using DTrace to capture 60 seconds of kernel stacks at 997 Hertz:
22
+
23
+ # dtrace -x stackframes=100 -n 'profile-997 /arg0/ { @[stack()] = count(); } tick-60s { exit(0); }' -o out.kern_stacks
24
+
25
+ Using DTrace to capture 60 seconds of user-level stacks for PID 12345 at 97 Hertz:
26
+
27
+ # dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345 && arg1/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
28
+
29
+ Using DTrace to capture 60 seconds of user-level stacks, including while time is spent in the kernel, for PID 12345 at 97 Hertz:
30
+
31
+ # dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks
32
+
33
+ Switch ustack() for jstack() if the application has a ustack helper to include translated frames (eg, node.js frames; see: http://dtrace.org/blogs/dap/2012/01/05/where-does-your-node-program-spend-its-time/). The rate for user-level stack collection is deliberately slower than kernel, which is especially important when using jstack() as it performs additional work to translate frames.
34
+
35
+ 2. Fold stacks
36
+ ==============
37
+ Use the stackcollapse programs to fold stack samples into single lines. The programs provided are:
38
+
39
+ - stackcollapse.pl: for DTrace stacks
40
+ - stackcollapse-perf.pl: for perf_events "perf script" output
41
+ - stackcollapse-stap.pl: for SystemTap stacks
42
+ - stackcollapse-instruments.pl: for XCode Instruments
43
+
44
+ Usage example:
45
+
46
+ $ ./stackcollapse.pl out.kern_stacks > out.kern_folded
47
+
48
+ The output looks like this:
49
+
50
+ unix`_sys_sysenter_post_swapgs 1401
51
+ unix`_sys_sysenter_post_swapgs;genunix`close 5
52
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf 85
53
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_closef 26
54
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;c2audit`audit_setf 5
55
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_getstate 6
56
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`audit_unfalloc 2
57
+ unix`_sys_sysenter_post_swapgs;genunix`close;genunix`closeandsetf;genunix`closef 48
58
+ [...]
59
+
60
+ 3. flamegraph.pl
61
+ ================
62
+ Use flamegraph.pl to render a SVG.
63
+
64
+ $ ./flamegraph.pl out.kern_folded > kernel.svg
65
+
66
+ An advantage of having the folded input file (and the reason folding is a separate step from flamegraph.pl) is that you can use grep to select functions of interest. E.g.:
67
+
68
+ $ grep cpuid out.kern_folded | ./flamegraph.pl > cpuid.svg
69
+
70
+
71
+ Provided Example
72
+ ================
73
+ An example output from DTrace is included, both the captured stacks and
74
+ the resulting Flame Graph. You can generate it yourself using:
75
+
76
+ $ ./stackcollapse.pl example-stacks.txt | ./flamegraph.pl > example.svg
77
+
78
+ This was from a particular performance investigation: the Flame Graph
79
+ identified that CPU time was spent in the lofs module, and quantified
80
+ that time.
81
+
82
+
83
+ Options
84
+ =======
85
+ See the USAGE message (--help) for options:
86
+
87
+ USAGE: ./flamegraph.pl [options] infile > outfile.svg
88
+
89
+ --titletext # change title text
90
+ --width # width of image (default 1200)
91
+ --height # height of each frame (default 16)
92
+ --minwidth # omit smaller functions (default 0.1 pixels)
93
+ --fonttype # font type (default "Verdana")
94
+ --fontsize # font size (default 12)
95
+ --countname # count type label (default "samples")
96
+ --nametype # name type label (default "Function:")
97
+ --colors # "hot", "mem", "io" palette (default "hot")
98
+ --hash # colors are keyed by function name hash
99
+ --cp # use consistent palette (palette.map)
100
+ eg,
101
+ ./flamegraph.pl --titletext="Flame Graph: malloc()" trace.txt > graph.svg
102
+
103
+ As suggested in the example, flame graphs can process traces of any event,
104
+ such as malloc()s, provided stack traces are gathered.
105
+
106
+
107
+ Consistent Palette
108
+ ==================
109
+ If you use the --cp option, it will use the $colors selection and randomly
110
+ generate the palette like normal. Any future flamegraphs created using the --cp
111
+ option will use the same palette map. Any new symbols from future flamegraphs
112
+ will have their colors randomly generated using the $colors selection.
113
+
114
+ If you don't like the palette, just delete the palette.map file.
115
+
116
+ This allows you to change your colorscheme between flamegraphs to make the
117
+ differences REALLY stand out.
118
+
119
+ Example:
120
+
121
+ Say we have 2 captures, one with a problem, and one when it was working
122
+ (whatever "it" is):
123
+
124
+ cat working.folded | ./flamegraph.pl --cp > working.svg
125
+ # this generates a palette.map, as per the normal random generated look.
126
+
127
+ cat broken.folded | ./flamegraph.pl --cp --colors mem > broken.svg
128
+ # this svg will use the same palette.map for the same events, but a very
129
+ # different colorscheme for any new events.
130
+
131
+ Take a look at the demo directory for an example:
132
+
133
+ palette-example-working.svg
134
+ palette-example-broken.svg