stackprof 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +18 -0
- data/README.md +197 -0
- data/Rakefile +31 -0
- data/bin/stackprof +3 -0
- data/ext/extconf.rb +2 -0
- data/ext/stackprof.c +315 -0
- data/lib/stackprof/report.rb +71 -0
- data/sample.rb +43 -0
- data/stackprof.gemspec +21 -0
- data/test/test_stackprof.rb +57 -0
- metadata +72 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1e9663289cf7257e1ce593f22f1bc635b0d4406d
|
4
|
+
data.tar.gz: 56484ba1a6c48b2c9c41bb056500b7aa432db974
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4a0062a181c777976147420395729d65eff6fa4ef10a6624153629bcaed7a5ed8c6b071e2e30176d6a28b3d6c0dcd4ed1ab5b4cc8373fbdbc814ed57dff20a93
|
7
|
+
data.tar.gz: 1e004d7757b016d00cda696be9fe6a4769aa11b97651228069d63df4193219c16bbc2f9c44800234d9d98855db04dde6f1cbcc68a7c1913949c43f18c68a419e
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/README.md
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
## stackprof
|
2
|
+
|
3
|
+
a sampling call-stack profiler for ruby 2.1+
|
4
|
+
|
5
|
+
inspired heavily by [gperftools](https://code.google.com/p/gperftools/),
|
6
|
+
and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb)
|
7
|
+
|
8
|
+
### sampling
|
9
|
+
|
10
|
+
three sampling modes are supported:
|
11
|
+
|
12
|
+
- cpu time (using `ITIMER_PROF` and `SIGPROF`)
|
13
|
+
- wall time (using `ITIMER_REAL` and `SIGALRM`)
|
14
|
+
- object allocation (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
|
15
|
+
|
16
|
+
samplers have a tuneable interval which can be used to reduce overhead or increase granularity:
|
17
|
+
|
18
|
+
- cpu time: sample every _interval_ microseconds of cpu activity (default: 10000 = 10 milliseconds)
|
19
|
+
- wall time: sample every _interval_ microseconds of wallclock time (default: 10000)
|
20
|
+
- object allocation: sample every _interval_ allocations (default: 1)
|
21
|
+
|
22
|
+
samples are taken using a combination of three new C-APIs in ruby 2.1:
|
23
|
+
|
24
|
+
- signal handlers enqueue a sampling job using `rb_postponed_job_register_one`.
|
25
|
+
this ensures callstack samples can be taken safely, in case the VM is garbage collecting
|
26
|
+
or in some other inconsistent state during the interruption.
|
27
|
+
|
28
|
+
- stack frames are collected via `rb_profile_frames`, which provides low-overhead C-API access
|
29
|
+
to the VM's call stack. no object allocations occur in this path, allowing stackprof to collect
|
30
|
+
callstacks in allocation mode.
|
31
|
+
|
32
|
+
- in allocation mode, samples are taken via `rb_tracepoint_new(RUBY_INTERNAL_EVENT_NEWOBJ)`,
|
33
|
+
which provides a notification every time the VM allocates a new object.
|
34
|
+
|
35
|
+
### aggregation
|
36
|
+
|
37
|
+
each sample consists of N stack frames, where a frame looks something like `MyClass#method` or `block in MySingleton.method`.
|
38
|
+
for each of these frames in the sample, the profiler collects a few pieces of metadata:
|
39
|
+
|
40
|
+
- samples: number of samples where this was the topmost frame
|
41
|
+
- total_samples: samples where this frame was in the stack
|
42
|
+
- lines: samples per line number in this frame
|
43
|
+
- edges: samples per callee frame (methods invoked by this frame)
|
44
|
+
|
45
|
+
the aggregation algorithm is roughly equivalent to the following pseudo code:
|
46
|
+
|
47
|
+
``` ruby
|
48
|
+
trap('PROF') do
|
49
|
+
top, *rest = caller
|
50
|
+
|
51
|
+
top.samples += 1
|
52
|
+
top.lines[top.lineno] += 1
|
53
|
+
top.total_samples += 1
|
54
|
+
|
55
|
+
prev = top
|
56
|
+
rest.each do |frame|
|
57
|
+
frame.edges[prev] += 1
|
58
|
+
frame.total_samples += 1
|
59
|
+
prev = frame
|
60
|
+
end
|
61
|
+
end
|
62
|
+
```
|
63
|
+
|
64
|
+
this technique builds up an incremental callgraph from the samples. on any given frame,
|
65
|
+
the sum of the outbound edge weights plus the frame's own (topmost) samples is equal to total samples collected on that frame
|
66
|
+
(`frame.total_samples == frame.samples + frame.edges.values.sum`).
|
67
|
+
|
68
|
+
### reporting
|
69
|
+
|
70
|
+
three reporting modes are supported:
|
71
|
+
- text
|
72
|
+
- dotgraph
|
73
|
+
- source annotation
|
74
|
+
|
75
|
+
#### `StackProf::Report.new(data).print_text`
|
76
|
+
|
77
|
+
```
|
78
|
+
TOTAL (pct) SAMPLES (pct) FRAME
|
79
|
+
91 (48.4%) 91 (48.4%) A#pow
|
80
|
+
58 (30.9%) 58 (30.9%) A.newobj
|
81
|
+
34 (18.1%) 34 (18.1%) block in A#math
|
82
|
+
188 (100.0%) 3 (1.6%) block (2 levels) in <main>
|
83
|
+
185 (98.4%) 1 (0.5%) A#initialize
|
84
|
+
35 (18.6%) 1 (0.5%) A#math
|
85
|
+
188 (100.0%) 0 (0.0%) <main>
|
86
|
+
188 (100.0%) 0 (0.0%) block in <main>
|
87
|
+
188 (100.0%) 0 (0.0%) <main>
|
88
|
+
```
|
89
|
+
|
90
|
+
#### `StackProf::Report.new(data).print_graphviz`
|
91
|
+
|
92
|
+
![](http://cl.ly/image/2t3l2q0l0B0A/content)
|
93
|
+
|
94
|
+
```
|
95
|
+
digraph profile {
|
96
|
+
70346498324780 [size=23.5531914893617] [fontsize=23.5531914893617] [shape=box] [label="A#pow\n91 (48.4%)\r"];
|
97
|
+
70346498324680 [size=18.638297872340424] [fontsize=18.638297872340424] [shape=box] [label="A.newobj\n58 (30.9%)\r"];
|
98
|
+
70346498324480 [size=15.063829787234042] [fontsize=15.063829787234042] [shape=box] [label="block in A#math\n34 (18.1%)\r"];
|
99
|
+
70346498324220 [size=10.446808510638299] [fontsize=10.446808510638299] [shape=box] [label="block (2 levels) in <main>\n3 (1.6%)\rof 188 (100.0%)\r"];
|
100
|
+
70346498324220 -> 70346498324900 [label="185"];
|
101
|
+
70346498324900 [size=10.148936170212766] [fontsize=10.148936170212766] [shape=box] [label="A#initialize\n1 (0.5%)\rof 185 (98.4%)\r"];
|
102
|
+
70346498324900 -> 70346498324780 [label="91"];
|
103
|
+
70346498324900 -> 70346498324680 [label="58"];
|
104
|
+
70346498324900 -> 70346498324580 [label="35"];
|
105
|
+
70346498324580 [size=10.148936170212766] [fontsize=10.148936170212766] [shape=box] [label="A#math\n1 (0.5%)\rof 35 (18.6%)\r"];
|
106
|
+
70346498324580 -> 70346498324480 [label="34"];
|
107
|
+
70346497983360 [size=10.0] [fontsize=10.0] [shape=box] [label="<main>\n0 (0.0%)\rof 188 (100.0%)\r"];
|
108
|
+
70346497983360 -> 70346498325080 [label="188"];
|
109
|
+
70346498324300 [size=10.0] [fontsize=10.0] [shape=box] [label="block in <main>\n0 (0.0%)\rof 188 (100.0%)\r"];
|
110
|
+
70346498324300 -> 70346498324220 [label="188"];
|
111
|
+
70346498325080 [size=10.0] [fontsize=10.0] [shape=box] [label="<main>\n0 (0.0%)\rof 188 (100.0%)\r"];
|
112
|
+
70346498325080 -> 70346498324300 [label="188"];
|
113
|
+
}
|
114
|
+
```
|
115
|
+
|
116
|
+
#### `StackProf::Report.new(data).print_source(/pow|newobj|math/)`
|
117
|
+
|
118
|
+
```
|
119
|
+
A#pow (/Users/tmm1/code/stackprof/sample.rb:11)
|
120
|
+
| 11 | def pow
|
121
|
+
91 (48.4% / 100.0%) | 12 | 2 ** 100
|
122
|
+
| 13 | end
|
123
|
+
A.newobj (/Users/tmm1/code/stackprof/sample.rb:15)
|
124
|
+
| 15 | def self.newobj
|
125
|
+
33 (17.6% / 56.9%) | 16 | Object.new
|
126
|
+
25 (13.3% / 43.1%) | 17 | Object.new
|
127
|
+
| 18 | end
|
128
|
+
A#math (/Users/tmm1/code/stackprof/sample.rb:20)
|
129
|
+
| 20 | def math
|
130
|
+
1 (0.5% / 100.0%) | 21 | 2.times do
|
131
|
+
| 22 | 2 + 3 * 4 ^ 5 / 6
|
132
|
+
block in A#math (/Users/tmm1/code/stackprof/sample.rb:21)
|
133
|
+
| 21 | 2.times do
|
134
|
+
34 (18.1% / 100.0%) | 22 | 2 + 3 * 4 ^ 5 / 6
|
135
|
+
| 23 | end
|
136
|
+
```
|
137
|
+
|
138
|
+
### usage
|
139
|
+
|
140
|
+
the profiler is compiled as a C-extension and exposes a simple api: `StackProf.run(mode, interval)`.
|
141
|
+
the `run` method takes a block of code and returns a profile as a simple hash.
|
142
|
+
|
143
|
+
``` ruby
|
144
|
+
profile = StackProf.run(sampling_mode, sampling_interval) do
|
145
|
+
MyCode.execute
|
146
|
+
end
|
147
|
+
```
|
148
|
+
|
149
|
+
this profile data structure is part of the public API, and is intended to be saved
|
150
|
+
(as json/marshal for example) for later processing. the reports above can be generated
|
151
|
+
by passing this structure into `StackProf::Report.new`.
|
152
|
+
|
153
|
+
the format itself is very simple. it contains a header and a list of frames. each frame has a unique id and
|
154
|
+
identifying information such as its name, file and line. the frame also contains sampling data, including per-line
|
155
|
+
samples, and a list of relationships to other frames represented as weighted edges.
|
156
|
+
|
157
|
+
```
|
158
|
+
{:version=>1.0,
|
159
|
+
:mode=>"cpu(1000)",
|
160
|
+
:samples=>188,
|
161
|
+
:frames=>
|
162
|
+
{70346498324780=>
|
163
|
+
{:name=>"A#pow",
|
164
|
+
:file=>"/Users/tmm1/code/stackprof/sample.rb",
|
165
|
+
:line=>11,
|
166
|
+
:total_samples=>91,
|
167
|
+
:samples=>91,
|
168
|
+
:lines=>{12=>91}},
|
169
|
+
70346498324900=>
|
170
|
+
{:name=>"A#initialize",
|
171
|
+
:file=>"/Users/tmm1/code/stackprof/sample.rb",
|
172
|
+
:line=>5,
|
173
|
+
:total_samples=>185,
|
174
|
+
:samples=>1,
|
175
|
+
:edges=>{70346498324780=>91, 70346498324680=>58, 70346498324580=>35},
|
176
|
+
:lines=>{8=>1}},
|
177
|
+
```
|
178
|
+
|
179
|
+
above, `A#pow` was involved in 91 samples, and in all cases it was at the top of the stack on line 12.
|
180
|
+
|
181
|
+
`A#initialize` was in 185 samples, but it was at the top of the stack in only 1 sample. the rest of the samples are
|
182
|
+
divided up between its callee edges. all 91 calls to `A#pow` came from `A#initialize`, as seen by the edge numbered
|
183
|
+
`70346498324780`.
|
184
|
+
|
185
|
+
### advanced usage
|
186
|
+
|
187
|
+
the profiler can be started, paused, resumed and stopped manually for greater control.
|
188
|
+
|
189
|
+
```
|
190
|
+
StackProf.running?
|
191
|
+
StackProf.start
|
192
|
+
StackProf.pause
|
193
|
+
StackProf.paused?
|
194
|
+
StackProf.resume
|
195
|
+
StackProf.stop
|
196
|
+
StackProf.results
|
197
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
task :default => :test

# ==========================================================
# Packaging
# ==========================================================

# Load the gemspec through RubyGems instead of eval'ing the file contents by hand.
GEMSPEC = Gem::Specification.load('stackprof.gemspec')

require 'rubygems/package_task'
# NOTE(review): the empty block is kept on purpose -- Gem::PackageTask appears to
# define its tasks only when a block is given; confirm before removing it.
Gem::PackageTask.new(GEMSPEC) do |pkg|
end

# ==========================================================
# Ruby Extension
# ==========================================================

require 'rake/extensiontask'
Rake::ExtensionTask.new('stackprof', GEMSPEC) do |ext|
  # the C sources live directly in ext/
  ext.ext_dir = 'ext'
end
task :build => :compile

# ==========================================================
# Testing
# ==========================================================

require 'rake/testtask'
Rake::TestTask.new 'test' do |t|
  t.test_files = FileList['test/test_*.rb']
end
# the extension must be compiled before the tests can load it
task :test => :build
|
data/bin/stackprof
ADDED
data/ext/extconf.rb
ADDED
data/ext/stackprof.c
ADDED
@@ -0,0 +1,315 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
|
3
|
+
stackprof.c - Sampling call-stack frame profiler for MRI.
|
4
|
+
|
5
|
+
$Author$
|
6
|
+
created at: Thu May 30 17:55:25 2013
|
7
|
+
|
8
|
+
NOTE: This extension library is only expected to work with C Ruby (MRI).
|
9
|
+
|
10
|
+
All the files in this distribution are covered under Ruby's
|
11
|
+
license (see the file COPYING).
|
12
|
+
|
13
|
+
**********************************************************************/
|
14
|
+
|
15
|
+
#include <ruby/ruby.h>
|
16
|
+
#include <ruby/debug.h>
|
17
|
+
#include <ruby/st.h>
|
18
|
+
#include <sys/time.h>
|
19
|
+
|
20
|
+
#define BUF_SIZE 2048
|
21
|
+
|
22
|
+
typedef struct {
|
23
|
+
size_t total_samples;
|
24
|
+
size_t caller_samples;
|
25
|
+
st_table *edges;
|
26
|
+
st_table *lines;
|
27
|
+
} frame_data_t;
|
28
|
+
|
29
|
+
static struct {
|
30
|
+
enum {
|
31
|
+
PROF_NONE = 0,
|
32
|
+
PROF_CPU,
|
33
|
+
PROF_WALL,
|
34
|
+
PROF_OBJECT
|
35
|
+
} type;
|
36
|
+
|
37
|
+
size_t overall_samples;
|
38
|
+
st_table *frames;
|
39
|
+
|
40
|
+
VALUE frames_buffer[BUF_SIZE];
|
41
|
+
int lines_buffer[BUF_SIZE];
|
42
|
+
} _results;
|
43
|
+
|
44
|
+
static VALUE sym_object, sym_wall, sym_name, sym_file, sym_line;
|
45
|
+
static VALUE sym_samples, sym_total_samples, sym_edges, sym_lines;
|
46
|
+
static VALUE sym_version, sym_mode, sym_frames;
|
47
|
+
static VALUE objtracer;
|
48
|
+
static VALUE gc_hook;
|
49
|
+
|
50
|
+
static void stackprof_newobj_handler(VALUE, void*);
|
51
|
+
static void stackprof_signal_handler(int sig, siginfo_t* sinfo, void* ucontext);
|
52
|
+
|
53
|
+
static VALUE
|
54
|
+
stackprof_start(VALUE self, VALUE type, VALUE usec)
|
55
|
+
{
|
56
|
+
if (type == sym_object) {
|
57
|
+
_results.type = PROF_OBJECT;
|
58
|
+
objtracer = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, stackprof_newobj_handler, 0);
|
59
|
+
rb_tracepoint_enable(objtracer);
|
60
|
+
} else {
|
61
|
+
if (type == sym_wall)
|
62
|
+
_results.type = PROF_WALL;
|
63
|
+
else
|
64
|
+
_results.type = PROF_CPU;
|
65
|
+
|
66
|
+
struct sigaction sa;
|
67
|
+
sa.sa_sigaction = stackprof_signal_handler;
|
68
|
+
sa.sa_flags = SA_RESTART | SA_SIGINFO;
|
69
|
+
sigemptyset(&sa.sa_mask);
|
70
|
+
sigaction(_results.type == PROF_WALL ? SIGALRM : SIGPROF, &sa, NULL);
|
71
|
+
|
72
|
+
struct itimerval timer;
|
73
|
+
timer.it_interval.tv_sec = 0;
|
74
|
+
timer.it_interval.tv_usec = NUM2LONG(usec);
|
75
|
+
timer.it_value = timer.it_interval;
|
76
|
+
setitimer(_results.type == PROF_WALL ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
77
|
+
}
|
78
|
+
|
79
|
+
return Qnil;
|
80
|
+
}
|
81
|
+
|
82
|
+
static VALUE
|
83
|
+
stackprof_stop(VALUE self)
|
84
|
+
{
|
85
|
+
if (_results.type == PROF_OBJECT) {
|
86
|
+
rb_tracepoint_disable(objtracer);
|
87
|
+
} else {
|
88
|
+
struct itimerval timer;
|
89
|
+
memset(&timer, 0, sizeof(timer));
|
90
|
+
setitimer(_results.type == PROF_WALL ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
91
|
+
|
92
|
+
struct sigaction sa;
|
93
|
+
sa.sa_handler = SIG_IGN;
|
94
|
+
sa.sa_flags = SA_RESTART;
|
95
|
+
sigemptyset(&sa.sa_mask);
|
96
|
+
sigaction(_results.type == PROF_WALL ? SIGALRM : SIGPROF, &sa, NULL);
|
97
|
+
}
|
98
|
+
|
99
|
+
return Qnil;
|
100
|
+
}
|
101
|
+
|
102
|
+
static int
|
103
|
+
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
104
|
+
{
|
105
|
+
VALUE edges = (VALUE)arg;
|
106
|
+
|
107
|
+
intptr_t weight = (intptr_t)val;
|
108
|
+
rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
|
109
|
+
return ST_CONTINUE;
|
110
|
+
}
|
111
|
+
|
112
|
+
static int
|
113
|
+
frame_lines_i(st_data_t key, st_data_t val, st_data_t arg)
|
114
|
+
{
|
115
|
+
VALUE lines = (VALUE)arg;
|
116
|
+
|
117
|
+
intptr_t weight = (intptr_t)val;
|
118
|
+
rb_hash_aset(lines, INT2FIX(key), INT2FIX(weight));
|
119
|
+
return ST_CONTINUE;
|
120
|
+
}
|
121
|
+
|
122
|
+
static int
|
123
|
+
frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
124
|
+
{
|
125
|
+
VALUE frame = (VALUE)key;
|
126
|
+
frame_data_t *frame_data = (frame_data_t *)val;
|
127
|
+
VALUE results = (VALUE)arg;
|
128
|
+
VALUE details = rb_hash_new();
|
129
|
+
VALUE name, file, edges, lines;
|
130
|
+
VALUE label, method_name;
|
131
|
+
VALUE line;
|
132
|
+
|
133
|
+
rb_hash_aset(results, rb_obj_id(frame), details);
|
134
|
+
|
135
|
+
name = rb_profile_frame_full_label(frame);
|
136
|
+
rb_hash_aset(details, sym_name, name);
|
137
|
+
|
138
|
+
file = rb_profile_frame_absolute_path(frame);
|
139
|
+
if (NIL_P(file))
|
140
|
+
file = rb_profile_frame_path(frame);
|
141
|
+
rb_hash_aset(details, sym_file, file);
|
142
|
+
|
143
|
+
if ((line = rb_profile_frame_first_lineno(frame)) != INT2FIX(0))
|
144
|
+
rb_hash_aset(details, sym_line, line);
|
145
|
+
|
146
|
+
rb_hash_aset(details, sym_total_samples, SIZET2NUM(frame_data->total_samples));
|
147
|
+
rb_hash_aset(details, sym_samples, SIZET2NUM(frame_data->caller_samples));
|
148
|
+
|
149
|
+
if (frame_data->edges) {
|
150
|
+
edges = rb_hash_new();
|
151
|
+
rb_hash_aset(details, sym_edges, edges);
|
152
|
+
st_foreach(frame_data->edges, frame_edges_i, (st_data_t)edges);
|
153
|
+
st_free_table(frame_data->edges);
|
154
|
+
frame_data->edges = NULL;
|
155
|
+
}
|
156
|
+
|
157
|
+
if (frame_data->lines) {
|
158
|
+
lines = rb_hash_new();
|
159
|
+
rb_hash_aset(details, sym_lines, lines);
|
160
|
+
st_foreach(frame_data->lines, frame_lines_i, (st_data_t)lines);
|
161
|
+
st_free_table(frame_data->lines);
|
162
|
+
frame_data->lines = NULL;
|
163
|
+
}
|
164
|
+
|
165
|
+
xfree(frame_data);
|
166
|
+
return ST_DELETE;
|
167
|
+
}
|
168
|
+
|
169
|
+
static VALUE
|
170
|
+
stackprof_run(VALUE self, VALUE type, VALUE usec)
|
171
|
+
{
|
172
|
+
VALUE results, frames;
|
173
|
+
rb_need_block();
|
174
|
+
if (!_results.frames)
|
175
|
+
_results.frames = st_init_numtable();
|
176
|
+
_results.overall_samples = 0;
|
177
|
+
|
178
|
+
stackprof_start(self, type, usec);
|
179
|
+
rb_yield(Qundef);
|
180
|
+
stackprof_stop(self);
|
181
|
+
|
182
|
+
results = rb_hash_new();
|
183
|
+
rb_hash_aset(results, sym_version, DBL2NUM(1.0));
|
184
|
+
rb_hash_aset(results, sym_mode, rb_sprintf("%"PRIsVALUE"(%"PRIsVALUE")", type, usec));
|
185
|
+
rb_hash_aset(results, sym_samples, SIZET2NUM(_results.overall_samples));
|
186
|
+
|
187
|
+
frames = rb_hash_new();
|
188
|
+
rb_hash_aset(results, sym_frames, frames);
|
189
|
+
st_foreach(_results.frames, frame_i, (st_data_t)frames);
|
190
|
+
|
191
|
+
return results;
|
192
|
+
}
|
193
|
+
|
194
|
+
static inline frame_data_t *
|
195
|
+
sample_for(VALUE frame)
|
196
|
+
{
|
197
|
+
st_data_t key = (st_data_t)frame, val = 0;
|
198
|
+
frame_data_t *frame_data;
|
199
|
+
|
200
|
+
if (st_lookup(_results.frames, key, &val)) {
|
201
|
+
frame_data = (frame_data_t *)val;
|
202
|
+
} else {
|
203
|
+
frame_data = ALLOC_N(frame_data_t, 1);
|
204
|
+
MEMZERO(frame_data, frame_data_t, 1);
|
205
|
+
val = (st_data_t)frame_data;
|
206
|
+
st_insert(_results.frames, key, val);
|
207
|
+
}
|
208
|
+
|
209
|
+
return frame_data;
|
210
|
+
}
|
211
|
+
|
212
|
+
void
|
213
|
+
st_numtable_increment(st_table *table, st_data_t key)
|
214
|
+
{
|
215
|
+
intptr_t weight = 0;
|
216
|
+
st_lookup(table, key, (st_data_t *)&weight);
|
217
|
+
weight++;
|
218
|
+
st_insert(table, key, weight);
|
219
|
+
}
|
220
|
+
|
221
|
+
static void
|
222
|
+
stackprof_sample()
|
223
|
+
{
|
224
|
+
int num, i;
|
225
|
+
VALUE prev_frame;
|
226
|
+
st_data_t key;
|
227
|
+
|
228
|
+
_results.overall_samples++;
|
229
|
+
num = rb_profile_frames(0, sizeof(_results.frames_buffer), _results.frames_buffer, _results.lines_buffer);
|
230
|
+
|
231
|
+
for (i = 0; i < num; i++) {
|
232
|
+
int line = _results.lines_buffer[i];
|
233
|
+
VALUE frame = _results.frames_buffer[i];
|
234
|
+
frame_data_t *frame_data = sample_for(frame);
|
235
|
+
|
236
|
+
frame_data->total_samples++;
|
237
|
+
|
238
|
+
if (i == 0) {
|
239
|
+
frame_data->caller_samples++;
|
240
|
+
if (line > 0) {
|
241
|
+
if (!frame_data->lines)
|
242
|
+
frame_data->lines = st_init_numtable();
|
243
|
+
st_numtable_increment(frame_data->lines, (st_data_t)line);
|
244
|
+
}
|
245
|
+
} else {
|
246
|
+
if (!frame_data->edges)
|
247
|
+
frame_data->edges = st_init_numtable();
|
248
|
+
st_numtable_increment(frame_data->edges, (st_data_t)prev_frame);
|
249
|
+
}
|
250
|
+
|
251
|
+
prev_frame = frame;
|
252
|
+
}
|
253
|
+
}
|
254
|
+
|
255
|
+
/*
 * Take a sample unless one is already in progress; the static flag makes a
 * nested invocation return without sampling.
 */
static void
stackprof_job_handler(void *data)
{
    static int in_signal_handler = 0;

    if (!in_signal_handler) {
        in_signal_handler = 1;
        stackprof_sample();
        in_signal_handler = 0;
    }
}
|
265
|
+
|
266
|
+
static void
|
267
|
+
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
268
|
+
{
|
269
|
+
rb_postponed_job_register_one(0, stackprof_job_handler, 0);
|
270
|
+
}
|
271
|
+
|
272
|
+
static void
|
273
|
+
stackprof_newobj_handler(VALUE tpval, void *data)
|
274
|
+
{
|
275
|
+
stackprof_job_handler(0);
|
276
|
+
}
|
277
|
+
|
278
|
+
static int
|
279
|
+
frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
280
|
+
{
|
281
|
+
VALUE frame = (VALUE)key;
|
282
|
+
rb_gc_mark_maybe(frame);
|
283
|
+
return ST_CONTINUE;
|
284
|
+
}
|
285
|
+
|
286
|
+
static void
|
287
|
+
stackprof_gc_mark()
|
288
|
+
{
|
289
|
+
if (_results.frames)
|
290
|
+
st_foreach(_results.frames, frame_mark_i, 0);
|
291
|
+
}
|
292
|
+
|
293
|
+
void
|
294
|
+
Init_stackprof(void)
|
295
|
+
{
|
296
|
+
sym_object = ID2SYM(rb_intern("object"));
|
297
|
+
sym_name = ID2SYM(rb_intern("name"));
|
298
|
+
sym_wall = ID2SYM(rb_intern("wall"));
|
299
|
+
sym_file = ID2SYM(rb_intern("file"));
|
300
|
+
sym_line = ID2SYM(rb_intern("line"));
|
301
|
+
sym_total_samples = ID2SYM(rb_intern("total_samples"));
|
302
|
+
sym_samples = ID2SYM(rb_intern("samples"));
|
303
|
+
sym_edges = ID2SYM(rb_intern("edges"));
|
304
|
+
sym_lines = ID2SYM(rb_intern("lines"));
|
305
|
+
sym_version = ID2SYM(rb_intern("version"));
|
306
|
+
sym_mode = ID2SYM(rb_intern("mode"));
|
307
|
+
sym_frames = ID2SYM(rb_intern("frames"));
|
308
|
+
|
309
|
+
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, NULL);
|
310
|
+
rb_global_variable(&gc_hook);
|
311
|
+
|
312
|
+
VALUE rb_mStackProf = rb_define_module("StackProf");
|
313
|
+
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, 2);
|
314
|
+
rb_autoload(rb_mStackProf, rb_intern_const("Report"), "stackprof/report.rb");
|
315
|
+
}
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'pp'
|
2
|
+
|
3
|
+
module StackProf
  # Prints a profile hash in several formats.
  #
  # The hash is read through these keys: :samples (overall sample count) and
  # :frames (frame id => stats hash with :name, :file, :line, :samples,
  # :total_samples and, optionally, :edges and :lines).
  class Report
    def initialize(data)
      @data = data
    end

    # Frames as [id, stats] pairs, ordered by descending :samples.
    def frames
      @data[:frames].sort_by { |_id, stats| -stats[:samples] }
    end

    # Total number of samples in the profile.
    def overall_samples
      @data[:samples]
    end

    # Pretty-prints the raw profile hash.
    def print_debug
      pp @data
    end

    # Writes the call graph in graphviz "dot" syntax to $stdout
    # (the same stream the other printers write to).
    def print_graphviz
      f = $stdout
      f.puts "digraph profile {"
      frames.each do |frame, info|
        call, total = info.values_at(:samples, :total_samples)
        # the "N (x%) of" prefix is only shown when the frame also appears below the top
        sample = [
          ("#{call} (%2.1f%%)\\rof " % (call*100.0/overall_samples) if call < total),
          "#{total} (%2.1f%%)\\r" % (total*100.0/overall_samples)
        ].compact.join
        size = (1.0 * call / overall_samples) * 28 + 10

        f.puts " #{frame} [size=#{size}] [fontsize=#{size}] [shape=box] [label=\"#{info[:name]}\\n#{sample}\"];"
        (info[:edges] || {}).each do |edge, weight|
          f.puts " #{frame} -> #{edge} [label=\"#{weight}\"];"
        end
      end
      f.puts "}"
    end

    # Prints one line per frame with total and self sample counts.
    def print_text
      printf "% 10s (pct) % 10s (pct) FRAME\n", "TOTAL", "SAMPLES"
      frames.each do |_frame, info|
        call, total = info.values_at(:samples, :total_samples)
        printf "% 10d % 8s % 10d % 8s %s\n", total, "(%2.1f%%)" % (total*100.0/overall_samples), call, "(%2.1f%%)" % (call*100.0/overall_samples), info[:name]
      end
    end

    # Prints the source of every frame whose name matches +name+ (a Regexp, or
    # a String that is matched literally), annotated with per-line samples.
    #
    # Frames without a :line fall back to their first sampled line (or 1), and
    # frames whose file cannot be read only get their header line printed.
    def print_source(name)
      name = /#{Regexp.escape name}/ unless Regexp === name
      frames.each do |_frame, info|
        next unless info[:name] =~ name
        file, line = info.values_at(:file, :line)
        lines = info[:lines]

        line ||= lines ? lines.keys.min : 1
        maxline = lines ? lines.keys.max : line + 5
        printf "%s (%s:%d)\n", info[:name], file, line

        next unless file && File.file?(file) && File.readable?(file)

        # 0-based indexes of the source lines to show
        window = (line-1..maxline)
        File.readlines(file).each_with_index do |code, i|
          next unless window.include?(i)
          if lines && (samples = lines[i+1])
            printf "% 5d % 7s / % 7s | % 5d | %s", samples, "(%2.1f%%" % (100.0*samples/overall_samples), "%2.1f%%)" % (100.0*samples/info[:samples]), i+1, code
          else
            printf " | % 5d | %s", i+1, code
          end
        end
      end
    end
  end
end
|
data/sample.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
$:.unshift File.expand_path('../lib', __FILE__)
|
2
|
+
require 'stackprof'
|
3
|
+
|
4
|
+
# Small workload for the sample script: building an A runs an exponentiation,
# two object allocations and a short arithmetic block.
class A
  class << self
    # Allocates two objects and returns the second one.
    def newobj
      Object.new
      Object.new
    end
  end

  def initialize
    pow
    self.class.newobj
    math
  end

  def pow
    2 ** 100
  end

  def math
    2.times { 2 + 3 * 4 ^ 5 / 6 }
  end
end
|
26
|
+
|
27
|
+
# Alternative sampling modes, kept for quick switching:
#profile = StackProf.run(:object, 1) do
#profile = StackProf.run(:wall, 1000) do
profile = StackProf.run(:cpu, 1000) { 1_000_000.times { A.new } }

# Print every report format in turn, each preceded by a blank line.
report = StackProf::Report.new(profile)
[
  -> { report.print_source(/pow|newobj|math/) },
  -> { report.print_text },
  -> { report.print_graphviz },
  -> { report.print_debug }
].each do |printer|
  puts
  printer.call
end
|
data/stackprof.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Gem definition for stackprof.
Gem::Specification.new do |spec|
  spec.name        = 'stackprof'
  spec.version     = '0.1.0'
  spec.summary     = 'sampling callstack-profiler for ruby 2.1+'
  spec.description = 'stackprof is a fast sampling profiler for ruby code, with cpu, wallclock and object allocation samplers.'
  spec.homepage    = 'http://github.com/tmm1/stackprof'
  spec.license     = 'MIT'

  spec.authors = ['Aman Gupta']
  spec.email   = 'aman@tmm1.net'

  # package every file tracked by git
  spec.files       = `git ls-files`.split("\n")
  spec.extensions  = ['ext/extconf.rb']
  spec.bindir      = 'bin'
  spec.executables = %w[stackprof]

  spec.add_development_dependency 'rake-compiler'
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
$:.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
require 'stackprof'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for StackProf.run in each sampling mode.
class StackProfTest < Test::Unit::TestCase
  # An empty block still yields a well-formed header.
  def test_info
    profile = StackProf.run(:wall, 1000){}
    assert_equal 1.0, profile[:version]
    assert_equal "wall(1000)", profile[:mode]
    assert_equal 0, profile[:samples]
  end

  def test_object_allocation
    # Line numbers are derived from __LINE__ so the assertions do not depend
    # on where this method sits in the file.
    block_line = __LINE__ + 1
    profile = StackProf.run(:object, 1) do
      Object.new
      Object.new
    end
    assert_equal "object(1)", profile[:mode]
    assert_equal 2, profile[:samples]

    frame = profile[:frames].values.first
    assert_equal "block in StackProfTest#test_object_allocation", frame[:name]
    assert_equal 2, frame[:samples]
    assert_equal block_line, frame[:line]
    assert_equal 1, frame[:lines][block_line + 1]
    assert_equal 1, frame[:lines][block_line + 2]
  end

  def test_cputime
    profile = StackProf.run(:cpu, 1000) do
      math
    end

    frame = profile[:frames].values.first
    assert_equal "block in StackProfTest#math", frame[:name]
  end

  def test_walltime
    profile = StackProf.run(:wall, 1000) do
      idle
    end

    frame = profile[:frames].values.first
    assert_equal "StackProfTest#idle", frame[:name]
    # 0.2s of sleep sampled every 1000 microseconds is about 200 samples
    assert_in_delta 200, frame[:samples], 5
  end

  # cpu-bound helper profiled by test_cputime
  def math
    250_000.times do
      2 ** 10
    end
  end

  # idle helper profiled by test_walltime
  def idle
    sleep 0.2
  end
end
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: stackprof
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aman Gupta
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake-compiler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: stackprof is a fast sampling profiler for ruby code, with cpu, wallclock
|
28
|
+
and object allocation samplers.
|
29
|
+
email: aman@tmm1.net
|
30
|
+
executables:
|
31
|
+
- stackprof
|
32
|
+
extensions:
|
33
|
+
- ext/extconf.rb
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- ".gitignore"
|
37
|
+
- Gemfile
|
38
|
+
- Gemfile.lock
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- bin/stackprof
|
42
|
+
- ext/extconf.rb
|
43
|
+
- ext/stackprof.c
|
44
|
+
- lib/stackprof/report.rb
|
45
|
+
- sample.rb
|
46
|
+
- stackprof.gemspec
|
47
|
+
- test/test_stackprof.rb
|
48
|
+
homepage: http://github.com/tmm1/stackprof
|
49
|
+
licenses:
|
50
|
+
- MIT
|
51
|
+
metadata: {}
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options: []
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
requirements: []
|
67
|
+
rubyforge_project:
|
68
|
+
rubygems_version: 2.2.0.preview.1
|
69
|
+
signing_key:
|
70
|
+
specification_version: 4
|
71
|
+
summary: sampling callstack-profiler for ruby 2.1+
|
72
|
+
test_files: []
|