stackprof 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +18 -0
- data/README.md +197 -0
- data/Rakefile +31 -0
- data/bin/stackprof +3 -0
- data/ext/extconf.rb +2 -0
- data/ext/stackprof.c +315 -0
- data/lib/stackprof/report.rb +71 -0
- data/sample.rb +43 -0
- data/stackprof.gemspec +21 -0
- data/test/test_stackprof.rb +57 -0
- metadata +72 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1e9663289cf7257e1ce593f22f1bc635b0d4406d
|
4
|
+
data.tar.gz: 56484ba1a6c48b2c9c41bb056500b7aa432db974
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4a0062a181c777976147420395729d65eff6fa4ef10a6624153629bcaed7a5ed8c6b071e2e30176d6a28b3d6c0dcd4ed1ab5b4cc8373fbdbc814ed57dff20a93
|
7
|
+
data.tar.gz: 1e004d7757b016d00cda696be9fe6a4769aa11b97651228069d63df4193219c16bbc2f9c44800234d9d98855db04dde6f1cbcc68a7c1913949c43f18c68a419e
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/README.md
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
## stackprof
|
2
|
+
|
3
|
+
a sampling call-stack profiler for ruby 2.1+
|
4
|
+
|
5
|
+
inspired heavily by [gperftools](https://code.google.com/p/gperftools/),
|
6
|
+
and written as a replacement for [perftools.rb](https://github.com/tmm1/perftools.rb)
|
7
|
+
|
8
|
+
### sampling
|
9
|
+
|
10
|
+
three sampling modes are supported:
|
11
|
+
|
12
|
+
- cpu time (using `ITIMER_PROF` and `SIGPROF`)
|
13
|
+
- wall time (using `ITIMER_REAL` and `SIGALRM`)
|
14
|
+
- object allocation (using `RUBY_INTERNAL_EVENT_NEWOBJ`)
|
15
|
+
|
16
|
+
samplers have a tuneable interval which can be used to reduce overhead or increase granularity:
|
17
|
+
|
18
|
+
- cpu time: sample every _interval_ microseconds of cpu activity (default: 10000 = 10 milliseconds)
|
19
|
+
- wall time: sample every _interval_ microseconds of wallclock time (default: 10000)
|
20
|
+
- object allocation: sample every _interval_ allocations (default: 1)
|
21
|
+
|
22
|
+
samples are taken using a combination of three new C-APIs in ruby 2.1:
|
23
|
+
|
24
|
+
- signal handlers enqueue a sampling job using `rb_postponed_job_register_one`.
|
25
|
+
this ensures callstack samples can be taken safely, in case the VM is garbage collecting
|
26
|
+
or in some other inconsistent state during the interruption.
|
27
|
+
|
28
|
+
- stack frames are collected via `rb_profile_frames`, which provides low-overhead C-API access
|
29
|
+
to the VM's call stack. no object allocations occur in this path, allowing stackprof to collect
|
30
|
+
callstacks in allocation mode.
|
31
|
+
|
32
|
+
- in allocation mode, samples are taken via `rb_tracepoint_new(RUBY_INTERNAL_EVENT_NEWOBJ)`,
|
33
|
+
which provides a notification every time the VM allocates a new object.
|
34
|
+
|
35
|
+
### aggregation
|
36
|
+
|
37
|
+
each sample consists of N stack frames, where a frame looks something like `MyClass#method` or `block in MySingleton.method`.
|
38
|
+
for each of these frames in the sample, the profiler collects a few pieces of metadata:
|
39
|
+
|
40
|
+
- samples: number of samples where this was the topmost frame
|
41
|
+
- total_samples: samples where this frame was in the stack
|
42
|
+
- lines: samples per line number in this frame
|
43
|
+
- edges: samples per callee frame (methods invoked by this frame)
|
44
|
+
|
45
|
+
the aggregation algorithm is roughly equivalent to the following pseudo code:
|
46
|
+
|
47
|
+
``` ruby
|
48
|
+
trap('PROF') do
|
49
|
+
top, *rest = caller
|
50
|
+
|
51
|
+
top.samples += 1
|
52
|
+
top.lines[top.lineno] += 1
|
53
|
+
top.total_samples += 1
|
54
|
+
|
55
|
+
prev = top
|
56
|
+
rest.each do |frame|
|
57
|
+
frame.edges[prev] += 1
|
58
|
+
frame.total_samples += 1
|
59
|
+
prev = frame
|
60
|
+
end
|
61
|
+
end
|
62
|
+
```
|
63
|
+
|
64
|
+
this technique builds up an incremental callgraph from the samples. on any given frame,
|
65
|
+
the sum of the outbound edge weights is equal to total samples collected on that frame
|
66
|
+
minus the samples in which that frame was itself the topmost frame (`frame.total_samples - frame.samples == frame.edges.values.sum`).
|
67
|
+
|
68
|
+
### reporting
|
69
|
+
|
70
|
+
three reporting modes are supported:
|
71
|
+
- text
|
72
|
+
- dotgraph
|
73
|
+
- source annotation
|
74
|
+
|
75
|
+
#### `StackProf::Report.new(data).print_text`
|
76
|
+
|
77
|
+
```
|
78
|
+
TOTAL (pct) SAMPLES (pct) FRAME
|
79
|
+
91 (48.4%) 91 (48.4%) A#pow
|
80
|
+
58 (30.9%) 58 (30.9%) A.newobj
|
81
|
+
34 (18.1%) 34 (18.1%) block in A#math
|
82
|
+
188 (100.0%) 3 (1.6%) block (2 levels) in <main>
|
83
|
+
185 (98.4%) 1 (0.5%) A#initialize
|
84
|
+
35 (18.6%) 1 (0.5%) A#math
|
85
|
+
188 (100.0%) 0 (0.0%) <main>
|
86
|
+
188 (100.0%) 0 (0.0%) block in <main>
|
87
|
+
188 (100.0%) 0 (0.0%) <main>
|
88
|
+
```
|
89
|
+
|
90
|
+
#### `StackProf::Report.new(data).print_graphviz`
|
91
|
+
|
92
|
+

|
93
|
+
|
94
|
+
```
|
95
|
+
digraph profile {
|
96
|
+
70346498324780 [size=23.5531914893617] [fontsize=23.5531914893617] [shape=box] [label="A#pow\n91 (48.4%)\r"];
|
97
|
+
70346498324680 [size=18.638297872340424] [fontsize=18.638297872340424] [shape=box] [label="A.newobj\n58 (30.9%)\r"];
|
98
|
+
70346498324480 [size=15.063829787234042] [fontsize=15.063829787234042] [shape=box] [label="block in A#math\n34 (18.1%)\r"];
|
99
|
+
70346498324220 [size=10.446808510638299] [fontsize=10.446808510638299] [shape=box] [label="block (2 levels) in <main>\n3 (1.6%)\rof 188 (100.0%)\r"];
|
100
|
+
70346498324220 -> 70346498324900 [label="185"];
|
101
|
+
70346498324900 [size=10.148936170212766] [fontsize=10.148936170212766] [shape=box] [label="A#initialize\n1 (0.5%)\rof 185 (98.4%)\r"];
|
102
|
+
70346498324900 -> 70346498324780 [label="91"];
|
103
|
+
70346498324900 -> 70346498324680 [label="58"];
|
104
|
+
70346498324900 -> 70346498324580 [label="35"];
|
105
|
+
70346498324580 [size=10.148936170212766] [fontsize=10.148936170212766] [shape=box] [label="A#math\n1 (0.5%)\rof 35 (18.6%)\r"];
|
106
|
+
70346498324580 -> 70346498324480 [label="34"];
|
107
|
+
70346497983360 [size=10.0] [fontsize=10.0] [shape=box] [label="<main>\n0 (0.0%)\rof 188 (100.0%)\r"];
|
108
|
+
70346497983360 -> 70346498325080 [label="188"];
|
109
|
+
70346498324300 [size=10.0] [fontsize=10.0] [shape=box] [label="block in <main>\n0 (0.0%)\rof 188 (100.0%)\r"];
|
110
|
+
70346498324300 -> 70346498324220 [label="188"];
|
111
|
+
70346498325080 [size=10.0] [fontsize=10.0] [shape=box] [label="<main>\n0 (0.0%)\rof 188 (100.0%)\r"];
|
112
|
+
70346498325080 -> 70346498324300 [label="188"];
|
113
|
+
}
|
114
|
+
```
|
115
|
+
|
116
|
+
#### `StackProf::Report.new(data).print_source(/pow|newobj|math/)`
|
117
|
+
|
118
|
+
```
|
119
|
+
A#pow (/Users/tmm1/code/stackprof/sample.rb:11)
|
120
|
+
| 11 | def pow
|
121
|
+
91 (48.4% / 100.0%) | 12 | 2 ** 100
|
122
|
+
| 13 | end
|
123
|
+
A.newobj (/Users/tmm1/code/stackprof/sample.rb:15)
|
124
|
+
| 15 | def self.newobj
|
125
|
+
33 (17.6% / 56.9%) | 16 | Object.new
|
126
|
+
25 (13.3% / 43.1%) | 17 | Object.new
|
127
|
+
| 18 | end
|
128
|
+
A#math (/Users/tmm1/code/stackprof/sample.rb:20)
|
129
|
+
| 20 | def math
|
130
|
+
1 (0.5% / 100.0%) | 21 | 2.times do
|
131
|
+
| 22 | 2 + 3 * 4 ^ 5 / 6
|
132
|
+
block in A#math (/Users/tmm1/code/stackprof/sample.rb:21)
|
133
|
+
| 21 | 2.times do
|
134
|
+
34 (18.1% / 100.0%) | 22 | 2 + 3 * 4 ^ 5 / 6
|
135
|
+
| 23 | end
|
136
|
+
```
|
137
|
+
|
138
|
+
### usage
|
139
|
+
|
140
|
+
the profiler is compiled as a C-extension and exposes a simple api: `StackProf.run(mode, interval)`.
|
141
|
+
the `run` method takes a block of code and returns a profile as a simple hash.
|
142
|
+
|
143
|
+
``` ruby
|
144
|
+
profile = StackProf.run(sampling_mode, sampling_interval) do
|
145
|
+
MyCode.execute
|
146
|
+
end
|
147
|
+
```
|
148
|
+
|
149
|
+
this profile data structure is part of the public API, and is intended to be saved
|
150
|
+
(as json/marshal for example) for later processing. the reports above can be generated
|
151
|
+
by passing this structure into `StackProf::Report.new`.
|
152
|
+
|
153
|
+
the format itself is very simple. it contains a header and a list of frames. each frame has a unique id and
|
154
|
+
identifying information such as its name, file and line. the frame also contains sampling data, including per-line
|
155
|
+
samples, and a list of relationships to other frames represented as weighted edges.
|
156
|
+
|
157
|
+
```
|
158
|
+
{:version=>1.0,
|
159
|
+
:mode=>"cpu(1000)",
|
160
|
+
:samples=>188,
|
161
|
+
:frames=>
|
162
|
+
{70346498324780=>
|
163
|
+
{:name=>"A#pow",
|
164
|
+
:file=>"/Users/tmm1/code/stackprof/sample.rb",
|
165
|
+
:line=>11,
|
166
|
+
:total_samples=>91,
|
167
|
+
:samples=>91,
|
168
|
+
:lines=>{12=>91}},
|
169
|
+
70346498324900=>
|
170
|
+
{:name=>"A#initialize",
|
171
|
+
:file=>"/Users/tmm1/code/stackprof/sample.rb",
|
172
|
+
:line=>5,
|
173
|
+
:total_samples=>185,
|
174
|
+
:samples=>1,
|
175
|
+
:edges=>{70346498324780=>91, 70346498324680=>58, 70346498324580=>35},
|
176
|
+
:lines=>{8=>1}},
|
177
|
+
```
|
178
|
+
|
179
|
+
above, `A#pow` was involved in 91 samples, and in all cases it was at the top of the stack on line 12.
|
180
|
+
|
181
|
+
`A#initialize` was in 185 samples, but it was at the top of the stack in only 1 sample. the rest of the samples are
|
182
|
+
divided up between its callee edges. all 91 calls to `A#pow` came from `A#initialize`, as seen by the edge numbered
|
183
|
+
`70346498324780`.
|
184
|
+
|
185
|
+
### advanced usage
|
186
|
+
|
187
|
+
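the profiler can be started, paused, resumed and stopped manually for greater control. (note: the C extension shipped in this release only registers `StackProf.run`; the methods listed below are not defined by it.)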
|
188
|
+
|
189
|
+
```
|
190
|
+
StackProf.running?
|
191
|
+
StackProf.start
|
192
|
+
StackProf.pause
|
193
|
+
StackProf.paused?
|
194
|
+
StackProf.resume
|
195
|
+
StackProf.stop
|
196
|
+
StackProf.results
|
197
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Running plain `rake` runs the test suite.
task default: :test

# ----------------------------------------------------------
# Packaging: build the gem from the gemspec in this directory
# ----------------------------------------------------------

GEMSPEC = eval(File.read('stackprof.gemspec'))

require 'rubygems/package_task'
Gem::PackageTask.new(GEMSPEC) { |_pkg| }

# ----------------------------------------------------------
# Ruby extension: compile the C sources found under ext/
# ----------------------------------------------------------

require 'rake/extensiontask'
Rake::ExtensionTask.new('stackprof', GEMSPEC) { |ext| ext.ext_dir = 'ext' }
task build: :compile

# ----------------------------------------------------------
# Testing: every test/test_*.rb file, after the extension is built
# ----------------------------------------------------------

require 'rake/testtask'
Rake::TestTask.new('test') { |t| t.test_files = FileList['test/test_*.rb'] }
task test: :build
|
data/bin/stackprof
ADDED
data/ext/extconf.rb
ADDED
data/ext/stackprof.c
ADDED
@@ -0,0 +1,315 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
|
3
|
+
stackprof.c - Sampling call-stack frame profiler for MRI.
|
4
|
+
|
5
|
+
$Author$
|
6
|
+
created at: Thu May 30 17:55:25 2013
|
7
|
+
|
8
|
+
NOTE: This extension library is only expected to work with C Ruby (MRI).
|
9
|
+
|
10
|
+
All the files in this distribution are covered under the Ruby's
|
11
|
+
license (see the file COPYING).
|
12
|
+
|
13
|
+
**********************************************************************/
|
14
|
+
|
15
|
+
#include <ruby/ruby.h>
|
16
|
+
#include <ruby/debug.h>
|
17
|
+
#include <ruby/st.h>
|
18
|
+
#include <sys/time.h>
|
19
|
+
|
20
|
+
#define BUF_SIZE 2048
|
21
|
+
|
22
|
+
typedef struct {
|
23
|
+
size_t total_samples;
|
24
|
+
size_t caller_samples;
|
25
|
+
st_table *edges;
|
26
|
+
st_table *lines;
|
27
|
+
} frame_data_t;
|
28
|
+
|
29
|
+
static struct {
|
30
|
+
enum {
|
31
|
+
PROF_NONE = 0,
|
32
|
+
PROF_CPU,
|
33
|
+
PROF_WALL,
|
34
|
+
PROF_OBJECT
|
35
|
+
} type;
|
36
|
+
|
37
|
+
size_t overall_samples;
|
38
|
+
st_table *frames;
|
39
|
+
|
40
|
+
VALUE frames_buffer[BUF_SIZE];
|
41
|
+
int lines_buffer[BUF_SIZE];
|
42
|
+
} _results;
|
43
|
+
|
44
|
+
static VALUE sym_object, sym_wall, sym_name, sym_file, sym_line;
|
45
|
+
static VALUE sym_samples, sym_total_samples, sym_edges, sym_lines;
|
46
|
+
static VALUE sym_version, sym_mode, sym_frames;
|
47
|
+
static VALUE objtracer;
|
48
|
+
static VALUE gc_hook;
|
49
|
+
|
50
|
+
static void stackprof_newobj_handler(VALUE, void*);
|
51
|
+
static void stackprof_signal_handler(int sig, siginfo_t* sinfo, void* ucontext);
|
52
|
+
|
53
|
+
static VALUE
|
54
|
+
stackprof_start(VALUE self, VALUE type, VALUE usec)
|
55
|
+
{
|
56
|
+
if (type == sym_object) {
|
57
|
+
_results.type = PROF_OBJECT;
|
58
|
+
objtracer = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, stackprof_newobj_handler, 0);
|
59
|
+
rb_tracepoint_enable(objtracer);
|
60
|
+
} else {
|
61
|
+
if (type == sym_wall)
|
62
|
+
_results.type = PROF_WALL;
|
63
|
+
else
|
64
|
+
_results.type = PROF_CPU;
|
65
|
+
|
66
|
+
struct sigaction sa;
|
67
|
+
sa.sa_sigaction = stackprof_signal_handler;
|
68
|
+
sa.sa_flags = SA_RESTART | SA_SIGINFO;
|
69
|
+
sigemptyset(&sa.sa_mask);
|
70
|
+
sigaction(_results.type == PROF_WALL ? SIGALRM : SIGPROF, &sa, NULL);
|
71
|
+
|
72
|
+
struct itimerval timer;
|
73
|
+
timer.it_interval.tv_sec = 0;
|
74
|
+
timer.it_interval.tv_usec = NUM2LONG(usec);
|
75
|
+
timer.it_value = timer.it_interval;
|
76
|
+
setitimer(_results.type == PROF_WALL ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
77
|
+
}
|
78
|
+
|
79
|
+
return Qnil;
|
80
|
+
}
|
81
|
+
|
82
|
+
static VALUE
|
83
|
+
stackprof_stop(VALUE self)
|
84
|
+
{
|
85
|
+
if (_results.type == PROF_OBJECT) {
|
86
|
+
rb_tracepoint_disable(objtracer);
|
87
|
+
} else {
|
88
|
+
struct itimerval timer;
|
89
|
+
memset(&timer, 0, sizeof(timer));
|
90
|
+
setitimer(_results.type == PROF_WALL ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
91
|
+
|
92
|
+
struct sigaction sa;
|
93
|
+
sa.sa_handler = SIG_IGN;
|
94
|
+
sa.sa_flags = SA_RESTART;
|
95
|
+
sigemptyset(&sa.sa_mask);
|
96
|
+
sigaction(_results.type == PROF_WALL ? SIGALRM : SIGPROF, &sa, NULL);
|
97
|
+
}
|
98
|
+
|
99
|
+
return Qnil;
|
100
|
+
}
|
101
|
+
|
102
|
+
static int
|
103
|
+
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
104
|
+
{
|
105
|
+
VALUE edges = (VALUE)arg;
|
106
|
+
|
107
|
+
intptr_t weight = (intptr_t)val;
|
108
|
+
rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
|
109
|
+
return ST_CONTINUE;
|
110
|
+
}
|
111
|
+
|
112
|
+
static int
|
113
|
+
frame_lines_i(st_data_t key, st_data_t val, st_data_t arg)
|
114
|
+
{
|
115
|
+
VALUE lines = (VALUE)arg;
|
116
|
+
|
117
|
+
intptr_t weight = (intptr_t)val;
|
118
|
+
rb_hash_aset(lines, INT2FIX(key), INT2FIX(weight));
|
119
|
+
return ST_CONTINUE;
|
120
|
+
}
|
121
|
+
|
122
|
+
static int
|
123
|
+
frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
124
|
+
{
|
125
|
+
VALUE frame = (VALUE)key;
|
126
|
+
frame_data_t *frame_data = (frame_data_t *)val;
|
127
|
+
VALUE results = (VALUE)arg;
|
128
|
+
VALUE details = rb_hash_new();
|
129
|
+
VALUE name, file, edges, lines;
|
130
|
+
VALUE label, method_name;
|
131
|
+
VALUE line;
|
132
|
+
|
133
|
+
rb_hash_aset(results, rb_obj_id(frame), details);
|
134
|
+
|
135
|
+
name = rb_profile_frame_full_label(frame);
|
136
|
+
rb_hash_aset(details, sym_name, name);
|
137
|
+
|
138
|
+
file = rb_profile_frame_absolute_path(frame);
|
139
|
+
if (NIL_P(file))
|
140
|
+
file = rb_profile_frame_path(frame);
|
141
|
+
rb_hash_aset(details, sym_file, file);
|
142
|
+
|
143
|
+
if ((line = rb_profile_frame_first_lineno(frame)) != INT2FIX(0))
|
144
|
+
rb_hash_aset(details, sym_line, line);
|
145
|
+
|
146
|
+
rb_hash_aset(details, sym_total_samples, SIZET2NUM(frame_data->total_samples));
|
147
|
+
rb_hash_aset(details, sym_samples, SIZET2NUM(frame_data->caller_samples));
|
148
|
+
|
149
|
+
if (frame_data->edges) {
|
150
|
+
edges = rb_hash_new();
|
151
|
+
rb_hash_aset(details, sym_edges, edges);
|
152
|
+
st_foreach(frame_data->edges, frame_edges_i, (st_data_t)edges);
|
153
|
+
st_free_table(frame_data->edges);
|
154
|
+
frame_data->edges = NULL;
|
155
|
+
}
|
156
|
+
|
157
|
+
if (frame_data->lines) {
|
158
|
+
lines = rb_hash_new();
|
159
|
+
rb_hash_aset(details, sym_lines, lines);
|
160
|
+
st_foreach(frame_data->lines, frame_lines_i, (st_data_t)lines);
|
161
|
+
st_free_table(frame_data->lines);
|
162
|
+
frame_data->lines = NULL;
|
163
|
+
}
|
164
|
+
|
165
|
+
xfree(frame_data);
|
166
|
+
return ST_DELETE;
|
167
|
+
}
|
168
|
+
|
169
|
+
static VALUE
|
170
|
+
stackprof_run(VALUE self, VALUE type, VALUE usec)
|
171
|
+
{
|
172
|
+
VALUE results, frames;
|
173
|
+
rb_need_block();
|
174
|
+
if (!_results.frames)
|
175
|
+
_results.frames = st_init_numtable();
|
176
|
+
_results.overall_samples = 0;
|
177
|
+
|
178
|
+
stackprof_start(self, type, usec);
|
179
|
+
rb_yield(Qundef);
|
180
|
+
stackprof_stop(self);
|
181
|
+
|
182
|
+
results = rb_hash_new();
|
183
|
+
rb_hash_aset(results, sym_version, DBL2NUM(1.0));
|
184
|
+
rb_hash_aset(results, sym_mode, rb_sprintf("%"PRIsVALUE"(%"PRIsVALUE")", type, usec));
|
185
|
+
rb_hash_aset(results, sym_samples, SIZET2NUM(_results.overall_samples));
|
186
|
+
|
187
|
+
frames = rb_hash_new();
|
188
|
+
rb_hash_aset(results, sym_frames, frames);
|
189
|
+
st_foreach(_results.frames, frame_i, (st_data_t)frames);
|
190
|
+
|
191
|
+
return results;
|
192
|
+
}
|
193
|
+
|
194
|
+
static inline frame_data_t *
|
195
|
+
sample_for(VALUE frame)
|
196
|
+
{
|
197
|
+
st_data_t key = (st_data_t)frame, val = 0;
|
198
|
+
frame_data_t *frame_data;
|
199
|
+
|
200
|
+
if (st_lookup(_results.frames, key, &val)) {
|
201
|
+
frame_data = (frame_data_t *)val;
|
202
|
+
} else {
|
203
|
+
frame_data = ALLOC_N(frame_data_t, 1);
|
204
|
+
MEMZERO(frame_data, frame_data_t, 1);
|
205
|
+
val = (st_data_t)frame_data;
|
206
|
+
st_insert(_results.frames, key, val);
|
207
|
+
}
|
208
|
+
|
209
|
+
return frame_data;
|
210
|
+
}
|
211
|
+
|
212
|
+
void
|
213
|
+
st_numtable_increment(st_table *table, st_data_t key)
|
214
|
+
{
|
215
|
+
intptr_t weight = 0;
|
216
|
+
st_lookup(table, key, (st_data_t *)&weight);
|
217
|
+
weight++;
|
218
|
+
st_insert(table, key, weight);
|
219
|
+
}
|
220
|
+
|
221
|
+
static void
|
222
|
+
stackprof_sample()
|
223
|
+
{
|
224
|
+
int num, i;
|
225
|
+
VALUE prev_frame;
|
226
|
+
st_data_t key;
|
227
|
+
|
228
|
+
_results.overall_samples++;
|
229
|
+
num = rb_profile_frames(0, sizeof(_results.frames_buffer), _results.frames_buffer, _results.lines_buffer);
|
230
|
+
|
231
|
+
for (i = 0; i < num; i++) {
|
232
|
+
int line = _results.lines_buffer[i];
|
233
|
+
VALUE frame = _results.frames_buffer[i];
|
234
|
+
frame_data_t *frame_data = sample_for(frame);
|
235
|
+
|
236
|
+
frame_data->total_samples++;
|
237
|
+
|
238
|
+
if (i == 0) {
|
239
|
+
frame_data->caller_samples++;
|
240
|
+
if (line > 0) {
|
241
|
+
if (!frame_data->lines)
|
242
|
+
frame_data->lines = st_init_numtable();
|
243
|
+
st_numtable_increment(frame_data->lines, (st_data_t)line);
|
244
|
+
}
|
245
|
+
} else {
|
246
|
+
if (!frame_data->edges)
|
247
|
+
frame_data->edges = st_init_numtable();
|
248
|
+
st_numtable_increment(frame_data->edges, (st_data_t)prev_frame);
|
249
|
+
}
|
250
|
+
|
251
|
+
prev_frame = frame;
|
252
|
+
}
|
253
|
+
}
|
254
|
+
|
255
|
+
/*
 * Takes one sample unless a sample is already in progress; the static
 * flag makes nested invocations return without sampling. `data` is unused.
 */
static void
stackprof_job_handler(void *data)
{
    static int in_signal_handler = 0;

    if (!in_signal_handler) {
        in_signal_handler = 1;
        stackprof_sample();
        in_signal_handler = 0;
    }
}
|
265
|
+
|
266
|
+
static void
|
267
|
+
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
268
|
+
{
|
269
|
+
rb_postponed_job_register_one(0, stackprof_job_handler, 0);
|
270
|
+
}
|
271
|
+
|
272
|
+
static void
|
273
|
+
stackprof_newobj_handler(VALUE tpval, void *data)
|
274
|
+
{
|
275
|
+
stackprof_job_handler(0);
|
276
|
+
}
|
277
|
+
|
278
|
+
static int
|
279
|
+
frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
280
|
+
{
|
281
|
+
VALUE frame = (VALUE)key;
|
282
|
+
rb_gc_mark_maybe(frame);
|
283
|
+
return ST_CONTINUE;
|
284
|
+
}
|
285
|
+
|
286
|
+
static void
|
287
|
+
stackprof_gc_mark()
|
288
|
+
{
|
289
|
+
if (_results.frames)
|
290
|
+
st_foreach(_results.frames, frame_mark_i, 0);
|
291
|
+
}
|
292
|
+
|
293
|
+
void
|
294
|
+
Init_stackprof(void)
|
295
|
+
{
|
296
|
+
sym_object = ID2SYM(rb_intern("object"));
|
297
|
+
sym_name = ID2SYM(rb_intern("name"));
|
298
|
+
sym_wall = ID2SYM(rb_intern("wall"));
|
299
|
+
sym_file = ID2SYM(rb_intern("file"));
|
300
|
+
sym_line = ID2SYM(rb_intern("line"));
|
301
|
+
sym_total_samples = ID2SYM(rb_intern("total_samples"));
|
302
|
+
sym_samples = ID2SYM(rb_intern("samples"));
|
303
|
+
sym_edges = ID2SYM(rb_intern("edges"));
|
304
|
+
sym_lines = ID2SYM(rb_intern("lines"));
|
305
|
+
sym_version = ID2SYM(rb_intern("version"));
|
306
|
+
sym_mode = ID2SYM(rb_intern("mode"));
|
307
|
+
sym_frames = ID2SYM(rb_intern("frames"));
|
308
|
+
|
309
|
+
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, NULL);
|
310
|
+
rb_global_variable(&gc_hook);
|
311
|
+
|
312
|
+
VALUE rb_mStackProf = rb_define_module("StackProf");
|
313
|
+
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, 2);
|
314
|
+
rb_autoload(rb_mStackProf, rb_intern_const("Report"), "stackprof/report.rb");
|
315
|
+
}
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'pp'
|
2
|
+
|
3
|
+
module StackProf
  # Renders a profile Hash in text, graphviz or annotated-source form.
  #
  # The Hash is expected to carry :samples (overall sample count) and
  # :frames (frame id => info Hash with :name, :file, :line, :samples,
  # :total_samples and optional :edges / :lines).
  class Report
    # data - the profile Hash described above.
    def initialize(data)
      @data = data
    end

    # Returns [frame_id, info] pairs, frames with the most :samples first.
    def frames
      @data[:frames].sort_by { |_id, stats| -stats[:samples] }
    end

    # Returns the overall number of samples in the profile.
    def overall_samples
      @data[:samples]
    end

    # Pretty-prints the raw profile Hash.
    def print_debug
      pp @data
    end

    # Writes the call graph in graphviz dot format.
    #
    # f - IO-like object to write to (defaults to STDOUT).
    def print_graphviz(f = STDOUT)
      f.puts "digraph profile {"
      frames.each do |frame, info|
        call, total = info.values_at(:samples, :total_samples)

        # "<own> (pct)\rof <total> (pct)\r", or just the total when every
        # sample of the frame was taken with it first in the stack
        sample = "#{total} (%2.1f%%)\\r" % percent(total)
        sample = ("#{call} (%2.1f%%)\\rof " % percent(call)) + sample if call < total
        size = (1.0 * call / overall_samples) * 28 + 10

        f.puts " #{frame} [size=#{size}] [fontsize=#{size}] [shape=box] [label=\"#{info[:name]}\\n#{sample}\"];"
        (info[:edges] || {}).each do |edge, weight|
          f.puts " #{frame} -> #{edge} [label=\"#{weight}\"];"
        end
      end
      f.puts "}"
    end

    # Prints one row per frame: total samples, own samples and frame name.
    def print_text
      printf "% 10s (pct) % 10s (pct) FRAME\n", "TOTAL", "SAMPLES"
      frames.each do |_frame, info|
        call, total = info.values_at(:samples, :total_samples)
        printf "% 10d % 8s % 10d % 8s %s\n",
               total, "(%2.1f%%)" % percent(total),
               call, "(%2.1f%%)" % percent(call),
               info[:name]
      end
    end

    # Prints the source of every frame whose name matches, annotated with
    # per-line sample counts.
    #
    # name - Regexp, or a String that is matched literally.
    #
    # Frames without a :file or :line, or whose file cannot be read, are
    # skipped instead of raising.
    def print_source(name)
      name = /#{Regexp.escape name}/ unless Regexp === name
      frames.each do |_frame, info|
        next unless info[:name] =~ name

        file, line = info.values_at(:file, :line)
        next unless file && line && File.readable?(file)

        lines = info[:lines]
        # last sampled line, or five lines past the definition when the
        # frame has no per-line samples
        maxline = (lines && lines.keys.max) || line + 5
        printf "%s (%s:%d)\n", info[:name], file, line

        File.foreach(file).with_index(1) do |code, lineno|
          next if lineno < line
          break if lineno > maxline + 1 # one line past the last sampled line is still shown

          samples = lines && lines[lineno]
          if samples
            printf "% 5d % 7s / % 7s | % 5d | %s",
                   samples,
                   "(%2.1f%%" % percent(samples),
                   "%2.1f%%)" % (100.0 * samples / info[:samples]),
                   lineno, code
          else
            printf " | % 5d | %s", lineno, code
          end
        end
      end
    end

    private

    # Share of the overall samples that count represents, in percent.
    def percent(count)
      count * 100.0 / overall_samples
    end
  end
end
|
data/sample.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
$:.unshift File.expand_path('../lib', __FILE__)
|
2
|
+
require 'stackprof'
|
3
|
+
|
4
|
+
# Small synthetic workload for the profiler demo: constructing an A does
# some integer math and allocates a couple of objects.
class A
  class << self
    # Allocates two objects and returns the second one.
    def newobj
      Object.new
      Object.new
    end
  end

  # Runs each of the three workloads once.
  def initialize
    pow
    self.class.newobj
    math
  end

  # Integer exponentiation.
  def pow
    2 ** 100
  end

  # Evaluates a short arithmetic expression twice, inside a block.
  def math
    2.times { 2 + 3 * 4 ^ 5 / 6 }
  end
end
|
26
|
+
|
27
|
+
# Profile one million A.new calls. Swap in one of the commented-out lines
# to sample object allocations or wall time instead of cpu time.
#profile = StackProf.run(:object, 1) do
#profile = StackProf.run(:wall, 1000) do
profile = StackProf.run(:cpu, 1000) do
  1_000_000.times { A.new }
end

# Print every report format, each preceded by a blank line.
report = StackProf::Report.new(profile)

puts
report.print_source(/pow|newobj|math/)

puts
report.print_text

puts
report.print_graphviz

puts
report.print_debug
|
data/stackprof.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Gem specification for stackprof.
Gem::Specification.new do |spec|
  # identity
  spec.name     = 'stackprof'
  spec.version  = '0.1.0'
  spec.homepage = 'http://github.com/tmm1/stackprof'

  spec.authors = 'Aman Gupta'
  spec.email   = 'aman@tmm1.net'

  # packaged files are whatever git tracks; the extension is built via ext/extconf.rb
  spec.files      = `git ls-files`.split("\n")
  spec.extensions = 'ext/extconf.rb'

  spec.bindir = 'bin'
  spec.executables << 'stackprof'

  spec.summary     = 'sampling callstack-profiler for ruby 2.1+'
  spec.description = 'stackprof is a fast sampling profiler for ruby code, with cpu, wallclock and object allocation samplers.'

  spec.license = 'MIT'

  spec.add_development_dependency 'rake-compiler'
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
$:.unshift File.expand_path('../../lib', __FILE__)
|
2
|
+
require 'stackprof'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
class StackProfTest < Test::Unit::TestCase # exercises StackProf.run in wall, object and cpu modes; test_object_allocation asserts this file's own line numbers, so do not add or remove lines above it
|
6
|
+
def test_info
|
7
|
+
profile = StackProf.run(:wall, 1000){} # empty block: zero samples are expected below
|
8
|
+
assert_equal 1.0, profile[:version]
|
9
|
+
assert_equal "wall(1000)", profile[:mode]
|
10
|
+
assert_equal 0, profile[:samples]
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_object_allocation
|
14
|
+
profile = StackProf.run(:object, 1) do
|
15
|
+
Object.new
|
16
|
+
Object.new
|
17
|
+
end
|
18
|
+
assert_equal "object(1)", profile[:mode]
|
19
|
+
assert_equal 2, profile[:samples] # one sample per Object.new in the block
|
20
|
+
|
21
|
+
frame = profile[:frames].values.first
|
22
|
+
assert_equal "block in StackProfTest#test_object_allocation", frame[:name]
|
23
|
+
assert_equal 2, frame[:samples]
|
24
|
+
assert_equal 14, frame[:line] # 14 = line of the StackProf.run block above within this file
|
25
|
+
assert_equal 1, frame[:lines][15] # first Object.new
|
26
|
+
assert_equal 1, frame[:lines][16] # second Object.new
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_cputime
|
30
|
+
profile = StackProf.run(:cpu, 1000) do
|
31
|
+
math
|
32
|
+
end
|
33
|
+
|
34
|
+
frame = profile[:frames].values.first # NOTE(review): relies on the first entry of the frames hash being the expected frame
|
35
|
+
assert_equal "block in StackProfTest#math", frame[:name]
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_walltime
|
39
|
+
profile = StackProf.run(:wall, 1000) do
|
40
|
+
idle
|
41
|
+
end
|
42
|
+
|
43
|
+
frame = profile[:frames].values.first # NOTE(review): relies on the first entry of the frames hash being the expected frame
|
44
|
+
assert_equal "StackProfTest#idle", frame[:name]
|
45
|
+
assert_in_delta 200, frame[:samples], 5 # idle sleeps 0.2s; with a 1000us interval that is about 200 samples (timing-dependent)
|
46
|
+
end
|
47
|
+
|
48
|
+
def math # cpu-bound helper for test_cputime
|
49
|
+
250_000.times do
|
50
|
+
2 ** 10
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def idle # sleeping helper for test_walltime
|
55
|
+
sleep 0.2
|
56
|
+
end
|
57
|
+
|
+
end
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: stackprof
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aman Gupta
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake-compiler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description: stackprof is a fast sampling profiler for ruby code, with cpu, wallclock
|
28
|
+
and object allocation samplers.
|
29
|
+
email: aman@tmm1.net
|
30
|
+
executables:
|
31
|
+
- stackprof
|
32
|
+
extensions:
|
33
|
+
- ext/extconf.rb
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- ".gitignore"
|
37
|
+
- Gemfile
|
38
|
+
- Gemfile.lock
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- bin/stackprof
|
42
|
+
- ext/extconf.rb
|
43
|
+
- ext/stackprof.c
|
44
|
+
- lib/stackprof/report.rb
|
45
|
+
- sample.rb
|
46
|
+
- stackprof.gemspec
|
47
|
+
- test/test_stackprof.rb
|
48
|
+
homepage: http://github.com/tmm1/stackprof
|
49
|
+
licenses:
|
50
|
+
- MIT
|
51
|
+
metadata: {}
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options: []
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
requirements: []
|
67
|
+
rubyforge_project:
|
68
|
+
rubygems_version: 2.2.0.preview.1
|
69
|
+
signing_key:
|
70
|
+
specification_version: 4
|
71
|
+
summary: sampling callstack-profiler for ruby 2.1+
|
72
|
+
test_files: []
|