honkster-perftools.rb 0.5.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +177 -0
- data/bin/pprof.rb +4 -0
- data/ext/extconf.rb +116 -0
- data/ext/perftools.c +493 -0
- data/ext/src/google-perftools-1.6.tar.gz +0 -0
- data/patches/perftools-debug.patch +20 -0
- data/patches/perftools-frames.patch +13 -0
- data/patches/perftools-gc.patch +107 -0
- data/patches/perftools-notests.patch +15 -0
- data/patches/perftools-objects.patch +85 -0
- data/patches/perftools-osx.patch +13 -0
- data/patches/perftools-pprof.patch +76 -0
- data/patches/perftools-stddef.patch +34 -0
- data/patches/perftools.patch +266 -0
- data/perftools.rb.gemspec +21 -0
- metadata +70 -0
data/README.md
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
# perftools.rb
|
2
|
+
|
3
|
+
google-perftools for ruby code
|
4
|
+
(c) 2010 Aman Gupta (tmm1)
|
5
|
+
http://www.ruby-lang.org/en/LICENSE.txt
|
6
|
+
|
7
|
+
## Usage (in a webapp)
|
8
|
+
|
9
|
+
Use [rack-perftools_profiler](https://github.com/bhb/rack-perftools_profiler):
|
10
|
+
|
11
|
+
require 'rack/perftools_profiler'
|
12
|
+
config.middleware.use ::Rack::PerftoolsProfiler, :default_printer => 'gif'
|
13
|
+
|
14
|
+
Simply add `profile=true` to profile a request:
|
15
|
+
|
16
|
+
curl -o 10_requests_to_homepage.gif "http://localhost:3000/homepage?profile=true&times=10"
|
17
|
+
|
18
|
+
## Usage (from Ruby)
|
19
|
+
|
20
|
+
Run the profiler with a block:
|
21
|
+
|
22
|
+
require 'perftools'
|
23
|
+
PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
|
24
|
+
5_000_000.times{ 1+2+3+4+5 }
|
25
|
+
end
|
26
|
+
|
27
|
+
Start and stop the profiler manually:
|
28
|
+
|
29
|
+
require 'perftools'
|
30
|
+
PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
|
31
|
+
5_000_000.times{ 1+2+3+4+5 }
|
32
|
+
PerfTools::CpuProfiler.stop
|
33
|
+
|
34
|
+
## Usage (externally)
|
35
|
+
|
36
|
+
Profile an existing ruby application without modifying it:
|
37
|
+
|
38
|
+
$ CPUPROFILE=/tmp/my_app_profile \
|
39
|
+
RUBYOPT="-r`gem which perftools | tail -1`" \
|
40
|
+
ruby my_app.rb
|
41
|
+
|
42
|
+
## Profiler Modes
|
43
|
+
|
44
|
+
The profiler can be run in one of many modes, set via an environment
|
45
|
+
variable before the library is loaded:
|
46
|
+
|
47
|
+
* `CPUPROFILE_REALTIME=1`
|
48
|
+
|
49
|
+
Use walltime instead of cputime profiling. This will capture all time spent in a method, even if it does not involve the CPU.
|
50
|
+
|
51
|
+
For example, `sleep()` is not expensive in terms of cputime, but very expensive in walltime. walltime will also show functions spending a lot of time in network i/o.
|
52
|
+
|
53
|
+
* `CPUPROFILE_OBJECTS=1`
|
54
|
+
|
55
|
+
Profile object allocations instead of cpu/wall time. Each sample represents one object created inside that function.
|
56
|
+
|
57
|
+
* `CPUPROFILE_METHODS=1`
|
58
|
+
|
59
|
+
Profile method calls. Each sample represents one method call made inside that function.
|
60
|
+
|
61
|
+
The sampling interval of the profiler can be adjusted to collect more
|
62
|
+
(for better profile detail) or fewer samples (for lower overhead):
|
63
|
+
|
64
|
+
* `CPUPROFILE_FREQUENCY=500`
|
65
|
+
|
66
|
+
The default sampling frequency is 100 samples per second. The valid range is 1-4000.
|
67
|
+
|
68
|
+
## Reporting
|
69
|
+
|
70
|
+
pprof.rb --text /tmp/add_numbers_profile
|
71
|
+
|
72
|
+
pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
|
73
|
+
|
74
|
+
pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
|
75
|
+
|
76
|
+
pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
|
77
|
+
kcachegrind /tmp/add_numbers_profile.grind
|
78
|
+
|
79
|
+
pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
|
80
|
+
|
81
|
+
pprof.rb --text --ignore=Gem /tmp/my_app_profile
|
82
|
+
|
83
|
+
|
84
|
+
For more options, see [pprof documentation](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
|
85
|
+
|
86
|
+
|
87
|
+
### Examples
|
88
|
+
|
89
|
+
#### pprof.rb --text
|
90
|
+
|
91
|
+
Total: 1735 samples
|
92
|
+
1487 85.7% 85.7% 1735 100.0% Integer#times
|
93
|
+
248 14.3% 100.0% 248 14.3% Fixnum#+
|
94
|
+
|
95
|
+
#### pprof.rb --gif
|
96
|
+
|
97
|
+
* Simple [require 'rubygems'](http://perftools-rb.rubyforge.org/examples/rubygems.gif) profile
|
98
|
+
|
99
|
+
* Comparing redis-rb [with](http://perftools-rb.rubyforge.org/examples/redis-rb.gif) and [without](http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif) SystemTimer based socket timeouts
|
100
|
+
|
101
|
+
* [Sinatra](http://perftools-rb.rubyforge.org/examples/sinatra.gif) vs. [Merb](http://perftools-rb.rubyforge.org/examples/merb.gif) vs. [Rails](http://perftools-rb.rubyforge.org/examples/rails.gif)
|
102
|
+
|
103
|
+
* C-level profile of EventMachine + epoll + Ruby threads [before](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif) and [after](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif) a [6 line EM bugfix](http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/)
|
104
|
+
|
105
|
+
* C-level profile of a [ruby/rails vm](http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif)
|
106
|
+
* 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
|
107
|
+
|
108
|
+
|
109
|
+
## Installation
|
110
|
+
|
111
|
+
Just install the gem, which will download, patch and compile google-perftools for you:
|
112
|
+
|
113
|
+
sudo gem install perftools.rb
|
114
|
+
|
115
|
+
Or build your own gem:
|
116
|
+
|
117
|
+
git clone git://github.com/tmm1/perftools.rb
|
118
|
+
cd perftools.rb
|
119
|
+
gem build perftools.rb.gemspec
|
120
|
+
gem install perftools.rb
|
121
|
+
|
122
|
+
|
123
|
+
You'll also need graphviz to generate call graphs using dot:
|
124
|
+
|
125
|
+
sudo brew install graphviz ghostscript # osx
|
126
|
+
sudo apt-get install graphviz ps2pdf # debian/ubuntu
|
127
|
+
|
128
|
+
## Advantages over ruby-prof
|
129
|
+
|
130
|
+
* Sampling profiler
|
131
|
+
|
132
|
+
* perftools samples your process using setitimer() so it can be used in production with minimal overhead.
|
133
|
+
|
134
|
+
|
135
|
+
## Profiling the Ruby VM and C extensions
|
136
|
+
|
137
|
+
To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64).
|
138
|
+
|
139
|
+
Download:
|
140
|
+
|
141
|
+
wget http://google-perftools.googlecode.com/files/google-perftools-1.6.tar.gz
|
142
|
+
tar zxvf google-perftools-1.6.tar.gz
|
143
|
+
cd google-perftools-1.6
|
144
|
+
|
145
|
+
Compile:
|
146
|
+
|
147
|
+
./configure --prefix=/opt
|
148
|
+
make
|
149
|
+
sudo make install
|
150
|
+
|
151
|
+
Profile:
|
152
|
+
|
153
|
+
export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
|
154
|
+
export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
|
155
|
+
CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
|
156
|
+
|
157
|
+
Report:
|
158
|
+
|
159
|
+
pprof `which ruby` --text /tmp/ruby_interpreter.profile
|
160
|
+
|
161
|
+
|
162
|
+
## TODO
|
163
|
+
|
164
|
+
* Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
|
165
|
+
* Allow both C and Ruby profiling
|
166
|
+
* Add setter for the sampling interval
|
167
|
+
|
168
|
+
|
169
|
+
## Resources
|
170
|
+
|
171
|
+
* [GoRuCo 2009 Lightning Talk on perftools.rb](http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html) @ 21:52
|
172
|
+
|
173
|
+
* [Ilya Grigorik's introduction to perftools.rb](http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/)
|
174
|
+
|
175
|
+
* [Google Perftools](http://code.google.com/p/google-perftools/)
|
176
|
+
|
177
|
+
* [Analyzing profiles and interpreting different output formats](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
|
data/bin/pprof.rb
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# Absolute path of the directory that contains this extconf.rb.
CWD = File.expand_path(File.dirname(__FILE__))

# Echoes +cmd+, runs it through xsystem and returns xsystem's truthy result.
# Raises a RuntimeError naming the log files to attach when the command fails.
def sys(cmd)
  puts " -- #{cmd}"
  result = xsystem(cmd)
  return result if result

  raise "#{cmd} failed, please report to perftools@tmm1.net with pastie.org link to #{CWD}/mkmf.log and #{CWD}/src/google-perftools-1.6/config.log"
end
|
10
|
+
|
11
|
+
require 'mkmf'
|
12
|
+
require 'fileutils'
|
13
|
+
|
14
|
+
# Ruby 1.9+ needs the VM's private headers, which the ruby_core_source gem
# provides. If the gem cannot be loaded, install and activate it, then retry.
if RUBY_VERSION >= "1.9"
  begin
    require "ruby_core_source"
  rescue LoadError
    require 'rubygems/user_interaction' # for 1.9.1
    require 'rubygems/dependency_installer'
    Gem::DependencyInstaller.new.install('ruby_core_source')

    Gem.refresh
    Gem.activate('ruby_core_source') # for 1.9.1

    require "ruby_core_source"
  end
end

# Bundled tarball and the directory name it unpacks to.
perftools = 'google-perftools-1.6.tar.gz'
dir = File.basename(perftools, '.tar.gz')

puts "(I'm about to compile google-perftools.. this will definitely take a while)"
ENV["PATCH_GET"] = '0'
|
35
|
+
|
36
|
+
# Unpack, patch and build the bundled google-perftools tree under src/.
Dir.chdir('src') do
  # Always start from a freshly unpacked tree. File.exist? is used because
  # the File.exists? alias is deprecated and was removed in Ruby 3.2.
  FileUtils.rm_rf(dir) if File.exist?(dir)

  sys("tar zpxvf #{perftools}")
  Dir.chdir(dir) do
    # With DEV set, the unpacked tree is tracked in git and every step below
    # is committed separately.
    if ENV['DEV']
      sys("git init")
      sys("git add .")
      sys("git commit -m 'initial source'")
    end

    # [patch name, apply?] pairs, applied in this order.
    [ ['perftools', true],
      ['perftools-notests', true],
      ['perftools-pprof', true],
      ['perftools-gc', true],
      ['perftools-osx', RUBY_PLATFORM =~ /darwin/],
      ['perftools-debug', true],
      ['perftools-objects', true],
      ['perftools-frames', true],
      ['perftools-stddef', true]
    ].each do |patch, apply|
      if apply
        sys("patch -p1 < ../../../patches/#{patch}.patch")
        sys("git commit -am '#{patch}'") if ENV['DEV']
      end
    end

    # Rename SpinLock to ISpinLock throughout the perftools sources.
    sys("sed -i -e 's,SpinLock,ISpinLock,g' src/*.cc src/*.h src/base/*.cc src/base/*.h")
    sys("git commit -am 'rename spinlock'") if ENV['DEV']
  end

  # Ship the patched pprof script in the gem's bin/ directory.
  Dir.chdir(dir) do
    FileUtils.cp 'src/pprof', '../../../bin/'
    FileUtils.chmod 0755, '../../../bin/pprof'
  end

  # Build a static libprofiler and copy it next to extconf.rb under the name
  # the extension links against.
  Dir.chdir(dir) do
    if RUBY_PLATFORM =~ /darwin10/
      ENV['CFLAGS'] = ENV['CXXFLAGS'] = '-D_XOPEN_SOURCE'
    end
    sys("./configure --disable-heap-profiler --disable-heap-checker --disable-debugalloc --disable-shared")
    sys("make")
    FileUtils.cp '.libs/libprofiler.a', '../../librubyprofiler.a'
  end
end
|
81
|
+
|
82
|
+
# Link the extension against the librubyprofiler.a copied into this directory.
$LIBPATH.push(CWD)
$libs = append_library($libs, 'rubyprofiler')
|
84
|
+
# Appends a -D<name> preprocessor flag to the $defs list.
def add_define(name)
  $defs << "-D#{name}"
end
|
87
|
+
|
88
|
+
# On darwin/linux/freebsd, swap the first word of LDSHARED for $(CXX) and keep
# the remaining arguments, so the shared object is linked by the C++ driver.
if [/darwin/, /linux/, /freebsd/].any? { |os| RUBY_PLATFORM =~ os }
  linker_args = CONFIG['LDSHARED'].split[1..-1].join(' ')
  CONFIG['LDSHARED'] = "$(CXX) #{linker_args}"
end

if RUBY_VERSION >= "1.9"
  add_define 'RUBY19'

  # Header probe handed to ruby_core_source: method.h is probed but optional,
  # the remaining four headers must all be found.
  core_headers = proc do
    have_header("method.h") # exists on 1.9.2
    %w[vm_core.h iseq.h insns.inc insns_info.inc].all? { |header| have_header(header) }
  end

  unless Ruby_core_source::create_makefile_with_core(core_headers, "perftools")
    STDERR.puts "\n\n"
    STDERR.puts "***************************************************************************************"
    STDERR.puts "********************** Ruby_core_source::create_makefile FAILED ***********************"
    STDERR.puts "***************************************************************************************"
    exit(1)
  end
else
  add_define 'RUBY18'

  have_func('rb_during_gc', 'ruby.h')
  create_makefile 'perftools'
end
|
data/ext/perftools.c
ADDED
@@ -0,0 +1,493 @@
|
|
1
|
+
#ifndef _GNU_SOURCE
|
2
|
+
#define _GNU_SOURCE
|
3
|
+
#endif
|
4
|
+
|
5
|
+
#include <ruby.h>
|
6
|
+
|
7
|
+
#ifndef _XOPEN_SOURCE
|
8
|
+
#define _XOPEN_SOURCE 600
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#include <assert.h>
|
12
|
+
|
13
|
+
void ProfilerGcMark(void (*cb)(VALUE));
|
14
|
+
int ProfilerStart(const char*);
|
15
|
+
void ProfilerStop();
|
16
|
+
void ProfilerFlush();
|
17
|
+
void ProfilerRecord(int, void*, void*);
|
18
|
+
int ProfilingIsEnabledForAllThreads();
|
19
|
+
|
20
|
+
static VALUE Iallocate;
|
21
|
+
static VALUE I__send__;
|
22
|
+
static VALUE Isend;
|
23
|
+
|
24
|
+
/*
 * SAVE_FRAME() appends one (self, klass, method) triple to result[] and
 * advances depth by 3. It expects `self`, `klass`, `method`, `result` and
 * `depth` to be in scope where it is expanded.
 *
 *  - Nothing is stored when method is 0, `send` or `__send__`.
 *  - The first slot holds self only when klass is a singleton class and self
 *    is a Class or Module; otherwise it is 0.
 *  - ID_ALLOCATOR is recorded as the `allocate` ID.
 *
 * The body is wrapped in do { } while (0) so the expansion is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define SAVE_FRAME() \
  do { \
    if (method && method != I__send__ && method != Isend) { \
      if (self && FL_TEST(klass, FL_SINGLETON) && (BUILTIN_TYPE(self) == T_CLASS || BUILTIN_TYPE(self) == T_MODULE)) \
        result[depth++] = (void*) self; \
      else \
        result[depth++] = 0; \
      \
      result[depth++] = (void*) klass; \
      result[depth++] = (void*) (method == ID_ALLOCATOR ? Iallocate : method); \
    } \
  } while (0)
|
34
|
+
|
35
|
+
#ifdef RUBY18
|
36
|
+
#include <env.h>
|
37
|
+
#include <node.h>
|
38
|
+
#include <setjmp.h>
|
39
|
+
#include <signal.h>
|
40
|
+
|
41
|
+
static jmp_buf saved_location;
|
42
|
+
static sig_t saved_handler = NULL;
|
43
|
+
|
44
|
+
void
|
45
|
+
segv_handler(int sig)
|
46
|
+
{
|
47
|
+
fprintf(stderr, "*** Caught a segfault during stack trace sampling!\n");
|
48
|
+
assert(saved_handler);
|
49
|
+
_longjmp(saved_location, 1);
|
50
|
+
}
|
51
|
+
|
52
|
+
int
|
53
|
+
rb_stack_trace(void** result, int max_depth)
|
54
|
+
{
|
55
|
+
struct FRAME *frame = ruby_frame;
|
56
|
+
NODE *n;
|
57
|
+
|
58
|
+
VALUE klass, self;
|
59
|
+
ID method;
|
60
|
+
int depth = 0;
|
61
|
+
|
62
|
+
if (max_depth == 0)
|
63
|
+
return 0;
|
64
|
+
|
65
|
+
#ifdef HAVE_RB_DURING_GC
|
66
|
+
if (rb_during_gc()) {
|
67
|
+
result[0] = rb_gc;
|
68
|
+
return 1;
|
69
|
+
}
|
70
|
+
#endif
|
71
|
+
|
72
|
+
// should not be possible to get here and already have a saved signal handler
|
73
|
+
assert(!saved_handler);
|
74
|
+
|
75
|
+
// ruby_frame is occasionally inconsistent, so temporarily catch segfaults
|
76
|
+
saved_handler = signal(SIGSEGV, segv_handler);
|
77
|
+
if (_setjmp(saved_location)) {
|
78
|
+
signal(SIGSEGV, saved_handler);
|
79
|
+
saved_handler = NULL;
|
80
|
+
return 0;
|
81
|
+
}
|
82
|
+
|
83
|
+
/*
|
84
|
+
// XXX does it make sense to track allocations or not?
|
85
|
+
if (frame->last_func == ID_ALLOCATOR) {
|
86
|
+
frame = frame->prev;
|
87
|
+
}
|
88
|
+
|
89
|
+
// XXX SIGPROF can come in while ruby_frame is in an inconsistent state (rb_call0), so we ignore the top-most frame
|
90
|
+
if (frame->last_func && frame->last_class) {
|
91
|
+
self = frame->self;
|
92
|
+
klass = frame->last_class;
|
93
|
+
method = frame->last_func;
|
94
|
+
SAVE_FRAME();
|
95
|
+
}
|
96
|
+
*/
|
97
|
+
|
98
|
+
for (; frame && (n = frame->node); frame = frame->prev) {
|
99
|
+
if (frame->prev && frame->prev->last_func) {
|
100
|
+
if (frame->prev->node == n) {
|
101
|
+
if (frame->prev->last_func == frame->last_func) continue;
|
102
|
+
}
|
103
|
+
|
104
|
+
if (depth+3 > max_depth)
|
105
|
+
break;
|
106
|
+
|
107
|
+
self = frame->prev->self;
|
108
|
+
klass = frame->prev->last_class;
|
109
|
+
method = frame->prev->last_func;
|
110
|
+
SAVE_FRAME();
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
signal(SIGSEGV, saved_handler);
|
115
|
+
saved_handler = NULL;
|
116
|
+
|
117
|
+
assert(depth <= max_depth);
|
118
|
+
return depth;
|
119
|
+
}
|
120
|
+
#endif
|
121
|
+
|
122
|
+
#ifdef RUBY19
|
123
|
+
#include <vm_core.h>
|
124
|
+
#include <iseq.h>
|
125
|
+
|
126
|
+
int
|
127
|
+
rb_stack_trace(void** result, int max_depth)
|
128
|
+
{
|
129
|
+
rb_thread_t *th = GET_THREAD();
|
130
|
+
rb_control_frame_t *cfp = th->cfp;
|
131
|
+
rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
|
132
|
+
|
133
|
+
VALUE klass, self;
|
134
|
+
ID method;
|
135
|
+
int depth = 0;
|
136
|
+
|
137
|
+
if (max_depth == 0)
|
138
|
+
return 0;
|
139
|
+
|
140
|
+
if (rb_during_gc()) {
|
141
|
+
result[0] = rb_gc;
|
142
|
+
return 1;
|
143
|
+
}
|
144
|
+
|
145
|
+
while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp) && depth+3 <= max_depth) {
|
146
|
+
rb_iseq_t *iseq = cfp->iseq;
|
147
|
+
|
148
|
+
if (iseq && iseq->type == ISEQ_TYPE_METHOD) {
|
149
|
+
self = 0; // maybe use cfp->self here, but iseq->self is a ISeq ruby obj
|
150
|
+
klass = iseq->klass;
|
151
|
+
method = iseq->defined_method_id;
|
152
|
+
SAVE_FRAME();
|
153
|
+
}
|
154
|
+
|
155
|
+
if (depth+3 > max_depth)
|
156
|
+
break;
|
157
|
+
|
158
|
+
switch (VM_FRAME_TYPE(cfp)) {
|
159
|
+
case VM_FRAME_MAGIC_METHOD:
|
160
|
+
case VM_FRAME_MAGIC_CFUNC:
|
161
|
+
self = cfp->self;
|
162
|
+
#ifdef HAVE_METHOD_H
|
163
|
+
if (!cfp->me) break;
|
164
|
+
|
165
|
+
klass = cfp->me->klass;
|
166
|
+
method = cfp->me->called_id;
|
167
|
+
#else
|
168
|
+
klass = cfp->method_class;
|
169
|
+
method = cfp->method_id;
|
170
|
+
#endif
|
171
|
+
SAVE_FRAME();
|
172
|
+
break;
|
173
|
+
}
|
174
|
+
|
175
|
+
cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
176
|
+
}
|
177
|
+
|
178
|
+
assert(depth <= max_depth);
|
179
|
+
return depth;
|
180
|
+
}
|
181
|
+
|
182
|
+
#if 0
|
183
|
+
void
|
184
|
+
rb_dump_stack()
|
185
|
+
{
|
186
|
+
rb_thread_t *th = GET_THREAD();
|
187
|
+
rb_control_frame_t *cfp = th->cfp;
|
188
|
+
rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
|
189
|
+
ID func;
|
190
|
+
|
191
|
+
printf("\n\n*********************\n");
|
192
|
+
while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp)) {
|
193
|
+
printf("cfp (%p):\n", cfp);
|
194
|
+
printf(" type: 0x%x\n", VM_FRAME_TYPE(cfp));
|
195
|
+
printf(" pc: %p\n", cfp->pc);
|
196
|
+
printf(" iseq: %p\n", cfp->iseq);
|
197
|
+
if (cfp->iseq) {
|
198
|
+
printf(" type: %d\n", FIX2INT(cfp->iseq->type));
|
199
|
+
printf(" self: %p\n", cfp->iseq->self);
|
200
|
+
printf(" klass: %p (%s)\n", cfp->iseq->klass, cfp->iseq->klass ? rb_class2name(cfp->iseq->klass) : "");
|
201
|
+
printf(" method: %p (%s)\n", cfp->iseq->defined_method_id, cfp->iseq->defined_method_id ? rb_id2name(cfp->iseq->defined_method_id) : "");
|
202
|
+
}
|
203
|
+
printf(" self: %p\n", cfp->self);
|
204
|
+
printf(" klass: %p (%s)\n", cfp->method_class, cfp->method_class ? rb_class2name(cfp->method_class) : "");
|
205
|
+
printf(" method: %p (%s)\n", cfp->method_id, cfp->method_id ? rb_id2name(cfp->method_id) : "");
|
206
|
+
|
207
|
+
cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
208
|
+
printf("\n");
|
209
|
+
}
|
210
|
+
printf("*********************\n\n");
|
211
|
+
}
|
212
|
+
#endif
|
213
|
+
#endif
|
214
|
+
|
215
|
+
static VALUE objprofiler_setup();
|
216
|
+
static VALUE objprofiler_teardown();
|
217
|
+
static VALUE methprofiler_setup();
|
218
|
+
static VALUE methprofiler_teardown();
|
219
|
+
|
220
|
+
/* CpuProfiler */
|
221
|
+
|
222
|
+
static VALUE cPerfTools;
|
223
|
+
static VALUE cCpuProfiler;
|
224
|
+
static VALUE eError;
|
225
|
+
static VALUE bProfilerRunning;
|
226
|
+
static VALUE gc_hook;
|
227
|
+
|
228
|
+
static VALUE
|
229
|
+
cpuprofiler_running_p(VALUE self)
|
230
|
+
{
|
231
|
+
return bProfilerRunning;
|
232
|
+
}
|
233
|
+
|
234
|
+
static VALUE
|
235
|
+
cpuprofiler_stop(VALUE self)
|
236
|
+
{
|
237
|
+
if (!bProfilerRunning)
|
238
|
+
return Qfalse;
|
239
|
+
|
240
|
+
bProfilerRunning = Qfalse;
|
241
|
+
objprofiler_teardown();
|
242
|
+
methprofiler_teardown();
|
243
|
+
ProfilerStop();
|
244
|
+
ProfilerFlush();
|
245
|
+
|
246
|
+
return Qtrue;
|
247
|
+
}
|
248
|
+
|
249
|
+
static VALUE
|
250
|
+
cpuprofiler_start(VALUE self, VALUE filename)
|
251
|
+
{
|
252
|
+
StringValue(filename);
|
253
|
+
|
254
|
+
if (bProfilerRunning)
|
255
|
+
rb_raise(eError, "profiler is already running");
|
256
|
+
|
257
|
+
if (getenv("CPUPROFILE_OBJECTS"))
|
258
|
+
objprofiler_setup();
|
259
|
+
else if (getenv("CPUPROFILE_METHODS"))
|
260
|
+
methprofiler_setup();
|
261
|
+
|
262
|
+
if (ProfilerStart(RSTRING_PTR(filename))) {
|
263
|
+
bProfilerRunning = Qtrue;
|
264
|
+
} else {
|
265
|
+
rb_raise(eError, "profiler could not be started");
|
266
|
+
}
|
267
|
+
|
268
|
+
if (rb_block_given_p()) {
|
269
|
+
rb_yield(Qnil);
|
270
|
+
cpuprofiler_stop(self);
|
271
|
+
}
|
272
|
+
|
273
|
+
return Qtrue;
|
274
|
+
}
|
275
|
+
|
276
|
+
static void
|
277
|
+
cpuprofiler_gc_mark()
|
278
|
+
{
|
279
|
+
ProfilerGcMark(rb_gc_mark);
|
280
|
+
}
|
281
|
+
|
282
|
+
/* ObjProfiler */
|
283
|
+
|
284
|
+
#include <assert.h>
|
285
|
+
#include <ucontext.h>
|
286
|
+
#include <unistd.h>
|
287
|
+
#include <signal.h>
|
288
|
+
#include <stdio.h>
|
289
|
+
#include <stdlib.h>
|
290
|
+
#include <string.h>
|
291
|
+
#include <sys/mman.h>
|
292
|
+
|
293
|
+
static VALUE bObjProfilerRunning;
|
294
|
+
static VALUE bMethProfilerRunning;
|
295
|
+
/* Number of bytes at the start of rb_newobj that get a breakpoint. */
#define NUM_ORIG_BYTES 2

/*
 * One entry per patched byte: where the breakpoint was written and the
 * original byte to put back when it is removed.
 */
struct {
  char *location;       /* address of the patched byte */
  unsigned char value;  /* byte that was there before patching */
} orig_bytes[NUM_ORIG_BYTES];
|
301
|
+
|
302
|
+
/*
 * Rounds addr down to the start of the memory page that contains it.
 *
 * Uses the system page size reported by sysconf(_SC_PAGESIZE) rather than a
 * fixed 64 KiB mask, so the result never points at an earlier page than the
 * one holding addr. Falls back to 4096 if the page size cannot be queried.
 */
static inline void *
page_align(void *addr) {
  assert(addr != NULL);

  long page_size = sysconf(_SC_PAGESIZE);
  if (page_size <= 0)
    page_size = 4096;

  return (void *)((size_t)addr & ~((size_t)page_size - 1));
}
|
307
|
+
|
308
|
+
static void
|
309
|
+
unprotect_page(char *addr) {
|
310
|
+
assert(addr != NULL);
|
311
|
+
char *aligned_addr = page_align(addr);
|
312
|
+
if (mprotect(aligned_addr, (addr - aligned_addr), PROT_READ|PROT_WRITE|PROT_EXEC) != 0)
|
313
|
+
perror("mprotect");
|
314
|
+
}
|
315
|
+
|
316
|
+
static inline char**
|
317
|
+
uc_get_ip(ucontext_t *uc) {
|
318
|
+
# if defined(__FreeBSD__)
|
319
|
+
# ifdef __i386__
|
320
|
+
# define program_counter uc_mcontext.mc_eip
|
321
|
+
# else
|
322
|
+
# define program_counter uc_mcontext.mc_rip
|
323
|
+
# endif
|
324
|
+
# elif defined(__dietlibc__)
|
325
|
+
# define program_counter uc_mcontext.rip
|
326
|
+
# elif defined(__APPLE__)
|
327
|
+
# ifdef __LP64__
|
328
|
+
# define program_counter uc_mcontext->__ss.__rip
|
329
|
+
# else
|
330
|
+
# define program_counter uc_mcontext->__ss.__eip
|
331
|
+
# endif
|
332
|
+
# else
|
333
|
+
# ifdef REG_RIP
|
334
|
+
# define program_counter uc_mcontext.gregs[REG_RIP]
|
335
|
+
# else
|
336
|
+
# define program_counter uc_mcontext.gregs[REG_EIP]
|
337
|
+
# endif
|
338
|
+
# endif
|
339
|
+
return (char**)&uc->program_counter;
|
340
|
+
}
|
341
|
+
|
342
|
+
#if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
|
343
|
+
static void
|
344
|
+
event_handler(rb_event_flag_t event, VALUE data, VALUE self, ID id, VALUE klass) {
|
345
|
+
ProfilerRecord(0, NULL, NULL);
|
346
|
+
}
|
347
|
+
#else
|
348
|
+
static void
|
349
|
+
event_handler(rb_event_t event, NODE *node, VALUE self, ID id, VALUE klass) {
|
350
|
+
ProfilerRecord(0, NULL, NULL);
|
351
|
+
}
|
352
|
+
#endif
|
353
|
+
|
354
|
+
static VALUE
|
355
|
+
methprofiler_setup()
|
356
|
+
{
|
357
|
+
if (bMethProfilerRunning)
|
358
|
+
return Qtrue;
|
359
|
+
|
360
|
+
#if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
|
361
|
+
rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL, 0);
|
362
|
+
#else
|
363
|
+
rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL);
|
364
|
+
#endif
|
365
|
+
|
366
|
+
bMethProfilerRunning = Qtrue;
|
367
|
+
return Qtrue;
|
368
|
+
}
|
369
|
+
|
370
|
+
static VALUE
|
371
|
+
methprofiler_teardown()
|
372
|
+
{
|
373
|
+
if (!bMethProfilerRunning)
|
374
|
+
return Qfalse;
|
375
|
+
|
376
|
+
rb_remove_event_hook(event_handler);
|
377
|
+
|
378
|
+
bMethProfilerRunning = Qfalse;
|
379
|
+
return Qtrue;
|
380
|
+
}
|
381
|
+
|
382
|
+
static void
|
383
|
+
trap_handler(int sig, siginfo_t *info, void *data) {
|
384
|
+
int i;
|
385
|
+
ucontext_t *uc = (ucontext_t *)data;
|
386
|
+
char **ip = uc_get_ip(uc);
|
387
|
+
|
388
|
+
// printf("signal: %d, addr: %p, ip: %p\n", signal, info->si_addr, *ip);
|
389
|
+
|
390
|
+
for (i=0; i<NUM_ORIG_BYTES; i++) {
|
391
|
+
if (orig_bytes[i].location == *ip-1) {
|
392
|
+
// restore original byte
|
393
|
+
orig_bytes[i].location[0] = orig_bytes[i].value;
|
394
|
+
|
395
|
+
// setup next breakpoint
|
396
|
+
orig_bytes[(i+1)%NUM_ORIG_BYTES].location[0] = '\xCC';
|
397
|
+
|
398
|
+
// first breakpoint is the notification
|
399
|
+
if (i == 0)
|
400
|
+
ProfilerRecord(sig, info, data);
|
401
|
+
|
402
|
+
// reset instruction pointer
|
403
|
+
*ip -= 1;
|
404
|
+
|
405
|
+
break;
|
406
|
+
}
|
407
|
+
}
|
408
|
+
}
|
409
|
+
|
410
|
+
static VALUE
|
411
|
+
objprofiler_setup()
|
412
|
+
{
|
413
|
+
if (bObjProfilerRunning)
|
414
|
+
return Qtrue;
|
415
|
+
|
416
|
+
int i;
|
417
|
+
struct sigaction sig = { .sa_sigaction = trap_handler, .sa_flags = SA_SIGINFO };
|
418
|
+
sigemptyset(&sig.sa_mask);
|
419
|
+
sigaction(SIGTRAP, &sig, NULL);
|
420
|
+
|
421
|
+
unprotect_page((char*)rb_newobj);
|
422
|
+
|
423
|
+
for (i=0; i<NUM_ORIG_BYTES; i++) {
|
424
|
+
orig_bytes[i].location = (char *)rb_newobj + i;
|
425
|
+
orig_bytes[i].value = ((unsigned char*)rb_newobj)[i];
|
426
|
+
orig_bytes[i].location[0] = '\xCC';
|
427
|
+
}
|
428
|
+
|
429
|
+
// setenv("CPUPROFILE_OBJECTS", "1", 1);
|
430
|
+
bObjProfilerRunning = Qtrue;
|
431
|
+
return Qtrue;
|
432
|
+
}
|
433
|
+
|
434
|
+
static VALUE
|
435
|
+
objprofiler_teardown()
|
436
|
+
{
|
437
|
+
if (!bObjProfilerRunning)
|
438
|
+
return Qfalse;
|
439
|
+
|
440
|
+
int i;
|
441
|
+
struct sigaction sig = { .sa_handler = SIG_IGN };
|
442
|
+
sigemptyset(&sig.sa_mask);
|
443
|
+
sigaction(SIGTRAP, &sig, NULL);
|
444
|
+
|
445
|
+
for (i=0; i<NUM_ORIG_BYTES; i++) {
|
446
|
+
orig_bytes[i].location[0] = orig_bytes[i].value;
|
447
|
+
}
|
448
|
+
|
449
|
+
// unsetenv("CPUPROFILE_OBJECTS");
|
450
|
+
bObjProfilerRunning = Qfalse;
|
451
|
+
return Qtrue;
|
452
|
+
}
|
453
|
+
|
454
|
+
/* Init */
|
455
|
+
|
456
|
+
static void
|
457
|
+
profiler_at_exit(VALUE self)
|
458
|
+
{
|
459
|
+
cpuprofiler_stop(self);
|
460
|
+
}
|
461
|
+
|
462
|
+
void
|
463
|
+
Init_perftools()
|
464
|
+
{
|
465
|
+
cPerfTools = rb_define_class("PerfTools", rb_cObject);
|
466
|
+
eError = rb_define_class_under(cPerfTools, "Error", rb_eStandardError);
|
467
|
+
cCpuProfiler = rb_define_class_under(cPerfTools, "CpuProfiler", rb_cObject);
|
468
|
+
|
469
|
+
Iallocate = rb_intern("allocate");
|
470
|
+
I__send__ = rb_intern("__send__");
|
471
|
+
Isend = rb_intern("send");
|
472
|
+
|
473
|
+
bMethProfilerRunning = bObjProfilerRunning = bProfilerRunning = Qfalse;
|
474
|
+
|
475
|
+
rb_define_singleton_method(cCpuProfiler, "running?", cpuprofiler_running_p, 0);
|
476
|
+
rb_define_singleton_method(cCpuProfiler, "start", cpuprofiler_start, 1);
|
477
|
+
rb_define_singleton_method(cCpuProfiler, "stop", cpuprofiler_stop, 0);
|
478
|
+
|
479
|
+
gc_hook = Data_Wrap_Struct(cCpuProfiler, cpuprofiler_gc_mark, NULL, NULL);
|
480
|
+
rb_global_variable(&gc_hook);
|
481
|
+
|
482
|
+
if (ProfilingIsEnabledForAllThreads()) { // profiler is already running?
|
483
|
+
bProfilerRunning = Qtrue;
|
484
|
+
|
485
|
+
if (getenv("CPUPROFILE_OBJECTS")) { // want to profile objects
|
486
|
+
objprofiler_setup();
|
487
|
+
} else if (getenv("CPUPROFILE_METHODS")) {
|
488
|
+
methprofiler_setup();
|
489
|
+
}
|
490
|
+
|
491
|
+
rb_set_end_proc(profiler_at_exit, 0); // make sure to cleanup before the VM shuts down
|
492
|
+
}
|
493
|
+
}
|