honkster-perftools.rb 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +177 -0
- data/bin/pprof.rb +4 -0
- data/ext/extconf.rb +116 -0
- data/ext/perftools.c +493 -0
- data/ext/src/google-perftools-1.6.tar.gz +0 -0
- data/patches/perftools-debug.patch +20 -0
- data/patches/perftools-frames.patch +13 -0
- data/patches/perftools-gc.patch +107 -0
- data/patches/perftools-notests.patch +15 -0
- data/patches/perftools-objects.patch +85 -0
- data/patches/perftools-osx.patch +13 -0
- data/patches/perftools-pprof.patch +76 -0
- data/patches/perftools-stddef.patch +34 -0
- data/patches/perftools.patch +266 -0
- data/perftools.rb.gemspec +21 -0
- metadata +70 -0
data/README.md
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
# perftools.rb
|
2
|
+
|
3
|
+
google-perftools for ruby code
|
4
|
+
(c) 2010 Aman Gupta (tmm1)
|
5
|
+
http://www.ruby-lang.org/en/LICENSE.txt
|
6
|
+
|
7
|
+
## Usage (in a webapp)
|
8
|
+
|
9
|
+
Use [rack-perftools_profiler](https://github.com/bhb/rack-perftools_profiler):
|
10
|
+
|
11
|
+
require 'rack/perftools_profiler'
|
12
|
+
config.middleware.use ::Rack::PerftoolsProfiler, :default_printer => 'gif'
|
13
|
+
|
14
|
+
Simply add `profile=true` to profile a request:
|
15
|
+
|
16
|
+
curl -o 10_requests_to_homepage.gif "http://localhost:3000/homepage?profile=true&times=10"
|
17
|
+
|
18
|
+
## Usage (from Ruby)
|
19
|
+
|
20
|
+
Run the profiler with a block:
|
21
|
+
|
22
|
+
require 'perftools'
|
23
|
+
PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
|
24
|
+
5_000_000.times{ 1+2+3+4+5 }
|
25
|
+
end
|
26
|
+
|
27
|
+
Start and stop the profiler manually:
|
28
|
+
|
29
|
+
require 'perftools'
|
30
|
+
PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
|
31
|
+
5_000_000.times{ 1+2+3+4+5 }
|
32
|
+
PerfTools::CpuProfiler.stop
|
33
|
+
|
34
|
+
## Usage (externally)
|
35
|
+
|
36
|
+
Profile an existing ruby application without modifying it:
|
37
|
+
|
38
|
+
$ CPUPROFILE=/tmp/my_app_profile \
|
39
|
+
RUBYOPT="-r`gem which perftools | tail -1`" \
|
40
|
+
ruby my_app.rb
|
41
|
+
|
42
|
+
## Profiler Modes
|
43
|
+
|
44
|
+
The profiler can be run in one of many modes, set via an environment
|
45
|
+
variable before the library is loaded:
|
46
|
+
|
47
|
+
* `CPUPROFILE_REALTIME=1`
|
48
|
+
|
49
|
+
Use walltime instead of cputime profiling. This will capture all time spent in a method, even if it does not involve the CPU.
|
50
|
+
|
51
|
+
For example, `sleep()` is not expensive in terms of cputime, but very expensive in walltime. walltime will also show functions spending a lot of time in network i/o.
|
52
|
+
|
53
|
+
* `CPUPROFILE_OBJECTS=1`
|
54
|
+
|
55
|
+
Profile object allocations instead of cpu/wall time. Each sample represents one object created inside that function.
|
56
|
+
|
57
|
+
* `CPUPROFILE_METHODS=1`
|
58
|
+
|
59
|
+
Profile method calls. Each sample represents one method call made inside that function.
|
60
|
+
|
61
|
+
The sampling interval of the profiler can be adjusted to collect more
|
62
|
+
(for better profile detail) or fewer samples (for lower overhead):
|
63
|
+
|
64
|
+
* `CPUPROFILE_FREQUENCY=500`
|
65
|
+
|
66
|
+
The default sampling frequency is 100 samples per second. The valid range is 1-4000.
|
67
|
+
|
68
|
+
## Reporting
|
69
|
+
|
70
|
+
pprof.rb --text /tmp/add_numbers_profile
|
71
|
+
|
72
|
+
pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
|
73
|
+
|
74
|
+
pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
|
75
|
+
|
76
|
+
pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
|
77
|
+
kcachegrind /tmp/add_numbers_profile.grind
|
78
|
+
|
79
|
+
pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
|
80
|
+
|
81
|
+
pprof.rb --text --ignore=Gem /tmp/my_app_profile
|
82
|
+
|
83
|
+
|
84
|
+
For more options, see [pprof documentation](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
|
85
|
+
|
86
|
+
|
87
|
+
### Examples
|
88
|
+
|
89
|
+
#### pprof.rb --text
|
90
|
+
|
91
|
+
Total: 1735 samples
|
92
|
+
1487 85.7% 85.7% 1735 100.0% Integer#times
|
93
|
+
248 14.3% 100.0% 248 14.3% Fixnum#+
|
94
|
+
|
95
|
+
#### pprof.rb --gif
|
96
|
+
|
97
|
+
* Simple [require 'rubygems'](http://perftools-rb.rubyforge.org/examples/rubygems.gif) profile
|
98
|
+
|
99
|
+
* Comparing redis-rb [with](http://perftools-rb.rubyforge.org/examples/redis-rb.gif) and [without](http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif) SystemTimer based socket timeouts
|
100
|
+
|
101
|
+
* [Sinatra](http://perftools-rb.rubyforge.org/examples/sinatra.gif) vs. [Merb](http://perftools-rb.rubyforge.org/examples/merb.gif) vs. [Rails](http://perftools-rb.rubyforge.org/examples/rails.gif)
|
102
|
+
|
103
|
+
* C-level profile of EventMachine + epoll + Ruby threads [before](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif) and [after](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif) a [6 line EM bugfix](http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/)
|
104
|
+
|
105
|
+
* C-level profile of a [ruby/rails vm](http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif)
|
106
|
+
* 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
|
107
|
+
|
108
|
+
|
109
|
+
## Installation
|
110
|
+
|
111
|
+
Just install the gem, which will download, patch and compile google-perftools for you:
|
112
|
+
|
113
|
+
sudo gem install perftools.rb
|
114
|
+
|
115
|
+
Or build your own gem:
|
116
|
+
|
117
|
+
git clone git://github.com/tmm1/perftools.rb
|
118
|
+
cd perftools.rb
|
119
|
+
gem build perftools.rb.gemspec
|
120
|
+
gem install perftools.rb
|
121
|
+
|
122
|
+
|
123
|
+
You'll also need graphviz to generate call graphs using dot:
|
124
|
+
|
125
|
+
sudo brew install graphviz ghostscript # osx
|
126
|
+
sudo apt-get install graphviz ghostscript # debian/ubuntu (ghostscript provides ps2pdf)
|
127
|
+
|
128
|
+
## Advantages over ruby-prof
|
129
|
+
|
130
|
+
* Sampling profiler
|
131
|
+
|
132
|
+
* perftools samples your process using setitimer() so it can be used in production with minimal overhead.
|
133
|
+
|
134
|
+
|
135
|
+
## Profiling the Ruby VM and C extensions
|
136
|
+
|
137
|
+
To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64).
|
138
|
+
|
139
|
+
Download:
|
140
|
+
|
141
|
+
wget http://google-perftools.googlecode.com/files/google-perftools-1.6.tar.gz
|
142
|
+
tar zxvf google-perftools-1.6.tar.gz
|
143
|
+
cd google-perftools-1.6
|
144
|
+
|
145
|
+
Compile:
|
146
|
+
|
147
|
+
./configure --prefix=/opt
|
148
|
+
make
|
149
|
+
sudo make install
|
150
|
+
|
151
|
+
Profile:
|
152
|
+
|
153
|
+
export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
|
154
|
+
export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
|
155
|
+
CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
|
156
|
+
|
157
|
+
Report:
|
158
|
+
|
159
|
+
pprof `which ruby` --text /tmp/ruby_interpreter.profile
|
160
|
+
|
161
|
+
|
162
|
+
## TODO
|
163
|
+
|
164
|
+
* Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
|
165
|
+
* Allow both C and Ruby profiling
|
166
|
+
* Add setter for the sampling interval
|
167
|
+
|
168
|
+
|
169
|
+
## Resources
|
170
|
+
|
171
|
+
* [GoRuCo 2009 Lightning Talk on perftools.rb](http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html) @ 21:52
|
172
|
+
|
173
|
+
* [Ilya Grigorik's introduction to perftools.rb](http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/)
|
174
|
+
|
175
|
+
* [Google Perftools](http://code.google.com/p/google-perftools/)
|
176
|
+
|
177
|
+
* [Analyzing profiles and interpreting different output formats](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
|
data/bin/pprof.rb
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
CWD = File.expand_path(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
# Echoes +cmd+, runs it through mkmf's xsystem and returns xsystem's result.
# Raises a RuntimeError that points at the build logs when xsystem returns
# a falsy value.
def sys(cmd)
  puts " -- #{cmd}"

  result = xsystem(cmd)
  return result if result

  raise "#{cmd} failed, please report to perftools@tmm1.net with pastie.org link to #{CWD}/mkmf.log and #{CWD}/src/google-perftools-1.6/config.log"
end
|
10
|
+
|
11
|
+
require 'mkmf'
|
12
|
+
require 'fileutils'
|
13
|
+
|
14
|
+
if RUBY_VERSION >= "1.9"
|
15
|
+
begin
|
16
|
+
require "ruby_core_source"
|
17
|
+
rescue LoadError
|
18
|
+
require 'rubygems/user_interaction' # for 1.9.1
|
19
|
+
require 'rubygems/dependency_installer'
|
20
|
+
installer = Gem::DependencyInstaller.new
|
21
|
+
installer.install 'ruby_core_source'
|
22
|
+
|
23
|
+
Gem.refresh
|
24
|
+
Gem.activate('ruby_core_source') # for 1.9.1
|
25
|
+
|
26
|
+
require "ruby_core_source"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Name of the bundled tarball and of the directory it unpacks into.
perftools = File.basename('google-perftools-1.6.tar.gz')
dir = File.basename(perftools, '.tar.gz')

puts "(I'm about to compile google-perftools.. this will definitely take a while)"
ENV["PATCH_GET"] = '0'

Dir.chdir('src') do
  # Start from a pristine tree. File.exist? is used because the File.exists?
  # alias is deprecated and no longer available on current Ruby releases.
  FileUtils.rm_rf(dir) if File.exist?(dir)

  sys("tar zpxvf #{perftools}")
  Dir.chdir(dir) do
    # With DEV set, every step is committed to a scratch git repository so
    # the effect of each patch can be inspected afterwards.
    if ENV['DEV']
      sys("git init")
      sys("git add .")
      sys("git commit -m 'initial source'")
    end

    # [patch name, apply?] pairs, applied in this order.
    [ ['perftools', true],
      ['perftools-notests', true],
      ['perftools-pprof', true],
      ['perftools-gc', true],
      ['perftools-osx', RUBY_PLATFORM =~ /darwin/],
      ['perftools-debug', true],
      ['perftools-objects', true],
      ['perftools-frames', true],
      ['perftools-stddef', true]
    ].each do |patch, apply|
      if apply
        sys("patch -p1 < ../../../patches/#{patch}.patch")
        sys("git commit -am '#{patch}'") if ENV['DEV']
      end
    end

    # Rename SpinLock to ISpinLock throughout the perftools sources.
    sys("sed -i -e 's,SpinLock,ISpinLock,g' src/*.cc src/*.h src/base/*.cc src/base/*.h")
    sys("git commit -am 'rename spinlock'") if ENV['DEV']
  end

  # Ship the patched pprof script in the gem's bin/ directory.
  Dir.chdir(dir) do
    FileUtils.cp 'src/pprof', '../../../bin/'
    FileUtils.chmod 0755, '../../../bin/pprof'
  end

  # Build the static profiler library and copy it next to extconf.rb under
  # the name the extension links against.
  Dir.chdir(dir) do
    if RUBY_PLATFORM =~ /darwin10/
      ENV['CFLAGS'] = ENV['CXXFLAGS'] = '-D_XOPEN_SOURCE'
    end
    sys("./configure --disable-heap-profiler --disable-heap-checker --disable-debugalloc --disable-shared")
    sys("make")
    FileUtils.cp '.libs/libprofiler.a', '../../librubyprofiler.a'
  end
end
|
81
|
+
|
82
|
+
# Link the extension against librubyprofiler.a, which lives next to this file.
$LIBPATH.push(CWD)
$libs = append_library($libs, 'rubyprofiler')

# Appends a -D<name> preprocessor define to the extension's compile flags.
def add_define(name)
  $defs << "-D#{name}"
end

# On these platforms, swap the first word of LDSHARED for $(CXX) so the
# shared object is linked by the C++ driver.
if [/darwin/, /linux/, /freebsd/].any? { |os| RUBY_PLATFORM =~ os }
  CONFIG['LDSHARED'] = "$(CXX) " + CONFIG['LDSHARED'].split[1..-1].join(' ')
end

if RUBY_VERSION < "1.9"
  add_define 'RUBY18'

  have_func('rb_during_gc', 'ruby.h')
  create_makefile 'perftools'
else
  add_define 'RUBY19'

  # Header probe handed to ruby_core_source: method.h is optional, the
  # remaining VM headers are all required.
  hdrs = proc {
    have_header("method.h") # exists on 1.9.2
    %w[vm_core.h iseq.h insns.inc insns_info.inc].all? { |header| have_header(header) }
  }

  made = Ruby_core_source::create_makefile_with_core(hdrs, "perftools")
  if !made
    STDERR.puts "\n\n",
                "***************************************************************************************",
                "********************** Ruby_core_source::create_makefile FAILED ***********************",
                "***************************************************************************************"
    exit(1)
  end
end
|
data/ext/perftools.c
ADDED
@@ -0,0 +1,493 @@
|
|
1
|
+
#ifndef _GNU_SOURCE
|
2
|
+
#define _GNU_SOURCE
|
3
|
+
#endif
|
4
|
+
|
5
|
+
#include <ruby.h>
|
6
|
+
|
7
|
+
#ifndef _XOPEN_SOURCE
|
8
|
+
#define _XOPEN_SOURCE 600
|
9
|
+
#endif
|
10
|
+
|
11
|
+
#include <assert.h>
|
12
|
+
|
13
|
+
void ProfilerGcMark(void (*cb)(VALUE));
|
14
|
+
int ProfilerStart(const char*);
|
15
|
+
void ProfilerStop();
|
16
|
+
void ProfilerFlush();
|
17
|
+
void ProfilerRecord(int, void*, void*);
|
18
|
+
int ProfilingIsEnabledForAllThreads();
|
19
|
+
|
20
|
+
static VALUE Iallocate;
|
21
|
+
static VALUE I__send__;
|
22
|
+
static VALUE Isend;
|
23
|
+
|
24
|
+
/*
 * SAVE_FRAME() appends one frame to result[] as three consecutive slots and
 * advances depth by 3:
 *   slot 0: self when klass is a singleton class and self is a class or
 *           module, otherwise 0
 *   slot 1: klass
 *   slot 2: the method id (ID_ALLOCATOR is reported as Iallocate)
 * Frames with no method, or whose method is __send__ / send, are skipped.
 *
 * It expects the locals self, klass, method, result and depth to be in scope.
 * The body is wrapped in do { } while (0) so that `SAVE_FRAME();` is a single
 * statement and stays safe inside an unbraced if/else.
 */
#define SAVE_FRAME() \
  do { \
    if (method && method != I__send__ && method != Isend) { \
      if (self && FL_TEST(klass, FL_SINGLETON) && (BUILTIN_TYPE(self) == T_CLASS || BUILTIN_TYPE(self) == T_MODULE)) \
        result[depth++] = (void*) self; \
      else \
        result[depth++] = 0; \
      \
      result[depth++] = (void*) klass; \
      result[depth++] = (void*) (method == ID_ALLOCATOR ? Iallocate : method); \
    } \
  } while (0)
|
34
|
+
|
35
|
+
#ifdef RUBY18
|
36
|
+
#include <env.h>
|
37
|
+
#include <node.h>
|
38
|
+
#include <setjmp.h>
|
39
|
+
#include <signal.h>
|
40
|
+
|
41
|
+
static jmp_buf saved_location;
|
42
|
+
static sig_t saved_handler = NULL;
|
43
|
+
|
44
|
+
void
|
45
|
+
segv_handler(int sig)
|
46
|
+
{
|
47
|
+
fprintf(stderr, "*** Caught a segfault during stack trace sampling!\n");
|
48
|
+
assert(saved_handler);
|
49
|
+
_longjmp(saved_location, 1);
|
50
|
+
}
|
51
|
+
|
52
|
+
int
|
53
|
+
rb_stack_trace(void** result, int max_depth)
|
54
|
+
{
|
55
|
+
struct FRAME *frame = ruby_frame;
|
56
|
+
NODE *n;
|
57
|
+
|
58
|
+
VALUE klass, self;
|
59
|
+
ID method;
|
60
|
+
int depth = 0;
|
61
|
+
|
62
|
+
if (max_depth == 0)
|
63
|
+
return 0;
|
64
|
+
|
65
|
+
#ifdef HAVE_RB_DURING_GC
|
66
|
+
if (rb_during_gc()) {
|
67
|
+
result[0] = rb_gc;
|
68
|
+
return 1;
|
69
|
+
}
|
70
|
+
#endif
|
71
|
+
|
72
|
+
// should not be possible to get here and already have a saved signal handler
|
73
|
+
assert(!saved_handler);
|
74
|
+
|
75
|
+
// ruby_frame is occasionally inconsistent, so temporarily catch segfaults
|
76
|
+
saved_handler = signal(SIGSEGV, segv_handler);
|
77
|
+
if (_setjmp(saved_location)) {
|
78
|
+
signal(SIGSEGV, saved_handler);
|
79
|
+
saved_handler = NULL;
|
80
|
+
return 0;
|
81
|
+
}
|
82
|
+
|
83
|
+
/*
|
84
|
+
// XXX does it make sense to track allocations or not?
|
85
|
+
if (frame->last_func == ID_ALLOCATOR) {
|
86
|
+
frame = frame->prev;
|
87
|
+
}
|
88
|
+
|
89
|
+
// XXX SIGPROF can come in while ruby_frame is in an inconsistent state (rb_call0), so we ignore the top-most frame
|
90
|
+
if (frame->last_func && frame->last_class) {
|
91
|
+
self = frame->self;
|
92
|
+
klass = frame->last_class;
|
93
|
+
method = frame->last_func;
|
94
|
+
SAVE_FRAME();
|
95
|
+
}
|
96
|
+
*/
|
97
|
+
|
98
|
+
for (; frame && (n = frame->node); frame = frame->prev) {
|
99
|
+
if (frame->prev && frame->prev->last_func) {
|
100
|
+
if (frame->prev->node == n) {
|
101
|
+
if (frame->prev->last_func == frame->last_func) continue;
|
102
|
+
}
|
103
|
+
|
104
|
+
if (depth+3 > max_depth)
|
105
|
+
break;
|
106
|
+
|
107
|
+
self = frame->prev->self;
|
108
|
+
klass = frame->prev->last_class;
|
109
|
+
method = frame->prev->last_func;
|
110
|
+
SAVE_FRAME();
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
signal(SIGSEGV, saved_handler);
|
115
|
+
saved_handler = NULL;
|
116
|
+
|
117
|
+
assert(depth <= max_depth);
|
118
|
+
return depth;
|
119
|
+
}
|
120
|
+
#endif
|
121
|
+
|
122
|
+
#ifdef RUBY19
|
123
|
+
#include <vm_core.h>
|
124
|
+
#include <iseq.h>
|
125
|
+
|
126
|
+
int
|
127
|
+
rb_stack_trace(void** result, int max_depth)
|
128
|
+
{
|
129
|
+
rb_thread_t *th = GET_THREAD();
|
130
|
+
rb_control_frame_t *cfp = th->cfp;
|
131
|
+
rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
|
132
|
+
|
133
|
+
VALUE klass, self;
|
134
|
+
ID method;
|
135
|
+
int depth = 0;
|
136
|
+
|
137
|
+
if (max_depth == 0)
|
138
|
+
return 0;
|
139
|
+
|
140
|
+
if (rb_during_gc()) {
|
141
|
+
result[0] = rb_gc;
|
142
|
+
return 1;
|
143
|
+
}
|
144
|
+
|
145
|
+
while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp) && depth+3 <= max_depth) {
|
146
|
+
rb_iseq_t *iseq = cfp->iseq;
|
147
|
+
|
148
|
+
if (iseq && iseq->type == ISEQ_TYPE_METHOD) {
|
149
|
+
self = 0; // maybe use cfp->self here, but iseq->self is a ISeq ruby obj
|
150
|
+
klass = iseq->klass;
|
151
|
+
method = iseq->defined_method_id;
|
152
|
+
SAVE_FRAME();
|
153
|
+
}
|
154
|
+
|
155
|
+
if (depth+3 > max_depth)
|
156
|
+
break;
|
157
|
+
|
158
|
+
switch (VM_FRAME_TYPE(cfp)) {
|
159
|
+
case VM_FRAME_MAGIC_METHOD:
|
160
|
+
case VM_FRAME_MAGIC_CFUNC:
|
161
|
+
self = cfp->self;
|
162
|
+
#ifdef HAVE_METHOD_H
|
163
|
+
if (!cfp->me) break;
|
164
|
+
|
165
|
+
klass = cfp->me->klass;
|
166
|
+
method = cfp->me->called_id;
|
167
|
+
#else
|
168
|
+
klass = cfp->method_class;
|
169
|
+
method = cfp->method_id;
|
170
|
+
#endif
|
171
|
+
SAVE_FRAME();
|
172
|
+
break;
|
173
|
+
}
|
174
|
+
|
175
|
+
cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
|
176
|
+
}
|
177
|
+
|
178
|
+
assert(depth <= max_depth);
|
179
|
+
return depth;
|
180
|
+
}
|
181
|
+
|
182
|
+
#if 0
/*
 * Debug helper (compiled out): prints every control frame of the current
 * thread to stdout, from th->cfp to the end of the control-frame stack.
 */
void
rb_dump_stack()
{
  rb_thread_t *th = GET_THREAD();
  rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
  rb_control_frame_t *cfp;

  printf("\n\n*********************\n");

  for (cfp = th->cfp; RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp);
       cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp)) {
    rb_iseq_t *iseq = cfp->iseq;

    printf("cfp (%p):\n", cfp);
    printf(" type: 0x%x\n", VM_FRAME_TYPE(cfp));
    printf(" pc: %p\n", cfp->pc);
    printf(" iseq: %p\n", iseq);

    if (iseq) {
      printf(" type: %d\n", FIX2INT(iseq->type));
      printf(" self: %p\n", iseq->self);
      printf(" klass: %p (%s)\n", iseq->klass, iseq->klass ? rb_class2name(iseq->klass) : "");
      printf(" method: %p (%s)\n", iseq->defined_method_id, iseq->defined_method_id ? rb_id2name(iseq->defined_method_id) : "");
    }

    printf(" self: %p\n", cfp->self);
    printf(" klass: %p (%s)\n", cfp->method_class, cfp->method_class ? rb_class2name(cfp->method_class) : "");
    printf(" method: %p (%s)\n", cfp->method_id, cfp->method_id ? rb_id2name(cfp->method_id) : "");

    printf("\n");
  }

  printf("*********************\n\n");
}
#endif
|
213
|
+
#endif
|
214
|
+
|
215
|
+
static VALUE objprofiler_setup();
|
216
|
+
static VALUE objprofiler_teardown();
|
217
|
+
static VALUE methprofiler_setup();
|
218
|
+
static VALUE methprofiler_teardown();
|
219
|
+
|
220
|
+
/* CpuProfiler */
|
221
|
+
|
222
|
+
static VALUE cPerfTools;
|
223
|
+
static VALUE cCpuProfiler;
|
224
|
+
static VALUE eError;
|
225
|
+
static VALUE bProfilerRunning;
|
226
|
+
static VALUE gc_hook;
|
227
|
+
|
228
|
+
static VALUE
|
229
|
+
cpuprofiler_running_p(VALUE self)
|
230
|
+
{
|
231
|
+
return bProfilerRunning;
|
232
|
+
}
|
233
|
+
|
234
|
+
static VALUE
|
235
|
+
cpuprofiler_stop(VALUE self)
|
236
|
+
{
|
237
|
+
if (!bProfilerRunning)
|
238
|
+
return Qfalse;
|
239
|
+
|
240
|
+
bProfilerRunning = Qfalse;
|
241
|
+
objprofiler_teardown();
|
242
|
+
methprofiler_teardown();
|
243
|
+
ProfilerStop();
|
244
|
+
ProfilerFlush();
|
245
|
+
|
246
|
+
return Qtrue;
|
247
|
+
}
|
248
|
+
|
249
|
+
static VALUE
|
250
|
+
cpuprofiler_start(VALUE self, VALUE filename)
|
251
|
+
{
|
252
|
+
StringValue(filename);
|
253
|
+
|
254
|
+
if (bProfilerRunning)
|
255
|
+
rb_raise(eError, "profiler is already running");
|
256
|
+
|
257
|
+
if (getenv("CPUPROFILE_OBJECTS"))
|
258
|
+
objprofiler_setup();
|
259
|
+
else if (getenv("CPUPROFILE_METHODS"))
|
260
|
+
methprofiler_setup();
|
261
|
+
|
262
|
+
if (ProfilerStart(RSTRING_PTR(filename))) {
|
263
|
+
bProfilerRunning = Qtrue;
|
264
|
+
} else {
|
265
|
+
rb_raise(eError, "profiler could not be started");
|
266
|
+
}
|
267
|
+
|
268
|
+
if (rb_block_given_p()) {
|
269
|
+
rb_yield(Qnil);
|
270
|
+
cpuprofiler_stop(self);
|
271
|
+
}
|
272
|
+
|
273
|
+
return Qtrue;
|
274
|
+
}
|
275
|
+
|
276
|
+
static void
|
277
|
+
cpuprofiler_gc_mark()
|
278
|
+
{
|
279
|
+
ProfilerGcMark(rb_gc_mark);
|
280
|
+
}
|
281
|
+
|
282
|
+
/* ObjProfiler */
|
283
|
+
|
284
|
+
#include <assert.h>
|
285
|
+
#include <ucontext.h>
|
286
|
+
#include <unistd.h>
|
287
|
+
#include <signal.h>
|
288
|
+
#include <stdio.h>
|
289
|
+
#include <stdlib.h>
|
290
|
+
#include <string.h>
|
291
|
+
#include <sys/mman.h>
|
292
|
+
|
293
|
+
static VALUE bObjProfilerRunning;
|
294
|
+
static VALUE bMethProfilerRunning;
|
295
|
+
/* Number of leading bytes of rb_newobj that are saved and patched. */
#define NUM_ORIG_BYTES 2

/* Address and original value of every patched byte. */
struct {
  char *location;
  unsigned char value;
} orig_bytes[NUM_ORIG_BYTES];

/*
 * Rounds addr down to the start of the memory page that contains it.
 *
 * The page size is queried from the system instead of assuming a fixed
 * mask; 4096 is used only if the query fails.
 */
static inline void *
page_align(void *addr) {
  assert(addr != NULL);

  long page_size = sysconf(_SC_PAGESIZE);
  if (page_size <= 0)
    page_size = 4096;

  return (void *)((size_t)addr & ~((size_t)page_size - 1));
}

/*
 * Makes the NUM_ORIG_BYTES bytes starting at addr readable, writable and
 * executable so they can be patched in place. A failure of mprotect() is
 * reported with perror() and otherwise ignored.
 */
static void
unprotect_page(char *addr) {
  assert(addr != NULL);
  char *aligned_addr = page_align(addr);

  /* The range must reach past the bytes that get patched. Stopping at addr
   * would yield a zero-length range when addr is itself page-aligned, and
   * would miss the following page when the patched bytes straddle a page
   * boundary. */
  size_t len = (size_t)(addr - aligned_addr) + NUM_ORIG_BYTES;

  if (mprotect(aligned_addr, len, PROT_READ|PROT_WRITE|PROT_EXEC) != 0)
    perror("mprotect");
}
|
315
|
+
|
316
|
+
static inline char**
|
317
|
+
uc_get_ip(ucontext_t *uc) {
|
318
|
+
# if defined(__FreeBSD__)
|
319
|
+
# ifdef __i386__
|
320
|
+
# define program_counter uc_mcontext.mc_eip
|
321
|
+
# else
|
322
|
+
# define program_counter uc_mcontext.mc_rip
|
323
|
+
# endif
|
324
|
+
# elif defined(__dietlibc__)
|
325
|
+
# define program_counter uc_mcontext.rip
|
326
|
+
# elif defined(__APPLE__)
|
327
|
+
# ifdef __LP64__
|
328
|
+
# define program_counter uc_mcontext->__ss.__rip
|
329
|
+
# else
|
330
|
+
# define program_counter uc_mcontext->__ss.__eip
|
331
|
+
# endif
|
332
|
+
# else
|
333
|
+
# ifdef REG_RIP
|
334
|
+
# define program_counter uc_mcontext.gregs[REG_RIP]
|
335
|
+
# else
|
336
|
+
# define program_counter uc_mcontext.gregs[REG_EIP]
|
337
|
+
# endif
|
338
|
+
# endif
|
339
|
+
return (char**)&uc->program_counter;
|
340
|
+
}
|
341
|
+
|
342
|
+
#if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
|
343
|
+
static void
|
344
|
+
event_handler(rb_event_flag_t event, VALUE data, VALUE self, ID id, VALUE klass) {
|
345
|
+
ProfilerRecord(0, NULL, NULL);
|
346
|
+
}
|
347
|
+
#else
|
348
|
+
static void
|
349
|
+
event_handler(rb_event_t event, NODE *node, VALUE self, ID id, VALUE klass) {
|
350
|
+
ProfilerRecord(0, NULL, NULL);
|
351
|
+
}
|
352
|
+
#endif
|
353
|
+
|
354
|
+
static VALUE
|
355
|
+
methprofiler_setup()
|
356
|
+
{
|
357
|
+
if (bMethProfilerRunning)
|
358
|
+
return Qtrue;
|
359
|
+
|
360
|
+
#if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
|
361
|
+
rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL, 0);
|
362
|
+
#else
|
363
|
+
rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL);
|
364
|
+
#endif
|
365
|
+
|
366
|
+
bMethProfilerRunning = Qtrue;
|
367
|
+
return Qtrue;
|
368
|
+
}
|
369
|
+
|
370
|
+
static VALUE
|
371
|
+
methprofiler_teardown()
|
372
|
+
{
|
373
|
+
if (!bMethProfilerRunning)
|
374
|
+
return Qfalse;
|
375
|
+
|
376
|
+
rb_remove_event_hook(event_handler);
|
377
|
+
|
378
|
+
bMethProfilerRunning = Qfalse;
|
379
|
+
return Qtrue;
|
380
|
+
}
|
381
|
+
|
382
|
+
static void
|
383
|
+
trap_handler(int sig, siginfo_t *info, void *data) {
|
384
|
+
int i;
|
385
|
+
ucontext_t *uc = (ucontext_t *)data;
|
386
|
+
char **ip = uc_get_ip(uc);
|
387
|
+
|
388
|
+
// printf("signal: %d, addr: %p, ip: %p\n", signal, info->si_addr, *ip);
|
389
|
+
|
390
|
+
for (i=0; i<NUM_ORIG_BYTES; i++) {
|
391
|
+
if (orig_bytes[i].location == *ip-1) {
|
392
|
+
// restore original byte
|
393
|
+
orig_bytes[i].location[0] = orig_bytes[i].value;
|
394
|
+
|
395
|
+
// setup next breakpoint
|
396
|
+
orig_bytes[(i+1)%NUM_ORIG_BYTES].location[0] = '\xCC';
|
397
|
+
|
398
|
+
// first breakpoint is the notification
|
399
|
+
if (i == 0)
|
400
|
+
ProfilerRecord(sig, info, data);
|
401
|
+
|
402
|
+
// reset instruction pointer
|
403
|
+
*ip -= 1;
|
404
|
+
|
405
|
+
break;
|
406
|
+
}
|
407
|
+
}
|
408
|
+
}
|
409
|
+
|
410
|
+
static VALUE
|
411
|
+
objprofiler_setup()
|
412
|
+
{
|
413
|
+
if (bObjProfilerRunning)
|
414
|
+
return Qtrue;
|
415
|
+
|
416
|
+
int i;
|
417
|
+
struct sigaction sig = { .sa_sigaction = trap_handler, .sa_flags = SA_SIGINFO };
|
418
|
+
sigemptyset(&sig.sa_mask);
|
419
|
+
sigaction(SIGTRAP, &sig, NULL);
|
420
|
+
|
421
|
+
unprotect_page((char*)rb_newobj);
|
422
|
+
|
423
|
+
for (i=0; i<NUM_ORIG_BYTES; i++) {
|
424
|
+
orig_bytes[i].location = (char *)rb_newobj + i;
|
425
|
+
orig_bytes[i].value = ((unsigned char*)rb_newobj)[i];
|
426
|
+
orig_bytes[i].location[0] = '\xCC';
|
427
|
+
}
|
428
|
+
|
429
|
+
// setenv("CPUPROFILE_OBJECTS", "1", 1);
|
430
|
+
bObjProfilerRunning = Qtrue;
|
431
|
+
return Qtrue;
|
432
|
+
}
|
433
|
+
|
434
|
+
static VALUE
|
435
|
+
objprofiler_teardown()
|
436
|
+
{
|
437
|
+
if (!bObjProfilerRunning)
|
438
|
+
return Qfalse;
|
439
|
+
|
440
|
+
int i;
|
441
|
+
struct sigaction sig = { .sa_handler = SIG_IGN };
|
442
|
+
sigemptyset(&sig.sa_mask);
|
443
|
+
sigaction(SIGTRAP, &sig, NULL);
|
444
|
+
|
445
|
+
for (i=0; i<NUM_ORIG_BYTES; i++) {
|
446
|
+
orig_bytes[i].location[0] = orig_bytes[i].value;
|
447
|
+
}
|
448
|
+
|
449
|
+
// unsetenv("CPUPROFILE_OBJECTS");
|
450
|
+
bObjProfilerRunning = Qfalse;
|
451
|
+
return Qtrue;
|
452
|
+
}
|
453
|
+
|
454
|
+
/* Init */
|
455
|
+
|
456
|
+
static void
|
457
|
+
profiler_at_exit(VALUE self)
|
458
|
+
{
|
459
|
+
cpuprofiler_stop(self);
|
460
|
+
}
|
461
|
+
|
462
|
+
void
|
463
|
+
Init_perftools()
|
464
|
+
{
|
465
|
+
cPerfTools = rb_define_class("PerfTools", rb_cObject);
|
466
|
+
eError = rb_define_class_under(cPerfTools, "Error", rb_eStandardError);
|
467
|
+
cCpuProfiler = rb_define_class_under(cPerfTools, "CpuProfiler", rb_cObject);
|
468
|
+
|
469
|
+
Iallocate = rb_intern("allocate");
|
470
|
+
I__send__ = rb_intern("__send__");
|
471
|
+
Isend = rb_intern("send");
|
472
|
+
|
473
|
+
bMethProfilerRunning = bObjProfilerRunning = bProfilerRunning = Qfalse;
|
474
|
+
|
475
|
+
rb_define_singleton_method(cCpuProfiler, "running?", cpuprofiler_running_p, 0);
|
476
|
+
rb_define_singleton_method(cCpuProfiler, "start", cpuprofiler_start, 1);
|
477
|
+
rb_define_singleton_method(cCpuProfiler, "stop", cpuprofiler_stop, 0);
|
478
|
+
|
479
|
+
gc_hook = Data_Wrap_Struct(cCpuProfiler, cpuprofiler_gc_mark, NULL, NULL);
|
480
|
+
rb_global_variable(&gc_hook);
|
481
|
+
|
482
|
+
if (ProfilingIsEnabledForAllThreads()) { // profiler is already running?
|
483
|
+
bProfilerRunning = Qtrue;
|
484
|
+
|
485
|
+
if (getenv("CPUPROFILE_OBJECTS")) { // want to profile objects
|
486
|
+
objprofiler_setup();
|
487
|
+
} else if (getenv("CPUPROFILE_METHODS")) {
|
488
|
+
methprofiler_setup();
|
489
|
+
}
|
490
|
+
|
491
|
+
rb_set_end_proc(profiler_at_exit, 0); // make sure to cleanup before the VM shuts down
|
492
|
+
}
|
493
|
+
}
|