honkster-perftools.rb 0.5.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,177 @@
1
+ # perftools.rb
2
+
3
+ google-perftools for ruby code
4
+ (c) 2010 Aman Gupta (tmm1)
5
+ http://www.ruby-lang.org/en/LICENSE.txt
6
+
7
+ ## Usage (in a webapp)
8
+
9
+ Use [rack-perftools_profiler](https://github.com/bhb/rack-perftools_profiler):
10
+
11
+ require 'rack/perftools_profiler'
12
+ config.middleware.use ::Rack::PerftoolsProfiler, :default_printer => 'gif'
13
+
14
+ Simply add `profile=true` to profile a request:
15
+
16
+ curl -o 10_requests_to_homepage.gif "http://localhost:3000/homepage?profile=true&times=10"
17
+
18
+ ## Usage (from Ruby)
19
+
20
+ Run the profiler with a block:
21
+
22
+ require 'perftools'
23
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
24
+ 5_000_000.times{ 1+2+3+4+5 }
25
+ end
26
+
27
+ Start and stop the profiler manually:
28
+
29
+ require 'perftools'
30
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
31
+ 5_000_000.times{ 1+2+3+4+5 }
32
+ PerfTools::CpuProfiler.stop
33
+
34
+ ## Usage (externally)
35
+
36
+ Profile an existing ruby application without modifying it:
37
+
38
+ $ CPUPROFILE=/tmp/my_app_profile \
39
+ RUBYOPT="-r`gem which perftools | tail -1`" \
40
+ ruby my_app.rb
41
+
42
+ ## Profiler Modes
43
+
44
+ The profiler can be run in one of many modes, set via an environment
45
+ variable before the library is loaded:
46
+
47
+ * `CPUPROFILE_REALTIME=1`
48
+
49
+ Use walltime instead of cputime profiling. This will capture all time spent in a method, even if it does not involve the CPU.
50
+
51
+ For example, `sleep()` is not expensive in terms of cputime, but very expensive in walltime. walltime will also show functions spending a lot of time in network i/o.
52
+
53
+ * `CPUPROFILE_OBJECTS=1`
54
+
55
+ Profile object allocations instead of cpu/wall time. Each sample represents one object created inside that function.
56
+
57
+ * `CPUPROFILE_METHODS=1`
58
+
59
+ Profile method calls. Each sample represents one method call made inside that function.
60
+
61
+ The sampling interval of the profiler can be adjusted to collect more
62
+ (for better profile detail) or fewer samples (for lower overhead):
63
+
64
+ * `CPUPROFILE_FREQUENCY=500`
65
+
66
+ The default sampling frequency is 100 samples per second. The valid range is 1-4000.
67
+
68
+ ## Reporting
69
+
70
+ pprof.rb --text /tmp/add_numbers_profile
71
+
72
+ pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
73
+
74
+ pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
75
+
76
+ pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
77
+ kcachegrind /tmp/add_numbers_profile.grind
78
+
79
+ pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
80
+
81
+ pprof.rb --text --ignore=Gem /tmp/my_app_profile
82
+
83
+
84
+ For more options, see [pprof documentation](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
85
+
86
+
87
+ ### Examples
88
+
89
+ #### pprof.rb --text
90
+
91
+ Total: 1735 samples
92
+ 1487 85.7% 85.7% 1735 100.0% Integer#times
93
+ 248 14.3% 100.0% 248 14.3% Fixnum#+
94
+
95
+ #### pprof.rb --gif
96
+
97
+ * Simple [require 'rubygems'](http://perftools-rb.rubyforge.org/examples/rubygems.gif) profile
98
+
99
+ * Comparing redis-rb [with](http://perftools-rb.rubyforge.org/examples/redis-rb.gif) and [without](http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif) SystemTimer based socket timeouts
100
+
101
+ * [Sinatra](http://perftools-rb.rubyforge.org/examples/sinatra.gif) vs. [Merb](http://perftools-rb.rubyforge.org/examples/merb.gif) vs. [Rails](http://perftools-rb.rubyforge.org/examples/rails.gif)
102
+
103
+ * C-level profile of EventMachine + epoll + Ruby threads [before](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif) and [after](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif) a [6 line EM bugfix](http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/)
104
+
105
+ * C-level profile of a [ruby/rails vm](http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif)
106
+ * 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
107
+
108
+
109
+ ## Installation
110
+
111
+ Just install the gem, which will download, patch and compile google-perftools for you:
112
+
113
+ sudo gem install perftools.rb
114
+
115
+ Or build your own gem:
116
+
117
+ git clone git://github.com/tmm1/perftools.rb
118
+ cd perftools.rb
119
+ gem build perftools.rb.gemspec
120
+ gem install perftools.rb
121
+
122
+
123
+ You'll also need graphviz to generate call graphs using dot:
124
+
125
+ sudo brew install graphviz ghostscript # osx
126
+ sudo apt-get install graphviz ps2pdf # debian/ubuntu
127
+
128
+ ## Advantages over ruby-prof
129
+
130
+ * Sampling profiler
131
+
132
+ * perftools samples your process using setitimer() so it can be used in production with minimal overhead.
133
+
134
+
135
+ ## Profiling the Ruby VM and C extensions
136
+
137
+ To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64).
138
+
139
+ Download:
140
+
141
+ wget http://google-perftools.googlecode.com/files/google-perftools-1.6.tar.gz
142
+ tar zxvf google-perftools-1.6.tar.gz
143
+ cd google-perftools-1.6
144
+
145
+ Compile:
146
+
147
+ ./configure --prefix=/opt
148
+ make
149
+ sudo make install
150
+
151
+ Profile:
152
+
153
+ export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
154
+ export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
155
+ CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
156
+
157
+ Report:
158
+
159
+ pprof `which ruby` --text /tmp/ruby_interpreter.profile
160
+
161
+
162
+ ## TODO
163
+
164
+ * Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
165
+ * Allow both C and Ruby profiling
166
+ * Add setter for the sampling interval
167
+
168
+
169
+ ## Resources
170
+
171
+ * [GoRuCo 2009 Lightning Talk on perftools.rb](http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html) @ 21:52
172
+
173
+ * [Ilya Grigorik's introduction to perftools.rb](http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/)
174
+
175
+ * [Google Perftools](http://code.google.com/p/google-perftools/)
176
+
177
+ * [Analyzing profiles and interpreting different output formats](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Thin wrapper around the pprof script that sits next to this file: it runs
# pprof with the current Ruby interpreter's binary as the first argument,
# followed by whatever arguments were given on the command line.
require 'rbconfig'

pprof = "#{File.dirname(__FILE__)}/pprof"
# RbConfig is the supported constant name; the old Config alias is deprecated.
ruby = "#{RbConfig::CONFIG['bindir']}/#{RbConfig::CONFIG['ruby_install_name']}"

# Pass an argument list (not one joined string) so no shell re-splits or
# re-interprets arguments that contain spaces or metacharacters.
exec(pprof, ruby, *ARGV)
@@ -0,0 +1,116 @@
1
+ CWD = File.expand_path(File.dirname(__FILE__))
2
+
3
# Echo +cmd+, run it through mkmf's xsystem, and return xsystem's result.
# Raises a RuntimeError naming the log files to report when the command fails.
def sys(cmd)
  puts " -- #{cmd}"
  ret = xsystem(cmd)
  return ret if ret

  raise "#{cmd} failed, please report to perftools@tmm1.net with pastie.org link to #{CWD}/mkmf.log and #{CWD}/src/google-perftools-1.6/config.log"
end
10
+
11
+ require 'mkmf'
12
+ require 'fileutils'
13
+
14
# On Ruby 1.9+ the build goes through the ruby_core_source gem (see the
# Ruby_core_source::create_makefile_with_core call further down). Load it,
# installing it on the fly if it is not available yet.
if RUBY_VERSION >= "1.9"
  begin
    require "ruby_core_source"
  rescue LoadError
    require 'rubygems/user_interaction' # for 1.9.1
    require 'rubygems/dependency_installer'
    Gem::DependencyInstaller.new.install 'ruby_core_source'

    # make the freshly installed gem visible to this process
    Gem.refresh
    Gem.activate('ruby_core_source') # for 1.9.1

    require "ruby_core_source"
  end
end
29
+
30
# Unpack the bundled google-perftools tarball under src/, apply the local
# patches, copy pprof into bin/, then configure and build a static
# libprofiler that is copied next to this file as librubyprofiler.a.
perftools = File.basename('google-perftools-1.6.tar.gz')
dir = File.basename(perftools, '.tar.gz')

puts "(I'm about to compile google-perftools.. this will definitely take a while)"
ENV["PATCH_GET"] = '0'

Dir.chdir('src') do
  # File.exist? (not the deprecated File.exists?, removed in Ruby 3.2)
  FileUtils.rm_rf(dir) if File.exist?(dir)

  sys("tar zpxvf #{perftools}")
  Dir.chdir(dir) do
    # With DEV set, every step is committed to a scratch git repo so the
    # effect of each patch can be inspected.
    if ENV['DEV']
      sys("git init")
      sys("git add .")
      sys("git commit -m 'initial source'")
    end

    # [patch name, apply?] -- applied in this order
    [ ['perftools', true],
      ['perftools-notests', true],
      ['perftools-pprof', true],
      ['perftools-gc', true],
      ['perftools-osx', RUBY_PLATFORM =~ /darwin/],
      ['perftools-debug', true],
      ['perftools-objects', true],
      ['perftools-frames', true],
      ['perftools-stddef', true]
    ].each do |patch, apply|
      if apply
        sys("patch -p1 < ../../../patches/#{patch}.patch")
        sys("git commit -am '#{patch}'") if ENV['DEV']
      end
    end

    sys("sed -i -e 's,SpinLock,ISpinLock,g' src/*.cc src/*.h src/base/*.cc src/base/*.h")
    sys("git commit -am 'rename spinlock'") if ENV['DEV']
  end

  Dir.chdir(dir) do
    FileUtils.cp 'src/pprof', '../../../bin/'
    FileUtils.chmod 0755, '../../../bin/pprof'
  end

  Dir.chdir(dir) do
    if RUBY_PLATFORM =~ /darwin10/
      ENV['CFLAGS'] = ENV['CXXFLAGS'] = '-D_XOPEN_SOURCE'
    end
    sys("./configure --disable-heap-profiler --disable-heap-checker --disable-debugalloc --disable-shared")
    sys("make")
    FileUtils.cp '.libs/libprofiler.a', '../../librubyprofiler.a'
  end
end
81
+
82
# Link the extension against the librubyprofiler library located in CWD.
$LIBPATH.push(CWD)
$libs = append_library($libs, 'rubyprofiler')
84
# Append a -D<name> preprocessor define to mkmf's $defs list.
def add_define(name)
  $defs.push("-D" + name.to_s)
end
87
+
88
# Replace the first word of LDSHARED with $(CXX) on these platforms.
if RUBY_PLATFORM =~ /darwin/ || RUBY_PLATFORM =~ /linux/ || RUBY_PLATFORM =~ /freebsd/
  CONFIG['LDSHARED'] = "$(CXX) " + CONFIG['LDSHARED'].split[1..-1].join(' ')
end

# Generate the Makefile. 1.9+ goes through ruby_core_source and defines
# RUBY19; older rubies define RUBY18 and probe for rb_during_gc.
if RUBY_VERSION >= "1.9"
  add_define 'RUBY19'

  # Header probe handed to ruby_core_source; its value is the result of the
  # vm_core.h/iseq.h/insns.inc/insns_info.inc chain (method.h is optional).
  hdrs = proc {
    have_header("method.h") # exists on 1.9.2
    have_header("vm_core.h") and
    have_header("iseq.h") and
    have_header("insns.inc") and
    have_header("insns_info.inc")
  }

  made = Ruby_core_source::create_makefile_with_core(hdrs, "perftools")
  unless made
    STDERR.puts "\n\n"
    STDERR.puts "***************************************************************************************"
    STDERR.puts "********************** Ruby_core_source::create_makefile FAILED ***********************"
    STDERR.puts "***************************************************************************************"
    exit(1)
  end
else
  add_define 'RUBY18'

  have_func('rb_during_gc', 'ruby.h')
  create_makefile 'perftools'
end
@@ -0,0 +1,493 @@
1
+ #ifndef _GNU_SOURCE
2
+ #define _GNU_SOURCE
3
+ #endif
4
+
5
+ #include <ruby.h>
6
+
7
+ #ifndef _XOPEN_SOURCE
8
+ #define _XOPEN_SOURCE 600
9
+ #endif
10
+
11
+ #include <assert.h>
12
+
13
+ void ProfilerGcMark(void (*cb)(VALUE));
14
+ int ProfilerStart(const char*);
15
+ void ProfilerStop();
16
+ void ProfilerFlush();
17
+ void ProfilerRecord(int, void*, void*);
18
+ int ProfilingIsEnabledForAllThreads();
19
+
20
+ static VALUE Iallocate;
21
+ static VALUE I__send__;
22
+ static VALUE Isend;
23
+
24
/*
 * Append one frame record to result[] as three consecutive slots:
 *   [0] self, but only when klass is a singleton class and self is a
 *       T_CLASS or T_MODULE object; otherwise 0
 *   [1] klass
 *   [2] the method ID (ID_ALLOCATOR is recorded as Iallocate)
 * Frames with no method, or whose method is __send__ / send, are skipped.
 *
 * Expects `self`, `klass`, `method`, `result` and `depth` to be in scope at
 * the expansion site. Wrapped in do/while(0) so that `SAVE_FRAME();` is a
 * single statement and cannot capture a following `else`.
 */
#define SAVE_FRAME() \
  do { \
    if (method && method != I__send__ && method != Isend) { \
      if (self && FL_TEST(klass, FL_SINGLETON) && (BUILTIN_TYPE(self) == T_CLASS || BUILTIN_TYPE(self) == T_MODULE)) \
        result[depth++] = (void*) self; \
      else \
        result[depth++] = 0; \
      \
      result[depth++] = (void*) klass; \
      result[depth++] = (void*) (method == ID_ALLOCATOR ? Iallocate : method); \
    } \
  } while (0)
34
+
35
+ #ifdef RUBY18
36
+ #include <env.h>
37
+ #include <node.h>
38
+ #include <setjmp.h>
39
+ #include <signal.h>
40
+
41
+ static jmp_buf saved_location;
42
+ static sig_t saved_handler = NULL;
43
+
44
+ void
45
+ segv_handler(int sig)
46
+ {
47
+ fprintf(stderr, "*** Caught a segfault during stack trace sampling!\n");
48
+ assert(saved_handler);
49
+ _longjmp(saved_location, 1);
50
+ }
51
+
52
+ int
53
+ rb_stack_trace(void** result, int max_depth)
54
+ {
55
+ struct FRAME *frame = ruby_frame;
56
+ NODE *n;
57
+
58
+ VALUE klass, self;
59
+ ID method;
60
+ int depth = 0;
61
+
62
+ if (max_depth == 0)
63
+ return 0;
64
+
65
+ #ifdef HAVE_RB_DURING_GC
66
+ if (rb_during_gc()) {
67
+ result[0] = rb_gc;
68
+ return 1;
69
+ }
70
+ #endif
71
+
72
+ // should not be possible to get here and already have a saved signal handler
73
+ assert(!saved_handler);
74
+
75
+ // ruby_frame is occasionally inconsistent, so temporarily catch segfaults
76
+ saved_handler = signal(SIGSEGV, segv_handler);
77
+ if (_setjmp(saved_location)) {
78
+ signal(SIGSEGV, saved_handler);
79
+ saved_handler = NULL;
80
+ return 0;
81
+ }
82
+
83
+ /*
84
+ // XXX does it make sense to track allocations or not?
85
+ if (frame->last_func == ID_ALLOCATOR) {
86
+ frame = frame->prev;
87
+ }
88
+
89
+ // XXX SIGPROF can come in while ruby_frame is in an inconsistent state (rb_call0), so we ignore the top-most frame
90
+ if (frame->last_func && frame->last_class) {
91
+ self = frame->self;
92
+ klass = frame->last_class;
93
+ method = frame->last_func;
94
+ SAVE_FRAME();
95
+ }
96
+ */
97
+
98
+ for (; frame && (n = frame->node); frame = frame->prev) {
99
+ if (frame->prev && frame->prev->last_func) {
100
+ if (frame->prev->node == n) {
101
+ if (frame->prev->last_func == frame->last_func) continue;
102
+ }
103
+
104
+ if (depth+3 > max_depth)
105
+ break;
106
+
107
+ self = frame->prev->self;
108
+ klass = frame->prev->last_class;
109
+ method = frame->prev->last_func;
110
+ SAVE_FRAME();
111
+ }
112
+ }
113
+
114
+ signal(SIGSEGV, saved_handler);
115
+ saved_handler = NULL;
116
+
117
+ assert(depth <= max_depth);
118
+ return depth;
119
+ }
120
+ #endif
121
+
122
+ #ifdef RUBY19
123
+ #include <vm_core.h>
124
+ #include <iseq.h>
125
+
126
+ int
127
+ rb_stack_trace(void** result, int max_depth)
128
+ {
129
+ rb_thread_t *th = GET_THREAD();
130
+ rb_control_frame_t *cfp = th->cfp;
131
+ rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
132
+
133
+ VALUE klass, self;
134
+ ID method;
135
+ int depth = 0;
136
+
137
+ if (max_depth == 0)
138
+ return 0;
139
+
140
+ if (rb_during_gc()) {
141
+ result[0] = rb_gc;
142
+ return 1;
143
+ }
144
+
145
+ while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp) && depth+3 <= max_depth) {
146
+ rb_iseq_t *iseq = cfp->iseq;
147
+
148
+ if (iseq && iseq->type == ISEQ_TYPE_METHOD) {
149
+ self = 0; // maybe use cfp->self here, but iseq->self is a ISeq ruby obj
150
+ klass = iseq->klass;
151
+ method = iseq->defined_method_id;
152
+ SAVE_FRAME();
153
+ }
154
+
155
+ if (depth+3 > max_depth)
156
+ break;
157
+
158
+ switch (VM_FRAME_TYPE(cfp)) {
159
+ case VM_FRAME_MAGIC_METHOD:
160
+ case VM_FRAME_MAGIC_CFUNC:
161
+ self = cfp->self;
162
+ #ifdef HAVE_METHOD_H
163
+ if (!cfp->me) break;
164
+
165
+ klass = cfp->me->klass;
166
+ method = cfp->me->called_id;
167
+ #else
168
+ klass = cfp->method_class;
169
+ method = cfp->method_id;
170
+ #endif
171
+ SAVE_FRAME();
172
+ break;
173
+ }
174
+
175
+ cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
176
+ }
177
+
178
+ assert(depth <= max_depth);
179
+ return depth;
180
+ }
181
+
182
#if 0
/*
 * Debug helper (compiled out): prints every control frame of the current
 * thread, from th->cfp to the end frame, to stdout.
 */
void
rb_dump_stack()
{
  rb_thread_t *th = GET_THREAD();
  rb_control_frame_t *cfp = th->cfp;
  rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
  ID func;

  printf("\n\n*********************\n");
  while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp)) {
    printf("cfp (%p):\n", cfp);
    printf(" type: 0x%x\n", VM_FRAME_TYPE(cfp));
    printf(" pc: %p\n", cfp->pc);
    printf(" iseq: %p\n", cfp->iseq);
    if (cfp->iseq) {
      printf(" type: %d\n", FIX2INT(cfp->iseq->type));
      printf(" self: %p\n", cfp->iseq->self);
      printf(" klass: %p (%s)\n", cfp->iseq->klass, cfp->iseq->klass ? rb_class2name(cfp->iseq->klass) : "");
      printf(" method: %p (%s)\n", cfp->iseq->defined_method_id, cfp->iseq->defined_method_id ? rb_id2name(cfp->iseq->defined_method_id) : "");
    }
    printf(" self: %p\n", cfp->self);
    printf(" klass: %p (%s)\n", cfp->method_class, cfp->method_class ? rb_class2name(cfp->method_class) : "");
    printf(" method: %p (%s)\n", cfp->method_id, cfp->method_id ? rb_id2name(cfp->method_id) : "");

    cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
    printf("\n");
  }
  printf("*********************\n\n");
}
#endif
213
+ #endif
214
+
215
+ static VALUE objprofiler_setup();
216
+ static VALUE objprofiler_teardown();
217
+ static VALUE methprofiler_setup();
218
+ static VALUE methprofiler_teardown();
219
+
220
+ /* CpuProfiler */
221
+
222
+ static VALUE cPerfTools;
223
+ static VALUE cCpuProfiler;
224
+ static VALUE eError;
225
+ static VALUE bProfilerRunning;
226
+ static VALUE gc_hook;
227
+
228
+ static VALUE
229
+ cpuprofiler_running_p(VALUE self)
230
+ {
231
+ return bProfilerRunning;
232
+ }
233
+
234
+ static VALUE
235
+ cpuprofiler_stop(VALUE self)
236
+ {
237
+ if (!bProfilerRunning)
238
+ return Qfalse;
239
+
240
+ bProfilerRunning = Qfalse;
241
+ objprofiler_teardown();
242
+ methprofiler_teardown();
243
+ ProfilerStop();
244
+ ProfilerFlush();
245
+
246
+ return Qtrue;
247
+ }
248
+
249
+ static VALUE
250
+ cpuprofiler_start(VALUE self, VALUE filename)
251
+ {
252
+ StringValue(filename);
253
+
254
+ if (bProfilerRunning)
255
+ rb_raise(eError, "profiler is already running");
256
+
257
+ if (getenv("CPUPROFILE_OBJECTS"))
258
+ objprofiler_setup();
259
+ else if (getenv("CPUPROFILE_METHODS"))
260
+ methprofiler_setup();
261
+
262
+ if (ProfilerStart(RSTRING_PTR(filename))) {
263
+ bProfilerRunning = Qtrue;
264
+ } else {
265
+ rb_raise(eError, "profiler could not be started");
266
+ }
267
+
268
+ if (rb_block_given_p()) {
269
+ rb_yield(Qnil);
270
+ cpuprofiler_stop(self);
271
+ }
272
+
273
+ return Qtrue;
274
+ }
275
+
276
+ static void
277
+ cpuprofiler_gc_mark()
278
+ {
279
+ ProfilerGcMark(rb_gc_mark);
280
+ }
281
+
282
+ /* ObjProfiler */
283
+
284
+ #include <assert.h>
285
+ #include <ucontext.h>
286
+ #include <unistd.h>
287
+ #include <signal.h>
288
+ #include <stdio.h>
289
+ #include <stdlib.h>
290
+ #include <string.h>
291
+ #include <sys/mman.h>
292
+
293
+ static VALUE bObjProfilerRunning;
294
+ static VALUE bMethProfilerRunning;
295
/* Number of leading bytes of rb_newobj that are patched with breakpoints. */
#define NUM_ORIG_BYTES 2

/*
 * One entry per patched byte: where it lives and the value it held before
 * it was overwritten, so it can be put back.
 */
struct {
  char *location;       /* address of the patched byte */
  unsigned char value;  /* original byte at that address */
} orig_bytes[NUM_ORIG_BYTES];
301
+
302
/*
 * Round `addr` down to the start of the memory page that contains it.
 *
 * The page size is queried at runtime instead of assuming a fixed 64 KiB
 * granule, so the result is the real page boundary and a later mprotect()
 * does not reach back over unrelated (possibly unmapped) pages.
 * `addr` must not be NULL.
 */
static inline void *
page_align(void *addr) {
  assert(addr != NULL);

  long page_size = sysconf(_SC_PAGESIZE);
  if (page_size <= 0)
    page_size = 4096; /* conservative fallback if the query fails */

  return (void *)((size_t)addr & ~((size_t)page_size - 1));
}
307
+
308
+ static void
309
+ unprotect_page(char *addr) {
310
+ assert(addr != NULL);
311
+ char *aligned_addr = page_align(addr);
312
+ if (mprotect(aligned_addr, (addr - aligned_addr), PROT_READ|PROT_WRITE|PROT_EXEC) != 0)
313
+ perror("mprotect");
314
+ }
315
+
316
+ static inline char**
317
+ uc_get_ip(ucontext_t *uc) {
318
+ # if defined(__FreeBSD__)
319
+ # ifdef __i386__
320
+ # define program_counter uc_mcontext.mc_eip
321
+ # else
322
+ # define program_counter uc_mcontext.mc_rip
323
+ # endif
324
+ # elif defined(__dietlibc__)
325
+ # define program_counter uc_mcontext.rip
326
+ # elif defined(__APPLE__)
327
+ # ifdef __LP64__
328
+ # define program_counter uc_mcontext->__ss.__rip
329
+ # else
330
+ # define program_counter uc_mcontext->__ss.__eip
331
+ # endif
332
+ # else
333
+ # ifdef REG_RIP
334
+ # define program_counter uc_mcontext.gregs[REG_RIP]
335
+ # else
336
+ # define program_counter uc_mcontext.gregs[REG_EIP]
337
+ # endif
338
+ # endif
339
+ return (char**)&uc->program_counter;
340
+ }
341
+
342
+ #if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
343
+ static void
344
+ event_handler(rb_event_flag_t event, VALUE data, VALUE self, ID id, VALUE klass) {
345
+ ProfilerRecord(0, NULL, NULL);
346
+ }
347
+ #else
348
+ static void
349
+ event_handler(rb_event_t event, NODE *node, VALUE self, ID id, VALUE klass) {
350
+ ProfilerRecord(0, NULL, NULL);
351
+ }
352
+ #endif
353
+
354
+ static VALUE
355
+ methprofiler_setup()
356
+ {
357
+ if (bMethProfilerRunning)
358
+ return Qtrue;
359
+
360
+ #if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
361
+ rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL, 0);
362
+ #else
363
+ rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL);
364
+ #endif
365
+
366
+ bMethProfilerRunning = Qtrue;
367
+ return Qtrue;
368
+ }
369
+
370
+ static VALUE
371
+ methprofiler_teardown()
372
+ {
373
+ if (!bMethProfilerRunning)
374
+ return Qfalse;
375
+
376
+ rb_remove_event_hook(event_handler);
377
+
378
+ bMethProfilerRunning = Qfalse;
379
+ return Qtrue;
380
+ }
381
+
382
+ static void
383
+ trap_handler(int sig, siginfo_t *info, void *data) {
384
+ int i;
385
+ ucontext_t *uc = (ucontext_t *)data;
386
+ char **ip = uc_get_ip(uc);
387
+
388
+ // printf("signal: %d, addr: %p, ip: %p\n", signal, info->si_addr, *ip);
389
+
390
+ for (i=0; i<NUM_ORIG_BYTES; i++) {
391
+ if (orig_bytes[i].location == *ip-1) {
392
+ // restore original byte
393
+ orig_bytes[i].location[0] = orig_bytes[i].value;
394
+
395
+ // setup next breakpoint
396
+ orig_bytes[(i+1)%NUM_ORIG_BYTES].location[0] = '\xCC';
397
+
398
+ // first breakpoint is the notification
399
+ if (i == 0)
400
+ ProfilerRecord(sig, info, data);
401
+
402
+ // reset instruction pointer
403
+ *ip -= 1;
404
+
405
+ break;
406
+ }
407
+ }
408
+ }
409
+
410
+ static VALUE
411
+ objprofiler_setup()
412
+ {
413
+ if (bObjProfilerRunning)
414
+ return Qtrue;
415
+
416
+ int i;
417
+ struct sigaction sig = { .sa_sigaction = trap_handler, .sa_flags = SA_SIGINFO };
418
+ sigemptyset(&sig.sa_mask);
419
+ sigaction(SIGTRAP, &sig, NULL);
420
+
421
+ unprotect_page((char*)rb_newobj);
422
+
423
+ for (i=0; i<NUM_ORIG_BYTES; i++) {
424
+ orig_bytes[i].location = (char *)rb_newobj + i;
425
+ orig_bytes[i].value = ((unsigned char*)rb_newobj)[i];
426
+ orig_bytes[i].location[0] = '\xCC';
427
+ }
428
+
429
+ // setenv("CPUPROFILE_OBJECTS", "1", 1);
430
+ bObjProfilerRunning = Qtrue;
431
+ return Qtrue;
432
+ }
433
+
434
+ static VALUE
435
+ objprofiler_teardown()
436
+ {
437
+ if (!bObjProfilerRunning)
438
+ return Qfalse;
439
+
440
+ int i;
441
+ struct sigaction sig = { .sa_handler = SIG_IGN };
442
+ sigemptyset(&sig.sa_mask);
443
+ sigaction(SIGTRAP, &sig, NULL);
444
+
445
+ for (i=0; i<NUM_ORIG_BYTES; i++) {
446
+ orig_bytes[i].location[0] = orig_bytes[i].value;
447
+ }
448
+
449
+ // unsetenv("CPUPROFILE_OBJECTS");
450
+ bObjProfilerRunning = Qfalse;
451
+ return Qtrue;
452
+ }
453
+
454
+ /* Init */
455
+
456
+ static void
457
+ profiler_at_exit(VALUE self)
458
+ {
459
+ cpuprofiler_stop(self);
460
+ }
461
+
462
+ void
463
+ Init_perftools()
464
+ {
465
+ cPerfTools = rb_define_class("PerfTools", rb_cObject);
466
+ eError = rb_define_class_under(cPerfTools, "Error", rb_eStandardError);
467
+ cCpuProfiler = rb_define_class_under(cPerfTools, "CpuProfiler", rb_cObject);
468
+
469
+ Iallocate = rb_intern("allocate");
470
+ I__send__ = rb_intern("__send__");
471
+ Isend = rb_intern("send");
472
+
473
+ bMethProfilerRunning = bObjProfilerRunning = bProfilerRunning = Qfalse;
474
+
475
+ rb_define_singleton_method(cCpuProfiler, "running?", cpuprofiler_running_p, 0);
476
+ rb_define_singleton_method(cCpuProfiler, "start", cpuprofiler_start, 1);
477
+ rb_define_singleton_method(cCpuProfiler, "stop", cpuprofiler_stop, 0);
478
+
479
+ gc_hook = Data_Wrap_Struct(cCpuProfiler, cpuprofiler_gc_mark, NULL, NULL);
480
+ rb_global_variable(&gc_hook);
481
+
482
+ if (ProfilingIsEnabledForAllThreads()) { // profiler is already running?
483
+ bProfilerRunning = Qtrue;
484
+
485
+ if (getenv("CPUPROFILE_OBJECTS")) { // want to profile objects
486
+ objprofiler_setup();
487
+ } else if (getenv("CPUPROFILE_METHODS")) {
488
+ methprofiler_setup();
489
+ }
490
+
491
+ rb_set_end_proc(profiler_at_exit, 0); // make sure to cleanup before the VM shuts down
492
+ }
493
+ }