honkster-perftools.rb 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,177 @@
1
+ # perftools.rb
2
+
3
+ google-perftools for ruby code
4
+ (c) 2010 Aman Gupta (tmm1)
5
+ http://www.ruby-lang.org/en/LICENSE.txt
6
+
7
+ ## Usage (in a webapp)
8
+
9
+ Use [rack-perftools_profiler](https://github.com/bhb/rack-perftools_profiler):
10
+
11
+ require 'rack/perftools_profiler'
12
+ config.middleware.use ::Rack::PerftoolsProfiler, :default_printer => 'gif'
13
+
14
+ Simply add `profile=true` to profile a request:
15
+
16
+ curl -o 10_requests_to_homepage.gif "http://localhost:3000/homepage?profile=true&times=10"
17
+
18
+ ## Usage (from Ruby)
19
+
20
+ Run the profiler with a block:
21
+
22
+ require 'perftools'
23
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
24
+ 5_000_000.times{ 1+2+3+4+5 }
25
+ end
26
+
27
+ Start and stop the profiler manually:
28
+
29
+ require 'perftools'
30
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
31
+ 5_000_000.times{ 1+2+3+4+5 }
32
+ PerfTools::CpuProfiler.stop
33
+
34
+ ## Usage (externally)
35
+
36
+ Profile an existing ruby application without modifying it:
37
+
38
+ $ CPUPROFILE=/tmp/my_app_profile \
39
+ RUBYOPT="-r`gem which perftools | tail -1`" \
40
+ ruby my_app.rb
41
+
42
+ ## Profiler Modes
43
+
44
+ The profiler can be run in one of many modes, set via an environment
45
+ variable before the library is loaded:
46
+
47
+ * `CPUPROFILE_REALTIME=1`
48
+
49
+ Use walltime instead of cputime profiling. This will capture all time spent in a method, even if it does not involve the CPU.
50
+
51
+ For example, `sleep()` is not expensive in terms of cputime, but it is very expensive in walltime. Walltime profiling will also show functions that spend a lot of time waiting on network I/O.
52
+
53
+ * `CPUPROFILE_OBJECTS=1`
54
+
55
+ Profile object allocations instead of cpu/wall time. Each sample represents one object created inside that function.
56
+
57
+ * `CPUPROFILE_METHODS=1`
58
+
59
+ Profile method calls. Each sample represents one method call made inside that function.
60
+
61
+ The sampling frequency of the profiler can be adjusted to collect more
62
+ (for better profile detail) or fewer samples (for lower overhead):
63
+
64
+ * `CPUPROFILE_FREQUENCY=500`
65
+
66
+ The default sampling frequency is 100 samples per second. The valid range is 1-4000.
67
+
68
+ ## Reporting
69
+
70
+ pprof.rb --text /tmp/add_numbers_profile
71
+
72
+ pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
73
+
74
+ pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
75
+
76
+ pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
77
+ kcachegrind /tmp/add_numbers_profile.grind
78
+
79
+ pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
80
+
81
+ pprof.rb --text --ignore=Gem /tmp/my_app_profile
82
+
83
+
84
+ For more options, see [pprof documentation](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
85
+
86
+
87
+ ### Examples
88
+
89
+ #### pprof.rb --text
90
+
91
+ Total: 1735 samples
92
+ 1487 85.7% 85.7% 1735 100.0% Integer#times
93
+ 248 14.3% 100.0% 248 14.3% Fixnum#+
94
+
95
+ #### pprof.rb --gif
96
+
97
+ * Simple [require 'rubygems'](http://perftools-rb.rubyforge.org/examples/rubygems.gif) profile
98
+
99
+ * Comparing redis-rb [with](http://perftools-rb.rubyforge.org/examples/redis-rb.gif) and [without](http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif) SystemTimer based socket timeouts
100
+
101
+ * [Sinatra](http://perftools-rb.rubyforge.org/examples/sinatra.gif) vs. [Merb](http://perftools-rb.rubyforge.org/examples/merb.gif) vs. [Rails](http://perftools-rb.rubyforge.org/examples/rails.gif)
102
+
103
+ * C-level profile of EventMachine + epoll + Ruby threads [before](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif) and [after](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif) a [6 line EM bugfix](http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/)
104
+
105
+ * C-level profile of a [ruby/rails vm](http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif)
106
+ * 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
107
+
108
+
109
+ ## Installation
110
+
111
+ Just install the gem, which will download, patch and compile google-perftools for you:
112
+
113
+ sudo gem install perftools.rb
114
+
115
+ Or build your own gem:
116
+
117
+ git clone git://github.com/tmm1/perftools.rb
118
+ cd perftools.rb
119
+ gem build perftools.rb.gemspec
120
+ gem install perftools.rb
121
+
122
+
123
+ You'll also need graphviz to generate call graphs using dot:
124
+
125
+ brew install graphviz ghostscript # osx (Homebrew should not be run with sudo)
126
+ sudo apt-get install graphviz ghostscript # debian/ubuntu (ghostscript provides ps2pdf)
127
+
128
+ ## Advantages over ruby-prof
129
+
130
+ * Sampling profiler
131
+
132
+ * perftools samples your process using setitimer() so it can be used in production with minimal overhead.
133
+
134
+
135
+ ## Profiling the Ruby VM and C extensions
136
+
137
+ To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64).
138
+
139
+ Download:
140
+
141
+ wget http://google-perftools.googlecode.com/files/google-perftools-1.6.tar.gz
142
+ tar zxvf google-perftools-1.6.tar.gz
143
+ cd google-perftools-1.6
144
+
145
+ Compile:
146
+
147
+ ./configure --prefix=/opt
148
+ make
149
+ sudo make install
150
+
151
+ Profile:
152
+
153
+ export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
154
+ export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
155
+ CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
156
+
157
+ Report:
158
+
159
+ pprof `which ruby` --text /tmp/ruby_interpreter.profile
160
+
161
+
162
+ ## TODO
163
+
164
+ * Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
165
+ * Allow both C and Ruby profiling
166
+ * Add setter for the sampling interval
167
+
168
+
169
+ ## Resources
170
+
171
+ * [GoRuCo 2009 Lightning Talk on perftools.rb](http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html) @ 21:52
172
+
173
+ * [Ilya Grigorik's introduction to perftools.rb](http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/)
174
+
175
+ * [Google Perftools](http://code.google.com/p/google-perftools/)
176
+
177
+ * [Analyzing profiles and interpreting different output formats](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Runs the bundled pprof script (located next to this file) against the
# current Ruby interpreter binary, forwarding every command-line argument.
require 'rbconfig'

# Prefer RbConfig; fall back to the legacy Config constant on Rubies that
# only define that one.
config = defined?(RbConfig) ? RbConfig::CONFIG : Config::CONFIG

pprof = File.join(File.dirname(__FILE__), 'pprof')
ruby  = File.join(config['bindir'], config['ruby_install_name'])

# Pass the arguments as a list so they reach pprof verbatim. Building one
# command string would let a shell re-parse them, breaking arguments that
# contain spaces or metacharacters such as "--focus=Integer|Fixnum".
exec(pprof, ruby, *ARGV)
@@ -0,0 +1,116 @@
1
+ CWD = File.expand_path(File.dirname(__FILE__))
2
+
3
# Echoes +cmd+, runs it through mkmf's xsystem and returns xsystem's result.
# Raises a RuntimeError pointing at the relevant log files when the command
# fails.
def sys(cmd)
  puts " -- #{cmd}"
  result = xsystem(cmd)
  return result if result

  raise "#{cmd} failed, please report to perftools@tmm1.net with pastie.org link to #{CWD}/mkmf.log and #{CWD}/src/google-perftools-1.6/config.log"
end
10
+
11
+ require 'mkmf'
12
+ require 'fileutils'
13
+
14
+ if RUBY_VERSION >= "1.9"
15
+ begin
16
+ require "ruby_core_source"
17
+ rescue LoadError
18
+ require 'rubygems/user_interaction' # for 1.9.1
19
+ require 'rubygems/dependency_installer'
20
+ installer = Gem::DependencyInstaller.new
21
+ installer.install 'ruby_core_source'
22
+
23
+ Gem.refresh
24
+ Gem.activate('ruby_core_source') # for 1.9.1
25
+
26
+ require "ruby_core_source"
27
+ end
28
+ end
29
+
30
# Bundled google-perftools tarball (expected under src/) and the directory
# name it unpacks to.
perftools = File.basename('google-perftools-1.6.tar.gz')
dir = File.basename(perftools, '.tar.gz')

puts "(I'm about to compile google-perftools.. this will definitely take a while)"
# NOTE(review): PATCH_GET=0 is presumably meant to stop patch(1) from trying
# to fetch files from a version-control system -- confirm against patch docs.
ENV["PATCH_GET"] = '0'

Dir.chdir('src') do
  # Always start from a pristine tree. rm_rf is a no-op when the directory is
  # missing, so no existence check (File.exists? is gone in current Ruby).
  FileUtils.rm_rf(dir)

  sys("tar zpxvf #{perftools}")
  Dir.chdir(dir) do
    # With DEV set, every step is committed to a scratch git repository so
    # the effect of each patch can be inspected.
    if ENV['DEV']
      sys("git init")
      sys("git add .")
      sys("git commit -m 'initial source'")
    end

    # [patch name, apply?] -- applied in this order.
    [ ['perftools', true],
      ['perftools-notests', true],
      ['perftools-pprof', true],
      ['perftools-gc', true],
      ['perftools-osx', RUBY_PLATFORM =~ /darwin/],
      ['perftools-debug', true],
      ['perftools-objects', true],
      ['perftools-frames', true],
      ['perftools-stddef', true]
    ].each do |patch, apply|
      if apply
        sys("patch -p1 < ../../../patches/#{patch}.patch")
        sys("git commit -am '#{patch}'") if ENV['DEV']
      end
    end

    # Rename SpinLock to ISpinLock throughout the perftools sources.
    sys("sed -i -e 's,SpinLock,ISpinLock,g' src/*.cc src/*.h src/base/*.cc src/base/*.h")
    sys("git commit -am 'rename spinlock'") if ENV['DEV']
  end

  # Ship the patched pprof script in the gem's bin/ directory.
  Dir.chdir(dir) do
    FileUtils.cp 'src/pprof', '../../../bin/'
    FileUtils.chmod 0755, '../../../bin/pprof'
  end

  # Build a static libprofiler and copy it next to this extconf.rb as
  # librubyprofiler.a.
  Dir.chdir(dir) do
    if RUBY_PLATFORM =~ /darwin10/
      ENV['CFLAGS'] = ENV['CXXFLAGS'] = '-D_XOPEN_SOURCE'
    end
    sys("./configure --disable-heap-profiler --disable-heap-checker --disable-debugalloc --disable-shared")
    sys("make")
    FileUtils.cp '.libs/libprofiler.a', '../../librubyprofiler.a'
  end
end
81
+
82
+ $LIBPATH << CWD
83
+ $libs = append_library($libs, 'rubyprofiler')
84
# Appends a -D<name> preprocessor define to mkmf's $defs list.
def add_define(name)
  $defs << "-D#{name}"
end
87
+
88
+ case RUBY_PLATFORM
89
+ when /darwin/, /linux/, /freebsd/
90
+ CONFIG['LDSHARED'] = "$(CXX) " + CONFIG['LDSHARED'].split[1..-1].join(' ')
91
+ end
92
+
93
+ if RUBY_VERSION >= "1.9"
94
+ add_define 'RUBY19'
95
+
96
+ hdrs = proc {
97
+ have_header("method.h") # exists on 1.9.2
98
+ have_header("vm_core.h") and
99
+ have_header("iseq.h") and
100
+ have_header("insns.inc") and
101
+ have_header("insns_info.inc")
102
+ }
103
+
104
+ unless Ruby_core_source::create_makefile_with_core(hdrs, "perftools")
105
+ STDERR.puts "\n\n"
106
+ STDERR.puts "***************************************************************************************"
107
+ STDERR.puts "********************** Ruby_core_source::create_makefile FAILED ***********************"
108
+ STDERR.puts "***************************************************************************************"
109
+ exit(1)
110
+ end
111
+ else
112
+ add_define 'RUBY18'
113
+
114
+ have_func('rb_during_gc', 'ruby.h')
115
+ create_makefile 'perftools'
116
+ end
@@ -0,0 +1,493 @@
1
+ #ifndef _GNU_SOURCE
2
+ #define _GNU_SOURCE
3
+ #endif
4
+
5
+ #include <ruby.h>
6
+
7
+ #ifndef _XOPEN_SOURCE
8
+ #define _XOPEN_SOURCE 600
9
+ #endif
10
+
11
+ #include <assert.h>
12
+
13
+ void ProfilerGcMark(void (*cb)(VALUE));
14
+ int ProfilerStart(const char*);
15
+ void ProfilerStop();
16
+ void ProfilerFlush();
17
+ void ProfilerRecord(int, void*, void*);
18
+ int ProfilingIsEnabledForAllThreads();
19
+
20
+ static VALUE Iallocate;
21
+ static VALUE I__send__;
22
+ static VALUE Isend;
23
+
24
/*
 * Append one frame record to result[] using the caller's locals `self`,
 * `klass`, `method`, `depth` and `result`. Each record takes three slots:
 *   [0] self, but only when klass is a singleton class and self is a
 *       Class or Module; otherwise 0
 *   [1] klass
 *   [2] the method ID (ID_ALLOCATOR is reported as Iallocate)
 * Frames without a method, and __send__/send frames, are skipped.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and cannot capture a following `else`.
 */
#define SAVE_FRAME() \
  do { \
    if (method && method != I__send__ && method != Isend) { \
      if (self && FL_TEST(klass, FL_SINGLETON) && (BUILTIN_TYPE(self) == T_CLASS || BUILTIN_TYPE(self) == T_MODULE)) \
        result[depth++] = (void*) self; \
      else \
        result[depth++] = 0; \
      \
      result[depth++] = (void*) klass; \
      result[depth++] = (void*) (method == ID_ALLOCATOR ? Iallocate : method); \
    } \
  } while (0)
34
+
35
+ #ifdef RUBY18
36
+ #include <env.h>
37
+ #include <node.h>
38
+ #include <setjmp.h>
39
+ #include <signal.h>
40
+
41
+ static jmp_buf saved_location;
42
+ static sig_t saved_handler = NULL;
43
+
44
+ void
45
+ segv_handler(int sig)
46
+ {
47
+ fprintf(stderr, "*** Caught a segfault during stack trace sampling!\n");
48
+ assert(saved_handler);
49
+ _longjmp(saved_location, 1);
50
+ }
51
+
52
+ int
53
+ rb_stack_trace(void** result, int max_depth)
54
+ {
55
+ struct FRAME *frame = ruby_frame;
56
+ NODE *n;
57
+
58
+ VALUE klass, self;
59
+ ID method;
60
+ int depth = 0;
61
+
62
+ if (max_depth == 0)
63
+ return 0;
64
+
65
+ #ifdef HAVE_RB_DURING_GC
66
+ if (rb_during_gc()) {
67
+ result[0] = rb_gc;
68
+ return 1;
69
+ }
70
+ #endif
71
+
72
+ // should not be possible to get here and already have a saved signal handler
73
+ assert(!saved_handler);
74
+
75
+ // ruby_frame is occasionally inconsistent, so temporarily catch segfaults
76
+ saved_handler = signal(SIGSEGV, segv_handler);
77
+ if (_setjmp(saved_location)) {
78
+ signal(SIGSEGV, saved_handler);
79
+ saved_handler = NULL;
80
+ return 0;
81
+ }
82
+
83
+ /*
84
+ // XXX does it make sense to track allocations or not?
85
+ if (frame->last_func == ID_ALLOCATOR) {
86
+ frame = frame->prev;
87
+ }
88
+
89
+ // XXX SIGPROF can come in while ruby_frame is in an inconsistent state (rb_call0), so we ignore the top-most frame
90
+ if (frame->last_func && frame->last_class) {
91
+ self = frame->self;
92
+ klass = frame->last_class;
93
+ method = frame->last_func;
94
+ SAVE_FRAME();
95
+ }
96
+ */
97
+
98
+ for (; frame && (n = frame->node); frame = frame->prev) {
99
+ if (frame->prev && frame->prev->last_func) {
100
+ if (frame->prev->node == n) {
101
+ if (frame->prev->last_func == frame->last_func) continue;
102
+ }
103
+
104
+ if (depth+3 > max_depth)
105
+ break;
106
+
107
+ self = frame->prev->self;
108
+ klass = frame->prev->last_class;
109
+ method = frame->prev->last_func;
110
+ SAVE_FRAME();
111
+ }
112
+ }
113
+
114
+ signal(SIGSEGV, saved_handler);
115
+ saved_handler = NULL;
116
+
117
+ assert(depth <= max_depth);
118
+ return depth;
119
+ }
120
+ #endif
121
+
122
+ #ifdef RUBY19
123
+ #include <vm_core.h>
124
+ #include <iseq.h>
125
+
126
+ int
127
+ rb_stack_trace(void** result, int max_depth)
128
+ {
129
+ rb_thread_t *th = GET_THREAD();
130
+ rb_control_frame_t *cfp = th->cfp;
131
+ rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
132
+
133
+ VALUE klass, self;
134
+ ID method;
135
+ int depth = 0;
136
+
137
+ if (max_depth == 0)
138
+ return 0;
139
+
140
+ if (rb_during_gc()) {
141
+ result[0] = rb_gc;
142
+ return 1;
143
+ }
144
+
145
+ while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp) && depth+3 <= max_depth) {
146
+ rb_iseq_t *iseq = cfp->iseq;
147
+
148
+ if (iseq && iseq->type == ISEQ_TYPE_METHOD) {
149
+ self = 0; // maybe use cfp->self here, but iseq->self is a ISeq ruby obj
150
+ klass = iseq->klass;
151
+ method = iseq->defined_method_id;
152
+ SAVE_FRAME();
153
+ }
154
+
155
+ if (depth+3 > max_depth)
156
+ break;
157
+
158
+ switch (VM_FRAME_TYPE(cfp)) {
159
+ case VM_FRAME_MAGIC_METHOD:
160
+ case VM_FRAME_MAGIC_CFUNC:
161
+ self = cfp->self;
162
+ #ifdef HAVE_METHOD_H
163
+ if (!cfp->me) break;
164
+
165
+ klass = cfp->me->klass;
166
+ method = cfp->me->called_id;
167
+ #else
168
+ klass = cfp->method_class;
169
+ method = cfp->method_id;
170
+ #endif
171
+ SAVE_FRAME();
172
+ break;
173
+ }
174
+
175
+ cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
176
+ }
177
+
178
+ assert(depth <= max_depth);
179
+ return depth;
180
+ }
181
+
182
+ #if 0
183
+ void
184
+ rb_dump_stack()
185
+ {
186
+ rb_thread_t *th = GET_THREAD();
187
+ rb_control_frame_t *cfp = th->cfp;
188
+ rb_control_frame_t *end_cfp = RUBY_VM_END_CONTROL_FRAME(th);
189
+ ID func;
190
+
191
+ printf("\n\n*********************\n");
192
+ while (RUBY_VM_VALID_CONTROL_FRAME_P(cfp, end_cfp)) {
193
+ printf("cfp (%p):\n", cfp);
194
+ printf(" type: 0x%x\n", VM_FRAME_TYPE(cfp));
195
+ printf(" pc: %p\n", cfp->pc);
196
+ printf(" iseq: %p\n", cfp->iseq);
197
+ if (cfp->iseq) {
198
+ printf(" type: %d\n", FIX2INT(cfp->iseq->type));
199
+ printf(" self: %p\n", cfp->iseq->self);
200
+ printf(" klass: %p (%s)\n", cfp->iseq->klass, cfp->iseq->klass ? rb_class2name(cfp->iseq->klass) : "");
201
+ printf(" method: %p (%s)\n", cfp->iseq->defined_method_id, cfp->iseq->defined_method_id ? rb_id2name(cfp->iseq->defined_method_id) : "");
202
+ }
203
+ printf(" self: %p\n", cfp->self);
204
+ printf(" klass: %p (%s)\n", cfp->method_class, cfp->method_class ? rb_class2name(cfp->method_class) : "");
205
+ printf(" method: %p (%s)\n", cfp->method_id, cfp->method_id ? rb_id2name(cfp->method_id) : "");
206
+
207
+ cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
208
+ printf("\n");
209
+ }
210
+ printf("*********************\n\n");
211
+ }
212
+ #endif
213
+ #endif
214
+
215
+ static VALUE objprofiler_setup();
216
+ static VALUE objprofiler_teardown();
217
+ static VALUE methprofiler_setup();
218
+ static VALUE methprofiler_teardown();
219
+
220
+ /* CpuProfiler */
221
+
222
+ static VALUE cPerfTools;
223
+ static VALUE cCpuProfiler;
224
+ static VALUE eError;
225
+ static VALUE bProfilerRunning;
226
+ static VALUE gc_hook;
227
+
228
+ static VALUE
229
+ cpuprofiler_running_p(VALUE self)
230
+ {
231
+ return bProfilerRunning;
232
+ }
233
+
234
+ static VALUE
235
+ cpuprofiler_stop(VALUE self)
236
+ {
237
+ if (!bProfilerRunning)
238
+ return Qfalse;
239
+
240
+ bProfilerRunning = Qfalse;
241
+ objprofiler_teardown();
242
+ methprofiler_teardown();
243
+ ProfilerStop();
244
+ ProfilerFlush();
245
+
246
+ return Qtrue;
247
+ }
248
+
249
+ static VALUE
250
+ cpuprofiler_start(VALUE self, VALUE filename)
251
+ {
252
+ StringValue(filename);
253
+
254
+ if (bProfilerRunning)
255
+ rb_raise(eError, "profiler is already running");
256
+
257
+ if (getenv("CPUPROFILE_OBJECTS"))
258
+ objprofiler_setup();
259
+ else if (getenv("CPUPROFILE_METHODS"))
260
+ methprofiler_setup();
261
+
262
+ if (ProfilerStart(RSTRING_PTR(filename))) {
263
+ bProfilerRunning = Qtrue;
264
+ } else {
265
+ rb_raise(eError, "profiler could not be started");
266
+ }
267
+
268
+ if (rb_block_given_p()) {
269
+ rb_yield(Qnil);
270
+ cpuprofiler_stop(self);
271
+ }
272
+
273
+ return Qtrue;
274
+ }
275
+
276
+ static void
277
+ cpuprofiler_gc_mark()
278
+ {
279
+ ProfilerGcMark(rb_gc_mark);
280
+ }
281
+
282
+ /* ObjProfiler */
283
+
284
+ #include <assert.h>
285
+ #include <ucontext.h>
286
+ #include <unistd.h>
287
+ #include <signal.h>
288
+ #include <stdio.h>
289
+ #include <stdlib.h>
290
+ #include <string.h>
291
+ #include <sys/mman.h>
292
+
293
+ static VALUE bObjProfilerRunning;
294
+ static VALUE bMethProfilerRunning;
295
/* Number of leading bytes of rb_newobj that are patched with breakpoints. */
#define NUM_ORIG_BYTES 2

/* Address and original value of each patched byte, so it can be restored. */
struct {
  char *location;
  unsigned char value;
} orig_bytes[NUM_ORIG_BYTES];

/*
 * Round addr down to the start of the memory page that contains it.
 * Uses the real page size, so the result always lies in the same mapping as
 * addr; a fixed 64 KiB mask could land in unmapped memory.
 */
static inline void *
page_align(void *addr) {
  assert(addr != NULL);
  long page_size = sysconf(_SC_PAGESIZE);
  if (page_size <= 0)
    page_size = 4096; /* fallback if the page size cannot be queried */
  return (void *)((size_t)addr & ~((size_t)page_size - 1));
}

/*
 * Make the NUM_ORIG_BYTES bytes starting at addr readable, writable and
 * executable. The protected range starts at the containing page and runs
 * through the last patched byte, so it also covers a page-aligned addr and
 * bytes that straddle a page boundary. Failures are reported with perror().
 */
static void
unprotect_page(char *addr) {
  assert(addr != NULL);
  char *aligned_addr = page_align(addr);
  size_t len = (size_t)(addr - aligned_addr) + NUM_ORIG_BYTES;
  if (mprotect(aligned_addr, len, PROT_READ|PROT_WRITE|PROT_EXEC) != 0)
    perror("mprotect");
}
315
+
316
+ static inline char**
317
+ uc_get_ip(ucontext_t *uc) {
318
+ # if defined(__FreeBSD__)
319
+ # ifdef __i386__
320
+ # define program_counter uc_mcontext.mc_eip
321
+ # else
322
+ # define program_counter uc_mcontext.mc_rip
323
+ # endif
324
+ # elif defined(__dietlibc__)
325
+ # define program_counter uc_mcontext.rip
326
+ # elif defined(__APPLE__)
327
+ # ifdef __LP64__
328
+ # define program_counter uc_mcontext->__ss.__rip
329
+ # else
330
+ # define program_counter uc_mcontext->__ss.__eip
331
+ # endif
332
+ # else
333
+ # ifdef REG_RIP
334
+ # define program_counter uc_mcontext.gregs[REG_RIP]
335
+ # else
336
+ # define program_counter uc_mcontext.gregs[REG_EIP]
337
+ # endif
338
+ # endif
339
+ return (char**)&uc->program_counter;
340
+ }
341
+
342
+ #if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
343
+ static void
344
+ event_handler(rb_event_flag_t event, VALUE data, VALUE self, ID id, VALUE klass) {
345
+ ProfilerRecord(0, NULL, NULL);
346
+ }
347
+ #else
348
+ static void
349
+ event_handler(rb_event_t event, NODE *node, VALUE self, ID id, VALUE klass) {
350
+ ProfilerRecord(0, NULL, NULL);
351
+ }
352
+ #endif
353
+
354
+ static VALUE
355
+ methprofiler_setup()
356
+ {
357
+ if (bMethProfilerRunning)
358
+ return Qtrue;
359
+
360
+ #if defined(RB_EVENT_HOOKS_HAVE_CALLBACK_DATA) || defined(RUBY_EVENT_VM)
361
+ rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL, 0);
362
+ #else
363
+ rb_add_event_hook(event_handler, RUBY_EVENT_CALL|RUBY_EVENT_C_CALL);
364
+ #endif
365
+
366
+ bMethProfilerRunning = Qtrue;
367
+ return Qtrue;
368
+ }
369
+
370
+ static VALUE
371
+ methprofiler_teardown()
372
+ {
373
+ if (!bMethProfilerRunning)
374
+ return Qfalse;
375
+
376
+ rb_remove_event_hook(event_handler);
377
+
378
+ bMethProfilerRunning = Qfalse;
379
+ return Qtrue;
380
+ }
381
+
382
+ static void
383
+ trap_handler(int sig, siginfo_t *info, void *data) {
384
+ int i;
385
+ ucontext_t *uc = (ucontext_t *)data;
386
+ char **ip = uc_get_ip(uc);
387
+
388
+ // printf("signal: %d, addr: %p, ip: %p\n", signal, info->si_addr, *ip);
389
+
390
+ for (i=0; i<NUM_ORIG_BYTES; i++) {
391
+ if (orig_bytes[i].location == *ip-1) {
392
+ // restore original byte
393
+ orig_bytes[i].location[0] = orig_bytes[i].value;
394
+
395
+ // setup next breakpoint
396
+ orig_bytes[(i+1)%NUM_ORIG_BYTES].location[0] = '\xCC';
397
+
398
+ // first breakpoint is the notification
399
+ if (i == 0)
400
+ ProfilerRecord(sig, info, data);
401
+
402
+ // reset instruction pointer
403
+ *ip -= 1;
404
+
405
+ break;
406
+ }
407
+ }
408
+ }
409
+
410
+ static VALUE
411
+ objprofiler_setup()
412
+ {
413
+ if (bObjProfilerRunning)
414
+ return Qtrue;
415
+
416
+ int i;
417
+ struct sigaction sig = { .sa_sigaction = trap_handler, .sa_flags = SA_SIGINFO };
418
+ sigemptyset(&sig.sa_mask);
419
+ sigaction(SIGTRAP, &sig, NULL);
420
+
421
+ unprotect_page((char*)rb_newobj);
422
+
423
+ for (i=0; i<NUM_ORIG_BYTES; i++) {
424
+ orig_bytes[i].location = (char *)rb_newobj + i;
425
+ orig_bytes[i].value = ((unsigned char*)rb_newobj)[i];
426
+ orig_bytes[i].location[0] = '\xCC';
427
+ }
428
+
429
+ // setenv("CPUPROFILE_OBJECTS", "1", 1);
430
+ bObjProfilerRunning = Qtrue;
431
+ return Qtrue;
432
+ }
433
+
434
+ static VALUE
435
+ objprofiler_teardown()
436
+ {
437
+ if (!bObjProfilerRunning)
438
+ return Qfalse;
439
+
440
+ int i;
441
+ struct sigaction sig = { .sa_handler = SIG_IGN };
442
+ sigemptyset(&sig.sa_mask);
443
+ sigaction(SIGTRAP, &sig, NULL);
444
+
445
+ for (i=0; i<NUM_ORIG_BYTES; i++) {
446
+ orig_bytes[i].location[0] = orig_bytes[i].value;
447
+ }
448
+
449
+ // unsetenv("CPUPROFILE_OBJECTS");
450
+ bObjProfilerRunning = Qfalse;
451
+ return Qtrue;
452
+ }
453
+
454
+ /* Init */
455
+
456
+ static void
457
+ profiler_at_exit(VALUE self)
458
+ {
459
+ cpuprofiler_stop(self);
460
+ }
461
+
462
+ void
463
+ Init_perftools()
464
+ {
465
+ cPerfTools = rb_define_class("PerfTools", rb_cObject);
466
+ eError = rb_define_class_under(cPerfTools, "Error", rb_eStandardError);
467
+ cCpuProfiler = rb_define_class_under(cPerfTools, "CpuProfiler", rb_cObject);
468
+
469
+ Iallocate = rb_intern("allocate");
470
+ I__send__ = rb_intern("__send__");
471
+ Isend = rb_intern("send");
472
+
473
+ bMethProfilerRunning = bObjProfilerRunning = bProfilerRunning = Qfalse;
474
+
475
+ rb_define_singleton_method(cCpuProfiler, "running?", cpuprofiler_running_p, 0);
476
+ rb_define_singleton_method(cCpuProfiler, "start", cpuprofiler_start, 1);
477
+ rb_define_singleton_method(cCpuProfiler, "stop", cpuprofiler_stop, 0);
478
+
479
+ gc_hook = Data_Wrap_Struct(cCpuProfiler, cpuprofiler_gc_mark, NULL, NULL);
480
+ rb_global_variable(&gc_hook);
481
+
482
+ if (ProfilingIsEnabledForAllThreads()) { // profiler is already running?
483
+ bProfilerRunning = Qtrue;
484
+
485
+ if (getenv("CPUPROFILE_OBJECTS")) { // want to profile objects
486
+ objprofiler_setup();
487
+ } else if (getenv("CPUPROFILE_METHODS")) {
488
+ methprofiler_setup();
489
+ }
490
+
491
+ rb_set_end_proc(profiler_at_exit, 0); // make sure to cleanup before the VM shuts down
492
+ }
493
+ }