perftools.rb 0.5.4 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,177 @@
1
+ # perftools.rb
2
+
3
+ google-perftools for ruby code
4
+ (c) 2010 Aman Gupta (tmm1)
5
+ http://www.ruby-lang.org/en/LICENSE.txt
6
+
7
+ ## Usage (in a webapp)
8
+
9
+ Use [rack-perftools_profiler](https://github.com/bhb/rack-perftools_profiler):
10
+
11
+ require 'rack/perftools_profiler'
12
+ config.middleware.use ::Rack::PerftoolsProfiler, :default_printer => 'gif'
13
+
14
+ Simply add `profile=true` to profile a request:
15
+
16
+ curl -o 10_requests_to_homepage.gif "http://localhost:3000/homepage?profile=true&times=10"
17
+
18
+ ## Usage (from Ruby)
19
+
20
+ Run the profiler with a block:
21
+
22
+ require 'perftools'
23
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
24
+ 5_000_000.times{ 1+2+3+4+5 }
25
+ end
26
+
27
+ Start and stop the profiler manually:
28
+
29
+ require 'perftools'
30
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
31
+ 5_000_000.times{ 1+2+3+4+5 }
32
+ PerfTools::CpuProfiler.stop
33
+
34
+ ## Usage (externally)
35
+
36
+ Profile an existing ruby application without modifying it:
37
+
38
+ $ CPUPROFILE=/tmp/my_app_profile \
39
+ RUBYOPT="-r`gem which perftools | tail -1`" \
40
+ ruby my_app.rb
41
+
42
+ ## Profiler Modes
43
+
44
+ The profiler can be run in one of many modes, set via an environment
45
+ variable before the library is loaded:
46
+
47
+ * `CPUPROFILE_REALTIME=1`
48
+
49
+ Use walltime instead of cputime profiling. This will capture all time spent in a method, even if it does not involve the CPU.
50
+
51
+ For example, `sleep()` is not expensive in terms of cputime, but is very expensive in walltime. Walltime profiling will also show functions that spend a lot of time in network I/O.
52
+
53
+ * `CPUPROFILE_OBJECTS=1`
54
+
55
+ Profile object allocations instead of cpu/wall time. Each sample represents one object created inside that function.
56
+
57
+ * `CPUPROFILE_METHODS=1`
58
+
59
+ Profile method calls. Each sample represents one method call made inside that function.
60
+
61
+ The sampling interval of the profiler can be adjusted to collect more
62
+ (for better profile detail) or fewer samples (for lower overhead):
63
+
64
+ * `CPUPROFILE_FREQUENCY=500`
65
+
66
+ The default sampling frequency is 100 samples per second. The valid range is 1-4000.
67
+
68
+ ## Reporting
69
+
70
+ pprof.rb --text /tmp/add_numbers_profile
71
+
72
+ pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
73
+
74
+ pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
75
+
76
+ pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
77
+ kcachegrind /tmp/add_numbers_profile.grind
78
+
79
+ pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
80
+
81
+ pprof.rb --text --ignore=Gem /tmp/my_app_profile
82
+
83
+
84
+ For more options, see [pprof documentation](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
85
+
86
+
87
+ ### Examples
88
+
89
+ #### pprof.rb --text
90
+
91
+ Total: 1735 samples
92
+ 1487 85.7% 85.7% 1735 100.0% Integer#times
93
+ 248 14.3% 100.0% 248 14.3% Fixnum#+
94
+
95
+ #### pprof.rb --gif
96
+
97
+ * Simple [require 'rubygems'](http://perftools-rb.rubyforge.org/examples/rubygems.gif) profile
98
+
99
+ * Comparing redis-rb [with](http://perftools-rb.rubyforge.org/examples/redis-rb.gif) and [without](http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif) SystemTimer based socket timeouts
100
+
101
+ * [Sinatra](http://perftools-rb.rubyforge.org/examples/sinatra.gif) vs. [Merb](http://perftools-rb.rubyforge.org/examples/merb.gif) vs. [Rails](http://perftools-rb.rubyforge.org/examples/rails.gif)
102
+
103
+ * C-level profile of EventMachine + epoll + Ruby threads [before](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif) and [after](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif) a [6 line EM bugfix](http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/)
104
+
105
+ * C-level profile of a [ruby/rails vm](http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif)
106
+ * 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
107
+
108
+
109
+ ## Installation
110
+
111
+ Just install the gem, which will download, patch and compile google-perftools for you:
112
+
113
+ sudo gem install perftools.rb
114
+
115
+ Or build your own gem:
116
+
117
+ git clone git://github.com/tmm1/perftools.rb
118
+ cd perftools.rb
119
+ gem build perftools.rb.gemspec
120
+ gem install perftools.rb
121
+
122
+
123
+ You'll also need graphviz to generate call graphs using dot:
124
+
125
+ sudo brew install graphviz ghostscript # osx
126
+ sudo apt-get install graphviz ghostscript # debian/ubuntu (ghostscript provides ps2pdf)
127
+
128
+ ## Advantages over ruby-prof
129
+
130
+ * Sampling profiler
131
+
132
+ * perftools samples your process using setitimer() so it can be used in production with minimal overhead.
133
+
134
+
135
+ ## Profiling the Ruby VM and C extensions
136
+
137
+ To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64).
138
+
139
+ Download:
140
+
141
+ wget http://google-perftools.googlecode.com/files/google-perftools-1.6.tar.gz
142
+ tar zxvf google-perftools-1.6.tar.gz
143
+ cd google-perftools-1.6
144
+
145
+ Compile:
146
+
147
+ ./configure --prefix=/opt
148
+ make
149
+ sudo make install
150
+
151
+ Profile:
152
+
153
+ export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
154
+ export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
155
+ CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
156
+
157
+ Report:
158
+
159
+ pprof `which ruby` --text /tmp/ruby_interpreter.profile
160
+
161
+
162
+ ## TODO
163
+
164
+ * Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
165
+ * Allow both C and Ruby profiling
166
+ * Add setter for the sampling interval
167
+
168
+
169
+ ## Resources
170
+
171
+ * [GoRuCo 2009 Lightning Talk on perftools.rb](http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html) @ 21:52
172
+
173
+ * [Ilya Grigorik's introduction to perftools.rb](http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/)
174
+
175
+ * [Google Perftools](http://code.google.com/p/google-perftools/)
176
+
177
+ * [Analyzing profiles and interpreting different output formats](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
data/ext/extconf.rb CHANGED
@@ -15,11 +15,12 @@ if RUBY_VERSION >= "1.9"
15
15
  begin
16
16
  require "ruby_core_source"
17
17
  rescue LoadError
18
- STDERR.puts "\n\n"
19
- STDERR.puts "***************************************************************************************"
20
- STDERR.puts "******************** PLEASE RUN gem install ruby_core_source FIRST ********************"
21
- STDERR.puts "***************************************************************************************"
22
- exit(1)
18
+ require 'rubygems/dependency_installer'
19
+ installer = Gem::DependencyInstaller.new
20
+ installer.install 'ruby_core_source'
21
+ Gem.refresh
22
+
23
+ require "ruby_core_source"
23
24
  end
24
25
  end
25
26
 
data/perftools.rb.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = 'perftools.rb'
3
- s.version = '0.5.4'
4
- s.date = '2010-11-12'
3
+ s.version = '0.5.5'
4
+ s.date = '2011-01-23'
5
5
  s.rubyforge_project = 'perftools-rb'
6
6
  s.summary = 'google-perftools for ruby code'
7
7
  s.description = 'A sampling profiler for ruby code based on patches to google-perftools'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 5
8
- - 4
9
- version: 0.5.4
8
+ - 5
9
+ version: 0.5.5
10
10
  platform: ruby
11
11
  authors:
12
12
  - Aman Gupta
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-11-12 00:00:00 -06:00
17
+ date: 2011-01-23 00:00:00 -08:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -27,13 +27,11 @@ extensions:
27
27
  extra_rdoc_files: []
28
28
 
29
29
  files:
30
- - README
30
+ - README.md
31
31
  - bin/pprof.rb
32
32
  - ext/extconf.rb
33
33
  - ext/perftools.c
34
34
  - ext/src/google-perftools-1.6.tar.gz
35
- - objalloc_tests/Makefile
36
- - objalloc_tests/trap.c
37
35
  - patches/perftools-debug.patch
38
36
  - patches/perftools-frames.patch
39
37
  - patches/perftools-gc.patch
data/README DELETED
@@ -1,167 +0,0 @@
1
- google-perftools for ruby code
2
- (c) 2009 Aman Gupta (tmm1)
3
-
4
- === Usage
5
-
6
- Run the profiler with a block:
7
-
8
- require 'perftools'
9
- PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
10
- 5_000_000.times{ 1+2+3+4+5 }
11
- end
12
-
13
- Start and stop the profiler manually:
14
-
15
- require 'perftools'
16
- PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
17
- 5_000_000.times{ 1+2+3+4+5 }
18
- PerfTools::CpuProfiler.stop
19
-
20
- Profile an existing ruby application without modifying it:
21
-
22
- $ CPUPROFILE=/tmp/my_app_profile RUBYOPT="-r`gem which perftools | tail -1`" ruby my_app.rb
23
-
24
- Change the sampling interval (defaults to 100 times a second, valid range is 1-4000):
25
-
26
- $ CPUPROFILE_FREQUENCY=500 ruby my_app.rb
27
-
28
- Use walltime instead of cputime profiling:
29
-
30
- $ CPUPROFILE_REALTIME=1 ruby my_app.rb
31
-
32
- Profile object allocations instead of cpu/wall time:
33
-
34
- $ CPUPROFILE_OBJECTS=1 ruby my_app.rb
35
-
36
- Profile method calls:
37
-
38
- $ CPUPROFILE_METHODS=1 ruby my_app.rb
39
-
40
-
41
- === Reporting
42
-
43
- pprof.rb --text /tmp/add_numbers_profile
44
-
45
- pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
46
-
47
- pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
48
-
49
- pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
50
- kcachegrind /tmp/add_numbers_profile.grind
51
-
52
- pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
53
-
54
- pprof.rb --text --ignore=Gem /tmp/my_app_profile
55
-
56
-
57
- For more options, see http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof
58
-
59
-
60
- === Examples
61
-
62
- pprof.rb --text
63
-
64
- Total: 1735 samples
65
- 1487 85.7% 85.7% 1735 100.0% Integer#times
66
- 248 14.3% 100.0% 248 14.3% Fixnum#+
67
-
68
- pprof.rb --gif
69
-
70
- Simple require 'rubygems' profile
71
-
72
- http://perftools-rb.rubyforge.org/examples/rubygems.gif
73
-
74
- Comparing redis-rb with and without SystemTimer based socket timeouts
75
-
76
- http://perftools-rb.rubyforge.org/examples/redis-rb.gif
77
- http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif
78
-
79
- Sinatra vs. Merb vs. Rails
80
-
81
- http://perftools-rb.rubyforge.org/examples/sinatra.gif
82
- http://perftools-rb.rubyforge.org/examples/merb.gif
83
- http://perftools-rb.rubyforge.org/examples/rails.gif
84
-
85
- C-level profile of EventMachine + epoll + Ruby threads
86
- before http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/
87
-
88
- http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif
89
- http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif
90
-
91
- C-level profile of the ruby interpreter
92
- 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
93
- easily fixed by using the ThirdBase gem
94
-
95
- http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif
96
-
97
-
98
- === Installation
99
-
100
- Just install the gem, which will download, patch and compile google-perftools for you:
101
-
102
- sudo gem install perftools.rb
103
-
104
- Or use the dev gem from github:
105
-
106
- gem install -s http://gems.github.com tmm1-perftools.rb
107
-
108
- Or build your own gem:
109
-
110
- git clone git://github.com/tmm1/perftools.rb
111
- cd perftools.rb
112
- gem build perftools.rb.gemspec
113
- gem install perftools.rb
114
-
115
-
116
- You'll also need graphviz to generate call graphs using dot:
117
-
118
- sudo port install graphviz # osx
119
- sudo apt-get install graphviz # debian/ubuntu
120
-
121
-
122
- === Advantages over ruby-prof
123
-
124
- Sampling profiler
125
-
126
- perftools samples your process using setitimer() so it can be used in production with minimal overhead.
127
-
128
-
129
- === Profiling the Ruby VM and C extensions
130
-
131
- To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64):
132
-
133
- wget http://google-perftools.googlecode.com/files/google-perftools-1.3.tar.gz
134
- tar zxvf google-perftools-1.3.tar.gz
135
- cd google-perftools-1.3
136
-
137
- ./configure --prefix=/opt
138
- make
139
- sudo make install
140
-
141
- export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
142
- export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
143
- CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
144
-
145
- pprof `which ruby` --text /tmp/ruby_interpreter.profile
146
-
147
-
148
- === TODO
149
-
150
- * Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
151
- * Allow both C and Ruby profiling
152
- * Add setter for the sampling interval
153
-
154
-
155
- === Resources
156
-
157
- GoRuCo 2009 Lightning Talk on perftools.rb
158
- http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html at 21:52
159
-
160
- Ilya Grigorik's introduction to perftools.rb
161
- http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/
162
-
163
- Google Perftools
164
- http://code.google.com/p/google-perftools/
165
-
166
- Analyzing profiles and interpreting different output formats
167
- http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof
@@ -1,7 +0,0 @@
1
- default: trap
2
-
3
- trap: trap.c
4
- gcc -o trap trap.c -ggdb -O0 -Wall
5
-
6
- clean:
7
- rm -rf *.o trap *.dSYM
@@ -1,103 +0,0 @@
1
- #define _GNU_SOURCE
2
- #define _XOPEN_SOURCE 600
3
-
4
- #include <assert.h>
5
- #include <ucontext.h>
6
- #include <unistd.h>
7
- #include <signal.h>
8
- #include <stdio.h>
9
- #include <stdlib.h>
10
- #include <string.h>
11
- #include <sys/mman.h>
12
-
13
-
14
- void
15
- func() {
16
- printf("hi\n");
17
- }
18
-
19
-
20
- static inline void *
21
- page_align(void *addr) {
22
- assert(addr != NULL);
23
- return (void *)((size_t)addr & ~(0xFFFF));
24
- }
25
-
26
- static void
27
- copy_instructions(void *dest, void *src, size_t count) {
28
- assert(dest != NULL);
29
- assert(src != NULL);
30
-
31
- void *aligned_addr = page_align(dest);
32
- if (mprotect(aligned_addr, (dest - aligned_addr) + count, PROT_READ|PROT_WRITE|PROT_EXEC) != 0)
33
- perror("mprotect");
34
- memcpy(dest, src, count);
35
- }
36
-
37
- #define NUM_ORIG_BYTES 2
38
- struct {
39
- void *location;
40
- unsigned char value;
41
- } orig_bytes[NUM_ORIG_BYTES];
42
-
43
- static inline void**
44
- uc_get_ip(ucontext_t *uc) {
45
- #if defined(__FreeBSD__)
46
- return (void**)&uc->uc_mcontext.mc_rip;
47
- #elif defined(__dietlibc__)
48
- return (void**)&uc->uc_mcontext.rip;
49
- #elif defined(__APPLE__)
50
- return (void**)&uc->uc_mcontext->__ss.__rip;
51
- #else
52
- return (void**)&uc->uc_mcontext.gregs[REG_RIP];
53
- #endif
54
- }
55
-
56
- static void
57
- trap_handler(int signal, siginfo_t *info, void *data) {
58
- int i;
59
- ucontext_t *uc = (ucontext_t *)data;
60
- void **ip = uc_get_ip(uc);
61
-
62
- // printf("signal: %d, addr: %p, ip: %p\n", signal, info->si_addr, *ip);
63
-
64
- for (i=0; i<NUM_ORIG_BYTES; i++) {
65
- if (orig_bytes[i].location == *ip-1) {
66
- // restore original byte
67
- copy_instructions(orig_bytes[i].location, &orig_bytes[i].value, 1);
68
-
69
- // setup next breakpoint
70
- copy_instructions(orig_bytes[(i+1)%NUM_ORIG_BYTES].location, "\xCC", 1);
71
-
72
- // first breakpoint is the notification
73
- if (i == 0)
74
- printf(" ---> YOU'RE CALLING FUNC()\n");
75
-
76
- // reset instruction pointer
77
- *ip -= 1;
78
-
79
- break;
80
- }
81
- }
82
- }
83
-
84
- int
85
- main() {
86
- int i;
87
- struct sigaction sig = { .sa_sigaction = trap_handler, .sa_flags = SA_SIGINFO };
88
- sigemptyset(&sig.sa_mask);
89
- sigaction(SIGTRAP, &sig, NULL);
90
-
91
- for (i=0; i<NUM_ORIG_BYTES; i++) {
92
- orig_bytes[i].location = func + i;
93
- orig_bytes[i].value = ((unsigned char*)func)[i];
94
- copy_instructions(func + i, "\xCC", 1);
95
- }
96
-
97
- printf("func: %p\n", func);
98
-
99
- for (i=0; i<10; i++)
100
- func();
101
-
102
- return 0;
103
- }