perftools.rb 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,177 @@
1
+ # perftools.rb
2
+
3
+ google-perftools for ruby code
4
+ (c) 2010 Aman Gupta (tmm1)
5
+ http://www.ruby-lang.org/en/LICENSE.txt
6
+
7
+ ## Usage (in a webapp)
8
+
9
+ Use [rack-perftools_profiler](https://github.com/bhb/rack-perftools_profiler):
10
+
11
+ require 'rack/perftools_profiler'
12
+ config.middleware.use ::Rack::PerftoolsProfiler, :default_printer => 'gif'
13
+
14
+ Simply add `profile=true` to profile a request:
15
+
16
+ curl -o 10_requests_to_homepage.gif "http://localhost:3000/homepage?profile=true&times=10"
17
+
18
+ ## Usage (from Ruby)
19
+
20
+ Run the profiler with a block:
21
+
22
+ require 'perftools'
23
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
24
+ 5_000_000.times{ 1+2+3+4+5 }
25
+ end
26
+
27
+ Start and stop the profiler manually:
28
+
29
+ require 'perftools'
30
+ PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
31
+ 5_000_000.times{ 1+2+3+4+5 }
32
+ PerfTools::CpuProfiler.stop
33
+
34
+ ## Usage (externally)
35
+
36
+ Profile an existing ruby application without modifying it:
37
+
38
+ $ CPUPROFILE=/tmp/my_app_profile \
39
+ RUBYOPT="-r`gem which perftools | tail -1`" \
40
+ ruby my_app.rb
41
+
42
+ ## Profiler Modes
43
+
44
+ The profiler can be run in one of many modes, set via an environment
45
+ variable before the library is loaded:
46
+
47
+ * `CPUPROFILE_REALTIME=1`
48
+
49
+ Use walltime instead of cputime profiling. This will capture all time spent in a method, even if it does not involve the CPU.
50
+
51
+ For example, `sleep()` is not expensive in terms of cputime, but very expensive in walltime. Walltime profiling will also show functions spending a lot of time in network I/O.
52
+
53
+ * `CPUPROFILE_OBJECTS=1`
54
+
55
+ Profile object allocations instead of cpu/wall time. Each sample represents one object created inside that function.
56
+
57
+ * `CPUPROFILE_METHODS=1`
58
+
59
+ Profile method calls. Each sample represents one method call made inside that function.
60
+
61
+ The sampling interval of the profiler can be adjusted to collect more
62
+ (for better profile detail) or fewer samples (for lower overhead):
63
+
64
+ * `CPUPROFILE_FREQUENCY=500`
65
+
66
+ The default sampling frequency is 100 samples per second. The valid range is 1-4000.
67
+
68
+ ## Reporting
69
+
70
+ pprof.rb --text /tmp/add_numbers_profile
71
+
72
+ pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
73
+
74
+ pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
75
+
76
+ pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
77
+ kcachegrind /tmp/add_numbers_profile.grind
78
+
79
+ pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
80
+
81
+ pprof.rb --text --ignore=Gem /tmp/my_app_profile
82
+
83
+
84
+ For more options, see [pprof documentation](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
85
+
86
+
87
+ ### Examples
88
+
89
+ #### pprof.rb --text
90
+
91
+ Total: 1735 samples
92
+ 1487 85.7% 85.7% 1735 100.0% Integer#times
93
+ 248 14.3% 100.0% 248 14.3% Fixnum#+
94
+
95
+ #### pprof.rb --gif
96
+
97
+ * Simple [require 'rubygems'](http://perftools-rb.rubyforge.org/examples/rubygems.gif) profile
98
+
99
+ * Comparing redis-rb [with](http://perftools-rb.rubyforge.org/examples/redis-rb.gif) and [without](http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif) SystemTimer based socket timeouts
100
+
101
+ * [Sinatra](http://perftools-rb.rubyforge.org/examples/sinatra.gif) vs. [Merb](http://perftools-rb.rubyforge.org/examples/merb.gif) vs. [Rails](http://perftools-rb.rubyforge.org/examples/rails.gif)
102
+
103
+ * C-level profile of EventMachine + epoll + Ruby threads [before](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif) and [after](http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif) a [6 line EM bugfix](http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/)
104
+
105
+ * C-level profile of a [ruby/rails vm](http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif)
106
+ * 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
107
+
108
+
109
+ ## Installation
110
+
111
+ Just install the gem, which will download, patch and compile google-perftools for you:
112
+
113
+ sudo gem install perftools.rb
114
+
115
+ Or build your own gem:
116
+
117
+ git clone git://github.com/tmm1/perftools.rb
118
+ cd perftools.rb
119
+ gem build perftools.rb.gemspec
120
+ gem install perftools.rb
121
+
122
+
123
+ You'll also need graphviz to generate call graphs using dot:
124
+
125
+ brew install graphviz ghostscript # osx
126
+ sudo apt-get install graphviz ghostscript # debian/ubuntu (ghostscript provides ps2pdf)
127
+
128
+ ## Advantages over ruby-prof
129
+
130
+ * Sampling profiler
131
+
132
+ * perftools samples your process using setitimer() so it can be used in production with minimal overhead.
133
+
134
+
135
+ ## Profiling the Ruby VM and C extensions
136
+
137
+ To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64).
138
+
139
+ Download:
140
+
141
+ wget http://google-perftools.googlecode.com/files/google-perftools-1.6.tar.gz
142
+ tar zxvf google-perftools-1.6.tar.gz
143
+ cd google-perftools-1.6
144
+
145
+ Compile:
146
+
147
+ ./configure --prefix=/opt
148
+ make
149
+ sudo make install
150
+
151
+ Profile:
152
+
153
+ export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
154
+ export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
155
+ CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
156
+
157
+ Report:
158
+
159
+ pprof `which ruby` --text /tmp/ruby_interpreter.profile
160
+
161
+
162
+ ## TODO
163
+
164
+ * Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
165
+ * Allow both C and Ruby profiling
166
+ * Add setter for the sampling interval
167
+
168
+
169
+ ## Resources
170
+
171
+ * [GoRuCo 2009 Lightning Talk on perftools.rb](http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html) @ 21:52
172
+
173
+ * [Ilya Grigorik's introduction to perftools.rb](http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/)
174
+
175
+ * [Google Perftools](http://code.google.com/p/google-perftools/)
176
+
177
+ * [Analyzing profiles and interpreting different output formats](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof)
data/ext/extconf.rb CHANGED
@@ -15,11 +15,12 @@ if RUBY_VERSION >= "1.9"
15
15
  begin
16
16
  require "ruby_core_source"
17
17
  rescue LoadError
18
- STDERR.puts "\n\n"
19
- STDERR.puts "***************************************************************************************"
20
- STDERR.puts "******************** PLEASE RUN gem install ruby_core_source FIRST ********************"
21
- STDERR.puts "***************************************************************************************"
22
- exit(1)
18
+ require 'rubygems/dependency_installer'
19
+ installer = Gem::DependencyInstaller.new
20
+ installer.install 'ruby_core_source'
21
+ Gem.refresh
22
+
23
+ require "ruby_core_source"
23
24
  end
24
25
  end
25
26
 
data/perftools.rb.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = 'perftools.rb'
3
- s.version = '0.5.4'
4
- s.date = '2010-11-12'
3
+ s.version = '0.5.5'
4
+ s.date = '2011-01-23'
5
5
  s.rubyforge_project = 'perftools-rb'
6
6
  s.summary = 'google-perftools for ruby code'
7
7
  s.description = 'A sampling profiler for ruby code based on patches to google-perftools'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 5
8
- - 4
9
- version: 0.5.4
8
+ - 5
9
+ version: 0.5.5
10
10
  platform: ruby
11
11
  authors:
12
12
  - Aman Gupta
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-11-12 00:00:00 -06:00
17
+ date: 2011-01-23 00:00:00 -08:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -27,13 +27,11 @@ extensions:
27
27
  extra_rdoc_files: []
28
28
 
29
29
  files:
30
- - README
30
+ - README.md
31
31
  - bin/pprof.rb
32
32
  - ext/extconf.rb
33
33
  - ext/perftools.c
34
34
  - ext/src/google-perftools-1.6.tar.gz
35
- - objalloc_tests/Makefile
36
- - objalloc_tests/trap.c
37
35
  - patches/perftools-debug.patch
38
36
  - patches/perftools-frames.patch
39
37
  - patches/perftools-gc.patch
data/README DELETED
@@ -1,167 +0,0 @@
1
- google-perftools for ruby code
2
- (c) 2009 Aman Gupta (tmm1)
3
-
4
- === Usage
5
-
6
- Run the profiler with a block:
7
-
8
- require 'perftools'
9
- PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
10
- 5_000_000.times{ 1+2+3+4+5 }
11
- end
12
-
13
- Start and stop the profiler manually:
14
-
15
- require 'perftools'
16
- PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
17
- 5_000_000.times{ 1+2+3+4+5 }
18
- PerfTools::CpuProfiler.stop
19
-
20
- Profile an existing ruby application without modifying it:
21
-
22
- $ CPUPROFILE=/tmp/my_app_profile RUBYOPT="-r`gem which perftools | tail -1`" ruby my_app.rb
23
-
24
- Change the sampling interval (defaults to 100 times a second, valid range is 1-4000):
25
-
26
- $ CPUPROFILE_FREQUENCY=500 ruby my_app.rb
27
-
28
- Use walltime instead of cputime profiling:
29
-
30
- $ CPUPROFILE_REALTIME=1 ruby my_app.rb
31
-
32
- Profile object allocations instead of cpu/wall time:
33
-
34
- $ CPUPROFILE_OBJECTS=1 ruby my_app.rb
35
-
36
- Profile method calls:
37
-
38
- $ CPUPROFILE_METHODS=1 ruby my_app.rb
39
-
40
-
41
- === Reporting
42
-
43
- pprof.rb --text /tmp/add_numbers_profile
44
-
45
- pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
46
-
47
- pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
48
-
49
- pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
50
- kcachegrind /tmp/add_numbers_profile.grind
51
-
52
- pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
53
-
54
- pprof.rb --text --ignore=Gem /tmp/my_app_profile
55
-
56
-
57
- For more options, see http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof
58
-
59
-
60
- === Examples
61
-
62
- pprof.rb --text
63
-
64
- Total: 1735 samples
65
- 1487 85.7% 85.7% 1735 100.0% Integer#times
66
- 248 14.3% 100.0% 248 14.3% Fixnum#+
67
-
68
- pprof.rb --gif
69
-
70
- Simple require 'rubygems' profile
71
-
72
- http://perftools-rb.rubyforge.org/examples/rubygems.gif
73
-
74
- Comparing redis-rb with and without SystemTimer based socket timeouts
75
-
76
- http://perftools-rb.rubyforge.org/examples/redis-rb.gif
77
- http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif
78
-
79
- Sinatra vs. Merb vs. Rails
80
-
81
- http://perftools-rb.rubyforge.org/examples/sinatra.gif
82
- http://perftools-rb.rubyforge.org/examples/merb.gif
83
- http://perftools-rb.rubyforge.org/examples/rails.gif
84
-
85
- C-level profile of EventMachine + epoll + Ruby threads
86
- before http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/
87
-
88
- http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif
89
- http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif
90
-
91
- C-level profile of the ruby interpreter
92
- 12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
93
- easily fixed by using the ThirdBase gem
94
-
95
- http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif
96
-
97
-
98
- === Installation
99
-
100
- Just install the gem, which will download, patch and compile google-perftools for you:
101
-
102
- sudo gem install perftools.rb
103
-
104
- Or use the dev gem from github:
105
-
106
- gem install -s http://gems.github.com tmm1-perftools.rb
107
-
108
- Or build your own gem:
109
-
110
- git clone git://github.com/tmm1/perftools.rb
111
- cd perftools.rb
112
- gem build perftools.rb.gemspec
113
- gem install perftools.rb
114
-
115
-
116
- You'll also need graphviz to generate call graphs using dot:
117
-
118
- sudo port install graphviz # osx
119
- sudo apt-get install graphviz # debian/ubuntu
120
-
121
-
122
- === Advantages over ruby-prof
123
-
124
- Sampling profiler
125
-
126
- perftools samples your process using setitimer() so it can be used in production with minimal overhead.
127
-
128
-
129
- === Profiling the Ruby VM and C extensions
130
-
131
- To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64):
132
-
133
- wget http://google-perftools.googlecode.com/files/google-perftools-1.3.tar.gz
134
- tar zxvf google-perftools-1.3.tar.gz
135
- cd google-perftools-1.3
136
-
137
- ./configure --prefix=/opt
138
- make
139
- sudo make install
140
-
141
- export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
142
- export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
143
- CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
144
-
145
- pprof `which ruby` --text /tmp/ruby_interpreter.profile
146
-
147
-
148
- === TODO
149
-
150
- * Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
151
- * Allow both C and Ruby profiling
152
- * Add setter for the sampling interval
153
-
154
-
155
- === Resources
156
-
157
- GoRuCo 2009 Lightning Talk on perftools.rb
158
- http://goruco2009.confreaks.com/30-may-2009-18-35-rejectconf-various-presenters.html at 21:52
159
-
160
- Ilya Grigorik's introduction to perftools.rb
161
- http://www.igvita.com/2009/06/13/profiling-ruby-with-googles-perftools/
162
-
163
- Google Perftools
164
- http://code.google.com/p/google-perftools/
165
-
166
- Analyzing profiles and interpreting different output formats
167
- http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof
@@ -1,7 +0,0 @@
1
- default: trap
2
-
3
- trap: trap.c
4
- gcc -o trap trap.c -ggdb -O0 -Wall
5
-
6
- clean:
7
- rm -rf *.o trap *.dSYM
@@ -1,103 +0,0 @@
1
- #define _GNU_SOURCE
2
- #define _XOPEN_SOURCE 600
3
-
4
- #include <assert.h>
5
- #include <ucontext.h>
6
- #include <unistd.h>
7
- #include <signal.h>
8
- #include <stdio.h>
9
- #include <stdlib.h>
10
- #include <string.h>
11
- #include <sys/mman.h>
12
-
13
-
14
- void
15
- func() {
16
- printf("hi\n");
17
- }
18
-
19
-
20
- static inline void *
21
- page_align(void *addr) {
22
- assert(addr != NULL);
23
- return (void *)((size_t)addr & ~(0xFFFF));
24
- }
25
-
26
- static void
27
- copy_instructions(void *dest, void *src, size_t count) {
28
- assert(dest != NULL);
29
- assert(src != NULL);
30
-
31
- void *aligned_addr = page_align(dest);
32
- if (mprotect(aligned_addr, (dest - aligned_addr) + count, PROT_READ|PROT_WRITE|PROT_EXEC) != 0)
33
- perror("mprotect");
34
- memcpy(dest, src, count);
35
- }
36
-
37
- #define NUM_ORIG_BYTES 2
38
- struct {
39
- void *location;
40
- unsigned char value;
41
- } orig_bytes[NUM_ORIG_BYTES];
42
-
43
- static inline void**
44
- uc_get_ip(ucontext_t *uc) {
45
- #if defined(__FreeBSD__)
46
- return (void**)&uc->uc_mcontext.mc_rip;
47
- #elif defined(__dietlibc__)
48
- return (void**)&uc->uc_mcontext.rip;
49
- #elif defined(__APPLE__)
50
- return (void**)&uc->uc_mcontext->__ss.__rip;
51
- #else
52
- return (void**)&uc->uc_mcontext.gregs[REG_RIP];
53
- #endif
54
- }
55
-
56
- static void
57
- trap_handler(int signal, siginfo_t *info, void *data) {
58
- int i;
59
- ucontext_t *uc = (ucontext_t *)data;
60
- void **ip = uc_get_ip(uc);
61
-
62
- // printf("signal: %d, addr: %p, ip: %p\n", signal, info->si_addr, *ip);
63
-
64
- for (i=0; i<NUM_ORIG_BYTES; i++) {
65
- if (orig_bytes[i].location == *ip-1) {
66
- // restore original byte
67
- copy_instructions(orig_bytes[i].location, &orig_bytes[i].value, 1);
68
-
69
- // setup next breakpoint
70
- copy_instructions(orig_bytes[(i+1)%NUM_ORIG_BYTES].location, "\xCC", 1);
71
-
72
- // first breakpoint is the notification
73
- if (i == 0)
74
- printf(" ---> YOU'RE CALLING FUNC()\n");
75
-
76
- // reset instruction pointer
77
- *ip -= 1;
78
-
79
- break;
80
- }
81
- }
82
- }
83
-
84
- int
85
- main() {
86
- int i;
87
- struct sigaction sig = { .sa_sigaction = trap_handler, .sa_flags = SA_SIGINFO };
88
- sigemptyset(&sig.sa_mask);
89
- sigaction(SIGTRAP, &sig, NULL);
90
-
91
- for (i=0; i<NUM_ORIG_BYTES; i++) {
92
- orig_bytes[i].location = func + i;
93
- orig_bytes[i].value = ((unsigned char*)func)[i];
94
- copy_instructions(func + i, "\xCC", 1);
95
- }
96
-
97
- printf("func: %p\n", func);
98
-
99
- for (i=0; i<10; i++)
100
- func();
101
-
102
- return 0;
103
- }