perftools.rb 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +146 -0
- data/bin/pprof.rb +3 -0
- data/ext/extconf.rb +56 -0
- data/ext/perftools.c +122 -0
- data/patches/perftools-debug.patch +20 -0
- data/patches/perftools-osx.patch +13 -0
- data/patches/perftools-static.patch +27 -0
- data/patches/perftools.patch +302 -0
- data/perftools.rb.gemspec +31 -0
- metadata +63 -0
data/README
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
google-perftools for ruby code
|
2
|
+
(c) 2009 Aman Gupta (tmm1)
|
3
|
+
|
4
|
+
=== Usage
|
5
|
+
|
6
|
+
Run the profiler with a block:
|
7
|
+
|
8
|
+
require 'perftools'
|
9
|
+
PerfTools::CpuProfiler.start("/tmp/add_numbers_profile") do
|
10
|
+
5_000_000.times{ 1+2+3+4+5 }
|
11
|
+
end
|
12
|
+
|
13
|
+
Start and stop the profiler manually:
|
14
|
+
|
15
|
+
require 'perftools'
|
16
|
+
PerfTools::CpuProfiler.start("/tmp/add_numbers_profile")
|
17
|
+
5_000_000.times{ 1+2+3+4+5 }
|
18
|
+
PerfTools::CpuProfiler.stop
|
19
|
+
|
20
|
+
Profile an existing ruby application without modifying it:
|
21
|
+
|
22
|
+
$ CPUPROFILE=/tmp/my_app_profile RUBYOPT="-r`gem which perftools | tail -1`" ruby my_app.rb
|
23
|
+
|
24
|
+
|
25
|
+
=== Reporting
|
26
|
+
|
27
|
+
pprof.rb --text /tmp/add_numbers_profile
|
28
|
+
|
29
|
+
pprof.rb --pdf /tmp/add_numbers_profile > /tmp/add_numbers_profile.pdf
|
30
|
+
|
31
|
+
pprof.rb --gif /tmp/add_numbers_profile > /tmp/add_numbers_profile.gif
|
32
|
+
|
33
|
+
pprof.rb --callgrind /tmp/add_numbers_profile > /tmp/add_numbers_profile.grind
|
34
|
+
kcachegrind /tmp/add_numbers_profile.grind
|
35
|
+
|
36
|
+
pprof.rb --gif --focus=Integer /tmp/add_numbers_profile > /tmp/add_numbers_custom.gif
|
37
|
+
|
38
|
+
pprof.rb --text --ignore=Gem /tmp/my_app_profile
|
39
|
+
|
40
|
+
|
41
|
+
For more options, see http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof
|
42
|
+
|
43
|
+
|
44
|
+
=== Examples
|
45
|
+
|
46
|
+
pprof.rb --text
|
47
|
+
|
48
|
+
Total: 1735 samples
|
49
|
+
1487 85.7% 85.7% 1735 100.0% Integer#times
|
50
|
+
248 14.3% 100.0% 248 14.3% Fixnum#+
|
51
|
+
|
52
|
+
pprof.rb --gif
|
53
|
+
|
54
|
+
Simple require 'rubygems' profile
|
55
|
+
|
56
|
+
http://perftools-rb.rubyforge.org/examples/rubygems.gif
|
57
|
+
|
58
|
+
Comparing redis-rb with and without SystemTimer based socket timeouts
|
59
|
+
|
60
|
+
http://perftools-rb.rubyforge.org/examples/redis-rb.gif
|
61
|
+
http://perftools-rb.rubyforge.org/examples/redis-rb-notimeout.gif
|
62
|
+
|
63
|
+
Sinatra vs. Merb vs. Rails
|
64
|
+
|
65
|
+
http://perftools-rb.rubyforge.org/examples/sinatra.gif
|
66
|
+
http://perftools-rb.rubyforge.org/examples/merb.gif
|
67
|
+
http://perftools-rb.rubyforge.org/examples/rails.gif
|
68
|
+
|
69
|
+
C-level profile of EventMachine + epoll + Ruby threads
|
70
|
+
before http://timetobleed.com/6-line-eventmachine-bugfix-2x-faster-gc-1300-requestssec/
|
71
|
+
|
72
|
+
http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+nothreads.gif
|
73
|
+
http://perftools-rb.rubyforge.org/examples/eventmachine-epoll+threads.gif
|
74
|
+
|
75
|
+
C-level profile of the ruby interpreter
|
76
|
+
12% time spent in re_match_exec because of excessive calls to rb_str_sub_bang by Date.parse
|
77
|
+
easily fixed by using the ThirdBase gem
|
78
|
+
|
79
|
+
http://perftools-rb.rubyforge.org/examples/ruby_interpreter.gif
|
80
|
+
|
81
|
+
|
82
|
+
=== Installation
|
83
|
+
|
84
|
+
Just install the gem, which will download, patch and compile google-perftools for you:
|
85
|
+
|
86
|
+
sudo gem install perftools.rb
|
87
|
+
|
88
|
+
Or use the dev gem from github:
|
89
|
+
|
90
|
+
gem install -s http://gems.github.com tmm1-perftools.rb
|
91
|
+
|
92
|
+
Or build your own gem:
|
93
|
+
|
94
|
+
git clone git://github.com/tmm1/perftools.rb
|
95
|
+
cd perftools.rb
|
96
|
+
gem build perftools.rb.gemspec
|
97
|
+
gem install perftools.rb-*.gem
|
98
|
+
|
99
|
+
|
100
|
+
You'll also need graphviz to generate call graphs using dot:
|
101
|
+
|
102
|
+
sudo port install graphviz # osx
|
103
|
+
sudo apt-get install graphviz # debian/ubuntu
|
104
|
+
|
105
|
+
|
106
|
+
=== Advantages over ruby-prof
|
107
|
+
|
108
|
+
Sampling profiler
|
109
|
+
|
110
|
+
perftools samples your process using setitimer() so it can be used in production with minimal overhead.
|
111
|
+
|
112
|
+
|
113
|
+
=== Profiling the Ruby VM and C extensions
|
114
|
+
|
115
|
+
To profile C code, download and build an unpatched perftools (libunwind or ./configure --enable-frame-pointers required on x86_64):
|
116
|
+
|
117
|
+
wget http://google-perftools.googlecode.com/files/google-perftools-1.2.tar.gz
|
118
|
+
tar zxvf google-perftools-1.2.tar.gz
|
119
|
+
cd google-perftools-1.2
|
120
|
+
|
121
|
+
./configure --prefix=/opt
|
122
|
+
make
|
123
|
+
sudo make install
|
124
|
+
|
125
|
+
export LD_PRELOAD=/opt/lib/libprofiler.so # for linux
|
126
|
+
export DYLD_INSERT_LIBRARIES=/opt/lib/libprofiler.dylib # for osx
|
127
|
+
CPUPROFILE=/tmp/ruby_interpreter.profile ruby -e' 5_000_000.times{ "hello world" } '
|
128
|
+
|
129
|
+
pprof `which ruby` --text /tmp/ruby_interpreter.profile
|
130
|
+
|
131
|
+
|
132
|
+
=== TODO
|
133
|
+
|
134
|
+
* Add support for heap profiling to find memory leaks (PerfTools::HeapProfiler)
|
135
|
+
* Allow both C and Ruby profiling
|
136
|
+
* Add setter for the sampling interval
|
137
|
+
* Add support for ruby 1.9
|
138
|
+
|
139
|
+
|
140
|
+
=== Resources
|
141
|
+
|
142
|
+
Google Perftools
|
143
|
+
http://code.google.com/p/google-perftools/
|
144
|
+
|
145
|
+
Analyzing profiles and interpreting different output formats
|
146
|
+
http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html#pprof
|
data/bin/pprof.rb
ADDED
data/ext/extconf.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'mkmf'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'net/http'
|
4
|
+
|
5
|
+
# Build script for the `perftools` extension.
#
# Steps, all relative to the directory this script runs in (ext/):
#   1. download the google-perftools tarball into ext/src (skipped if present)
#   2. unpack it and apply the gem's patches (skipped if already unpacked)
#   3. copy the patched pprof script into the gem's top-level bin/ directory
#   4. build a static libprofiler.a and copy it next to this script
#   5. generate the Makefile that links the extension against libprofiler
url = 'http://google-perftools.googlecode.com/files/google-perftools-1.2.tar.gz'
perftools = File.basename(url)             # tarball file name
dir = File.basename(perftools, '.tar.gz')  # directory the tarball is unpacked into

Logging.message "(I'm about to download and compile google-perftools.. this will definitely take a while)"

FileUtils.mkdir_p('src')

Dir.chdir('src') do
  unless File.exist?(perftools)
    begin
      Net::HTTP.get_response(URI(url)) do |res|
        # Raises unless the response is 2xx, so an error page is never
        # written to disk and later mistaken for the tarball.
        res.value

        File.open(perftools, 'wb') do |out|
          res.read_body do |chunk|
            out.write(chunk)
          end
        end
      end
    rescue StandardError
      # Do not leave a truncated tarball behind: the File.exist? guard above
      # would otherwise skip the download on the next attempt.
      FileUtils.rm_f(perftools)
      raise
    end
  end

  unless File.exist?(dir)
    xsystem("tar zxvf #{perftools}")
    Dir.chdir(dir) do
      # Patch paths are relative to ext/src/<dir>, i.e. they point at the
      # gem's top-level patches/ directory.
      xsystem("patch -p1 < ../../../patches/perftools.patch")
      xsystem("patch -p1 < ../../../patches/perftools-static.patch")
      xsystem("patch -p1 < ../../../patches/perftools-osx.patch") if RUBY_PLATFORM =~ /darwin/
      xsystem("patch -p1 < ../../../patches/perftools-debug.patch")# if ENV['DEBUG']
    end
  end

  # The copy below lands in <gem root>/bin, which is ../../bin when seen from
  # ext/src, so that is the path that has to be checked.
  unless File.exist?('../../bin/pprof')
    Dir.chdir(dir) do
      FileUtils.cp 'src/pprof', '../../../bin/'
    end
  end

  unless File.exist?('../libprofiler.a')
    Dir.chdir(dir) do
      xsystem("./configure --disable-heap-profiler --disable-heap-checker --disable-shared")
      xsystem("make")
      FileUtils.cp '.libs/libprofiler.a', '../../'
    end
  end
end

case RUBY_PLATFORM
when /darwin/, /linux/
  # Replace the first word of LDSHARED (the link driver) with $(CXX) and keep
  # the remaining flags, so the extension is linked by the C++ compiler.
  CONFIG['LDSHARED'] = "$(CXX) " + CONFIG['LDSHARED'].split[1..-1].join(' ')
end

$libs = append_library($libs, 'profiler')
# Defines HAVE_RB_DURING_GC when available; perftools.c tests that macro.
have_func('rb_during_gc', 'ruby.h')
create_makefile 'perftools'
|
data/ext/perftools.c
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <node.h>
|
3
|
+
#include <env.h>
|
4
|
+
|
5
|
+
static VALUE Iallocate;
|
6
|
+
|
7
|
+
static inline void
|
8
|
+
save_frame(struct FRAME *frame, void** result, int *depth)
|
9
|
+
{
|
10
|
+
VALUE klass = frame->last_class;
|
11
|
+
// XXX what is an ICLASS anyway?
|
12
|
+
// if (BUILTIN_TYPE(klass) == T_ICLASS)
|
13
|
+
// klass = RBASIC(klass)->klass;
|
14
|
+
|
15
|
+
if (FL_TEST(klass, FL_SINGLETON) &&
|
16
|
+
(BUILTIN_TYPE(frame->self) == T_CLASS || BUILTIN_TYPE(frame->self) == T_MODULE))
|
17
|
+
result[(*depth)++] = (void*) frame->self;
|
18
|
+
else
|
19
|
+
result[(*depth)++] = 0;
|
20
|
+
|
21
|
+
result[(*depth)++] = (void*) klass;
|
22
|
+
result[(*depth)++] = (void*) (frame->last_func == ID_ALLOCATOR ? Iallocate : frame->last_func);
|
23
|
+
}
|
24
|
+
|
25
|
+
int
|
26
|
+
rb_stack_trace(void** result, int max_depth)
|
27
|
+
{
|
28
|
+
int depth = 0;
|
29
|
+
struct FRAME *frame = ruby_frame;
|
30
|
+
NODE *n;
|
31
|
+
|
32
|
+
if (max_depth == 0)
|
33
|
+
return 0;
|
34
|
+
|
35
|
+
// XXX: figure out what these mean. is there a way to access them from an extension?
|
36
|
+
// if (rb_prohibit_interrupt || !rb_trap_immediate) return 0;
|
37
|
+
|
38
|
+
#ifdef HAVE_RB_DURING_GC
|
39
|
+
if (rb_during_gc()) {
|
40
|
+
result[0] = rb_gc;
|
41
|
+
return 1;
|
42
|
+
}
|
43
|
+
#endif
|
44
|
+
|
45
|
+
// XXX does it make sense to track allocations or not?
|
46
|
+
if (frame->last_func == ID_ALLOCATOR) {
|
47
|
+
frame = frame->prev;
|
48
|
+
}
|
49
|
+
|
50
|
+
if (frame->last_func) {
|
51
|
+
save_frame(frame, result, &depth);
|
52
|
+
}
|
53
|
+
|
54
|
+
for (; frame && (n = frame->node); frame = frame->prev) {
|
55
|
+
if (frame->prev && frame->prev->last_func) {
|
56
|
+
if (frame->prev->node == n) {
|
57
|
+
if (frame->prev->last_func == frame->last_func) continue;
|
58
|
+
}
|
59
|
+
|
60
|
+
if (depth+3 > max_depth)
|
61
|
+
break;
|
62
|
+
|
63
|
+
save_frame(frame->prev, result, &depth);
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
return depth;
|
68
|
+
}
|
69
|
+
|
70
|
+
static VALUE cPerfTools;
|
71
|
+
static VALUE cCpuProfiler;
|
72
|
+
static VALUE bProfilerRunning;
|
73
|
+
|
74
|
+
VALUE
|
75
|
+
cpuprofiler_running_p(VALUE self)
|
76
|
+
{
|
77
|
+
return bProfilerRunning;
|
78
|
+
}
|
79
|
+
|
80
|
+
VALUE
|
81
|
+
cpuprofiler_stop(VALUE self)
|
82
|
+
{
|
83
|
+
if (!bProfilerRunning)
|
84
|
+
return Qfalse;
|
85
|
+
|
86
|
+
bProfilerRunning = Qfalse;
|
87
|
+
ProfilerStop();
|
88
|
+
ProfilerFlush();
|
89
|
+
return Qtrue;
|
90
|
+
}
|
91
|
+
|
92
|
+
VALUE
|
93
|
+
cpuprofiler_start(VALUE self, VALUE filename)
|
94
|
+
{
|
95
|
+
StringValue(filename);
|
96
|
+
|
97
|
+
if (bProfilerRunning)
|
98
|
+
return Qfalse;
|
99
|
+
|
100
|
+
ProfilerStart(RSTRING_PTR(filename));
|
101
|
+
bProfilerRunning = Qtrue;
|
102
|
+
|
103
|
+
if (rb_block_given_p()) {
|
104
|
+
rb_yield(Qnil);
|
105
|
+
cpuprofiler_stop(self);
|
106
|
+
}
|
107
|
+
|
108
|
+
return Qtrue;
|
109
|
+
}
|
110
|
+
|
111
|
+
void
|
112
|
+
Init_perftools()
|
113
|
+
{
|
114
|
+
cPerfTools = rb_define_class("PerfTools", rb_cObject);
|
115
|
+
cCpuProfiler = rb_define_class_under(cPerfTools, "CpuProfiler", rb_cObject);
|
116
|
+
bProfilerRunning = Qfalse;
|
117
|
+
Iallocate = rb_intern("allocate");
|
118
|
+
|
119
|
+
rb_define_singleton_method(cCpuProfiler, "running?", cpuprofiler_running_p, 0);
|
120
|
+
rb_define_singleton_method(cCpuProfiler, "start", cpuprofiler_start, 1);
|
121
|
+
rb_define_singleton_method(cCpuProfiler, "stop", cpuprofiler_stop, 0);
|
122
|
+
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
diff --git a/Makefile.in b/Makefile.in
|
2
|
+
index 1247b91..45179f3 100644
|
3
|
+
--- a/Makefile.in
|
4
|
+
+++ b/Makefile.in
|
5
|
+
@@ -989,13 +989,13 @@ AUTOMAKE = @AUTOMAKE@
|
6
|
+
AWK = @AWK@
|
7
|
+
CC = @CC@
|
8
|
+
CCDEPMODE = @CCDEPMODE@
|
9
|
+
-CFLAGS = @CFLAGS@ -DBUILD_FOR_RUBY -fPIC
|
10
|
+
+CFLAGS = @CFLAGS@ -DBUILD_FOR_RUBY -fPIC -O0 -ggdb
|
11
|
+
CPP = @CPP@
|
12
|
+
CPPFLAGS = @CPPFLAGS@
|
13
|
+
CXX = @CXX@
|
14
|
+
CXXCPP = @CXXCPP@
|
15
|
+
CXXDEPMODE = @CXXDEPMODE@
|
16
|
+
-CXXFLAGS = @CXXFLAGS@ -DBUILD_FOR_RUBY -fPIC
|
17
|
+
+CXXFLAGS = @CXXFLAGS@ -DBUILD_FOR_RUBY -fPIC -O0 -ggdb
|
18
|
+
CYGPATH_W = @CYGPATH_W@
|
19
|
+
DEFS = @DEFS@
|
20
|
+
DEPDIR = @DEPDIR@
|
@@ -0,0 +1,13 @@
|
|
1
|
+
diff --git a/Makefile.in b/Makefile.in
|
2
|
+
index 1247b91..3f6e133 100644
|
3
|
+
--- a/Makefile.in
|
4
|
+
+++ b/Makefile.in
|
5
|
+
@@ -1017,7 +1017,7 @@ INSTALL_DATA = @INSTALL_DATA@
|
6
|
+
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
7
|
+
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
8
|
+
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
9
|
+
-LDFLAGS = @LDFLAGS@
|
10
|
+
+LDFLAGS = @LDFLAGS@ -Wl,-flat_namespace,-undefined,dynamic_lookup
|
11
|
+
LIBOBJS = @LIBOBJS@
|
12
|
+
LIBS = @LIBS@
|
13
|
+
LIBSTDCXX_LA_LINKER_FLAG = @LIBSTDCXX_LA_LINKER_FLAG@
|
@@ -0,0 +1,27 @@
|
|
1
|
+
diff --git a/Makefile.in b/Makefile.in
|
2
|
+
index 1247b91..e7c5bca 100644
|
3
|
+
--- a/Makefile.in
|
4
|
+
+++ b/Makefile.in
|
5
|
+
@@ -3736,14 +3736,14 @@ uninstall-man: uninstall-man1
|
6
|
+
# (since it does work only in global constructors and destructors).
|
7
|
+
# We just replace the .a with an 'ld-r-ized' version under libtool's
|
8
|
+
# nose.
|
9
|
+
-@WITH_CPU_PROFILER_TRUE@libprofiler.o: libprofiler.la $(libprofiler_la_OBJECTS) $(libprofiler_la_DEPENDENCIES) $(libprofiler_la_LIBADD)
|
10
|
+
-@WITH_CPU_PROFILER_TRUE@ $(CXXLINK) $(libprofiler_la_OBJECTS) $(libprofiler_la_LIBADD)
|
11
|
+
-@WITH_CPU_PROFILER_TRUE@ cp -f "$@" .libs/libprofiler.a
|
12
|
+
-@WITH_CPU_PROFILER_TRUE@profiler_unittest.sh$(EXEEXT): $(top_srcdir)/$(profiler_unittest_sh_SOURCES) \
|
13
|
+
-@WITH_CPU_PROFILER_TRUE@ profiler1_unittest profiler2_unittest \
|
14
|
+
-@WITH_CPU_PROFILER_TRUE@ profiler3_unittest profiler4_unittest
|
15
|
+
-@WITH_CPU_PROFILER_TRUE@ rm -f $@
|
16
|
+
-@WITH_CPU_PROFILER_TRUE@ cp -p $(top_srcdir)/$(profiler_unittest_sh_SOURCES) $@
|
17
|
+
+# @WITH_CPU_PROFILER_TRUE@libprofiler.o: libprofiler.la $(libprofiler_la_OBJECTS) $(libprofiler_la_DEPENDENCIES) $(libprofiler_la_LIBADD)
|
18
|
+
+# @WITH_CPU_PROFILER_TRUE@ $(CXXLINK) $(libprofiler_la_OBJECTS) $(libprofiler_la_LIBADD)
|
19
|
+
+# @WITH_CPU_PROFILER_TRUE@ cp -f "$@" .libs/libprofiler.a
|
20
|
+
+# @WITH_CPU_PROFILER_TRUE@profiler_unittest.sh$(EXEEXT): $(top_srcdir)/$(profiler_unittest_sh_SOURCES) \
|
21
|
+
+# @WITH_CPU_PROFILER_TRUE@ profiler1_unittest profiler2_unittest \
|
22
|
+
+# @WITH_CPU_PROFILER_TRUE@ profiler3_unittest profiler4_unittest
|
23
|
+
+# @WITH_CPU_PROFILER_TRUE@ rm -f $@
|
24
|
+
+# @WITH_CPU_PROFILER_TRUE@ cp -p $(top_srcdir)/$(profiler_unittest_sh_SOURCES) $@
|
25
|
+
|
26
|
+
rpm: dist-gzip packages/rpm.sh packages/rpm/rpm.spec
|
27
|
+
@cd packages && ./rpm.sh ${PACKAGE} ${VERSION}
|
@@ -0,0 +1,302 @@
|
|
1
|
+
diff --git a/src/pprof b/src/pprof
|
2
|
+
index f23786d..21ccd12 100755
|
3
|
+
--- a/src/pprof
|
4
|
+
+++ b/src/pprof
|
5
|
+
@@ -504,7 +504,8 @@ sub Main() {
|
6
|
+
FetchDynamicProfiles();
|
7
|
+
|
8
|
+
# Read one profile, pick the last item on the list
|
9
|
+
- my $data = ReadProfile($main::prog, pop(@main::profile_files));
|
10
|
+
+ my $fname = pop(@main::profile_files);
|
11
|
+
+ my $data = ReadProfile($main::prog, $fname);
|
12
|
+
my $profile = $data->{profile};
|
13
|
+
my $pcs = $data->{pcs};
|
14
|
+
my $libs = $data->{libs}; # Info about main program and shared libraries
|
15
|
+
@@ -536,6 +537,17 @@ sub Main() {
|
16
|
+
$symbols = ExtractSymbols($libs, $pcs);
|
17
|
+
}
|
18
|
+
|
19
|
+
+ if (-e "$fname.symbols") {
|
20
|
+
+ open(SYMBOLS, "<$fname.symbols");
|
21
|
+
+ while(<SYMBOLS>){
|
22
|
+
+ chop;
|
23
|
+
+ if (m/(.+?)\s*:\s*(.*)/){
|
24
|
+
+ $symbols->{$1}[0] = $2;
|
25
|
+
+ }
|
26
|
+
+ }
|
27
|
+
+ close(SYMBOLS);
|
28
|
+
+ }
|
29
|
+
+
|
30
|
+
my $calls = ExtractCalls($symbols, $profile);
|
31
|
+
|
32
|
+
# Remove uniniteresting stack items
|
33
|
+
diff --git a/src/profile-handler.cc b/src/profile-handler.cc
|
34
|
+
index 0a9f54c..5b99a81 100644
|
35
|
+
--- a/src/profile-handler.cc
|
36
|
+
+++ b/src/profile-handler.cc
|
37
|
+
@@ -263,6 +263,11 @@ ProfileHandler::~ProfileHandler() {
|
38
|
+
void ProfileHandler::RegisterThread() {
|
39
|
+
SpinLockHolder cl(&control_lock_);
|
40
|
+
|
41
|
+
+#ifdef BUILD_FOR_RUBY
|
42
|
+
+ timer_sharing_ = TIMERS_SHARED;
|
43
|
+
+ if (callback_count_ > 0 && !IsTimerRunning())
|
44
|
+
+ StartTimer();
|
45
|
+
+#else
|
46
|
+
// We try to detect whether timers are being shared by setting a
|
47
|
+
// timer in the first call to this function, then checking whether
|
48
|
+
// it's set in the second call.
|
49
|
+
@@ -304,6 +309,7 @@ void ProfileHandler::RegisterThread() {
|
50
|
+
StartTimer();
|
51
|
+
break;
|
52
|
+
}
|
53
|
+
+#endif
|
54
|
+
}
|
55
|
+
|
56
|
+
ProfileHandlerToken* ProfileHandler::RegisterCallback(
|
57
|
+
diff --git a/src/profiledata.cc b/src/profiledata.cc
|
58
|
+
index 5f2531b..e6240d9 100644
|
59
|
+
--- a/src/profiledata.cc
|
60
|
+
+++ b/src/profiledata.cc
|
61
|
+
@@ -56,6 +56,19 @@ const int ProfileData::kAssociativity;
|
62
|
+
const int ProfileData::kBuckets;
|
63
|
+
const int ProfileData::kBufferLength;
|
64
|
+
|
65
|
+
+#ifdef BUILD_FOR_RUBY
|
66
|
+
+extern "C" {
|
67
|
+
+ typedef unsigned long ID;
|
68
|
+
+ typedef unsigned long VALUE;
|
69
|
+
+
|
70
|
+
+ void rb_gc();
|
71
|
+
+ const char *rb_id2name(ID);
|
72
|
+
+ const char *rb_class2name(VALUE);
|
73
|
+
+}
|
74
|
+
+
|
75
|
+
+#include <set>
|
76
|
+
+#endif
|
77
|
+
+
|
78
|
+
ProfileData::Options::Options()
|
79
|
+
: frequency_(1) {
|
80
|
+
}
|
81
|
+
@@ -63,17 +76,33 @@ ProfileData::Options::Options()
|
82
|
+
// This function is safe to call from asynchronous signals (but is not
|
83
|
+
// re-entrant). However, that's not part of its public interface.
|
84
|
+
void ProfileData::Evict(const Entry& entry) {
|
85
|
+
+#ifdef BUILD_FOR_RUBY
|
86
|
+
+ const int d = entry.depth == 1 ? 1 : entry.depth/3;
|
87
|
+
+#else
|
88
|
+
const int d = entry.depth;
|
89
|
+
+#endif
|
90
|
+
const int nslots = d + 2; // Number of slots needed in eviction buffer
|
91
|
+
+
|
92
|
+
if (num_evicted_ + nslots > kBufferLength) {
|
93
|
+
FlushEvicted();
|
94
|
+
assert(num_evicted_ == 0);
|
95
|
+
assert(nslots <= kBufferLength);
|
96
|
+
}
|
97
|
+
+
|
98
|
+
evict_[num_evicted_++] = entry.count;
|
99
|
+
evict_[num_evicted_++] = d;
|
100
|
+
+
|
101
|
+
+#ifdef BUILD_FOR_RUBY
|
102
|
+
+ if (entry.depth > 1) {
|
103
|
+
+ for (int n=0; n<entry.depth; n+=3)
|
104
|
+
+ evict_[num_evicted_++] = entry.stack[n] + entry.stack[n+1] + entry.stack[n+2];
|
105
|
+
+ } else if (entry.depth == 1) {
|
106
|
+
+ evict_[num_evicted_++] = entry.stack[0];
|
107
|
+
+ }
|
108
|
+
+#else
|
109
|
+
memcpy(&evict_[num_evicted_], entry.stack, d * sizeof(Slot));
|
110
|
+
num_evicted_ += d;
|
111
|
+
+#endif
|
112
|
+
}
|
113
|
+
|
114
|
+
ProfileData::ProfileData()
|
115
|
+
@@ -85,6 +114,7 @@ ProfileData::ProfileData()
|
116
|
+
evictions_(0),
|
117
|
+
total_bytes_(0),
|
118
|
+
fname_(0),
|
119
|
+
+ sym_fname_(0),
|
120
|
+
start_time_(0) {
|
121
|
+
}
|
122
|
+
|
123
|
+
@@ -101,6 +131,13 @@ bool ProfileData::Start(const char* fname,
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
|
127
|
+
+#ifdef BUILD_FOR_RUBY
|
128
|
+
+ int len = strlen(fname);
|
129
|
+
+ sym_fname_ = (char*)malloc((len+9) * sizeof(char));
|
130
|
+
+ strncpy(sym_fname_, fname, len);
|
131
|
+
+ strcpy(sym_fname_+len, ".symbols");
|
132
|
+
+#endif
|
133
|
+
+
|
134
|
+
start_time_ = time(NULL);
|
135
|
+
fname_ = strdup(fname);
|
136
|
+
|
137
|
+
@@ -166,16 +203,47 @@ void ProfileData::Stop() {
|
138
|
+
return;
|
139
|
+
}
|
140
|
+
|
141
|
+
+#ifdef BUILD_FOR_RUBY
|
142
|
+
+ FILE *symbols;
|
143
|
+
+ symbols = fopen(sym_fname_, "w");
|
144
|
+
+ fprintf(symbols, "%0*lx: garbage_collector\n", sizeof(unsigned long)*2, (ID)rb_gc);
|
145
|
+
+
|
146
|
+
+ std::set<ID> known_symbols;
|
147
|
+
+#endif
|
148
|
+
+
|
149
|
+
// Move data from hash table to eviction buffer
|
150
|
+
for (int b = 0; b < kBuckets; b++) {
|
151
|
+
Bucket* bucket = &hash_[b];
|
152
|
+
for (int a = 0; a < kAssociativity; a++) {
|
153
|
+
if (bucket->entry[a].count > 0) {
|
154
|
+
- Evict(bucket->entry[a]);
|
155
|
+
+ Entry e = bucket->entry[a];
|
156
|
+
+ Evict(e);
|
157
|
+
+#ifdef BUILD_FOR_RUBY
|
158
|
+
+ if (e.depth > 1)
|
159
|
+
+ for (int n=0; n<e.depth; n+=3) {
|
160
|
+
+ ID sym = e.stack[n] + e.stack[n+1] + e.stack[n+2];
|
161
|
+
+
|
162
|
+
+ if (known_symbols.find(sym) == known_symbols.end()) {
|
163
|
+
+ fprintf(symbols, "%0*lx: ", sizeof(unsigned long)*2, sym);
|
164
|
+
+
|
165
|
+
+ if (e.stack[n])
|
166
|
+
+ fprintf(symbols, "%s.", rb_class2name(e.stack[n]));
|
167
|
+
+ else
|
168
|
+
+ fprintf(symbols, "%s#", rb_class2name(e.stack[n+1]));
|
169
|
+
+
|
170
|
+
+ fprintf(symbols, "%s\n", rb_id2name(e.stack[n+2]));
|
171
|
+
+ known_symbols.insert(sym);
|
172
|
+
+ }
|
173
|
+
+ }
|
174
|
+
+#endif
|
175
|
+
}
|
176
|
+
}
|
177
|
+
}
|
178
|
+
|
179
|
+
+#ifdef BUILD_FOR_RUBY
|
180
|
+
+ fclose(symbols);
|
181
|
+
+#endif
|
182
|
+
+
|
183
|
+
if (num_evicted_ + 3 > kBufferLength) {
|
184
|
+
// Ensure there is enough room for end of data marker
|
185
|
+
FlushEvicted();
|
186
|
+
@@ -211,6 +279,10 @@ void ProfileData::Reset() {
|
187
|
+
num_evicted_ = 0;
|
188
|
+
free(fname_);
|
189
|
+
fname_ = 0;
|
190
|
+
+#ifdef BUILD_FOR_RUBY
|
191
|
+
+ free(sym_fname_);
|
192
|
+
+ sym_fname_ = 0;
|
193
|
+
+#endif
|
194
|
+
start_time_ = 0;
|
195
|
+
|
196
|
+
out_ = -1;
|
197
|
+
diff --git a/src/profiledata.h b/src/profiledata.h
|
198
|
+
index da7ea9e..67c463d 100644
|
199
|
+
--- a/src/profiledata.h
|
200
|
+
+++ b/src/profiledata.h
|
201
|
+
@@ -169,6 +169,7 @@ class ProfileData {
|
202
|
+
int evictions_; // How many evictions
|
203
|
+
size_t total_bytes_; // How much output
|
204
|
+
char* fname_; // Profile file name
|
205
|
+
+ char* sym_fname_; // Symbol file name
|
206
|
+
time_t start_time_; // Start time, or 0
|
207
|
+
|
208
|
+
// Move 'entry' to the eviction buffer.
|
209
|
+
diff --git a/src/profiler.cc b/src/profiler.cc
|
210
|
+
index c51c7b2..21c7669 100644
|
211
|
+
--- a/src/profiler.cc
|
212
|
+
+++ b/src/profiler.cc
|
213
|
+
@@ -63,6 +63,12 @@ typedef int ucontext_t; // just to quiet the compiler, mostly
|
214
|
+
#include "conflict-signal.h" /* used on msvc machines */
|
215
|
+
#endif
|
216
|
+
|
217
|
+
+#ifdef BUILD_FOR_RUBY
|
218
|
+
+extern "C" {
|
219
|
+
+ int rb_stack_trace(void**,int);
|
220
|
+
+}
|
221
|
+
+#endif
|
222
|
+
+
|
223
|
+
using std::string;
|
224
|
+
|
225
|
+
// Collects up all profile data. This is a singleton, which is
|
226
|
+
@@ -261,6 +267,9 @@ void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext,
|
227
|
+
(*instance->filter_)(instance->filter_arg_)) {
|
228
|
+
void* stack[ProfileData::kMaxStackDepth];
|
229
|
+
|
230
|
+
+#ifdef BUILD_FOR_RUBY
|
231
|
+
+ int depth = rb_stack_trace(stack, arraysize(stack));
|
232
|
+
+#else
|
233
|
+
// The top-most active routine doesn't show up as a normal
|
234
|
+
// frame, but as the "pc" value in the signal handler context.
|
235
|
+
stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext));
|
236
|
+
@@ -274,8 +283,10 @@ void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext,
|
237
|
+
int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1,
|
238
|
+
2, signal_ucontext);
|
239
|
+
depth++; // To account for pc value in stack[0];
|
240
|
+
+#endif
|
241
|
+
|
242
|
+
- instance->collector_.Add(depth, stack);
|
243
|
+
+ if (depth > 0)
|
244
|
+
+ instance->collector_.Add(depth, stack);
|
245
|
+
}
|
246
|
+
}
|
247
|
+
|
248
|
+
diff --git a/src/stacktrace.cc b/src/stacktrace.cc
|
249
|
+
index d158eea..e4132fe 100644
|
250
|
+
--- a/src/stacktrace.cc
|
251
|
+
+++ b/src/stacktrace.cc
|
252
|
+
@@ -52,6 +52,7 @@
|
253
|
+
// correctly when GetStackTrace() is called with max_depth == 0.
|
254
|
+
// Some code may do that.
|
255
|
+
|
256
|
+
+#ifndef BUILD_FOR_RUBY
|
257
|
+
#include "config.h"
|
258
|
+
#include <google/stacktrace.h>
|
259
|
+
#include "stacktrace_config.h"
|
260
|
+
@@ -69,3 +70,4 @@
|
261
|
+
#else
|
262
|
+
# error Cannot calculate stack trace: will need to write for your environment
|
263
|
+
#endif
|
264
|
+
+#endif
|
265
|
+
|
266
|
+
diff --git a/Makefile.in b/Makefile.in
|
267
|
+
index 1247b91..45179f3 100644
|
268
|
+
--- a/Makefile.in
|
269
|
+
+++ b/Makefile.in
|
270
|
+
@@ -989,13 +989,13 @@ AUTOMAKE = @AUTOMAKE@
|
271
|
+
AWK = @AWK@
|
272
|
+
CC = @CC@
|
273
|
+
CCDEPMODE = @CCDEPMODE@
|
274
|
+
-CFLAGS = @CFLAGS@
|
275
|
+
+CFLAGS = @CFLAGS@ -DBUILD_FOR_RUBY -fPIC
|
276
|
+
CPP = @CPP@
|
277
|
+
CPPFLAGS = @CPPFLAGS@
|
278
|
+
CXX = @CXX@
|
279
|
+
CXXCPP = @CXXCPP@
|
280
|
+
CXXDEPMODE = @CXXDEPMODE@
|
281
|
+
-CXXFLAGS = @CXXFLAGS@
|
282
|
+
+CXXFLAGS = @CXXFLAGS@ -DBUILD_FOR_RUBY -fPIC
|
283
|
+
CYGPATH_W = @CYGPATH_W@
|
284
|
+
DEFS = @DEFS@
|
285
|
+
DEPDIR = @DEPDIR@
|
286
|
+
diff --git a/src/stacktrace_with_context.cc b/src/stacktrace_with_context.cc
|
287
|
+
index f9fc28f..beb7562 100644
|
288
|
+
--- a/src/stacktrace_with_context.cc
|
289
|
+
+++ b/src/stacktrace_with_context.cc
|
290
|
+
@@ -41,6 +41,7 @@
|
291
|
+
// may inline this code anyway. Let's hope they respect
|
292
|
+
// ATTRIBUTE_NOINLINE.
|
293
|
+
|
294
|
+
+#ifndef BUILD_FOR_RUBY
|
295
|
+
#include "config.h"
|
296
|
+
#include <google/stacktrace.h>
|
297
|
+
#include "stacktrace_config.h"
|
298
|
+
@@ -59,3 +60,4 @@ int GetStackTraceWithContext(void** result, int max_depth,
|
299
|
+
return GetStackTrace(result, max_depth, skip_count + 1);
|
300
|
+
}
|
301
|
+
#endif
|
302
|
+
+#endif
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Gem specification for perftools.rb.
#
# The extension is built by ext/extconf.rb at install time; bin/pprof.rb is
# the only executable shipped in the package.
spec = Gem::Specification.new do |s|
  s.name = 'perftools.rb'
  s.version = '0.1.6'
  s.date = '2009-06-02'
  s.summary = 'google-perftools for ruby code'
  s.description = 'A sampling profiler for ruby code based on patches to google-perftools'

  s.homepage = "http://github.com/tmm1/perftools.rb"

  s.authors = ["Aman Gupta"]
  s.email = "aman@tmm1.net"

  # Both setters are deprecated in RubyGems and scheduled for removal, so
  # only call them where they still exist.
  s.rubyforge_project = 'perftools-rb' if s.respond_to?(:rubyforge_project=)
  s.has_rdoc = false if s.respond_to?(:has_rdoc=)

  # extensions is a list attribute; assign an Array rather than a bare String
  s.extensions = ['ext/extconf.rb']
  s.bindir = 'bin'
  s.executables << 'pprof.rb'

  # ruby -rpp -e' pp `git ls-files | grep -v examples`.split("\n") '
  s.files = [
    "README",
    "bin/pprof.rb",
    "ext/extconf.rb",
    "ext/perftools.c",
    "patches/perftools-debug.patch",
    "patches/perftools-osx.patch",
    "patches/perftools-static.patch",
    "patches/perftools.patch",
    "perftools.rb.gemspec"
  ]
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: perftools.rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aman Gupta
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-06-02 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: A sampling profiler for ruby code based on patches to google-perftools
|
17
|
+
email: aman@tmm1.net
|
18
|
+
executables:
|
19
|
+
- pprof.rb
|
20
|
+
extensions:
|
21
|
+
- ext/extconf.rb
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README
|
26
|
+
- bin/pprof.rb
|
27
|
+
- ext/extconf.rb
|
28
|
+
- ext/perftools.c
|
29
|
+
- patches/perftools-debug.patch
|
30
|
+
- patches/perftools-osx.patch
|
31
|
+
- patches/perftools-static.patch
|
32
|
+
- patches/perftools.patch
|
33
|
+
- perftools.rb.gemspec
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://github.com/tmm1/perftools.rb
|
36
|
+
licenses: []
|
37
|
+
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
requirements: []
|
56
|
+
|
57
|
+
rubyforge_project: perftools-rb
|
58
|
+
rubygems_version: 1.3.4
|
59
|
+
signing_key:
|
60
|
+
specification_version: 3
|
61
|
+
summary: google-perftools for ruby code
|
62
|
+
test_files: []
|
63
|
+
|