memprof 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.o
2
+ *.so
3
+ *.bundle
4
+ Makefile
data/README CHANGED
@@ -2,17 +2,50 @@ memprof (c) Joe Damato @joedamato http://timetobleed.com
2
2
 
3
3
  What is memprof?
4
4
  ================
5
+ Memprof is a memory profiler for Ruby that requires no patches to the Ruby VM.
6
+ It can help you find Ruby level memory leaks in your application.
5
7
 
6
- Memprof hopes to become a memory profiler for Ruby that will work without
7
- modifying your Ruby binary. You just require the gem and off you go.
8
+ How to use
9
+ ==========
8
10
 
11
+ require 'memprof'
12
+ Memprof.start
9
13
 
10
- When will it be done?
11
- =====================
14
+ # ruby code
12
15
 
13
- No idea, but soon I hope.
16
+ Memprof.stop
17
+ Memprof.dump
14
18
 
15
- Why release this then?
16
- ======================
19
+ That will monitor allocations and frees that happen between "start" and "stop"
20
+ and will output information to standard error.
17
21
 
18
- This gem illustrates an ugly hack that I thought might interest other people.
22
+ Memprof.dump also takes an (optional) file name to write the output to a file.
23
+
24
+ Supported systems
25
+ =================
26
+ Currently supporting:
27
+
28
+ Linux:
29
+ x86_64 builds of Ruby Enterprise Edition 1.8.6/1.8.7
30
+ x86_64 builds of MRI Ruby if built with --disable-shared
31
+
32
+ Experimental support:
33
+
34
+ Snow Leopard:
35
+ x86_64 builds of MRI (both enable-shared and disable-shared)
36
+
37
+ Coming soon:
38
+
39
+ Official support for Snow Leopard.
40
+
41
+ Linux:
42
+ Tracking object allocations in C extensions.
43
+ x86_64 builds of MRI Ruby with --enable-shared
44
+
45
+ i386/i686 support for all the above.
46
+
47
+ CREDITS
48
+ =======
49
+ Jake Douglas for the Mach O/snow leopard support.
50
+
51
+ Aman Gupta for various bug fixes and other cleanup.
data/ext/bin_api.h ADDED
@@ -0,0 +1,83 @@
1
+ #if !defined(BIN_API__)
2
+ #define BIN_API__
3
+ #include <stddef.h>
4
+ #include <stdint.h>
5
+
6
+ /* generic file format stuff */
7
+ extern void *text_segment;
8
+ extern unsigned long text_segment_len;
9
+ extern size_t pagesize;
10
+
11
+ /*
12
+ * trampoline specific stuff
13
+ */
14
+ extern struct tramp_tbl_entry *tramp_table;
15
+ extern size_t tramp_size;
16
+
17
+ /*
18
+ * inline trampoline specific stuff
19
+ */
20
+ extern size_t inline_tramp_size;
21
+ extern struct inline_tramp_tbl_entry *inline_tramp_table;
22
+
23
+ /* trampoline types */
24
+ struct tramp_inline { /* stage 1 inline trampoline: the bytes written over an instruction in place (see hook_freelist) */
25
+ unsigned char jmp[1]; /* jump opcode; hook_freelist fills this with 0xe9 */
26
+ uint32_t displacement; /* 32-bit displacement, measured from the byte after this field, to the stage 2 entry */
27
+ unsigned char pad[2]; /* filler so the 5-byte jump covers the 7-byte instruction it replaces */
28
+ } __attribute__((__packed__)); /* packed: the struct is copied verbatim into code memory */
29
+
30
+ struct tramp_tbl_entry { /* one executable trampoline: load a handler address and call it */
31
+ unsigned char rbx_save[1]; /* instruction bytes that save rbx before it is clobbered */
32
+ unsigned char mov[2]; /* opcode bytes of the move that loads `addr` into rbx */
33
+ void *addr; /* immediate operand of that move: address of the C handler */
34
+ unsigned char callq[2]; /* call through rbx */
35
+ unsigned char rbx_restore[1]; /* restores the saved rbx */
36
+ unsigned char ret[1]; /* return to the original call site */
37
+ } __attribute__((__packed__)); /* packed: field order and size are the machine code layout */
38
+
39
+ struct inline_tramp_tbl_entry { /* stage 2 inline trampoline: replay the overwritten mov, call the handler, jump back */
40
+ unsigned char rex[1]; /* REX prefix; hook_freelist copies it from the overwritten instruction */
41
+ unsigned char mov[1]; /* mov opcode of the replayed instruction */
42
+ unsigned char src_reg[1]; /* byte naming the source register; also copied from the overwritten instruction */
43
+ uint32_t mov_displacement; /* RIP-relative displacement from the start of `frame` to freelist */
44
+
45
+ struct { /* call frame built around the C handler */
46
+ unsigned char push_rdi[1]; /* save rdi */
47
+ unsigned char mov_rdi[3]; /* load the handler's first argument into rdi */
48
+ uint32_t rdi_source_displacement; /* RIP-relative displacement from push_rbx to freelist */
49
+ unsigned char push_rbx[1]; /* save rbx */
50
+ unsigned char push_rbp[1]; /* save rbp */
51
+ unsigned char save_rsp[3]; /* remember the current stack pointer */
52
+ unsigned char align_rsp[4]; /* align the stack before the call */
53
+ unsigned char mov[2]; /* opcode bytes of the move that loads `addr` into rbx */
54
+ void *addr; /* address of the C handler */
55
+ unsigned char callq[2]; /* call through rbx */
56
+ unsigned char leave[1]; /* undo the frame set up above */
57
+ unsigned char rbx_restore[1]; /* restore rbx */
58
+ unsigned char rdi_restore[1]; /* restore rdi */
59
+ } __attribute__((__packed__)) frame;
60
+
61
+ unsigned char jmp[1]; /* jump opcode used to return to the patched function */
62
+ uint32_t jmp_displacement; /* displacement from the end of this entry back into the patched code */
63
+ } __attribute__((__packed__)); /* packed: the struct is executed as machine code */
64
+
65
+ void
66
+ update_callqs(int entry, void *trampee_addr);
67
+
68
+ /*
69
+ * EXPORTED API.
70
+ */
71
+ void
72
+ bin_init();
73
+
74
+ void *
75
+ bin_find_symbol(char *sym, size_t *size);
76
+
77
+ void
78
+ bin_update_image(int entry, void *trampee_addr);
79
+
80
+ void *
81
+ bin_allocate_page();
82
+
83
+ #endif
data/ext/elf.c ADDED
@@ -0,0 +1,106 @@
1
+ #if defined(HAVE_ELF)
2
+
3
+ #include "bin_api.h"
4
+
5
+ #include <stdio.h>
6
+ #include <fcntl.h>
7
+ #include <gelf.h>
8
+ #include <link.h>
9
+ #include <sysexits.h>
10
+ #include <unistd.h>
11
+
12
+ #include <sys/mman.h>
13
+
14
+ static ElfW(Shdr) symtab_shdr;
15
+ static Elf *elf = NULL;
16
+ static Elf_Data *symtab_data = NULL;
17
+
18
+ void *
19
+ bin_allocate_page()
20
+ {
21
+ return mmap(NULL, pagesize, PROT_WRITE|PROT_READ|PROT_EXEC, MAP_ANON|MAP_PRIVATE|MAP_32BIT, -1, 0);
22
+ }
23
+
24
/*
 * ELF backend: redirecting a function only requires rewriting the call
 * sites in the text segment, which update_callqs() does.
 *
 * entry        - index of the trampoline table entry to route calls to
 * trampee_addr - address of the function being intercepted
 */
void
bin_update_image(int entry, void *trampee_addr)
{
  update_callqs(entry, trampee_addr);
  return;
}
29
+
30
+ void *
31
+ bin_find_symbol(char *sym, size_t *size)
32
+ {
33
+ char *name = NULL;
34
+
35
+ /*now print the symbols*/
36
+ ElfW(Sym) *esym = (ElfW(Sym)*) symtab_data->d_buf;
37
+ ElfW(Sym) *lastsym = (ElfW(Sym)*) ((char*) symtab_data->d_buf + symtab_data->d_size);
38
+ /* now loop through the symbol table and print it*/
39
+ for (; esym < lastsym; esym++){
40
+ if ((esym->st_value == 0) ||
41
+ (ELF32_ST_BIND(esym->st_info)== STB_WEAK) ||
42
+ (ELF32_ST_BIND(esym->st_info)== STB_NUM))
43
+ continue;
44
+ name = elf_strptr(elf, symtab_shdr.sh_link, (size_t)esym->st_name);
45
+ if (strcmp(name, sym) == 0) {
46
+ if (size) {
47
+ *size = esym->st_size;
48
+ }
49
+ return (void *)esym->st_value;
50
+ }
51
+ }
52
+ return NULL;
53
+ }
54
+
55
+
56
+ void
57
+ bin_init()
58
+ {
59
+ int fd;
60
+ ElfW(Shdr) shdr;
61
+ size_t shstrndx;
62
+ char *filename;
63
+ Elf_Scn *scn;
64
+
65
+ if (elf_version(EV_CURRENT) == EV_NONE)
66
+ errx(EX_SOFTWARE, "ELF library initialization failed: %s",
67
+ elf_errmsg(-1));
68
+
69
+ asprintf(&filename, "/proc/%ld/exe", (long)getpid());
70
+
71
+ if ((fd = open(filename, O_RDONLY, 0)) < 0)
72
+ err(EX_NOINPUT, "open \%s\" failed", filename);
73
+
74
+ if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
75
+ errx(EX_SOFTWARE, "elf_begin() failed: %s.",
76
+ elf_errmsg(-1));
77
+
78
+ if (elf_kind(elf) != ELF_K_ELF)
79
+ errx(EX_DATAERR, "%s is not an ELF object.", filename);
80
+
81
+ if (elf_getshstrndx(elf, &shstrndx) == 0)
82
+ errx(EX_SOFTWARE, "getshstrndx() failed: %s.",
83
+ elf_errmsg(-1));
84
+
85
+ scn = NULL;
86
+
87
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
88
+ if (gelf_getshdr(scn, &shdr) != &shdr)
89
+ errx(EX_SOFTWARE, "getshdr() failed: %s.",
90
+ elf_errmsg(-1));
91
+
92
+ if (shdr.sh_type == SHT_PROGBITS &&
93
+ (shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)) &&
94
+ strcmp(elf_strptr(elf, shstrndx, shdr.sh_name), ".text") == 0) {
95
+
96
+ text_segment = (void *)shdr.sh_addr;
97
+ text_segment_len = shdr.sh_size;
98
+ } else if (shdr.sh_type == SHT_SYMTAB) {
99
+ symtab_shdr = shdr;
100
+ if ((symtab_data = elf_getdata(scn,symtab_data)) == 0 || symtab_data->d_size == 0) {
101
+ return;
102
+ }
103
+ }
104
+ }
105
+ }
106
+ #endif
data/ext/extconf.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  require 'mkmf'
2
2
 
3
- if (have_library('elf', 'gelf_getshdr'))
4
- create_makefile ('memprof')
3
+ def add_define(name) # append a "-D<name>" preprocessor define to $defs
4
+ $defs.push("-D#{name}")
5
+ end
6
+
7
+ if (add_define("HAVE_ELF") if have_library('elf', 'gelf_getshdr')) ||
8
+ (add_define("HAVE_MACH") if have_header('mach-o/dyld.h'))
9
+ create_makefile('memprof')
5
10
  end
data/ext/mach.c ADDED
@@ -0,0 +1,108 @@
1
+ #if defined(HAVE_MACH)
2
+
3
+ #include "bin_api.h"
4
+
5
+ #include <limits.h>
6
+ #include <sysexits.h>
7
+ #include <sys/mman.h>
8
+
9
+ #include <mach-o/dyld.h>
10
+ #include <mach-o/getsect.h>
11
+ #include <mach-o/loader.h>
12
+ #include <mach-o/ldsyms.h>
13
+
14
+ static void
15
+ set_text_segment(struct mach_header *header, const char *sectname)
16
+ {
17
+ text_segment = getsectdatafromheader_64((struct mach_header_64*)header, "__TEXT", sectname, (uint64_t*)&text_segment_len);
18
+ if (!text_segment)
19
+ errx(EX_UNAVAILABLE, "Failed to locate the %s section", sectname);
20
+ }
21
+
22
+ static void
23
+ update_dyld_stubs(int entry, void *trampee_addr)
24
+ {
25
+ char *byte = text_segment;
26
+ size_t count = 0;
27
+
28
+ for(; count < text_segment_len; count++) {
29
+ if (*byte == '\xff') {
30
+ int off = *(int *)(byte+2);
31
+ if (trampee_addr == (void*)(*(long long*)(byte + 6 + off))) {
32
+ *(long long*)(byte + 6 + off) = tramp_table[entry].addr;
33
+ }
34
+ }
35
+ byte++;
36
+ }
37
+ }
38
+
39
+ void *
40
+ bin_allocate_page()
41
+ {
42
+ void *ret = NULL;
43
+ size_t i = 0;
44
+
45
+ for (i = pagesize; i < INT_MAX - pagesize; i += pagesize) {
46
+ ret = mmap((void*)(NULL + i), 2*pagesize, PROT_WRITE|PROT_READ|PROT_EXEC,
47
+ MAP_ANON|MAP_PRIVATE, -1, 0);
48
+
49
+ if (tramp_table != MAP_FAILED)
50
+ return ret;
51
+ }
52
+ return NULL;
53
+ }
54
+
55
+ void
56
+ bin_update_image(int entry, void *trampee_addr)
57
+ {
58
+ // Modify any callsites residing inside the text segment of the executable itself
59
+ set_text_segment((struct mach_header*)&_mh_execute_header, "__text");
60
+ update_callqs(entry, trampee_addr);
61
+
62
+ // Modify all dyld stubs in shared libraries that have been loaded
63
+ int i, j, k;
64
+ int header_count = _dyld_image_count();
65
+
66
+ // Go through all the mach objects that are loaded into this process
67
+ for (i=0; i < header_count; i++) {
68
+ const struct mach_header *current_hdr = _dyld_get_image_header(i);
69
+ int lc_count = current_hdr->ncmds;
70
+
71
+ // this as a char* because we need to step it forward by an arbitrary number of bytes
72
+ const char *lc = ((const char*) current_hdr) + sizeof(struct mach_header_64);
73
+
74
+ // Check all the load commands in the object to see if they are segment commands
75
+ for (j = 0; j < lc_count; j++) {
76
+ if (((struct load_command*)lc)->cmd == LC_SEGMENT_64) {
77
+ const struct segment_command_64 *seg = (const struct segment_command_64 *) lc;
78
+ const struct section_64 * sect = (const struct section_64*)(lc + sizeof(struct segment_command_64));
79
+ int section_count = (seg->cmdsize - sizeof(struct segment_command_64)) / sizeof(struct section_64);
80
+
81
+ // Search the segment for a section containing dyld stub functions
82
+ for (k=0; k < section_count; k++) {
83
+ if (strncmp(sect->sectname, "__symbol_stub", 13) == 0) {
84
+ set_text_segment((struct mach_header*)current_hdr, sect->sectname);
85
+ text_segment += _dyld_get_image_vmaddr_slide(i);
86
+ update_dyld_stubs(entry, trampee_addr);
87
+ }
88
+ sect++;
89
+ }
90
+ }
91
+ lc += ((struct load_command*)lc)->cmdsize;
92
+ }
93
+ }
94
+ }
95
+
96
/*
 * Mach-O backend: resolve a symbol through dyld.
 *
 * sym  - symbol name as dyld expects it
 * size - if non-NULL, set to 0: this lookup provides no size information,
 *        and the caller must not be left reading an unset value
 *
 * Returns whatever address _dyld_lookup_and_bind() stored, or NULL if it
 * stored nothing.
 */
void *
bin_find_symbol(char *sym, size_t *size) {
  void *ptr = NULL;

  if (size)
    *size = 0;

  _dyld_lookup_and_bind((const char*)sym, &ptr, NULL);
  return ptr;
}
102
+
103
/*
 * Mach-O backend initialisation hook. Nothing has to be prepared here;
 * the function exists so both backends expose the same bin_api.h entry
 * points.
 */
void
bin_init()
{
  /* mach-o is so cool it needs no initialization */
  return;
}
108
+ #endif
data/ext/memprof.c CHANGED
@@ -1,78 +1,289 @@
1
1
  #define _GNU_SOURCE
2
2
  #include <err.h>
3
3
  #include <fcntl.h>
4
- #include <gelf.h>
4
+ #include <stddef.h>
5
5
  #include <stdio.h>
6
6
  #include <stdint.h>
7
7
  #include <stdlib.h>
8
8
  #include <unistd.h>
9
- #include <link.h>
10
9
  #include <sysexits.h>
11
10
  #include <sys/mman.h>
11
+ #include <err.h>
12
12
 
13
+ #include <st.h>
13
14
  #include <ruby.h>
14
15
  #include <intern.h>
16
+ #include <node.h>
15
17
 
16
- struct tramp_tbl_entry {
17
- unsigned char mov[2];
18
- long long addr;
19
- unsigned char callq[2];
20
- unsigned char ret;
21
- unsigned char pad[3];
22
- } __attribute__((__packed__));;
18
+ #include "bin_api.h"
23
19
 
20
+ size_t pagesize;
21
+ void *text_segment = NULL;
22
+ unsigned long text_segment_len = 0;
24
23
 
25
- static void *text_segment = NULL;
26
- static unsigned long text_segment_len = 0;
24
+ /*
25
+ trampoline specific stuff
26
+ */
27
+ struct tramp_tbl_entry *tramp_table = NULL;
28
+ size_t tramp_size = 0;
27
29
 
28
30
  /*
29
- trampoline specific stuff
30
- */
31
- static struct tramp_tbl_entry *tramp_table = NULL;
32
- static size_t tramp_size = 0;
31
+ inline trampoline specific stuff
32
+ */
33
+ size_t inline_tramp_size = 0;
34
+ struct inline_tramp_tbl_entry *inline_tramp_table = NULL;
33
35
 
34
36
  /*
35
- ELF specific stuff
36
- */
37
- static ElfW(Shdr) symtab_shdr;
38
- static Elf *elf = NULL;
39
- static Elf_Data *symtab_data = NULL;
37
+ * bleak_house stuff
38
+ */
39
+ static int track_objs = 0;
40
+ static st_table *objs = NULL;
40
41
 
42
+ struct obj_track {
43
+ VALUE obj;
44
+ char *source;
45
+ int line;
46
+ };
41
47
 
42
48
  static void
43
- error_tramp() {
49
+ error_tramp()
50
+ {
44
51
  printf("WARNING: NO TRAMPOLINE SET.\n");
45
52
  return;
46
53
  }
47
54
 
48
55
  static VALUE
49
- newobj_tramp() {
50
- printf("source = %s, line = %d\n", ruby_sourcefile, ruby_sourceline);
51
- return rb_newobj();
56
+ newobj_tramp()
57
+ {
58
+ VALUE ret = rb_newobj();
59
+ struct obj_track *tracker = NULL;
60
+
61
+ if (track_objs) {
62
+ tracker = malloc(sizeof(*tracker));
63
+
64
+ if (tracker) {
65
+ if (ruby_current_node && ruby_current_node->nd_file && *ruby_current_node->nd_file) {
66
+ tracker->source = strdup(ruby_current_node->nd_file);
67
+ tracker->line = nd_line(ruby_current_node);
68
+ } else if (ruby_sourcefile) {
69
+ tracker->source = strdup(ruby_sourcefile);
70
+ tracker->line = ruby_sourceline;
71
+ } else {
72
+ tracker->source = strdup("__null__");
73
+ tracker->line = 0;
74
+ }
75
+
76
+ tracker->obj = ret;
77
+ st_insert(objs, (st_data_t)ret, (st_data_t)tracker);
78
+ } else {
79
+ fprintf(stderr, "Warning, unable to allocate a tracker. You are running dangerously low on RAM!\n");
80
+ }
81
+ }
82
+
83
+ return ret;
84
+ }
85
+
86
+ static void
87
+ freelist_tramp(unsigned long rval)
88
+ {
89
+ struct obj_track *tracker = NULL;
90
+
91
+ if (track_objs) {
92
+ st_delete(objs, (st_data_t *) &rval, (st_data_t *) &tracker);
93
+ if (tracker) {
94
+ free(tracker->source);
95
+ free(tracker);
96
+ }
97
+ }
98
+ }
99
+
100
+ static int
101
+ memprof_tabulate(st_data_t key, st_data_t record, st_data_t arg)
102
+ {
103
+ st_table *table = (st_table *)arg;
104
+ struct obj_track *tracker = (struct obj_track *)record;
105
+ char *source_key = NULL;
106
+ unsigned long count = 0;
107
+ char *type = NULL;
108
+
109
+ switch (TYPE(tracker->obj)) {
110
+ case T_NONE:
111
+ type = "__none__"; break;
112
+ case T_BLKTAG:
113
+ type = "__blktag__"; break;
114
+ case T_UNDEF:
115
+ type = "__undef__"; break;
116
+ case T_VARMAP:
117
+ type = "__varmap__"; break;
118
+ case T_SCOPE:
119
+ type = "__scope__"; break;
120
+ case T_NODE:
121
+ type = "__node__"; break;
122
+ default:
123
+ if (RBASIC(tracker->obj)->klass) {
124
+ type = rb_obj_classname(tracker->obj);
125
+ } else {
126
+ type = "__unknown__";
127
+ }
128
+ }
129
+
130
+ asprintf(&source_key, "%s:%d:%s", tracker->source, tracker->line, type);
131
+ st_lookup(table, (st_data_t)source_key, (st_data_t *)&count);
132
+ if (st_insert(table, (st_data_t)source_key, ++count)) {
133
+ free(source_key);
134
+ }
135
+
136
+ free(tracker->source);
137
+ return ST_DELETE;
138
+ }
139
+
140
+ struct results { /* output lines collected by memprof_do_dump for sorting and printing */
141
+ char **entries; /* array of heap-allocated, formatted lines */
142
+ unsigned long num_entries; /* number of slots of `entries` filled so far */
143
+ };
144
+
145
+ static int
146
+ memprof_do_dump(st_data_t key, st_data_t record, st_data_t arg)
147
+ {
148
+ struct results *res = (struct results *)arg;
149
+ unsigned long count = (unsigned long)record;
150
+ char *source = (char *)key;
151
+
152
+ asprintf(&(res->entries[res->num_entries++]), "%7d %s", count, source);
153
+
154
+ free(source);
155
+ return ST_DELETE;
156
+ }
157
+
158
+ static VALUE
159
+ memprof_start(VALUE self)
160
+ {
161
+ if (track_objs == 1)
162
+ return Qfalse;
163
+
164
+ track_objs = 1;
165
+ return Qtrue;
166
+ }
167
+
168
+ static VALUE
169
+ memprof_stop(VALUE self)
170
+ {
171
+ if (track_objs == 0)
172
+ return Qfalse;
173
+
174
+ track_objs = 0;
175
+ return Qtrue;
176
+ }
177
+
178
/*
 * qsort() comparator over an array of C strings. The operands are
 * swapped before strcmp(), so the array ends up in descending order.
 *
 * obj1, obj2 - pointers to the array elements (each a char *)
 */
static int
memprof_strcmp(const void *obj1, const void *obj2)
{
  const char *lhs = *(const char *const *)obj1;
  const char *rhs = *(const char *const *)obj2;

  /* reversed argument order => reverse sort */
  return strcmp(rhs, lhs);
}
185
+
186
+ static VALUE
187
+ memprof_dump(int argc, VALUE *argv, VALUE self)
188
+ {
189
+ st_table *tmp_table;
190
+ struct results res;
191
+ int i;
192
+ VALUE str;
193
+ FILE *out = NULL;
194
+
195
+ rb_scan_args(argc, argv, "01", &str);
196
+
197
+ if (RTEST(str)) {
198
+ out = fopen(StringValueCStr(str), "w");
199
+ if (!out)
200
+ rb_raise(rb_eArgError, "unable to open output file");
201
+ }
202
+
203
+ track_objs = 0;
204
+
205
+ tmp_table = st_init_strtable();
206
+ st_foreach(objs, memprof_tabulate, (st_data_t)tmp_table);
207
+
208
+ res.num_entries = 0;
209
+ res.entries = malloc(sizeof(char*) * tmp_table->num_entries);
210
+
211
+ st_foreach(tmp_table, memprof_do_dump, (st_data_t)&res);
212
+ st_free_table(tmp_table);
213
+
214
+ qsort(res.entries, res.num_entries, sizeof(char*), &memprof_strcmp);
215
+ for (i=0; i < res.num_entries; i++) {
216
+ fprintf(out ? out : stderr, "%s\n", res.entries[i]);
217
+ free(res.entries[i]);
218
+ }
219
+ free(res.entries);
220
+
221
+ track_objs = 1;
222
+ return Qnil;
52
223
  }
53
224
 
54
225
  static void
55
- create_tramp_table() {
56
- int i = 0;
226
+ create_tramp_table()
227
+ {
228
+ int i, j = 0;
57
229
 
58
230
  struct tramp_tbl_entry ent = {
59
- .mov = {'\x48', '\xbb'},
60
- .addr = (long long)&error_tramp,
61
- .callq = { '\xff', '\xd3' },
62
- .ret = '\xc3',
63
- .pad = { '\x90', '\x90', '\x90'},
231
+ .rbx_save = {'\x53'}, // push rbx
232
+ .mov = {'\x48', '\xbb'}, // mov addr into rbx
233
+ .addr = error_tramp, // ^^^
234
+ .callq = {'\xff', '\xd3'}, // callq rbx
235
+ .rbx_restore = {'\x5b'}, // pop rbx
236
+ .ret = {'\xc3'}, // ret
64
237
  };
65
238
 
66
- tramp_table = mmap(NULL, 4096, PROT_WRITE|PROT_READ|PROT_EXEC, MAP_32BIT|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
67
- if (tramp_table != MAP_FAILED) {
68
- for (; i < 4096/sizeof(struct tramp_tbl_entry); i ++ ) {
69
- memcpy(tramp_table + i, &ent, sizeof(struct tramp_tbl_entry));
70
- }
239
+ struct inline_tramp_tbl_entry inline_ent = {
240
+ .rex = {'\x48'},
241
+ .mov = {'\x89'},
242
+ .src_reg = {'\x05'},
243
+ .mov_displacement = 0,
244
+
245
+ .frame = {
246
+ .push_rdi = {'\x57'},
247
+ .mov_rdi = {'\x48', '\x8b', '\x3d'},
248
+ .rdi_source_displacement = 0,
249
+ .push_rbx = {'\x53'},
250
+ .push_rbp = {'\x55'},
251
+ .save_rsp = {'\x48', '\x89', '\xe5'},
252
+ .align_rsp = {'\x48', '\x83', '\xe4', '\xf0'},
253
+ .mov = {'\x48', '\xbb'},
254
+ .addr = error_tramp,
255
+ .callq = {'\xff', '\xd3'},
256
+ .leave = {'\xc9'},
257
+ .rbx_restore = {'\x5b'},
258
+ .rdi_restore = {'\x5f'},
259
+ },
260
+
261
+ .jmp = {'\xe9'},
262
+ .jmp_displacement = 0,
263
+ };
264
+
265
+ if ((tramp_table = bin_allocate_page()) == MAP_FAILED) {
266
+ fprintf(stderr, "Failed to allocate memory for stage 1 trampoline.\n");
267
+ return;
268
+ }
269
+
270
+ if ((inline_tramp_table = bin_allocate_page()) == MAP_FAILED) {
271
+ fprintf(stderr, "Faied to allocate memory for the stage 1 inline trampoline.\n");
272
+ return;
273
+ }
274
+
275
+ for (j = 0; j < pagesize/sizeof(struct tramp_tbl_entry); j ++ ) {
276
+ memcpy(tramp_table + j, &ent, sizeof(struct tramp_tbl_entry));
277
+ }
278
+
279
+ for (j = 0; j < pagesize/sizeof(struct inline_tramp_tbl_entry); j++) {
280
+ memcpy(inline_tramp_table + j, &inline_ent, sizeof(struct inline_tramp_tbl_entry));
71
281
  }
72
282
  }
73
283
 
74
- static void
75
- update_image(int entry, void *trampee_addr) {
284
+ void
285
+ update_callqs(int entry, void *trampee_addr)
286
+ {
76
287
  char *byte = text_segment;
77
288
  size_t count = 0;
78
289
  int fn_addr = 0;
@@ -92,93 +303,174 @@ update_image(int entry, void *trampee_addr) {
92
303
  }
93
304
  }
94
305
 
95
- static void *
96
- find_symbol(char *sym) {
97
- char *name = NULL;
98
-
99
- /*now print the symbols*/
100
- ElfW(Sym) *esym = (ElfW(Sym)*) symtab_data->d_buf;
101
- ElfW(Sym) *lastsym = (ElfW(Sym)*) ((char*) symtab_data->d_buf + symtab_data->d_size);
102
- /* now loop through the symbol table and print it*/
103
- for (; esym < lastsym; esym++){
104
- if ((esym->st_value == 0) ||
105
- (ELF32_ST_BIND(esym->st_info)== STB_WEAK) ||
106
- (ELF32_ST_BIND(esym->st_info)== STB_NUM) ||
107
- (ELF32_ST_TYPE(esym->st_info)!= STT_FUNC))
108
- continue;
109
- name = elf_strptr(elf, symtab_shdr.sh_link, (size_t)esym->st_name);
110
- if (strcmp(name, sym) == 0) {
111
- return (void *)esym->st_value;
306
+
307
+ static void
308
+ hook_freelist(int entry)
309
+ {
310
+ long sizes[] = { 0, 0, 0 };
311
+ void *sym1 = bin_find_symbol("gc_sweep", &sizes[0]);
312
+
313
+ if (sym1 == NULL) {
314
+ /* this is MRI ... */
315
+ sym1 = bin_find_symbol("garbage_collect", &sizes[0]);
316
+ }
317
+
318
+ void *sym2 = bin_find_symbol("finalize_list", &sizes[1]);
319
+ void *sym3 = bin_find_symbol("rb_gc_force_recycle", &sizes[2]);
320
+ void *freelist_callers[] = { sym1, sym2, sym3 };
321
+ int max = 3;
322
+ size_t i = 0;
323
+ char *byte = freelist_callers[0];
324
+ void *freelist = bin_find_symbol("freelist", NULL);
325
+ uint32_t mov_target = 0;
326
+ void *aligned_addr = NULL;
327
+ size_t count = 0;
328
+
329
+ /* This is the stage 1 trampoline for hooking the inlined add_freelist
330
+ * function .
331
+ *
332
+ * NOTE: The original instruction mov %reg, freelist is 7 bytes wide,
333
+ * whereas jmpq $displacement is only 5 bytes wide. We *must* pad out
334
+ * the next two bytes. This will be important to remember below.
335
+ */
336
+ struct tramp_inline tramp = {
337
+ .jmp = {'\xe9'},
338
+ .displacement = 0,
339
+ .pad = {'\x90', '\x90'},
340
+ };
341
+
342
+ struct inline_tramp_tbl_entry *inl_tramp_st2 = NULL;
343
+
344
+ for (;i < max;) {
345
+ /* make sure it is a mov instruction */
346
+ if (byte[1] == '\x89') {
347
+
348
+ /* Read the REX byte to make sure it is a mov that we care about */
349
+ if ((byte[0] == '\x48') ||
350
+ (byte[0] == '\x4c')) {
351
+
352
+ /* Grab the target of the mov. REMEMBER: in this case the target is
353
+ * a 32bit displacment that gets added to RIP (where RIP is the adress of
354
+ * the next instruction).
355
+ */
356
+ mov_target = *(uint32_t *)(byte + 3);
357
+
358
+ /* Sanity check. Ensure that the displacement from freelist to the next
359
+ * instruction matches the mov_target. If so, we know this mov is
360
+ * updating freelist.
361
+ */
362
+ if ((freelist - (void *)(byte+7)) == mov_target) {
363
+ /* Before the stage 1 trampoline gets written, we need to generate
364
+ * the code for the stage 2 trampoline. Let's copy over the REX byte
365
+ * and the byte which mentions the source register into the stage 2
366
+ * trampoline.
367
+ */
368
+ inl_tramp_st2 = inline_tramp_table + entry;
369
+ inl_tramp_st2->rex[0] = byte[0];
370
+ inl_tramp_st2->src_reg[0] = byte[2];
371
+
372
+ /* Setup the stage 1 trampoline. Calculate the displacement to
373
+ * the stage 2 trampoline from the next instruction.
374
+ *
375
+ * REMEMBER!!!! The next instruction will be NOP after our stage 1
376
+ * trampoline is written. This is 5 bytes into the structure, even
377
+ * though the original instruction we overwrote was 7 bytes.
378
+ */
379
+ tramp.displacement = (uint32_t)((void *)(inl_tramp_st2) - (void *)(byte+5));
380
+
381
+ /* Figure out what page the stage 1 tramp is gonna be written to, mark
382
+ * it WRITE, write the trampoline in, and then remove WRITE permission.
383
+ */
384
+ aligned_addr = (void*)(((long)byte)&~(0xffff));
385
+ mprotect(aligned_addr, (((void *)byte) - aligned_addr) + 10, PROT_READ|PROT_WRITE|PROT_EXEC);
386
+ memcpy(byte, &tramp, sizeof(struct tramp_inline));
387
+ mprotect(aligned_addr, (((void *)byte) - aligned_addr) + 10, PROT_READ|PROT_EXEC);
388
+
389
+ /* Finish setting up the stage 2 trampoline. */
390
+
391
+ /* calculate the displacement to freelist from the next instruction.
392
+ *
393
+ * This is used to replicate the original instruction we overwrote.
394
+ */
395
+ inl_tramp_st2->mov_displacement = freelist - (void *)&(inl_tramp_st2->frame);
396
+
397
+ /* fill in the displacement to freelist from the next instruction.
398
+ *
399
+ * This is to arrange for the new value in freelist to be in %rdi, and as such
400
+ * be the first argument to the C handler. As per the amd64 ABI.
401
+ */
402
+ inl_tramp_st2->frame.rdi_source_displacement = freelist - (void *)&(inl_tramp_st2->frame.push_rbx);
403
+
404
+ /* jmp back to the instruction after stage 1 trampoline was inserted
405
+ *
406
+ * This can be 5 or 7, it doesn't matter. If its 5, we'll hit our 2
407
+ * NOPS. If its 7, we'll land directly on the next instruction.
408
+ */
409
+ inl_tramp_st2->jmp_displacement = (uint32_t)((void *)(byte + 7) -
410
+ (void *)(inline_tramp_table + entry + 1));
411
+
412
+ /* write the address of our C level trampoline in to the structure */
413
+ inl_tramp_st2->frame.addr = freelist_tramp;
414
+
415
+ /* track the new entry and new trampoline size */
416
+ entry++;
417
+ inline_tramp_size++;
418
+ }
419
+ }
112
420
  }
421
+
422
+ if (count >= sizes[i]) {
423
+ count = 0;
424
+ i ++;
425
+ byte = freelist_callers[i];
426
+ }
427
+ count++;
428
+ byte++;
113
429
  }
114
- return NULL;
115
430
  }
116
431
 
117
432
  static void
118
- insert_tramp(char *trampee, void *tramp) {
119
- void *trampee_addr = find_symbol(trampee);
433
+ insert_tramp(char *trampee, void *tramp)
434
+ {
435
+ void *trampee_addr = bin_find_symbol(trampee, NULL);
120
436
  int entry = tramp_size;
121
- tramp_table[tramp_size].addr = (long long)tramp;
122
- tramp_size++;
123
- update_image(entry, trampee_addr);
124
- }
437
+ int inline_ent = inline_tramp_size;
125
438
 
126
- void Init_memprof()
127
- {
128
- int fd;
129
- ElfW(Shdr) shdr;
130
- size_t shstrndx;
131
- char *filename;
132
- Elf_Scn *scn;
133
-
134
- if (elf_version(EV_CURRENT) == EV_NONE)
135
- errx(EX_SOFTWARE, "ELF library initialization failed: %s",
136
- elf_errmsg(-1));
137
-
138
- asprintf(&filename, "/proc/%ld/exe", (long)getpid());
139
-
140
- if ((fd = open(filename, O_RDONLY, 0)) < 0)
141
- err(EX_NOINPUT, "open \%s\" failed", filename);
142
-
143
- if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
144
- errx(EX_SOFTWARE, "elf_begin() failed: %s.",
145
- elf_errmsg(-1));
146
-
147
- if (elf_kind(elf) != ELF_K_ELF)
148
- errx(EX_DATAERR, "%s is not an ELF object.", filename);
149
-
150
- if (elf_getshstrndx(elf, &shstrndx) == 0)
151
- errx(EX_SOFTWARE, "getshstrndx() failed: %s.",
152
- elf_errmsg(-1));
153
-
154
- scn = NULL;
155
-
156
- while ((scn = elf_nextscn(elf, scn)) != NULL) {
157
- if (gelf_getshdr(scn, &shdr) != &shdr)
158
- errx(EX_SOFTWARE, "getshdr() failed: %s.",
159
- elf_errmsg(-1));
160
-
161
- if (shdr.sh_type == SHT_PROGBITS &&
162
- (shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)) &&
163
- strcmp(elf_strptr(elf, shstrndx, shdr.sh_name), ".text") == 0) {
164
-
165
- text_segment = (void *)shdr.sh_addr;
166
- text_segment_len = shdr.sh_size;
167
- } else if (shdr.sh_type == SHT_SYMTAB) {
168
- symtab_shdr = shdr;
169
- if ((symtab_data = elf_getdata(scn,symtab_data)) == 0 || symtab_data->d_size == 0) {
170
- return;
171
- }
439
+ if (trampee_addr == NULL) {
440
+ if (strcmp("add_freelist", trampee) == 0) {
441
+ /* XXX super hack */
442
+ inline_tramp_table[inline_tramp_size].frame.addr = tramp;
443
+ inline_tramp_size++;
444
+ hook_freelist(inline_ent);
445
+ } else {
446
+ return;
172
447
  }
448
+ } else {
449
+ tramp_table[tramp_size].addr = tramp;
450
+ tramp_size++;
451
+ bin_update_image(entry, trampee_addr);
173
452
  }
453
+ }
174
454
 
455
+ void
456
+ Init_memprof()
457
+ {
458
+ VALUE memprof = rb_define_module("Memprof");
459
+ rb_define_singleton_method(memprof, "start", memprof_start, 0);
460
+ rb_define_singleton_method(memprof, "stop", memprof_stop, 0);
461
+ rb_define_singleton_method(memprof, "dump", memprof_dump, -1);
175
462
 
463
+ pagesize = getpagesize();
464
+ objs = st_init_numtable();
465
+ bin_init();
176
466
  create_tramp_table();
177
467
 
468
+ #if defined(HAVE_MACH)
469
+ insert_tramp("_rb_newobj", newobj_tramp);
470
+ #elif defined(HAVE_ELF)
178
471
  insert_tramp("rb_newobj", newobj_tramp);
179
- #if 0
180
- (void) elf_end(e);
181
- (void) close(fd);
472
+ insert_tramp("add_freelist", freelist_tramp);
182
473
  #endif
474
+
183
475
  return;
184
476
  }
data/memprof.gemspec CHANGED
@@ -1,17 +1,22 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = 'memprof'
3
- s.version = '0.0.1'
4
- s.date = '2009-11-21'
5
- s.summary = 'Ruby memory profiler gem'
3
+ s.version = '0.1.0'
4
+ s.date = '2009-12-10'
5
+ s.summary = 'Ruby Memory Profiler'
6
+ s.description = "Ruby memory profiler similar to bleak_house, but without patches to the Ruby VM"
6
7
  s.email = "ice799@gmail.com"
7
8
  s.homepage = "http://github.com/ice799/memprof"
8
- s.description = "Ruby memory profiler gem"
9
9
  s.has_rdoc = false
10
10
  s.authors = ["Joe Damato"]
11
11
  s.extensions = "ext/extconf.rb"
12
- s.require_paths << "ext"
13
- s.files = ["README",
14
- "memprof.gemspec",
15
- "ext/memprof.c",
16
- "ext/extconf.rb"]
12
+ s.files = %w[
13
+ .gitignore
14
+ README
15
+ ext/bin_api.h
16
+ ext/elf.c
17
+ ext/extconf.rb
18
+ ext/mach.c
19
+ ext/memprof.c
20
+ memprof.gemspec
21
+ ]
17
22
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: memprof
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Damato
@@ -9,11 +9,11 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-21 00:00:00 -08:00
12
+ date: 2009-12-10 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
- description: Ruby memory profiler gem
16
+ description: Ruby memory profiler similar to bleak_house, but without patches to the Ruby VM
17
17
  email: ice799@gmail.com
18
18
  executables: []
19
19
 
@@ -22,10 +22,14 @@ extensions:
22
22
  extra_rdoc_files: []
23
23
 
24
24
  files:
25
+ - .gitignore
25
26
  - README
26
- - memprof.gemspec
27
- - ext/memprof.c
27
+ - ext/bin_api.h
28
+ - ext/elf.c
28
29
  - ext/extconf.rb
30
+ - ext/mach.c
31
+ - ext/memprof.c
32
+ - memprof.gemspec
29
33
  has_rdoc: true
30
34
  homepage: http://github.com/ice799/memprof
31
35
  licenses: []
@@ -35,7 +39,6 @@ rdoc_options: []
35
39
 
36
40
  require_paths:
37
41
  - lib
38
- - ext
39
42
  required_ruby_version: !ruby/object:Gem::Requirement
40
43
  requirements:
41
44
  - - ">="
@@ -54,6 +57,6 @@ rubyforge_project:
54
57
  rubygems_version: 1.3.5
55
58
  signing_key:
56
59
  specification_version: 3
57
- summary: Ruby memory profiler gem
60
+ summary: Ruby Memory Profiler
58
61
  test_files: []
59
62