memprof 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -33,6 +33,22 @@ class MemprofUploader
33
33
  opts.on("--put-my-data-on-the-internet", "Confirm that you understand\n" +
34
34
  "memprof.com will show all your \n".rjust(80) +
35
35
  "internal data on the internet (required)".rjust(80)) {|arg| @confirmed = true}
36
+ opts.on("--info") do
37
+ require 'rbconfig'
38
+ puts RUBY_DESCRIPTION if defined? RUBY_DESCRIPTION
39
+ puts "CFLAGS='#{Config::CONFIG["CFLAGS"]}' ./configure #{Config::CONFIG["configure_args"]}"
40
+ bin = "#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']}"
41
+ puts `file #{bin}`
42
+
43
+ if RUBY_PLATFORM =~ /darwin/
44
+ puts `otool -L #{bin}`
45
+ else
46
+ puts `ldd #{bin}`
47
+ end
48
+
49
+ puts
50
+ exit!
51
+ end
36
52
  end
37
53
 
38
54
  begin
data/ext/elf.c CHANGED
@@ -562,32 +562,7 @@ bin_update_image(const char *trampee, struct tramp_st2_entry *tramp, void **orig
562
562
  /* first check if the symbol is in the PLT */
563
563
  trampee_addr = find_plt_addr(trampee, NULL);
564
564
 
565
- /* it isn't in the PLT, try to find it in the binary itself */
566
- if (!trampee_addr) {
567
- dbg_printf("Couldn't find %s in the PLT...\n", trampee);
568
- unsigned char *byte = ruby_info->text_segment;
569
- trampee_addr = bin_find_symbol(trampee, NULL, 0);
570
- size_t count = 0;
571
- int num = 0;
572
-
573
- assert(byte != NULL);
574
-
575
- if (!trampee_addr) {
576
- dbg_printf("WARNING: Couldn't find: %s anywhere, so not tramping!\n", trampee);
577
- return 0;
578
- }
579
-
580
- if (orig_func) {
581
- *orig_func = trampee_addr;
582
- }
583
-
584
- for(; count < ruby_info->text_segment_len; byte++, count++) {
585
- if (arch_insert_st1_tramp(byte, trampee_addr, tramp) == 0) {
586
- num++;
587
- }
588
- }
589
- dbg_printf("Inserted %d tramps for: %s\n", num, trampee);
590
- } else {
565
+ if (trampee_addr) {
591
566
  void *ret = NULL;
592
567
  dbg_printf("Found %s in the PLT, inserting tramp...\n", trampee);
593
568
  ret = overwrite_got(trampee_addr, tramp->addr);
@@ -598,6 +573,29 @@ bin_update_image(const char *trampee, struct tramp_st2_entry *tramp, void **orig
598
573
  *orig_func = ret;
599
574
  dbg_printf("setting orig function: %p\n", *orig_func);
600
575
  }
576
+ } else {
577
+ trampee_addr = bin_find_symbol(trampee, NULL, 0);
578
+ dbg_printf("Couldn't find %s in the PLT...\n", trampee);
579
+
580
+ if (trampee_addr) {
581
+ unsigned char *byte = ruby_info->text_segment;
582
+ size_t count = 0;
583
+ int num = 0;
584
+
585
+ assert(byte != NULL);
586
+
587
+ if (orig_func) {
588
+ *orig_func = trampee_addr;
589
+ }
590
+
591
+ for(; count < ruby_info->text_segment_len; byte++, count++) {
592
+ if (arch_insert_st1_tramp(byte, trampee_addr, tramp) == 0) {
593
+ num++;
594
+ }
595
+ }
596
+
597
+ dbg_printf("Inserted %d tramps for: %s\n", num, trampee);
598
+ }
601
599
  }
602
600
 
603
601
  dbg_printf("Trying to hook %s in other libraries...\n", trampee);
@@ -976,8 +974,8 @@ find_debug_syms(struct elf_info *elf)
976
974
  dbg_printf(".gnu_debuglink base file name: %s, crc: %lx\n", basename, crc);
977
975
 
978
976
  dir = dirname(tmp);
979
- debug_file = malloc(strlen(DEBUGDIR) + strlen(dir) +
980
- strlen("/") + strlen(basename) + 1);
977
+ debug_file = calloc(1, strlen(DEBUGDIR) + strlen(dir) +
978
+ strlen("/") + strlen(basename) + 1);
981
979
 
982
980
  strncat(debug_file, DEBUGDIR, strlen(DEBUGDIR));
983
981
  strncat(debug_file, dir, strlen(dir));
@@ -1197,7 +1195,11 @@ dissect_elf(struct elf_info *info, int find_debug)
1197
1195
 
1198
1196
  out:
1199
1197
  if (find_debug && ret == 1) {
1200
- find_debug_syms(info);
1198
+ if (info->debuglink_addr) {
1199
+ find_debug_syms(info);
1200
+ } else {
1201
+ dbg_printf("=== WARNING: Object %s was STRIPPED and had no debuglink section. Nothing left to try.", info->filename);
1202
+ }
1201
1203
  }
1202
1204
  return ret;
1203
1205
  }
@@ -33,17 +33,23 @@ unless File.exists?("#{CWD}/dst/lib/libyajl_ext.a")
33
33
 
34
34
  sys("tar zxvf #{yajl}")
35
35
  Dir.chdir("#{dir}/src") do
36
- FileUtils.mkdir_p "api/yajl"
36
+ sys("sed -i -e 's,yajl,json,g' *.h *.c api/*.h")
37
+ Dir['{,api/}yajl*.{h,c}'].each do |file|
38
+ FileUtils.mv file, file.gsub('yajl', 'json')
39
+ end
40
+
41
+ FileUtils.mkdir_p "api/json"
37
42
  %w[ common parse gen ].each do |f|
38
- FileUtils.cp "api/yajl_#{f}.h", 'api/yajl/'
43
+ FileUtils.cp "api/json_#{f}.h", 'api/json/'
39
44
  end
40
45
 
41
46
  File.open("extconf.rb",'w') do |f|
42
47
  f.puts "require 'mkmf'; $INCFLAGS[0,0] = '-I./api/ '; create_makefile 'libyajl'"
43
48
  end
44
- sys("#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']} extconf.rb")
45
49
 
50
+ sys("#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']} extconf.rb")
46
51
  sys("make")
52
+
47
53
  if RUBY_PLATFORM =~ /darwin/
48
54
  sys("libtool -static -o libyajl_ext.a #{Dir['*.o'].join(' ')}")
49
55
  else
@@ -53,7 +59,7 @@ unless File.exists?("#{CWD}/dst/lib/libyajl_ext.a")
53
59
  FileUtils.mkdir_p "#{CWD}/dst/lib"
54
60
  FileUtils.cp 'libyajl_ext.a', "#{CWD}/dst/lib"
55
61
  FileUtils.mkdir_p "#{CWD}/dst/include"
56
- FileUtils.cp_r 'api/yajl', "#{CWD}/dst/include/"
62
+ FileUtils.cp_r 'api/json', "#{CWD}/dst/include/"
57
63
  end
58
64
  end
59
65
  end
@@ -61,7 +67,7 @@ end
61
67
  $LIBPATH.unshift "#{CWD}/dst/lib"
62
68
  $INCFLAGS[0,0] = "-I#{CWD}/dst/include "
63
69
 
64
- unless have_library('yajl_ext') and have_header('yajl/yajl_gen.h')
70
+ unless have_library('yajl_ext') and have_header('json/json_gen.h')
65
71
  raise 'Yajl build failed'
66
72
  end
67
73
 
data/ext/i386.h CHANGED
@@ -16,8 +16,8 @@ static struct tramp_st2_entry {
16
16
  unsigned char ret;
17
17
  } __attribute__((__packed__)) default_st2_tramp = {
18
18
  .ebx_save = 0x53, /* push ebx */
19
- .mov = 0xbb, /* mov addr into ebx */
20
- .addr = 0, /* this is filled in later */
19
+ .mov = 0xbb, /* mov addr into ebx */
20
+ .addr = 0, /* this is filled in later */
21
21
  .call = {0xff, 0xd3}, /* calll *ebx */
22
22
  .ebx_restore = 0x5b, /* pop ebx */
23
23
  .ret = 0xc3, /* ret */
@@ -0,0 +1,53 @@
1
+ #ifndef _GNU_SOURCE
2
+ #define _GNU_SOURCE
3
+ #endif
4
+
5
+ #include <assert.h>
6
+ #include <stdio.h>
7
+ #include <stdlib.h>
8
+ #include <string.h>
9
+
10
+ #include "json.h"
11
+
12
+ void
13
+ json_gen_reset(json_gen gen)
14
+ {
15
+ json_gen_clear(gen);
16
+ assert (gen->state[gen->depth] == json_gen_complete);
17
+ gen->state[gen->depth] = json_gen_start;
18
+ gen->print(gen->ctx, "\n", 1);
19
+ }
20
+
21
+ json_gen_status
22
+ json_gen_cstr(json_gen gen, const char * str)
23
+ {
24
+ if (!str || str[0] == 0)
25
+ return json_gen_null(gen);
26
+ else
27
+ return json_gen_string(gen, (unsigned char *)str, strlen(str));
28
+ }
29
+
30
+ json_gen_status
31
+ json_gen_format(json_gen gen, char *format, ...)
32
+ {
33
+ va_list args;
34
+ char *str;
35
+ int bytes_printed = 0;
36
+
37
+ json_gen_status ret;
38
+
39
+ va_start(args, format);
40
+ bytes_printed = vasprintf(&str, format, args);
41
+ assert(bytes_printed != -1);
42
+ va_end(args);
43
+
44
+ ret = json_gen_string(gen, (unsigned char *)str, strlen(str));
45
+ free(str);
46
+ return ret;
47
+ }
48
+
49
+ json_gen_status
50
+ json_gen_pointer(json_gen gen, void* ptr)
51
+ {
52
+ return json_gen_format(gen, "0x%x", ptr);
53
+ }
@@ -0,0 +1,48 @@
1
+ #if !defined(__JSON__H_)
2
+ #define __JSON__H_
3
+
4
+ #include <stdarg.h>
5
+ #include <json/json_gen.h>
6
+
7
+ /* HAX: copied from internal json_gen.c (PATCH json before building instead)
8
+ */
9
+
10
+ typedef enum {
11
+ json_gen_start,
12
+ json_gen_map_start,
13
+ json_gen_map_key,
14
+ json_gen_map_val,
15
+ json_gen_array_start,
16
+ json_gen_in_array,
17
+ json_gen_complete,
18
+ json_gen_error
19
+ } json_gen_state;
20
+
21
+ struct json_gen_t
22
+ {
23
+ unsigned int depth;
24
+ unsigned int pretty;
25
+ const char * indentString;
26
+ json_gen_state state[YAJL_MAX_DEPTH];
27
+ json_print_t print;
28
+ void * ctx; /* json_buf */
29
+ /* memory allocation routines */
30
+ json_alloc_funcs alloc;
31
+ };
32
+
33
+ /* END HAX
34
+ */
35
+
36
+ void
37
+ json_gen_reset(json_gen gen);
38
+
39
+ json_gen_status
40
+ json_gen_cstr(json_gen gen, const char * str);
41
+
42
+ json_gen_status
43
+ json_gen_format(json_gen gen, char *format, ...);
44
+
45
+ json_gen_status
46
+ json_gen_pointer(json_gen gen, void* ptr);
47
+
48
+ #endif
data/ext/mach.c CHANGED
@@ -1,12 +1,15 @@
1
1
  #if defined(HAVE_MACH)
2
2
 
3
- #include "bin_api.h"
4
3
  #include "arch.h"
4
+ #include "bin_api.h"
5
+ #include "mmap.h"
5
6
  #include "util.h"
6
7
 
7
8
  #include <assert.h>
8
9
  #include <dlfcn.h>
9
10
  #include <err.h>
11
+ #include <errno.h>
12
+ #include <inttypes.h>
10
13
  #include <limits.h>
11
14
  #include <stdio.h>
12
15
  #include <stdlib.h>
@@ -20,16 +23,34 @@
20
23
  #include <mach-o/loader.h>
21
24
  #include <mach-o/ldsyms.h>
22
25
  #include <mach-o/nlist.h>
26
+ #include <mach-o/dyld_images.h>
27
+ #include <mach-o/fat.h>
23
28
 
24
29
  struct mach_config {
25
- const struct nlist_64 **symbol_table;
30
+ const struct mach_header *hdr;
31
+ const struct nlist_64 *symbol_table;
32
+ const struct nlist_64 **sorted_symbol_table;
33
+ const struct section_64 *symstub_sect;
26
34
  const char *string_table;
27
35
  uint32_t symbol_count;
28
36
  uint32_t string_table_size;
29
37
  intptr_t image_offset;
38
+ const struct mach_header* load_addr;
39
+ uint32_t nindirectsyms;
40
+ uint32_t indirectsymoff;
41
+ struct mmap_info file;
42
+ const char *filename;
43
+ unsigned int index;
44
+ };
45
+
46
+ struct symbol_data {
47
+ const char *name;
48
+ void *address;
49
+ uint32_t size;
50
+ uint32_t index;
30
51
  };
31
52
 
32
- static struct mach_config mach_config;
53
+ static struct mach_config ruby_img_cfg;
33
54
  extern struct memprof_config memprof_config;
34
55
 
35
56
  /*
@@ -46,7 +67,7 @@ extern struct memprof_config memprof_config;
46
67
 
47
68
  struct dyld_stub_entry {
48
69
  unsigned char jmp[2];
49
- uint32_t offset;
70
+ int32_t offset;
50
71
  } __attribute((__packed__));
51
72
 
52
73
  /* Return the jmp target of a stub entry */
@@ -64,7 +85,60 @@ get_dyld_stub_target(struct dyld_stub_entry *entry) {
64
85
 
65
86
  static inline void
66
87
  set_dyld_stub_target(struct dyld_stub_entry *entry, void *addr) {
67
- *((void**)((void*)(entry + 1) + entry->offset)) = addr;
88
+ void *target = (void *)(entry+1) + entry->offset;
89
+ copy_instructions(target, &addr, sizeof(void *));
90
+ }
91
+
92
+ static inline const char*
93
+ get_symtab_string(struct mach_config *img_cfg, uint32_t stroff);
94
+
95
+ static void
96
+ extract_symbol_data(struct mach_config *img_cfg, struct symbol_data *sym_data);
97
+
98
+ static int
99
+ find_dyld_image_index(const struct mach_header_64 *hdr);
100
+
101
+ static void *
102
+ find_stub_addr(const char *symname, struct mach_config *img_cfg)
103
+ {
104
+ uint64_t i = 0, nsyms = 0;
105
+ uint32_t symindex = 0;
106
+ assert(img_cfg && symname);
107
+ const struct section_64 *sect = img_cfg->symstub_sect;
108
+
109
+ if (!sect)
110
+ return NULL;
111
+
112
+ nsyms = sect->size / sect->reserved2;
113
+
114
+ for (; i < nsyms; i ++) {
115
+ uint32_t currsym = sect->reserved1 + i;
116
+ uint64_t stubaddr = sect->offset + (i * sect->reserved2);
117
+ uint32_t symoff = 0;
118
+
119
+ assert(currsym <= img_cfg->nindirectsyms);
120
+
121
+ /* indirect sym entries are just 32bit indexes into the symbol table to the
122
+ * symbol the stub is referring to.
123
+ */
124
+ symoff = img_cfg->indirectsymoff + (i * 4);
125
+ memcpy(&symindex, (char*)img_cfg->hdr + symoff, 4);
126
+ symindex = symindex & ((uint32_t) ~(INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS));
127
+
128
+ const struct nlist_64 *ent = img_cfg->symbol_table + symindex;
129
+ const char *string = get_symtab_string(img_cfg, ent->n_un.n_strx);
130
+
131
+ if (strcmp(symname, string+1) == 0) {
132
+ if (stubaddr) {
133
+ dbg_printf("address of stub in %s for %s is %" PRIx64 " + %p = ", img_cfg->filename, string, stubaddr, img_cfg->load_addr);
134
+ stubaddr = (uint64_t)img_cfg->load_addr + stubaddr;
135
+ dbg_printf("%" PRIx64 "\n", stubaddr);
136
+ return (void *)stubaddr;
137
+ }
138
+ }
139
+ }
140
+ dbg_printf("couldn't find address of stub: %s in %s\n", symname, img_cfg->filename);
141
+ return NULL;
68
142
  }
69
143
 
70
144
  /*
@@ -91,31 +165,83 @@ update_dyld_stub_table(void *table, uint64_t len, void *trampee_addr, struct tra
91
165
  }
92
166
 
93
167
  /*
94
- * This function tells us if the passed stub table address
95
- * is something that we should try to update (by looking at its filename)
96
- * Only try to update dyld stub entries in files that match "libruby.dylib" or "*.bundle" (other C gems)
168
+ * Get all DSOs
97
169
  */
170
+ static const struct dyld_all_image_infos *
171
+ dyld_get_all_images() {
172
+ static const struct dyld_all_image_infos* (*_dyld_get_all_image_infos)() = NULL;
173
+ static const struct dyld_all_image_infos *images = NULL;
174
+
175
+ if (!_dyld_get_all_image_infos) {
176
+ _dyld_lookup_and_bind("__dyld_get_all_image_infos", (void**)&_dyld_get_all_image_infos, NULL);
177
+ assert(_dyld_get_all_image_infos != NULL);
178
+ }
98
179
 
99
- static inline int
100
- should_update_stub_table(void *addr) {
101
- Dl_info info;
180
+ if (!images) {
181
+ images = _dyld_get_all_image_infos();
182
+ assert(images != NULL);
183
+ }
102
184
 
103
- if (dladdr(addr, &info)) {
104
- size_t len = strlen(info.dli_fname);
185
+ return images;
186
+ }
105
187
 
106
- if (len >= 6) {
107
- const char *possible_bundle = (info.dli_fname + len - 6);
108
- if (strcmp(possible_bundle, "bundle") == 0)
109
- return 1;
110
- }
188
+ /*
189
+ * Get info for particular DSO
190
+ */
191
+ static const struct dyld_image_info *
192
+ dyld_get_image_info_for_index(int index) {
193
+ const struct dyld_all_image_infos *images = dyld_get_all_images();
194
+
195
+ // Stupid indexes into the infoArray don't match indexes used elsewhere, so we have to loop
196
+ unsigned int i;
197
+ const struct mach_header *hdr = _dyld_get_image_header(index);
198
+
199
+ for(i=0; i < _dyld_image_count(); i++) {
200
+ const struct dyld_image_info image = images->infoArray[i];
201
+ if (hdr == image.imageLoadAddress)
202
+ return &(images->infoArray[i]);
203
+ }
111
204
 
112
- if (len >= 13) {
113
- const char *possible_libruby = (info.dli_fname + len - 13);
114
- if (strcmp(possible_libruby, "libruby.dylib") == 0)
115
- return 1;
116
- }
205
+ return NULL;
206
+ }
207
+
208
+ /*
209
+ * This function tells us if the passed header index is something
210
+ * that we should try to update (by looking at its filename)
211
+ * Only try to update the running executable, or files that match
212
+ * "libruby.dylib" or "*.bundle" (other C gems)
213
+ */
214
+
215
+ static const struct mach_header *
216
+ should_update_image(int index) {
217
+ const struct mach_header *hdr = _dyld_get_image_header(index);
218
+
219
+ /* Don't update if it's the memprof bundle */
220
+ if ((void*)hdr == &_mh_bundle_header)
221
+ return NULL;
222
+
223
+ /* If it's the ruby executable, do it! */
224
+ if ((void*)hdr == &_mh_execute_header)
225
+ return hdr;
226
+
227
+ /* otherwise, check to see if it's a bundle or libruby */
228
+ const struct dyld_image_info *image = dyld_get_image_info_for_index(index);
229
+
230
+ size_t len = strlen(image->imageFilePath);
231
+
232
+ if (len >= 6) {
233
+ const char *possible_bundle = (image->imageFilePath + len - 6);
234
+ if (strcmp(possible_bundle, "bundle") == 0)
235
+ return hdr;
117
236
  }
118
- return 0;
237
+
238
+ if (len >= 13) {
239
+ const char *possible_libruby = (image->imageFilePath + len - 13);
240
+ if (strcmp(possible_libruby, "libruby.dylib") == 0)
241
+ return hdr;
242
+ }
243
+
244
+ return NULL;
119
245
  }
120
246
 
121
247
  /*
@@ -139,15 +265,12 @@ update_mach_section(const struct mach_header *header, const struct section_64 *s
139
265
  void *section = getsectdatafromheader_64((const struct mach_header_64*)header, "__TEXT", sect->sectname, &len) + slide;
140
266
 
141
267
  if (strncmp(sect->sectname, "__symbol_stub", 13) == 0) {
142
- if (should_update_stub_table(section)) {
143
- if (update_dyld_stub_table(section, sect->size, trampee_addr, tramp) == 0) {
144
- ret = 0;
145
- }
268
+ if (update_dyld_stub_table(section, sect->size, trampee_addr, tramp) == 0) {
269
+ ret = 0;
146
270
  }
147
271
  return ret;
148
272
  }
149
273
 
150
- /* TODO: check the filename just like we do above for stub sections. No reason to look at unrelated files. */
151
274
  if (strcmp(sect->sectname, "__text") == 0) {
152
275
  size_t count = 0;
153
276
  for(; count < len; section++, count++) {
@@ -223,52 +346,6 @@ find_dyld_image_index(const struct mach_header_64 *hdr) {
223
346
  return -1;
224
347
  }
225
348
 
226
- /*
227
- * This function returns a buffer containing the file that is presumed
228
- * to be either the Ruby executable or libruby. (Wherever rb_newobj is found.)
229
- *
230
- * The passed pointer index is set to the dyld image index for the associated
231
- * in-process mach image.
232
- *
233
- * The reason that we read in the file is because the symbol table is not loaded
234
- * into memory with everything else at load time (at least not anywhere I can find).
235
- *
236
- * !!! The pointer returned by this function must be freed !!!
237
- */
238
-
239
- static void *
240
- get_ruby_file_and_header_index(int *index) {
241
- void *ptr = NULL;
242
- void *buf = NULL;
243
- Dl_info info;
244
- struct stat filestat;
245
-
246
- // We can use this as a reasonably sure method of finding the file
247
- // that the Ruby junk resides in.
248
- ptr = dlsym(RTLD_DEFAULT, "rb_newobj");
249
-
250
- if (!ptr)
251
- errx(EX_SOFTWARE, "Could not find rb_newobj in this process. WTF???");
252
-
253
- if (!dladdr(ptr, &info) || !info.dli_fname)
254
- errx(EX_SOFTWARE, "Could not find the Mach object associated with rb_newobj.");
255
-
256
- FILE *file = fopen(info.dli_fname, "r");
257
- if (!file)
258
- errx(EX_OSFILE, "Failed to open Ruby file %s", info.dli_fname);
259
-
260
- stat(info.dli_fname, &filestat);
261
- buf = malloc(filestat.st_size);
262
-
263
- if (fread(buf, filestat.st_size, 1, file) != 1)
264
- errx(EX_OSFILE, "Failed to fread() Ruby file %s", info.dli_fname);
265
-
266
- fclose(file);
267
-
268
- *index = find_dyld_image_index((const struct mach_header_64*) info.dli_fbase);
269
- return buf;
270
- }
271
-
272
349
  /*
273
350
  * This function compares two nlist_64 structures by their n_value field (address, usually).
274
351
  * It is used by qsort in extract_symbol_table.
@@ -298,15 +375,19 @@ nlist_cmp(const void *obj1, const void *obj2) {
298
375
  */
299
376
 
300
377
  static void
301
- extract_symbol_table(const struct mach_header_64 *hdr, const struct nlist_64 ***symbol_table, const char **string_table, uint32_t *symbol_count, uint32_t *strsize) {
378
+ extract_symbol_table(const struct mach_header_64 *hdr, struct mach_config *img_cfg) {
302
379
  const struct nlist_64 **new_symtbl;
303
380
  char *new_strtbl;
304
381
  uint32_t i, j;
305
382
 
306
- const char *lc = (const char*) hdr + sizeof(struct mach_header_64);
383
+ assert(hdr);
384
+ assert(img_cfg);
385
+
386
+ const struct load_command *lc = (const struct load_command *)(hdr + 1);
307
387
 
308
- for (i = 0; i < hdr->ncmds; i++) {
309
- if (((const struct load_command*)lc)->cmd == LC_SYMTAB) {
388
+ for (i = 0; i < hdr->ncmds; i++, (lc = (const struct load_command *)((char *)lc + lc->cmdsize))) {
389
+ if (lc->cmd == LC_SYMTAB) {
390
+ // dbg_printf("found an LC_SYMTAB load command.\n");
310
391
  const struct symtab_command *sc = (const struct symtab_command*) lc;
311
392
  const struct nlist_64 *file_symtbl = (const struct nlist_64*)((const char*)hdr + sc->symoff);
312
393
 
@@ -320,16 +401,49 @@ extract_symbol_table(const struct mach_header_64 *hdr, const struct nlist_64 ***
320
401
 
321
402
  qsort(new_symtbl, sc->nsyms, sizeof(struct nlist_64*), &nlist_cmp);
322
403
 
323
- *symbol_table = new_symtbl;
324
- *string_table = new_strtbl;
325
- *symbol_count = sc->nsyms;
326
- *strsize = sc->strsize;
327
- return;
328
- }
404
+ img_cfg->symbol_table = file_symtbl;
405
+ img_cfg->sorted_symbol_table = new_symtbl;
406
+ img_cfg->symbol_count = sc->nsyms;
407
+
408
+ img_cfg->string_table = new_strtbl;
409
+ img_cfg->string_table_size = sc->strsize;
410
+
411
+ } else if (lc->cmd == LC_DYSYMTAB) {
412
+ // dbg_printf("found an LC_DYSYMTAB load command.\n");
413
+ const struct dysymtab_command *dynsym = (const struct dysymtab_command *) lc;
414
+ img_cfg->nindirectsyms = dynsym->nindirectsyms;
415
+ img_cfg->indirectsymoff = dynsym->indirectsymoff;
416
+
417
+ } else if (lc->cmd == LC_SEGMENT_64) {
418
+ // dbg_printf("found an LC_SEGMENT_64 load command.\n");
419
+ const struct segment_command_64 *seg = (const struct segment_command_64 *) lc;
420
+ uint32_t i = 0;
421
+ const struct section_64 *asect = (const struct section_64 *)(seg + 1);
422
+ for(; i < seg->nsects; i++, asect++) {
423
+ /*
424
+ * setting up data to find the indirect symbol tables.
425
+ */
426
+
427
+ /* if this section has no symbol stubs, then we don't care about it */
428
+ if ((asect->flags & SECTION_TYPE) != S_SYMBOL_STUBS)
429
+ continue;
329
430
 
330
- lc += ((const struct load_command*)lc)->cmdsize;
431
+ if (asect->reserved2 == 0) {
432
+ dbg_printf("!!! Found an LC_SEGMET_64 which was marked as having stubs,"
433
+ " but does not have reserved2 set!! %16s.%16s (skipping)\n", asect->segname, asect->sectname);
434
+ continue;
435
+ }
436
+
437
+ // dbg_printf("Found a section with symbol stubs: %16s.%16s.\n", asect->segname, asect->sectname);
438
+ img_cfg->symstub_sect = asect;
439
+ }
440
+
441
+ } else {
442
+ // dbg_printf("found another load command that is not being tracked: %" PRId32 "\n", lc->cmd);
443
+ }
331
444
  }
332
- errx(EX_SOFTWARE, "Unable to find LC_SYMTAB");
445
+
446
+ assert(img_cfg->symbol_table && img_cfg->string_table);
333
447
  }
334
448
 
335
449
  /*
@@ -337,60 +451,174 @@ extract_symbol_table(const struct mach_header_64 *hdr, const struct nlist_64 ***
337
451
  */
338
452
 
339
453
  static inline const char*
340
- get_symtab_string(uint32_t stroff) {
341
- assert(mach_config.string_table != NULL);
342
- assert(stroff < mach_config.string_table_size);
343
- return mach_config.string_table + stroff;
454
+ get_symtab_string(struct mach_config *img_cfg, uint32_t stroff) {
455
+ assert(img_cfg);
456
+ assert(img_cfg->string_table != NULL);
457
+ assert(stroff < img_cfg->string_table_size);
458
+ return img_cfg->string_table + stroff;
344
459
  }
345
460
 
346
461
  /*
347
- * Return the address and size of a symbol given its name
462
+ * Look up the address, size, and symbol table index of a symbol given a symbol_data
463
+ * If sym_data is passed with the name set, this function will attempt to fill
464
+ * in the address, etc. If it is passed with the address set, it will attempt
465
+ * to fill in the name.
348
466
  */
349
467
 
468
+ static void
469
+ extract_symbol_data(struct mach_config *img_cfg, struct symbol_data *sym_data)
470
+ {
471
+ uint32_t i, j;
472
+
473
+ assert(img_cfg->symbol_table != NULL);
474
+ assert(img_cfg->symbol_count > 0);
475
+
476
+ for (i=0; i < img_cfg->symbol_count; i++) {
477
+ // const struct nlist_64 *nlist_entry = img_cfg->sorted_symbol_table[i];
478
+ const struct nlist_64 *nlist_entry = img_cfg->symbol_table + i;
479
+ const char *string = NULL;
480
+
481
+ string = get_symtab_string(img_cfg, nlist_entry->n_un.n_strx);
482
+
483
+ /* Add the slide to get the *real* address in the process. */
484
+ const uint64_t addr = nlist_entry->n_value;
485
+ void *ptr = (void*)(addr + img_cfg->image_offset);
486
+
487
+ /*
488
+ * If the user passes a name, match against the name
489
+ * If the user passes an address, match against that.
490
+ */
491
+ if ((sym_data->name && string && strcmp(sym_data->name, string+1) == 0) || (sym_data->address && ptr == sym_data->address)) {
492
+ if (!sym_data->address)
493
+ sym_data->address = ptr;
494
+ if (!sym_data->name)
495
+ sym_data->name = string+1;
496
+
497
+ sym_data->index = i;
498
+
499
+ const struct nlist_64 *next_entry = NULL;
500
+
501
+ /*
502
+ * There can be multiple entries in the symbol table with the same n_value (address).
503
+ * This means that the 'next' one isn't always good enough. We have to make sure it's
504
+ * really a different symbol.
505
+ */
506
+ j = 1;
507
+ while (next_entry == NULL) {
508
+ const struct nlist_64 *tmp_entry = img_cfg->sorted_symbol_table[i + j];
509
+ if (nlist_entry->n_value != tmp_entry->n_value)
510
+ next_entry = tmp_entry;
511
+ j++;
512
+ }
513
+
514
+ /*
515
+ * Subtract our address from the address of the next symbol to get its rough size.
516
+ * My observation is that the start of the next symbol will be padded to 16 byte alignment from the end of this one.
517
+ * This should be fine, since the point of getting the size is just to minimize scan area for tramp insertions.
518
+ */
519
+ sym_data->size = (next_entry->n_value - addr);
520
+ break;
521
+ }
522
+ }
523
+ }
524
+
525
+ static void
526
+ free_mach_config(struct mach_config *cfg) {
527
+ if (cfg == &ruby_img_cfg)
528
+ return;
529
+
530
+ munmap_file(&cfg->file);
531
+ free(cfg);
532
+ }
533
+
534
+ static struct mach_config *
535
+ mach_config_for_index(unsigned int index) {
536
+ if (index >= _dyld_image_count())
537
+ return NULL;
538
+
539
+ if (index == ruby_img_cfg.index)
540
+ return &ruby_img_cfg;
541
+
542
+ const struct dyld_image_info *image = dyld_get_image_info_for_index(index);
543
+ struct mach_config *cfg = calloc(1, sizeof(struct mach_config));
544
+
545
+ cfg->index = index;
546
+ cfg->file.name = cfg->filename = image->imageFilePath;
547
+ if (mmap_file_open(&cfg->file) < 0)
548
+ errx(EX_OSFILE, "Failed to fread() file %s", cfg->filename);
549
+ cfg->image_offset = _dyld_get_image_vmaddr_slide(index);
550
+ cfg->load_addr = image->imageLoadAddress;
551
+
552
+ struct mach_header_64 *hdr = (struct mach_header_64*) cfg->file.data;
553
+ assert(hdr);
554
+
555
+ if (hdr->magic == FAT_CIGAM) {
556
+ unsigned int j;
557
+ struct fat_header *fat = (struct fat_header *)hdr;
558
+
559
+ for(j=0; j < OSSwapInt32(fat->nfat_arch); j++) {
560
+ struct fat_arch *arch = (struct fat_arch *)((char*)fat + sizeof(struct fat_header) + sizeof(struct fat_arch) * j);
561
+
562
+ if (OSSwapInt32(arch->cputype) == CPU_TYPE_X86_64) {
563
+ hdr = (struct mach_header_64 *)(cfg->file.data + OSSwapInt32(arch->offset));
564
+ break;
565
+ }
566
+ }
567
+ }
568
+
569
+ if (hdr->magic != MH_MAGIC_64) {
570
+ printf("Magic for Ruby Mach-O file doesn't match\n");
571
+ munmap_file(&cfg->file);
572
+ free(cfg);
573
+ return NULL;
574
+ }
575
+
576
+ extract_symbol_table(hdr, cfg);
577
+ cfg->hdr = (const struct mach_header *)hdr;
578
+
579
+ return cfg;
580
+ }
581
+
350
582
  void *
351
583
  bin_find_symbol(const char *symbol, size_t *size, int search_libs) {
352
- void *ptr = NULL;
353
- uint32_t i, j;
584
+ struct symbol_data sym_data;
354
585
 
355
- assert(mach_config.symbol_table != NULL);
356
- assert(mach_config.symbol_count > 0);
586
+ memset(&sym_data, 0, sizeof(struct symbol_data));
587
+ sym_data.name = symbol;
357
588
 
358
- for (i=0; i < mach_config.symbol_count; i++) {
359
- const struct nlist_64 *nlist_entry = mach_config.symbol_table[i];
360
- const char *string = get_symtab_string(nlist_entry->n_un.n_strx);
589
+ extract_symbol_data(&ruby_img_cfg, &sym_data);
361
590
 
362
- if (string && strcmp(symbol, string+1) == 0) {
363
- const uint64_t addr = nlist_entry->n_value;
364
- /* Add the slide to get the *real* address in the process. */
365
- ptr = (void*)(addr + mach_config.image_offset);
591
+ if (!sym_data.address && search_libs) {
592
+ int i, header_count = _dyld_image_count();
366
593
 
367
- if (size) {
368
- const struct nlist_64 *next_entry = NULL;
594
+ for (i=0; i < header_count; i++) {
595
+ const struct dyld_image_info *image = dyld_get_image_info_for_index(i);
369
596
 
370
- /*
371
- * There can be multiple entries in the symbol table with the same n_value (address).
372
- * This means that the 'next' one isn't always good enough. We have to make sure it's
373
- * really a different symbol.
374
- */
375
- j = 1;
376
- while (next_entry == NULL) {
377
- const struct nlist_64 *tmp_entry = mach_config.symbol_table[i + j];
378
- if (nlist_entry->n_value != tmp_entry->n_value)
379
- next_entry = tmp_entry;
380
- j++;
381
- }
597
+ if ((void*)image->imageLoadAddress == &_mh_bundle_header ||
598
+ (void*)image->imageLoadAddress == &_mh_execute_header)
599
+ continue;
382
600
 
383
- /*
384
- * Subtract our address from the address of the next symbol to get its rough size.
385
- * My observation is that the start of the next symbol will be padded to 16 byte alignment from the end of this one.
386
- * This should be fine, since the point of getting the size is just to minimize scan area for tramp insertions.
387
- */
388
- *size = (next_entry->n_value - addr);
601
+ struct mach_config *cfg = mach_config_for_index(i);
602
+ if (cfg) {
603
+ extract_symbol_data(cfg, &sym_data);
604
+
605
+ if (sym_data.address == image->imageLoadAddress) { // wtf? this happens for mysql_api.bundle with mysql_real_query
606
+ sym_data.address = 0;
607
+ } else if (sym_data.address) {
608
+ if (cfg->image_offset == 0) // another wtf? happens on libSystem.dylib where we need to add load address, but libmysqlclient.dylib etc are fine
609
+ sym_data.address = (char*)image->imageLoadAddress + (size_t)sym_data.address;
610
+ dbg_printf("found symbol %s in %s: %p\n", sym_data.name, image->imageFilePath, sym_data.address);
611
+ free_mach_config(cfg);
612
+ break;
613
+ }
614
+ free_mach_config(cfg);
389
615
  }
390
- break;
391
616
  }
392
617
  }
393
- return ptr;
618
+
619
+ if (size)
620
+ *size = sym_data.size;
621
+ return sym_data.address;
394
622
  }
395
623
 
396
624
  /*
@@ -398,27 +626,14 @@ bin_find_symbol(const char *symbol, size_t *size, int search_libs) {
398
626
  */
399
627
  const char *
400
628
  bin_find_symbol_name(void *symbol) {
401
- const char *name = NULL;
402
- uint32_t i;
403
-
404
- assert(mach_config.symbol_table != NULL);
405
- assert(mach_config.symbol_count > 0);
629
+ struct symbol_data sym_data;
406
630
 
407
- for (i=0; i < mach_config.symbol_count; i++) {
408
- const struct nlist_64 *nlist_entry = mach_config.symbol_table[i];
409
- const char *string = get_symtab_string(nlist_entry->n_un.n_strx);
631
+ memset(&sym_data, 0, sizeof(struct symbol_data));
632
+ sym_data.address = symbol;
410
633
 
411
- const uint64_t addr = nlist_entry->n_value;
412
- /* Add the slide to get the *real* address in the process. */
413
- void *ptr = (void*)(addr + mach_config.image_offset);
634
+ extract_symbol_data(&ruby_img_cfg, &sym_data);
414
635
 
415
- if (ptr == symbol) {
416
- name = string+1;
417
- break;
418
- }
419
- }
420
-
421
- return name;
636
+ return sym_data.name;
422
637
  }
423
638
 
424
639
  /*
@@ -447,27 +662,65 @@ bin_update_image(const char *trampee, struct tramp_st2_entry *tramp, void **orig
447
662
 
448
663
  // Go through all the mach objects that are loaded into this process
449
664
  for (i=0; i < header_count; i++) {
450
- const struct mach_header *current_hdr = _dyld_get_image_header(i);
451
- if ((void*)current_hdr == &_mh_bundle_header)
452
- continue;
665
+ const struct mach_header *current_hdr = NULL;
666
+
667
+ if ((void*)_dyld_get_image_header(i) == &_mh_bundle_header)
668
+ continue; // always ignore memprof.bundle
669
+
670
+ struct mach_config *cfg = mach_config_for_index(i);
671
+ if (cfg->filename && strstr(cfg->filename, "libSystem") != NULL) {
672
+ free_mach_config(cfg);
673
+ continue; // ignore libSystem
674
+ }
675
+
676
+ void *stub = find_stub_addr(trampee, cfg);
677
+
678
+ if (stub) {
679
+ struct dyld_stub_entry *entry = (struct dyld_stub_entry *)stub;
680
+ if (orig_function)
681
+ *orig_function = get_dyld_stub_target(entry);
682
+ set_dyld_stub_target(entry, tramp->addr);
453
683
 
454
- if (update_bin_for_mach_header(current_hdr, _dyld_get_image_vmaddr_slide(i), trampee_addr, tramp) == 0)
455
684
  ret = 0;
685
+
686
+ } else if (trampee_addr) {
687
+ if ((current_hdr = should_update_image(i)) == NULL)
688
+ continue;
689
+
690
+ if (update_bin_for_mach_header(current_hdr, _dyld_get_image_vmaddr_slide(i), trampee_addr, tramp) == 0) {
691
+ ret = 0;
692
+ if (orig_function)
693
+ *orig_function = trampee_addr;
694
+ }
695
+ }
696
+
697
+ free_mach_config(cfg);
456
698
  }
699
+
457
700
  return ret;
458
701
  }
459
702
 
460
703
  void *
461
- bin_allocate_page()
704
+ do_bin_allocate_page(struct mach_config *cfg)
462
705
  {
463
706
  void *ret = NULL;
707
+ void *addr = (void *)cfg->load_addr;
464
708
  size_t i = 0;
465
709
 
466
- for (i = memprof_config.pagesize; i < INT_MAX - memprof_config.pagesize; i += memprof_config.pagesize) {
467
- ret = mmap((void*)(NULL + i), memprof_config.pagesize, PROT_WRITE|PROT_READ|PROT_EXEC,
710
+ dbg_printf("ruby loaded at: %p\n", addr);
711
+
712
+ /*
713
+ * XXX no clue how large the text segment is, so guess.
714
+ * TODO remove this.
715
+ */
716
+ addr += 65535;
717
+
718
+ for (; i < INT_MAX - memprof_config.pagesize; i += memprof_config.pagesize, addr += memprof_config.pagesize) {
719
+ ret = mmap(addr, memprof_config.pagesize, PROT_WRITE|PROT_READ|PROT_EXEC,
468
720
  MAP_ANON|MAP_PRIVATE, -1, 0);
469
721
 
470
722
  if (ret != MAP_FAILED) {
723
+ dbg_printf("found a page at: %p\n", ret);
471
724
  memset(ret, 0x90, memprof_config.pagesize);
472
725
  return ret;
473
726
  }
@@ -475,38 +728,68 @@ bin_allocate_page()
475
728
  return NULL;
476
729
  }
477
730
 
731
+ void *
732
+ bin_allocate_page()
733
+ {
734
+ return do_bin_allocate_page(&ruby_img_cfg);
735
+ }
736
+
478
737
  size_t
479
738
  bin_type_size(const char *type)
480
739
  {
740
+ (void) type;
481
741
  return 0;
482
742
  }
483
743
 
484
744
  int
485
745
  bin_type_member_offset(const char *type, const char *member)
486
746
  {
747
+ (void) type;
748
+ (void) member;
487
749
  return -1;
488
750
  }
489
751
 
490
752
  void
491
753
  bin_init()
492
754
  {
493
- void *file = NULL;
755
+ void *ptr = NULL;
494
756
  int index = 0;
757
+ Dl_info info;
495
758
 
496
- memset(&mach_config, 0, sizeof(struct mach_config));
759
+ memset(&ruby_img_cfg, 0, sizeof(struct mach_config));
497
760
 
498
- file = get_ruby_file_and_header_index(&index);
761
+ // We can use this as a reasonably sure method of finding the file
762
+ // that the Ruby junk resides in.
763
+ ptr = dlsym(RTLD_DEFAULT, "rb_newobj");
764
+
765
+ if (!ptr)
766
+ errx(EX_SOFTWARE, "Could not find rb_newobj in this process. WTF???");
767
+
768
+ if (!dladdr(ptr, &info) || !info.dli_fname)
769
+ errx(EX_SOFTWARE, "Could not find the Mach object associated with rb_newobj.");
770
+
771
+ ruby_img_cfg.file.name = ruby_img_cfg.filename = info.dli_fname;
772
+ if (mmap_file_open(&ruby_img_cfg.file) < 0)
773
+ errx(EX_OSFILE, "Failed to fread() file %s", ruby_img_cfg.filename);
774
+ struct mach_header_64 *hdr = (struct mach_header_64*) ruby_img_cfg.file.data;
775
+ assert(hdr);
776
+ ruby_img_cfg.hdr = (const struct mach_header *)hdr;
499
777
 
500
- const struct mach_header_64 *hdr = (const struct mach_header_64*) file;
501
778
  if (hdr->magic != MH_MAGIC_64)
502
779
  errx(EX_SOFTWARE, "Magic for Ruby Mach-O file doesn't match");
503
780
 
504
- mach_config.image_offset = _dyld_get_image_vmaddr_slide(index);
781
+ index = find_dyld_image_index((const struct mach_header_64*) info.dli_fbase);
782
+ ruby_img_cfg.image_offset = _dyld_get_image_vmaddr_slide(index);
783
+ ruby_img_cfg.index = index;
784
+ ruby_img_cfg.load_addr = dyld_get_image_info_for_index(index)->imageLoadAddress;
785
+
786
+ extract_symbol_table(hdr, &ruby_img_cfg);
505
787
 
506
- extract_symbol_table(hdr, &mach_config.symbol_table, &mach_config.string_table, &mach_config.symbol_count, &mach_config.string_table_size);
788
+ assert(ruby_img_cfg.symbol_table != NULL);
789
+ assert(ruby_img_cfg.string_table != NULL);
790
+ assert(ruby_img_cfg.symbol_count > 0);
507
791
 
508
- assert(mach_config.symbol_table != NULL);
509
- assert(mach_config.string_table != NULL);
510
- assert(mach_config.symbol_count > 0);
792
+ // XXX: do not free this, since we're using the symbol and string tables from inside the file
793
+ // free(hdr);
511
794
  }
512
795
  #endif