memprof 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,22 @@ class MemprofUploader
33
33
  opts.on("--put-my-data-on-the-internet", "Confirm that you understand\n" +
34
34
  "memprof.com will show all your \n".rjust(80) +
35
35
  "internal data on the internet (required)".rjust(80)) {|arg| @confirmed = true}
36
+ opts.on("--info") do
37
+ require 'rbconfig'
38
+ puts RUBY_DESCRIPTION if defined? RUBY_DESCRIPTION
39
+ puts "CFLAGS='#{Config::CONFIG["CFLAGS"]}' ./configure #{Config::CONFIG["configure_args"]}"
40
+ bin = "#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']}"
41
+ puts `file #{bin}`
42
+
43
+ if RUBY_PLATFORM =~ /darwin/
44
+ puts `otool -L #{bin}`
45
+ else
46
+ puts `ldd #{bin}`
47
+ end
48
+
49
+ puts
50
+ exit!
51
+ end
36
52
  end
37
53
 
38
54
  begin
data/ext/elf.c CHANGED
@@ -562,32 +562,7 @@ bin_update_image(const char *trampee, struct tramp_st2_entry *tramp, void **orig
562
562
  /* first check if the symbol is in the PLT */
563
563
  trampee_addr = find_plt_addr(trampee, NULL);
564
564
 
565
- /* it isn't in the PLT, try to find it in the binary itself */
566
- if (!trampee_addr) {
567
- dbg_printf("Couldn't find %s in the PLT...\n", trampee);
568
- unsigned char *byte = ruby_info->text_segment;
569
- trampee_addr = bin_find_symbol(trampee, NULL, 0);
570
- size_t count = 0;
571
- int num = 0;
572
-
573
- assert(byte != NULL);
574
-
575
- if (!trampee_addr) {
576
- dbg_printf("WARNING: Couldn't find: %s anywhere, so not tramping!\n", trampee);
577
- return 0;
578
- }
579
-
580
- if (orig_func) {
581
- *orig_func = trampee_addr;
582
- }
583
-
584
- for(; count < ruby_info->text_segment_len; byte++, count++) {
585
- if (arch_insert_st1_tramp(byte, trampee_addr, tramp) == 0) {
586
- num++;
587
- }
588
- }
589
- dbg_printf("Inserted %d tramps for: %s\n", num, trampee);
590
- } else {
565
+ if (trampee_addr) {
591
566
  void *ret = NULL;
592
567
  dbg_printf("Found %s in the PLT, inserting tramp...\n", trampee);
593
568
  ret = overwrite_got(trampee_addr, tramp->addr);
@@ -598,6 +573,29 @@ bin_update_image(const char *trampee, struct tramp_st2_entry *tramp, void **orig
598
573
  *orig_func = ret;
599
574
  dbg_printf("setting orig function: %p\n", *orig_func);
600
575
  }
576
+ } else {
577
+ trampee_addr = bin_find_symbol(trampee, NULL, 0);
578
+ dbg_printf("Couldn't find %s in the PLT...\n", trampee);
579
+
580
+ if (trampee_addr) {
581
+ unsigned char *byte = ruby_info->text_segment;
582
+ size_t count = 0;
583
+ int num = 0;
584
+
585
+ assert(byte != NULL);
586
+
587
+ if (orig_func) {
588
+ *orig_func = trampee_addr;
589
+ }
590
+
591
+ for(; count < ruby_info->text_segment_len; byte++, count++) {
592
+ if (arch_insert_st1_tramp(byte, trampee_addr, tramp) == 0) {
593
+ num++;
594
+ }
595
+ }
596
+
597
+ dbg_printf("Inserted %d tramps for: %s\n", num, trampee);
598
+ }
601
599
  }
602
600
 
603
601
  dbg_printf("Trying to hook %s in other libraries...\n", trampee);
@@ -976,8 +974,8 @@ find_debug_syms(struct elf_info *elf)
976
974
  dbg_printf(".gnu_debuglink base file name: %s, crc: %lx\n", basename, crc);
977
975
 
978
976
  dir = dirname(tmp);
979
- debug_file = malloc(strlen(DEBUGDIR) + strlen(dir) +
980
- strlen("/") + strlen(basename) + 1);
977
+ debug_file = calloc(1, strlen(DEBUGDIR) + strlen(dir) +
978
+ strlen("/") + strlen(basename) + 1);
981
979
 
982
980
  strncat(debug_file, DEBUGDIR, strlen(DEBUGDIR));
983
981
  strncat(debug_file, dir, strlen(dir));
@@ -1197,7 +1195,11 @@ dissect_elf(struct elf_info *info, int find_debug)
1197
1195
 
1198
1196
  out:
1199
1197
  if (find_debug && ret == 1) {
1200
- find_debug_syms(info);
1198
+ if (info->debuglink_addr) {
1199
+ find_debug_syms(info);
1200
+ } else {
1201
+ dbg_printf("=== WARNING: Object %s was STRIPPED and had no debuglink section. Nothing left to try.", info->filename);
1202
+ }
1201
1203
  }
1202
1204
  return ret;
1203
1205
  }
@@ -33,17 +33,23 @@ unless File.exists?("#{CWD}/dst/lib/libyajl_ext.a")
33
33
 
34
34
  sys("tar zxvf #{yajl}")
35
35
  Dir.chdir("#{dir}/src") do
36
- FileUtils.mkdir_p "api/yajl"
36
+ sys("sed -i -e 's,yajl,json,g' *.h *.c api/*.h")
37
+ Dir['{,api/}yajl*.{h,c}'].each do |file|
38
+ FileUtils.mv file, file.gsub('yajl', 'json')
39
+ end
40
+
41
+ FileUtils.mkdir_p "api/json"
37
42
  %w[ common parse gen ].each do |f|
38
- FileUtils.cp "api/yajl_#{f}.h", 'api/yajl/'
43
+ FileUtils.cp "api/json_#{f}.h", 'api/json/'
39
44
  end
40
45
 
41
46
  File.open("extconf.rb",'w') do |f|
42
47
  f.puts "require 'mkmf'; $INCFLAGS[0,0] = '-I./api/ '; create_makefile 'libyajl'"
43
48
  end
44
- sys("#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']} extconf.rb")
45
49
 
50
+ sys("#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']} extconf.rb")
46
51
  sys("make")
52
+
47
53
  if RUBY_PLATFORM =~ /darwin/
48
54
  sys("libtool -static -o libyajl_ext.a #{Dir['*.o'].join(' ')}")
49
55
  else
@@ -53,7 +59,7 @@ unless File.exists?("#{CWD}/dst/lib/libyajl_ext.a")
53
59
  FileUtils.mkdir_p "#{CWD}/dst/lib"
54
60
  FileUtils.cp 'libyajl_ext.a', "#{CWD}/dst/lib"
55
61
  FileUtils.mkdir_p "#{CWD}/dst/include"
56
- FileUtils.cp_r 'api/yajl', "#{CWD}/dst/include/"
62
+ FileUtils.cp_r 'api/json', "#{CWD}/dst/include/"
57
63
  end
58
64
  end
59
65
  end
@@ -61,7 +67,7 @@ end
61
67
  $LIBPATH.unshift "#{CWD}/dst/lib"
62
68
  $INCFLAGS[0,0] = "-I#{CWD}/dst/include "
63
69
 
64
- unless have_library('yajl_ext') and have_header('yajl/yajl_gen.h')
70
+ unless have_library('yajl_ext') and have_header('json/json_gen.h')
65
71
  raise 'Yajl build failed'
66
72
  end
67
73
 
data/ext/i386.h CHANGED
@@ -16,8 +16,8 @@ static struct tramp_st2_entry {
16
16
  unsigned char ret;
17
17
  } __attribute__((__packed__)) default_st2_tramp = {
18
18
  .ebx_save = 0x53, /* push ebx */
19
- .mov = 0xbb, /* mov addr into ebx */
20
- .addr = 0, /* this is filled in later */
19
+ .mov = 0xbb, /* mov addr into ebx */
20
+ .addr = 0, /* this is filled in later */
21
21
  .call = {0xff, 0xd3}, /* calll *ebx */
22
22
  .ebx_restore = 0x5b, /* pop ebx */
23
23
  .ret = 0xc3, /* ret */
@@ -0,0 +1,53 @@
1
+ #ifndef _GNU_SOURCE
2
+ #define _GNU_SOURCE
3
+ #endif
4
+
5
+ #include <assert.h>
6
+ #include <stdio.h>
7
+ #include <stdlib.h>
8
+ #include <string.h>
9
+
10
+ #include "json.h"
11
+
12
+ void
13
+ json_gen_reset(json_gen gen)
14
+ {
15
+ json_gen_clear(gen);
16
+ assert (gen->state[gen->depth] == json_gen_complete);
17
+ gen->state[gen->depth] = json_gen_start;
18
+ gen->print(gen->ctx, "\n", 1);
19
+ }
20
+
21
+ json_gen_status
22
+ json_gen_cstr(json_gen gen, const char * str)
23
+ {
24
+ if (!str || str[0] == 0)
25
+ return json_gen_null(gen);
26
+ else
27
+ return json_gen_string(gen, (unsigned char *)str, strlen(str));
28
+ }
29
+
30
+ json_gen_status
31
+ json_gen_format(json_gen gen, char *format, ...)
32
+ {
33
+ va_list args;
34
+ char *str;
35
+ int bytes_printed = 0;
36
+
37
+ json_gen_status ret;
38
+
39
+ va_start(args, format);
40
+ bytes_printed = vasprintf(&str, format, args);
41
+ assert(bytes_printed != -1);
42
+ va_end(args);
43
+
44
+ ret = json_gen_string(gen, (unsigned char *)str, strlen(str));
45
+ free(str);
46
+ return ret;
47
+ }
48
+
49
+ json_gen_status
50
+ json_gen_pointer(json_gen gen, void* ptr)
51
+ {
52
+ return json_gen_format(gen, "0x%x", ptr);
53
+ }
@@ -0,0 +1,48 @@
1
+ #if !defined(__JSON__H_)
2
+ #define __JSON__H_
3
+
4
+ #include <stdarg.h>
5
+ #include <json/json_gen.h>
6
+
7
+ /* HAX: copied from internal json_gen.c (PATCH json before building instead)
8
+ */
9
+
10
/*
 * Generator states. This list is a copy of a definition that lives outside
 * this header (see the HAX note above), so the numeric values are spelled
 * out to make accidental reordering visible.
 */
typedef enum {
  json_gen_start       = 0,
  json_gen_map_start   = 1,
  json_gen_map_key     = 2,
  json_gen_map_val     = 3,
  json_gen_array_start = 4,
  json_gen_in_array    = 5,
  json_gen_complete    = 6,
  json_gen_error       = 7
} json_gen_state;
20
+
21
+ struct json_gen_t
22
+ {
23
+ unsigned int depth;
24
+ unsigned int pretty;
25
+ const char * indentString;
26
+ json_gen_state state[YAJL_MAX_DEPTH];
27
+ json_print_t print;
28
+ void * ctx; /* json_buf */
29
+ /* memory allocation routines */
30
+ json_alloc_funcs alloc;
31
+ };
32
+
33
+ /* END HAX
34
+ */
35
+
36
+ void
37
+ json_gen_reset(json_gen gen);
38
+
39
+ json_gen_status
40
+ json_gen_cstr(json_gen gen, const char * str);
41
+
42
+ json_gen_status
43
+ json_gen_format(json_gen gen, char *format, ...);
44
+
45
+ json_gen_status
46
+ json_gen_pointer(json_gen gen, void* ptr);
47
+
48
+ #endif
data/ext/mach.c CHANGED
@@ -1,12 +1,15 @@
1
1
  #if defined(HAVE_MACH)
2
2
 
3
- #include "bin_api.h"
4
3
  #include "arch.h"
4
+ #include "bin_api.h"
5
+ #include "mmap.h"
5
6
  #include "util.h"
6
7
 
7
8
  #include <assert.h>
8
9
  #include <dlfcn.h>
9
10
  #include <err.h>
11
+ #include <errno.h>
12
+ #include <inttypes.h>
10
13
  #include <limits.h>
11
14
  #include <stdio.h>
12
15
  #include <stdlib.h>
@@ -20,16 +23,34 @@
20
23
  #include <mach-o/loader.h>
21
24
  #include <mach-o/ldsyms.h>
22
25
  #include <mach-o/nlist.h>
26
+ #include <mach-o/dyld_images.h>
27
+ #include <mach-o/fat.h>
23
28
 
24
29
  struct mach_config {
25
- const struct nlist_64 **symbol_table;
30
+ const struct mach_header *hdr;
31
+ const struct nlist_64 *symbol_table;
32
+ const struct nlist_64 **sorted_symbol_table;
33
+ const struct section_64 *symstub_sect;
26
34
  const char *string_table;
27
35
  uint32_t symbol_count;
28
36
  uint32_t string_table_size;
29
37
  intptr_t image_offset;
38
+ const struct mach_header* load_addr;
39
+ uint32_t nindirectsyms;
40
+ uint32_t indirectsymoff;
41
+ struct mmap_info file;
42
+ const char *filename;
43
+ unsigned int index;
44
+ };
45
+
46
+ struct symbol_data {
47
+ const char *name;
48
+ void *address;
49
+ uint32_t size;
50
+ uint32_t index;
30
51
  };
31
52
 
32
- static struct mach_config mach_config;
53
+ static struct mach_config ruby_img_cfg;
33
54
  extern struct memprof_config memprof_config;
34
55
 
35
56
  /*
@@ -46,7 +67,7 @@ extern struct memprof_config memprof_config;
46
67
 
47
68
  struct dyld_stub_entry {
48
69
  unsigned char jmp[2];
49
- uint32_t offset;
70
+ int32_t offset;
50
71
  } __attribute((__packed__));
51
72
 
52
73
  /* Return the jmp target of a stub entry */
@@ -64,7 +85,60 @@ get_dyld_stub_target(struct dyld_stub_entry *entry) {
64
85
 
65
86
  static inline void
66
87
  set_dyld_stub_target(struct dyld_stub_entry *entry, void *addr) {
67
- *((void**)((void*)(entry + 1) + entry->offset)) = addr;
88
+ void *target = (void *)(entry+1) + entry->offset;
89
+ copy_instructions(target, &addr, sizeof(void *));
90
+ }
91
+
92
+ static inline const char*
93
+ get_symtab_string(struct mach_config *img_cfg, uint32_t stroff);
94
+
95
+ static void
96
+ extract_symbol_data(struct mach_config *img_cfg, struct symbol_data *sym_data);
97
+
98
+ static int
99
+ find_dyld_image_index(const struct mach_header_64 *hdr);
100
+
101
+ static void *
102
+ find_stub_addr(const char *symname, struct mach_config *img_cfg)
103
+ {
104
+ uint64_t i = 0, nsyms = 0;
105
+ uint32_t symindex = 0;
106
+ assert(img_cfg && symname);
107
+ const struct section_64 *sect = img_cfg->symstub_sect;
108
+
109
+ if (!sect)
110
+ return NULL;
111
+
112
+ nsyms = sect->size / sect->reserved2;
113
+
114
+ for (; i < nsyms; i ++) {
115
+ uint32_t currsym = sect->reserved1 + i;
116
+ uint64_t stubaddr = sect->offset + (i * sect->reserved2);
117
+ uint32_t symoff = 0;
118
+
119
+ assert(currsym <= img_cfg->nindirectsyms);
120
+
121
+ /* indirect sym entries are just 32bit indexes into the symbol table to the
122
+ * symbol the stub is referring to.
123
+ */
124
+ symoff = img_cfg->indirectsymoff + (i * 4);
125
+ memcpy(&symindex, (char*)img_cfg->hdr + symoff, 4);
126
+ symindex = symindex & ((uint32_t) ~(INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS));
127
+
128
+ const struct nlist_64 *ent = img_cfg->symbol_table + symindex;
129
+ const char *string = get_symtab_string(img_cfg, ent->n_un.n_strx);
130
+
131
+ if (strcmp(symname, string+1) == 0) {
132
+ if (stubaddr) {
133
+ dbg_printf("address of stub in %s for %s is %" PRIx64 " + %p = ", img_cfg->filename, string, stubaddr, img_cfg->load_addr);
134
+ stubaddr = (uint64_t)img_cfg->load_addr + stubaddr;
135
+ dbg_printf("%" PRIx64 "\n", stubaddr);
136
+ return (void *)stubaddr;
137
+ }
138
+ }
139
+ }
140
+ dbg_printf("couldn't find address of stub: %s in %s\n", symname, img_cfg->filename);
141
+ return NULL;
68
142
  }
69
143
 
70
144
  /*
@@ -91,31 +165,83 @@ update_dyld_stub_table(void *table, uint64_t len, void *trampee_addr, struct tra
91
165
  }
92
166
 
93
167
  /*
94
- * This function tells us if the passed stub table address
95
- * is something that we should try to update (by looking at it's filename)
96
- * Only try to update dyld stub entries in files that match "libruby.dylib" or "*.bundle" (other C gems)
168
+ * Get all DSOs
97
169
  */
170
+ static const struct dyld_all_image_infos *
171
+ dyld_get_all_images() {
172
+ static const struct dyld_all_image_infos* (*_dyld_get_all_image_infos)() = NULL;
173
+ static const struct dyld_all_image_infos *images = NULL;
174
+
175
+ if (!_dyld_get_all_image_infos) {
176
+ _dyld_lookup_and_bind("__dyld_get_all_image_infos", (void**)&_dyld_get_all_image_infos, NULL);
177
+ assert(_dyld_get_all_image_infos != NULL);
178
+ }
98
179
 
99
- static inline int
100
- should_update_stub_table(void *addr) {
101
- Dl_info info;
180
+ if (!images) {
181
+ images = _dyld_get_all_image_infos();
182
+ assert(images != NULL);
183
+ }
102
184
 
103
- if (dladdr(addr, &info)) {
104
- size_t len = strlen(info.dli_fname);
185
+ return images;
186
+ }
105
187
 
106
- if (len >= 6) {
107
- const char *possible_bundle = (info.dli_fname + len - 6);
108
- if (strcmp(possible_bundle, "bundle") == 0)
109
- return 1;
110
- }
188
+ /*
189
+ * Get info for particular DSO
190
+ */
191
+ static const struct dyld_image_info *
192
+ dyld_get_image_info_for_index(int index) {
193
+ const struct dyld_all_image_infos *images = dyld_get_all_images();
194
+
195
+ // Stupid indexes into the infoArray don't match indexes used elsewhere, so we have to loop
196
+ unsigned int i;
197
+ const struct mach_header *hdr = _dyld_get_image_header(index);
198
+
199
+ for(i=0; i < _dyld_image_count(); i++) {
200
+ const struct dyld_image_info image = images->infoArray[i];
201
+ if (hdr == image.imageLoadAddress)
202
+ return &(images->infoArray[i]);
203
+ }
111
204
 
112
- if (len >= 13) {
113
- const char *possible_libruby = (info.dli_fname + len - 13);
114
- if (strcmp(possible_libruby, "libruby.dylib") == 0)
115
- return 1;
116
- }
205
+ return NULL;
206
+ }
207
+
208
+ /*
209
+ * This function tells us if the passed header index is something
210
+ * that we should try to update (by looking at its filename)
211
+ * Only try to update the running executable, or files that match
212
+ * "libruby.dylib" or "*.bundle" (other C gems)
213
+ */
214
+
215
+ static const struct mach_header *
216
+ should_update_image(int index) {
217
+ const struct mach_header *hdr = _dyld_get_image_header(index);
218
+
219
+ /* Don't update if it's the memprof bundle */
220
+ if ((void*)hdr == &_mh_bundle_header)
221
+ return NULL;
222
+
223
+ /* If it's the ruby executable, do it! */
224
+ if ((void*)hdr == &_mh_execute_header)
225
+ return hdr;
226
+
227
+ /* otherwise, check to see if its a bundle or libruby */
228
+ const struct dyld_image_info *image = dyld_get_image_info_for_index(index);
229
+
230
+ size_t len = strlen(image->imageFilePath);
231
+
232
+ if (len >= 6) {
233
+ const char *possible_bundle = (image->imageFilePath + len - 6);
234
+ if (strcmp(possible_bundle, "bundle") == 0)
235
+ return hdr;
117
236
  }
118
- return 0;
237
+
238
+ if (len >= 13) {
239
+ const char *possible_libruby = (image->imageFilePath + len - 13);
240
+ if (strcmp(possible_libruby, "libruby.dylib") == 0)
241
+ return hdr;
242
+ }
243
+
244
+ return NULL;
119
245
  }
120
246
 
121
247
  /*
@@ -139,15 +265,12 @@ update_mach_section(const struct mach_header *header, const struct section_64 *s
139
265
  void *section = getsectdatafromheader_64((const struct mach_header_64*)header, "__TEXT", sect->sectname, &len) + slide;
140
266
 
141
267
  if (strncmp(sect->sectname, "__symbol_stub", 13) == 0) {
142
- if (should_update_stub_table(section)) {
143
- if (update_dyld_stub_table(section, sect->size, trampee_addr, tramp) == 0) {
144
- ret = 0;
145
- }
268
+ if (update_dyld_stub_table(section, sect->size, trampee_addr, tramp) == 0) {
269
+ ret = 0;
146
270
  }
147
271
  return ret;
148
272
  }
149
273
 
150
- /* TODO: check the filename just like we do above for stub sections. No reason to look at unrelated files. */
151
274
  if (strcmp(sect->sectname, "__text") == 0) {
152
275
  size_t count = 0;
153
276
  for(; count < len; section++, count++) {
@@ -223,52 +346,6 @@ find_dyld_image_index(const struct mach_header_64 *hdr) {
223
346
  return -1;
224
347
  }
225
348
 
226
- /*
227
- * This function returns a buffer containing the file that is presumed
228
- * to be either the Ruby executable or libruby. (Wherever rb_newobj is found.)
229
- *
230
- * The passed pointer index is set to the dyld image index for the associated
231
- * in-process mach image.
232
- *
233
- * The reason that we read in the file is because the symbol table is not loaded
234
- * into memory with everything else at load time (at least not anywhere I can find).
235
- *
236
- * !!! The pointer returned by this function must be freed !!!
237
- */
238
-
239
- static void *
240
- get_ruby_file_and_header_index(int *index) {
241
- void *ptr = NULL;
242
- void *buf = NULL;
243
- Dl_info info;
244
- struct stat filestat;
245
-
246
- // We can use this is a reasonably sure method of finding the file
247
- // that the Ruby junk resides in.
248
- ptr = dlsym(RTLD_DEFAULT, "rb_newobj");
249
-
250
- if (!ptr)
251
- errx(EX_SOFTWARE, "Could not find rb_newobj in this process. WTF???");
252
-
253
- if (!dladdr(ptr, &info) || !info.dli_fname)
254
- errx(EX_SOFTWARE, "Could not find the Mach object associated with rb_newobj.");
255
-
256
- FILE *file = fopen(info.dli_fname, "r");
257
- if (!file)
258
- errx(EX_OSFILE, "Failed to open Ruby file %s", info.dli_fname);
259
-
260
- stat(info.dli_fname, &filestat);
261
- buf = malloc(filestat.st_size);
262
-
263
- if (fread(buf, filestat.st_size, 1, file) != 1)
264
- errx(EX_OSFILE, "Failed to fread() Ruby file %s", info.dli_fname);
265
-
266
- fclose(file);
267
-
268
- *index = find_dyld_image_index((const struct mach_header_64*) info.dli_fbase);
269
- return buf;
270
- }
271
-
272
349
  /*
273
350
  * This function compares two nlist_64 structures by their n_value field (address, usually).
274
351
  * It is used by qsort in extract_symbol_table.
@@ -298,15 +375,19 @@ nlist_cmp(const void *obj1, const void *obj2) {
298
375
  */
299
376
 
300
377
  static void
301
- extract_symbol_table(const struct mach_header_64 *hdr, const struct nlist_64 ***symbol_table, const char **string_table, uint32_t *symbol_count, uint32_t *strsize) {
378
+ extract_symbol_table(const struct mach_header_64 *hdr, struct mach_config *img_cfg) {
302
379
  const struct nlist_64 **new_symtbl;
303
380
  char *new_strtbl;
304
381
  uint32_t i, j;
305
382
 
306
- const char *lc = (const char*) hdr + sizeof(struct mach_header_64);
383
+ assert(hdr);
384
+ assert(img_cfg);
385
+
386
+ const struct load_command *lc = (const struct load_command *)(hdr + 1);
307
387
 
308
- for (i = 0; i < hdr->ncmds; i++) {
309
- if (((const struct load_command*)lc)->cmd == LC_SYMTAB) {
388
+ for (i = 0; i < hdr->ncmds; i++, (lc = (const struct load_command *)((char *)lc + lc->cmdsize))) {
389
+ if (lc->cmd == LC_SYMTAB) {
390
+ // dbg_printf("found an LC_SYMTAB load command.\n");
310
391
  const struct symtab_command *sc = (const struct symtab_command*) lc;
311
392
  const struct nlist_64 *file_symtbl = (const struct nlist_64*)((const char*)hdr + sc->symoff);
312
393
 
@@ -320,16 +401,49 @@ extract_symbol_table(const struct mach_header_64 *hdr, const struct nlist_64 ***
320
401
 
321
402
  qsort(new_symtbl, sc->nsyms, sizeof(struct nlist_64*), &nlist_cmp);
322
403
 
323
- *symbol_table = new_symtbl;
324
- *string_table = new_strtbl;
325
- *symbol_count = sc->nsyms;
326
- *strsize = sc->strsize;
327
- return;
328
- }
404
+ img_cfg->symbol_table = file_symtbl;
405
+ img_cfg->sorted_symbol_table = new_symtbl;
406
+ img_cfg->symbol_count = sc->nsyms;
407
+
408
+ img_cfg->string_table = new_strtbl;
409
+ img_cfg->string_table_size = sc->strsize;
410
+
411
+ } else if (lc->cmd == LC_DYSYMTAB) {
412
+ // dbg_printf("found an LC_DYSYMTAB load command.\n");
413
+ const struct dysymtab_command *dynsym = (const struct dysymtab_command *) lc;
414
+ img_cfg->nindirectsyms = dynsym->nindirectsyms;
415
+ img_cfg->indirectsymoff = dynsym->indirectsymoff;
416
+
417
+ } else if (lc->cmd == LC_SEGMENT_64) {
418
+ // dbg_printf("found an LC_SEGMENT_64 load command.\n");
419
+ const struct segment_command_64 *seg = (const struct segment_command_64 *) lc;
420
+ uint32_t i = 0;
421
+ const struct section_64 *asect = (const struct section_64 *)(seg + 1);
422
+ for(; i < seg->nsects; i++, asect++) {
423
+ /*
424
+ * setting up data to find the indirect symbol tables.
425
+ */
426
+
427
+ /* if this section has no symbol stubs, then we don't care about it */
428
+ if ((asect->flags & SECTION_TYPE) != S_SYMBOL_STUBS)
429
+ continue;
329
430
 
330
- lc += ((const struct load_command*)lc)->cmdsize;
431
+ if (asect->reserved2 == 0) {
432
+ dbg_printf("!!! Found an LC_SEGMET_64 which was marked as having stubs,"
433
+ " but does not have reserved2 set!! %16s.%16s (skipping)\n", asect->segname, asect->sectname);
434
+ continue;
435
+ }
436
+
437
+ // dbg_printf("Found a section with symbol stubs: %16s.%16s.\n", asect->segname, asect->sectname);
438
+ img_cfg->symstub_sect = asect;
439
+ }
440
+
441
+ } else {
442
+ // dbg_printf("found another load command that is not being tracked: %" PRId32 "\n", lc->cmd);
443
+ }
331
444
  }
332
- errx(EX_SOFTWARE, "Unable to find LC_SYMTAB");
445
+
446
+ assert(img_cfg->symbol_table && img_cfg->string_table);
333
447
  }
334
448
 
335
449
  /*
@@ -337,60 +451,174 @@ extract_symbol_table(const struct mach_header_64 *hdr, const struct nlist_64 ***
337
451
  */
338
452
 
339
453
  static inline const char*
340
- get_symtab_string(uint32_t stroff) {
341
- assert(mach_config.string_table != NULL);
342
- assert(stroff < mach_config.string_table_size);
343
- return mach_config.string_table + stroff;
454
+ get_symtab_string(struct mach_config *img_cfg, uint32_t stroff) {
455
+ assert(img_cfg);
456
+ assert(img_cfg->string_table != NULL);
457
+ assert(stroff < img_cfg->string_table_size);
458
+ return img_cfg->string_table + stroff;
344
459
  }
345
460
 
346
461
  /*
347
- * Return the address and size of a symbol given it's name
462
+ * Lookup the address, size, and symbol table index of a symbol given a symbol_data
463
+ * If sym_data is passed with the name set, this function will attempt to fill
464
+ * in the address, etc. If it is passed with the address set, it will attempt
465
+ * to fill in the name.
348
466
  */
349
467
 
468
+ static void
469
+ extract_symbol_data(struct mach_config *img_cfg, struct symbol_data *sym_data)
470
+ {
471
+ uint32_t i, j;
472
+
473
+ assert(img_cfg->symbol_table != NULL);
474
+ assert(img_cfg->symbol_count > 0);
475
+
476
+ for (i=0; i < img_cfg->symbol_count; i++) {
477
+ // const struct nlist_64 *nlist_entry = img_cfg->sorted_symbol_table[i];
478
+ const struct nlist_64 *nlist_entry = img_cfg->symbol_table + i;
479
+ const char *string = NULL;
480
+
481
+ string = get_symtab_string(img_cfg, nlist_entry->n_un.n_strx);
482
+
483
+ /* Add the slide to get the *real* address in the process. */
484
+ const uint64_t addr = nlist_entry->n_value;
485
+ void *ptr = (void*)(addr + img_cfg->image_offset);
486
+
487
+ /*
488
+ * If the user passes a name, match against the name
489
+ * If the user passes an address, match against that.
490
+ */
491
+ if ((sym_data->name && string && strcmp(sym_data->name, string+1) == 0) || (sym_data->address && ptr == sym_data->address)) {
492
+ if (!sym_data->address)
493
+ sym_data->address = ptr;
494
+ if (!sym_data->name)
495
+ sym_data->name = string+1;
496
+
497
+ sym_data->index = i;
498
+
499
+ const struct nlist_64 *next_entry = NULL;
500
+
501
+ /*
502
+ * There can be multiple entries in the symbol table with the same n_value (address).
503
+ * This means that the 'next' one isn't always good enough. We have to make sure it's
504
+ * really a different symbol.
505
+ */
506
+ j = 1;
507
+ while (next_entry == NULL) {
508
+ const struct nlist_64 *tmp_entry = img_cfg->sorted_symbol_table[i + j];
509
+ if (nlist_entry->n_value != tmp_entry->n_value)
510
+ next_entry = tmp_entry;
511
+ j++;
512
+ }
513
+
514
+ /*
515
+ * Subtract our address from the address of the next symbol to get it's rough size.
516
+ * My observation is that the start of the next symbol will be padded to 16 byte alignment from the end of this one.
517
+ * This should be fine, since the point of getting the size is just to minimize scan area for tramp insertions.
518
+ */
519
+ sym_data->size = (next_entry->n_value - addr);
520
+ break;
521
+ }
522
+ }
523
+ }
524
+
525
+ static void
526
+ free_mach_config(struct mach_config *cfg) {
527
+ if (cfg == &ruby_img_cfg)
528
+ return;
529
+
530
+ munmap_file(&cfg->file);
531
+ free(cfg);
532
+ }
533
+
534
+ static struct mach_config *
535
+ mach_config_for_index(unsigned int index) {
536
+ if (index >= _dyld_image_count())
537
+ return NULL;
538
+
539
+ if (index == ruby_img_cfg.index)
540
+ return &ruby_img_cfg;
541
+
542
+ const struct dyld_image_info *image = dyld_get_image_info_for_index(index);
543
+ struct mach_config *cfg = calloc(1, sizeof(struct mach_config));
544
+
545
+ cfg->index = index;
546
+ cfg->file.name = cfg->filename = image->imageFilePath;
547
+ if (mmap_file_open(&cfg->file) < 0)
548
+ errx(EX_OSFILE, "Failed to fread() file %s", cfg->filename);
549
+ cfg->image_offset = _dyld_get_image_vmaddr_slide(index);
550
+ cfg->load_addr = image->imageLoadAddress;
551
+
552
+ struct mach_header_64 *hdr = (struct mach_header_64*) cfg->file.data;
553
+ assert(hdr);
554
+
555
+ if (hdr->magic == FAT_CIGAM) {
556
+ unsigned int j;
557
+ struct fat_header *fat = (struct fat_header *)hdr;
558
+
559
+ for(j=0; j < OSSwapInt32(fat->nfat_arch); j++) {
560
+ struct fat_arch *arch = (struct fat_arch *)((char*)fat + sizeof(struct fat_header) + sizeof(struct fat_arch) * j);
561
+
562
+ if (OSSwapInt32(arch->cputype) == CPU_TYPE_X86_64) {
563
+ hdr = (struct mach_header_64 *)(cfg->file.data + OSSwapInt32(arch->offset));
564
+ break;
565
+ }
566
+ }
567
+ }
568
+
569
+ if (hdr->magic != MH_MAGIC_64) {
570
+ printf("Magic for Ruby Mach-O file doesn't match\n");
571
+ munmap_file(&cfg->file);
572
+ free(cfg);
573
+ return NULL;
574
+ }
575
+
576
+ extract_symbol_table(hdr, cfg);
577
+ cfg->hdr = (const struct mach_header *)hdr;
578
+
579
+ return cfg;
580
+ }
581
+
350
582
  void *
351
583
  bin_find_symbol(const char *symbol, size_t *size, int search_libs) {
352
- void *ptr = NULL;
353
- uint32_t i, j;
584
+ struct symbol_data sym_data;
354
585
 
355
- assert(mach_config.symbol_table != NULL);
356
- assert(mach_config.symbol_count > 0);
586
+ memset(&sym_data, 0, sizeof(struct symbol_data));
587
+ sym_data.name = symbol;
357
588
 
358
- for (i=0; i < mach_config.symbol_count; i++) {
359
- const struct nlist_64 *nlist_entry = mach_config.symbol_table[i];
360
- const char *string = get_symtab_string(nlist_entry->n_un.n_strx);
589
+ extract_symbol_data(&ruby_img_cfg, &sym_data);
361
590
 
362
- if (string && strcmp(symbol, string+1) == 0) {
363
- const uint64_t addr = nlist_entry->n_value;
364
- /* Add the slide to get the *real* address in the process. */
365
- ptr = (void*)(addr + mach_config.image_offset);
591
+ if (!sym_data.address && search_libs) {
592
+ int i, header_count = _dyld_image_count();
366
593
 
367
- if (size) {
368
- const struct nlist_64 *next_entry = NULL;
594
+ for (i=0; i < header_count; i++) {
595
+ const struct dyld_image_info *image = dyld_get_image_info_for_index(i);
369
596
 
370
- /*
371
- * There can be multiple entries in the symbol table with the same n_value (address).
372
- * This means that the 'next' one isn't always good enough. We have to make sure it's
373
- * really a different symbol.
374
- */
375
- j = 1;
376
- while (next_entry == NULL) {
377
- const struct nlist_64 *tmp_entry = mach_config.symbol_table[i + j];
378
- if (nlist_entry->n_value != tmp_entry->n_value)
379
- next_entry = tmp_entry;
380
- j++;
381
- }
597
+ if ((void*)image->imageLoadAddress == &_mh_bundle_header ||
598
+ (void*)image->imageLoadAddress == &_mh_execute_header)
599
+ continue;
382
600
 
383
- /*
384
- * Subtract our address from the address of the next symbol to get it's rough size.
385
- * My observation is that the start of the next symbol will be padded to 16 byte alignment from the end of this one.
386
- * This should be fine, since the point of getting the size is just to minimize scan area for tramp insertions.
387
- */
388
- *size = (next_entry->n_value - addr);
601
+ struct mach_config *cfg = mach_config_for_index(i);
602
+ if (cfg) {
603
+ extract_symbol_data(cfg, &sym_data);
604
+
605
+ if (sym_data.address == image->imageLoadAddress) { // wtf? this happens for mysql_api.bundle with mysql_real_query
606
+ sym_data.address = 0;
607
+ } else if (sym_data.address) {
608
+ if (cfg->image_offset == 0) // another wtf? happens on libSystem.dylib where we need to add load address, but libmysqlclient.dylib etc are fine
609
+ sym_data.address = (char*)image->imageLoadAddress + (size_t)sym_data.address;
610
+ dbg_printf("found symbol %s in %s: %p\n", sym_data.name, image->imageFilePath, sym_data.address);
611
+ free_mach_config(cfg);
612
+ break;
613
+ }
614
+ free_mach_config(cfg);
389
615
  }
390
- break;
391
616
  }
392
617
  }
393
- return ptr;
618
+
619
+ if (size)
620
+ *size = sym_data.size;
621
+ return sym_data.address;
394
622
  }
395
623
 
396
624
  /*
@@ -398,27 +626,14 @@ bin_find_symbol(const char *symbol, size_t *size, int search_libs) {
398
626
  */
399
627
  const char *
400
628
  bin_find_symbol_name(void *symbol) {
401
- const char *name = NULL;
402
- uint32_t i;
403
-
404
- assert(mach_config.symbol_table != NULL);
405
- assert(mach_config.symbol_count > 0);
629
+ struct symbol_data sym_data;
406
630
 
407
- for (i=0; i < mach_config.symbol_count; i++) {
408
- const struct nlist_64 *nlist_entry = mach_config.symbol_table[i];
409
- const char *string = get_symtab_string(nlist_entry->n_un.n_strx);
631
+ memset(&sym_data, 0, sizeof(struct symbol_data));
632
+ sym_data.address = symbol;
410
633
 
411
- const uint64_t addr = nlist_entry->n_value;
412
- /* Add the slide to get the *real* address in the process. */
413
- void *ptr = (void*)(addr + mach_config.image_offset);
634
+ extract_symbol_data(&ruby_img_cfg, &sym_data);
414
635
 
415
- if (ptr == symbol) {
416
- name = string+1;
417
- break;
418
- }
419
- }
420
-
421
- return name;
636
+ return sym_data.name;
422
637
  }
423
638
 
424
639
  /*
@@ -447,27 +662,65 @@ bin_update_image(const char *trampee, struct tramp_st2_entry *tramp, void **orig
447
662
 
448
663
  // Go through all the mach objects that are loaded into this process
449
664
  for (i=0; i < header_count; i++) {
450
- const struct mach_header *current_hdr = _dyld_get_image_header(i);
451
- if ((void*)current_hdr == &_mh_bundle_header)
452
- continue;
665
+ const struct mach_header *current_hdr = NULL;
666
+
667
+ if ((void*)_dyld_get_image_header(i) == &_mh_bundle_header)
668
+ continue; // always ignore memprof.bundle
669
+
670
+ struct mach_config *cfg = mach_config_for_index(i);
671
+ if (cfg->filename && strstr(cfg->filename, "libSystem") != NULL) {
672
+ free_mach_config(cfg);
673
+ continue; // ignore libSystem
674
+ }
675
+
676
+ void *stub = find_stub_addr(trampee, cfg);
677
+
678
+ if (stub) {
679
+ struct dyld_stub_entry *entry = (struct dyld_stub_entry *)stub;
680
+ if (orig_function)
681
+ *orig_function = get_dyld_stub_target(entry);
682
+ set_dyld_stub_target(entry, tramp->addr);
453
683
 
454
- if (update_bin_for_mach_header(current_hdr, _dyld_get_image_vmaddr_slide(i), trampee_addr, tramp) == 0)
455
684
  ret = 0;
685
+
686
+ } else if (trampee_addr) {
687
+ if ((current_hdr = should_update_image(i)) == NULL)
688
+ continue;
689
+
690
+ if (update_bin_for_mach_header(current_hdr, _dyld_get_image_vmaddr_slide(i), trampee_addr, tramp) == 0) {
691
+ ret = 0;
692
+ if (orig_function)
693
+ *orig_function = trampee_addr;
694
+ }
695
+ }
696
+
697
+ free_mach_config(cfg);
456
698
  }
699
+
457
700
  return ret;
458
701
  }
459
702
 
460
703
  void *
461
- bin_allocate_page()
704
+ do_bin_allocate_page(struct mach_config *cfg)
462
705
  {
463
706
  void *ret = NULL;
707
+ void *addr = (void *)cfg->load_addr;
464
708
  size_t i = 0;
465
709
 
466
- for (i = memprof_config.pagesize; i < INT_MAX - memprof_config.pagesize; i += memprof_config.pagesize) {
467
- ret = mmap((void*)(NULL + i), memprof_config.pagesize, PROT_WRITE|PROT_READ|PROT_EXEC,
710
+ dbg_printf("ruby loaded at: %p\n", addr);
711
+
712
+ /*
713
+ * XXX no clue how large the text segment is, so guess.
714
+ * TODO remove this.
715
+ */
716
+ addr += 65535;
717
+
718
+ for (; i < INT_MAX - memprof_config.pagesize; i += memprof_config.pagesize, addr += memprof_config.pagesize) {
719
+ ret = mmap(addr, memprof_config.pagesize, PROT_WRITE|PROT_READ|PROT_EXEC,
468
720
  MAP_ANON|MAP_PRIVATE, -1, 0);
469
721
 
470
722
  if (ret != MAP_FAILED) {
723
+ dbg_printf("found a page at: %p\n", ret);
471
724
  memset(ret, 0x90, memprof_config.pagesize);
472
725
  return ret;
473
726
  }
@@ -475,38 +728,68 @@ bin_allocate_page()
475
728
  return NULL;
476
729
  }
477
730
 
731
+ void *
732
+ bin_allocate_page()
733
+ {
734
+ return do_bin_allocate_page(&ruby_img_cfg);
735
+ }
736
+
478
737
  size_t
479
738
  bin_type_size(const char *type)
480
739
  {
740
+ (void) type;
481
741
  return 0;
482
742
  }
483
743
 
484
744
  int
485
745
  bin_type_member_offset(const char *type, const char *member)
486
746
  {
747
+ (void) type;
748
+ (void) member;
487
749
  return -1;
488
750
  }
489
751
 
490
752
  void
491
753
  bin_init()
492
754
  {
493
- void *file = NULL;
755
+ void *ptr = NULL;
494
756
  int index = 0;
757
+ Dl_info info;
495
758
 
496
- memset(&mach_config, 0, sizeof(struct mach_config));
759
+ memset(&ruby_img_cfg, 0, sizeof(struct mach_config));
497
760
 
498
- file = get_ruby_file_and_header_index(&index);
761
+ // We can use this as a reasonably sure method of finding the file
762
+ // that the Ruby junk resides in.
763
+ ptr = dlsym(RTLD_DEFAULT, "rb_newobj");
764
+
765
+ if (!ptr)
766
+ errx(EX_SOFTWARE, "Could not find rb_newobj in this process. WTF???");
767
+
768
+ if (!dladdr(ptr, &info) || !info.dli_fname)
769
+ errx(EX_SOFTWARE, "Could not find the Mach object associated with rb_newobj.");
770
+
771
+ ruby_img_cfg.file.name = ruby_img_cfg.filename = info.dli_fname;
772
+ if (mmap_file_open(&ruby_img_cfg.file) < 0)
773
+ errx(EX_OSFILE, "Failed to fread() file %s", ruby_img_cfg.filename);
774
+ struct mach_header_64 *hdr = (struct mach_header_64*) ruby_img_cfg.file.data;
775
+ assert(hdr);
776
+ ruby_img_cfg.hdr = (const struct mach_header *)hdr;
499
777
 
500
- const struct mach_header_64 *hdr = (const struct mach_header_64*) file;
501
778
  if (hdr->magic != MH_MAGIC_64)
502
779
  errx(EX_SOFTWARE, "Magic for Ruby Mach-O file doesn't match");
503
780
 
504
- mach_config.image_offset = _dyld_get_image_vmaddr_slide(index);
781
+ index = find_dyld_image_index((const struct mach_header_64*) info.dli_fbase);
782
+ ruby_img_cfg.image_offset = _dyld_get_image_vmaddr_slide(index);
783
+ ruby_img_cfg.index = index;
784
+ ruby_img_cfg.load_addr = dyld_get_image_info_for_index(index)->imageLoadAddress;
785
+
786
+ extract_symbol_table(hdr, &ruby_img_cfg);
505
787
 
506
- extract_symbol_table(hdr, &mach_config.symbol_table, &mach_config.string_table, &mach_config.symbol_count, &mach_config.string_table_size);
788
+ assert(ruby_img_cfg.symbol_table != NULL);
789
+ assert(ruby_img_cfg.string_table != NULL);
790
+ assert(ruby_img_cfg.symbol_count > 0);
507
791
 
508
- assert(mach_config.symbol_table != NULL);
509
- assert(mach_config.string_table != NULL);
510
- assert(mach_config.symbol_count > 0);
792
+ // XXX: do not free this, since we're using the symbol and string tables from inside the file
793
+ // free(hdr);
511
794
  }
512
795
  #endif