metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,1329 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # This sample creates the dynldr.so ruby shared object that allows interaction with
7
+ # native libraries
8
+ # x86 only for now
9
+
10
+ module Metasm
11
+ class DynLdr
12
+ # basic C defs for ruby internals - 1.8 and 1.9 compat - x86/x64
13
+ RUBY_H = <<EOS
14
+ #line #{__LINE__}
15
+ typedef uintptr_t VALUE;
16
+
17
+ #if defined(__PE__) && defined(__x86_64__)
18
+ // sonovabeep
19
+ #define INT2VAL(v) rb_ull2inum(v)
20
+ #define VAL2INT(v) rb_num2ull(v)
21
+ #else
22
+ #define INT2VAL(v) rb_uint2inum(v)
23
+ #define VAL2INT(v) rb_num2ulong(v)
24
+ #endif
25
+
26
+ struct rb_string_t {
27
+ VALUE flags;
28
+ VALUE klass;
29
+ VALUE len;
30
+ char *ptr;
31
+ union {
32
+ long capa;
33
+ VALUE shared;
34
+ } aux;
35
+ };
36
+ #define RString(x) ((struct rb_string_t *)(x))
37
+
38
+ struct rb_array_t {
39
+ VALUE flags;
40
+ VALUE klass;
41
+ VALUE len;
42
+ union {
43
+ long capa;
44
+ VALUE shared;
45
+ } aux;
46
+ VALUE *ptr;
47
+ };
48
+ #define RArray(x) ((struct rb_array_t *)(x))
49
+
50
+ // TODO improve autoimport to handle data imports correctly
51
+ extern VALUE *rb_cObject __attribute__((import));
52
+ extern VALUE *rb_eRuntimeError __attribute__((import));
53
+ extern VALUE *rb_eArgError __attribute__((import));
54
+
55
+ #define Qfalse ((VALUE)0)
56
+ #define Qtrue ((VALUE)2)
57
+ #define Qnil ((VALUE)4)
58
+
59
+ // allows generating a ruby1.9 dynldr.so from ruby1.8
60
+ #ifndef DYNLDR_RUBY_19
61
+ #define DYNLDR_RUBY_19 #{RUBY_VERSION >= '1.9' ? 1 : 0}
62
+ #endif
63
+
64
+ #if DYNLDR_RUBY_19
65
+ #define T_STRING 0x05
66
+ #define T_ARRAY 0x07
67
+ #define T_FIXNUM 0x15
68
+ #define T_MASK 0x1f
69
+ #define RSTRING_NOEMBED (1<<13)
70
+ #define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->len)
71
+ #define STR_LEN(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->len : (RString(o)->flags >> 14) & 0x1f)
72
+ #define RARRAY_EMBED (1<<13)
73
+ #define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
74
+ #define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 3) : RArray(o)->len)
75
+ #else
76
+ #define T_STRING 0x07
77
+ #define T_ARRAY 0x09
78
+ #define T_FIXNUM 0x0a
79
+ #define T_MASK 0x3f
80
+ #define STR_PTR(o) (RString(o)->ptr)
81
+ #define STR_LEN(o) (RString(o)->len)
82
+ #define ARY_PTR(o) (RArray(o)->ptr)
83
+ #define ARY_LEN(o) (RArray(o)->len)
84
+ #endif
85
+
86
+ #define TYPE(x) (((VALUE)(x) & 1) ? T_FIXNUM : (((VALUE)(x) & 3) || ((VALUE)(x) < 7)) ? 0x40 : RString(x)->flags & T_MASK)
87
+
88
+ VALUE rb_uint2inum(VALUE);
89
+ VALUE rb_ull2inum(unsigned long long);
90
+ VALUE rb_num2ulong(VALUE);
91
+ unsigned long long rb_num2ull(VALUE);
92
+ VALUE rb_str_new(const char* ptr, long len); // alloc + memcpy + 0term
93
+ VALUE rb_ary_new2(int len);
94
+ VALUE rb_float_new(double);
95
+
96
+ VALUE rb_intern(char *);
97
+ VALUE rb_funcall(VALUE recv, VALUE id, int nargs, ...);
98
+ VALUE rb_const_get(VALUE, VALUE);
99
+ VALUE rb_raise(VALUE, char*, ...);
100
+ void rb_define_const(VALUE, char *, VALUE);
101
+ void rb_define_method(VALUE, char *, VALUE (*)(), int);
102
+ void rb_define_singleton_method(VALUE, char *, VALUE (*)(), int);
103
+
104
+ EOS
105
+
106
+ # generic C source for the native component, ruby glue
107
+ DYNLDR_C = <<EOS
108
+ #{RUBY_H}
109
+ #line #{__LINE__}
110
+
111
+ #ifdef __PE__
112
+ __stdcall uintptr_t LoadLibraryA(char *);
113
+ __stdcall uintptr_t GetProcAddress(uintptr_t, char *);
114
+
115
+ #define os_load_lib(l) LoadLibraryA(l)
116
+ #define os_load_sym(l, s) GetProcAddress(l, s)
117
+ #define os_load_sym_ord(l, s) GetProcAddress(l, (char*)s)
118
+ #endif
119
+
120
+ #ifdef __ELF__
121
+ asm(".pt_gnu_stack rw");
122
+
123
+ #define RTLD_LAZY 1
124
+ uintptr_t dlopen(char*, int);
125
+ uintptr_t dlsym(uintptr_t, char*);
126
+
127
+ #define os_load_lib(l) dlopen(l, RTLD_LAZY)
128
+ #define os_load_sym(l, s) dlsym(l, s)
129
+ #define os_load_sym_ord(l, s) 0U
130
+ #endif
131
+
132
+ extern int *cb_ret_table;
133
+ extern void *callback_handler;
134
+ extern void *callback_id_0;
135
+ extern void *callback_id_1;
136
+
137
+ static VALUE dynldr;
138
+
139
+
140
+ static VALUE memory_read(VALUE self, VALUE addr, VALUE len)
141
+ {
142
+ return rb_str_new((char*)VAL2INT(addr), (long)VAL2INT(len));
143
+ }
144
+
145
+ static VALUE memory_read_int(VALUE self, VALUE addr)
146
+ {
147
+ return INT2VAL(*(uintptr_t*)VAL2INT(addr));
148
+ }
149
+
150
+ static VALUE memory_write(VALUE self, VALUE addr, VALUE val)
151
+ {
152
+ if (TYPE(val) != T_STRING)
153
+ rb_raise(*rb_eArgError, "mem_write needs a String");
154
+
155
+ char *src = STR_PTR(val);
156
+ char *dst = (char*)VAL2INT(addr);
157
+ unsigned len = (unsigned)STR_LEN(val);
158
+ while (len--)
159
+ *dst++ = *src++;
160
+ return val;
161
+ }
162
+
163
+ static VALUE memory_write_int(VALUE self, VALUE addr, VALUE val)
164
+ {
165
+ *(uintptr_t *)VAL2INT(addr) = VAL2INT(val);
166
+ return Qtrue;
167
+ }
168
+
169
+ static VALUE str_ptr(VALUE self, VALUE str)
170
+ {
171
+ if (TYPE(str) != T_STRING)
172
+ rb_raise(*rb_eArgError, "Invalid ptr");
173
+ return INT2VAL((uintptr_t)STR_PTR(str));
174
+ }
175
+
176
+ // return the VALUE of an object (different of .object_id for Symbols, maybe others)
177
+ static VALUE rb_obj_to_value(VALUE self, VALUE obj)
178
+ {
179
+ return INT2VAL((uintptr_t)obj);
180
+ }
181
+
182
+ // return the ruby object at VALUE
183
+ // USE WITH CAUTION, passing invalid values will segfault the interpreter/GC
184
+ static VALUE rb_value_to_obj(VALUE self, VALUE val)
185
+ {
186
+ return VAL2INT(val);
187
+ }
188
+
189
+ // load a symbol from a lib byname, byordinal if integral
190
+ static VALUE sym_addr(VALUE self, VALUE lib, VALUE func)
191
+ {
192
+ uintptr_t h, p;
193
+
194
+ if (TYPE(lib) == T_STRING)
195
+ h = os_load_lib(STR_PTR(lib));
196
+ else if (TYPE(lib) == T_FIXNUM)
197
+ h = VAL2INT(lib);
198
+ else
199
+ rb_raise(*rb_eArgError, "Invalid lib");
200
+
201
+ if (TYPE(func) != T_STRING && TYPE(func) != T_FIXNUM)
202
+ rb_raise(*rb_eArgError, "Invalid func");
203
+
204
+ if (TYPE(func) == T_FIXNUM)
205
+ p = os_load_sym_ord(h, VAL2INT(func));
206
+ else
207
+ p = os_load_sym(h, STR_PTR(func));
208
+
209
+ return INT2VAL(p);
210
+ }
211
+
212
+ #ifdef __i386__
213
+
214
+ __int64 do_invoke_stdcall(unsigned, unsigned, unsigned*);
215
+ __int64 do_invoke_fastcall(unsigned, unsigned, unsigned*);
216
+ __int64 do_invoke(unsigned, unsigned, unsigned*);
217
+ double fake_float(void);
218
+
219
+ // invoke a symbol
220
+ // args is an array of Integers
221
+ // flags: 1 stdcall 2 fastcall 4 ret_64bits 8 ret_float
222
+ // TODO float args
223
+ static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags)
224
+ {
225
+ if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 64)
226
+ rb_raise(*rb_eArgError, "bad args");
227
+
228
+ uintptr_t flags_v = VAL2INT(flags);
229
+ uintptr_t ptr_v = VAL2INT(ptr);
230
+ unsigned i, argsz;
231
+ uintptr_t args_c[64];
232
+ __int64 ret;
233
+
234
+ argsz = ARY_LEN(args);
235
+ for (i=0U ; i<argsz ; ++i)
236
+ args_c[i] = VAL2INT(ARY_PTR(args)[i]);
237
+
238
+ if (flags_v & 2)
239
+ ret = do_invoke_fastcall(ptr_v, argsz, args_c); // supercedes stdcall
240
+ else if (flags_v & 1)
241
+ ret = do_invoke_stdcall(ptr_v, argsz, args_c);
242
+ else
243
+ ret = do_invoke(ptr_v, argsz, args_c);
244
+
245
+ if (flags_v & 4)
246
+ return rb_ull2inum((unsigned __int64)ret);
247
+ else if (flags_v & 8)
248
+ // fake_float does nothing, to allow the compiler to use ST(0)
249
+ // which was in fact set by ptr_v()
250
+ return rb_float_new(fake_float());
251
+
252
+ return INT2VAL((unsigned)ret);
253
+ }
254
+
255
+ // this is the function that is called on behalf of all callbacks
256
+ // we're called through callback_handler (asm), itself called from the unique
257
+ // callback generated by callback_alloc
258
+ // heavy stack magick at work here !
259
+ // TODO float args / float retval / ret __int64
260
+ uintptr_t do_callback_handler(uintptr_t ori_retaddr, uintptr_t caller_id, uintptr_t arg0)
261
+ {
262
+ uintptr_t *addr = &arg0;
263
+ unsigned i, ret;
264
+ VALUE args = rb_ary_new2(8);
265
+
266
+ // copy our args to a ruby-accessible buffer
267
+ for (i=0U ; i<8U ; ++i)
268
+ ARY_PTR(args)[i] = INT2VAL(*addr++);
269
+ RArray(args)->len = 8U; // len == 8, no need to ARY_LEN/EMBED stuff
270
+
271
+ ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(caller_id), args);
272
+
273
+ // dynldr.callback will give us the arity (in bytes) of the callback in args[0]
274
+ // we just put the stack lifting offset in caller_id for the asm stub to use
275
+ caller_id = VAL2INT(ARY_PTR(args)[0]);
276
+
277
+ return VAL2INT(ret);
278
+ }
279
+
280
+ #elif defined __amd64__
281
+
282
+ uintptr_t do_invoke(uintptr_t, uintptr_t, uintptr_t*);
283
+ double fake_float(void);
284
+
285
+ // invoke a symbol
286
+ // args is an array of Integers
287
+ // flags: 1 stdcall 2 fastcall 4 ret_64bits 8 ret_float
288
+ // TODO float args
289
+ static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags)
290
+ {
291
+ if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 16)
292
+ rb_raise(*rb_eArgError, "bad args");
293
+
294
+ uintptr_t flags_v = VAL2INT(flags);
295
+ uintptr_t ptr_v = VAL2INT(ptr);
296
+ int i, argsz;
297
+ uintptr_t args_c[16];
298
+ uintptr_t ret;
299
+ uintptr_t (*ptr_f)(uintptr_t, ...) = (void*)ptr_v;
300
+
301
+ argsz = (int)ARY_LEN(args);
302
+ for (i=0 ; i<argsz ; ++i)
303
+ args_c[i] = VAL2INT(ARY_PTR(args)[i]);
304
+
305
+ for (i=argsz ; i<16 ; ++i)
306
+ args_c[i] = 0;
307
+
308
+ if (argsz <= 4)
309
+ ret = ptr_f(args_c[0], args_c[1], args_c[2], args_c[3]);
310
+ else
311
+ ret = ptr_f(args_c[0], args_c[1], args_c[2], args_c[3],
312
+ args_c[4], args_c[5], args_c[6], args_c[7],
313
+ args_c[8], args_c[9], args_c[10], args_c[11],
314
+ args_c[12], args_c[13], args_c[14], args_c[15]);
315
+
316
+ if (flags_v & 8)
317
+ return rb_float_new(fake_float());
318
+
319
+ return INT2VAL(ret);
320
+ }
321
+
322
+ uintptr_t do_callback_handler(uintptr_t cb_id __attribute__((register(rax))),
323
+ uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
324
+ uintptr_t arg4, uintptr_t arg5, uintptr_t arg6, uintptr_t arg7)
325
+ {
326
+ uintptr_t ret;
327
+ VALUE args = rb_ary_new2(8);
328
+ VALUE *ptr = ARY_PTR(args);
329
+
330
+ RArray(args)->len = 8;
331
+ ptr[0] = INT2VAL(arg0);
332
+ ptr[1] = INT2VAL(arg1);
333
+ ptr[2] = INT2VAL(arg2);
334
+ ptr[3] = INT2VAL(arg3);
335
+ ptr[4] = INT2VAL(arg4);
336
+ ptr[5] = INT2VAL(arg5);
337
+ ptr[6] = INT2VAL(arg6);
338
+ ptr[7] = INT2VAL(arg7);
339
+
340
+ ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(cb_id), args);
341
+
342
+ return VAL2INT(ret);
343
+ }
344
+ #endif
345
+
346
+ int Init_dynldr(void) __attribute__((export_as(Init_<insertfilenamehere>))) // to patch before parsing to match the .so name
347
+ {
348
+ dynldr = rb_const_get(rb_const_get(*rb_cObject, rb_intern("Metasm")), rb_intern("DynLdr"));
349
+ rb_define_singleton_method(dynldr, "memory_read", memory_read, 2);
350
+ rb_define_singleton_method(dynldr, "memory_read_int", memory_read_int, 1);
351
+ rb_define_singleton_method(dynldr, "memory_write", memory_write, 2);
352
+ rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2);
353
+ rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1);
354
+ rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1);
355
+ rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1);
356
+ rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2);
357
+ rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3);
358
+ rb_define_const(dynldr, "CALLBACK_TARGET",
359
+ #ifdef __i386__
360
+ INT2VAL((VALUE)&callback_handler));
361
+ #elif defined __amd64__
362
+ INT2VAL((VALUE)&do_callback_handler));
363
+ #endif
364
+ rb_define_const(dynldr, "CALLBACK_ID_0", INT2VAL((VALUE)&callback_id_0));
365
+ rb_define_const(dynldr, "CALLBACK_ID_1", INT2VAL((VALUE)&callback_id_1));
366
+ return 0;
367
+ }
368
+ EOS
369
+
370
+ # see the note in compile_bin_module
371
+ # this is a dynamic resolver for the ruby symbols we use
372
+ DYNLDR_C_PE_HACK = <<EOS
373
+ #line #{__LINE__}
374
+
375
+ void* get_peb(void);
376
+
377
+ // check if the wstr s1 contains 'ruby' (case-insensitive)
378
+ static void *wstrcaseruby(short *s1, int len)
379
+ {
380
+ int i = 0;
381
+ int match = 0;
382
+
383
+ static char *want = "ruby"; // cant contain the same letter twice
384
+
385
+ while (i < len) {
386
+ if (want[match] == (s1[i] | 0x20)) { // downcase cmp
387
+ if (match == 3)
388
+ return s1+i-match;
389
+ } else
390
+ match = 0;
391
+ if (want[match] == (s1[i] | 0x20))
392
+ ++match;
393
+ ++i;
394
+ }
395
+
396
+ return 0;
397
+ }
398
+
399
+ asm(".text"); // TODO fix compiler
400
+ #ifdef __x86_64__
401
+ asm("get_peb: mov rax, gs:[60h] ret");
402
+ #endif
403
+ #ifdef __i386__
404
+ asm("get_peb: mov eax, fs:[30h] ret");
405
+
406
+ // 1st arg for ld_rb_imp == Init retaddr
407
+ asm("Init_dynldr: call load_ruby_imports jmp Init_dynldr_real");
408
+ #endif
409
+
410
+ struct _lmodule {
411
+ struct _lmodule *next; // list_head
412
+ void *; void *; void*; void*; void*;
413
+ uintptr_t base, entry, size;
414
+ short; short; short*;
415
+ short len, maxlen;
416
+ short *basename;
417
+ };
418
+
419
+ struct _peb {
420
+ void*; void*; void*;
421
+ struct {
422
+ int; int; void*;
423
+ struct _lmodule *inloadorder; // list_head
424
+ } *ldr;
425
+ };
426
+
427
+ // find the ruby library in the loaded modules list of the interpreter through the PEB
428
+ static uintptr_t find_ruby_module_peb(void)
429
+ {
430
+ struct _lmodule *ptr;
431
+ void *base;
432
+ struct _peb *peb = get_peb();
433
+
434
+ base = &peb->ldr->inloadorder;
435
+ ptr = ((struct _lmodule *)base)->next;
436
+ ptr = ptr->next; // skip the first entry = ruby.exe
437
+ while (ptr != base) {
438
+ if (wstrcaseruby(ptr->basename, ptr->len/2))
439
+ return ptr->base;
440
+ ptr = ptr->next;
441
+ }
442
+
443
+ return 0;
444
+ }
445
+
446
+ // find the ruby library from an address in the ruby module (Init_dynldr retaddr)
447
+ static uintptr_t find_ruby_module_mem(uintptr_t someaddr)
448
+ {
449
+ // could __try{}, but with no imports we're useless anyway.
450
+ uintptr_t ptr = someaddr & (-0x10000);
451
+ while (*((unsigned __int16 *)ptr) != 'ZM') // XXX too weak?
452
+ ptr -= 0x10000;
453
+ return ptr;
454
+ }
455
+
456
+ // a table of string offsets, base = the table itself
457
+ // each entry is a ruby function, whose address is to be put inplace in the table
458
+ // last entry == 0
459
+ extern void *ruby_import_table;
460
+
461
+ __stdcall uintptr_t GetProcAddress(uintptr_t, char *);
462
+ // resolve the ruby imports found by offset in ruby_import_table
463
+ int load_ruby_imports(uintptr_t rbaddr)
464
+ {
465
+ uintptr_t ruby_module;
466
+ uintptr_t *ptr;
467
+ char *table;
468
+
469
+ static int loaded_ruby_imports = 0;
470
+ if (loaded_ruby_imports)
471
+ return 0;
472
+ loaded_ruby_imports = 1;
473
+
474
+ if (rbaddr)
475
+ ruby_module = find_ruby_module_mem(rbaddr);
476
+ else
477
+ ruby_module = find_ruby_module_peb();
478
+
479
+ if (!ruby_module)
480
+ return 0;
481
+
482
+ ptr = &ruby_import_table;
483
+ table = (char*)ptr;
484
+
485
+ while (*ptr) {
486
+ if (!(*ptr = GetProcAddress(ruby_module, table+*ptr)))
487
+ // TODO warning or something
488
+ return 0;
489
+ ptr++;
490
+ }
491
+
492
+ return 1;
493
+ }
494
+
495
+ #ifdef __x86_64__
496
+ #define DLL_PROCESS_ATTACH 1
497
+ __stdcall int DllMain(void *handle, int reason, void *res)
498
+ {
499
+ if (reason == DLL_PROCESS_ATTACH)
500
+ return load_ruby_imports(0);
501
+ return 1;
502
+ }
503
+ #endif
504
+ EOS
505
+
506
+ # ia32 asm source for the native component: handles ABI stuff
507
DYNLDR_ASM_IA32 = <<EOS
.text
// __int64 do_invoke_fastcall(target, argcount, args)
// stack on entry (after the prologue): [ebp+8] target, [ebp+12] argcount, [ebp+16] args
do_invoke_fastcall:
    push ebp
    mov ebp, esp

    // load ecx/edx, fix arg/argcount
    mov eax, [ebp+16]
    mov ecx, [eax]
    mov edx, [eax+4]
    add eax, 8
    mov [ebp+16], eax

    // argcount -= 2 (ecx/edx hold the first two args); the fixed-up count must be
    // stored back, as _do_invoke_copy reloads it from [ebp+12] for rep movsd.
    // Without the store, n dwords would be copied into room for n-2, overwriting
    // the saved ebp and the return address.
    mov eax, [ebp+12]
    sub eax, 2
    jb _do_invoke_call
    mov [ebp+12], eax
    jmp _do_invoke_copy

// __int64 do_invoke(target, argcount, args) / do_invoke_stdcall: same stack layout
// 'leave' restores esp after the call, so both caller- and callee-cleanup targets work
do_invoke:
do_invoke_stdcall:
    push ebp
    mov ebp, esp
    mov eax, [ebp+12]
_do_invoke_copy:
    // make room for args
    shl eax, 2
    jz _do_invoke_call
    sub esp, eax
    // copy args
    push esi
    push edi
    push ecx
    mov ecx, [ebp+12]
    mov esi, [ebp+16]
    mov edi, esp
    add edi, 12
    rep movsd
    pop ecx
    pop edi
    pop esi
    // go
_do_invoke_call:
    call dword ptr [ebp+8]
    leave
fake_float:
    ret

// entrypoint for callbacks: to the native api, give the addr of some code
// that will push a unique cb_identifier and jmp here
callback_handler:
    // stack here: cb_id_retaddr, cb_native_retaddr, cb_native_arg0, ...
    // swap caller retaddr & cb_identifier, fix cb_identifier from the stub
    pop eax     // stuff pushed by the stub
    sub eax, callback_id_1 - callback_id_0  // fixup cb_id_retaddr to get a cb id
    xchg eax, [esp] // put on stack, retrieve original retaddr
    push eax    // push intended cb retaddr
    call do_callback_handler
    // do_cb_handler puts the nr of bytes we have to pop from the stack in its 1st arg (eg [esp+4] here)
    // stack here: cb_native_retaddr, ruby_popcount, cb_native_arg0, ...
    pop ecx     // get retaddr w/o interfering with retval (incl 64bits eax+edx)
    add esp, [esp]  // pop cb args if stdcall
    add esp, 4  // pop cb_id/popcount
    jmp ecx     // return

// those are valid callback id
// most of the time only 2 cb is used (source: meearse)
// so this prevents dynamic allocation of a whole page for the most common case
callback_id_0: call callback_handler
callback_id_1: call callback_handler
EOS
577
+
578
+ # x86_64 asm source for the native component: handles ABI stuff
579
+ DYNLDR_ASM_X86_64 = <<EOS
580
+ .text
581
+ fake_float:
582
+ ret
583
+
584
+ // entrypoint for callbacks: to the native api, give the addr of some code
585
+ // that will save its address in rax and jump to do_cb_h
586
+ callback_id_0:
587
+ lea rax, [rip-$_+callback_id_0]
588
+ jmp do_callback_handler
589
+ callback_id_1:
590
+ lea rax, [rip-$_+callback_id_1]
591
+ jmp do_callback_handler
592
+ EOS
593
+
594
+ # initialization
595
+ # load (build if needed) the binary module
596
def self.start
  # Loads the native dynldr module, (re)building it first when it is missing
  # or older than this source file, then registers the two callback stubs
  # exported by the module (CALLBACK_ID_0 / CALLBACK_ID_1).

  # callbacks are really just a list of asm 'call', so we share them among subclasses of DynLdr
  @@callback_addrs = []   # list of all allocated callback addrs (in use or not)
  @@callback_table = {}   # addr -> cb structure (inuse only)

  binmodule = find_bin_path

  # File.exist? and not File.exists?: the latter was deprecated, then removed in Ruby 3.2
  stale = !File.exist?(binmodule) || File.stat(binmodule).mtime < File.stat(__FILE__).mtime
  compile_binary_module(host_exe, host_cpu, binmodule) if stale

  require binmodule

  @@callback_addrs << CALLBACK_ID_0 << CALLBACK_ID_1
end
611
+
612
+ # compile the dynldr binary ruby module for a specific arch/cpu/modulename
613
def self.compile_binary_module(exe, cpu, modulename)
  # Builds the native module with the executable format class +exe+ for
  # +cpu+, and writes it to +modulename+ as a shared library.
  target = exe.new(cpu)

  # the exported Init_ symbol has to match the string later given to 'require',
  # so patch the placeholder in the C source before compiling it
  init_suffix = File.basename(modulename, '.so')
  target.compile_c(DYNLDR_C.gsub('<insertfilenamehere>', init_suffix))

  # asm glue matching the target architecture (nil for any other cpu)
  asm_source =
    case cpu.shortname
    when 'ia32' then DYNLDR_ASM_IA32
    when 'x64' then DYNLDR_ASM_X86_64
    end
  target.assemble(asm_source)

  # adjust how the binary links against the ruby library, if needed
  compile_binary_module_hack(target)

  # write the shared library to disk
  target.encode_file(modulename, :lib)
end
630
+
631
def self.compile_binary_module_hack(bin)
  # The module uses ruby symbols, but the actual ruby library filename is not
  # known (it depends on the ruby version, the platform, ...), so the linkage
  # of the compiled binary is tweaked here according to its format.
  format = bin.shortname

  if format == 'elf'
    # the lib is already loaded by the main ruby executable: no DT_NEEDED needed
    class << bin
      def automagic_symbols(*args)
        # let the regular plt generation happen
        super(*args)
        # then drop the specific lib names
        @tag.delete 'NEEDED'
      end
    end
    return
  end

  # any other non-coff format is left untouched
  return unless format == 'coff'

  # coff: the ruby entries are removed from the PE imports and resolved
  # manually when the module gets loaded. We generate something like:
  #  .data
  #  ruby_import_table:
  #  rb_cObject dd str_rb_cObject - ruby_import_table
  #  riat_rb_intern dd str_rb_intern - ruby_import_table
  #  dd 0
  #
  #  .rodata
  #  str_rb_cObject db "rb_cObject", 0
  #  str_rb_intern db "rb_intern", 0
  #
  #  .text
  #  rb_intern: jmp [riat_rb_intern]
  #
  # and the PE_HACK code walks ruby_import_table on startup to resolve the symbols
  bin.imports.delete_if { |id| id.libname =~ /ruby/ }

  text = bin.sections.find { |s| s.name == '.text' }.encoded
  rb_syms = text.reloc_externals.grep(/^rb_/)

  # pointer-sized data directive for the table entries
  dd = (bin.cpu.size == 64 ? 'dq' : 'dd')

  init_symbol = text.export.keys.grep(/^Init_/).first
  raise 'no Init_mname symbol found' unless init_symbol
  if bin.cpu.size == 32
    # hax to find the base of libruby under Win98 (peb sux):
    # the compiled Init_ becomes Init_..._real, the PE_HACK asm provides the new Init_
    real_entry = text.export.delete(init_symbol)
    text.export[init_symbol + '_real'] = real_entry
    bin.unique_labels_cache.delete(init_symbol)
  end

  # the C glue: getprocaddress etc
  bin.compile_c(DYNLDR_C_PE_HACK.gsub('Init_dynldr', init_symbol))

  # symbol name strings go in .rodata
  bin.parse('.rodata')
  table_rows = rb_syms.map { |sym|
    # raw symbol name
    str_label = bin.parse_new_label('str', "db #{sym.inspect}, 0")

    slot_label = sym
    unless sym =~ /^rb_[ce][A-Z]/
      # not a data import (rb_cClass / rb_eException): create a function thunk
      thunk = PE::ImportDirectory::Import.new
      thunk.thunk = sym
      slot_label = thunk.target = 'riat_' + str_label
      bin.arch_encode_thunk(text, thunk)    # encode a jmp [importtable]
    end

    # table entry, initialized with the relative offset of the symbol name
    "#{slot_label} #{dd} #{str_label} - ruby_import_table"
  }

  # the table lives in .data and is null-terminated
  asm_table = ['.data', '.align 8', 'ruby_import_table:'] + table_rows + ["#{dd} 0"]

  # now parse & assemble the table
  bin.assemble(asm_table.join("\n"))
end
719
+
720
+ # find the path of the binary module
721
+ # if none exists, create a path writeable by the current user
722
def self.find_bin_path
  # Returns the path of the binary module for the running host.
  # The file name encodes the host OS, the cpu and a '19' marker for ruby >= 1.9.
  # The copy next to this script is preferred; when it is missing or older
  # than this script, a path inside a user-writable directory is returned.
  # Raises LoadError when no writable directory can be found.
  fname = ['dynldr', host_arch, host_cpu.shortname,
           ('19' if RUBY_VERSION >= '1.9')].compact.join('-') + '.so'
  dir = File.dirname(__FILE__)
  binmodule = File.join(dir, fname)

  # File.exist? and not File.exists?: the latter was deprecated, then removed in Ruby 3.2
  if !File.exist?(binmodule) || File.stat(binmodule).mtime < File.stat(__FILE__).mtime
    dir = find_write_dir
    raise LoadError, "no writable dir to put the DynLdr ruby module, try to run as root" unless dir
    binmodule = File.join(dir, fname)
  end

  binmodule
end
735
+
736
+ # find a writeable directory
737
+ # searches this script directory, $HOME / %APPDATA% / %USERPROFILE%, or $TMP
738
def self.find_write_dir
  # Returns a directory path (String).
  #
  # A directory is accepted only if a scratch file can really be created
  # and removed in it; any exception during the probe (including a nil
  # path) counts as "not writable".
  can_write = lambda { |path|
    begin
      probe = '/_test_write_' + rand(1<<32).to_s
      if File.writable?(path) and
         File.open(path+probe, 'w') { true } and
         File.unlink(path+probe)
        true
      end
    rescue
    end
  }

  # 1st choice: next to this script
  here = File.dirname(__FILE__)
  return here if can_write[here]

  # 2nd choice: a '.metasm' subdirectory of the user's home-like directory
  home = ENV['HOME'] || ENV['APPDATA'] || ENV['USERPROFILE']
  unless can_write[home]
    # last resort: temp dir or current directory (not probed)
    return ENV['TMP'] || ENV['TEMP'] || '.'
  end
  subdir = File.join(home, '.metasm')
  Dir.mkdir subdir unless File.directory? subdir
  subdir
end
758
+
759
+ # CPU suitable for compiling code for the current running host
760
def self.host_cpu
  # The cpu object is built once from RUBY_PLATFORM and memoized in @cpu.
  # Raises LoadError for platforms matching neither pattern.
  @cpu ||= begin
    if RUBY_PLATFORM =~ /i[3-6]86/
      Ia32.new
    elsif RUBY_PLATFORM =~ /x86_64|x64/i
      X86_64.new
    else
      raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}"
    end
  end
end
768
+
769
+ # returns whether we run on linux or windows
770
def self.host_arch
  # Returns :linux or :windows depending on RUBY_PLATFORM.
  # Raises LoadError for any other platform string.
  return :linux if RUBY_PLATFORM =~ /linux/i
  return :windows if RUBY_PLATFORM =~ /mswin|mingw|cygwin/i
  raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}"
end
777
+
778
+ # ExeFormat suitable as current running host native module
779
def self.host_exe
  # Maps the host OS symbol to the matching executable format class.
  # Only the selected constant is evaluated.
  arch = host_arch
  if arch == :linux
    ELF
  elsif arch == :windows
    PE
  end
end
785
+
786
+ # parse a C string into the @cp parser, create it if needed
787
def self.parse_c(src)
  # src - String of C source, fed to the shared parser returned by cp
  # (cp creates the parser on first use)
  parser = cp
  parser.parse(src)
end
790
+
791
+ # compile a C fragment into a Shellcode, honors the host ABI
792
def self.compile_c(src)
  # src - String of C source
  # Returns the result of Shellcode#assemble on the compiled source.
  #
  # A fresh parser is used on purpose: the shared one (self.cp) is left
  # untouched.
  # XXX could we reuse self.cp ? (for its macros etc)
  parser = C::Parser.new(host_exe.new(host_cpu))
  parser.parse(src)
  shellcode = Shellcode.new(host_cpu)
  compiler = host_cpu.new_ccompiler(parser, shellcode)
  shellcode.assemble(compiler.compile)
end
800
+
801
+ # retrieve the library where a symbol is to be found (uses AutoImport)
802
def self.lib_from_sym(symname)
  # symname - symbol name used as key in the host's export table
  # Returns whatever that table holds for the symbol
  # (presumably a library name, nil if unknown - TODO confirm).
  arch = host_arch
  exports =
    if arch == :linux
      GNUExports::EXPORT
    elsif arch == :windows
      WindowsExports::EXPORT
    end
  exports[symname]
end
808
+
809
+ # reads a bunch of C code, creates binding for those according to the prototypes
810
+ # handles enum/defines to define constants
811
+ # For each toplevel method prototype, it generates a ruby method in this module, the name is lowercased
812
+ # For each numeric macro/enum, it also generates an uppercase named constant
813
+ # When such a function is called with a lambda as argument, a callback is created for the duration of the call
814
+ # and destroyed afterwards ; use callback_alloc_c to get a callback id with longer life span
815
def self.new_api_c(proto, fromlib=nil)
  # proto   - String of C source (prototypes, enums, #defines)
  # fromlib - optional library to resolve every symbol from; when nil the
  #           library is looked up per symbol through lib_from_sym
  proto += "\n;"	# allow 'int foo()' and '#include <bar>'
  parse_c(proto)

  # iterate over a copy: each handled variable is removed from the
  # parser's toplevel symbol table as we go
  cp.toplevel.symbol.dup.each_value { |v|
    next if not v.kind_of? C::Variable	# enums
    cp.toplevel.symbol.delete v.name
    lib = fromlib || lib_from_sym(v.name)
    addr = sym_addr(lib, v.name)
    # these values are treated as "symbol not found"
    if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff
      api_not_found(lib, v)
      next
    end

    rbname = c_func_name_to_rb(v.name)
    if not v.type.kind_of? C::Function
      # not a function, simply return the symbol address
      # TODO struct/table access through hash/array ?
      class << self ; self ; end.send(:define_method, rbname) { addr }
      next
    end
    next if v.initializer	# inline & stuff
    puts "new_api_c: load method #{rbname} from #{lib}" if $DEBUG

    new_caller_for(v, rbname, addr)
  }

  # predeclare constants from enums
  # macros are handled in const_missing (too slow to (re)do here everytime)
  # TODO #define FOO(v) (v<<1)|1 => create ruby counterpart
  cexist = constants.inject({}) { |h, c| h.update c.to_s => true }
  cp.toplevel.symbol.each { |k, v|
    next if not v.kind_of? ::Integer
    n = c_const_name_to_rb(k)
    const_set(n, v) if not cexist[n]
  }

  # avoid WTF rb warning: toplevel const TRUE referenced by WinAPI::TRUE
  # The macro value is looked up by its C name (k), as const_missing does;
  # the converted ruby name (n) is only used for the ruby constant.
  cp.lexer.definition.each_key { |k|
    n = c_const_name_to_rb(k)
    if not cexist[n] and Object.const_defined?(n) and v = cp.macro_numeric(k)
      const_set(n, v)
    end
  }
end
861
+
862
+ # const_missing handler: will try to find a matching #define
863
def self.const_missing(c)
  # c - the missing constant name
  # Returns the numeric value of the matching macro (and caches it as a
  # real constant), otherwise defers to the default handler.

  # infinite loop on autorequire C..
  return super(c) unless defined?(@cp) and @cp

  wanted = c.to_s
  macros = @cp.lexer.definition
  # exact C name first, then any macro whose converted name matches
  macro = if macros[wanted]
    wanted
  else
    macros.keys.find { |k| c_const_name_to_rb(k) == wanted }
  end

  value = (@cp.macro_numeric(macro) if macro)
  return super(c) if not value

  const_set(c, value)
  value
end
881
+
882
+ # when defining ruby wrapper for C methods, the ruby method name is the string returned by this function from the C name
883
def self.c_func_name_to_rb(name)
  # name - C identifier (anything responding to to_s)
  # Returns a lowercase String. Characters outside [a-z0-9_] are replaced
  # by their hex encoding, and an 'm' is prepended when the result does
  # not start with a letter.
  escaped = name.to_s.gsub(/[^a-z0-9_]/i) { |ch| ch.unpack('H*')[0] }
  rb = escaped.downcase
  rb =~ /^[a-z]/ ? rb : "m#{rb}"
end
888
+
889
+ # when defining ruby wrapper for C constants (numeric define/enum), the ruby const name is
890
+ # the string returned by this function from the C name. It should follow ruby standards (1st letter upcase)
891
def self.c_const_name_to_rb(name)
  # name - C identifier (anything responding to to_s)
  # Returns an uppercase String. Characters outside [a-z0-9_] are replaced
  # by their hex encoding, and a 'C' is prepended when the result does not
  # start with a letter.
  escaped = name.to_s.gsub(/[^a-z0-9_]/i) { |ch| ch.unpack('H*')[0] }
  rb = escaped.upcase
  rb =~ /^[A-Z]/ ? rb : "C#{rb}"
end
896
+
897
def self.api_not_found(lib, func)
  # Hook called by new_api_c when a symbol cannot be resolved.
  # lib  - the library that was searched
  # func - object whose #name is the missing symbol
  # Always raises a RuntimeError describing both.
  message = "could not find symbol #{func.name.inspect} in #{lib.inspect}"
  raise message
end
900
+
901
+ # called whenever a native API is called through new_api_c/new_func_c/etc
902
def self.trace_invoke(api, args)
  # Tracing hook: intentionally does nothing and returns nil.
  # api  - name of the method being invoked
  # args - converted argument list
  #p api
  nil
end
905
+
906
+ # define a new method 'name' in the current module to invoke the raw method at addr addr
907
+ # translates ruby args to raw args using the specified prototype
908
def self.new_caller_for(proto, name, addr)
  # proto - C variable holding the function prototype
  # name  - name of the singleton method to define
  # addr  - raw address handed to raw_invoke
  #
  # flag bits passed to raw_invoke:
  #  1 = stdcall, 2 = fastcall, 4 = 8-byte integral return, 8 = float return
  flags = 0
  flags |= 1 if proto.has_attribute('stdcall')
  flags |= 2 if proto.has_attribute('fastcall')
  flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8
  flags |= 8 if proto.type.type.float?
  class << self ; self ; end.send(:define_method, name) { |*a|
    raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.length}" if a.length != proto.type.args.length and not proto.type.varargs

    auto_cb = []	# list of automatic C callbacks generated from lambdas
    begin
      # convert the arglist suitably for raw_invoke
      a = a.zip(proto.type.args).map { |ra, fa|
        aa = convert_rb2c(fa, ra, :cb_list => auto_cb)
        if fa and fa.type.integral? and cp.sizeof(fa) == 8 and host_cpu.size == 32
          # 64bit integer on a 32bit host: pass as two 32bit words
          aa = [aa & 0xffff_ffff, (aa >> 32) & 0xffff_ffff]
          aa.reverse! if host_cpu.endianness != :little
        end
        aa
      }.flatten

      trace_invoke(name, a)
      # do it
      ret = raw_invoke(addr, a, flags)
    ensure
      # cleanup autogenerated callbacks, even when the argument conversion
      # or the call itself raised, so their ids are not leaked
      auto_cb.each { |cb| callback_free(cb) }
    end

    # interpret return value
    convert_ret_c2rb(proto, ret)
  }
end
939
+
940
+ # ruby object -> integer suitable as arg for raw_invoke
941
def self.convert_rb2c(formal, val, opts=nil)
  # formal - C description of the formal parameter (may be nil)
  # val    - ruby value supplied by the caller
  # opts   - optional Hash; opts[:cb_list], when present, collects the ids
  #          of callbacks allocated here
  case val
  when String
    str_ptr(val)
  when Proc
    cb_id = callback_alloc_cobj(formal, val)
    opts[:cb_list] << cb_id if opts and opts[:cb_list]
    cb_id
  when C::AllocCStruct
    str_ptr(val.str) + val.stroff
  when Hash
    unless formal.type.pointed.kind_of?(C::Struct)
      raise "invalid argument #{val.inspect} for #{formal}, need a struct*"
    end
    struct_buf = cp.alloc_c_struct(formal, val)
    # GC trick: lifetime(buf) >= lifetime(hash) (XXX or until next call to convert_rb2c)
    val.instance_variable_set('@rb2c', struct_buf)
    str_ptr(struct_buf.str)
  #when Float; val	# TODO handle that in raw_invoke C code
  else
    int = (val.to_i rescue 0)	# NaN, Infinity, etc
    most_negative = -(1<<(cp.typesize[:ptr]*8-1))
    # ruby bug... raise -0x8000_0000: out of ulong range
    int == most_negative ? -int : int
  end
end
960
+
961
+ # this method is called from the C part to run the ruby code corresponding to
962
+ # a given C callback allocated by callback_alloc_c
963
def self.callback_run(id, args)
  # id   - callback id, key into @@callback_table
  # args - Array of raw arguments; args[0] is overwritten before returning
  # Returns the value returned by the ruby proc.
  entry = @@callback_table[id]
  if not entry
    raise "invalid callback #{'%x' % id} not in #{@@callback_table.keys.map { |c| c.to_s(16) }}"
  end

  # decode from a copy: the conversion consumes the list
  pending = args.dup
  rb_args =
    if entry[:proto]
      entry[:proto].args.map { |fa| convert_cbargs_c2rb(fa, pending) }
    else
      []
    end

  # run it
  result = entry[:proc].call(*rb_args)

  # the C code expects to find in args[0] the amount of stack fixing needed for __stdcall callbacks
  args[0] = entry[:abi_stackfix] || 0
  result
end
977
+
978
+ # C raw cb arg -> ruby object
979
+ # will combine 2 32bit values for 1 64bit arg
980
def self.convert_cbargs_c2rb(formal, rawargs)
  # formal  - C variable describing the callback parameter
  # rawargs - Array of raw integer words; entries are shifted off as they
  #           are consumed
  # Returns the ruby value produced by convert_c2rb.
  val = rawargs.shift
  if formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32
    # 64bit integer on a 32bit host: rebuilt from two consecutive words.
    # The endianness must come from host_cpu ('host.cpu' referenced an
    # undefined name and raised NameError on this path).
    if host_cpu.endianness == :little
      val |= rawargs.shift << 32
    else
      val = (val << 32) | rawargs.shift
    end
  end

  convert_c2rb(formal, val)
end
992
+
993
+ # interpret a raw decoded C value to a ruby value according to the C prototype
994
+ # handles signedness etc
995
+ # XXX val is an integer, how to decode Floats etc ? raw binary ptr ?
996
def self.convert_c2rb(formal, val)
  # formal - C variable or C type describing val
  # val    - raw integer value
  # Returns val sign-adjusted for signed integral types, or nil for a
  # null pointer.
  ctype = formal.kind_of?(C::Variable) ? formal.type : formal
  if ctype.integral? and ctype.signed?
    val = Expression.make_signed(val, 8*cp.sizeof(ctype))
  end
  return nil if ctype.pointer? and val == 0
  val
end
1002
+
1003
+ # C raw ret -> ruby obj
1004
+ # can be overridden for system-specific calling convention (eg return 0/-1 => raise an error)
1005
def self.convert_ret_c2rb(fproto, ret)
  # fproto - C variable or C function type of the callee
  # ret    - raw value returned by the native call
  functype = fproto.kind_of?(C::Variable) ? fproto.type : fproto
  # resolve typedefs to reach the function type, then take its return type
  rettype = functype.untypedef.type
  convert_c2rb(rettype, ret)
end
1009
+
1010
def self.cp
  # shared C parser, created for the host format/cpu on first use
  @cp ||= C::Parser.new(host_exe.new(host_cpu))
end

def self.cp=(c)
  # replace the shared C parser
  @cp = c
end
1012
+
1013
+ # allocate a callback for a given C prototype (string)
1014
+ # accepts full C functions (with body) (only 1 at a time) or toplevel 'asm' statement
1015
def self.callback_alloc_c(proto, &b)
  # proto - String of C source: a prototype, a full function, or a
  #         toplevel asm statement
  # b     - ruby block run by the callback (prototype-only case)
  # Returns a pointer to compiled code, or a callback id.
  # Raises 'empty prototype' when no function is declared.
  proto += ';'	# allow 'int foo()'
  parse_c(proto)
  top = cp.toplevel
  func = top.symbol.values.find { |sym| sym.kind_of? C::Variable and sym.type.kind_of? C::Function }

  has_body = (func and func.initializer)
  if not has_body and not top.statements.any? { |st| st.kind_of? C::Asm }
    # bare prototype: bind it to the ruby block
    raise 'empty prototype' if not func
    top.symbol.delete func.name
    return callback_alloc_cobj(func, b)
  end

  # C function with a body, or raw asm: compile and map it
  top.statements.delete_if { |st| st.kind_of? C::Asm }
  top.symbol.delete func.name if func
  code = compile_c(proto)
  page = memory_alloc(code.encoded.length)
  code.base_addr = page
  # TODO fixup external calls
  memory_write page, code.encode_string
  memory_perm page, code.encoded.length, 'rwx'
  page
end
1036
+
1037
+ # allocates a callback for a given C prototype (C variable, pointer to func accepted)
1038
def self.callback_alloc_cobj(proto, b)
  # proto - C variable or type (pointers are followed), may be nil
  # b     - the proc to run when the callback fires
  # Returns the callback id registered in @@callback_table.
  declared = proto
  functype = proto
  functype = functype.type if functype and functype.kind_of? C::Variable
  functype = functype.pointed while functype and functype.pointer?

  # each argument takes at least one pointer-sized slot
  stack_bytes = lambda { |arglist|
    arglist.inject(0) { |sum, arg| sum + [cp.sizeof(arg), cp.typesize[:ptr]].max }
  }

  id = callback_find_id
  entry = { :id => id, :proc => b, :proto => functype }
  if declared and declared.has_attribute('stdcall')
    entry[:abi_stackfix] = stack_bytes[functype.args]
  end
  if declared and declared.has_attribute('fastcall')
    # supersedes stdcall: the first two arguments are not counted
    entry[:abi_stackfix] = stack_bytes[functype.args[2..-1].to_a]
  end
  @@callback_table[id] = entry
  id
end
1052
+
1053
+ # releases a callback id, so that it may be reused by a later callback_alloc
1054
def self.callback_free(id)
  # id - callback id previously returned by a callback_alloc_* method
  # Returns the removed table entry (nil if the id was not registered).
  table = @@callback_table
  table.delete(id)
end
1057
+
1058
+ # finds a free callback id, allocates a new page if needed
1059
def self.callback_find_id
  # Returns a stub address not currently registered in @@callback_table.
  # Raises 'callback_alloc bouh' if none is available even after filling
  # a new page.
  unused = lambda { @@callback_addrs.find { |a| not @@callback_table[a] } }

  id = unused.call
  return id if id

  # no free slot left: fill a fresh page with stubs
  cb_page = memory_alloc(4096)
  sc = Shellcode.new(host_cpu, cb_page)
  # stub source and the distance assumed between two consecutive stubs
  stub, stride =
    case sc.cpu.shortname
    when 'ia32'
      # TODO should be 4096/5 stubs, but the parser/compiler is really too slow
      ["call #{CALLBACK_TARGET}", 5]
    when 'x64'
      # XXX approximative..
      ["1: lea rax, [rip-$_+1b] jmp #{CALLBACK_TARGET}", 12]
    end
  if stub
    128.times { |i|
      @@callback_addrs << cb_page + i*stride
      sc.parse stub
    }
  end
  sc.assemble
  memory_write cb_page, sc.encode_string
  memory_perm cb_page, 4096, 'rx'

  id = unused.call
  raise 'callback_alloc bouh' if not id
  id
end
1088
+
1089
+ # compile a bunch of C functions, defines methods in this module to call them
1090
+ # returns the raw pointer to the code page
1091
+ # if given a block, run the block and then undefine all the C functions & free memory
1092
def self.new_func_c(src)
  # src - String of C source containing function definitions
  # Without a block: returns the pointer to the mapped code.
  # With a block: returns the block's value; the generated methods are
  # removed and the memory freed afterwards.
  # Raises "unknown symbol ..." when an external reference cannot be resolved.
  sc = compile_c(src)
  ptr = memory_alloc(sc.encoded.length)
  sc.base_addr = ptr
  bd = sc.encoded.binding(ptr)
  sc.encoded.reloc_externals.uniq.each { |ext|
    addr = sym_addr(lib_from_sym(ext), ext)
    # an integer 0 is truthy in ruby, so "sym_addr(...) or raise" could not
    # detect a failed lookup; test the same values new_api_c treats as
    # "not found"
    if not addr or addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff
      raise "unknown symbol #{ext}"
    end
    bd[ext] = addr
  }
  sc.encoded.fixup(bd)
  memory_write ptr, sc.encode_string
  memory_perm ptr, sc.encoded.length, 'rwx'
  parse_c(src)	# XXX the Shellcode parser may have defined stuff / interpreted C another way...
  defs = []
  # iterate over a copy: variables are removed from the symbol table as we go
  cp.toplevel.symbol.dup.each_value { |v|
    next if not v.kind_of? C::Variable
    cp.toplevel.symbol.delete v.name
    next if not v.type.kind_of? C::Function or not v.initializer
    next if not off = sc.encoded.export[v.name]
    rbname = c_func_name_to_rb(v.name)
    new_caller_for(v, rbname, ptr+off)
    defs << rbname
  }
  if block_given?
    begin
      yield
    ensure
      defs.each { |d| class << self ; self ; end.send(:remove_method, d) }
      memory_free ptr
    end
  else
    ptr
  end
end
1123
+
1124
+ # compile an asm sequence, callable with the ABI of the C prototype given
1125
+ # function name comes from the prototype
1126
def self.new_func_asm(proto, asm)
  # proto - String holding exactly one C function prototype (no body)
  # asm   - String of assembly source implementing it
  # Without a block: returns the pointer to the mapped code.
  # With a block: returns the block's value; the generated method is
  # removed and the memory freed afterwards.
  # Raises on an invalid prototype or an unresolved external symbol.
  proto += "\n;"
  old = cp.toplevel.symbol.keys
  parse_c(proto)
  # the prototype must have added exactly one new toplevel symbol
  news = cp.toplevel.symbol.keys - old
  raise "invalid proto #{proto}" if news.length != 1
  f = cp.toplevel.symbol[news.first]
  raise "invalid func proto #{proto}" if not f.name or not f.type.kind_of? C::Function or f.initializer
  cp.toplevel.symbol.delete f.name

  sc = Shellcode.assemble(host_cpu, asm)
  ptr = memory_alloc(sc.encoded.length)
  bd = sc.encoded.binding(ptr)
  sc.encoded.reloc_externals.uniq.each { |ext|
    addr = sym_addr(lib_from_sym(ext), ext)
    # an integer 0 is truthy in ruby, so "sym_addr(...) or raise" could not
    # detect a failed lookup; test the same values new_api_c treats as
    # "not found"
    if not addr or addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff
      raise "unknown symbol #{ext}"
    end
    bd[ext] = addr
  }
  sc.encoded.fixup(bd)
  memory_write ptr, sc.encode_string
  memory_perm ptr, sc.encoded.length, 'rwx'
  rbname = c_func_name_to_rb(f.name)
  new_caller_for(f, rbname, ptr)
  if block_given?
    begin
      yield
    ensure
      class << self ; self ; end.send(:remove_method, rbname)
      memory_free ptr
    end
  else
    ptr
  end
end
1156
+
1157
+ # allocate a C::AllocCStruct to hold a specific struct defined in a previous new_api_c
1158
def self.alloc_c_struct(structname, values={})
  # structname - name of a struct known to the shared C parser
  # values     - optional initial values, passed through unchanged
  parser = cp
  parser.alloc_c_struct(structname, values)
end
1161
+
1162
+ # return a C::AllocCStruct mapped over the string (with optional offset)
1163
+ # str may be an EncodedData
1164
def self.decode_c_struct(structname, str, off=0)
  # structname - name of a struct known to the shared C parser
  # str        - String, or EncodedData (its #data is used)
  # off        - offset into str, 0 by default
  raw = str.kind_of?(EncodedData) ? str.data : str
  cp.decode_c_struct(structname, raw, off)
end
1168
+
1169
+ # allocate a C::AllocCStruct holding an Array of typename variables
1170
+ # if len is an int, it holds the ary length, or it can be an array of initialisers
1171
+ # eg alloc_c_ary("int", [4, 5, 28])
1172
def self.alloc_c_ary(typename, len)
  # typename - C type name of the elements
  # len      - element count, or an Array of initialisers
  parser = cp
  parser.alloc_c_ary(typename, len)
end
1175
+
1176
+ # return a C::AllocCStruct holding an array of type typename mapped over str
1177
def self.decode_c_ary(typename, len, str, off=0)
  # typename - C type name of the elements
  # len      - number of elements
  # str      - backing string the array is mapped over
  # off      - offset into str, 0 by default
  parser = cp
  parser.decode_c_ary(typename, len, str, off)
end
1180
+
1181
+ # return an AllocCStruct holding an array of 1 element of type typename
1182
+ # access its value with obj[0]
1183
+ # useful when you need a pointer to an int that will be filled by an API: use alloc_c_ptr('int')
1184
def self.alloc_c_ptr(typename, init=nil)
  # typename - C type name of the single element
  # init     - optional initial value; when falsy the element is left to
  #            the parser's default for a 1-element array
  if init
    cp.alloc_c_ary(typename, [init])
  else
    cp.alloc_c_ary(typename, 1)
  end
end
1187
+
1188
+ # return the binary version of a ruby value encoded as a C variable
1189
+ # only integral types handled for now
1190
def self.encode_c_value(var, val)
  # var - C variable/type description used for the encoding
  # val - ruby value to encode
  parser = cp
  parser.encode_c_value(var, val)
end
1193
+
1194
+ # decode a C variable
1195
+ # only integral types handled for now
1196
def self.decode_c_value(str, var, off=0)
  # str - binary string holding the value
  # var - C variable/type description used for the decoding
  # off - offset into str, 0 by default
  parser = cp
  parser.decode_c_value(str, var, off)
end
1199
+
1200
+ # read a 0-terminated string from memory
1201
def self.memory_read_strz(ptr, szmax=4096)
  # ptr   - address to read from
  # szmax - maximum number of bytes to read
  # Returns the bytes before the first NUL, or everything that was read
  # when no NUL is found.

  # first read stops at the end of ptr's 4k page (or at szmax)
  first_len = [((ptr + 0x1000) & ~0xfff) - ptr, szmax].min
  chunk = memory_read(ptr, first_len)
  nul = chunk.index(?\0)
  return chunk[0, nul] if nul
  return chunk if first_len >= szmax

  # not terminated within the page: retry with the full length
  chunk = memory_read(ptr, szmax)
  nul = chunk.index(?\0)
  nul ? chunk[0, nul] : chunk
end
1213
+
1214
+ # read a 0-terminated wide string from memory
1215
def self.memory_read_wstrz(ptr, szmax=4096)
  # ptr   - address to read from
  # szmax - maximum number of bytes to read
  # Returns the bytes before the first zero 16-bit little-endian unit, or
  # everything that was read when none is found.

  # first read stops at the end of ptr's 4k page (or at szmax)
  first_len = [((ptr + 0x1000) & ~0xfff) - ptr, szmax].min
  chunk = memory_read(ptr, first_len)
  zero_at = chunk.unpack('v*').index(0)
  return chunk[0, 2*zero_at] if zero_at
  return chunk if first_len >= szmax

  # not terminated within the page: retry with the full length
  chunk = memory_read(ptr, szmax)
  zero_at = chunk.unpack('v*').index(0)
  zero_at ? chunk[0, 2*zero_at] : chunk
end
1229
+
1230
+ # automatically build/load the bin module
1231
+ start
1232
+
1233
+ case host_arch
1234
+ when :windows
1235
+
1236
+ new_api_c <<EOS, 'kernel32'
1237
+ #define PAGE_NOACCESS 0x01
1238
+ #define PAGE_READONLY 0x02
1239
+ #define PAGE_READWRITE 0x04
1240
+ #define PAGE_WRITECOPY 0x08
1241
+ #define PAGE_EXECUTE 0x10
1242
+ #define PAGE_EXECUTE_READ 0x20
1243
+ #define PAGE_EXECUTE_READWRITE 0x40
1244
+ #define PAGE_EXECUTE_WRITECOPY 0x80
1245
+ #define PAGE_GUARD 0x100
1246
+ #define PAGE_NOCACHE 0x200
1247
+ #define PAGE_WRITECOMBINE 0x400
1248
+
1249
+ #define MEM_COMMIT 0x1000
1250
+ #define MEM_RESERVE 0x2000
1251
+ #define MEM_DECOMMIT 0x4000
1252
+ #define MEM_RELEASE 0x8000
1253
+ #define MEM_FREE 0x10000
1254
+ #define MEM_PRIVATE 0x20000
1255
+ #define MEM_MAPPED 0x40000
1256
+ #define MEM_RESET 0x80000
1257
+ #define MEM_TOP_DOWN 0x100000
1258
+ #define MEM_WRITE_WATCH 0x200000
1259
+ #define MEM_PHYSICAL 0x400000
1260
+ #define MEM_LARGE_PAGES 0x20000000
1261
+ #define MEM_4MB_PAGES 0x80000000
1262
+
1263
+ __stdcall uintptr_t VirtualAlloc(uintptr_t addr, uintptr_t size, int type, int prot);
1264
+ __stdcall uintptr_t VirtualFree(uintptr_t addr, uintptr_t size, int freetype);
1265
+ __stdcall uintptr_t VirtualProtect(uintptr_t addr, uintptr_t size, int prot, int *oldprot);
1266
+ EOS
1267
+
1268
+ # allocate some memory suitable for code allocation (ie VirtualAlloc)
1269
def self.memory_alloc(sz)
  # sz - number of bytes to allocate
  # Returns the value returned by VirtualAlloc; pages are reserved and
  # committed read/write, no fixed address is requested.
  alloc_type = MEM_RESERVE|MEM_COMMIT
  virtualalloc(nil, sz, alloc_type, PAGE_READWRITE)
end
1272
+
1273
+ # free memory allocated through memory_alloc
1274
def self.memory_free(addr)
  # addr - address previously returned by memory_alloc
  # the size argument is 0: the whole allocation is released
  release_size = 0
  virtualfree(addr, release_size, MEM_RELEASE)
end
1277
+
1278
+ # change memory permissions - perm in [r rw rx rwx]
1279
def self.memory_perm(addr, len, perm)
  # addr - start address
  # len  - length in bytes
  # perm - one of r / rw / rx / rwx (case-insensitive, string or symbol)
  prot_by_name = { 'r' => PAGE_READONLY, 'rw' => PAGE_READWRITE, 'rx' => PAGE_EXECUTE_READ,
                   'rwx' => PAGE_EXECUTE_READWRITE }
  prot = prot_by_name[perm.to_s.downcase]
  # scratch 8-byte buffer for the 'oldprot' output parameter
  oldprot = str_ptr([0].pack('C')*8)
  virtualprotect(addr, len, prot, oldprot)
end
1284
+
1285
+ when :linux
1286
+
1287
+ new_api_c <<EOS
1288
+ #define PROT_READ 0x1
1289
+ #define PROT_WRITE 0x2
1290
+ #define PROT_EXEC 0x4
1291
+
1292
+ #define MAP_PRIVATE 0x2
1293
+ #define MAP_ANONYMOUS 0x20
1294
+
1295
+ uintptr_t mmap(uintptr_t addr, uintptr_t length, int prot, int flags, uintptr_t fd, uintptr_t offset);
1296
+ uintptr_t munmap(uintptr_t addr, uintptr_t length);
1297
+ uintptr_t mprotect(uintptr_t addr, uintptr_t len, int prot);
1298
+ EOS
1299
+
1300
+ # allocate some memory suitable for code allocation (ie mmap)
1301
def self.memory_alloc(sz)
  # sz - number of bytes to map
  # Returns the value returned by mmap (anonymous private read/write mapping).
  @mmaps ||= {}	# save size for mem_free
  prot = PROT_READ|PROT_WRITE
  mapping_flags = MAP_PRIVATE|MAP_ANONYMOUS
  mapped = mmap(nil, sz, prot, mapping_flags, -1, 0)
  @mmaps[mapped] = sz
  mapped
end
1307
+
1308
+ # free memory allocated through memory_alloc
1309
def self.memory_free(addr)
  # addr - address previously returned by memory_alloc
  # The recorded size is removed from @mmaps as it is used, so the table
  # does not keep entries for freed mappings and a repeated free of the
  # same address cannot unmap again with a stale length.
  size = @mmaps.delete(addr)
  munmap(addr, size)
end
1312
+
1313
+ # change memory permissions - perm 'rwx'
1314
+ # on PaX-enabled systems, this may need a non-mprotect-restricted ruby interpreter
1315
def self.memory_perm(addr, len, perm)
  # addr - start address (rounded down to a 4k boundary)
  # len  - length in bytes (extended so the range covers whole 4k pages)
  # perm - any combination of the letters r, w, x (case-insensitive)
  wanted = perm.to_s.downcase
  span = (len + (addr & 0xfff) + 0xfff) & ~0xfff
  base = addr & ~0xfff

  prot = 0
  prot |= PROT_READ if wanted.include? 'r'
  prot |= PROT_WRITE if wanted.include? 'w'
  prot |= PROT_EXEC if wanted.include? 'x'
  mprotect(base, span, prot)
end
1326
+
1327
+ end
1328
+ end
1329
+ end