metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,1329 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # This sample creates the dynldr.so ruby shared object that allows interaction with
7
+ # native libraries
8
+ # x86 only for now
9
+
10
+ module Metasm
11
+ class DynLdr
12
+ # basic C defs for ruby internals - 1.8 and 1.9 compat - x86/x64
13
+ RUBY_H = <<EOS
14
+ #line #{__LINE__}
15
+ typedef uintptr_t VALUE;
16
+
17
+ #if defined(__PE__) && defined(__x86_64__)
18
+ // sonovabeep
19
+ #define INT2VAL(v) rb_ull2inum(v)
20
+ #define VAL2INT(v) rb_num2ull(v)
21
+ #else
22
+ #define INT2VAL(v) rb_uint2inum(v)
23
+ #define VAL2INT(v) rb_num2ulong(v)
24
+ #endif
25
+
26
+ struct rb_string_t {
27
+ VALUE flags;
28
+ VALUE klass;
29
+ VALUE len;
30
+ char *ptr;
31
+ union {
32
+ long capa;
33
+ VALUE shared;
34
+ } aux;
35
+ };
36
+ #define RString(x) ((struct rb_string_t *)(x))
37
+
38
+ struct rb_array_t {
39
+ VALUE flags;
40
+ VALUE klass;
41
+ VALUE len;
42
+ union {
43
+ long capa;
44
+ VALUE shared;
45
+ } aux;
46
+ VALUE *ptr;
47
+ };
48
+ #define RArray(x) ((struct rb_array_t *)(x))
49
+
50
+ // TODO improve autoimport to handle data imports correctly
51
+ extern VALUE *rb_cObject __attribute__((import));
52
+ extern VALUE *rb_eRuntimeError __attribute__((import));
53
+ extern VALUE *rb_eArgError __attribute__((import));
54
+
55
+ #define Qfalse ((VALUE)0)
56
+ #define Qtrue ((VALUE)2)
57
+ #define Qnil ((VALUE)4)
58
+
59
+ // allows generating a ruby1.9 dynldr.so from ruby1.8
60
+ #ifndef DYNLDR_RUBY_19
61
+ #define DYNLDR_RUBY_19 #{RUBY_VERSION >= '1.9' ? 1 : 0}
62
+ #endif
63
+
64
+ #if DYNLDR_RUBY_19
65
+ #define T_STRING 0x05
66
+ #define T_ARRAY 0x07
67
+ #define T_FIXNUM 0x15
68
+ #define T_MASK 0x1f
69
+ #define RSTRING_NOEMBED (1<<13)
70
+ #define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->len)
71
+ #define STR_LEN(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->len : (RString(o)->flags >> 14) & 0x1f)
72
+ #define RARRAY_EMBED (1<<13)
73
+ #define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
74
+ #define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 3) : RArray(o)->len)
75
+ #else
76
+ #define T_STRING 0x07
77
+ #define T_ARRAY 0x09
78
+ #define T_FIXNUM 0x0a
79
+ #define T_MASK 0x3f
80
+ #define STR_PTR(o) (RString(o)->ptr)
81
+ #define STR_LEN(o) (RString(o)->len)
82
+ #define ARY_PTR(o) (RArray(o)->ptr)
83
+ #define ARY_LEN(o) (RArray(o)->len)
84
+ #endif
85
+
86
+ #define TYPE(x) (((VALUE)(x) & 1) ? T_FIXNUM : (((VALUE)(x) & 3) || ((VALUE)(x) < 7)) ? 0x40 : RString(x)->flags & T_MASK)
87
+
88
+ VALUE rb_uint2inum(VALUE);
89
+ VALUE rb_ull2inum(unsigned long long);
90
+ VALUE rb_num2ulong(VALUE);
91
+ unsigned long long rb_num2ull(VALUE);
92
+ VALUE rb_str_new(const char* ptr, long len); // alloc + memcpy + 0term
93
+ VALUE rb_ary_new2(int len);
94
+ VALUE rb_float_new(double);
95
+
96
+ VALUE rb_intern(char *);
97
+ VALUE rb_funcall(VALUE recv, VALUE id, int nargs, ...);
98
+ VALUE rb_const_get(VALUE, VALUE);
99
+ VALUE rb_raise(VALUE, char*, ...);
100
+ void rb_define_const(VALUE, char *, VALUE);
101
+ void rb_define_method(VALUE, char *, VALUE (*)(), int);
102
+ void rb_define_singleton_method(VALUE, char *, VALUE (*)(), int);
103
+
104
+ EOS
105
+
106
+ # generic C source for the native component, ruby glue
107
+ DYNLDR_C = <<EOS
108
+ #{RUBY_H}
109
+ #line #{__LINE__}
110
+
111
+ #ifdef __PE__
112
+ __stdcall uintptr_t LoadLibraryA(char *);
113
+ __stdcall uintptr_t GetProcAddress(uintptr_t, char *);
114
+
115
+ #define os_load_lib(l) LoadLibraryA(l)
116
+ #define os_load_sym(l, s) GetProcAddress(l, s)
117
+ #define os_load_sym_ord(l, s) GetProcAddress(l, (char*)s)
118
+ #endif
119
+
120
+ #ifdef __ELF__
121
+ asm(".pt_gnu_stack rw");
122
+
123
+ #define RTLD_LAZY 1
124
+ uintptr_t dlopen(char*, int);
125
+ uintptr_t dlsym(uintptr_t, char*);
126
+
127
+ #define os_load_lib(l) dlopen(l, RTLD_LAZY)
128
+ #define os_load_sym(l, s) dlsym(l, s)
129
+ #define os_load_sym_ord(l, s) 0U
130
+ #endif
131
+
132
+ extern int *cb_ret_table;
133
+ extern void *callback_handler;
134
+ extern void *callback_id_0;
135
+ extern void *callback_id_1;
136
+
137
+ static VALUE dynldr;
138
+
139
+
140
+ static VALUE memory_read(VALUE self, VALUE addr, VALUE len)
141
+ {
142
+ return rb_str_new((char*)VAL2INT(addr), (long)VAL2INT(len));
143
+ }
144
+
145
+ static VALUE memory_read_int(VALUE self, VALUE addr)
146
+ {
147
+ return INT2VAL(*(uintptr_t*)VAL2INT(addr));
148
+ }
149
+
150
+ static VALUE memory_write(VALUE self, VALUE addr, VALUE val)
151
+ {
152
+ if (TYPE(val) != T_STRING)
153
+ rb_raise(*rb_eArgError, "mem_write needs a String");
154
+
155
+ char *src = STR_PTR(val);
156
+ char *dst = (char*)VAL2INT(addr);
157
+ unsigned len = (unsigned)STR_LEN(val);
158
+ while (len--)
159
+ *dst++ = *src++;
160
+ return val;
161
+ }
162
+
163
+ static VALUE memory_write_int(VALUE self, VALUE addr, VALUE val)
164
+ {
165
+ *(uintptr_t *)VAL2INT(addr) = VAL2INT(val);
166
+ return Qtrue;
167
+ }
168
+
169
+ static VALUE str_ptr(VALUE self, VALUE str)
170
+ {
171
+ if (TYPE(str) != T_STRING)
172
+ rb_raise(*rb_eArgError, "Invalid ptr");
173
+ return INT2VAL((uintptr_t)STR_PTR(str));
174
+ }
175
+
176
+ // return the VALUE of an object (different of .object_id for Symbols, maybe others)
177
+ static VALUE rb_obj_to_value(VALUE self, VALUE obj)
178
+ {
179
+ return INT2VAL((uintptr_t)obj);
180
+ }
181
+
182
+ // return the ruby object at VALUE
183
+ // USE WITH CAUTION, passing invalid values will segfault the interpreter/GC
184
+ static VALUE rb_value_to_obj(VALUE self, VALUE val)
185
+ {
186
+ return VAL2INT(val);
187
+ }
188
+
189
+ // load a symbol from a lib byname, byordinal if integral
190
+ static VALUE sym_addr(VALUE self, VALUE lib, VALUE func)
191
+ {
192
+ uintptr_t h, p;
193
+
194
+ if (TYPE(lib) == T_STRING)
195
+ h = os_load_lib(STR_PTR(lib));
196
+ else if (TYPE(lib) == T_FIXNUM)
197
+ h = VAL2INT(lib);
198
+ else
199
+ rb_raise(*rb_eArgError, "Invalid lib");
200
+
201
+ if (TYPE(func) != T_STRING && TYPE(func) != T_FIXNUM)
202
+ rb_raise(*rb_eArgError, "Invalid func");
203
+
204
+ if (TYPE(func) == T_FIXNUM)
205
+ p = os_load_sym_ord(h, VAL2INT(func));
206
+ else
207
+ p = os_load_sym(h, STR_PTR(func));
208
+
209
+ return INT2VAL(p);
210
+ }
211
+
212
+ #ifdef __i386__
213
+
214
+ __int64 do_invoke_stdcall(unsigned, unsigned, unsigned*);
215
+ __int64 do_invoke_fastcall(unsigned, unsigned, unsigned*);
216
+ __int64 do_invoke(unsigned, unsigned, unsigned*);
217
+ double fake_float(void);
218
+
219
+ // invoke a symbol
220
+ // args is an array of Integers
221
+ // flags: 1 stdcall 2 fastcall 4 ret_64bits 8 ret_float
222
+ // TODO float args
223
+ static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags)
224
+ {
225
+ if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 64)
226
+ rb_raise(*rb_eArgError, "bad args");
227
+
228
+ uintptr_t flags_v = VAL2INT(flags);
229
+ uintptr_t ptr_v = VAL2INT(ptr);
230
+ unsigned i, argsz;
231
+ uintptr_t args_c[64];
232
+ __int64 ret;
233
+
234
+ argsz = ARY_LEN(args);
235
+ for (i=0U ; i<argsz ; ++i)
236
+ args_c[i] = VAL2INT(ARY_PTR(args)[i]);
237
+
238
+ if (flags_v & 2)
239
+ ret = do_invoke_fastcall(ptr_v, argsz, args_c); // supercedes stdcall
240
+ else if (flags_v & 1)
241
+ ret = do_invoke_stdcall(ptr_v, argsz, args_c);
242
+ else
243
+ ret = do_invoke(ptr_v, argsz, args_c);
244
+
245
+ if (flags_v & 4)
246
+ return rb_ull2inum((unsigned __int64)ret);
247
+ else if (flags_v & 8)
248
+ // fake_float does nothing, to allow the compiler to use ST(0)
249
+ // which was in fact set by ptr_v()
250
+ return rb_float_new(fake_float());
251
+
252
+ return INT2VAL((unsigned)ret);
253
+ }
254
+
255
+ // this is the function that is called on behalf of all callbacks
256
+ // we're called through callback_handler (asm), itself called from the unique
257
+ // callback generated by callback_alloc
258
+ // heavy stack magick at work here !
259
+ // TODO float args / float retval / ret __int64
260
+ uintptr_t do_callback_handler(uintptr_t ori_retaddr, uintptr_t caller_id, uintptr_t arg0)
261
+ {
262
+ uintptr_t *addr = &arg0;
263
+ unsigned i, ret;
264
+ VALUE args = rb_ary_new2(8);
265
+
266
+ // copy our args to a ruby-accessible buffer
267
+ for (i=0U ; i<8U ; ++i)
268
+ ARY_PTR(args)[i] = INT2VAL(*addr++);
269
+ RArray(args)->len = 8U; // len == 8, no need to ARY_LEN/EMBED stuff
270
+
271
+ ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(caller_id), args);
272
+
273
+ // dynldr.callback will give us the arity (in bytes) of the callback in args[0]
274
+ // we just put the stack lifting offset in caller_id for the asm stub to use
275
+ caller_id = VAL2INT(ARY_PTR(args)[0]);
276
+
277
+ return VAL2INT(ret);
278
+ }
279
+
280
+ #elif defined __amd64__
281
+
282
+ uintptr_t do_invoke(uintptr_t, uintptr_t, uintptr_t*);
283
+ double fake_float(void);
284
+
285
+ // invoke a symbol
286
+ // args is an array of Integers
287
+ // flags: 1 stdcall 2 fastcall 4 ret_64bits 8 ret_float
288
+ // TODO float args
289
+ static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags)
290
+ {
291
+ if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 16)
292
+ rb_raise(*rb_eArgError, "bad args");
293
+
294
+ uintptr_t flags_v = VAL2INT(flags);
295
+ uintptr_t ptr_v = VAL2INT(ptr);
296
+ int i, argsz;
297
+ uintptr_t args_c[16];
298
+ uintptr_t ret;
299
+ uintptr_t (*ptr_f)(uintptr_t, ...) = (void*)ptr_v;
300
+
301
+ argsz = (int)ARY_LEN(args);
302
+ for (i=0 ; i<argsz ; ++i)
303
+ args_c[i] = VAL2INT(ARY_PTR(args)[i]);
304
+
305
+ for (i=argsz ; i<16 ; ++i)
306
+ args_c[i] = 0;
307
+
308
+ if (argsz <= 4)
309
+ ret = ptr_f(args_c[0], args_c[1], args_c[2], args_c[3]);
310
+ else
311
+ ret = ptr_f(args_c[0], args_c[1], args_c[2], args_c[3],
312
+ args_c[4], args_c[5], args_c[6], args_c[7],
313
+ args_c[8], args_c[9], args_c[10], args_c[11],
314
+ args_c[12], args_c[13], args_c[14], args_c[15]);
315
+
316
+ if (flags_v & 8)
317
+ return rb_float_new(fake_float());
318
+
319
+ return INT2VAL(ret);
320
+ }
321
+
322
+ uintptr_t do_callback_handler(uintptr_t cb_id __attribute__((register(rax))),
323
+ uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
324
+ uintptr_t arg4, uintptr_t arg5, uintptr_t arg6, uintptr_t arg7)
325
+ {
326
+ uintptr_t ret;
327
+ VALUE args = rb_ary_new2(8);
328
+ VALUE *ptr = ARY_PTR(args);
329
+
330
+ RArray(args)->len = 8;
331
+ ptr[0] = INT2VAL(arg0);
332
+ ptr[1] = INT2VAL(arg1);
333
+ ptr[2] = INT2VAL(arg2);
334
+ ptr[3] = INT2VAL(arg3);
335
+ ptr[4] = INT2VAL(arg4);
336
+ ptr[5] = INT2VAL(arg5);
337
+ ptr[6] = INT2VAL(arg6);
338
+ ptr[7] = INT2VAL(arg7);
339
+
340
+ ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(cb_id), args);
341
+
342
+ return VAL2INT(ret);
343
+ }
344
+ #endif
345
+
346
+ int Init_dynldr(void) __attribute__((export_as(Init_<insertfilenamehere>))) // to patch before parsing to match the .so name
347
+ {
348
+ dynldr = rb_const_get(rb_const_get(*rb_cObject, rb_intern("Metasm")), rb_intern("DynLdr"));
349
+ rb_define_singleton_method(dynldr, "memory_read", memory_read, 2);
350
+ rb_define_singleton_method(dynldr, "memory_read_int", memory_read_int, 1);
351
+ rb_define_singleton_method(dynldr, "memory_write", memory_write, 2);
352
+ rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2);
353
+ rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1);
354
+ rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1);
355
+ rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1);
356
+ rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2);
357
+ rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3);
358
+ rb_define_const(dynldr, "CALLBACK_TARGET",
359
+ #ifdef __i386__
360
+ INT2VAL((VALUE)&callback_handler));
361
+ #elif defined __amd64__
362
+ INT2VAL((VALUE)&do_callback_handler));
363
+ #endif
364
+ rb_define_const(dynldr, "CALLBACK_ID_0", INT2VAL((VALUE)&callback_id_0));
365
+ rb_define_const(dynldr, "CALLBACK_ID_1", INT2VAL((VALUE)&callback_id_1));
366
+ return 0;
367
+ }
368
+ EOS
369
+
370
+ # see the note in compile_bin_module
371
+ # this is a dynamic resolver for the ruby symbols we use
372
+ DYNLDR_C_PE_HACK = <<EOS
373
+ #line #{__LINE__}
374
+
375
+ void* get_peb(void);
376
+
377
+ // check if the wstr s1 contains 'ruby' (case-insensitive)
378
+ static void *wstrcaseruby(short *s1, int len)
379
+ {
380
+ int i = 0;
381
+ int match = 0;
382
+
383
+ static char *want = "ruby"; // cant contain the same letter twice
384
+
385
+ while (i < len) {
386
+ if (want[match] == (s1[i] | 0x20)) { // downcase cmp
387
+ if (match == 3)
388
+ return s1+i-match;
389
+ } else
390
+ match = 0;
391
+ if (want[match] == (s1[i] | 0x20))
392
+ ++match;
393
+ ++i;
394
+ }
395
+
396
+ return 0;
397
+ }
398
+
399
+ asm(".text"); // TODO fix compiler
400
+ #ifdef __x86_64__
401
+ asm("get_peb: mov rax, gs:[60h] ret");
402
+ #endif
403
+ #ifdef __i386__
404
+ asm("get_peb: mov eax, fs:[30h] ret");
405
+
406
+ // 1st arg for ld_rb_imp == Init retaddr
407
+ asm("Init_dynldr: call load_ruby_imports jmp Init_dynldr_real");
408
+ #endif
409
+
410
+ struct _lmodule {
411
+ struct _lmodule *next; // list_head
412
+ void *; void *; void*; void*; void*;
413
+ uintptr_t base, entry, size;
414
+ short; short; short*;
415
+ short len, maxlen;
416
+ short *basename;
417
+ };
418
+
419
+ struct _peb {
420
+ void*; void*; void*;
421
+ struct {
422
+ int; int; void*;
423
+ struct _lmodule *inloadorder; // list_head
424
+ } *ldr;
425
+ };
426
+
427
+ // find the ruby library in the loaded modules list of the interpreter through the PEB
428
+ static uintptr_t find_ruby_module_peb(void)
429
+ {
430
+ struct _lmodule *ptr;
431
+ void *base;
432
+ struct _peb *peb = get_peb();
433
+
434
+ base = &peb->ldr->inloadorder;
435
+ ptr = ((struct _lmodule *)base)->next;
436
+ ptr = ptr->next; // skip the first entry = ruby.exe
437
+ while (ptr != base) {
438
+ if (wstrcaseruby(ptr->basename, ptr->len/2))
439
+ return ptr->base;
440
+ ptr = ptr->next;
441
+ }
442
+
443
+ return 0;
444
+ }
445
+
446
+ // find the ruby library from an address in the ruby module (Init_dynldr retaddr)
447
+ static uintptr_t find_ruby_module_mem(uintptr_t someaddr)
448
+ {
449
+ // could __try{}, but with no imports we're useless anyway.
450
+ uintptr_t ptr = someaddr & (-0x10000);
451
+ while (*((unsigned __int16 *)ptr) != 'ZM') // XXX too weak?
452
+ ptr -= 0x10000;
453
+ return ptr;
454
+ }
455
+
456
+ // a table of string offsets, base = the table itself
457
+ // each entry is a ruby function, whose address is to be put inplace in the table
458
+ // last entry == 0
459
+ extern void *ruby_import_table;
460
+
461
+ __stdcall uintptr_t GetProcAddress(uintptr_t, char *);
462
+ // resolve the ruby imports found by offset in ruby_import_table
463
+ int load_ruby_imports(uintptr_t rbaddr)
464
+ {
465
+ uintptr_t ruby_module;
466
+ uintptr_t *ptr;
467
+ char *table;
468
+
469
+ static int loaded_ruby_imports = 0;
470
+ if (loaded_ruby_imports)
471
+ return 0;
472
+ loaded_ruby_imports = 1;
473
+
474
+ if (rbaddr)
475
+ ruby_module = find_ruby_module_mem(rbaddr);
476
+ else
477
+ ruby_module = find_ruby_module_peb();
478
+
479
+ if (!ruby_module)
480
+ return 0;
481
+
482
+ ptr = &ruby_import_table;
483
+ table = (char*)ptr;
484
+
485
+ while (*ptr) {
486
+ if (!(*ptr = GetProcAddress(ruby_module, table+*ptr)))
487
+ // TODO warning or something
488
+ return 0;
489
+ ptr++;
490
+ }
491
+
492
+ return 1;
493
+ }
494
+
495
+ #ifdef __x86_64__
496
+ #define DLL_PROCESS_ATTACH 1
497
+ __stdcall int DllMain(void *handle, int reason, void *res)
498
+ {
499
+ if (reason == DLL_PROCESS_ATTACH)
500
+ return load_ruby_imports(0);
501
+ return 1;
502
+ }
503
+ #endif
504
+ EOS
505
+
506
+ # ia32 asm source for the native component: handles ABI stuff
507
+ DYNLDR_ASM_IA32 = <<EOS
508
+ .text
509
+ do_invoke_fastcall:
510
+ push ebp
511
+ mov ebp, esp
512
+
513
+ // load ecx/edx, fix arg/argcount
514
+ mov eax, [ebp+16]
515
+ mov ecx, [eax]
516
+ mov edx, [eax+4]
517
+ add eax, 8
518
+ mov [ebp+16], eax
519
+
520
+ mov eax, [ebp+12]
521
+ sub eax, 2
522
+ jb _do_invoke_call
523
+ jmp _do_invoke_copy
524
+
525
+ do_invoke:
526
+ do_invoke_stdcall:
527
+ push ebp
528
+ mov ebp, esp
529
+ mov eax, [ebp+12]
530
+ _do_invoke_copy:
531
+ // make room for args
532
+ shl eax, 2
533
+ jz _do_invoke_call
534
+ sub esp, eax
535
+ // copy args
536
+ push esi
537
+ push edi
538
+ push ecx
539
+ mov ecx, [ebp+12]
540
+ mov esi, [ebp+16]
541
+ mov edi, esp
542
+ add edi, 12
543
+ rep movsd
544
+ pop ecx
545
+ pop edi
546
+ pop esi
547
+ // go
548
+ _do_invoke_call:
549
+ call dword ptr [ebp+8]
550
+ leave
551
+ fake_float:
552
+ ret
553
+
554
+ // entrypoint for callbacks: to the native api, give the addr of some code
555
+ // that will push a unique cb_identifier and jmp here
556
+ callback_handler:
557
+ // stack here: cb_id_retaddr, cb_native_retaddr, cb_native_arg0, ...
558
+ // swap caller retaddr & cb_identifier, fix cb_identifier from the stub
559
+ pop eax // stuff pushed by the stub
560
+ sub eax, callback_id_1 - callback_id_0 // fixup cb_id_retaddr to get a cb id
561
+ xchg eax, [esp] // put on stack, retrieve original retaddr
562
+ push eax // push intended cb retaddr
563
+ call do_callback_handler
564
+ // do_cb_handler puts the nr of bytes we have to pop from the stack in its 1st arg (eg [esp+4] here)
565
+ // stack here: cb_native_retaddr, ruby_popcount, cb_native_arg0, ...
566
+ pop ecx // get retaddr w/o interfering with retval (incl 64bits eax+edx)
567
+ add esp, [esp] // pop cb args if stdcall
568
+ add esp, 4 // pop cb_id/popcount
569
+ jmp ecx // return
570
+
571
+ // those are valid callback id
572
+ // most of the time only 2 cb is used (source: meearse)
573
+ // so this prevents dynamic allocation of a whole page for the most common case
574
+ callback_id_0: call callback_handler
575
+ callback_id_1: call callback_handler
576
+ EOS
577
+
578
+ # x86_64 asm source for the native component: handles ABI stuff
579
+ DYNLDR_ASM_X86_64 = <<EOS
580
+ .text
581
+ fake_float:
582
+ ret
583
+
584
+ // entrypoint for callbacks: to the native api, give the addr of some code
585
+ // that will save its address in rax and jump to do_cb_h
586
+ callback_id_0:
587
+ lea rax, [rip-$_+callback_id_0]
588
+ jmp do_callback_handler
589
+ callback_id_1:
590
+ lea rax, [rip-$_+callback_id_1]
591
+ jmp do_callback_handler
592
+ EOS
593
+
594
+ # initialization
595
+ # load (build if needed) the binary module
596
# Resets the callback bookkeeping, builds the native module when it is missing or
# older than this source file, loads it with +require+, then registers the two
# static callback stubs the module exports (CALLBACK_ID_0 / CALLBACK_ID_1).
# Returns the list of known callback addresses.
def self.start
  # callbacks are really just a list of asm 'call', so we share them among subclasses of DynLdr
  @@callback_addrs = [] # list of all allocated callback addrs (in use or not)
  @@callback_table = {} # addr -> cb structure (inuse only)

  binmodule = find_bin_path

  # File.exist? rather than File.exists?: the latter was deprecated and then removed in ruby 3.2
  if not File.exist?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime
    compile_binary_module(host_exe, host_cpu, binmodule)
  end

  require binmodule

  # both constants are defined by the native module itself (see Init_dynldr in DYNLDR_C)
  @@callback_addrs << CALLBACK_ID_0 << CALLBACK_ID_1
end
611
+
612
+ # compile the dynldr binary ruby module for a specific arch/cpu/modulename
613
# exe:: executable format class, instantiated with +cpu+
# cpu:: cpu object; its +shortname+ ('ia32' / 'x64') selects the asm source to assemble
# modulename:: path of the shared object to write; its basename (without '.so')
#              becomes the suffix of the exported Init_ symbol
# Returns whatever bin.encode_file returns.
def self.compile_binary_module(exe, cpu, modulename)
  bin = exe.new(cpu)

  # the Init_ export name must match the string later given to 'require'
  init_suffix = File.basename(modulename, '.so')
  bin.compile_c DYNLDR_C.gsub('<insertfilenamehere>', init_suffix)

  # ABI glue for the target architecture (nil when the cpu is not listed)
  asm_sources = { 'ia32' => DYNLDR_ASM_IA32, 'x64' => DYNLDR_ASM_X86_64 }
  bin.assemble asm_sources[cpu.shortname]

  # adjust the linkage against the ruby library when the format needs it
  compile_binary_module_hack(bin)

  # write the shared library to disk
  bin.encode_file(modulename, :lib)
end
630
+
631
# Rewires how the compiled module reaches the ruby library symbols: the module needs
# them, but the actual ruby lib filename (ruby version, platform, ...) is unknown here.
# 'elf':: overrides bin.automagic_symbols so the 'NEEDED' tag is deleted after the plt generation
# 'coff':: drops the ruby entries of the import directory and emits a hand-made
#          ruby_import_table, filled in at load time by the DYNLDR_C_PE_HACK code
# other:: bin is left untouched
def self.compile_binary_module_hack(bin)
  fmt = bin.shortname

  if fmt == 'elf'
    # the lib is already loaded by the main ruby executable, no DT_NEEDED needed
    class << bin
      def automagic_symbols(*a)
        # do the plt generation
        super(*a)
        # but remove the specific lib names
        @tag.delete 'NEEDED'
      end
    end
    return
  end

  # unhandled format: dont tweak
  return if fmt != 'coff'

  # PE: the ruby part of the IAT is removed, the symbols are resolved by hand on module loading
  bin.imports.delete_if { |id| id.libname =~ /ruby/ }

  # what gets generated is something like:
  #  .data
  #  ruby_import_table:
  #  rb_cObject      dd str_rb_cObject - ruby_import_table
  #  riat_rb_intern  dd str_rb_intern - ruby_import_table
  #                  dd 0
  #
  #  .rodata
  #  str_rb_cObject  db "rb_cObject", 0
  #  str_rb_intern   db "rb_intern", 0
  #
  #  .text
  #  rb_intern: jmp [riat_rb_intern]
  #
  # ruby_import_table is then walked by the PE_HACK code to resolve the symbols on startup

  text = bin.sections.find { |s| s.name == '.text' }.encoded
  rb_syms = text.reloc_externals.grep(/^rb_/)

  # pointer-sized data directive for the table entries
  dd = (bin.cpu.size == 64 ? 'dq' : 'dd')

  init_symbol = text.export.keys.grep(/^Init_/).first
  raise 'no Init_mname symbol found' unless init_symbol
  if bin.cpu.size == 32
    # hax to find the base of libruby under Win98 (peb sux):
    # the original Init_ becomes Init_xx_real, the PE_HACK asm stub takes over the Init_ name
    text.export[init_symbol + '_real'] = text.export.delete(init_symbol)
    bin.unique_labels_cache.delete(init_symbol)
  end

  # the C glue: getprocaddress etc
  bin.compile_c DYNLDR_C_PE_HACK.gsub('Init_dynldr', init_symbol)

  # the symbol name strings go to .rodata
  bin.parse('.rodata')
  table_rows = rb_syms.map do |sym|
    # raw symbol name
    str_label = bin.parse_new_label('str', "db #{sym.inspect}, 0")

    slot_label = sym
    unless sym =~ /^rb_[ce][A-Z]/
      # not a data import (rb_cClass / rb_eException): create a function thunk
      import = PE::ImportDirectory::Import.new
      import.thunk = sym
      slot_label = import.target = 'riat_' + str_label
      bin.arch_encode_thunk(text, import) # encode a jmp [importtable]
    end

    # one IAT slot, initialized with the offset of the name relative to the table
    "#{slot_label} #{dd} #{str_label} - ruby_import_table"
  end

  # the IAT lives in .data and is null-terminated
  asm_table = ['.data', '.align 8', 'ruby_import_table:'] + table_rows + ["#{dd} 0"]

  # now parse & assemble it
  bin.assemble asm_table.join("\n")
end
719
+
720
+ # find the path of the binary module
721
+ # if none exists, create a path writeable by the current user
722
# The file name is built from 'dynldr', host_arch, the host cpu shortname and a
# '19' marker (added when RUBY_VERSION >= '1.9'), joined with '-' and suffixed '.so'.
# The module is looked up next to this source file first; when it is absent or older
# than this file, the path points into the directory given by find_write_dir instead.
# Returns the path as a String.
# Raises LoadError when find_write_dir yields no directory.
def self.find_bin_path
  fname = ['dynldr', host_arch, host_cpu.shortname,
           ('19' if RUBY_VERSION >= '1.9')].compact.join('-') + '.so'
  dir = File.dirname(__FILE__)
  binmodule = File.join(dir, fname)
  # File.exist? rather than File.exists?: the latter was deprecated and then removed in ruby 3.2
  if not File.exist?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime
    # missing or stale: it will have to be (re)built, so we need a place we can write to
    if not dir = find_write_dir
      raise LoadError, "no writable dir to put the DynLdr ruby module, try to run as root"
    end
    binmodule = File.join(dir, fname)
  end
  binmodule
end
735
+
736
# Finds a directory writable by the current user.
# Tries, in order:
#  - the directory of this script
#  - a '.metasm' subdirectory (created if needed) of the first writable
#    directory among $HOME, %APPDATA% and %USERPROFILE%
#  - $TMP, $TEMP, or '.' as a last resort (not checked for writability)
# A directory is considered writable only if a temporary file can actually
# be created in it and deleted again.
def self.find_write_dir
  writable = lambda { |d|
    begin
      probe = File.join(d, '_test_write_' + rand(1<<32).to_s)
      true if File.writable?(d) and
        File.open(probe, 'w') { true } and
        File.unlink(probe)
    rescue
      # nil dir, permission error...: not writable
    end
  }

  dir = File.dirname(__FILE__)
  return dir if writable[dir]

  # try every candidate, not only the first one that is set
  %w[HOME APPDATA USERPROFILE].each { |var|
    base = ENV[var]
    next if not base or not writable[base]
    dir = File.join(base, '.metasm')
    Dir.mkdir dir if not File.directory? dir
    return dir
  }

  ENV['TMP'] || ENV['TEMP'] || '.'
end
758
+
759
# Returns a CPU object able to compile code for the running host.
# The object is created on first use and cached in @cpu: Ia32 when
# RUBY_PLATFORM matches i386..i686, X86_64 when it matches x86_64/x64.
# Raises LoadError for any other platform.
def self.host_cpu
  return @cpu if @cpu

  @cpu =
    if RUBY_PLATFORM =~ /i[3-6]86/
      Ia32.new
    elsif RUBY_PLATFORM =~ /x86_64|x64/i
      X86_64.new
    else
      raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}"
    end
end
768
+
769
# Tells which OS family we run on, from RUBY_PLATFORM:
# :linux, or :windows (mswin / mingw / cygwin).
# Raises LoadError for any other platform.
def self.host_arch
  if RUBY_PLATFORM =~ /linux/i
    :linux
  elsif RUBY_PLATFORM =~ /mswin|mingw|cygwin/i
    :windows
  else
    raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}"
  end
end
777
+
778
# Returns the ExeFormat class used for native modules on the running host:
# ELF on linux, PE on windows (nil for any other host_arch value).
def self.host_exe
  arch = host_arch
  if arch == :linux
    ELF
  elsif arch == :windows
    PE
  end
end
785
+
786
# Feeds a C source string to the shared parser returned by cp
# (which creates the parser on first use).
def self.parse_c(src)
  parser = cp
  parser.parse(src)
end
790
+
791
# Compiles a C fragment to a Shellcode for the host cpu.
# A fresh C parser bound to the host executable format is used for each call,
# so definitions known to the shared cp parser are not visible here.
# Returns the result of assembling the compiler output into the Shellcode.
def self.compile_c(src)
  # XXX could we reuse self.cp ? (for its macros etc)
  exe = host_exe.new(host_cpu)
  parser = C::Parser.new(exe)
  parser.parse(src)

  shellcode = Shellcode.new(host_cpu)
  asm_source = host_cpu.new_ccompiler(parser, shellcode).compile
  shellcode.assemble(asm_source)
end
800
+
801
# Looks up the library exporting symname in the AutoImport tables:
# GNUExports::EXPORT on linux, WindowsExports::EXPORT on windows.
# Returns whatever the table holds for that name (nil when unknown).
def self.lib_from_sym(symname)
  exports =
    case host_arch
    when :linux then GNUExports::EXPORT
    when :windows then WindowsExports::EXPORT
    end
  exports[symname]
end
808
+
809
# Reads a bunch of C code and creates ruby bindings according to the prototypes.
# proto::   C source (prototypes, enums, #defines, #includes)
# fromlib:: library to resolve the symbols from; when nil the library is
#           guessed for each symbol through lib_from_sym
#
# For each toplevel function prototype, a singleton method is defined in this
# module, named after c_func_name_to_rb (lowercased C name).
# For each toplevel non-function variable, the generated method simply returns
# the symbol address.
# Numeric enum values become constants named after c_const_name_to_rb; numeric
# macros are resolved lazily by const_missing.
# When a generated function is called with a lambda as argument, a callback is
# created for the duration of the call and destroyed afterwards ; use
# callback_alloc_c to get a callback id with a longer life span.
# Symbols whose address resolves to 0 or -1 are reported to api_not_found.
def self.new_api_c(proto, fromlib=nil)
  proto += "\n;"  # allow 'int foo()' and '#include <bar>'
  parse_c(proto)

  cp.toplevel.symbol.dup.each_value { |v|
    next if not v.kind_of? C::Variable  # enums
    # consume the declaration so that it is not bound again by a later call
    cp.toplevel.symbol.delete v.name
    lib = fromlib || lib_from_sym(v.name)
    addr = sym_addr(lib, v.name)
    if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff
      api_not_found(lib, v)
      next
    end

    rbname = c_func_name_to_rb(v.name)
    if not v.type.kind_of? C::Function
      # not a function, simply return the symbol address
      # TODO struct/table access through hash/array ?
      class << self ; self ; end.send(:define_method, rbname) { addr }
      next
    end
    next if v.initializer  # inline & stuff
    puts "new_api_c: load method #{rbname} from #{lib}" if $DEBUG

    new_caller_for(v, rbname, addr)
  }

  # predeclare constants from enums
  # macros are handled in const_missing (too slow to (re)do here everytime)
  # TODO #define FOO(v) (v<<1)|1 => create ruby counterpart
  cexist = constants.inject({}) { |h, c| h.update c.to_s => true }
  cp.toplevel.symbol.each { |k, v|
    next if not v.kind_of? ::Integer
    n = c_const_name_to_rb(k)
    const_set(n, v) if not cexist[n]
  }

  # avoid WTF rb warning: toplevel const TRUE referenced by WinAPI::TRUE
  # (a constant of the same name exists in Object, so const_missing would
  # never be triggered for it)
  cp.lexer.definition.each_key { |k|
    n = c_const_name_to_rb(k)
    # the macro is looked up under its C name k, as const_missing does
    if not cexist[n] and Object.const_defined?(n) and v = @cp.macro_numeric(k)
      const_set(n, v)
    end
  }
end
861
+
862
# const_missing handler: tries to resolve the constant from a C #define.
# The macro is searched by exact name first, then by any macro whose
# c_const_name_to_rb translation equals the requested name.
# On success the numeric value is stored as a real constant and returned,
# otherwise the default const_missing behaviour applies.
def self.const_missing(c)
  # infinite loop on autorequire C..
  return super(c) unless defined?(@cp) and @cp

  cname = c.to_s
  macro =
    if @cp.lexer.definition[cname]
      cname
    else
      @cp.lexer.definition.keys.find { |k| c_const_name_to_rb(k) == cname }
    end

  value = macro && @cp.macro_numeric(macro)
  return super(c) unless value

  const_set(c, value)
  value
end
881
+
882
# Translates a C function name to the name of its ruby wrapper method:
# every character outside [a-zA-Z0-9_] is replaced by its hex encoding, the
# result is lowercased, and an 'm' is prepended if it does not start with a letter.
def self.c_func_name_to_rb(name)
  converted = name.to_s.gsub(/[^a-z0-9_]/i) { |ch| ch.unpack('H*').first }.downcase
  converted =~ /^[a-z]/ ? converted : "m#{converted}"
end
888
+
889
# Translates a C constant name (numeric define/enum) to a ruby constant name:
# every character outside [a-zA-Z0-9_] is replaced by its hex encoding, the
# result is uppercased, and a 'C' is prepended if it does not start with a
# letter, so that the name is a valid ruby constant (1st letter upcase).
def self.c_const_name_to_rb(name)
  converted = name.to_s.gsub(/[^a-z0-9_]/i) { |ch| ch.unpack('H*').first }.upcase
  converted =~ /^[A-Z]/ ? converted : "C#{converted}"
end
896
+
897
# Called by new_api_c when the address of a symbol could not be resolved.
# lib::  the library that was searched
# func:: the C variable (responds to #name) that was not found
# Raises a RuntimeError naming both.
def self.api_not_found(lib, func)
  message = "could not find symbol #{func.name.inspect} in #{lib.inspect}"
  raise message
end
900
+
901
# Hook called whenever a native API is called through new_api_c/new_func_c/etc,
# with the ruby method name and the converted argument list.
# Does nothing by default.
def self.trace_invoke(api, args)
  # p api
  nil
end
905
+
906
# Defines a new singleton method 'name' in the current module, invoking the raw
# code at address addr.
# proto:: C variable holding the function prototype, used to translate the
#         ruby arguments to raw arguments and to interpret the return value
# name::  name of the ruby method to define
# addr::  address of the native code
#
# The flags passed to raw_invoke are built from the prototype: 1 for the
# stdcall attribute, 2 for fastcall, 4 when the return type is an 8-byte
# integral, 8 when it is a float.
# The generated method raises ArgumentError on a bad argument count (unless
# the prototype is varargs).
def self.new_caller_for(proto, name, addr)
  flags = 0
  flags |= 1 if proto.has_attribute('stdcall')
  flags |= 2 if proto.has_attribute('fastcall')
  flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8
  flags |= 8 if proto.type.type.float?
  class << self ; self ; end.send(:define_method, name) { |*a|
    raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.length}" if a.length != proto.type.args.length and not proto.type.varargs

    auto_cb = []  # list of automatic C callbacks generated from lambdas
    begin
      # convert the arglist suitably for raw_invoke
      a = a.zip(proto.type.args).map { |ra, fa|
        aa = convert_rb2c(fa, ra, :cb_list => auto_cb)
        if fa and fa.type.integral? and cp.sizeof(fa) == 8 and host_cpu.size == 32
          # 64bit integer on a 32bit host: pass it as two 32bit words
          aa = [aa & 0xffff_ffff, (aa >> 32) & 0xffff_ffff]
          aa.reverse! if host_cpu.endianness != :little
        end
        aa
      }.flatten

      trace_invoke(name, a)
      # do it
      ret = raw_invoke(addr, a, flags)
    ensure
      # cleanup autogenerated callbacks, even if the conversion or the call raised
      auto_cb.each { |cb| callback_free(cb) }
    end

    # interpret return value
    convert_ret_c2rb(proto, ret)
  }
end
939
+
940
# Converts a ruby object to an integer suitable as argument for raw_invoke.
# formal:: the C formal argument (may be nil)
# val::    the ruby value
# opts::   optional Hash; when it has a :cb_list Array, callbacks allocated
#          here for Proc values are appended to it
#
# String -> pointer to the string, Proc -> new callback id,
# C::AllocCStruct -> pointer into its backing string, Hash -> pointer to a
# struct freshly allocated from the hash (formal must be a struct pointer),
# anything else -> val.to_i (0 if the conversion fails).
def self.convert_rb2c(formal, val, opts=nil)
  case val
  when String
    str_ptr(val)
  when Proc
    cb = callback_alloc_cobj(formal, val)
    opts[:cb_list] << cb if opts and opts[:cb_list]
    cb
  when C::AllocCStruct
    str_ptr(val.str) + val.stroff
  when Hash
    unless formal.type.pointed.kind_of?(C::Struct)
      raise "invalid argument #{val.inspect} for #{formal}, need a struct*"
    end
    buf = cp.alloc_c_struct(formal, val)
    # GC trick: lifetime(buf) >= lifetime(hash) (XXX or until next call to convert_rb2c)
    val.instance_variable_set('@rb2c', buf)
    str_ptr(buf.str)
  #when Float; val  # TODO handle that in raw_invoke C code
  else
    int =
      begin
        val.to_i
      rescue
        0  # NaN, Infinity, etc
      end
    # ruby bug... raise -0x8000_0000: out of ulong range
    lowest = -(1 << (cp.typesize[:ptr]*8 - 1))
    int == lowest ? -int : int
  end
end
960
+
961
# Called from the C part to run the ruby code corresponding to a given
# C callback allocated by callback_alloc_c.
# id::   callback id, key of the callback table
# args:: raw arguments received by the callback; args[0] is overwritten
# Returns the value returned by the ruby proc.
# Raises if id is not a registered callback.
def self.callback_run(id, args)
  cb = @@callback_table[id]
  unless cb
    raise "invalid callback #{'%x' % id} not in #{@@callback_table.keys.map { |c| c.to_s(16) }}"
  end

  # decode the raw args according to the prototype, if any
  rawargs = args.dup
  ra = []
  ra = cb[:proto].args.map { |fa| convert_cbargs_c2rb(fa, rawargs) } if cb[:proto]

  # run it
  ret = cb[:proc].call(*ra)

  # the C code expects to find in args[0] the amount of stack fixing needed for __stdcall callbacks
  args[0] = cb[:abi_stackfix] || 0
  ret
end
977
+
978
# Converts the next raw C callback argument to a ruby object.
# formal::  the C formal argument
# rawargs:: Array of remaining raw values; the consumed values are shifted out
#
# On a 32bit host, an 8-byte integral argument is rebuilt from two consecutive
# 32bit raw values, ordered according to the host endianness.
# The resulting integer is then interpreted by convert_c2rb.
def self.convert_cbargs_c2rb(formal, rawargs)
  val = rawargs.shift
  if formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32
    # host_cpu, as in new_caller_for (there is no 'host' object)
    if host_cpu.endianness == :little
      val |= rawargs.shift << 32
    else
      val = (val << 32) | rawargs.shift
    end
  end

  convert_c2rb(formal, val)
end
992
+
993
# Interprets a raw decoded C value as a ruby value according to its C type.
# formal:: a C type, or a C::Variable whose type is used
# val::    the raw integer
# Signed integral types are sign-extended from their size; a null pointer
# is returned as nil.
# XXX val is an integer, how to decode Floats etc ? raw binary ptr ?
def self.convert_c2rb(formal, val)
  type = formal.kind_of?(C::Variable) ? formal.type : formal
  if type.integral? and type.signed?
    val = Expression.make_signed(val, 8*cp.sizeof(type))
  end
  return nil if type.pointer? and val == 0
  val
end
1002
+
1003
# Converts the raw return value of a C function to a ruby object.
# fproto:: the function type, or a C::Variable holding it
# ret::    the raw returned integer
# can be overridden for system-specific calling convention (eg return 0/-1 => raise an error)
def self.convert_ret_c2rb(fproto, ret)
  ftype = fproto.kind_of?(C::Variable) ? fproto.type : fproto
  return_type = ftype.untypedef.type
  convert_c2rb(return_type, ret)
end
1009
+
1010
# The shared C parser: created on first use for the host executable
# format and cpu, then cached in @cp.
def self.cp
  @cp ||= C::Parser.new(host_exe.new(host_cpu))
end

# Replaces the shared C parser.
def self.cp=(c)
  @cp = c
end
1012
+
1013
# Allocates a callback for a given C prototype (string).
# Accepts full C functions (with body) (only 1 at a time) or a toplevel 'asm'
# statement: those are compiled, copied to freshly allocated memory made
# 'rwx', and the address of that memory is returned (the block is not used).
# For a bare prototype, the block becomes the body of the callback and the
# callback id from callback_alloc_cobj is returned.
# Raises 'empty prototype' when proto declares no function.
def self.callback_alloc_c(proto, &b)
  proto += ';'  # allow 'int foo()'
  parse_c(proto)
  v = cp.toplevel.symbol.values.find { |sym| sym.kind_of? C::Variable and sym.type.kind_of? C::Function }
  needs_compile = (v and v.initializer) || cp.toplevel.statements.find { |st| st.kind_of? C::Asm }

  if not needs_compile
    raise 'empty prototype' if not v
    cp.toplevel.symbol.delete v.name
    return callback_alloc_cobj(v, b)
  end

  # native code: remove the definitions from the shared parser and compile them
  cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm }
  cp.toplevel.symbol.delete v.name if v
  sc = compile_c(proto)
  ptr = memory_alloc(sc.encoded.length)
  sc.base_addr = ptr
  # TODO fixup external calls
  memory_write ptr, sc.encode_string
  memory_perm ptr, sc.encoded.length, 'rwx'
  ptr
end
1036
+
1037
# Allocates a callback for a given C prototype object.
# proto:: C function type, pointer to function, or C::Variable of such a type
# b::     the ruby proc to run when the callback is called
# Registers the callback in the callback table and returns its id.
# For stdcall prototypes the stack fixup is the summed size of all args
# (each at least pointer-sized); for fastcall the first two args are excluded.
def self.callback_alloc_cobj(proto, b)
  ori = proto
  proto = proto.type if proto and proto.kind_of? C::Variable
  proto = proto.pointed while proto and proto.pointer?
  id = callback_find_id

  stack_size = lambda { |args|
    args.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max }
  }

  cb = { :id => id, :proc => b, :proto => proto }
  if ori
    cb[:abi_stackfix] = stack_size[proto.args] if ori.has_attribute('stdcall')
    # supercedes stdcall
    cb[:abi_stackfix] = stack_size[proto.args[2..-1].to_a] if ori.has_attribute('fastcall')
  end

  @@callback_table[id] = cb
  id
end
1052
+
1053
# Releases a callback id by removing it from the callback table, so that
# it may be reused by a later callback_alloc.
# Returns the removed table entry (nil if the id was not registered).
def self.callback_free(id)
  @@callback_table.delete(id)
end
1057
+
1058
# Finds a free callback id, ie a known callback address with no entry in the
# callback table.
# When none is free, a new 4096-byte page is allocated and filled with 128
# stubs targeting CALLBACK_TARGET (for ia32 and x64 hosts); each stub address
# becomes a new candidate id. The page is then made 'rx'.
# Raises if no id is available even after that.
def self.callback_find_id
  id = @@callback_addrs.find { |a| not @@callback_table[a] }
  return id if id

  cb_page = memory_alloc(4096)
  sc = Shellcode.new(host_cpu, cb_page)

  # source and size of one stub, depending on the cpu
  stub_src, stub_len =
    case sc.cpu.shortname
    when 'ia32'
      ["call #{CALLBACK_TARGET}", 5]
    when 'x64'
      ["1: lea rax, [rip-$_+1b] jmp #{CALLBACK_TARGET}", 12]  # XXX approximative..
    end

  if stub_src
    addr = cb_page
    nrcb = 128  # TODO should be 4096/5, but the parser/compiler is really too slow
    nrcb.times {
      @@callback_addrs << addr
      sc.parse stub_src
      addr += stub_len
    }
  end

  sc.assemble
  memory_write cb_page, sc.encode_string
  memory_perm cb_page, 4096, 'rx'

  id = @@callback_addrs.find { |a| not @@callback_table[a] }
  raise 'callback_alloc bouh' if not id
  id
end
1088
+
1089
# Compiles a bunch of C functions and defines methods in this module to call them.
# src:: C source; each function with a body that is exported by the compiled
#       code gets a ruby wrapper named after c_func_name_to_rb
# External symbols referenced by the code are resolved through
# lib_from_sym/sym_addr; raises "unknown symbol" if one cannot be resolved.
# Without a block, returns the raw pointer to the code page.
# If given a block, runs the block and then undefines all the C functions and
# frees the memory; the value of the block is returned.
def self.new_func_c(src)
  sc = compile_c(src)
  ptr = memory_alloc(sc.encoded.length)
  sc.base_addr = ptr
  bd = sc.encoded.binding(ptr)
  sc.encoded.reloc_externals.uniq.each { |ext|
    addr = sym_addr(lib_from_sym(ext), ext)
    # same 'not found' values as new_api_c: 0 is truthy in ruby and must be tested explicitly
    if not addr or addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff
      raise "unknown symbol #{ext}"
    end
    bd[ext] = addr
  }
  sc.encoded.fixup(bd)
  memory_write ptr, sc.encode_string
  memory_perm ptr, sc.encoded.length, 'rwx'
  parse_c(src)  # XXX the Shellcode parser may have defined stuff / interpreted C another way...
  defs = []
  cp.toplevel.symbol.dup.each_value { |v|
    next if not v.kind_of? C::Variable
    cp.toplevel.symbol.delete v.name
    next if not v.type.kind_of? C::Function or not v.initializer
    next if not off = sc.encoded.export[v.name]
    rbname = c_func_name_to_rb(v.name)
    new_caller_for(v, rbname, ptr+off)
    defs << rbname
  }
  if block_given?
    begin
      yield
    ensure
      defs.each { |d| class << self ; self ; end.send(:remove_method, d) }
      memory_free ptr
    end
  else
    ptr
  end
end
1123
+
1124
# Compiles an asm sequence, callable with the ABI of the C prototype given.
# proto:: C prototype of exactly one new function, without body; the ruby
#         method name comes from it (see c_func_name_to_rb)
# asm::   assembly source for the host cpu
# External symbols referenced by the code are resolved through
# lib_from_sym/sym_addr; raises "unknown symbol" if one cannot be resolved.
# Without a block, returns the raw pointer to the code.
# If given a block, runs the block and then undefines the method and frees
# the memory; the value of the block is returned.
def self.new_func_asm(proto, asm)
  proto += "\n;"
  old = cp.toplevel.symbol.keys
  parse_c(proto)
  news = cp.toplevel.symbol.keys - old
  raise "invalid proto #{proto}" if news.length != 1
  f = cp.toplevel.symbol[news.first]
  raise "invalid func proto #{proto}" if not f.name or not f.type.kind_of? C::Function or f.initializer
  cp.toplevel.symbol.delete f.name

  sc = Shellcode.assemble(host_cpu, asm)
  ptr = memory_alloc(sc.encoded.length)
  bd = sc.encoded.binding(ptr)
  sc.encoded.reloc_externals.uniq.each { |ext|
    addr = sym_addr(lib_from_sym(ext), ext)
    # same 'not found' values as new_api_c: 0 is truthy in ruby and must be tested explicitly
    if not addr or addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff
      raise "unknown symbol #{ext}"
    end
    bd[ext] = addr
  }
  sc.encoded.fixup(bd)
  memory_write ptr, sc.encode_string
  memory_perm ptr, sc.encoded.length, 'rwx'
  rbname = c_func_name_to_rb(f.name)
  new_caller_for(f, rbname, ptr)
  if block_given?
    begin
      yield
    ensure
      class << self ; self ; end.send(:remove_method, rbname)
      memory_free ptr
    end
  else
    ptr
  end
end
1156
+
1157
# Allocates a C::AllocCStruct to hold a specific struct defined in a
# previous new_api_c, through the shared C parser.
# values:: optional Hash of initial values, passed to the parser
def self.alloc_c_struct(structname, values={})
  parser = cp
  parser.alloc_c_struct(structname, values)
end
1161
+
1162
# Returns a C::AllocCStruct mapped over the string (with optional offset).
# str may be an EncodedData, in which case its data is used.
def self.decode_c_struct(structname, str, off=0)
  raw = str.kind_of?(EncodedData) ? str.data : str
  cp.decode_c_struct(structname, raw, off)
end
1168
+
1169
# Allocates a C::AllocCStruct holding an Array of typename variables,
# through the shared C parser.
# len is either the array length (an int), or an array of initialisers
# eg alloc_c_ary("int", [4, 5, 28])
def self.alloc_c_ary(typename, len)
  parser = cp
  parser.alloc_c_ary(typename, len)
end
1175
+
1176
# Returns a C::AllocCStruct holding an array of len elements of type
# typename mapped over str (starting at offset off), through the shared C parser.
def self.decode_c_ary(typename, len, str, off=0)
  parser = cp
  parser.decode_c_ary(typename, len, str, off)
end
1180
+
1181
# Returns an AllocCStruct holding an array of 1 element of type typename,
# optionally initialised with init; access its value with obj[0].
# Useful when you need a pointer to an int that will be filled by an API:
# use alloc_c_ptr('int')
def self.alloc_c_ptr(typename, init=nil)
  content = init ? [init] : 1
  cp.alloc_c_ary(typename, content)
end
1187
+
1188
# Returns the binary version of a ruby value encoded as a C variable,
# as computed by the shared C parser.
# only integral types handled for now
def self.encode_c_value(var, val)
  parser = cp
  parser.encode_c_value(var, val)
end
1193
+
1194
# Decodes a C variable from the string str at offset off, through the
# shared C parser.
# only integral types handled for now
def self.decode_c_value(str, var, off=0)
  parser = cp
  parser.decode_c_value(str, var, off)
end
1199
+
1200
# Reads a 0-terminated string from memory, without the terminator.
# ptr::   address of the string
# szmax:: maximum number of bytes to read
# A first read stops at the end of the 4096-byte page containing ptr; only
# if no terminator is found there are szmax bytes read in one go.
# Returns the whole data read when no terminator is found.
def self.memory_read_strz(ptr, szmax=4096)
  # read up to the end of the ptr memory page
  page_end = (ptr + 0x1000) & ~0xfff
  first_len = [page_end - ptr, szmax].min
  chunk = memory_read(ptr, first_len)
  nul = chunk.index(?\0)
  return chunk[0, nul] if nul
  return chunk unless first_len < szmax

  # not found in the first page: retry with the full size
  chunk = memory_read(ptr, szmax)
  nul = chunk.index(?\0)
  nul ? chunk[0, nul] : chunk
end
1213
+
1214
# Reads a 0-terminated wide string (16bit little-endian units) from memory,
# without the terminator.
# ptr::   address of the string
# szmax:: maximum number of bytes to read
# A first read stops at the end of the 4096-byte page containing ptr; only
# if no terminator is found there are szmax bytes read in one go.
# Returns the whole data read when no terminator is found.
def self.memory_read_wstrz(ptr, szmax=4096)
  # read up to the end of the ptr memory page
  page_end = (ptr + 0x1000) & ~0xfff
  first_len = [page_end - ptr, szmax].min
  chunk = memory_read(ptr, first_len)
  nul = chunk.unpack('v*').index(0)
  return chunk[0, 2*nul] if nul
  return chunk unless first_len < szmax

  # not found in the first page: retry with the full size
  chunk = memory_read(ptr, szmax)
  nul = chunk.unpack('v*').index(0)
  nul ? chunk[0, 2*nul] : chunk
end
1229
+
1230
+ # automatically build/load the bin module
1231
+ start
1232
+
1233
+ case host_arch
1234
+ when :windows
1235
+
1236
+ new_api_c <<EOS, 'kernel32'
1237
+ #define PAGE_NOACCESS 0x01
1238
+ #define PAGE_READONLY 0x02
1239
+ #define PAGE_READWRITE 0x04
1240
+ #define PAGE_WRITECOPY 0x08
1241
+ #define PAGE_EXECUTE 0x10
1242
+ #define PAGE_EXECUTE_READ 0x20
1243
+ #define PAGE_EXECUTE_READWRITE 0x40
1244
+ #define PAGE_EXECUTE_WRITECOPY 0x80
1245
+ #define PAGE_GUARD 0x100
1246
+ #define PAGE_NOCACHE 0x200
1247
+ #define PAGE_WRITECOMBINE 0x400
1248
+
1249
+ #define MEM_COMMIT 0x1000
1250
+ #define MEM_RESERVE 0x2000
1251
+ #define MEM_DECOMMIT 0x4000
1252
+ #define MEM_RELEASE 0x8000
1253
+ #define MEM_FREE 0x10000
1254
+ #define MEM_PRIVATE 0x20000
1255
+ #define MEM_MAPPED 0x40000
1256
+ #define MEM_RESET 0x80000
1257
+ #define MEM_TOP_DOWN 0x100000
1258
+ #define MEM_WRITE_WATCH 0x200000
1259
+ #define MEM_PHYSICAL 0x400000
1260
+ #define MEM_LARGE_PAGES 0x20000000
1261
+ #define MEM_4MB_PAGES 0x80000000
1262
+
1263
+ __stdcall uintptr_t VirtualAlloc(uintptr_t addr, uintptr_t size, int type, int prot);
1264
+ __stdcall uintptr_t VirtualFree(uintptr_t addr, uintptr_t size, int freetype);
1265
+ __stdcall uintptr_t VirtualProtect(uintptr_t addr, uintptr_t size, int prot, int *oldprot);
1266
+ EOS
1267
+
1268
# Allocates sz bytes of read/write memory suitable for code allocation,
# through VirtualAlloc (reserve + commit). Returns the value returned by the API.
def self.memory_alloc(sz)
  alloc_type = MEM_RESERVE | MEM_COMMIT
  virtualalloc(nil, sz, alloc_type, PAGE_READWRITE)
end
1272
+
1273
# Frees memory allocated through memory_alloc, through VirtualFree
# with MEM_RELEASE and a size of 0.
def self.memory_free(addr)
  release_size = 0
  virtualfree(addr, release_size, MEM_RELEASE)
end
1277
+
1278
# Changes memory permissions through VirtualProtect.
# addr:: start address
# len::  length of the range
# perm:: one of r, rw, rx, rwx (String or Symbol, case-insensitive)
# Returns the value returned by the API.
def self.memory_perm(addr, len, perm)
  perm = { 'r' => PAGE_READONLY, 'rw' => PAGE_READWRITE, 'rx' => PAGE_EXECUTE_READ,
           'rwx' => PAGE_EXECUTE_READWRITE }[perm.to_s.downcase]
  # VirtualProtect stores the previous protection in this buffer: keep it in a
  # local so that it stays referenced while the API runs
  oldprot = [0].pack('C')*8
  virtualprotect(addr, len, perm, str_ptr(oldprot))
end
1284
+
1285
+ when :linux
1286
+
1287
+ new_api_c <<EOS
1288
+ #define PROT_READ 0x1
1289
+ #define PROT_WRITE 0x2
1290
+ #define PROT_EXEC 0x4
1291
+
1292
+ #define MAP_PRIVATE 0x2
1293
+ #define MAP_ANONYMOUS 0x20
1294
+
1295
+ uintptr_t mmap(uintptr_t addr, uintptr_t length, int prot, int flags, uintptr_t fd, uintptr_t offset);
1296
+ uintptr_t munmap(uintptr_t addr, uintptr_t length);
1297
+ uintptr_t mprotect(uintptr_t addr, uintptr_t len, int prot);
1298
+ EOS
1299
+
1300
# Allocates sz bytes of read/write memory suitable for code allocation,
# through an anonymous private mmap.
# The size is remembered in @mmaps, keyed by the returned address, for memory_free.
def self.memory_alloc(sz)
  @mmaps ||= {}  # save size for mem_free
  prot = PROT_READ | PROT_WRITE
  flags = MAP_PRIVATE | MAP_ANONYMOUS
  addr = mmap(nil, sz, prot, flags, -1, 0)
  @mmaps[addr] = sz
  addr
end
1307
+
1308
# Frees memory allocated through memory_alloc, through munmap.
# The length comes from the size recorded by memory_alloc; the record is
# removed so that freed addresses do not pile up in @mmaps.
# For an address not recorded there, munmap receives a nil length.
def self.memory_free(addr)
  len = (@mmaps ||= {}).delete(addr)
  munmap(addr, len)
end
1312
+
1313
# Changes memory permissions through mprotect.
# addr:: start address, rounded down to a 4096-byte boundary
# len::  length of the range, extended to cover whole 4096-byte pages
# perm:: any combination of the letters r, w, x (case-insensitive)
# Returns the value returned by mprotect.
# on PaX-enabled systems, this may need a non-mprotect-restricted ruby interpreter
def self.memory_perm(addr, len, perm)
  perm = perm.to_s.downcase
  span = (len + (addr & 0xfff) + 0xfff) & ~0xfff
  page_start = addr & ~0xfff
  prot = (perm.include?('r') ? PROT_READ : 0) |
         (perm.include?('w') ? PROT_WRITE : 0) |
         (perm.include?('x') ? PROT_EXEC : 0)
  mprotect(page_start, span, prot)
end
1326
+
1327
+ end
1328
+ end
1329
+ end