metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,437 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/exe_format/mz'
9
+ require 'metasm/exe_format/coff'
10
+
11
+ module Metasm
12
# A PE image: a COFF file preceded by an MZ stub and the PE signature.
# The word at offset 0x3c of the MZ stub holds the offset of the signature,
# the COFF header follows the signature immediately.
class PE < COFF
  MAGIC = "PE\0\0"	# 0x50450000

  attr_accessor :coff_offset, :signature, :mz

  # forwards all arguments to COFF#initialize, then creates the embedded MZ
  # object (sharing this object's namespace) using the first CPU found in
  # the argument list, if any
  def initialize(*a)
    super(*a)
    cpu = a.grep(CPU).first
    @mz = MZ.new(cpu).share_namespace(self)
  end

  # overrides COFF#decode_header
  # simply sets the offset to the PE pointer before decoding the COFF header
  # also checks the PE signature
  # raises InvalidExeFormat if the 4 bytes at the pointed offset are not MAGIC
  def decode_header
    @cursection ||= self
    # the word at 0x3c is the file offset of the PE signature
    @encoded.ptr = 0x3c
    @encoded.ptr = decode_word(@encoded)
    @signature = @encoded.read(4)
    raise InvalidExeFormat, "Invalid PE signature #{@signature.inspect}" if @signature != MAGIC
    @coff_offset = @encoded.ptr
    if @mz.encoded.empty?
      # everything before the signature is the MZ stub
      @mz.encoded << @encoded[0, @coff_offset-4]
      @mz.encoded.ptr = 0
      @mz.decode_header
    end
    super()
  end

  # creates a default MZ file to be used in the PE header
  # this one is specially crafted to fit in the 0x3c bytes before the signature
  def encode_default_mz_header
    # XXX use single-quoted source, to avoid ruby interpretation of \r\n
    @mz.cpu = Ia32.new(386, 16)
    @mz.assemble <<'EOMZSTUB'
db "Needs Win32!\r\n$"
.entrypoint
push cs
pop ds
xor dx, dx ; ds:dx = addr of $-terminated string
mov ah, 9 ; output string
int 21h
mov ax, 4c01h ; exit with code in al
int 21h
EOMZSTUB

    mzparts = @mz.pre_encode

    # put stuff before 0x3c
    @mz.encoded << mzparts.shift
    raise 'OH NOES !!1!!!1!' if @mz.encoded.virtsize > 0x3c	# MZ header is too long, cannot happen
    until mzparts.empty?
      break if mzparts.first.virtsize + @mz.encoded.virtsize > 0x3c
      @mz.encoded << mzparts.shift
    end

    # set PE signature pointer
    @mz.encoded.align 0x3c
    @mz.encoded << encode_word('pesigptr')

    # put last parts of the MZ program
    until mzparts.empty?
      @mz.encoded << mzparts.shift
    end

    # ensure the sig will be 8bytes-aligned
    @mz.encoded.align 8

    # the signature is appended right after the stub: point 0x3c to its end
    @mz.encoded.fixup 'pesigptr' => @mz.encoded.virtsize
    @mz.encoded.fixup @mz.encoded.binding
    @mz.encoded.fill
    @mz.encode_fix_checksum
  end

  # encodes the PE header before the COFF header, uses a default mz header if none defined
  # the MZ header must have 0x3c pointing just past its last byte which should be 8bytes aligned
  # the 2 1st bytes of the MZ header should be 'MZ'
  def encode_header(*a)
    encode_default_mz_header if @mz.encoded.empty?

    @encoded << @mz.encoded.dup

    # append the PE signature
    @signature ||= MAGIC
    @encoded << @signature

    super(*a)
  end

  # returns a new PE with only minimal information copied:
  #  section name/perm/addr/content
  #  exports
  #  imports (with boundimport cleared)
  #  resources
  # the new PE shares this object's namespace unless share_ns is false
  # every section is a new Section object (its encoded data is the same
  # object as the original's, it is not duplicated)
  def mini_copy(share_ns=true)
    ret = self.class.new(@cpu)
    ret.share_namespace(self) if share_ns
    ret.header.machine = @header.machine
    ret.header.characteristics = @header.characteristics
    ret.optheader.entrypoint = @optheader.entrypoint
    ret.optheader.image_base = @optheader.image_base
    ret.optheader.subsystem = @optheader.subsystem
    ret.optheader.dll_characts = @optheader.dll_characts
    @sections.each { |s|
      rs = Section.new
      rs.name = s.name
      rs.virtaddr = s.virtaddr
      rs.characteristics = s.characteristics
      rs.encoded = s.encoded
      # append the stripped-down copy, not the original section
      ret.sections << rs
    }
    ret.resource = resource
    ret.tls = tls
    if imports
      ret.imports = @imports.map { |id| id.dup }
      ret.imports.each { |id|
        id.timestamp = id.firstforwarder =
        id.ilt_p = id.libname_p = nil
      }
    end
    ret.export = export
    ret
  end

  # picks a default entrypoint when none is set, from the labels exported by
  # the sections, tried in this order: main, DllEntryPoint, DllMain, WinMain
  # on ia32, DllMain and WinMain are wrapped by a compiled C stub
  # (DllEntryPoint and main respectively), on other cpus they are used directly
  def c_set_default_entrypoint
    return if @optheader.entrypoint
    if @sections.find { |s| s.encoded.export['main'] }
      @optheader.entrypoint = 'main'
    elsif @sections.find { |s| s.encoded.export['DllEntryPoint'] }
      @optheader.entrypoint = 'DllEntryPoint'
    elsif @sections.find { |s| s.encoded.export['DllMain'] }
      case @cpu.shortname
      when 'ia32'
        @optheader.entrypoint = 'DllEntryPoint'
        compile_c <<EOS
enum { DLL_PROCESS_DETACH, DLL_PROCESS_ATTACH, DLL_THREAD_ATTACH, DLL_THREAD_DETACH, DLL_PROCESS_VERIFIER };
__stdcall int DllMain(void *handle, unsigned long reason, void *reserved);
__stdcall int DllEntryPoint(void *handle, unsigned long reason, void *reserved) {
int ret = DllMain(handle, reason, reserved);
if (ret == 0 && reason == DLL_PROCESS_ATTACH)
DllMain(handle, DLL_PROCESS_DETACH, reserved);
return ret;
}
EOS
      else
        @optheader.entrypoint = 'DllMain'
      end
    elsif @sections.find { |s| s.encoded.export['WinMain'] }
      case @cpu.shortname
      when 'ia32'
        @optheader.entrypoint = 'main'
        compile_c <<EOS
#define GetCommandLine GetCommandLineA
#define GetModuleHandle GetModuleHandleA
#define GetStartupInfo GetStartupInfoA
#define STARTF_USESHOWWINDOW 0x00000001
#define SW_SHOWDEFAULT 10

typedef unsigned long DWORD;
typedef unsigned short WORD;
typedef struct {
DWORD cb; char *lpReserved, *lpDesktop, *lpTitle;
DWORD dwX, dwY, dwXSize, dwYSize, dwXCountChars, dwYCountChars, dwFillAttribute, dwFlags;
WORD wShowWindow, cbReserved2; char *lpReserved2;
void *hStdInput, *hStdOutput, *hStdError;
} STARTUPINFO;

__stdcall void *GetModuleHandleA(const char *lpModuleName);
__stdcall void GetStartupInfoA(STARTUPINFO *lpStartupInfo);
__stdcall void ExitProcess(unsigned int uExitCode);
__stdcall char *GetCommandLineA(void);
__stdcall int WinMain(void *hInstance, void *hPrevInstance, char *lpCmdLine, int nShowCmd);

int main(void) {
STARTUPINFO startupinfo;
startupinfo.cb = sizeof(STARTUPINFO);
char *cmd = GetCommandLine();
int ret;

if (*cmd == '"') {
cmd++;
while (*cmd && *cmd != '"') {
if (*cmd == '\\\\') cmd++;
cmd++;
}
if (*cmd == '"') cmd++;
} else
while (*cmd && *cmd != ' ') cmd++;
while (*cmd == ' ') cmd++;

GetStartupInfo(&startupinfo);
ret = WinMain(GetModuleHandle(0), 0, cmd, (startupinfo.dwFlags & STARTF_USESHOWWINDOW) ? (int)startupinfo.wShowWindow : (int)SW_SHOWDEFAULT);
ExitProcess((DWORD)ret);
return ret;
}
EOS
      else
        @optheader.entrypoint = 'WinMain'
      end
    end
  end

  # handles writes to fs:[0] -> dasm SEH handler (first only, does not follow the chain)
  # the handler addresses found are labelled and queued in dasm.addrs_todo,
  # the return value is always the one of the parent implementation
  # TODO seh prototype (args => context)
  # TODO hook on (non)resolution of :w xref
  def get_xrefs_x(dasm, di)
    if @cpu.shortname =~ /ia32|x64/ and a = di.instruction.args.first and a.kind_of? Ia32::ModRM and a.seg and a.seg.val == 4 and
        w = get_xrefs_rw(dasm, di).find { |type, ptr, len| type == :w and ptr.externals.include? 'segment_base_fs' } and
        dasm.backtrace(Expression[w[1], :-, 'segment_base_fs'], di.address).to_a.include?(Expression[0])
      sehptr = w[1]
      sz = @cpu.size/8
      # the value backtraced is [[sehptr] + sz]
      sehptr = Indirection.new(Expression[Indirection.new(sehptr, sz, di.address), :+, sz], sz, di.address)
      a = dasm.backtrace(sehptr, di.address, :include_start => true, :origin => di.address, :type => :x, :detached => true)
      puts "backtrace seh from #{di} => #{a.map { |addr| Expression[addr] }.join(', ')}" if $VERBOSE
      a.each { |aa|
        next if aa == Expression::Unknown
        dasm.auto_label_at(aa, 'seh', 'loc', 'sub')
        dasm.addrs_todo << [aa]
      }
    end
    super(dasm, di)
  end

  # returns a disassembler with a special decodedfunction for GetProcAddress (i386 only), and the default func
  def init_disassembler
    d = super()
    d.backtrace_maxblocks_data = 4
    case @cpu.shortname
    when 'ia32', 'x64'
      # parse the GetProcAddress prototype with a temporary C parser,
      # then restore the previous one
      old_cp = d.c_parser
      d.c_parser = nil
      d.parse_c '__stdcall void *GetProcAddress(int, char *);'
      d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.kind_of? X86_64
      gpa = @cpu.decode_c_function_prototype(d.c_parser, 'GetProcAddress')
      d.c_parser = old_cp
      d.parse_c ''
      d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.kind_of? X86_64
      # [dasm, calladdr] pairs whose function name could not be resolved
      @getprocaddr_unknown = []
      gpa.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
        break bind if @getprocaddr_unknown.include? [dasm, calladdr] or not Expression[expr].externals.include? :eax
        sz = @cpu.size/8
        break bind if not dasm.decoded[calladdr]
        # 2nd argument of the call: the name pointer
        if @cpu.kind_of? X86_64
          arg2 = :rdx
        else
          arg2 = Indirection[[:esp, :+, 2*sz], sz, calladdr]
        end
        fnaddr = dasm.backtrace(arg2, calladdr, :include_start => true, :maxdepth => maxdepth)
        if fnaddr.kind_of? ::Array and fnaddr.length == 1 and s = dasm.get_section_at(fnaddr.first) and fn = s[0].read(64) and i = fn.index(?\0) and i > sz	# try to avoid ordinals
          # bind the first register symbol of the cpu to the name read
          bind = bind.merge @cpu.register_symbols[0] => Expression[fn[0, i]]
        else
          @getprocaddr_unknown << [dasm, calladdr]
          puts "unknown func name for getprocaddress from #{Expression[calladdr]}" if $VERBOSE
        end
        bind
      }
      d.function[Expression['GetProcAddress']] = gpa
      d.function[:default] = @cpu.disassembler_default_func
    end
    d
  end

  # the library name stored in the export directory, or a false value if
  # there is no export directory
  def module_name
    export and @export.libname
  end

  # the image base stored in the optional header
  def module_address
    @optheader.image_base
  end

  # the highest end rva (virtaddr + virtsize) among the sections, 0 if there
  # is no section
  def module_size
    @sections.map { |s_| s_.virtaddr + s_.virtsize }.max || 0
  end

  # returns an array of [name, value] pairs: the entrypoint, then every
  # export having a target (unnamed exports are named "ord_<ordinal>")
  def module_symbols
    syms = [['entrypoint', @optheader.entrypoint]]
    @export.exports.to_a.each { |e|
      next if not e.target
      name = e.name || "ord_#{e.ordinal}"
      syms << [name, label_rva(e.target)]
    } if export
    syms
  end
end
298
+
299
# an instance of a PE file, loaded in memory
# just change the rva_to_off and the section content decoding methods
class LoadedPE < PE
  attr_accessor :load_address

  # use the virtualaddr/virtualsize fields of the section header
  # falls back to an empty EncodedData when that range yields nothing
  def decode_section_body(s)
    s.encoded = @encoded[s.virtaddr, s.virtsize] || EncodedData.new
  end

  # no need to decode relocations on an already mapped image
  def decode_relocs
  end

  # reads a loaded PE from memory, returns a PE object
  # dumps the header, optheader and all sections ; try to rebuild IAT (#memdump_imports)
  # entrypoint, when given, is an absolute address (baseaddr is subtracted
  # from it) ; iat_p is forwarded to #memdump_imports
  def self.memdump(memory, baseaddr, entrypoint = nil, iat_p=nil)
    loaded = LoadedPE.load memory[baseaddr, 0x1000_0000]
    loaded.load_address = baseaddr
    loaded.decode

    dump = PE.new(loaded.cpu_from_headers)
    dump.share_namespace loaded
    dump.optheader.image_base = baseaddr
    dump.optheader.entrypoint = (entrypoint || loaded.optheader.entrypoint + baseaddr) - baseaddr
    dump.directory['resource_table'] = loaded.directory['resource_table']

    loaded.sections.each { |s|
      ss = Section.new
      ss.name = s.name
      ss.virtaddr = s.virtaddr
      ss.encoded = s.encoded
      ss.characteristics = s.characteristics
      dump.sections << ss
    }

    loaded.memdump_imports(memory, dump, iat_p)

    dump
  end

  # rebuilds an IAT from the loaded pe and the memory
  #  for each loaded iat, find the matching dll in memory
  #  for each loaded iat entry, retrieve the exported name from the loaded dll
  # OR
  #  from a base iat address in memory (unk_iat_p, rva), retrieve the 1st dll, find
  #  all iat pointers/forwarders to this dll, on failure try to find another dll
  #  allows gaps of 5 invalid pointers between libraries
  # dll found by scanning pages 16 by 16 backward from the first iat address (XXX the 1st must not be forwarded)
  # the rebuilt import directories are appended to dump.imports
  # TODO bound imports
  def memdump_imports(memory, dump, unk_iat_p=nil)
    puts 'rebuilding imports...' if $VERBOSE
    if unk_iat_p
      # read iat data from unk_iat_p
      iat_p = unk_iat_p
    else
      # nothing to rebuild without any import directory
      return if not imports or imports.empty?
      # read iat data from @imports
      # work on copies: the iat arrays are consumed below
      imports = @imports.dup
      imports.each { |id| id.iat = id.iat.dup }
      iat_p = imports.first.iat_p	# used for iat_p
    end

    failcnt = 0	# bad pointers in iat table (unk_ only)
    dump.imports ||= []
    loaded_dll = nil	# the dll from who we're importing the current importdirectory
    ptrsz = (@optheader.signature == 'PE+' ? 8 : 4)
    cache = []	# optimize forwarder target search
    loop do
      if unk_iat_p
        # read imported pointer from the table
        ptr = decode_xword(EncodedData.new(memory[@load_address + iat_p, ptrsz]))
        iat_p += ptrsz
      else
        # read imported pointer from the import structure
        while not ptr = imports.first.iat.shift
          # current directory exhausted: forget its dll so that the next
          # directory starts a new importdirectory in the dump
          loaded_dll = nil
          imports.shift
          break if imports.empty?
          iat_p = imports.first.iat_p
        end
        break if imports.empty?
        iat_p += ptrsz
      end

      if not loaded_dll or not e = loaded_dll.export.exports.find { |e_| loaded_dll.label_rva(e_.target) == ptr - loaded_dll.load_address }
        # points to unknown space
        # find pointed module start
        if not dll = cache.find { |dll_| ptr >= dll_.load_address and ptr < dll_.load_address + dll_.optheader.image_size }
          # scan backward by steps of 0x10000 for an MZ signature
          addr = ptr & ~0xffff
          256.times { break if memory[addr, 2] == MZ::MAGIC or addr < 0x10000 ; addr -= 0x10000 }
          if memory[addr, 2] == MZ::MAGIC
            dll = LoadedPE.load memory[addr, 0x1000_0000]
            dll.load_address = addr
            dll.decode_header
            dll.decode_exports
            cache << dll
          end
        end
        if dll and dll.export and e = dll.export.exports.find { |e_| dll.label_rva(e_.target) == ptr - dll.load_address }
          if loaded_dll and ee = loaded_dll.export.exports.find { |ee_| ee_.forwarder_name == e.name }
            # it's a forwarder from the current loaded_dll
            puts "forwarder #{ee.name} -> #{dll.export.libname}!#{e.name}" if $DEBUG
            e = ee
          else
            # new library, start a new importdirectory
            # XXX if 1st import is forwarded, loaded_dll will points to the bad module...
            loaded_dll = dll
            id = ImportDirectory.new
            id.libname = loaded_dll.export.libname
            puts "lib #{id.libname}" if $VERBOSE
            id.imports = []
            id.iat_p = iat_p - ptrsz
            dump.imports << id
          end
        else
          puts 'unknown ptr %x' % ptr if $DEBUG
          # allow holes in the unk_iat_p table
          break if not unk_iat_p or failcnt > 4
          failcnt += 1
          next
        end
        failcnt = 0
      end

      # dumped last importdirectory is correct, append the import field
      i = ImportDirectory::Import.new
      if e.name
        puts e.name if $DEBUG
        i.name = e.name
      else
        puts "##{e.ordinal}" if $DEBUG
        i.ordinal = e.ordinal
      end
      dump.imports.last.imports << i
    end
  end
end
437
+ end
@@ -0,0 +1,246 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ module Metasm
7
# a class representing a simple structure serialized in a binary
# subclasses declare their fields with the class-level helpers (word, str,
# bitfield...), instances are then decoded from / encoded with an 'exe'
# object providing the matching decode_*/encode_* methods
class SerialStruct
  # hash shared by all classes
  # key = class, value = array of fields
  # field = array [name, decode...]
  @@fields = {}
  # indexes in a field array
  NAME=0
  DECODE=1
  ENCODE=2
  DEFVAL=3
  ENUM=4
  BITS=5

  class << self
    # defines a new field
    # adds an accessor
    # a nil name declares an anonymous field (no accessor, no stored value)
    def new_field(name, decode, encode, defval, enum=nil, bits=nil)
      if name
        attr_accessor name
        name = "@#{name}".to_sym
      end
      (@@fields[self] ||= []) << [name, decode, encode, defval, enum, bits]
    end

    # creates a field constructor for a simple integer
    # relies on exe implementing (en,de)code_#{type}
    # the constructor takes the field name, an optional default value
    # (0 if omitted) and an optional enum
    def new_int_field(*types)
      recv = class << self ; self ; end
      types.each { |type|
        recv.send(:define_method, type) { |name, *args|
          new_field(name, "decode_#{type}".to_sym, "encode_#{type}".to_sym, args[0] || 0, args[1])
        }

        # shortcut to define multiple fields of this type with default values
        recv.send(:define_method, "#{type}s") { |*names|
          names.each { |name| send type, name }
        }
      }
    end

    # standard fields:

    # a fixed-size memory chunk
    # encoding truncates / pads the value with NULs up to len
    def mem(name, len, defval='')
      new_field(name, lambda { |exe, me| exe.curencoded.read(len) }, lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }, defval)
    end

    # a fixed-size string, 0-padded
    # decoding stops the value at the first NUL, if any
    def str(name, len, defval='')
      e = lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }
      d = lambda { |exe, me| v = exe.curencoded.read(len) ; v = v[0, v.index(?\0)] if v.index(?\0) ; v }
      new_field(name, d, e, defval)
    end

    # 0-terminated string
    # decodes to '' (without reading anything) when no terminator is found
    # after the current position, same as ExeFormat#decode_strz
    def strz(name, defval='')
      d = lambda { |exe, me|
        ed = exe.curencoded
        if stop = ed.data.index(?\0, ed.ptr)
          ed.read(stop - ed.ptr + 1).chop
        else
          ''
        end
      }
      e = lambda { |exe, me, val| val + 0.chr }
      new_field(name, d, e, defval)
    end

    # field access
    # returns the field array of the field named 'name' for this class, or nil
    def fld_get(name)
      name = "@#{name}".to_sym
      @@fields[self].find { |f| f[NAME] == name }
    end

    # change the default for a field
    # the new default is either a value or a block
    def fld_default(name, default=nil, &b)
      default ||= b
      fld_get(name)[DEFVAL] = default
    end

    # change the enum of a field (value or block)
    def fld_enum(name, enum=nil, &b)
      fld_get(name)[ENUM] = enum||b
    end

    # change the bits of a field (value or block)
    def fld_bits(name, bits=nil, &b)
      fld_get(name)[BITS] = bits||b
    end

    # define a bitfield: many fields inside a single word/byte/whatever
    # usage: bitfield :word, 0 => :lala, 1 => nil, 4 => :lolo, 8 => :foo
    #  => a bitfield read using exe.decode_word, containing 3 subfields:
    #   :lala (bits 0...1), (discard 3 bits), :lolo (bits 4...8), and :foo (bits 8..-1)
    # fields default to 0
    def bitfield(inttype, h)
      # XXX encode/decode very not threadsafe ! this is a Georges Foreman Guarantee.
      # could use a me.instance_variable..
      # (the temp var @bitfield_val lives on the class calling bitfield)

      # decode the value in a temp var
      d = lambda { |exe, me| @bitfield_val = exe.send("decode_#{inttype}") }
      # reset a temp var
      e = lambda { |exe, me, val| @bitfield_val = 0 ; nil }
      new_field(nil, d, e, nil)

      h = h.sort
      h.length.times { |i|
        # yay closure !
        # get field parameters
        next if not name = h[i][1]
        off = h[i][0]
        nxt = h[i+1]
        # the field extends up to the next offset, or has no upper bound
        mask = (nxt ? (1 << (nxt[0]-off))-1 : -1)
        # read the field value from the temp var
        d = lambda { |exe, me| (@bitfield_val >> off) & mask }
        # update the temp var with the field value, return nil
        e = lambda { |exe, me, val| @bitfield_val |= (val & mask) << off ; nil }
        new_field(name, d, e, 0)
      }

      # free the temp var
      d = lambda { |exe, me| @bitfield_val = nil }
      # return encoded temp var
      e = lambda { |exe, me, val|
        val = @bitfield_val
        @bitfield_val = nil
        exe.send("encode_#{inttype}", val)
      }
      new_field(nil, d, e, nil)
    end

    # inject a hook to be run during the decoding process
    # the block is called with (exe, struct) ; it is inserted before the
    # field named 'before', or appended after the fields defined so far
    def decode_hook(before=nil, &b)
      idx = (before ? @@fields[self].index(fld_get(before)) : -1)
      # the class may not have any field yet
      (@@fields[self] ||= []).insert(idx, [nil, b])
    end
  end	# class methods

  # standard int fields
  new_int_field :byte, :half, :word

  # set value of fields from argument list, runs int_to_hash if needed
  # the arguments are matched, in order, with the named fields
  def initialize(*a)
    if not a.empty?
      a.zip(struct_fields.reject { |f| not f[NAME] }).each { |v, f|
        v = int_to_hash(v, f[ENUM]) if f[ENUM]
        v = bits_to_hash(v, f[BITS]) if f[BITS]
        instance_variable_set f[NAME], v
      }
    end
  end

  # returns this classes' field array
  # uses struct_specialized if defined (a method that returns another
  #  SerialStruct class whose fields should be used)
  def struct_fields(exe=nil)
    klass = self.class
    klass = struct_specialized(exe) if respond_to? :struct_specialized
    raise "SerialStruct: no fields for #{klass}" if $DEBUG and not @@fields[klass]
    @@fields[klass]
  end

  # decodes the fields from the exe
  # a Symbol decoder is sent to exe with the extra args, an Array decoder
  # is sent to exe as [method, args...], a Proc is called with (exe, self)
  def decode(exe, *args)
    struct_fields(exe).each { |f|
      case d = f[DECODE]
      when Symbol; val = exe.send(d, *args)
      when Array; val = exe.send(*d)
      when Proc; val = d[exe, self]
      when nil; next
      end
      # anonymous fields are decoded for their side effects only
      next if not f[NAME]
      if h = f[ENUM]; h = h[exe, self] if h.kind_of? Proc; val = int_to_hash( val, h) end
      if h = f[BITS]; h = h[exe, self] if h.kind_of? Proc; val = bits_to_hash(val, h) end
      instance_variable_set(f[NAME], val)
    }
  end

  # initialize uninitialized fields
  # a field holding nil or false is considered uninitialized
  def set_default_values(exe)
    struct_fields(exe).each { |f|
      if not f[NAME]
        f[DEFVAL][exe, self] if f[DEFVAL]
        next
      end
      # check existence to avoid a "warning: ivar @bla not initialized"
      next if instance_variable_defined?(f[NAME]) and instance_variable_get(f[NAME])
      val = f[DEFVAL]
      val = val[exe, self] if val.kind_of? Proc
      if val.kind_of? Integer and h = f[ENUM]; h = h[exe, self] if h.kind_of? Proc; val = int_to_hash( val, h) end
      if val.kind_of? Integer and h = f[BITS]; h = h[exe, self] if h.kind_of? Proc; val = bits_to_hash(val, h) end
      instance_variable_set(f[NAME], val)
    }
  end

  # sets default values, then encodes the fields, returns an EData
  def encode(exe, *a)
    set_default_values(exe, *a)

    ed = EncodedData.new
    struct_fields(exe).each { |f|
      if not f[NAME]
        ed << f[ENCODE][exe, self, nil] if f[ENCODE]
        next
      end
      val = instance_variable_get(f[NAME])
      if h = f[ENUM]; h = h[exe, self] if h.kind_of? Proc; val = int_from_hash( val, h) end
      if h = f[BITS]; h = h[exe, self] if h.kind_of? Proc; val = bits_from_hash(val, h) end
      case e = f[ENCODE]
      when Symbol; val = exe.send(e, val)
      # NOTE(review): here the Array itself is passed as the method name,
      # unlike #decode which splats it - confirm the intended call
      when Array; val = exe.send(e, *val)
      when Proc; val = e[exe, self, val]
      when nil; next
      end
      ed << val
    }
    ed
  end

  # shortcut to create a new instance and decode it
  def self.decode(*a)
    s = new
    s.decode(*a)
    s
  end

  # formats one field value for #to_s
  # a is the list of structs being displayed, used to cut recursion
  def dump(e, a)
    case e
    when Integer; e >= 0x100 ? '0x%X'%e : e
    when String; e.length > 64 ? e[0, 62].inspect+'...' : e.inspect
    when Array; '[' + e.map { |i| dump(i, a) }.join(', ') + ']'
    when SerialStruct; a.include?(e) ? '...' : e.to_s(a)
    else e.inspect
    end
  end

  # displays the struct content, ordered by fields
  # instance variables which are not fields are listed last
  def to_s(a=[])
    ivs = instance_variables.map { |iv| iv.to_sym }
    ivs = (struct_fields.to_a.map { |f| f[NAME] }.compact & ivs) | ivs
    "<#{self.class} " + ivs.map { |iv| "#{iv}=#{dump(instance_variable_get(iv), a+[self])}" }.join(' ') + ">"
  end
end
236
+
237
class ExeFormat
  # the EncodedData the SerialStruct decoders read from: the 'encoded'
  # attribute of this object
  def curencoded
    encoded
  end

  # reads a 0-terminated string from ed, starting at its current position
  # returns the string without its terminator, or '' (and reads nothing)
  # if there is no terminator
  def decode_strz(ed = curencoded)
    terminator = ed.data.index(?\0, ed.ptr)
    return '' if not terminator
    raw = ed.read(terminator - ed.ptr + 1)
    raw.chop
  end
end
246
+ end