metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,437 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/exe_format/mz'
9
+ require 'metasm/exe_format/coff'
10
+
11
+ module Metasm
12
+ class PE < COFF
13
+ MAGIC = "PE\0\0" # 0x50450000
14
+
15
+ attr_accessor :coff_offset, :signature, :mz
16
+
17
# forwards every argument to the parent constructor, then creates the MZ
# object from the first CPU found among the arguments and stores the result
# of sharing this object's namespace with it in @mz
def initialize(*a)
  super(*a)
  stub_cpu = a.find { |arg| CPU === arg }
  @mz = MZ.new(stub_cpu).share_namespace(self)
end
22
+
23
+ # overrides COFF#decode_header
24
+ # simply sets the offset to the PE pointer before decoding the COFF header
25
+ # also checks the PE signature
26
def decode_header
  @cursection ||= self

  # the word stored at offset 0x3c is the position of the PE signature
  @encoded.ptr = 0x3c
  @encoded.ptr = decode_word(@encoded)

  @signature = @encoded.read(4)
  unless @signature == MAGIC
    raise InvalidExeFormat, "Invalid PE signature #{@signature.inspect}"
  end

  # the COFF header starts right after the 4 signature bytes
  @coff_offset = @encoded.ptr

  # everything before the signature is handed to the MZ decoder, once
  if @mz.encoded.empty?
    @mz.encoded << @encoded[0, @coff_offset-4]
    @mz.encoded.ptr = 0
    @mz.decode_header
  end

  super()
end
40
+
41
+ # creates a default MZ file to be used in the PE header
42
+ # this one is specially crafted to fit in the 0x3c bytes before the signature
43
# Assembles a small 16-bit stub program into @mz and lays out its encoded
# parts around offset 0x3c, where the pointer to the PE signature is stored.
# The final size of @mz.encoded (aligned on 8) is the value written at 0x3c.
def encode_default_mz_header
  # XXX use single-quoted source, to avoid ruby interpretation of \r\n
  @mz.cpu = Ia32.new(386, 16)
  @mz.assemble <<'EOMZSTUB'
db "Needs Win32!\r\n$"
.entrypoint
push cs
pop ds
xor dx, dx ; ds:dx = addr of $-terminated string
mov ah, 9 ; output string
int 21h
mov ax, 4c01h ; exit with code in al
int 21h
EOMZSTUB

  # list of encoded chunks, consumed front to back below
  mzparts = @mz.pre_encode

  # put stuff before 0x3c
  @mz.encoded << mzparts.shift
  raise 'OH NOES !!1!!!1!' if @mz.encoded.virtsize > 0x3c # MZ header is too long, cannot happen
  # keep appending chunks as long as the next one still fits before 0x3c
  until mzparts.empty?
    break if mzparts.first.virtsize + @mz.encoded.virtsize > 0x3c
    @mz.encoded << mzparts.shift
  end

  # set PE signature pointer
  # ('pesigptr' is a symbolic value, resolved by the fixup further down)
  @mz.encoded.align 0x3c
  @mz.encoded << encode_word('pesigptr')

  # put last parts of the MZ program
  until mzparts.empty?
    @mz.encoded << mzparts.shift
  end

  # ensure the sig will be 8bytes-aligned
  @mz.encoded.align 8

  # the signature pointer is the total size of the MZ part
  @mz.encoded.fixup 'pesigptr' => @mz.encoded.virtsize
  @mz.encoded.fixup @mz.encoded.binding
  @mz.encoded.fill
  @mz.encode_fix_checksum
end
85
+
86
+ # encodes the PE header before the COFF header, uses a default mz header if none defined
87
+ # the MZ header must have 0x3c pointing just past its last byte which should be 8bytes aligned
88
+ # the 2 1st bytes of the MZ header should be 'MZ'
89
def encode_header(*a)
  # no MZ part supplied: generate the default stub first
  if @mz.encoded.empty?
    encode_default_mz_header
  end

  # the output starts with a copy of the MZ part...
  @encoded << @mz.encoded.dup

  # ...followed by the PE signature (MAGIC unless one is already set)...
  @signature = MAGIC unless @signature
  @encoded << @signature

  # ...and then whatever the parent class encodes
  super(*a)
end
100
+
101
+ # returns a new PE with only minimal information copied:
102
+ # section name/perm/addr/content
103
+ # exports
104
+ # imports (with boundimport cleared)
105
+ # resources
106
# Returns a new object of the same class, built with @cpu, holding only
# minimal information copied from this one:
#  - header machine/characteristics
#  - optheader entrypoint/image_base/subsystem/dll_characts
#  - one fresh Section per section (name, virtaddr, characteristics, encoded);
#    the encoded content is the same object as in the source section
#  - resource, tls and export (same objects, not duplicated)
#  - imports: every directory is dup'ed, with timestamp, firstforwarder,
#    ilt_p and libname_p reset to nil
# share_ns: when true (default), the copy shares this object's namespace
def mini_copy(share_ns=true)
  ret = self.class.new(@cpu)
  ret.share_namespace(self) if share_ns
  ret.header.machine = @header.machine
  ret.header.characteristics = @header.characteristics
  ret.optheader.entrypoint = @optheader.entrypoint
  ret.optheader.image_base = @optheader.image_base
  ret.optheader.subsystem = @optheader.subsystem
  ret.optheader.dll_characts = @optheader.dll_characts
  @sections.each { |s|
    rs = Section.new
    rs.name = s.name
    rs.virtaddr = s.virtaddr
    rs.characteristics = s.characteristics
    rs.encoded = s.encoded
    # append the trimmed copy, not the original section object: the copy
    # must not carry (nor share) the other fields of the source section
    ret.sections << rs
  }
  ret.resource = resource
  ret.tls = tls
  if imports
    ret.imports = @imports.map { |id| id.dup }
    ret.imports.each { |id|
      id.timestamp = id.firstforwarder =
      id.ilt_p = id.libname_p = nil
    }
  end
  ret.export = export
  ret
end
135
+
136
# Picks a default entrypoint when @optheader.entrypoint is not set yet, by
# looking for well-known labels exported by the sections' encoded data.
# Checked in order: 'main', 'DllEntryPoint', 'DllMain', 'WinMain'.
# For 'DllMain' and 'WinMain' on the 'ia32' cpu, a C wrapper is compiled
# (through compile_c) and used as the entrypoint; on any other cpu the
# label itself becomes the entrypoint.
# Does nothing when none of these labels is found.
def c_set_default_entrypoint
  return if @optheader.entrypoint
  if @sections.find { |s| s.encoded.export['main'] }
    @optheader.entrypoint = 'main'
  elsif @sections.find { |s| s.encoded.export['DllEntryPoint'] }
    @optheader.entrypoint = 'DllEntryPoint'
  elsif @sections.find { |s| s.encoded.export['DllMain'] }
    case @cpu.shortname
    when 'ia32'
      # the wrapper calls DllMain again with DLL_PROCESS_DETACH when the
      # DLL_PROCESS_ATTACH call returned 0
      @optheader.entrypoint = 'DllEntryPoint'
      compile_c <<EOS
enum { DLL_PROCESS_DETACH, DLL_PROCESS_ATTACH, DLL_THREAD_ATTACH, DLL_THREAD_DETACH, DLL_PROCESS_VERIFIER };
__stdcall int DllMain(void *handle, unsigned long reason, void *reserved);
__stdcall int DllEntryPoint(void *handle, unsigned long reason, void *reserved) {
int ret = DllMain(handle, reason, reserved);
if (ret == 0 && reason == DLL_PROCESS_ATTACH)
DllMain(handle, DLL_PROCESS_DETACH, reserved);
return ret;
}
EOS
    else
      @optheader.entrypoint = 'DllMain'
    end
  elsif @sections.find { |s| s.encoded.export['WinMain'] }
    case @cpu.shortname
    when 'ia32'
      # the generated main() skips the program name in the command line,
      # calls WinMain with the remaining arguments and the show-window
      # value taken from the startup info, then exits with its return value
      @optheader.entrypoint = 'main'
      compile_c <<EOS
#define GetCommandLine GetCommandLineA
#define GetModuleHandle GetModuleHandleA
#define GetStartupInfo GetStartupInfoA
#define STARTF_USESHOWWINDOW 0x00000001
#define SW_SHOWDEFAULT 10

typedef unsigned long DWORD;
typedef unsigned short WORD;
typedef struct {
DWORD cb; char *lpReserved, *lpDesktop, *lpTitle;
DWORD dwX, dwY, dwXSize, dwYSize, dwXCountChars, dwYCountChars, dwFillAttribute, dwFlags;
WORD wShowWindow, cbReserved2; char *lpReserved2;
void *hStdInput, *hStdOutput, *hStdError;
} STARTUPINFO;

__stdcall void *GetModuleHandleA(const char *lpModuleName);
__stdcall void GetStartupInfoA(STARTUPINFO *lpStartupInfo);
__stdcall void ExitProcess(unsigned int uExitCode);
__stdcall char *GetCommandLineA(void);
__stdcall int WinMain(void *hInstance, void *hPrevInstance, char *lpCmdLine, int nShowCmd);

int main(void) {
STARTUPINFO startupinfo;
startupinfo.cb = sizeof(STARTUPINFO);
char *cmd = GetCommandLine();
int ret;

if (*cmd == '"') {
cmd++;
while (*cmd && *cmd != '"') {
if (*cmd == '\\\\') cmd++;
cmd++;
}
if (*cmd == '"') cmd++;
} else
while (*cmd && *cmd != ' ') cmd++;
while (*cmd == ' ') cmd++;

GetStartupInfo(&startupinfo);
ret = WinMain(GetModuleHandle(0), 0, cmd, (startupinfo.dwFlags & STARTF_USESHOWWINDOW) ? (int)startupinfo.wShowWindow : (int)SW_SHOWDEFAULT);
ExitProcess((DWORD)ret);
return ret;
}
EOS
    else
      @optheader.entrypoint = 'WinMain'
    end
  end
end
213
+
214
+ # handles writes to fs:[0] -> dasm SEH handler (first only, does not follow the chain)
215
+ # TODO seh prototype (args => context)
216
+ # TODO hook on (non)resolution of :w xref
217
# Disassembler hook wrapping the parent get_xrefs_x.
# When the cpu is ia32/x64, the first instruction argument is an
# Ia32::ModRM with segment number 4 (presumably fs), and one of the :w
# xrefs of the instruction, taken relative to 'segment_base_fs', may
# backtrace to 0, the written value is followed to find a SEH handler
# address, which is labelled and queued in dasm.addrs_todo.
# dasm: the disassembler, di: the decoded instruction
# Always returns the result of the parent implementation.
def get_xrefs_x(dasm, di)
  if @cpu.shortname =~ /ia32|x64/ and a = di.instruction.args.first and a.kind_of? Ia32::ModRM and a.seg and a.seg.val == 4 and
      w = get_xrefs_rw(dasm, di).find { |type, ptr, len| type == :w and ptr.externals.include? 'segment_base_fs' } and
      dasm.backtrace(Expression[w[1], :-, 'segment_base_fs'], di.address).to_a.include?(Expression[0])
    # w[1] is the pointer expression of the matching write xref
    sehptr = w[1]
    sz = @cpu.size/8
    # read the pointer stored at the written location, then the
    # pointer-sized slot sz bytes after it (presumably the handler field
    # of the SEH record)
    sehptr = Indirection.new(Expression[Indirection.new(sehptr, sz, di.address), :+, sz], sz, di.address)
    a = dasm.backtrace(sehptr, di.address, :include_start => true, :origin => di.address, :type => :x, :detached => true)
    puts "backtrace seh from #{di} => #{a.map { |addr| Expression[addr] }.join(', ')}" if $VERBOSE
    a.each { |aa|
      next if aa == Expression::Unknown
      # NOTE(review): the label returned here is never used afterwards
      l = dasm.auto_label_at(aa, 'seh', 'loc', 'sub')
      dasm.addrs_todo << [aa]
    }
    super(dasm, di)
  else
    super(dasm, di)
  end
end
236
+
237
+ # returns a disassembler with a special decodedfunction for GetProcAddress (i386 only), and the default func
238
# Returns the disassembler created by the parent class, tuned for PE files:
# backtrace_maxblocks_data is set to 4 and, for the 'ia32' and 'x64' cpus,
# a decoded function is registered for 'GetProcAddress' together with the
# cpu default function (d.function[:default]).
def init_disassembler
  d = super()
  d.backtrace_maxblocks_data = 4
  case @cpu.shortname
  when 'ia32', 'x64'
    # parse the GetProcAddress prototype with the disassembler's parser
    # slot temporarily cleared, then put the previous parser back
    # (presumably parse_c creates a fresh parser when c_parser is nil)
    old_cp = d.c_parser
    d.c_parser = nil
    d.parse_c '__stdcall void *GetProcAddress(int, char *);'
    d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.kind_of? X86_64
    gpa = @cpu.decode_c_function_prototype(d.c_parser, 'GetProcAddress')
    d.c_parser = old_cp
    d.parse_c ''
    d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.kind_of? X86_64
    # [dasm, calladdr] pairs for which the function name could not be found
    @getprocaddr_unknown = []
    # backtrace binding callback: tries to replace the first register
    # symbol of the cpu with the name of the function requested at calladdr
    # ('break' inside a lambda returns from the lambda)
    gpa.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
      break bind if @getprocaddr_unknown.include? [dasm, calladdr] or not Expression[expr].externals.include? :eax
      sz = @cpu.size/8
      break bind if not dasm.decoded[calladdr]
      # location of the 2nd argument (the name pointer) at the call site
      if @cpu.kind_of? X86_64
        arg2 = :rdx
      else
        arg2 = Indirection[[:esp, :+, 2*sz], sz, calladdr]
      end
      fnaddr = dasm.backtrace(arg2, calladdr, :include_start => true, :maxdepth => maxdepth)
      # accept only a single resolved address pointing to a 0-terminated
      # string of more than sz bytes within the first 64 bytes read there
      if fnaddr.kind_of? ::Array and fnaddr.length == 1 and s = dasm.get_section_at(fnaddr.first) and fn = s[0].read(64) and i = fn.index(?\0) and i > sz # try to avoid ordinals
        bind = bind.merge @cpu.register_symbols[0] => Expression[fn[0, i]]
      else
        # remember the failure so that it is not retried for this call site
        @getprocaddr_unknown << [dasm, calladdr]
        puts "unknown func name for getprocaddress from #{Expression[calladdr]}" if $VERBOSE
      end
      bind
    }
    d.function[Expression['GetProcAddress']] = gpa
    d.function[:default] = @cpu.disassembler_default_func
  end
  d
end
275
+
276
# name of the library as stored in the export directory
# returns the (false/nil) value of #export itself when there is no export directory
def module_name
  exported = export
  return exported unless exported
  @export.libname
end
279
+
280
# address of the module: the image_base field of the optional header
def module_address
  opt = @optheader
  opt.image_base
end
283
+
284
# size of the module: the highest virtaddr + virtsize found among the
# sections, 0 when there is no section
def module_size
  highest = nil
  @sections.each { |sec|
    top = sec.virtaddr + sec.virtsize
    highest = top if highest.nil? or top > highest
  }
  highest || 0
end
287
+
288
# list of [name, value] pairs for the module:
# the 'entrypoint', then one pair per export having a target
# (named "ord_<ordinal>" when the export has no name, value from label_rva)
def module_symbols
  list = [['entrypoint', @optheader.entrypoint]]
  return list unless export

  @export.exports.to_a.each do |exp|
    next unless exp.target
    list << [exp.name || "ord_#{exp.ordinal}", label_rva(exp.target)]
  end
  list
end
297
+ end
298
+
299
+ # an instance of a PE file, loaded in memory
300
+ # just change the rva_to_off and the section content decoding methods
301
+ class LoadedPE < PE
302
+ attr_accessor :load_address
303
+
304
+ # use the virtualaddr/virtualsize fields of the section header
305
# the section content is the slice [virtaddr, virtsize] of the mapped image,
# or an empty EncodedData when that slice does not exist
def decode_section_body(s)
  mapped = @encoded[s.virtaddr, s.virtsize]
  mapped = EncodedData.new unless mapped
  s.encoded = mapped
end
308
+
309
+ # no need to decode relocations on an already mapped image
310
# intentionally a no-op: nothing is decoded for an image that is already mapped
def decode_relocs
  nil
end
312
+
313
+ # reads a loaded PE from memory, returns a PE object
314
+ # dumps the header, optheader and all sections ; try to rebuild IAT (#memdump_imports)
315
# builds a PE from an image mapped in memory at baseaddr
# memory: indexable as memory[addr, len]
# entrypoint: optional absolute entrypoint address, defaults to the one of the mapped image
# iat_p: optional, forwarded to #memdump_imports
# returns the new PE (image_base = baseaddr, entrypoint stored relative to baseaddr)
def self.memdump(memory, baseaddr, entrypoint = nil, iat_p=nil)
  mapped = LoadedPE.load memory[baseaddr, 0x1000_0000]
  mapped.load_address = baseaddr
  mapped.decode

  dump = PE.new(mapped.cpu_from_headers)
  dump.share_namespace mapped
  dump.optheader.image_base = baseaddr
  dump.optheader.entrypoint = (entrypoint ? entrypoint : mapped.optheader.entrypoint + baseaddr) - baseaddr
  dump.directory['resource_table'] = mapped.directory['resource_table']

  # one new section per mapped section, sharing the encoded content
  mapped.sections.each do |src|
    copy = Section.new
    copy.name = src.name
    copy.virtaddr = src.virtaddr
    copy.encoded = src.encoded
    copy.characteristics = src.characteristics
    dump.sections << copy
  end

  mapped.memdump_imports(memory, dump, iat_p)

  dump
end
339
+
340
+ # rebuilds an IAT from the loaded pe and the memory
341
+ # for each loaded iat, find the matching dll in memory
342
+ # for each loaded iat entry, retrieve the exported name from the loaded dll
343
+ # OR
344
+ # from a base iat address in memory (unk_iat_p, rva), retrieve the 1st dll, find
345
+ # all iat pointers/forwarders to this dll, on failure try to find another dll
346
+ # allows gaps of 5 invalid pointers between libraries
347
+ # dll found by scanning pages 16 by 16 backward from the first iat address (XXX the 1st must not be forwarded)
348
+ # TODO bound imports
349
# Rebuilds import directories into dump.imports from the pointers found in
# the import address table of this loaded PE.
# memory: the address space, indexable as memory[addr, len]
# dump: the object receiving the ImportDirectory list (dump.imports)
# unk_iat_p: optional rva of an IAT to scan in memory; when nil, the
#            pointers are taken from the iat lists of @imports
# Returns nil. Without unk_iat_p, nothing is done when there is no import
# directory, and the iat lists of @imports are left untouched.
def memdump_imports(memory, dump, unk_iat_p=nil)
  puts 'rebuilding imports...' if $VERBOSE
  if unk_iat_p
    # read iat data from unk_iat_p
    iat_p = unk_iat_p
  else
    return if not imports
    # read iat data from @imports
    # work on copies of the directories: the iat lists are consumed with
    # shift below, and this must not empty the ones owned by @imports
    imports = @imports.map { |id|
      id = id.dup
      id.iat = id.iat.dup
      id
    }
    # an empty import table has nothing to rebuild
    return if imports.empty?
    iat_p = imports.first.iat_p # used for iat_p
  end

  failcnt = 0 # bad pointers in iat table (unk_ only)
  dump.imports ||= []
  loaded_dll = nil # the dll from who we're importing the current importdirectory
  ptrsz = (@optheader.signature == 'PE+' ? 8 : 4)
  cache = [] # optimize forwarder target search
  loop do
    if unk_iat_p
      # read imported pointer from the table
      ptr = decode_xword(EncodedData.new(memory[@load_address + iat_p, ptrsz]))
      iat_p += ptrsz
    else
      # read imported pointer from the import structure
      while not ptr = imports.first.iat.shift
        # current directory exhausted: forget its dll, so that the next
        # pointer starts a new importdirectory instead of being matched
        # against the previous library
        loaded_dll = nil
        imports.shift
        break if imports.empty?
        iat_p = imports.first.iat_p
      end
      break if imports.empty?
      iat_p += ptrsz
    end

    if not loaded_dll or not e = loaded_dll.export.exports.find { |e_| loaded_dll.label_rva(e_.target) == ptr - loaded_dll.load_address }
      # points to unknown space
      # find pointed module start
      if not dll = cache.find { |dll_| ptr >= dll_.load_address and ptr < dll_.load_address + dll_.optheader.image_size }
        # scan backward, 0x10000 bytes at a time, for an MZ signature
        addr = ptr & ~0xffff
        256.times { break if memory[addr, 2] == MZ::MAGIC or addr < 0x10000 ; addr -= 0x10000 }
        if memory[addr, 2] == MZ::MAGIC
          dll = LoadedPE.load memory[addr, 0x1000_0000]
          dll.load_address = addr
          dll.decode_header
          dll.decode_exports
          cache << dll
        end
      end
      if dll and dll.export and e = dll.export.exports.find { |e_| dll.label_rva(e_.target) == ptr - dll.load_address }
        if loaded_dll and ee = loaded_dll.export.exports.find { |ee_| ee_.forwarder_name == e.name }
          # it's a forwarder from the current loaded_dll
          puts "forwarder #{ee.name} -> #{dll.export.libname}!#{e.name}" if $DEBUG
          e = ee
        else
          # new library, start a new importdirectory
          # XXX if 1st import is forwarded, loaded_dll will point to the wrong module...
          loaded_dll = dll
          id = ImportDirectory.new
          id.libname = loaded_dll.export.libname
          puts "lib #{id.libname}" if $VERBOSE
          id.imports = []
          id.iat_p = iat_p - ptrsz
          dump.imports << id
        end
      else
        puts 'unknown ptr %x' % ptr if $DEBUG
        # allow holes in the unk_iat_p table
        break if not unk_iat_p or failcnt > 4
        failcnt += 1
        next
      end
      failcnt = 0
    end

    # dumped last importdirectory is correct, append the import field
    i = ImportDirectory::Import.new
    if e.name
      puts e.name if $DEBUG
      i.name = e.name
    else
      puts "##{e.ordinal}" if $DEBUG
      i.ordinal = e.ordinal
    end
    dump.imports.last.imports << i
  end
end
436
+ end
437
+ end
@@ -0,0 +1,246 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ module Metasm
7
+ # a class representing a simple structure serialized in a binary
8
+ class SerialStruct
9
+ # hash shared by all classes
10
+ # key = class, value = array of fields
11
+ # field = array [name, decode...]
12
+ @@fields = {}
13
+ NAME=0
14
+ DECODE=1
15
+ ENCODE=2
16
+ DEFVAL=3
17
+ ENUM=4
18
+ BITS=5
19
+
20
+ class << self
21
+ # defines a new field
22
+ # adds an accessor
23
# registers a field descriptor [name, decode, encode, defval, enum, bits]
# for the receiving class
# a named field gets an attr_accessor and is stored under its
# instance-variable symbol (:@name); an anonymous field keeps its name as given
# returns the field list of the class
def new_field(name, decode, encode, defval, enum=nil, bits=nil)
  key = name
  if name
    attr_accessor name
    key = :"@#{name}"
  end
  @@fields[self] ||= []
  @@fields[self] << [key, decode, encode, defval, enum, bits]
end
30
+
31
+ # creates a field constructor for a simple integer
32
+ # relies on exe implementing (en,de)code_#{type}
33
# for every given type, defines two singleton methods on the receiver:
#  <type>(name, defval=0, enum=nil): declares a field decoded/encoded
#    through :decode_<type> / :encode_<type>
#  <type>s(*names): declares several such fields with default values
def new_int_field(*types)
  meta = class << self ; self ; end
  types.each do |type|
    decoder = :"decode_#{type}"
    encoder = :"encode_#{type}"
    meta.send(:define_method, type) do |name, *args|
      new_field(name, decoder, encoder, args[0] || 0, args[1])
    end

    # plural form: one declaration per name
    meta.send(:define_method, "#{type}s") do |*names|
      names.each { |name| send type, name }
    end
  end
end
46
+
47
+ # standard fields:
48
+
49
+ # a fixed-size memory chunk
50
# declares a field holding exactly len bytes
# decoding reads len bytes from exe.curencoded
# encoding truncates the value to len bytes and pads it with 0 bytes
def mem(name, len, defval='')
  reader = lambda { |exe, me| exe.curencoded.read(len) }
  writer = lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }
  new_field(name, reader, writer, defval)
end
53
+ # a fixed-size string, 0-padded
54
# declares a string field stored in exactly len bytes
# decoding reads len bytes and keeps what precedes the first 0 byte, if any
# encoding truncates the value to len bytes and pads it with 0 bytes
def str(name, len, defval='')
  reader = lambda { |exe, me|
    raw = exe.curencoded.read(len)
    cut = raw.index(?\0)
    cut ? raw[0, cut] : raw
  }
  writer = lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }
  new_field(name, reader, writer, defval)
end
59
+ # 0-terminated string
60
# declares a 0-terminated string field
# decoding reads from exe.curencoded up to and including the next 0 byte
# and returns the string without its terminator; when no terminator is
# found after the current position, '' is returned and nothing is read
# (same behaviour as ExeFormat#decode_strz)
# encoding appends a 0 byte to the value
def strz(name, defval='')
  d = lambda { |exe, me|
    ed = exe.curencoded
    # index returns nil when there is no terminator left
    if stop = ed.data.index(?\0, ed.ptr)
      ed.read(stop - ed.ptr + 1).chop
    else
      ''
    end
  }
  e = lambda { |exe, me, val| val + 0.chr }
  new_field(name, d, e, defval)
end
68
+
69
+ # field access
70
# returns the descriptor of the field called name for the receiving class,
# nil when the class has no such field
def fld_get(name)
  ivar = :"@#{name}"
  @@fields[self].find { |fld| fld[NAME] == ivar }
end
74
+
75
+ # change the default for a field
76
# replaces the default value of the field called name
# the new default is the given value, or the block when no value is given
def fld_default(name, default=nil, &b)
  fld_get(name)[DEFVAL] = default || b
end
80
# sets the enum of the field called name (given value, or the block)
def fld_enum(name, enum=nil, &b)
  descriptor = fld_get(name)
  descriptor[ENUM] = enum || b
end
81
# sets the bits description of the field called name (given value, or the block)
def fld_bits(name, bits=nil, &b)
  descriptor = fld_get(name)
  descriptor[BITS] = bits || b
end
82
+
83
+ # define a bitfield: many fields inside a single word/byte/whatever
84
+ # usage: bitfield :word, 0 => :lala, 1 => nil, 4 => :lolo, 8 => :foo
85
+ # => a bitfield read using exe.decode_word, containing 3 subfields:
86
+ # :lala (bits 0...1), (discard 3 bits), :lolo (bits 4...8), and :foo (bits 8..-1)
87
+ # fields default to 0
88
# declares several subfields packed in one integer of type inttype
# h maps a starting bit offset to a subfield name (nil = unnamed gap);
# each subfield extends up to the next offset, the last one has no upper bound
# implemented as: one anonymous field loading/resetting a temporary word,
# one field per named subfield, one anonymous field releasing/encoding the word
def bitfield(inttype, h)
  # XXX the temporary word lives in a single instance variable of the
  # receiver: encode/decode are not threadsafe

  # opening field: decode the whole word / start encoding from 0
  new_field(nil,
    lambda { |exe, me| @bitfield_val = exe.send("decode_#{inttype}") },
    lambda { |exe, me, val| @bitfield_val = 0 ; nil },
    nil)

  layout = h.sort
  layout.each_with_index { |(off, name), idx|
    next if not name
    following = layout[idx+1]
    mask = (following ? (1 << (following[0]-off))-1 : -1)
    # extract the subfield from the temporary word
    getter = lambda { |exe, me| (@bitfield_val >> off) & mask }
    # merge the subfield into the temporary word, encodes nothing by itself
    setter = lambda { |exe, me, val| @bitfield_val |= (val & mask) << off ; nil }
    new_field(name, getter, setter, 0)
  }

  # closing field: drop the temporary word / emit the encoded word
  new_field(nil,
    lambda { |exe, me| @bitfield_val = nil },
    lambda { |exe, me, val|
      word = @bitfield_val
      @bitfield_val = nil
      exe.send("encode_#{inttype}", word)
    },
    nil)
end
123
+
124
+ # inject a hook to be run during the decoding process
125
# inserts an anonymous field whose decoder is the given block
# before: name of the field in front of which the hook is inserted;
#         without it the hook goes at index -1 (the end of the field list)
# returns the field list of the class
def decode_hook(before=nil, &b)
  fields = @@fields[self]
  idx = -1
  idx = fields.index(fld_get(before)) if before
  fields.insert(idx, [nil, b])
end
129
+ end # class methods
130
+
131
+ # standard int fields
132
+ new_int_field :byte, :half, :word
133
+
134
+ # set value of fields from argument list, runs int_to_hash if needed
135
# assigns the arguments, in order, to the named fields of the struct
# values of fields having an enum / a bits description are first converted
# through int_to_hash / bits_to_hash
def initialize(*a)
  return if a.empty?

  named = struct_fields.select { |fld| fld[NAME] }
  a.zip(named).each { |val, fld|
    val = int_to_hash(val, fld[ENUM]) if fld[ENUM]
    val = bits_to_hash(val, fld[BITS]) if fld[BITS]
    instance_variable_set fld[NAME], val
  }
end
144
+
145
+ # returns this class's field array
146
+ # uses struct_specialized if defined (a method that returns another
147
+ # SerialStruct class whose fields should be used)
148
# field list used for this object: the one registered for the class
# returned by struct_specialized(exe) when the object responds to it,
# otherwise the one of the object's own class
# raises when no list is registered, but only if $DEBUG is set
def struct_fields(exe=nil)
  klass = respond_to?(:struct_specialized) ? struct_specialized(exe) : self.class
  raise "SerialStruct: no fields for #{klass}" if $DEBUG and not @@fields[klass]
  @@fields[klass]
end
154
+
155
+ # decodes the fields from the exe
156
# decodes every field in declaration order and stores the named ones
# decoder kinds: Symbol (exe method, called with *args), Array (exe method
# followed by its arguments), Proc (called with exe and self); fields with
# a nil decoder are skipped
# decoded values go through int_to_hash / bits_to_hash when the field has
# an enum / a bits description (a Proc there is first called with exe and self)
def decode(exe, *args)
  struct_fields(exe).each { |f|
    decoder = f[DECODE]
    next if decoder.nil?

    val = case decoder
          when Symbol then exe.send(decoder, *args)
          when Array then exe.send(*decoder)
          when Proc then decoder[exe, self]
          end

    # anonymous fields are run for their side effects only
    next if not f[NAME]

    if enum = f[ENUM]
      enum = enum[exe, self] if enum.kind_of? Proc
      val = int_to_hash(val, enum)
    end
    if bits = f[BITS]
      bits = bits[exe, self] if bits.kind_of? Proc
      val = bits_to_hash(val, bits)
    end
    instance_variable_set(f[NAME], val)
  }
end
170
+
171
+ # initialize uninitialized fields
172
# gives a value to every named field that is unset (or nil/false)
# the default may be a Proc, called with exe and self
# an Integer default goes through int_to_hash / bits_to_hash when the field
# has an enum / a bits description
# for anonymous fields, the default (if any) is only called for its side effects
def set_default_values(exe)
  struct_fields(exe).each { |f|
    ivar = f[NAME]
    default = f[DEFVAL]

    if not ivar
      default[exe, self] if default
      next
    end

    # keep values that are already set
    next if instance_variable_defined?(ivar) and instance_variable_get(ivar)

    val = default
    val = val[exe, self] if val.kind_of? Proc
    if val.kind_of? Integer and enum = f[ENUM]
      enum = enum[exe, self] if enum.kind_of? Proc
      val = int_to_hash(val, enum)
    end
    if val.kind_of? Integer and bits = f[BITS]
      bits = bits[exe, self] if bits.kind_of? Proc
      val = bits_to_hash(val, bits)
    end
    instance_variable_set(ivar, val)
  }
end
187
+
188
+ # sets default values, then encodes the fields, returns an EData
189
# fills in the default values (extra arguments are forwarded to
# set_default_values), then encodes every field in declaration order
# returns the EncodedData holding the concatenated result
# named field values are first converted back through int_from_hash /
# bits_from_hash when the field has an enum / a bits description
def encode(exe, *a)
  set_default_values(exe, *a)

  out = EncodedData.new
  struct_fields(exe).each { |f|
    encoder = f[ENCODE]

    # anonymous field: the encoder is called with a nil value
    if not f[NAME]
      out << encoder[exe, self, nil] if encoder
      next
    end

    val = instance_variable_get(f[NAME])
    if enum = f[ENUM]
      enum = enum[exe, self] if enum.kind_of? Proc
      val = int_from_hash(val, enum)
    end
    if bits = f[BITS]
      bits = bits[exe, self] if bits.kind_of? Proc
      val = bits_from_hash(val, bits)
    end

    # a field without encoder produces nothing
    next if encoder.nil?

    # NOTE(review): the Array form passes the array itself as the method
    # name to send; confirm the intended call before relying on it
    val = case encoder
          when Symbol then exe.send(encoder, val)
          when Array then exe.send(encoder, *val)
          when Proc then encoder[exe, self, val]
          else val
          end
    out << val
  }
  out
end
211
+
212
+ # shortcut to create a new instance and decode it
213
# creates a new instance, decodes it with the given arguments and returns it
def self.decode(*a)
  instance = new
  instance.decode(*a)
  instance
end
218
+
219
# representation of one field value, used by #to_s
# e: the value, a: the list of structs currently being displayed
# integers >= 0x100 are shown in hex, strings longer than 64 chars are
# shortened, arrays are dumped element by element, and a struct already
# present in a is shown as '...'
def dump(e, a)
  if Integer === e
    e >= 0x100 ? '0x%X'%e : e
  elsif String === e
    e.length > 64 ? e[0, 62].inspect+'...' : e.inspect
  elsif Array === e
    '[' + e.map { |item| dump(item, a) }.join(', ') + ']'
  elsif SerialStruct === e
    a.include?(e) ? '...' : e.to_s(a)
  else
    e.inspect
  end
end
228
+
229
+ # displays the struct content, ordered by fields
230
# one-line view of the struct: the class name followed by name=value pairs
# declared fields that are set come first, in declaration order, followed
# by any other instance variable
# a: list of the structs already being displayed, handed over to #dump
def to_s(a=[])
  present = instance_variables.map { |iv| iv.to_sym }
  declared = struct_fields.to_a.map { |f| f[NAME] }.compact
  ordered = (declared & present) | present
  shown = ordered.map { |iv| "#{iv}=#{dump(instance_variable_get(iv), a+[self])}" }
  "<#{self.class} " + shown.join(' ') + ">"
end
235
+ end
236
+
237
class ExeFormat
  # the data the struct field decoders read from: #encoded
  def curencoded
    encoded
  end

  # reads a 0-terminated string from ed at its current position and returns
  # it without the terminator
  # returns '' (and reads nothing) when no 0 byte follows the current position
  def decode_strz(ed = curencoded)
    stop = ed.data.index(?\0, ed.ptr)
    return '' if not stop
    ed.read(stop - ed.ptr + 1).chop
  end
end
246
+ end