metasm 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -215,7 +215,7 @@ EOS
215
215
  # TODO seh prototype (args => context)
216
216
  # TODO hook on (non)resolution of :w xref
217
217
  def get_xrefs_x(dasm, di)
218
- if @cpu.shortname =~ /ia32|x64/ and a = di.instruction.args.first and a.kind_of? Ia32::ModRM and a.seg and a.seg.val == 4 and
218
+ if @cpu.shortname =~ /^ia32|^x64/ and a = di.instruction.args.first and a.kind_of?(Ia32::ModRM) and a.seg and a.seg.val == 4 and
219
219
  w = get_xrefs_rw(dasm, di).find { |type, ptr, len| type == :w and ptr.externals.include? 'segment_base_fs' } and
220
220
  dasm.backtrace(Expression[w[1], :-, 'segment_base_fs'], di.address).to_a.include?(Expression[0])
221
221
  sehptr = w[1]
@@ -225,7 +225,7 @@ EOS
225
225
  puts "backtrace seh from #{di} => #{a.map { |addr| Expression[addr] }.join(', ')}" if $VERBOSE
226
226
  a.each { |aa|
227
227
  next if aa == Expression::Unknown
228
- l = dasm.auto_label_at(aa, 'seh', 'loc', 'sub')
228
+ dasm.auto_label_at(aa, 'seh', 'loc', 'sub')
229
229
  dasm.addrs_todo << [aa]
230
230
  }
231
231
  super(dasm, di)
@@ -243,17 +243,19 @@ EOS
243
243
  old_cp = d.c_parser
244
244
  d.c_parser = nil
245
245
  d.parse_c '__stdcall void *GetProcAddress(int, char *);'
246
- d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.kind_of? X86_64
246
+ d.parse_c '__stdcall void ExitProcess(int) __attribute__((noreturn));'
247
+ d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.shortname == 'x64'
247
248
  gpa = @cpu.decode_c_function_prototype(d.c_parser, 'GetProcAddress')
249
+ epr = @cpu.decode_c_function_prototype(d.c_parser, 'ExitProcess')
248
250
  d.c_parser = old_cp
249
251
  d.parse_c ''
250
- d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.kind_of? X86_64
252
+ d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.shortname == 'x64'
251
253
  @getprocaddr_unknown = []
252
254
  gpa.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
253
255
  break bind if @getprocaddr_unknown.include? [dasm, calladdr] or not Expression[expr].externals.include? :eax
254
256
  sz = @cpu.size/8
255
257
  break bind if not dasm.decoded[calladdr]
256
- if @cpu.kind_of? X86_64
258
+ if @cpu.shortname == 'x64'
257
259
  arg2 = :rdx
258
260
  else
259
261
  arg2 = Indirection[[:esp, :+, 2*sz], sz, calladdr]
@@ -268,6 +270,7 @@ EOS
268
270
  bind
269
271
  }
270
272
  d.function[Expression['GetProcAddress']] = gpa
273
+ d.function[Expression['ExitProcess']] = epr
271
274
  d.function[:default] = @cpu.disassembler_default_func
272
275
  end
273
276
  d
@@ -294,6 +297,62 @@ EOS
294
297
  } if export
295
298
  syms
296
299
  end
300
+
301
# compute the pe-sha1 or pe-sha256 of the file
# digest is fed through << and must answer hexdigest
#  (e.g. a Digest::SHA1 from digest/sha1 or a Digest::SHA256 from digest/sha2)
# the hashed byte ranges leave out:
#  - the 4 bytes of the optional header :checksum field
#  - when the directory exists, the 8-byte certificate_table entry and the last ct_size bytes of the file
# zero bytes are appended when the file length is not a multiple of 8
# returns the hex checksum
def pehash(digest)
  hdr_end   = @coff_offset + @header.sizeof(self)
  cksum_off = hdr_end + @optheader.offsetof(self, :checksum)
  ct_idx    = DIRECTORIES.index('certificate_table')

  # list of [from, to) file ranges, in hashing order
  ranges = [[0, cksum_off]]
  if @optheader.numrva > ct_idx
    ct_entry = hdr_end + @optheader.sizeof(self) + 8*ct_idx
    ct_size  = @encoded.data[ct_entry, 8].unpack('V*')[1]
    ct_start = @encoded.length - ct_size
    ranges << [cksum_off+4, ct_entry]
    ranges << [ct_entry+8, ct_start] if ct_start > ct_entry+8
  else
    ranges << [cksum_off+4, @encoded.length]
  end

  ranges.each { |from, to| digest << @encoded.data[from ... to].to_str }

  tail = @encoded.length & 7
  digest << ("\0" * (8 - tail)) if tail != 0

  digest.hexdigest
end
325
+
326
# class-level shortcut: decode_file_header(path), then #pehash(digest) on the result
def self.pehash(path, digest)
  pe = decode_file_header(path)
  pe.pehash(digest)
end
329
+
330
# compute Mandiant "importhash"
# builds one "lib.symbol" string per entry of @imports, joins them with ',',
# lowercases the result and returns its MD5 as a hex string
# the library name is lowercased and loses a trailing .dll/.sys/.ocx extension
# imports by ordinal are named through WindowsExports::IMPORT_HASH[lib] when this
# table knows the ordinal, and "ord<N>" otherwise
def imphash
  require 'digest/md5'

  lst = []
  @imports.each { |id|
    # the dot is escaped: only a real extension is removed ("mydll" stays "mydll")
    ln = id.libname.downcase.sub(/\.(dll|sys|ocx)$/, '')
    id.imports.each { |i|
      if not i.name and ordtable = WindowsExports::IMPORT_HASH[ln]
        iname = ordtable[i.ordinal]
      else
        iname = i.name
      end
      # no name and no known ordinal mapping
      iname ||= "ord#{i.ordinal}"

      lst << "#{ln}.#{iname}"
    }
  }

  Digest::MD5.hexdigest(lst.join(',').downcase)
end
350
+
351
# class-level shortcut: decode the file header, decode the imports, return the #imphash
def self.imphash(path)
  decoded = decode_file_header(path)
  decoded.decode_imports
  decoded.imphash
end
297
356
  end
298
357
 
299
358
  # an instance of a PE file, loaded in memory
@@ -312,7 +371,7 @@ class LoadedPE < PE
312
371
 
313
372
  # reads a loaded PE from memory, returns a PE object
314
373
  # dumps the header, optheader and all sections ; try to rebuild IAT (#memdump_imports)
315
- def self.memdump(memory, baseaddr, entrypoint = nil, iat_p=nil)
374
+ def self.memdump(memory, baseaddr, entrypoint=nil, iat_p=nil)
316
375
  loaded = LoadedPE.load memory[baseaddr, 0x1000_0000]
317
376
  loaded.load_address = baseaddr
318
377
  loaded.decode
@@ -372,7 +431,6 @@ class LoadedPE < PE
372
431
  else
373
432
  # read imported pointer from the import structure
374
433
  while not ptr = imports.first.iat.shift
375
- load_dll = nil
376
434
  imports.shift
377
435
  break if imports.empty?
378
436
  iat_p = imports.first.iat_p
@@ -415,6 +473,7 @@ class LoadedPE < PE
415
473
  puts 'unknown ptr %x' % ptr if $DEBUG
416
474
  # allow holes in the unk_iat_p table
417
475
  break if not unk_iat_p or failcnt > 4
476
+ loaded_dll = nil
418
477
  failcnt += 1
419
478
  next
420
479
  end
@@ -422,7 +481,7 @@ class LoadedPE < PE
422
481
  end
423
482
 
424
483
  # dumped last importdirectory is correct, append the import field
425
- i = ImportDirectory::Import.new
484
+ i = ImportDirectory::Import.new
426
485
  if e.name
427
486
  puts e.name if $DEBUG
428
487
  i.name = e.name
@@ -433,5 +492,9 @@ class LoadedPE < PE
433
492
  dump.imports.last.imports << i
434
493
  end
435
494
  end
495
+
496
# always raises a RuntimeError: the digest argument is never used
def pehash(digest)
  raise RuntimeError, "cannot compute a PEhash from memory image"
end
436
499
  end
437
500
  end
@@ -0,0 +1,167 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/encode'
9
+ require 'metasm/decode'
10
+
11
+
12
+ module Metasm
13
# Python preparsed module (.pyc)
# decodes the file header, then the marshalled object tree that follows it
class PYC < ExeFormat
  # 1 magic per python version...
  # file = MAGIC(u16) \r \n timestamp(u32) data
  MAGICS = [
    62211 # 62211 = python2.7a0
  ]

  class Header < SerialStruct
    half :version
    half :rn
    word :timestamp
  end

  def decode_half(edata=@encoded) edata.decode_imm(:u16, @endianness) end
  def decode_word(edata=@encoded) edata.decode_imm(:u32, @endianness) end
  def decode_long(edata=@encoded) edata.decode_imm(:i32, @endianness) end
  def sizeof_half ; 2 ; end
  def sizeof_word ; 4 ; end
  def sizeof_long ; 4 ; end

  # file header
  attr_accessor :header
  # the marshalled object
  attr_accessor :root
  # list of all code objects
  attr_accessor :all_code

  def initialize()
    @endianness = :little
    @encoded = EncodedData.new
    super()
  end

  def decode_header
    @header = Header.decode(self)
  end

  # decodes one marshalled object at the current position of @encoded
  # the first byte is the type tag, containers recurse into this method
  # code objects are also appended to @all_code, interned strings to @references
  # raises on a type tag that is not handled
  def decode_pymarshal
    case c = @encoded.read(1)
    when '0'	# NULL
      :null
    when 'N'	# None
      nil
    when 'F'	# False
      false
    when 'T'	# True
      true
    #when 'S'	# stopiter TODO
    #when '.'	# ellipsis TODO
    when 'i'	# long (i32)
      decode_long
    when 'I'	# long (i64)
      decode_word | (decode_long << 32)
    when 'f'	# float (ascii)
      decode_float_ascii
    when 'g'	# float (binary)
      decode_float_bin
    when 'x'	# complex (f f)
      { :type => :complex,
        :real => decode_float_ascii,
        :imag => decode_float_ascii }
    when 'y'	# complex (g g)
      { :type => :complex,
        :real => decode_float_bin,
        :imag => decode_float_bin }
    when 'l'	# long (arbitrary precision)
      decode_pylong
    when 's'	# string: len (long), data
      @encoded.read(decode_long)
    when 't'	# 'interned': string with possible backreference later
      s = @encoded.read(decode_long)
      @references << s
      s
    when 'R'	# stringref (see 't')
      @references[decode_long]
    when '('	# tuple (frozen Array): length l*objs
      obj = []
      decode_long.times { obj << decode_pymarshal }
      obj
    when '['	# list (Array)
      obj = []
      decode_long.times { obj << decode_pymarshal }
      obj
    when '{'	# dict (Hash)
      obj = {}
      loop do
        k = decode_pymarshal
        break if k == :null
        obj[k] = decode_pymarshal
      end
      # tagged wrapper, to avoid confusion with code, etc
      # the tag is the symbol :hash (a bare 'hash' would call Object#hash)
      { :type => :hash, :hash => obj }
    when 'c'	# code
      # XXX format varies with version (header.version)
      obj = {}
      obj[:type] = :code
      obj[:argcount] = decode_long
      #obj[:kwonly_argcount] = decode_long	# not in py2.7
      obj[:nlocals] = decode_long
      obj[:stacksize] = decode_long
      obj[:flags] = decode_long	# TODO bit-decode this one

      # skip the 1-byte tag and the 4-byte length of the following string
      obj[:fileoff] = @encoded.ptr + 5	# XXX assume :code is a 's'
      obj[:code] = decode_pymarshal
      obj[:consts] = decode_pymarshal
      obj[:names] = decode_pymarshal
      obj[:varnames] = decode_pymarshal
      obj[:freevars] = decode_pymarshal
      obj[:cellvars] = decode_pymarshal
      obj[:filename] = decode_pymarshal
      obj[:name] = decode_pymarshal
      obj[:firstlineno] = decode_long
      obj[:lnotab] = decode_pymarshal
      @all_code << obj
      obj
    when 'u'	# unicode
      @encoded.read(decode_long)
    #when '?'	# unknown TODO
    #when '<'	# set TODO
    #when '>'	# set (frozen) TODO
    else
      raise "unsupported python marshal #{c.inspect}"
    end
  end

  # decodes the header then the root object
  # @references is only needed while decoding, it is dropped afterwards
  def decode
    decode_header
    @all_code = []
    @references = []
    @root = decode_pymarshal
    @references = nil
  end

  def cpu_from_headers
    Python.new(self)
  end

  def each_section
    yield @encoded, 0
  end

  # the file offset of the root bytecode, when the root object is a code object
  def get_default_entrypoints
    if @root.kind_of? Hash and @root[:type] == :code
      [@root[:fileoff]]
    else
      []
    end
  end

  # return the :code part which contains off
  def code_at_off(off)
    @all_code.find { |c| c[:fileoff] <= off and c[:fileoff] + c[:code].length > off }
  end

  private

  # float stored as text: 1 length byte, then this many ascii chars
  def decode_float_ascii
    len = @encoded.read(1).unpack('C').first
    @encoded.read(len).to_f
  end

  # float stored as 8 bytes, little-endian IEEE double whatever the host byte order
  def decode_float_bin
    @encoded.read(8).unpack('E').first
  end

  # marshal long: signed digit count (i32), then abs(count) u16 holding 15 bits each,
  # least significant digit first ; the sign of the count is the sign of the value
  def decode_pylong
    cnt = decode_long
    val = 0
    cnt.abs.times { |i| val |= decode_half << (15*i) }
    cnt < 0 ? -val : val
  end
end
167
+ end
@@ -13,19 +13,20 @@ class SerialStruct
13
13
  NAME=0
14
14
  DECODE=1
15
15
  ENCODE=2
16
- DEFVAL=3
17
- ENUM=4
18
- BITS=5
16
+ SIZEOF=3
17
+ DEFVAL=4
18
+ ENUM=5
19
+ BITS=6
19
20
 
20
21
  class << self
21
22
  # defines a new field
22
23
  # adds an accessor
23
- def new_field(name, decode, encode, defval, enum=nil, bits=nil)
24
+ def new_field(name, decode, encode, sizeof, defval, enum=nil, bits=nil)
24
25
  if name
25
26
  attr_accessor name
26
27
  name = "@#{name}".to_sym
27
28
  end
28
- (@@fields[self] ||= []) << [name, decode, encode, defval, enum, bits]
29
+ (@@fields[self] ||= []) << [name, decode, encode, sizeof, defval, enum, bits]
29
30
  end
30
31
 
31
32
  # creates a field constructor for a simple integer
@@ -34,7 +35,7 @@ class << self
34
35
  recv = class << self ; self ; end
35
36
  types.each { |type|
36
37
  recv.send(:define_method, type) { |name, *args|
37
- new_field(name, "decode_#{type}".to_sym, "encode_#{type}".to_sym, args[0] || 0, args[1])
38
+ new_field(name, "decode_#{type}".to_sym, "encode_#{type}".to_sym, "sizeof_#{type}".to_sym, args[0] || 0, args[1])
38
39
  }
39
40
 
40
41
  # shortcut to define multiple fields of this type with default values
@@ -46,24 +47,33 @@ class << self
46
47
 
47
48
  # standard fields:
48
49
 
50
# virtual fields: declared with no decoder, encoder, size nor default value
# they are meant to be handled explicitly in a custom encode/decode
def virtual(*a)
  a.each do |fname|
    new_field(fname, nil, nil, nil, nil)
  end
end
56
+
49
57
  # a fixed-size memory chunk
50
58
  def mem(name, len, defval='')
51
- new_field(name, lambda { |exe, me| exe.curencoded.read(len) }, lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }, defval)
59
+ new_field(name, lambda { |exe, me| exe.curencoded.read(len) }, lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }, lambda { |exe, me| len }, defval)
52
60
  end
53
61
  # a fixed-size string, 0-padded
54
62
  def str(name, len, defval='')
55
- e = lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }
56
63
  d = lambda { |exe, me| v = exe.curencoded.read(len) ; v = v[0, v.index(?\0)] if v.index(?\0) ; v }
57
- new_field(name, d, e, defval)
64
+ e = lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }
65
+ s = lambda { |exe, me| len }
66
+ new_field(name, d, e, s, defval)
58
67
  end
59
68
  # 0-terminated string
60
69
  def strz(name, defval='')
61
70
  d = lambda { |exe, me|
62
- ed = exe.curencoded
71
+ ed = exe.curencoded
63
72
  ed.read(ed.data.index(?\0, ed.ptr)-ed.ptr+1).chop
64
73
  }
65
74
  e = lambda { |exe, me, val| val + 0.chr }
66
- new_field(name, d, e, defval)
75
+ s = lambda { |exe, val| val.length + 1 }
76
+ new_field(name, d, e, s, defval)
67
77
  end
68
78
 
69
79
  # field access
@@ -93,7 +103,7 @@ class << self
93
103
  d = lambda { |exe, me| @bitfield_val = exe.send("decode_#{inttype}") }
94
104
  # reset a temp var
95
105
  e = lambda { |exe, me, val| @bitfield_val = 0 ; nil }
96
- new_field(nil, d, e, nil)
106
+ new_field(nil, d, e, 0, nil)
97
107
 
98
108
  h = h.sort
99
109
  h.length.times { |i|
@@ -107,7 +117,7 @@ class << self
107
117
  d = lambda { |exe, me| (@bitfield_val >> off) & mask }
108
118
  # update the temp var with the field value, return nil
109
119
  e = lambda { |exe, me, val| @bitfield_val |= (val & mask) << off ; nil }
110
- new_field(name, d, e, 0)
120
+ new_field(name, d, e, 0, 0)
111
121
  }
112
122
 
113
123
  # free the temp var
@@ -118,11 +128,13 @@ class << self
118
128
  @bitfield_val = nil
119
129
  exe.send("encode_#{inttype}", val)
120
130
  }
121
- new_field(nil, d, e, nil)
131
+ s = lambda { |exe, me| exe.send("sizeof_#{inttype}") }
132
+ new_field(nil, d, e, s, nil)
122
133
  end
123
134
 
124
135
  # inject a hook to be run during the decoding process
125
136
  def decode_hook(before=nil, &b)
137
+ @@fields[self] ||= []
126
138
  idx = (before ? @@fields[self].index(fld_get(before)) : -1)
127
139
  @@fields[self].insert(idx, [nil, b])
128
140
  end
@@ -209,6 +221,39 @@ end # class methods
209
221
  ed
210
222
  end
211
223
 
224
# size of the structure: sum of the size of every field
# a field size descriptor may be
#  a Proc, called with (exe, self)
#  a Symbol, sent to exe
#  an Array, sent to exe as method name + arguments
#  nil, counted as 0
#  anything else is added as is
def sizeof(exe)
  total = 0
  struct_fields(exe).each { |f|
    sz = f[SIZEOF]
    sz = case sz
         when Proc; sz[exe, self]
         when Symbol; exe.send(sz)
         when Array; exe.send(*sz)
         when nil; 0
         else sz
         end
    total += sz
  }
  total
end
236
+
237
# offset (in bytes) of the structure member
# fld is matched against the field names as given, and with a '@' prefix when it has none
# the offset is the sum of the sizes of the preceding fields (sizes resolved as in #sizeof)
# for bitfields, return the byte offset of the whole bitfield
# raises 'unknown field' when no field matches
def offsetof(exe, fld)
  ivar = (fld.to_s[0] != ?@) ? "@#{fld}".to_sym : fld
  pos = 0
  struct_fields(exe).each { |f|
    return pos if f[NAME] == fld or f[NAME] == ivar

    sz = f[SIZEOF]
    sz = case sz
         when Proc; sz[exe, self]
         when Symbol; exe.send(sz)
         when Array; exe.send(*sz)
         when nil; 0
         else sz
         end
    pos += sz
  }
  raise 'unknown field'
end
256
+
212
257
  # shortcut to create a new instance and decode it
213
258
  def self.decode(*a)
214
259
  s = new
@@ -216,6 +261,14 @@ end # class methods
216
261
  s
217
262
  end
218
263
 
264
# class-level shortcut: #sizeof of a fresh instance
def self.sizeof(exe)
  inst = new
  inst.sizeof(exe)
end
267
+
268
# class-level shortcut: #offsetof of a fresh instance
def self.offsetof(exe, fld)
  inst = new
  inst.offsetof(exe, fld)
end
271
+
219
272
  def dump(e, a)
220
273
  case e
221
274
  when Integer; e >= 0x100 ? '0x%X'%e : e