metasm 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.hgtags +3 -0
- data/Gemfile +1 -0
- data/INSTALL +61 -0
- data/LICENCE +458 -0
- data/README +29 -21
- data/Rakefile +10 -0
- data/TODO +10 -12
- data/doc/code_organisation.txt +2 -0
- data/doc/core/DynLdr.txt +247 -0
- data/doc/core/ExeFormat.txt +43 -0
- data/doc/core/Expression.txt +220 -0
- data/doc/core/GNUExports.txt +27 -0
- data/doc/core/Ia32.txt +236 -0
- data/doc/core/SerialStruct.txt +108 -0
- data/doc/core/VirtualString.txt +145 -0
- data/doc/core/WindowsExports.txt +61 -0
- data/doc/core/index.txt +1 -0
- data/doc/style.css +6 -3
- data/doc/usage/debugger.txt +327 -0
- data/doc/usage/index.txt +1 -0
- data/doc/use_cases.txt +2 -2
- data/metasm.gemspec +22 -0
- data/{lib/metasm.rb → metasm.rb} +11 -3
- data/{lib/metasm → metasm}/compile_c.rb +13 -7
- data/metasm/cpu/arc.rb +8 -0
- data/metasm/cpu/arc/decode.rb +425 -0
- data/metasm/cpu/arc/main.rb +191 -0
- data/metasm/cpu/arc/opcodes.rb +588 -0
- data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
- data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
- data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
- data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
- data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
- data/metasm/cpu/arm/opcodes.rb +324 -0
- data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
- data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
- data/metasm/cpu/arm64.rb +15 -0
- data/metasm/cpu/arm64/debug.rb +38 -0
- data/metasm/cpu/arm64/decode.rb +289 -0
- data/metasm/cpu/arm64/encode.rb +41 -0
- data/metasm/cpu/arm64/main.rb +105 -0
- data/metasm/cpu/arm64/opcodes.rb +232 -0
- data/metasm/cpu/arm64/parse.rb +20 -0
- data/metasm/cpu/arm64/render.rb +95 -0
- data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
- data/metasm/cpu/bpf/decode.rb +142 -0
- data/metasm/cpu/bpf/main.rb +60 -0
- data/metasm/cpu/bpf/opcodes.rb +81 -0
- data/metasm/cpu/bpf/render.rb +41 -0
- data/metasm/cpu/cy16.rb +9 -0
- data/metasm/cpu/cy16/decode.rb +253 -0
- data/metasm/cpu/cy16/main.rb +63 -0
- data/metasm/cpu/cy16/opcodes.rb +78 -0
- data/metasm/cpu/cy16/render.rb +41 -0
- data/metasm/cpu/dalvik.rb +11 -0
- data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
- data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
- data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
- data/metasm/cpu/ia32.rb +17 -0
- data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
- data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
- data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
- data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
- data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
- data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
- data/metasm/cpu/ia32/opcodes.rb +1424 -0
- data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
- data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
- data/metasm/cpu/mips.rb +14 -0
- data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
- data/metasm/cpu/mips/debug.rb +42 -0
- data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
- data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
- data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
- data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
- data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
- data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
- data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
- data/metasm/cpu/msp430/decode.rb +247 -0
- data/metasm/cpu/msp430/main.rb +62 -0
- data/metasm/cpu/msp430/opcodes.rb +101 -0
- data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
- data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
- data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
- data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
- data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
- data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
- data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
- data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
- data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
- data/metasm/cpu/ppc/parse.rb +55 -0
- data/metasm/cpu/python.rb +8 -0
- data/metasm/cpu/python/decode.rb +136 -0
- data/metasm/cpu/python/main.rb +36 -0
- data/metasm/cpu/python/opcodes.rb +180 -0
- data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
- data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
- data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
- data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
- data/metasm/cpu/x86_64.rb +15 -0
- data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
- data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
- data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
- data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
- data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
- data/metasm/cpu/x86_64/opcodes.rb +136 -0
- data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
- data/metasm/cpu/x86_64/render.rb +35 -0
- data/metasm/cpu/z80.rb +9 -0
- data/metasm/cpu/z80/decode.rb +313 -0
- data/metasm/cpu/z80/main.rb +67 -0
- data/metasm/cpu/z80/opcodes.rb +224 -0
- data/metasm/cpu/z80/render.rb +59 -0
- data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
- data/{lib/metasm → metasm}/decode.rb +35 -4
- data/{lib/metasm → metasm}/decompile.rb +15 -16
- data/{lib/metasm → metasm}/disassemble.rb +201 -45
- data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
- data/{lib/metasm → metasm}/dynldr.rb +220 -133
- data/{lib/metasm → metasm}/encode.rb +10 -1
- data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
- data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
- data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
- data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
- data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
- data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
- data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
- data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
- data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
- data/metasm/exe_format/gb.rb +65 -0
- data/metasm/exe_format/javaclass.rb +424 -0
- data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
- data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
- data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
- data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
- data/metasm/exe_format/pyc.rb +167 -0
- data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
- data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
- data/metasm/exe_format/shellcode_rwx.rb +114 -0
- data/metasm/exe_format/swf.rb +205 -0
- data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
- data/metasm/exe_format/zip.rb +335 -0
- data/metasm/gui.rb +13 -0
- data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
- data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
- data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
- data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
- data/metasm/gui/dasm_graph.rb +1695 -0
- data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
- data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
- data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
- data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
- data/{lib/metasm → metasm}/gui/debug.rb +93 -27
- data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
- data/{lib/metasm → metasm}/gui/qt.rb +12 -2
- data/{lib/metasm → metasm}/gui/win32.rb +179 -42
- data/{lib/metasm → metasm}/gui/x11.rb +59 -59
- data/{lib/metasm → metasm}/main.rb +389 -264
- data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
- data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
- data/{lib/metasm → metasm}/os/linux.rb +628 -151
- data/metasm/os/main.rb +330 -0
- data/{lib/metasm → metasm}/os/windows.rb +132 -42
- data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
- data/{lib/metasm → metasm}/parse.rb +26 -24
- data/{lib/metasm → metasm}/parse_c.rb +221 -116
- data/{lib/metasm → metasm}/preprocessor.rb +55 -40
- data/{lib/metasm → metasm}/render.rb +14 -38
- data/misc/hexdump.rb +2 -1
- data/misc/lint.rb +58 -0
- data/misc/txt2html.rb +9 -7
- data/samples/bindiff.rb +3 -4
- data/samples/dasm-plugins/bindiff.rb +15 -0
- data/samples/dasm-plugins/bookmark.rb +133 -0
- data/samples/dasm-plugins/c_constants.rb +57 -0
- data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
- data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
- data/samples/dasm-plugins/dasm_all.rb +70 -0
- data/samples/dasm-plugins/demangle_cpp.rb +31 -0
- data/samples/dasm-plugins/deobfuscate.rb +251 -0
- data/samples/dasm-plugins/dump_text.rb +35 -0
- data/samples/dasm-plugins/export_graph_svg.rb +86 -0
- data/samples/dasm-plugins/findgadget.rb +75 -0
- data/samples/dasm-plugins/hl_opcode.rb +32 -0
- data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
- data/samples/dasm-plugins/imm2off.rb +34 -0
- data/samples/dasm-plugins/match_libsigs.rb +93 -0
- data/samples/dasm-plugins/patch_file.rb +95 -0
- data/samples/dasm-plugins/scanfuncstart.rb +36 -0
- data/samples/dasm-plugins/scanxrefs.rb +26 -0
- data/samples/dasm-plugins/selfmodify.rb +197 -0
- data/samples/dasm-plugins/stringsxrefs.rb +28 -0
- data/samples/dasmnavig.rb +1 -1
- data/samples/dbg-apihook.rb +24 -9
- data/samples/dbg-plugins/heapscan.rb +283 -0
- data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
- data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
- data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
- data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
- data/samples/dbg-plugins/heapscan/winheap.h +174 -0
- data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
- data/samples/dbg-plugins/trace_func.rb +214 -0
- data/samples/disassemble-gui.rb +35 -5
- data/samples/disassemble.rb +31 -6
- data/samples/dump_upx.rb +24 -12
- data/samples/dynamic_ruby.rb +12 -3
- data/samples/exeencode.rb +6 -5
- data/samples/factorize-headers-peimports.rb +1 -1
- data/samples/lindebug.rb +175 -381
- data/samples/metasm-shell.rb +1 -2
- data/samples/peldr.rb +2 -2
- data/tests/all.rb +1 -1
- data/tests/arc.rb +26 -0
- data/tests/dynldr.rb +22 -4
- data/tests/expression.rb +55 -0
- data/tests/graph_layout.rb +285 -0
- data/tests/ia32.rb +79 -26
- data/tests/mips.rb +9 -2
- data/tests/x86_64.rb +66 -18
- metadata +330 -218
- data/lib/metasm/arm/opcodes.rb +0 -177
- data/lib/metasm/gui.rb +0 -23
- data/lib/metasm/gui/dasm_graph.rb +0 -1354
- data/lib/metasm/ia32.rb +0 -14
- data/lib/metasm/ia32/opcodes.rb +0 -873
- data/lib/metasm/ppc/parse.rb +0 -52
- data/lib/metasm/x86_64.rb +0 -12
- data/lib/metasm/x86_64/opcodes.rb +0 -118
- data/samples/gdbclient.rb +0 -583
- data/samples/rubstop.rb +0 -399
@@ -215,7 +215,7 @@ EOS
|
|
215
215
|
# TODO seh prototype (args => context)
|
216
216
|
# TODO hook on (non)resolution of :w xref
|
217
217
|
def get_xrefs_x(dasm, di)
|
218
|
-
if @cpu.shortname =~
|
218
|
+
if @cpu.shortname =~ /^ia32|^x64/ and a = di.instruction.args.first and a.kind_of?(Ia32::ModRM) and a.seg and a.seg.val == 4 and
|
219
219
|
w = get_xrefs_rw(dasm, di).find { |type, ptr, len| type == :w and ptr.externals.include? 'segment_base_fs' } and
|
220
220
|
dasm.backtrace(Expression[w[1], :-, 'segment_base_fs'], di.address).to_a.include?(Expression[0])
|
221
221
|
sehptr = w[1]
|
@@ -225,7 +225,7 @@ EOS
|
|
225
225
|
puts "backtrace seh from #{di} => #{a.map { |addr| Expression[addr] }.join(', ')}" if $VERBOSE
|
226
226
|
a.each { |aa|
|
227
227
|
next if aa == Expression::Unknown
|
228
|
-
|
228
|
+
dasm.auto_label_at(aa, 'seh', 'loc', 'sub')
|
229
229
|
dasm.addrs_todo << [aa]
|
230
230
|
}
|
231
231
|
super(dasm, di)
|
@@ -243,17 +243,19 @@ EOS
|
|
243
243
|
old_cp = d.c_parser
|
244
244
|
d.c_parser = nil
|
245
245
|
d.parse_c '__stdcall void *GetProcAddress(int, char *);'
|
246
|
-
d.
|
246
|
+
d.parse_c '__stdcall void ExitProcess(int) __attribute__((noreturn));'
|
247
|
+
d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.shortname == 'x64'
|
247
248
|
gpa = @cpu.decode_c_function_prototype(d.c_parser, 'GetProcAddress')
|
249
|
+
epr = @cpu.decode_c_function_prototype(d.c_parser, 'ExitProcess')
|
248
250
|
d.c_parser = old_cp
|
249
251
|
d.parse_c ''
|
250
|
-
d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.
|
252
|
+
d.c_parser.lexer.define_weak('__MS_X86_64_ABI__') if @cpu.shortname == 'x64'
|
251
253
|
@getprocaddr_unknown = []
|
252
254
|
gpa.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
|
253
255
|
break bind if @getprocaddr_unknown.include? [dasm, calladdr] or not Expression[expr].externals.include? :eax
|
254
256
|
sz = @cpu.size/8
|
255
257
|
break bind if not dasm.decoded[calladdr]
|
256
|
-
if @cpu.
|
258
|
+
if @cpu.shortname == 'x64'
|
257
259
|
arg2 = :rdx
|
258
260
|
else
|
259
261
|
arg2 = Indirection[[:esp, :+, 2*sz], sz, calladdr]
|
@@ -268,6 +270,7 @@ EOS
|
|
268
270
|
bind
|
269
271
|
}
|
270
272
|
d.function[Expression['GetProcAddress']] = gpa
|
273
|
+
d.function[Expression['ExitProcess']] = epr
|
271
274
|
d.function[:default] = @cpu.disassembler_default_func
|
272
275
|
end
|
273
276
|
d
|
@@ -294,6 +297,62 @@ EOS
|
|
294
297
|
} if export
|
295
298
|
syms
|
296
299
|
end
|
300
|
+
|
301
|
+
# compute the pe-sha1 or pe-sha256 of the binary
|
302
|
+
# argument should be a Digest::SHA1 (from digest/sha1) or a Digest::SHA256 (from digest/sha2)
|
303
|
+
# returns the hex checksum
|
304
|
+
# Compute the pe-sha1 / pe-sha256 of the binary.
# digest: a digest object (e.g. Digest::SHA1.new or Digest::SHA256.new); it is
#         fed with the file content and must respond to << and hexdigest
# Returns the hex checksum string.
#
# The hashed content skips the 4 bytes found at the optional header :checksum
# offset. When the optional header has a certificate_table directory entry,
# that 8-byte entry and the trailing ct_size bytes of the file are skipped too.
def pehash(digest)
  raw = @encoded.data
  total = @encoded.length
  hdr_end = @coff_offset + @header.sizeof(self)
  cksum_off = hdr_end + @optheader.offsetof(self, :checksum)
  ct_idx = DIRECTORIES.index('certificate_table')

  # list of byte ranges to hash, in file order
  chunks = [0...cksum_off]
  if @optheader.numrva > ct_idx
    # directory entries are 8 bytes each, the 2nd dword is read as the size
    ct_entry = hdr_end + @optheader.sizeof(self) + 8 * ct_idx
    ct_size = raw[ct_entry, 8].unpack('V*')[1]
    ct_start = total - ct_size
    chunks << ((cksum_off + 4)...ct_entry)
    chunks << ((ct_entry + 8)...ct_start) if ct_start > ct_entry + 8
  else
    chunks << ((cksum_off + 4)...total)
  end

  chunks.each { |range| digest << raw[range].to_str }

  # zero-pad up to a multiple of 8, based on the full file length
  tail = total & 7
  digest << ("\0" * (8 - tail)) if tail != 0

  digest.hexdigest
end
|
325
|
+
|
326
|
+
# Shortcut: decode the file header of the file at +path+ and return the
# pehash of the resulting object, computed with +digest+.
def self.pehash(path, digest)
  pe = decode_file_header(path)
  pe.pehash(digest)
end
|
329
|
+
|
330
|
+
# compute Mandiant "importhash"
|
331
|
+
# Compute the Mandiant "importhash" of the binary.
# Builds the list of "libname.funcname" strings for every entry of @imports
# and returns the MD5 hex digest of that list, comma-joined and lowercased.
#
# The library name is lowercased and stripped of a trailing .dll/.sys/.ocx
# extension. Imports by ordinal are resolved to a name through
# WindowsExports::IMPORT_HASH when the library is listed there, and fall back
# to "ord<N>" otherwise.
def imphash
  lst = []
  @imports.each { |id|
    # the dot must be escaped: only a real ".ext" suffix is removed
    ln = id.libname.downcase.sub(/\.(dll|sys|ocx)$/, '')
    id.imports.each { |i|
      iname = i.name
      if not iname and ordtable = WindowsExports::IMPORT_HASH[ln]
        iname = ordtable[i.ordinal]
      end
      iname ||= "ord#{i.ordinal}"

      lst << "#{ln}.#{iname}"
    }
  }

  # loaded lazily, only needed by this method
  require 'digest/md5'
  Digest::MD5.hexdigest(lst.join(',').downcase)
end
|
350
|
+
|
351
|
+
# Shortcut: decode the file header of the file at +path+, decode its
# imports, and return its importhash.
def self.imphash(path)
  decode_file_header(path).tap { |pe| pe.decode_imports }.imphash
end
|
297
356
|
end
|
298
357
|
|
299
358
|
# an instance of a PE file, loaded in memory
|
@@ -312,7 +371,7 @@ class LoadedPE < PE
|
|
312
371
|
|
313
372
|
# reads a loaded PE from memory, returns a PE object
|
314
373
|
# dumps the header, optheader and all sections ; try to rebuild IAT (#memdump_imports)
|
315
|
-
def self.memdump(memory, baseaddr, entrypoint
|
374
|
+
def self.memdump(memory, baseaddr, entrypoint=nil, iat_p=nil)
|
316
375
|
loaded = LoadedPE.load memory[baseaddr, 0x1000_0000]
|
317
376
|
loaded.load_address = baseaddr
|
318
377
|
loaded.decode
|
@@ -372,7 +431,6 @@ class LoadedPE < PE
|
|
372
431
|
else
|
373
432
|
# read imported pointer from the import structure
|
374
433
|
while not ptr = imports.first.iat.shift
|
375
|
-
load_dll = nil
|
376
434
|
imports.shift
|
377
435
|
break if imports.empty?
|
378
436
|
iat_p = imports.first.iat_p
|
@@ -415,6 +473,7 @@ class LoadedPE < PE
|
|
415
473
|
puts 'unknown ptr %x' % ptr if $DEBUG
|
416
474
|
# allow holes in the unk_iat_p table
|
417
475
|
break if not unk_iat_p or failcnt > 4
|
476
|
+
loaded_dll = nil
|
418
477
|
failcnt += 1
|
419
478
|
next
|
420
479
|
end
|
@@ -422,7 +481,7 @@ class LoadedPE < PE
|
|
422
481
|
end
|
423
482
|
|
424
483
|
# dumped last importdirectory is correct, append the import field
|
425
|
-
|
484
|
+
i = ImportDirectory::Import.new
|
426
485
|
if e.name
|
427
486
|
puts e.name if $DEBUG
|
428
487
|
i.name = e.name
|
@@ -433,5 +492,9 @@ class LoadedPE < PE
|
|
433
492
|
dump.imports.last.imports << i
|
434
493
|
end
|
435
494
|
end
|
495
|
+
|
496
|
+
# A pehash is not available for a PE loaded from memory: always raises
# a RuntimeError, +digest+ is ignored.
def pehash(digest)
  raise RuntimeError, "cannot compute a PEhash from memory image"
end
|
436
499
|
end
|
437
500
|
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/exe_format/main'
|
8
|
+
require 'metasm/encode'
|
9
|
+
require 'metasm/decode'
|
10
|
+
|
11
|
+
|
12
|
+
module Metasm
# Python preparsed module (.pyc)
class PYC < ExeFormat
  # 1 magic per python version...
  # file = MAGIC(u16) \r \n timestamp(u32) data
  MAGICS = [
    62211 # 62211 = python2.7a0
  ]

  # file header: version (u16), rn (u16), timestamp (u32)
  class Header < SerialStruct
    half :version
    half :rn
    word :timestamp
  end

  # integer decoders / sizes referenced by the SerialStruct field types
  def decode_half(edata=@encoded) edata.decode_imm(:u16, @endianness) end
  def decode_word(edata=@encoded) edata.decode_imm(:u32, @endianness) end
  def decode_long(edata=@encoded) edata.decode_imm(:i32, @endianness) end
  def sizeof_half ; 2 ; end
  def sizeof_word ; 4 ; end
  def sizeof_long ; 4 ; end

  # file header
  attr_accessor :header
  # the marshalled object
  attr_accessor :root
  # list of all code objects
  attr_accessor :all_code

  def initialize()
    @endianness = :little
    @encoded = EncodedData.new
    super()
  end

  # decode the file header into @header
  def decode_header
    @header = Header.decode(self)
  end

  # decode one marshalled python object at the current @encoded position
  # reads a 1-byte type tag, then the tag-specific payload (recursively for containers)
  # returns the ruby representation of the object:
  #  :null / nil / false / true / Integer / Float / String / Array
  #  or a Hash with a :type key (:complex, :hash, :code)
  # interned strings are appended to @references, code objects to @all_code
  # (both must be initialized by the caller, see #decode)
  # raises on unsupported type tags
  def decode_pymarshal
    case c = @encoded.read(1)
    when '0' # NULL
      :null
    when 'N' # None
      nil
    when 'F' # False
      false
    when 'T' # True
      true
    #when 'S' # stopiter TODO
    #when '.' # ellipsis TODO
    when 'i' # long (i32)
      decode_long
    when 'I' # long (i64)
      # low dword first (unsigned), then the signed high dword
      decode_word | (decode_long << 32)
    when 'f' # float (ascii)
      # 1-byte length, then the ascii representation
      @encoded.read(@encoded.read(1).unpack('C').first).to_f
    when 'g' # float (binary)
      @encoded.read(8).unpack('d').first # XXX check
    when 'x' # complex (f f)
      { :type => :complex,
        :real => @encoded.read(@encoded.read(1).unpack('C').first).to_f,
        :imag => @encoded.read(@encoded.read(1).unpack('C').first).to_f }
    when 'y' # complex (g g)
      { :type => :complex,
        :real => @encoded.read(8).unpack('d').first,
        :imag => @encoded.read(8).unpack('d').first }
    when 'l' # long (i32?)
      # NOTE(review): decoded as a plain i32, the actual encoding of this tag should be confirmed
      decode_long
    when 's' # string: len (long), data
      @encoded.read(decode_long)
    when 't' # 'interned': string with possible backreference later
      s = @encoded.read(decode_long)
      @references << s
      s
    when 'R' # stringref (see 't')
      @references[decode_long]
    when '(' # tuple (frozen Array): length l*objs
      obj = []
      decode_long.times { obj << decode_pymarshal }
      obj
    when '[' # list (Array)
      obj = []
      decode_long.times { obj << decode_pymarshal }
      obj
    when '{' # dict (Hash)
      # sequence of key, value pairs, terminated by a NULL key
      obj = {}
      loop do
        k = decode_pymarshal
        break if k == :null
        obj[k] = decode_pymarshal
      end
      # tag with the :hash symbol (a bare 'hash' would call Object#hash and store an Integer)
      { :type => :hash, :hash => obj } # XXX to avoid confusion with code, etc
    when 'c' # code
      # XXX format varies with version (header.signature)
      obj = {}
      obj[:type] = :code
      obj[:argcount] = decode_long
      #obj[:kwonly_argcount] = decode_long # not in py2.7
      obj[:nlocals] = decode_long
      obj[:stacksize] = decode_long
      obj[:flags] = decode_long # TODO bit-decode this one

      # +5 skips the type tag (1 byte) and the length (4 bytes) of the following string
      obj[:fileoff] = @encoded.ptr + 5 # XXX assume :code is a 's'
      obj[:code] = decode_pymarshal
      obj[:consts] = decode_pymarshal
      obj[:names] = decode_pymarshal
      obj[:varnames] = decode_pymarshal
      obj[:freevars] = decode_pymarshal
      obj[:cellvars] = decode_pymarshal
      obj[:filename] = decode_pymarshal
      obj[:name] = decode_pymarshal
      obj[:firstlineno] = decode_long
      obj[:lnotab] = decode_pymarshal
      @all_code << obj
      obj
    when 'u' # unicode
      @encoded.read(decode_long)
    #when '?' # unknown TODO
    #when '<' # set TODO
    #when '>' # set (frozen) TODO
    else
      raise "unsupported python marshal #{c.inspect}"
    end
  end

  # decode the whole file: header, then the marshalled root object
  # fills @header, @root and @all_code
  def decode
    decode_header
    @all_code = []
    @references = []
    @root = decode_pymarshal
    # the backreference table is only needed while decoding
    @references = nil
  end

  def cpu_from_headers
    Python.new(self)
  end

  # the whole file is yielded as a single section at address 0
  def each_section
    yield @encoded, 0
  end

  # the entrypoint is the file offset of the root object bytecode, if the root is a code object
  def get_default_entrypoints
    if @root.kind_of? Hash and @root[:type] == :code
      [@root[:fileoff]]
    else
      []
    end
  end

  # return the :code part which contains off
  def code_at_off(off)
    @all_code.find { |c| c[:fileoff] <= off and c[:fileoff] + c[:code].length > off }
  end
end
end
|
@@ -13,19 +13,20 @@ class SerialStruct
|
|
13
13
|
NAME=0
|
14
14
|
DECODE=1
|
15
15
|
ENCODE=2
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
SIZEOF=3
|
17
|
+
DEFVAL=4
|
18
|
+
ENUM=5
|
19
|
+
BITS=6
|
19
20
|
|
20
21
|
class << self
|
21
22
|
# defines a new field
|
22
23
|
# adds an accessor
|
23
|
-
def new_field(name, decode, encode, defval, enum=nil, bits=nil)
|
24
|
+
# Register a new field descriptor for this class.
# The descriptor is the array [name, decode, encode, sizeof, defval, enum, bits],
# appended to the per-class list kept in @@fields.
# When +name+ is given, an accessor of that name is defined and the descriptor
# stores the matching instance variable name (:@name) instead.
def new_field(name, decode, encode, sizeof, defval, enum=nil, bits=nil)
  ivar = name
  if name
    attr_accessor name
    ivar = :"@#{name}"
  end
  registry = (@@fields[self] ||= [])
  registry << [ivar, decode, encode, sizeof, defval, enum, bits]
end
|
30
31
|
|
31
32
|
# creates a field constructor for a simple integer
|
@@ -34,7 +35,7 @@ class << self
|
|
34
35
|
recv = class << self ; self ; end
|
35
36
|
types.each { |type|
|
36
37
|
recv.send(:define_method, type) { |name, *args|
|
37
|
-
new_field(name, "decode_#{type}".to_sym, "encode_#{type}".to_sym, args[0] || 0, args[1])
|
38
|
+
new_field(name, "decode_#{type}".to_sym, "encode_#{type}".to_sym, "sizeof_#{type}".to_sym, args[0] || 0, args[1])
|
38
39
|
}
|
39
40
|
|
40
41
|
# shortcut to define multiple fields of this type with default values
|
@@ -46,24 +47,33 @@ class << self
|
|
46
47
|
|
47
48
|
# standard fields:
|
48
49
|
|
50
|
+
# virtual field, handled explicitly in a custom encode/decode
|
51
|
+
# Declare virtual fields: each name is registered through new_field with no
# decoder, encoder, size nor default value (they are meant to be handled
# explicitly by a custom encode/decode).
def virtual(*a)
  a.each do |field_name|
    new_field(field_name, nil, nil, nil, nil)
  end
end
|
56
|
+
|
49
57
|
# a fixed-size memory chunk
|
50
58
|
# Declare a fixed-size memory chunk of +len+ bytes.
# decode: reads len bytes from exe.curencoded
# encode: truncates the value to len bytes and right-pads it with NUL bytes
# sizeof: always len
def mem(name, len, defval='')
  reader = lambda { |exe, me| exe.curencoded.read(len) }
  writer = lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }
  sizer = lambda { |exe, me| len }
  new_field(name, reader, writer, sizer, defval)
end
|
53
61
|
# a fixed-size string, 0-padded
|
54
62
|
# Declare a fixed-size string of +len+ bytes, 0-padded.
# decode: reads len bytes from exe.curencoded and cuts at the first NUL byte, if any
# encode: truncates the value to len bytes and right-pads it with NUL bytes
# sizeof: always len
def str(name, len, defval='')
  decoder = lambda { |exe, me|
    raw = exe.curencoded.read(len)
    nul = raw.index(?\0)
    nul ? raw[0, nul] : raw
  }
  encoder = lambda { |exe, me, val| val[0, len].ljust(len, 0.chr) }
  sizer = lambda { |exe, me| len }
  new_field(name, decoder, encoder, sizer, defval)
end
|
59
68
|
# 0-terminated string
|
60
69
|
# Declare a 0-terminated string.
# decode: reads from exe.curencoded up to and including the next NUL byte,
#         returns the string without the terminator
# encode: the value followed by a NUL byte
# sizeof: length of the field value + 1 (for the terminator)
def strz(name, defval='')
  d = lambda { |exe, me|
    ed = exe.curencoded
    ed.read(ed.data.index(?\0, ed.ptr)-ed.ptr+1).chop
  }
  e = lambda { |exe, me, val| val + 0.chr }
  # the size lambdas are called with (exe, struct): fetch the current field
  # value from the struct through its accessor, or use defval if unset
  # a String passed directly is still accepted and measured as-is
  s = lambda { |exe, me|
    val = me.kind_of?(::String) ? me : (me.send(name) || defval)
    val.length + 1
  }
  new_field(name, d, e, s, defval)
end
|
68
78
|
|
69
79
|
# field access
|
@@ -93,7 +103,7 @@ class << self
|
|
93
103
|
d = lambda { |exe, me| @bitfield_val = exe.send("decode_#{inttype}") }
|
94
104
|
# reset a temp var
|
95
105
|
e = lambda { |exe, me, val| @bitfield_val = 0 ; nil }
|
96
|
-
new_field(nil, d, e, nil)
|
106
|
+
new_field(nil, d, e, 0, nil)
|
97
107
|
|
98
108
|
h = h.sort
|
99
109
|
h.length.times { |i|
|
@@ -107,7 +117,7 @@ class << self
|
|
107
117
|
d = lambda { |exe, me| (@bitfield_val >> off) & mask }
|
108
118
|
# update the temp var with the field value, return nil
|
109
119
|
e = lambda { |exe, me, val| @bitfield_val |= (val & mask) << off ; nil }
|
110
|
-
|
120
|
+
new_field(name, d, e, 0, 0)
|
111
121
|
}
|
112
122
|
|
113
123
|
# free the temp var
|
@@ -118,11 +128,13 @@ class << self
|
|
118
128
|
@bitfield_val = nil
|
119
129
|
exe.send("encode_#{inttype}", val)
|
120
130
|
}
|
121
|
-
|
131
|
+
s = lambda { |exe, me| exe.send("sizeof_#{inttype}") }
|
132
|
+
new_field(nil, d, e, s, nil)
|
122
133
|
end
|
123
134
|
|
124
135
|
# inject a hook to be run during the decoding process
|
125
136
|
# Inject a hook to be run during the decoding process.
# The block is stored as an unnamed field descriptor [nil, block].
# It is inserted before the field +before+ when given (looked up with
# fld_get), otherwise at the end of the field list.
def decode_hook(before=nil, &b)
  list = (@@fields[self] ||= [])
  pos = -1
  pos = list.index(fld_get(before)) if before
  list.insert(pos, [nil, b])
end
|
@@ -209,6 +221,39 @@ end # class methods
|
|
209
221
|
ed
|
210
222
|
end
|
211
223
|
|
224
|
+
# size of the structure = fields.sum { size of field }
|
225
|
+
# Size of the structure: the sum of the sizes of its fields.
# The SIZEOF entry of each field descriptor is interpreted as:
#  Proc   => called with (exe, self)
#  Symbol => name of a method of exe, called without argument
#  Array  => method name + arguments, sent to exe
#  nil    => 0
#  anything else is used as the size directly
def sizeof(exe)
  total = 0
  struct_fields(exe).each do |field|
    spec = field[SIZEOF]
    total += case spec
             when Proc then spec[exe, self]
             when Symbol then exe.send(spec)
             when Array then exe.send(*spec)
             when nil then 0
             else spec
             end
  end
  total
end
|
236
|
+
|
237
|
+
# offset (in bytes) of the structure member
|
238
|
+
# for bitfields, return the byte offset of the whole bitfield
|
239
|
+
# Offset (in bytes) of the structure member +fld+.
# +fld+ may be given with or without its leading '@'.
# The offset is the sum of the sizes of the fields preceding the member,
# each SIZEOF entry being interpreted as in #sizeof (Proc, Symbol, Array,
# nil => 0, anything else used directly).
# Raises 'unknown field' when no field matches.
def offsetof(exe, fld)
  ivar = (fld.to_s[0] != ?@) ? "@#{fld}".to_sym : fld
  offset = 0
  struct_fields(exe).each do |field|
    fname = field[NAME]
    return offset if fname == fld || fname == ivar

    spec = field[SIZEOF]
    offset += case spec
              when Proc then spec[exe, self]
              when Symbol then exe.send(spec)
              when Array then exe.send(*spec)
              when nil then 0
              else spec
              end
  end
  raise 'unknown field'
end
|
256
|
+
|
212
257
|
# shortcut to create a new instance and decode it
|
213
258
|
def self.decode(*a)
|
214
259
|
s = new
|
@@ -216,6 +261,14 @@ end # class methods
|
|
216
261
|
s
|
217
262
|
end
|
218
263
|
|
264
|
+
# Shortcut: size of a freshly created instance of the structure.
def self.sizeof(exe)
  instance = new
  instance.sizeof(exe)
end
|
267
|
+
|
268
|
+
# Shortcut: offset of the member +fld+ in a freshly created instance of the structure.
def self.offsetof(exe, fld)
  instance = new
  instance.offsetof(exe, fld)
end
|
271
|
+
|
219
272
|
def dump(e, a)
|
220
273
|
case e
|
221
274
|
when Integer; e >= 0x100 ? '0x%X'%e : e
|