metasm 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
@@ -0,0 +1,193 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/ia32/opcodes'
|
8
|
+
|
9
|
+
module Metasm
|
10
|
+
class Ia32
|
11
|
+
# Name of the register used as program counter by the debugger.
# Memoized in @dbg_register_pc; defaults to :eip.
def dbg_register_pc
  @dbg_register_pc = :eip unless @dbg_register_pc
  @dbg_register_pc
end
|
14
|
+
# Name of the register used as stack pointer by the debugger.
# Memoized in @dbg_register_sp; defaults to entry 7 of dbg_register_list.
def dbg_register_sp
  @dbg_register_sp = dbg_register_list[7] unless @dbg_register_sp
  @dbg_register_sp
end
|
17
|
+
# Name of the register holding the cpu flags.
# Memoized in @dbg_register_flags; defaults to :eflags.
def dbg_register_flags
  @dbg_register_flags = :eflags unless @dbg_register_flags
  @dbg_register_flags
end
|
20
|
+
|
21
|
+
# List of the register names exposed to the debugger (memoized).
# Other methods index into it: [0] is :eax, [6] is :ebp, [7] is :esp.
def dbg_register_list
  return @dbg_register_list if @dbg_register_list
  @dbg_register_list = [:eax, :ebx, :ecx, :edx, :esi, :edi, :ebp, :esp, :eip]
end
|
24
|
+
|
25
|
+
# Hash register name => size in bits (memoized).
# Any register not listed explicitly is 32 bits wide; the segment
# registers listed here are 16 bits wide.
def dbg_register_size
  return @dbg_register_size if @dbg_register_size
  sizes = Hash.new(32)
  [:cs, :ds, :es, :fs, :gs].each { |seg| sizes[seg] = 16 }
  @dbg_register_size = sizes
end
|
28
|
+
|
29
|
+
# List of the flag names exposed to the debugger (memoized).
def dbg_flag_list
  return @dbg_flag_list if @dbg_flag_list
  @dbg_flag_list = [:c, :p, :a, :z, :s, :i, :d, :o]
end
|
32
|
+
|
33
|
+
# flag name => bit index inside the flags register
DBG_FLAGS = { :c => 0, :p => 2, :a => 4, :z => 6, :s => 7, :t => 8, :i => 9, :d => 10, :o => 11 }

# Returns the current value (0 or 1) of the flag +f+,
# read from the flags register of the debuggee.
def dbg_get_flag(dbg, f)
  flags = dbg.get_reg_value(dbg_register_flags)
  bit = DBG_FLAGS[f]
  (flags >> bit) & 1
end
|
37
|
+
# Sets the flag +f+ to 1 in the flags register of the debuggee
# (read-modify-write of the whole register).
def dbg_set_flag(dbg, f)
  current = dbg.get_reg_value(dbg_register_flags)
  updated = current | (1 << DBG_FLAGS[f])
  dbg.set_reg_value(dbg_register_flags, updated)
end
|
42
|
+
# Clears the flag +f+ in the flags register of the debuggee
# (read-modify-write of the whole register).
def dbg_unset_flag(dbg, f)
  current = dbg.get_reg_value(dbg_register_flags)
  updated = current & ~(1 << DBG_FLAGS[f])
  dbg.set_reg_value(dbg_register_flags, updated)
end
|
47
|
+
|
48
|
+
# Turns single-stepping on by setting the :t flag of the debuggee.
def dbg_enable_singlestep(dbg)
  dbg_set_flag dbg, :t
end
|
51
|
+
# Turns single-stepping off by clearing the :t flag of the debuggee.
def dbg_disable_singlestep(dbg)
  dbg_unset_flag dbg, :t
end
|
54
|
+
|
55
|
+
# Activates the breakpoint +bp+: :bpx breakpoints go through
# dbg_enable_bpx, every other type is handled as a hardware breakpoint.
def dbg_enable_bp(dbg, bp)
  if :bpx == bp.type
    dbg_enable_bpx(dbg, bp)
  else
    dbg_enable_bphw(dbg, bp)
  end
end
|
61
|
+
|
62
|
+
# Deactivates the breakpoint +bp+: :bpx breakpoints go through
# dbg_disable_bpx, every other type is handled as a hardware breakpoint.
def dbg_disable_bp(dbg, bp)
  if :bpx == bp.type
    dbg_disable_bpx(dbg, bp)
  else
    dbg_disable_bphw(dbg, bp)
  end
end
|
68
|
+
|
69
|
+
# Installs a software breakpoint: the byte at bp.address is overwritten
# with 0xcc. The byte it replaces is saved in bp.internal[:previous]
# the first time only, so re-enabling never saves the 0xcc itself.
def dbg_enable_bpx(dbg, bp)
  state = bp.internal
  state[:previous] = dbg.memory[bp.address, 1] unless state[:previous]
  dbg.memory[bp.address, 1] = "\xcc"
end
|
73
|
+
|
74
|
+
# Removes a software breakpoint by writing back the byte
# saved in bp.internal[:previous] at bp.address.
def dbg_disable_bpx(dbg, bp)
  saved_byte = bp.internal[:previous]
  dbg.memory[bp.address, 1] = saved_byte
end
|
77
|
+
|
78
|
+
# allocate a debug register for a hwbp by checking the list of hwbp existing in dbg
|
79
|
+
# Picks a debug register index (0..3) for the hardware breakpoint +bp+.
# An index already stored in bp.internal[:dr] is kept; otherwise the
# lowest index not used by any breakpoint of dbg.breakpoint_thread is taken.
# Also fills the default :type (:x) and :len (1) of the breakpoint.
# Returns the index; raises if all four indexes are taken.
def dbg_alloc_bphw(dbg, bp)
  unless bp.internal[:dr]
    taken = dbg.breakpoint_thread.values.map { |other| other.internal[:dr] }
    free = [0, 1, 2, 3].reject { |idx| taken.include?(idx) }
    raise 'alloc_bphw: no free debugregister' if free.empty?
    bp.internal[:dr] = free.first
  end
  bp.internal[:type] = :x unless bp.internal[:type]
  bp.internal[:len] = 1 unless bp.internal[:len]
  bp.internal[:dr]
end
|
90
|
+
|
91
|
+
# Activates a hardware breakpoint.
# Allocates a debug register index nr through dbg_alloc_bphw, then
# programs dr7: the 4-bit len/rw field at bit 16+4*nr and the two
# enable bits at bit 2*nr. The breakpoint address goes to drN.
# Raises if bp.internal[:len] or bp.internal[:type] is not supported.
def dbg_enable_bphw(dbg, bp)
  nr = dbg_alloc_bphw(dbg, bp)
  dr7 = dbg[:dr7]
  len_bits = { 1 => 0, 2 => 1, 4 => 3, 8 => 2 }[bp.internal[:len]]
  rw_bits = { :x => 0, :w => 1, :r => 3 }[bp.internal[:type]]
  raise "enable_bphw: invalid breakpoint #{bp.inspect}" unless len_bits && rw_bits

  ctl_shift = 16 + 4*nr
  en_shift = 2*nr
  dr7 &= ~((15 << ctl_shift) | (3 << en_shift)) # clear
  dr7 |= ((len_bits << 2) | rw_bits) << ctl_shift # set drN len/rw
  dr7 |= 3 << en_shift # enable global/local drN

  dbg["dr#{nr}"] = bp.address
  dbg[:dr7] = dr7
end
|
104
|
+
|
105
|
+
# Deactivates a hardware breakpoint: clears the two dr7 enable bits
# of the debug register index stored in bp.internal[:dr].
# The len/rw field and the drN address are left untouched.
def dbg_disable_bphw(dbg, bp)
  enable_bits = 3 << (2 * bp.internal[:dr])
  dbg[:dr7] = dbg[:dr7] & ~enable_bits
end
|
111
|
+
|
112
|
+
# Prepares the debug registers before resuming the debuggee.
# dr6 is always reset to 0; when both dr6 and dr7 were 0, dr7 is
# set to 0x10000 first, as some OS (eg Windows) only return dr6 if dr7 != 0.
def dbg_check_pre_run(dbg)
  dbg[:dr7] = 0x10000 if dbg[:dr6] == 0 && dbg[:dr7] == 0
  dbg[:dr6] = 0
end
|
118
|
+
|
119
|
+
# Called when the software breakpoint +b+ is hit.
# If pc is exactly one byte past the breakpoint address, moves pc
# back onto it; otherwise does nothing and returns nil.
def dbg_evt_bpx(dbg, b)
  return unless b.address == dbg.pc-1
  dbg.pc -= 1
end
|
124
|
+
|
125
|
+
# Finds the software breakpoint responsible for the current stop.
# Returns nil when bit 0x4000 of dr6 is set (the single-step bit
# according to the Intel manuals); otherwise looks for a breakpoint
# registered at pc-1, then at pc.
def dbg_find_bpx(dbg)
  stepped = (dbg[:dr6] & 0x4000) != 0
  return if stepped
  here = dbg.pc
  known = dbg.breakpoint
  known[here-1] || known[here]
end
|
130
|
+
|
131
|
+
# Finds the hardware breakpoint responsible for the current stop.
# The low 4 bits of dr6 tell which debug register triggered; returns
# the breakpoint of dbg.breakpoint_thread whose internal[:dr] is the
# lowest triggered index, or nil if none of the 4 bits is set.
def dbg_find_hwbp(dbg)
  dr6 = dbg[:dr6]
  return if dr6 & 0xf == 0
  # an Integer is always truthy in ruby (even 0): the bit test has to be
  # compared explicitly, otherwise index 0 is selected whatever dr6 holds
  dn = (0..3).find { |n| (dr6 & (1 << n)) != 0 }
  dbg.breakpoint_thread.values.find { |b| b.internal[:dr] == dn }
end
|
137
|
+
|
138
|
+
# Tells whether the decoded instruction +di+ should be stepped over
# rather than stepped into: true-ish for an instruction carrying a :rep
# prefix or whose opcode has the :saveip property.
# Returns +di+ itself when it is nil/false.
def dbg_need_stepover(dbg, addr, di)
  return di unless di
  (di.instruction.prefix && di.instruction.prefix[:rep]) || di.opcode.props[:saveip]
end
|
141
|
+
|
142
|
+
# Tells whether the decoded instruction +di+ ends a stepout sequence,
# ie its opcode is named 'ret'. Returns +di+ itself when it is nil/false.
def dbg_end_stepout(dbg, addr, di)
  return di unless di
  di.opcode.name == 'ret'
end
|
145
|
+
|
146
|
+
# return (yield) a list of [addr, symbolic name]
|
147
|
+
# Walks the chain of saved frame pointers and returns the list of
# [address, symbolic name] couples, starting with the current pc.
# Each couple is also yielded when a block is given.
# The walk reads the return address at [fp+4] and the previous frame
# pointer at [fp]; it stops after +rec+ frames or as soon as the frame
# pointer is not within 0x10000 bytes above the previous one.
def dbg_stacktrace(dbg, rec=500)
  frames = []
  name = dbg.addrname!(dbg.pc)
  yield(dbg.pc, name) if block_given?
  frames.push [dbg.pc, name]

  frame_ptr = dbg.get_reg_value(dbg_register_list[6])
  floor = dbg.get_reg_value(dbg_register_list[7]) - 8
  while frame_ptr > floor && frame_ptr <= floor+0x10000 && rec != 0
    rec -= 1
    ret_addr = dbg.resolve_expr Indirection[frame_ptr+4, 4]
    name = dbg.addrname!(ret_addr)
    yield(ret_addr, name) if block_given?
    frames.push [ret_addr, name]
    # the next frame has to be above this one: ensures we walk the stack upwards
    floor = frame_ptr
    frame_ptr = dbg.resolve_expr Indirection[frame_ptr, 4]
  end

  frames
end
|
165
|
+
|
166
|
+
# retrieve the current function return value
|
167
|
+
# only valid at function exit
|
168
|
+
# Reads the function return value, ie the value of the first
# register of dbg_register_list.
def dbg_func_retval(dbg)
  result_reg = dbg_register_list[0]
  dbg.get_reg_value(result_reg)
end
|
171
|
+
# Overwrites the function return value, ie the first register
# of dbg_register_list, with +val+.
def dbg_func_retval_set(dbg, val)
  result_reg = dbg_register_list[0]
  dbg.set_reg_value(result_reg, val)
end
|
174
|
+
|
175
|
+
# retrieve the current function return address
|
176
|
+
# to be called only on entry of the subfunction
|
177
|
+
# Reads the return address of the current function: the integer stored
# in memory at the location designated by entry 7 of dbg_register_list.
def dbg_func_retaddr(dbg)
  stack_reg = dbg_register_list[7]
  dbg.memory_read_int(stack_reg)
end
|
180
|
+
# Overwrites the return address of the current function: writes +ret+
# in memory at the location designated by entry 7 of dbg_register_list.
def dbg_func_retaddr_set(dbg, ret)
  stack_reg = dbg_register_list[7]
  dbg.memory_write_int(stack_reg, ret)
end
|
183
|
+
|
184
|
+
# retrieve the current function arguments
|
185
|
+
# only valid at function entry (eg right after the call)
|
186
|
+
# Reads the function argument number +argnr+ (0-based):
# the integer stored at esp + 4*(argnr+1).
def dbg_func_arg(dbg, argnr)
  offset = 4*(argnr+1)
  dbg.memory_read_int(Expression[:esp, :+, offset])
end
|
189
|
+
# Overwrites the function argument number +argnr+ (0-based):
# writes +arg+ at esp + 4*(argnr+1).
def dbg_func_arg_set(dbg, argnr, arg)
  offset = 4*(argnr+1)
  dbg.memory_write_int(Expression[:esp, :+, offset], arg)
end
|
192
|
+
end
|
193
|
+
end
|
@@ -0,0 +1,1167 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/ia32/opcodes'
|
8
|
+
require 'metasm/decode'
|
9
|
+
|
10
|
+
module Metasm
|
11
|
+
class Ia32
|
12
|
+
class ModRM
  # Decodes a modrm argument.
  # +byte+ is the modrm byte; following bytes (sib, displacement) are read from +edata+.
  # mod == 3 designates a register: returns regclass.new(rm, opsz).
  # Otherwise walks the Sum[adsz][mod][rm] description to build the
  # base register, scaled index register and displacement, and returns
  # a new ModRM(adsz, opsz, scale, index, base, imm, seg).
  def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg)
    m = (byte >> 6) & 3
    rm = byte & 7

    return regclass.new(rm, opsz) if m == 3

    scale = index = base = imm = nil
    Sum[adsz][m][rm].each do |part|
      case part
      when Integer
        # first register is the base, the second one is an unscaled index
        if base
          scale = 1
          index = Reg.new(part, adsz)
        else
          base = Reg.new(part, adsz)
        end

      when :sib
        sib = edata.get_byte.to_i

        idx_nr = (sib >> 3) & 7
        unless idx_nr == 4
          scale = 1 << ((sib >> 6) & 3)
          index = Reg.new(idx_nr, adsz)
        end

        base_nr = sib & 7
        if base_nr == 5 && m == 0
          # no base register: a displacement follows
          imm = Expression[edata.decode_imm("i#{adsz}".to_sym, endianness)]
        else
          base = Reg.new(base_nr, adsz)
        end

      when :i8, :i16, :i32
        imm = Expression[edata.decode_imm(part, endianness)]
      end
    end

    if imm && imm.reduce.kind_of?(Integer) && imm.reduce < -0x10_0000
      # probably a base address -> unsigned
      imm = Expression[imm.reduce & ((1 << (adsz || 32)) - 1)]
    end

    new adsz, opsz, scale, index, base, imm, seg
  end
end
|
63
|
+
|
64
|
+
class Farptr
  # Decodes a far pointer from +edata+: an unsigned offset of +adsz+
  # bits comes first, followed by an unsigned 16-bit segment.
  def self.decode(edata, endianness, adsz)
    offset = edata.decode_imm("u#{adsz}".to_sym, endianness)
    addr = Expression[offset]
    selector = edata.decode_imm(:u16, endianness)
    seg = Expression[selector]
    new seg, addr
  end
end
|
71
|
+
|
72
|
+
# Computes op.bin_mask, one mask byte per byte of op.bin:
# a bit is 0 if it can be mutated by a field value, 1 if it is fixed by the opcode.
def build_opcode_bin_mask(op)
  op.bin_mask = Array.new(op.bin.length, 0)
  # first mark every bit covered by a field...
  op.fields.each do |name, position|
    octet, shift = position
    op.bin_mask[octet] |= @fields_mask[name] << shift
  end
  # ...then invert, so that the fixed bits are the ones set
  op.bin_mask.map! { |variable| variable ^ 255 }
end
|
80
|
+
|
81
|
+
# Builds the lookaside table: an array of 256 entries, indexed by byte
# value, each holding the list of opcodes whose first byte may be this value.
# Every opcode gets its bin_mask computed on the way.
def build_bin_lookaside
  table = Array.new(256) { [] }
  opcode_list.each do |op|
    build_opcode_bin_mask(op)

    first = op.bin[0]
    fixed = op.bin_mask[0]
    wanted = first & fixed

    # candidate values range from the opcode byte to the same byte with all variable bits set
    (first..(first | (255^fixed))).each do |value|
      table[value] << op if value & fixed == wanted
    end
  end
  table
end
|
99
|
+
|
100
|
+
# Records +byte+ as a prefix of +instr+.
# The byte is always appended to instr.prefix[:list]; returns true if it
# is a known prefix (the matching key of instr.prefix is then updated),
# false if it is not or if 15 bytes have already been collected.
def decode_prefix(instr, byte)
  instr.prefix ||= {}
  pfx = instr.prefix
  pfx[:list] ||= []
  pfx[:list] << byte

  # XXX actual limit = 15-instr.length
  return false if pfx[:list].length >= 15

  case byte
  when 0x66 then pfx[:opsz] = true
  when 0x67 then pfx[:adsz] = true
  when 0xF0 then pfx[:lock] = true
  when 0xF2 then pfx[:rep] = :nz
  when 0xF3 then pfx[:rep] = :z # postprocessed by decode_instr
  when 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65
    # segment override: the segment number is encoded differently in the 0x2x/0x3x and 0x6x bytes
    segnum = ((byte & 0x40) == 0 ? (byte >> 3) & 3 : byte & 7)
    pfx[:seg] = SegReg.new(segnum)
    pfx[:jmphint] = ((byte & 0x10) == 0x10)
  else
    return false
  end

  true
end
|
127
|
+
|
128
|
+
# tries to find the opcode encoded at edata.ptr
|
129
|
+
# if no match, tries to match a prefix (update di.instruction.prefix)
|
130
|
+
# on match, edata.ptr points to the first byte of the opcode (after prefixes)
|
131
|
+
# Looks for the opcode encoded at edata.ptr.
# While no opcode matches, the current byte is consumed as a prefix
# (through decode_prefix) and the search restarts on the next byte.
# Returns a DecodedInstruction with its opcode set (edata.ptr then
# points to the first opcode byte, after the prefixes), or nil if the
# data runs out or an unmatched byte is not a valid prefix.
def decode_findopcode(edata)
  di = DecodedInstruction.new self
  while edata.ptr < edata.data.length
    pfx = di.instruction.prefix || {}
    byte = edata.data[edata.ptr]
    byte = byte.unpack('C').first if byte.kind_of? ::String # 1.9

    candidate = @bin_lookaside[byte].find do |op|
      # fetch the relevant bytes from edata
      bseq = edata.data[edata.ptr, op.bin.length].unpack('C*')
      di.opcode = op if op.props[:opsz] # needed by opsz(di)

      # check against full opcode mask (a missing byte means the data is too short)
      fixed_ok = op.bin.zip(bseq, op.bin_mask).all? { |want, got, msk| got && ((want & msk) == (got & msk)) }
      next false unless fixed_ok

      # special cases: reject the opcode as soon as one of them applies
      fld = op.fields[:seg2A]
      next false if fld && ((bseq[fld[0]] >> fld[1]) & @fields_mask[:seg2A]) == 1

      fld = op.fields[:seg3A]
      next false if fld && ((bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3A]) < 4

      fld = op.fields[:seg3A] || op.fields[:seg3]
      next false if fld && ((bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3]) > 5

      fld = op.fields[:modrmA]
      next false if fld && ((bseq[fld[0]] >> fld[1]) & 0xC0) == 0xC0

      sz = op.props[:opsz]
      next false if sz && opsz(di) != sz

      ndpfx = op.props[:needpfx]
      next false if ndpfx && !pfx[:list].to_a.include?(ndpfx)

      # return non-ambiguous opcode (eg push.i16 in 32bit mode) / sync with addop_post in opcode.rb
      if pfx[:opsz] && (op.args == [:i] || op.args == [:farptr] || op.name[0, 3] == 'ret')
        next false unless op.props[:opsz]
      end

      next false if pfx[:adsz] && op.props[:adsz] && op.props[:adsz] == @size

      true
    end

    di.opcode = candidate
    return di if candidate

    # no opcode here: the byte has to be a prefix
    break unless decode_prefix(di.instruction, edata.get_byte)
    di.bin_length += 1
  end
end
|
163
|
+
|
164
|
+
# Decodes the arguments of the instruction whose opcode was found by
# decode_findopcode: reads the opcode bytes and whatever follows from
# +edata+, fills di.instruction (opname, args, prefixes) and adds the
# number of bytes consumed to di.bin_length. Returns +di+.
def decode_instr_op(edata, di)
  start_ptr = edata.ptr
  op = di.opcode
  di.instruction.opname = op.name
  bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length
  pfx = di.instruction.prefix || {}

  # a prefix byte required by the opcode encoding is not an actual prefix
  mandatory = op.props[:needpfx]
  if mandatory == 0x66
    pfx.delete :opsz
  elsif mandatory == 0x67
    pfx.delete :adsz
  elsif mandatory == 0xF2 || mandatory == 0xF3
    pfx.delete :rep
  end

  # value of the field +f+ in the opcode bytes, nil if the opcode has no such field
  field_val = lambda do |f|
    fld = op.fields[f]
    (bseq[fld[0]] >> fld[1]) & @fields_mask[f] if fld
  end

  operand_size = opsz(di)
  adsz = (pfx[:adsz] ? 48 - @size : @size)
  mmxsz = ((op.props[:xmmx] && pfx[:opsz]) ? 128 : 64)

  op.args.each do |a|
    di.instruction.args << case a
    when :reg then Reg.new field_val[a], operand_size
    when :eeec then CtrlReg.new field_val[a]
    when :eeed then DbgReg.new field_val[a]
    when :seg2, :seg2A, :seg3, :seg3A then SegReg.new field_val[a]
    when :regfp then FpReg.new field_val[a]
    when :regmmx then SimdReg.new field_val[a], mmxsz
    when :regxmm then SimdReg.new field_val[a], 128

    when :farptr then Farptr.decode edata, @endianness, operand_size
    when :i8, :u8, :u16 then Expression[edata.decode_imm(a, @endianness)]
    when :i then Expression[edata.decode_imm("#{op.props[:unsigned_imm] ? 'a' : 'i'}#{operand_size}".to_sym, @endianness)]

    when :mrm_imm then ModRM.decode edata, (adsz == 16 ? 6 : 5), @endianness, adsz, operand_size, pfx[:seg]
    when :modrm, :modrmA then ModRM.decode edata, field_val[a], @endianness, adsz, operand_size, pfx[:seg]
    when :modrmmmx then ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx[:seg], SimdReg
    when :modrmxmm then ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx[:seg], SimdReg

    when :imm_val1 then Expression[1]
    when :imm_val3 then Expression[3]
    when :reg_cl then Reg.new 1, 8
    when :reg_eax then Reg.new 0, operand_size
    when :reg_dx then Reg.new 2, 16
    when :regfp0 then FpReg.new nil
    else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}"
    end
  end

  di.bin_length += edata.ptr - start_ptr

  # movsx/movzx: source and destination have different sizes
  if ['movsx', 'movzx'].include?(op.name)
    di.instruction.args[1].sz = (di.opcode.props[:argsz] == 8 ? 8 : 16)
    di.instruction.args[0].sz = (pfx[:opsz] ? 48-@size : @size)
  end

  # the segment override is now carried by the modrm argument
  pfx.delete :seg

  # the rep prefix is kept (under its display name) for string opcodes only
  rep = pfx.delete(:rep)
  if rep == :nz || rep == :z
    if di.opcode.props[:strop]
      pfx[:rep] = 'rep'
    elsif di.opcode.props[:stropz]
      pfx[:rep] = (rep == :nz ? 'repnz' : 'repz')
    end
  end

  di
end
|
254
|
+
|
255
|
+
# converts relative jump/call offsets to absolute addresses
|
256
|
+
# adds the eip delta to the offset +off+ of the instruction (may be an Expression) + its bin_length
|
257
|
+
# do not call twice on the same di !
|
258
|
+
# Turns the relative target of a jump/call into an absolute address:
# the last argument becomes addr + di.bin_length + offset.
# Only applies to opcodes having the :setip property, whose last
# argument is an Expression and whose name does not start with 'ret'.
# Must not be called twice on the same di. Returns +di+.
def decode_instr_interpret(di, addr)
  instr = di.instruction
  relative = di.opcode.props[:setip] &&
    instr.args.last.kind_of?(Expression) &&
    instr.opname[0, 3] != 'ret'

  if relative
    delta = instr.args.last.reduce
    target = Expression[[addr, :+, di.bin_length], :+, delta].reduce
    instr.args[-1] = Expression[target]
  end

  di
end
|
267
|
+
|
268
|
+
# return the list of registers as symbols in the order used by pushad
|
269
|
+
# for use in backtrace and stuff, for compatibility with x64
|
270
|
+
# esp is [4]
|
271
|
+
# general purpose registers, in the order of their hardware number
REG_SYMS = [:eax, :ecx, :edx, :ebx, :esp, :ebp, :esi, :edi]

# Returns the list of general purpose register names (the REG_SYMS array itself).
def register_symbols; REG_SYMS end
|
275
|
+
|
276
|
+
# interprets a condition code (in an opcode name) as an expression involving backtracked eflags
|
277
|
+
# eflag_p is never computed, and this returns Expression::Unknown for this flag
|
278
|
+
# ex: 'z' => Expression[:eflag_z]
|
279
|
+
# Translates a condition code suffix (as found in an opcode name, eg
# the 'nz' of 'jnz') into an Expression over the backtracked eflags.
# The parity flag is never computed: conditions relying on it yield
# Expression::Unknown. Returns nil for an unknown condition code.
# ex: 'z' => Expression[:eflag_z]
def decode_cc_to_expr(cc)
  case cc
  when 'o'; Expression[:eflag_o]
  when 'no'; Expression[:'!', :eflag_o]
  when 'b', 'nae', 'c'; Expression[:eflag_c]
  when 'nb', 'ae', 'nc'; Expression[:'!', :eflag_c]
  when 'z', 'e'; Expression[:eflag_z]
  when 'nz', 'ne'; Expression[:'!', :eflag_z]
  when 'be', 'na'; Expression[:eflag_c, :|, :eflag_z]
  when 'nbe', 'a'; Expression[:'!', [:eflag_c, :|, :eflag_z]]
  when 's'; Expression[:eflag_s]
  when 'ns'; Expression[:'!', :eflag_s]
  when 'p', 'pe'; Expression::Unknown
  when 'np', 'po'; Expression::Unknown
  when 'l', 'nge'; Expression[:eflag_s, :'!=', :eflag_o]
  when 'nl', 'ge'; Expression[:eflag_s, :==, :eflag_o]
  when 'le', 'ng'; Expression[[:eflag_s, :'!=', :eflag_o], :|, :eflag_z]
  # 'greater' is the negation of 'le': the zero flag has to be clear
  when 'nle', 'g'; Expression[[:eflag_s, :==, :eflag_o], :&, [:'!', :eflag_z]]
  when 'ecxz'; Expression[:'!', register_symbols[1]]
  when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]]
  end
end
|
301
|
+
|
302
|
+
# hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
|
303
|
+
# Returns the backtrace binding table, building it through
# init_backtrace_binding the first time it is needed.
def backtrace_binding
  return @backtrace_binding if @backtrace_binding
  @backtrace_binding = init_backtrace_binding
end
|
306
|
+
# Replaces the whole backtrace binding table with +b+.
def backtrace_binding=(b)
  @backtrace_binding = b
end
|
307
|
+
|
308
|
+
# Returns the operand size, in bits, of the decoded instruction +di+:
# the :argsz property of the opcode if it has one, else the cpu
# size (@size), toggled between 16 and 32 by an opsz prefix.
# Without +di+, returns the cpu size.
def opsz(di)
  return @size unless di
  forced = di.opcode.props[:argsz]
  return forced if forced
  pfx = di.instruction.prefix
  (pfx && pfx[:opsz]) ? 48 - @size : @size
end
|
314
|
+
|
315
|
+
# Populates the @backtrace_binding hash with default values.
# For every opcode basename found in opcode_list, stores a lambda
# { |di, *symbolic_args| binding } where binding maps each modified target
# (register symbol, Indirection, partial-register Expression, :eflag_x) to
# the Expression of its new value.
# Entries already present in @backtrace_binding are left untouched.
# Bindings that cannot describe the whole effect of the instruction include
# :incomplete_binding => Expression[1].
# Returns the @backtrace_binding hash.
def init_backtrace_binding
  @backtrace_binding ||= {}

  eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols

  # all bits set for the operand size of di (32bits => 0xffff_ffff)
  mask = lambda { |di| (1 << opsz(di))-1 }
  # boolean expr: most significant bit of v (for the operand size of di) is set
  sign = lambda { |v, di| Expression[[[v, :&, mask[di]], :>>, opsz(di)-1], :'!=', 0] }

  opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op|
    # effect of the instruction on registers/memory, eflags excluded
    binding = case op
    when 'mov', 'movsx', 'movzx', 'movsxd', 'movd', 'movq'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
    when 'lea'; lambda { |di, a0, a1| { a0 => a1.target } }
    when 'xchg'; lambda { |di, a0, a1| { a0 => Expression[a1], a1 => Expression[a0] } }
    when 'add', 'sub', 'or', 'xor', 'and', 'pxor', 'adc', 'sbb'
      lambda { |di, a0, a1|
        e_op = { 'add' => :+, 'sub' => :-, 'or' => :|, 'and' => :&, 'xor' => :^, 'pxor' => :^, 'adc' => :+, 'sbb' => :- }[op]
        ret = Expression[a0, e_op, a1]
        ret = Expression[ret, e_op, :eflag_c] if op == 'adc' or op == 'sbb'
        # optimises eax ^ eax => 0
        # avoid hiding memory accesses (to not hide possible fault)
        ret = Expression[ret.reduce] if not a0.kind_of? Indirection
        { a0 => ret }
      }
    when 'xadd'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1], a1 => Expression[a0] } }
    when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } }
    when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } }
    when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask[di]] } }
    when 'neg'; lambda { |di, a0| { a0 => Expression[:-, a0] } }
    when 'rol', 'ror'
      lambda { |di, a0, a1|
        e_op = (op[2] == ?r ? :>> : :<<)
        inv_op = {:<< => :>>, :>> => :<< }[e_op]
        sz = [a1, :%, opsz(di)]
        isz = [[opsz(di), :-, a1], :%, opsz(di)]
        # ror a, b => (a >> b) | (a << (32-b))
        { a0 => Expression[[[a0, e_op, sz], :|, [a0, inv_op, isz]], :&, mask[di]] }
      }
    when 'sar', 'shl', 'sal'; lambda { |di, a0, a1| { a0 => Expression[a0, (op[-1] == ?r ? :>> : :<<), [a1, :%, [opsz(di), 32].max]] } }
    when 'shr'; lambda { |di, a0, a1| { a0 => Expression[[a0, :&, mask[di]], :>>, [a1, :%, opsz(di)]] } }
    when 'cwd', 'cdq', 'cqo'; lambda { |di| { Expression[edx, :&, mask[di]] => Expression[mask[di], :*, sign[eax, di]] } }
    when 'cbw', 'cwde', 'cdqe'
      lambda { |di|
        # sign-extend the low half of the accumulator into the full operand size
        o2 = opsz(di)/2 ; m2 = (1 << o2) - 1
        { Expression[eax, :&, mask[di]] => Expression[[eax, :&, m2], :|, [m2 << o2, :*, [[eax, :>>, o2-1], :&, 1]]] }
      }
    when 'push'
      lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8],
        Indirection[esp, opsz(di)/8, di.address] => Expression[a0] } }
    when 'pop'
      lambda { |di, a0| { esp => Expression[esp, :+, opsz(di)/8],
        a0 => Indirection[esp, opsz(di)/8, di.address] } }
    when 'pushfd'
      # TODO Unknown per bit
      lambda { |di|
        efl = Expression[0x202]
        bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] }
        bts[0, :eflag_c]
        bts[6, :eflag_z]
        bts[7, :eflag_s]
        bts[11, :eflag_o]
        { esp => Expression[esp, :-, opsz(di)/8], Indirection[esp, opsz(di)/8, di.address] => efl }
      }
    when 'popfd'
      lambda { |di|
        bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] }
        { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] }
      }
    when 'sahf'
      # flags are loaded from ah, ie bits 8..15 of eax
      lambda { |di|
        bt = lambda { |pos| Expression[[eax, :>>, 8 + pos], :&, 1] }
        { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] }
      }
    when 'lahf'
      lambda { |di|
        efl = Expression[2]
        bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] }
        bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a]
        bts[6, :eflag_z]
        bts[7, :eflag_s]
        # only ah is written: use the ((eax >> 8) & 0xff) partial-register key,
        # which get_backtrace_binding merges back into eax
        { Expression[[eax, :>>, 8], :&, 0xff] => efl }
      }
    when 'pushad'
      lambda { |di|
        ret = {}
        st_off = 0
        register_symbols.reverse_each { |r|
          ret[Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]] = Expression[r]
          st_off += opsz(di)/8
        }
        ret[esp] = Expression[esp, :-, st_off]
        ret
      }
    when 'popad'
      lambda { |di|
        ret = {}
        st_off = 0
        register_symbols.reverse_each { |r|
          ret[r] = Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]
          st_off += opsz(di)/8
        }
        ret[esp] = Expression[esp, :+, st_off]	# esp is not popped
        ret
      }
    when 'call'
      lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8],
        Indirection[esp, opsz(di)/8, di.address] => Expression[di.next_addr] } }
    when 'ret'; lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/8, :+, a[0] || 0]] } }
    when 'loop', 'loopz', 'loopnz'; lambda { |di, a0| { ecx => Expression[ecx, :-, 1] } }
    when 'enter'
      lambda { |di, a0, a1|
        sz = opsz(di)/8
        depth = a1.reduce % 32
        b = { Indirection[ebp, sz, di.address] => Expression[ebp],
          Indirection[[esp, :+, a0.reduce+sz*depth], sz, di.address] => Expression[ebp],
          ebp => Expression[esp, :-, sz],
          esp => Expression[esp, :-, a0.reduce+sz*depth+sz] }
        (1..depth).each { |i|
          b[Indirection[[esp, :+, a0.reduce+i*sz], sz, di.address]] =
          b[Indirection[[ebp, :-, i*sz], sz, di.address]] =
            Expression::Unknown	# TODO Indirection[[ebp, :-, i*sz], sz, di.address]
        }
        b
      }
    when 'leave'; lambda { |di| { ebp => Indirection[[ebp], opsz(di)/8, di.address], esp => Expression[ebp, :+, opsz(di)/8] } }
    when 'aaa'; lambda { |di| { eax => Expression::Unknown, :incomplete_binding => Expression[1] } }
    when 'imul'
      lambda { |di, *a|
        # 1 operand form == same as 'mul' (ax:dx stuff)
        if not a[1]
          unk = { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] }
          next unk
        end

        if a[2]; e = Expression[a[1], :*, a[2]]
        else e = Expression[[a[0], :*, a[1]], :&, (1 << (di.instruction.args.first.sz || opsz(di))) - 1]
        end
        { a[0] => e }
      }
    when 'mul', 'div', 'idiv'; lambda { |di, *a| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
    when 'rdtsc'; lambda { |di| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
    when /^(stos|movs|lods|scas|cmps)[bwd]$/
      lambda { |di|
        op =~ /^(stos|movs|lods|scas|cmps)([bwd])$/
        e_op = $1
        sz = { 'b' => 1, 'w' => 2, 'd' => 4 }[$2]
        eax_ = Reg.new(0, 8*sz).symbolic
        # direction: going down only when a 'std' is found in the same block
        dir = :+
        if di.block and (di.block.list.find { |ddi| ddi.opcode.name == 'std' } rescue nil)
          dir = :-
        end
        pesi = Indirection[esi, sz, di.address]
        pedi = Indirection[edi, sz, di.address]
        pfx = di.instruction.prefix || {}
        bd =
        case e_op
        when 'movs'
          case pfx[:rep]
          when nil; { pedi => pesi, esi => Expression[esi, dir, sz], edi => Expression[edi, dir, sz] }
          else { pedi => pesi, esi => Expression[esi, dir, [sz ,:*, ecx]], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 }
          end
        when 'stos'
          case pfx[:rep]
          when nil; { pedi => Expression[eax_], edi => Expression[edi, dir, sz] }
          else { pedi => Expression[eax_], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 }
          end
        when 'lods'
          case pfx[:rep]
          when nil; { eax_ => pesi, esi => Expression[esi, dir, sz] }
          else { eax_ => Indirection[[esi, dir, [sz, :*, [ecx, :-, 1]]], sz, di.address], esi => Expression[esi, dir, [sz, :*, ecx]], ecx => 0 }
          end
        when 'scas'
          case pfx[:rep]
          when nil; { edi => Expression[edi, dir, sz] }
          else { edi => Expression::Unknown, ecx => Expression::Unknown }
          end
        when 'cmps'
          case pfx[:rep]
          when nil; { edi => Expression[edi, dir, sz], esi => Expression[esi, dir, sz] }
          else { edi => Expression::Unknown, esi => Expression::Unknown, ecx => Expression::Unknown }
          end
        end
        bd[:incomplete_binding] = Expression[1] if pfx[:rep]
        bd
      }
    when 'clc'; lambda { |di| { :eflag_c => Expression[0] } }
    when 'stc'; lambda { |di| { :eflag_c => Expression[1] } }
    when 'cmc'; lambda { |di| { :eflag_c => Expression[:'!', :eflag_c] } }
    when 'cld'; lambda { |di| { :eflag_d => Expression[0] } }
    when 'std'; lambda { |di| { :eflag_d => Expression[1] } }
    when 'setalc'; lambda { |di| { Reg.new(0, 8).symbolic => Expression[:eflag_c, :*, 0xff] } }
    when /^set/; lambda { |di, *a| { a[0] => Expression[decode_cc_to_expr(op[/^set(.*)/, 1])] } }
    when /^cmov/; lambda { |di, *a| fl = decode_cc_to_expr(op[/^cmov(.*)/, 1]) ; { a[0] => Expression[[fl, :*, a[1]], :|, [[1, :-, fl], :*, a[0]]] } }
    when /^j/
      lambda { |di, a0|
        ret = { 'dummy_metasm_0' => Expression[a0] }	# mark modr/m as read
        if fl = decode_cc_to_expr(op[/^j(.*)/, 1]) and fl != Expression::Unknown
          ret['dummy_metasm_1'] = fl	# mark eflags as read
        end
        ret
      }
    when 'fstenv', 'fnstenv'
      lambda { |di, a0|
        # stores the address of the last non-control fpu instr run
        lastfpuinstr = di.block.list[0...di.block.list.index(di)].reverse.find { |pdi|
          case pdi.opcode.name
          when /fn?init|fn?clex|fldcw|fn?st[cs]w|fn?stenv|fldenv|fn?save|frstor|f?wait/
          when /^f/; true
          end
        } if di.block
        lastfpuinstr = lastfpuinstr.address if lastfpuinstr
        ret = {}
        save_at = lambda { |off, val| ret[Indirection[a0.target + off, 4, di.address]] = val }
        save_at[0, Expression::Unknown]
        save_at[4, Expression::Unknown]
        save_at[8, Expression::Unknown]
        save_at[12, lastfpuinstr || Expression::Unknown]
        save_at[16, Expression::Unknown]
        save_at[20, Expression::Unknown]
        save_at[24, Expression::Unknown]
        ret
      }
    when 'bt'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1] } }
    when 'bts'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
      a0 => Expression[a0, :|, [1, :<<, [a1, :%, opsz(di)]]] } }
    when 'btr'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
      a0 => Expression[a0, :&, [[1, :<<, [a1, :%, opsz(di)]], :^, mask[di]]] } }
    when 'btc'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
      a0 => Expression[a0, :^, [1, :<<, [a1, :%, opsz(di)]]] } }
    when 'bswap'
      lambda { |di, a0|
        if opsz(di) == 64
          { a0 => Expression[
            [[[[a0, :&, 0xff000000_00000000], :>>, 56], :|,
              [[a0, :&, 0x00ff0000_00000000], :>>, 40]], :|,
             [[[a0, :&, 0x0000ff00_00000000], :>>, 24], :|,
              [[a0, :&, 0x000000ff_00000000], :>>, 8]]], :|,
            [[[[a0, :&, 0x00000000_ff000000], :<<, 8], :|,
              [[a0, :&, 0x00000000_00ff0000], :<<, 24]], :|,
             [[[a0, :&, 0x00000000_0000ff00], :<<, 40], :|,
              [[a0, :&, 0x00000000_000000ff], :<<, 56]]]] }
        else	# XXX opsz != 32 => undef
          { a0 => Expression[
            [[[a0, :&, 0xff000000], :>>, 24], :|,
             [[a0, :&, 0x00ff0000], :>>, 8]], :|,
            [[[a0, :&, 0x0000ff00], :<<, 8], :|,
             [[a0, :&, 0x000000ff], :<<, 24]]] }
        end
      }
    when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} }
    end

    # add eflags side-effects: wraps the binding above (when there is one)
    full_binding = case op
    when 'adc', 'add', 'and', 'cmp', 'or', 'sbb', 'sub', 'xor', 'test', 'xadd'
      lambda { |di, a0, a1|
        e_op = { 'adc' => :+, 'add' => :+, 'xadd' => :+, 'and' => :&, 'cmp' => :-, 'or' => :|, 'sbb' => :-, 'sub' => :-, 'xor' => :^, 'test' => :& }[op]
        res = Expression[[a0, :&, mask[di]], e_op, [a1, :&, mask[di]]]
        res = Expression[res, e_op, :eflag_c] if op == 'adc' or op == 'sbb'

        ret = (binding ? binding[di, a0, a1] : {})
        ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0]
        ret[:eflag_s] = sign[res, di]
        ret[:eflag_c] = case e_op
          when :+; Expression[res, :>, mask[di]]
          when :-; Expression[[a0, :&, mask[di]], :<, [a1, :&, mask[di]]]
          else Expression[0]
          end
        ret[:eflag_o] = case e_op
          when :+; Expression[[sign[a0, di], :==, sign[a1, di]], :'&&', [sign[a0, di], :'!=', sign[res, di]]]
          when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]]
          else Expression[0]
          end
        ret
      }
    when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
      lambda { |di, a0, *a|
        ret = (binding ? binding[di, a0, *a] : {})
        res = ret[a0] || Expression::Unknown
        ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0]
        ret[:eflag_s] = sign[res, di]
        case op
        when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0]
        when 'inc', 'dec'	# don't touch carry flag
        else ret[:eflag_c] = Expression::Unknown	# :incomplete_binding ?
        end
        ret[:eflag_o] = case op
          when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1]
          when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1]
          when 'neg'; Expression[[a0, :&, mask[di]], :==, (mask[di]+1) >> 1]
          else Expression::Unknown
          end
        ret
      }
    when 'imul', 'mul', 'idiv', 'div', /^(scas|cmps)[bwdq]$/
      lambda { |di, *a|
        ret = (binding ? binding[di, *a] : {})
        ret[:eflag_z] = ret[:eflag_s] = ret[:eflag_c] = ret[:eflag_o] = Expression::Unknown	# :incomplete_binding ?
        ret
      }
    end

    # do not override a binding that was already registered for this opcode
    @backtrace_binding[op] ||= full_binding || binding if full_binding || binding
  }
  @backtrace_binding
end
|
613
|
+
|
614
|
+
# returns the condition (bool Expression) under which a conditional jump is taken
# returns nil if not a conditional jump
# backtrace for the condition must include the jump itself (eg loop -> ecx--)
def get_jump_condition(di)
  case di.opcode.name
  when /^j(.*)/
    decode_cc_to_expr($1)
  when /^loop(.+)?/
    cc = $1
    # loop: taken while the (already decremented) counter is not zero
    cond = Expression[register_symbols[1], :'!=', 0]
    # loopz/loopnz: the counter test AND the flag test must both hold
    cond = Expression[cond, :'&&', decode_cc_to_expr(cc)] if cc
    cond
  end
end
|
628
|
+
|
629
|
+
# Returns the binding (hash target => new value expression) describing the
# effect of the decoded instruction di, using the backtrace_binding table.
# Keys in partial-register form ((reg >> shift) & mask) are merged back into
# an expression for the full register.
# Instructions without a registered binding get a best-effort binding
# flagged with :incomplete_binding.
def get_backtrace_binding(di)
  args = di.instruction.args.map { |arg|
    case arg
    when ModRM, Reg, SimdReg; arg.symbolic(di)
    else arg
    end
  }

  handler = backtrace_binding[di.opcode.basename]
  if not handler
    puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
    # assume nothing except the 1st arg is modified
    guess = case args[0]
      when Indirection, Symbol; { args[0] => Expression::Unknown }
      when Expression; (x = args[0].externals.first) ? { x => Expression::Unknown } : {}
      else {}
      end
    return guess.update(:incomplete_binding => Expression[1])
  end

  bd = handler[di, *args]

  # handle modifications to al/ah etc
  bd.keys.grep(Expression).each { |e|
    # must be in the form (x & mask), with x either :reg or (:reg >> shift) eg ah == ((eax >> 8) & 0xff)
    next if e.op != :&
    mask = e.rexpr
    next if not mask.kind_of? Integer

    reg = e.lexpr
    shift = nil
    if reg.kind_of?(Expression) and reg.op == :>>
      shift = reg.rexpr
      reg = reg.lexpr if shift.kind_of? Integer
    end
    next if not reg.kind_of? Symbol

    if bd.has_key? reg
      # xchg ah, al ; pop sp..
      puts "backtrace: conflict for #{di}: #{e} vs #{reg}" if $VERBOSE
      bd[reg] = Expression::Unknown
      next
    end

    val = bd.delete e
    mask <<= shift if shift
    invmask = mask ^ (@size == 64 ? 0xffff_ffff_ffff_ffff : 0xffff_ffff)
    bd[reg] =
      if invmask == 0xffff_ffff_0000_0000 and not di.opcode.props[:op32no64]
        # low 32 bits written with a 64bit cpu: upper half is not kept
        Expression[val, :&, 0xffff_ffff]
      elsif invmask == 0
        # the whole register is written
        val
      else
        # keep the untouched bits of the register, insert the new ones
        val = Expression[val, :<<, shift] if shift
        Expression[[reg, :&, invmask], :|, [val, :&, mask]]
      end
  }
  bd
end
|
676
|
+
|
677
|
+
# Returns the list of expressions giving the possible destinations of the
# control-flow instruction di (empty unless the opcode has the :setip prop).
# A jmp/call through [imm + scale*index] pointing into a known section is
# delegated to get_xrefs_x_jmptable.
def get_xrefs_x(dasm, di)
  return [] if not di.opcode.props[:setip]

  sz = opsz(di)
  basename = di.opcode.basename

  # ret: destination is the pointer on top of the stack
  return [Indirection[register_symbols[4], sz/8, di.address]] if basename == 'ret'

  if basename == 'jmp' or basename == 'call'
    a = di.instruction.args.first
    if dasm and a.kind_of?(ModRM) and a.imm and a.s == sz/8 and not a.b and dasm.get_section_at(a.imm)
      return get_xrefs_x_jmptable(dasm, di, a, sz)
    end
  end

  tg = di.instruction.args.first
  case tg
  when ModRM
    # give the memory operand a size if it has none yet
    tg.sz ||= sz
    [Expression[tg.symbolic(di)]]
  when Reg
    [Expression[tg.symbolic(di)]]
  when Expression, ::Integer
    [Expression[tg]]
  when Farptr
    if tg.seg.reduce < 0x30
      [tg.addr]
    else
      [Expression[[tg.seg, :*, 0x10], :+, tg.addr]]
    end
  else
    puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG
    []
  end
end
|
702
|
+
|
703
|
+
# we detected a jmp table (jmp [base+4*idx])
# try to return an accurate dest list:
# - if the single predecessor block ends with 'cmp idx, N ; jb/jbe/jnb/jnbe',
#   N (+1 for the ..e forms) table entries are returned
# - otherwise entries are guessed by reading pointers around the table base
# the symbolic destination is always the first element of the result
def get_xrefs_x_jmptable(dasm, di, mrm, sz)
  # include the symbolic dest for backtrack stuff
  ret = [Expression[mrm.symbolic(di)]]

  # index register; look through a 'mov idx, src' starting a 2-instr block
  i = mrm.i
  blk = di.block.list
  if blk.length == 2 and blk[0].opcode.name =~ /^mov/ and a0 = blk[0].instruction.args[0] and
      a0.respond_to?(:symbolic) and a0.symbolic == i.symbolic
    i = blk[0].instruction.args[1]
  end

  pb = di.block.from_normal.to_a
  if pb.length == 1 and pdi = dasm.decoded[pb[0]] and pdi.opcode.name =~ /^jn?be?/ and
      ppdi = pdi.block.list[-2] and ppdi.opcode.name == 'cmp' and
      ppdi.instruction.args[0].symbolic == i.symbolic and
      lim = Expression[ppdi.instruction.args[1]].reduce and lim.kind_of?(Integer)
    # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax]
    s = dasm.get_section_at(mrm.imm)
    lim += 1 if pdi.opcode.name[-1] == ?e
    lim.times { |v|
      dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8))
      ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
      # advance to the next table entry
      s[0].read(sz/8)
    }
    l = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref')
    replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[l])
    return ret
  end

  puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE
  di.add_comment 'wildguess'

  # start 3 entries before the base when that address is mapped
  v = -3
  s = dasm.get_section_at(mrm.imm - 3*sz/8)
  if not s
    s = dasm.get_section_at(mrm.imm)
    v = 0
  end

  loop do
    ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness)
    diff = Expression[ptr, :-, di.address].reduce
    near = (diff.kind_of?(::Integer) and diff.abs < 4096)
    if near or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr))
      dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8))
      ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
    elsif v > 0
      # first implausible pointer past the table base ends the scan
      break
    end
    v += 1
  end
  ret
end
|
750
|
+
|
751
|
+
# checks if expr is a valid return expression matching the :saveip instruction
# ie a pointer-sized indirection whose target is exactly the stack pointer
def backtrace_is_function_return(expr, di=nil)
  reduced = Expression[expr].reduce_rec
  return false if not reduced.kind_of? Indirection
  return false if not reduced.len == @size/8
  reduced.target == Expression[register_symbols[4]]
end
|
756
|
+
|
757
|
+
# updates the function backtrace_binding
# if the function is big and no specific register is given, only the stack and frame
# pointers are backtracked (the rest of the binding will be lazily updated later, on demand)
# XXX assume retaddrlist is either a list of addr of ret or a list with a single entry which is an external function name (thunk)
# returns the updated binding
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  b = f.backtrace_binding

  esp, ebp = register_symbols[4, 2]

  # XXX handle retaddrlist for multiple/mixed thunks
  thunklast = nil
  if retaddrlist and not dasm.decoded[retaddrlist.first] and di = dasm.decoded[faddr]
    # no return instruction, must be a thunk : find the last instruction (to backtrace from it)
    done = []
    while ndi = dasm.decoded[di.block.to_subfuncret.to_a.first] || dasm.decoded[di.block.to_normal.to_a.first] and
        ndi.kind_of?(DecodedInstruction) and not done.include?(ndi.address)
      done << ndi.address
      di = ndi
    end
    if not di.block.to_subfuncret.to_a.first and di.block.to_normal and di.block.to_normal.length > 1
      thunklast = di.block.list.last.address
    end
  end

  # backtraces register r from every return address and stores the result
  # in the binding (Unknown unless the backtrace gives exactly one value)
  bt_val = lambda { |r|
    next if not retaddrlist
    b[r] = Expression::Unknown	# TODO :pending or something ? (for recursive lazy functions)
    bt = retaddrlist.inject([]) { |found, retaddr|
      found | dasm.backtrace(Expression[r], (thunklast || retaddr),
        :include_start => true, :snapshot_addr => faddr, :origin => retaddr, :from_subfuncret => thunklast)
    }
    b[r] = (bt.length == 1 ? bt.first : Expression::Unknown)
  }

  regs =
    if not wantregs.empty?
      wantregs
    elsif dasm.function_blocks(faddr, true).length < 20
      register_symbols
    else
      [ebp, esp]
    end
  regs.each(&bt_val)

  backtrace_update_function_binding_check(dasm, faddr, f, b, &bt_val)

  b
end
|
807
|
+
|
808
|
+
# Post-processes the function binding b computed for the function at faddr:
# - when ebp is changed, yields a few stack/frame slots so that the caller
#   backtraces them too, and drops those not holding a plain saved register
# - with dasm.funcs_stdabi, presumes that ebp/ebx/esi/edi are preserved when
#   unknown and installs the dynamic callback when esp is unknown
# - labels the function when the binding looks like a known helper
# the block receives each Indirection to backtrace
def backtrace_update_function_binding_check(dasm, faddr, f, b)
  sz = @size/8

  if b[:ebp] and b[:ebp] != Expression[:ebp]
    # may be a custom 'enter' function (eg recent Visual Studio)
    # TODO put all memory writes in the binding ?
    slots = [[:ebp]] + (1..3).map { |n| [:esp, :+, n*sz] }
    slots.each { |ptr|
      ind = Indirection[ptr, sz, faddr]
      yield(ind)
      next if not b[ind]
      b.delete(ind) if not [:ebx, :edx, :esi, :edi, :ebp].include?(b[ind].reduce_rec)
    }
  end

  if dasm.funcs_stdabi
    if b[:esp] and b[:esp] == Expression::Unknown and not f.btbind_callback
      puts "update_func_bind: #{Expression[faddr]} has esp -> unknown, use dynamic callback" if $DEBUG
      f.btbind_callback = disassembler_default_btbind_callback
    end
    [:ebp, :ebx, :esi, :edi].each { |reg|
      next if not b[reg] or not b[reg] == Expression::Unknown
      puts "update_func_bind: #{Expression[faddr]} has #{reg} -> unknown, presume it is preserved" if $DEBUG
      b[reg] = Expression[reg]
    }
  elsif b[:esp] and not Expression[b[:esp], :-, :esp].reduce.kind_of?(::Integer)
    puts "update_func_bind: #{Expression[faddr]} has esp -> #{b[:esp]}" if $DEBUG
  end

  # does the first block of the function touch [segment_base_fs] ?
  uses_fs = lambda {
    dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] }
  }

  # rename some functions
  # TODO database and real signatures
  rename =
    if b[:eax] and Expression[b[:eax], :-, faddr].reduce == 0
      'geteip'	# metasm pic linker
    elsif b[:eax] and b[:ebx] and Expression[b[:eax], :-, :eax].reduce == 0 and
        Expression[b[:ebx], :-, Indirection[:esp, sz, nil]].reduce == 0
      'get_pc_thunk_ebx'	# elf pic convention
    elsif b[:esp] and Expression[b[:esp], :-, [:esp, :-, Indirection[[:esp, :+, 2*sz], sz]]].reduce.kind_of?(::Integer) and
        uses_fs[]
      '__SEH_prolog'
    elsif b[:esp] == Expression[:ebp, :+, sz] and uses_fs[]
      '__SEH_epilog'
    end
  dasm.auto_label_at(faddr, rename, 'loc', 'sub') if rename
end
|
852
|
+
|
853
|
+
# returns true if the expression is an address on the stack
# (ie the stack pointer register is one of its externals)
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  externals.include?(register_symbols[4])
end
|
857
|
+
|
858
|
+
# updates an instruction's argument replacing an expression with another (eg label renamed)
# Expression arguments and the immediate part of ModRM arguments are rewritten,
# any other argument is kept as is
def replace_instr_arg_immediate(i, old, new)
  # old itself becomes new, otherwise old is substituted inside the expression
  subst = lambda { |e| e == old ? new : Expression[e.bind(old => new).reduce] }

  i.args.map! { |a|
    if a.kind_of? Expression
      subst[a]
    elsif a.kind_of? ModRM
      a.imm = subst[a.imm] if a.imm
      a
    else
      a
    end
  }
end
|
870
|
+
|
871
|
+
# returns a DecodedFunction from a parsed C function prototype
# cp is the C parser, sym the function symbol (or its name), orig the
# origin used for the generated backtraces (defaults to the symbol name)
# TODO rebacktrace already decoded functions (load a header file after dasm finished)
# TODO walk structs args
def decode_c_function_prototype(cp, sym, orig=nil)
  sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String)
  df = DecodedFunction.new
  orig ||= Expression[sym.name]

  # registers a backtrace of expr: memory read of rlen bytes, or execution when rlen is nil
  new_bt = lambda { |expr, rlen|
    df.backtracked_for << BacktraceTrace.new(expr, orig, expr, rlen ? :r : :x, rlen)
  }

  # return instr emulation
  if sym.has_attribute('noreturn') or sym.has_attribute('__noreturn__')
    df.noreturn = true
  else
    new_bt[Indirection[:esp, @size/8, orig], nil]
  end

  # register dirty (XXX assume standard ABI)
  [:eax, :ecx, :edx].each { |r| df.backtrace_binding.update r => Expression::Unknown }

  al = cp.typesize[:ptr]
  # stack space used by one argument, rounded up to the pointer size
  slot = lambda { |a| (cp.sizeof(a) + al - 1) / al * al }
  args = sym.type.args.to_a

  # emulate ret <n>
  stackoff =
    if sym.has_attribute 'fastcall'
      # the first 2 args are skipped
      args[2..-1].to_a.inject(al) { |sum, a| sum + slot[a] }
    elsif sym.has_attribute 'stdcall'
      args.inject(al) { |sum, a| sum + slot[a] }
    else
      al
    end
  df.backtrace_binding[:esp] = Expression[:esp, :+, stackoff]

  # scan args for function pointers
  # TODO walk structs/unions..
  stackoff = al
  args.each { |a|
    p = Indirection[[:esp, :+, stackoff], al, orig]
    stackoff += slot[a]
    next if not a.type.untypedef.kind_of? C::Pointer

    pt = a.type.untypedef.type.untypedef
    case pt
    when C::Function
      new_bt[p, nil]
      df.backtracked_for.last.detached = true
    when C::Struct
      new_bt[p, al]
    else
      new_bt[p, cp.sizeof(nil, pt)]
    end
  }

  df
end
|
926
|
+
|
927
|
+
# the lambda for the :default backtrace_binding callback of the disassembler
# tries to determine the stack offset of unprototyped functions
# working:
#   checks that origin is a ret, that expr is an indirection from esp and that expr.origin is the ret
#   bt_walk from calladdr until we find a call into us, and assumes it is the current function start
#   TODO handle foo: call bar ; bar: pop eax ; call <withourcallback> ; ret -> bar is not the function start (foo is)
#   then backtrace expr from calladdr to funcstart (snapshot), using esp -> esp+<stackoffvariable>
#   from the result, compute stackoffvariable (only if trivial)
# will not work if the current function calls any other unknown function (unless all are __cdecl)
# will not work if the current function is framed (ebp leave ret): in this case the function will return, but its esp will be unknown
# if the stack offset is found and funcaddr is a string, fixup the static binding and remove the dynamic binding
# TODO dynamise thunks bt_for & bt_cb
def disassembler_default_btbind_callback
  esp = register_symbols[4]

  lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
    wsz = @size/8	# pointer size, in bytes
    @dasm_func_default_off ||= {}

    # offset already found for this call site
    if off = @dasm_func_default_off[[dasm, calladdr]]
      bind = bind.merge(esp => Expression[esp, :+, off])
      break bind
    end

    odi = dasm.decoded[origin]
    break bind if not odi or odi.opcode.basename != 'ret'
    expr = expr.reduce_rec if expr.kind_of? Expression
    break bind unless expr.kind_of? Indirection and expr.origin == origin
    break bind unless expr.externals.reject { |e| e =~ /^autostackoffset_/ } == [esp]

    curfunc = dasm.function[funcaddr]
    if curfunc.backtrace_binding and tk = curfunc.backtrace_binding[:thunk] and dasm.function[tk]
      curfunc = dasm.function[tk]
    end

    # scan from calladdr for the probable parent function start
    func_start = nil
    dasm.backtrace_walk(true, calladdr, false, false, nil, maxdepth) { |ev, foo, h|
      if ev == :up and h[:sfret] != :subfuncret and di = dasm.decoded[h[:to]] and di.opcode.basename == 'call'
        func_start = h[:from]
        break
      elsif ev == :end
        # entrypoints are functions too
        func_start = h[:addr]
        break
      end
    }
    break bind if not func_start
    puts "automagic #{Expression[funcaddr]}: found func start for #{dasm.decoded[origin]} at #{Expression[func_start]}" if dasm.debug_backtrace

    s_off = "autostackoffset_#{Expression[funcaddr]}_#{Expression[calladdr]}"
    list = dasm.backtrace(expr.bind(esp => Expression[esp, :+, s_off]), calladdr,
      :include_start => true, :snapshot_addr => func_start, :maxdepth => maxdepth, :origin => origin)

    # check if this backtrace made us find our binding
    if off = @dasm_func_default_off[[dasm, calladdr]]
      bind = bind.merge(esp => Expression[esp, :+, off])
      break bind
    elsif not curfunc.btbind_callback
      break curfunc.backtrace_binding
    end

    # prefer a result giving a plausible offset, fallback to the first one
    e_expr = list.find { |e_expr_|
      # TODO cleanup this
      e_expr_ = Expression[e_expr_].reduce_rec
      next if not e_expr_.kind_of? Indirection
      off = Expression[[esp, :+, s_off], :-, e_expr_.target].reduce
      off.kind_of? Integer and off >= wsz and off < 10*wsz and (off % wsz) == 0
    } || list.first

    e_expr = e_expr.rexpr if e_expr.kind_of? Expression and e_expr.op == :+ and not e_expr.lexpr
    break bind unless e_expr.kind_of? Indirection

    off = Expression[[esp, :+, s_off], :-, e_expr.target].reduce
    if off.kind_of? Expression
      # try with every other automatic offset set to the pointer size
      bd = off.externals.grep(/^autostackoffset_/).inject({}) { |bd_, xt| bd_.update xt => wsz }
      bd.delete s_off
      if off.bind(bd).reduce == wsz
        # all __cdecl
        off = wsz
      else
        # check if all calls are to the same extern func
        bd.delete_if { |k, v| k !~ /^autostackoffset_#{Expression[funcaddr]}_/ }
        bd.each_key { |k| bd[k] = 0 }
        if off.bind(bd).reduce.kind_of? Integer
          off = off.bind(bd).reduce / (bd.length + 1)
        end
      end
    end

    # reject implausible offsets
    if off.kind_of? Integer and (off < wsz or off > 20*wsz or (off % wsz) != 0)
      puts "autostackoffset: ignoring off #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE
      off = :unknown
    end

    bind = bind.merge esp => Expression[esp, :+, off] if off != :unknown

    if funcaddr != :default
      if not off.kind_of? ::Integer
        #XXX we allow the current function to return, so we should handle the func backtracking its esp
        #(and other register that are saved and restored in epilog)
        puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace
      else
        puts "autostackoffset: found #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE
        # the binding is now static
        curfunc.btbind_callback = nil
        curfunc.backtrace_binding = bind

        # rebacktrace the return address, so that other unknown funcs that depend on us are solved
        dasm.backtrace(Indirection[esp, wsz, origin], origin, :origin => origin)
      end
    else
      if off.kind_of? ::Integer and dasm.decoded[calladdr]
        puts "autostackoffset: found #{off-@size/8} for #{dasm.decoded[calladdr]}" if $VERBOSE
        di = dasm.decoded[calladdr]
        di.comment.delete_if { |c| c =~ /^stackoff=/ } if di.comment
        di.add_comment "stackoff=#{off-@size/8}"
        @dasm_func_default_off[[dasm, calladdr]] = off

        dasm.backtrace(Indirection[esp, wsz, origin], origin, :origin => origin)
      elsif cachedoff = @dasm_func_default_off[[dasm, calladdr]]
        bind[esp] = Expression[esp, :+, cachedoff]
      elsif off.kind_of? ::Integer
        dasm.decoded[calladdr].add_comment "stackoff=#{off-@size/8}"
      end

      puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace
    end

    bind
  }
end
|
1050
|
+
|
1051
|
+
# the :default backtracked_for callback
|
1052
|
+
# returns empty unless funcaddr is not default or calladdr is a call or a jmp
|
1053
|
+
# Builds the lambda used as the :default backtracked_for callback.
# The lambda takes (dasm, btfor, funcaddr, calladdr) and returns:
# * btfor untouched when funcaddr is anything but :default
# * btfor untouched when the instruction decoded at calladdr is named 'call' or 'jmp'
# * an empty array in every other case (including nothing decoded at calladdr)
def disassembler_default_btfor_callback
  lambda { |dasm, btfor, funcaddr, calladdr|
    # an explicit function address always keeps the list
    next btfor if funcaddr != :default

    di = dasm.decoded[calladdr]
    next [] if not di

    opname = di.opcode.name
    if opname == 'call' or opname == 'jmp'
      btfor
    else
      []
    end
  }
end
|
1061
|
+
|
1062
|
+
# returns a DecodedFunction suitable for :default
|
1063
|
+
# uses disassembler_default_bt{for/bind}_callback
|
1064
|
+
# Builds the function object used for :default.
# The prototype 'void stdfunc(void);' is parsed with a fresh C parser and
# decoded through decode_c_function_prototype; the resulting object then gets:
# * a backtrace binding for register_symbols[4] set to "itself + :unknown"
# * the default btbind / btfor callbacks
# Returns that function object.
def disassembler_default_func
  stack_reg = register_symbols[4]

  parser = new_cparser
  parser.parse('void stdfunc(void);')

  func = decode_c_function_prototype(parser, 'stdfunc', :default)
  # the stack adjustment of an unknown function is not known in advance
  func.backtrace_binding[stack_reg] = Expression[stack_reg, :+, :unknown]
  func.btbind_callback = disassembler_default_btbind_callback
  func.btfor_callback = disassembler_default_btfor_callback
  func
end
|
1074
|
+
|
1075
|
+
# returns a hash { :retval => r, :changed => [] }
|
1076
|
+
# Describes the function-call ABI as a hash:
# :retval  => the first register symbol
# :changed => the first three register symbols
def abi_funcall
  regs = register_symbols
  { :retval => regs.first, :changed => regs.first(3) }
end
|
1079
|
+
|
1080
|
+
|
1081
|
+
# computes the binding of the sequence of code starting at entry included
|
1082
|
+
# the binding is a hash showing the value of modified elements at the
|
1083
|
+
# end of the code sequence, relative to their value at entry
|
1084
|
+
# the elements are all the registers and the memory written to
|
1085
|
+
# if finish is nil, the binding will include :ip, which is the address
|
1086
|
+
# to be executed next (if it exists)
|
1087
|
+
# the binding will not include memory access from subfunctions
|
1088
|
+
# entry should be an entrypoint of the disassembler if finish is nil
|
1089
|
+
# the code sequence must have only one end, with no to_normal
|
1090
|
+
# Computes the binding of the code sequence that starts at +entry+ (included).
#
# dasm::   disassembler providing normalize, decoded and backtrace
# entry::  start address of the sequence
# finish:: optional end address (excluded); when nil the sequence ends at the
#          block that has no successor in the same function
#
# Returns a hash mapping each modified element (memory Indirection written to,
# register symbol, and possibly :ip) to its value at the end of the sequence,
# backtraced to the state at +entry+.
# Raises a RuntimeError when the sequence has two ends, ends on an
# unsupported branch, or has no end at all.
def code_binding(dasm, entry, finish=nil)
  entry = dasm.normalize(entry)
  finish = dasm.normalize(finish) if finish
  last_di = nil
  result = {}

  # backtrace expr from address +from+ up to entry; anything but a single
  # answer is reported as Unknown
  backtrack = lambda { |from, expr, inc_start|
    found = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start)
    if found.length == 1
      found.first
    else
      Expression::Unknown
    end
  }

  # walk the blocks, looking for finish and recording memory writes
  pending = [entry]
  visited = [Expression::Unknown]
  while addr = pending.pop
    addr = dasm.normalize(addr)
    next if visited.include?(addr) or addr == finish
    head = dasm.decoded[addr]
    next if not head.kind_of?(DecodedInstruction)
    visited << addr
    blk = head.block

    hit_finish = false
    blk.list.each { |di|
      cur = di.address
      if cur == finish
        # the sequence stops right before finish
        last_di = blk.list[blk.list.index(di) - 1]
        hit_finish = true
        break
      end

      # record the writes done by this instruction
      get_xrefs_w(dasm, di).each { |waddr, len|
        # the pointer is expressed with the register values at entry
        ptr = backtrack[cur, waddr, false]
        result[Indirection[ptr, len, cur]] = backtrack[cur, Indirection[waddr, len, cur], true]
      }
    }
    next if hit_finish

    has_successor = false
    blk.each_to_samefunc(dasm) { |succ|
      has_successor = true
      if succ == finish
        last_di = blk.list.last
      else
        pending << succ
      end
    }
    next if has_successor

    # no successor: this block is the end of the sequence
    raise "two-ended code_binding #{last_di} & #{blk.list.last}" if last_di
    last_di = blk.list.last
    props = last_di.opcode.props
    if props[:setip]
      targets = get_xrefs_x(dasm, last_di)
      raise 'bad code_binding ending' if targets.to_a.length != 1 or not props[:stopexec]
      result[:ip] = backtrack[last_di.address, targets.first, false]
    elsif not props[:stopexec]
      result[:ip] = last_di.next_addr
    end
  end

  # drop the entries whose value did not change
  result.delete_if { |k, v| Expression[k] == Expression[v] }

  # add the register bindings
  raise "no code_binding end" unless last_di or finish
  # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride)
  mask = (@size == 64 ? 0xffff_ffff_ffff_ffff : 0xffff_ffff)
  register_symbols.each { |reg|
    val = last_di ? backtrack[last_di.address, reg, true] : backtrack[finish, reg, false]
    next if val == Expression[reg]
    result[reg] = Expression[Expression[val, :&, mask].reduce]
  }

  result
end
|
1166
|
+
end
|
1167
|
+
end
|