metasm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
|
@@ -0,0 +1,1280 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
# this file complements disassemble.rb, adding misc user-friendly methods
|
|
7
|
+
|
|
8
|
+
module Metasm
|
|
9
|
+
class InstructionBlock
  # registers addr as an origin of this block
  # type picks the list to update: :normal, :subfuncret or :indirect
  def add_from(addr, type=:normal)
    send("add_from_#{type}", addr)
  end

  # appends addr to the from_normal list, without duplicates
  def add_from_normal(addr)
    @from_normal = (@from_normal || []) | [addr]
  end

  # appends addr to the from_subfuncret list, without duplicates
  def add_from_subfuncret(addr)
    @from_subfuncret = (@from_subfuncret || []) | [addr]
  end

  # appends addr to the from_indirect list, without duplicates
  def add_from_indirect(addr)
    @from_indirect = (@from_indirect || []) | [addr]
  end

  # walks every from address, in the order normal, subfuncret, indirect
  # yields [address, type]
  def each_from
    each_from_normal { |src| yield src, :normal }
    each_from_subfuncret { |src| yield src, :subfuncret }
    each_from_indirect { |src| yield src, :indirect }
  end

  # yields each from_normal address, does nothing when the list is unset
  def each_from_normal(&b)
    return if not from_normal
    @from_normal.each(&b)
  end

  # yields each from_subfuncret address, does nothing when the list is unset
  def each_from_subfuncret(&b)
    return if not from_subfuncret
    @from_subfuncret.each(&b)
  end

  # yields each from_indirect address, does nothing when the list is unset
  def each_from_indirect(&b)
    return if not from_indirect
    @from_indirect.each(&b)
  end

  # registers addr as a destination of this block
  # type picks the list to update: :normal, :subfuncret or :indirect
  def add_to(addr, type=:normal)
    send("add_to_#{type}", addr)
  end

  # appends addr to the to_normal list, without duplicates
  def add_to_normal(addr)
    @to_normal = (@to_normal || []) | [addr]
  end

  # appends addr to the to_subfuncret list, without duplicates
  def add_to_subfuncret(addr)
    @to_subfuncret = (@to_subfuncret || []) | [addr]
  end

  # appends addr to the to_indirect list, without duplicates
  def add_to_indirect(addr)
    @to_indirect = (@to_indirect || []) | [addr]
  end

  # walks every to address, in the order normal, subfuncret, indirect
  # yields [address, type]
  def each_to
    each_to_normal { |dst| yield dst, :normal }
    each_to_subfuncret { |dst| yield dst, :subfuncret }
    each_to_indirect { |dst| yield dst, :indirect }
  end

  # yields each to_normal address, does nothing when the list is unset
  def each_to_normal(&b)
    return if not to_normal
    @to_normal.each(&b)
  end

  # yields each to_subfuncret address, does nothing when the list is unset
  def each_to_subfuncret(&b)
    return if not to_subfuncret
    @to_subfuncret.each(&b)
  end

  # yields each to_indirect address, does nothing when the list is unset
  def each_to_indirect(&b)
    return if not to_indirect
    @to_indirect.each(&b)
  end

  # yields the from addresses belonging to the same function as this block
  # nothing is yielded when this block's address is registered in dasm.function
  # subfuncret origins are yielded before normal ones
  def each_from_samefunc(dasm, &b)
    return if dasm.function[address]
    each_from_subfuncret(&b)
    each_from_normal(&b)
  end

  # yields the from addresses that are not in the same subfunction as this block
  # normal and subfuncret origins count only when this block's address is
  # registered in dasm.function, indirect origins always count
  def each_from_otherfunc(dasm, &b)
    if dasm.function[address]
      each_from_normal(&b)
      each_from_subfuncret(&b)
    end
    each_from_indirect(&b)
  end

  # yields the normalized to addresses staying in the same subfunction:
  # normal/subfuncret destinations that are not registered in dasm.function
  def each_to_samefunc(dasm)
    each_to { |dst, type|
      next unless type == :normal or type == :subfuncret
      dst = dasm.normalize(dst)
      yield dst unless dasm.function[dst]
    }
  end

  # yields the normalized to addresses leaving this subfunction:
  # indirect destinations, registered functions, and addresses absent from dasm.decoded
  def each_to_otherfunc(dasm)
    each_to { |dst, type|
      dst = dasm.normalize(dst)
      leaves = (type == :indirect or dasm.function[dst] or not dasm.decoded[dst])
      yield dst if leaves
    }
  end
end
|
|
103
|
+
|
|
104
|
+
class DecodedInstruction
  # true when this instruction is the first entry of its block's list
  def block_head?
    head = @block.list.first
    self == head
  end
end
|
|
110
|
+
|
|
111
|
+
class CPU
  # compat alias, for scripts using older version of metasm
  def get_backtrace_binding(di) backtrace_binding(di) end

  # return something like backtrace_binding in the forward direction
  # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
  #
  # the backtrace binding of di is computed once and cached in di.backtrace_binding
  # the pc effects are written to a copy, so that the cached binding is never altered
  # when the instruction sets the ip and get_xrefs_x does not give exactly one
  # target, the result holds :incomplete_binding instead of pc_reg
  def get_fwdemu_binding(di, pc_reg=nil)
    fdi = di.backtrace_binding ||= get_backtrace_binding(di)
    # find self-updated regs & revert them in simultaneous affectations
    # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42  jz foobar)
    fdi.keys.grep(::Symbol).each { |s|
      val = Expression[fdi[s]]
      next if val.lexpr != s or (val.op != :+ and val.op != :-)	#or not val.rexpr.kind_of? ::Integer
      fwd = { s => val }
      inv = { s => val.dup }
      inv[s].op = ((inv[s].op == :+) ? :- : :+)
      # rebuild the binding: indirection keys see the updated reg,
      # the other values are expressed with the update reverted
      nxt = {}
      fdi.each { |k, v|
        if k == s
          nxt[k] = v
        else
          k = k.bind(fwd).reduce_rec if k.kind_of? Indirection
          nxt[k] = Expression[Expression[v].bind(inv).reduce_rec]
        end
      }
      fdi = nxt
    }
    if pc_reg
      # fdi is still the cached hash when no register was rewritten above:
      # work on a copy so the pc entries do not leak into di.backtrace_binding
      fdi = fdi.dup if fdi.equal?(di.backtrace_binding)
      if di.opcode.props[:setip]
        xr = get_xrefs_x(nil, di)
        if xr and xr.length == 1
          fdi[pc_reg] = xr[0]
        else
          fdi[:incomplete_binding] = Expression[1]
        end
      else
        fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
      end
    end
    fdi
  end
end
|
|
153
|
+
|
|
154
|
+
class Disassembler
|
|
155
|
+
# access the default value for @@backtrace_maxblocks for newly created Disassemblers
|
|
156
|
+
# reads the class variable @@backtrace_maxblocks
def self.backtrace_maxblocks
  @@backtrace_maxblocks
end

# overwrites the class variable @@backtrace_maxblocks with b
def self.backtrace_maxblocks=(b)
  @@backtrace_maxblocks = b
end
|
|
158
|
+
|
|
159
|
+
# returns the dasm section's edata containing addr
|
|
160
|
+
# its #ptr points to addr
|
|
161
|
+
# returns the 1st element of #get_section_at
|
|
162
|
+
def get_edata_at(addr)
  section = get_section_at(addr)
  # nil when addr does not belong to any section
  return if not section
  section[0]
end
|
|
167
|
+
|
|
168
|
+
# returns the DecodedInstruction at addr if it exists
|
|
169
|
+
def di_at(addr)
  return if not addr
  # try the raw address first, then its normalized form
  entry = @decoded[addr] || @decoded[normalize(addr)]
  # @decoded may hold something else than a DecodedInstruction: ignore it
  entry if entry.kind_of? DecodedInstruction
end
|
|
173
|
+
|
|
174
|
+
# returns the InstructionBlock containing the address at addr
|
|
175
|
+
def block_at(addr)
  insn = di_at(addr)
  # no instruction decoded at addr: no block either
  return if not insn
  insn.block
end
|
|
179
|
+
|
|
180
|
+
# returns the DecodedFunction at addr if it exists
|
|
181
|
+
def function_at(addr)
  return if not addr
  # try the raw address first, then its normalized form
  entry = @function[addr] || @function[normalize(addr)]
  # only DecodedFunction entries are returned
  entry if entry.kind_of? DecodedFunction
end
|
|
185
|
+
|
|
186
|
+
# returns the DecodedInstruction covering addr
|
|
187
|
+
# returns one at starting nearest addr if multiple are available (overlapping instrs)
|
|
188
|
+
def di_including(addr)
  return if not addr
  base = normalize(addr)
  # look back up to 15 bytes for an instruction long enough to cover base
  16.times { |back|
    cand = @decoded[base - back]
    return cand if cand.kind_of? DecodedInstruction and cand.bin_length > back
  }
  nil
end
|
|
195
|
+
|
|
196
|
+
# returns the InstructionBlock containing the byte at addr
|
|
197
|
+
# returns the one of di_including() on multiple matches (overlapping instrs)
|
|
198
|
+
def block_including(addr)
  insn = di_including(addr)
  # no instruction covers addr: no block either
  return if not insn
  insn.block
end
|
|
202
|
+
|
|
203
|
+
# returns the DecodedFunction including this byte
|
|
204
|
+
# return the one of find_function_start() if multiple are possible (block shared by multiple funcs)
|
|
205
|
+
def function_including(addr)
  insn = di_including(addr)
  return if not insn
  # walk back to the function start, then fetch the function registered there
  start = find_function_start(insn.address)
  function_at(start)
end
|
|
209
|
+
|
|
210
|
+
# yields every InstructionBlock
|
|
211
|
+
# returns the list of IBlocks
|
|
212
|
+
def each_instructionblock
  blocks = []
  @decoded.each_value { |di|
    # a block is reported once, through its first instruction
    next unless di.kind_of? DecodedInstruction and di.block_head?
    blk = di.block
    blocks << blk
    yield blk if block_given?
  }
  blocks
end
|
|
221
|
+
alias instructionblocks each_instructionblock
|
|
222
|
+
|
|
223
|
+
# return a backtrace_binding reversed (akin to code emulation) (but not really)
|
|
224
|
+
def get_fwdemu_binding(di, pc=nil)
  # the actual work is done by the cpu, this is only a shortcut
  cpu = @cpu
  cpu.get_fwdemu_binding(di, pc)
end
|
|
227
|
+
|
|
228
|
+
# reads len raw bytes from the mmaped address space
|
|
229
|
+
def read_raw_data(addr, len)
  section = get_section_at(addr)
  # nil when addr does not belong to any section
  return if not section
  section[0].read(len)
end
|
|
234
|
+
|
|
235
|
+
# read an int of arbitrary type (:u8, :i32, ...)
|
|
236
|
+
def decode_int(addr, type)
  # an Integer type is a size in bytes: 4 => :u32
  type = :"u#{type*8}" if type.kind_of? Integer
  section = get_section_at(addr)
  return if not section
  section[0].decode_imm(type, @cpu.endianness)
end
|
|
242
|
+
|
|
243
|
+
# read a byte at address addr
|
|
244
|
+
def decode_byte(addr)
  # a byte is decoded as an :u8 integer
  decode_int(addr, :u8)
end
|
|
247
|
+
|
|
248
|
+
# read a dword at address addr
|
|
249
|
+
# the dword is cpu-sized (eg 32 or 64bits)
|
|
250
|
+
def decode_dword(addr)
  # @cpu.size is divided by 8 to get the width passed to decode_int
  nbytes = @cpu.size/8
  decode_int(addr, nbytes)
end
|
|
253
|
+
|
|
254
|
+
# read a zero-terminated string from addr
|
|
255
|
+
# if no terminal 0 is found, return nil
|
|
256
|
+
def decode_strz(addr, maxsz=4096)
  section = get_section_at(addr)
  return if not section
  raw = section[0].read(maxsz).to_s
  # position of the terminator, nil if none within maxsz bytes
  stop = raw.index(?\0)
  raw[0, stop] if stop
end
|
|
263
|
+
|
|
264
|
+
# read a zero-terminated wide string from addr
|
|
265
|
+
# return nil if no terminal found
|
|
266
|
+
def decode_wstrz(addr, maxsz=4096)
  section = get_section_at(addr)
  return if not section
  raw = section[0].read(maxsz).to_s
  # index of the first null 16bit little-endian unit, nil if none
  nchars = raw.unpack('v*').index(0)
  # the result keeps the raw bytes, 2 per character
  raw[0, 2*nchars] if nchars
end
|
|
273
|
+
|
|
274
|
+
# disassembles one instruction at address
|
|
275
|
+
# returns nil if no instruction can be decoded there
|
|
276
|
+
# does not update any internal state of the disassembler, nor reuse the @decoded cache
|
|
277
|
+
def disassemble_instruction(addr)
  section = get_section_at(addr)
  return if not section
  # the cpu decodes from the section's edata, at the normalized address
  @cpu.decode_instruction(section[0], normalize(addr))
end
|
|
282
|
+
|
|
283
|
+
# disassemble addr as if the code flow came from from_addr
|
|
284
|
+
def disassemble_from(addr, from_addr)
  # from_addr may be given as a DecodedInstruction
  origin = from_addr
  origin = origin.address if origin.kind_of? DecodedInstruction
  origin = normalize(origin)
  # record the new edge in the origin block, if it exists
  src_block = block_at(origin)
  src_block.add_to_normal(addr) if src_block
  # queue the address and run the disassembler
  @addrs_todo << [addr, origin]
  disassemble
end
|
|
293
|
+
|
|
294
|
+
# returns the label associated to an addr, or nil if none exist
|
|
295
|
+
def get_label_at(addr)
  # get_section_at is called with false as 2nd argument
  edata, _base = get_section_at(addr, false)
  return if not edata
  # the label is the export registered at the edata's current ptr
  edata.inv_export[edata.ptr]
end
|
|
299
|
+
|
|
300
|
+
# sets the label for the specified address
|
|
301
|
+
# returns nil if the address is not mapped
|
|
302
|
+
# memcheck is passed to get_section_at to validate that the address is mapped
|
|
303
|
+
def set_label_at(addr, name, memcheck=true)
  addr = Expression[addr].reduce
  edata, base = get_section_at(addr, memcheck)
  # no section for this address: no label is created
  return if not edata

  label = edata.inv_export[edata.ptr]
  if not label
    # first label at this address: create it and register its binding
    label = @program.new_label(name)
    edata.add_export label, edata.ptr
    @label_alias_cache = nil
    @old_prog_binding[label] = @prog_binding[label] = base + edata.ptr
  elsif label != name
    # a different label already exists there: rename it
    label = rename_label label, @program.new_label(name)
  end
  label
end
|
|
317
|
+
|
|
318
|
+
# remove a label at address addr
|
|
319
|
+
# addr:: the address whose label is removed
# name:: the label to remove, defaults to the label currently found at addr
# removes the export from the section, replaces name by addr in the
# expressions of the instructions cross-referencing addr, and drops name
# from @old_prog_binding and @prog_binding
# does nothing when there is no label to remove
def del_label_at(addr, name=get_label_at(addr))
  # without a name there is nothing to delete, and the comparisons below
  # would match (and overwrite) every expression member that is nil
  return if not name
  ed, = get_section_at(addr)
  if ed and ed.inv_export[ed.ptr]
    ed.del_export name, ed.ptr
    @label_alias_cache = nil
  end
  each_xref(addr) { |xr|
    # only xrefs coming from a decoded Renderable object are patched
    next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable
    o.each_expr { |e|
      e.lexpr = addr if e.lexpr == name
      e.rexpr = addr if e.rexpr == name
    }
  }
  @old_prog_binding.delete name
  @prog_binding.delete name
end
|
|
335
|
+
|
|
336
|
+
# changes a label to another, updates referring instructions etc
|
|
337
|
+
# returns the new label
|
|
338
|
+
# the new label must be program-uniq (see @program.new_label)
|
|
339
|
+
def rename_label(old, new)
  # patch the instructions referencing the old label, and their comments
  each_xref(normalize(old)) { |xref|
    insn = @decoded[xref.origin]
    next if not insn
    @cpu.replace_instr_arg_immediate(insn.instruction, old, new)
    insn.comment.to_a.each { |text| text.gsub!(old, new) }
  }

  # re-register the export under the new name in the edata found for old
  edata, _base = get_section_at(old, false)
  edata.add_export(new, edata.export.delete(old), true) if edata

  raise "cant rename nonexisting label #{old}" if not @prog_binding[old]
  @label_alias_cache = nil
  # move the binding to the new name, and also record it in @old_prog_binding
  @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old)

  # pending addresses may refer to the old label, directly or inside an Expression
  @addrs_todo.each { |entry|
    case entry[0]
    when old; entry[0] = new
    when Expression; entry[0] = entry[0].bind(old => new)
    end
  }

  relocs = @inv_section_reloc[old]
  if relocs
    relocs.each { |base, _edata, off, reloc|
      # patch any instruction covering the relocated bytes
      (0..16).each { |back|
        insn = @decoded[Expression[base]+off-back]
        @cpu.replace_instr_arg_immediate(insn.instruction, old, new) if insn and insn.bin_length > back
      }
      reloc.target = reloc.target.bind(old => new)
    }
    @inv_section_reloc[new] = @inv_section_reloc.delete(old)
  end

  # keep the C parser symbol table in sync
  if c_parser and @c_parser.toplevel.symbol[old]
    sym = @c_parser.toplevel.symbol.delete(old)
    @c_parser.toplevel.symbol[new] = sym
    sym.name = new
  end

  new
end
|
|
378
|
+
|
|
379
|
+
# finds the start of a function from the address of an instruction
|
|
380
|
+
def find_function_start(addr)
  addr = addr.address if addr.kind_of? DecodedInstruction
  pending = [addr]
  visited = []
  # walk the same-function predecessors backwards
  # the loop value is the block address found, or nil when the walk dries up
  while cur = pending.pop
    cur = normalize(cur)
    di = @decoded[cur]
    next if visited.include? cur or not di.kind_of? DecodedInstruction
    visited << cur
    head = di.block.address
    # a registered function start ends the search
    break head if @function[head]
    preds = []
    di.block.each_from_samefunc(self) { |f| preds << f }
    # a block without same-function predecessor is taken as the start
    break head if preds.empty?
    pending.concat preds
  end
end
|
|
397
|
+
|
|
398
|
+
# iterates over the blocks of a function, yields each func block address
|
|
399
|
+
# returns the graph of blocks (block address => [list of samefunc blocks])
|
|
400
|
+
# addr:: an address, a DecodedInstruction or a DecodedFunction
# incl_subfuncs:: when true, also follow the edges reported by each_to_otherfunc
# find_func_start:: when true and addr is not a registered function,
#   start from find_function_start(addr) instead
# yields the address of each block visited (once per block), if a block is given
# returns a hash block address => [list of destination addresses followed]
def each_function_block(addr, incl_subfuncs = false, find_func_start = true)
  if addr.kind_of? DecodedFunction
    # reverse lookup of the function address
    # Hash#index is not available on recent rubies, Hash#key is the same lookup
    addr = @function.respond_to?(:key) ? @function.key(addr) : @function.index(addr)
  end
  addr = addr.address if addr.kind_of? DecodedInstruction
  addr = find_function_start(addr) if not @function[addr] and find_func_start
  todo = [addr]
  ret = {}
  while a = todo.pop
    next if not di = di_at(a)
    # work with the address of the block holding a
    a = di.block.address
    next if ret[a]
    ret[a] = []
    yield a if block_given?
    di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f }
    di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs
  end
  ret
end
|
|
417
|
+
alias function_blocks each_function_block
|
|
418
|
+
|
|
419
|
+
# returns a graph of function calls
|
|
420
|
+
# for each func passed as arg (default: all), update the 'ret' hash
|
|
421
|
+
# associating func => [list of direct subfuncs called]
|
|
422
|
+
def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={})
  # keep each decoded start address once
  starts = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] }
  starts.each { |func|
    # functions already present in ret are left untouched
    next if ret[func]
    ret[func] = []
    each_function_block(func) { |blk_addr|
      @decoded[blk_addr].block.each_to_otherfunc(self) { |callee|
        ret[func] = ret[func] | [callee]
      }
    }
  }
  ret
end
|
|
435
|
+
|
|
436
|
+
# return the graph of function => subfunction list
|
|
437
|
+
# recurses from an entrypoint
|
|
438
|
+
def function_graph_from(addr)
  addr = normalize(addr)
  addr = find_function_start(addr) || addr
  graph = {}
  known = nil
  # expand the graph with the subfunctions found so far, until it stops growing
  until graph.length == known
    known = graph.length
    function_graph(graph.values.flatten + [addr], graph)
  end
  graph
end
|
|
449
|
+
|
|
450
|
+
# return the graph of function => subfunction list
|
|
451
|
+
# for which a (sub-sub)function includes addr
|
|
452
|
+
def function_graph_to(addr)
  addr = normalize(addr)
  addr = find_function_start(addr) || addr
  full = function_graph
  callers = {}
  pending = [addr]
  visited = []
  # climb the full call graph from addr towards its (indirect) callers
  while target = pending.pop
    next if visited.include? target
    visited << target
    full.each { |func, subfuncs|
      next if not subfuncs.include? target
      callers[func] = (callers[func] || []) | [target]
      pending << func
    }
  end
  callers
end
|
|
471
|
+
|
|
472
|
+
# returns info on sections, from @program if supported
|
|
473
|
+
# returns an array of [name, addr, length, info]
|
|
474
|
+
def section_info
  # let the program describe its sections when it knows how to
  return @program.section_info if @program.respond_to? :section_info

  # fallback: one entry per dasm section, without the info field
  @sections.map { |base, edata|
    [get_label_at(base), normalize(base), edata.length, nil]
  }
end
|
|
485
|
+
|
|
486
|
+
# transform an address into a file offset
|
|
487
|
+
def addr_to_fileoff(addr)
  # the program works on normalized addresses
  normalized = normalize(addr)
  @program.addr_to_fileoff(normalized)
end
|
|
491
|
+
|
|
492
|
+
# transform a file offset into an address
|
|
493
|
+
def fileoff_to_addr(foff)
  # the conversion is delegated to the program
  program = @program
  program.fileoff_to_addr(foff)
end
|
|
496
|
+
|
|
497
|
+
# remove the decodedinstruction from..to, replace them by the new Instructions in 'by'
|
|
498
|
+
# this updates the block list structure, old di will still be visible in @decoded, except from original block (those are deleted)
|
|
499
|
+
# if from..to spans multiple blocks
|
|
500
|
+
# to.block is split after to
|
|
501
|
+
# all path from from are replaced by a single link to after 'to', be careful !
|
|
502
|
+
# (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
|
|
503
|
+
# all instructions are stuffed in the first block
|
|
504
|
+
# paths are only walked using from/to_normal
|
|
505
|
+
# 'by' may be empty
|
|
506
|
+
# returns the block containing the new instrs (nil if empty)
|
|
507
|
+
# from:: address of the first instruction to replace
# to:: address of the last instruction to replace
# by:: array of Instruction / DecodedInstruction, converted in place to DecodedInstructions
# raises 'bad from' / 'bad to' if no instruction is decoded there or if it is missing from its block list
def replace_instrs(from, to, by)
  raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi)
  raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi)

  # create DecodedInstruction from Instructions in 'by' if needed
  # presumably split_block makes fdi the first instruction of its block, and
  # tdi the last one of its own - TODO confirm against split_block
  split_block(fdi.block, fdi.address)
  split_block(tdi.block, tdi.block.list[tdi.block.list.index(tdi)+1].address) if tdi != tdi.block.list.last
  # fb: block starting the replaced range, tb: block ending it
  fb = fdi.block
  tb = tdi.block

  # generate DecodedInstr from Instrs
  # try to keep the bin_length of original block
  # wantlen: bytes spanned from the start of fb to the end of tdi,
  # minus the length of the DecodedInstructions already sized in 'by'
  wantlen = tdi.address + tdi.bin_length - fb.address
  wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length }
  # ldi is only used to check the opcode props of the last new instruction
  ldi = by.last
  ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction
  # fall back to 1 byte per raw Instruction if there is not enough room,
  # or if the last new instruction sets the instruction pointer
  wantlen = by.grep(Instruction).length if wantlen < 0 or (ldi and ldi.opcode.props[:setip])
  by.map! { |di|
    if di.kind_of? Instruction
      di = DecodedInstruction.new(di)
      # give this instruction its share of the remaining length
      # (by.grep counts the Instructions not converted yet, this one included)
      wantlen -= di.bin_length = wantlen / by.grep(Instruction).length
    end
    di
  }

  #puts "  ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip]
  by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip]
  # forget the old instructions of both end blocks
  fb.list.each { |di| @decoded.delete di.address }
  fb.list.clear
  tb.list.each { |di| @decoded.delete di.address }
  tb.list.clear
  # all the new instructions go in fb
  by.each { |di| fb.add_di di }
  by.each_with_index { |di, i|
    if odi = di_at(di.address)
      # collision, hopefully with another deobfuscation run ?
      if by[i..-1].all? { |mydi| mydi.to_s == @decoded[mydi.address].to_s }
        # the rest of 'by' renders like what is already decoded there:
        # reuse the existing instructions and link to them
        puts "replace_instrs: merge at #{di}" if $DEBUG
        by[i..-1] = by[i..-1].map { |xdi| @decoded[xdi.address] }
        by[i..-1].each { fb.list.pop }
        split_block(odi.block, odi.address)
        tb.to_normal = [di.address]
        (odi.block.from_normal ||= []) << to
        odi.block.from_normal.uniq!
        break
      else
        #raise "replace_instrs: collision #{di} vs #{odi}"
        puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE
        # shift the new instruction forward until its address is free
        while @decoded[di.address].kind_of? DecodedInstruction	# find free space.. raise ?
          di.address += 1	# XXX use floats ?
          di.bin_length -= 1
        end
      end
    end
    @decoded[di.address] = di
  }
  # drop the @addrs_done entries whose normalized 1st element or 2nd element is tb.address
  @addrs_done.delete_if { |ad| normalize(ad[0]) == tb.address or ad[1] == tb.address }
  # same for fb.address when the range is deleted without replacement
  @addrs_done.delete_if { |ad| normalize(ad[0]) == fb.address or ad[1] == fb.address } if by.empty? and tb.address != fb.address

  # update to_normal/from_normal
  fb.to_normal = tb.to_normal
  fb.to_normal.to_a.each { |newto|
    # other paths may already point to newto, we must only update the relevant entry
    if ndi = di_at(newto) and idx = ndi.block.from_normal.to_a.index(to)
      if by.empty?
        # nothing left in fb: the successors are reached from fb's own predecessors
        ndi.block.from_normal[idx,1] = fb.from_normal.to_a
      else
        ndi.block.from_normal[idx] = fb.list.last.address
      end
    end
  }

  # same update for the subfuncret edges
  fb.to_subfuncret = tb.to_subfuncret
  fb.to_subfuncret.to_a.each { |newto|
    if ndi = di_at(newto) and idx = ndi.block.from_subfuncret.to_a.index(to)
      if by.empty?
        ndi.block.from_subfuncret[idx,1] = fb.from_subfuncret.to_a
      else
        ndi.block.from_subfuncret[idx] = fb.list.last.address
      end
    end
  }

  if by.empty?
    # the range is removed: bypass fb entirely
    tb.to_subfuncret = nil if tb.to_subfuncret == []
    tolist = tb.to_subfuncret || tb.to_normal.to_a
    if lfrom = get_label_at(fb.address) and tolist.length == 1
      # single successor: make the code referencing fb's label use the successor's label
      lto = auto_label_at(tolist.first)
      each_xref(fb.address, :x) { |x|
        next if not di = @decoded[x.origin]
        @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto)
        di.comment.to_a.each { |c| c.gsub!(lfrom, lto) }
      }
    end
    # link the predecessors of fb directly to the successors
    fb.from_normal.to_a.each { |newfrom|
      if ndi = di_at(newfrom) and idx = ndi.block.to_normal.to_a.index(from)
        ndi.block.to_normal[idx..idx] = tolist
      end
    }
    fb.from_subfuncret.to_a.each { |newfrom|
      if ndi = di_at(newfrom) and idx = ndi.block.to_subfuncret.to_a.index(from)
        ndi.block.to_subfuncret[idx..idx] = tolist
      end
    }
  else
    # merge with adjacent blocks
    merge_blocks(fb, fb.to_normal.first) if fb.to_normal.to_a.length == 1 and di_at(fb.to_normal.first)
    merge_blocks(fb.from_normal.first, fb) if fb.from_normal.to_a.length == 1 and di_at(fb.from_normal.first)
  end

  # nil is returned when 'by' is empty
  fb if not by.empty?
end
|
|
618
|
+
|
|
619
|
+
# Undefines the linear run of decoded instructions starting at +addr+.
#
# The block holding +addr+ is first split at +addr+ (split_block), then blocks
# are collected by following the to_normal edge for as long as the flow stays
# linear. The walk stops after a block that has a to_subfuncret edge or does not
# have exactly one to_normal target, when the next block does not have exactly
# one from_normal source, or when it reaches an instruction already collected.
#
# Removes the collected addresses from @decoded, the xrefs originating from
# them from @xrefs, and the @addrs_done entries mentioning them. Also drops the
# @function entry for +addr+ and, only if there was one, the @comment entry.
# Does not update @prog_binding (labels are not undefined).
#
# Returns nil when nothing is decoded at +addr+.
def undefine_from(addr)
  return if not di_at(addr)
  @comment.delete addr if @function.delete addr
  split_block(addr)
  addrs = []
  while di = di_at(addr)
    # single-successor blocks may loop back on themselves (eg 'jmp $'):
    # without this check the walk would never terminate
    break if addrs.include? di.address
    di.block.list.each { |ddi| addrs << ddi.address }
    # stop at the first non-linear branch
    break if di.block.to_subfuncret.to_a != [] or di.block.to_normal.to_a.length != 1
    addr = di.block.to_normal.first
    # the next block is also reachable from elsewhere: leave it defined
    break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1
  end
  addrs.each { |a| @decoded.delete a }
  @xrefs.delete_if { |a, x|
    if not x.kind_of? Array
      # single xref (or nil placeholder, which is kept)
      true if x and addrs.include? x.origin
    else
      # list of xrefs: prune it, and drop the entry once it is empty
      x.delete_if { |xx| addrs.include? xx.origin }
      true if x.empty?
    end
  }
  @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? }
end
|
|
645
|
+
|
|
646
|
+
# Merges instruction block +b2+ into +b1+ when they form a simple chain.
#
# +b1+ / +b2+ may be InstructionBlock objects or addresses (resolved through
# block_at). The merge only happens when:
# * the blocks are adjacent (b1's last instruction next_addr is b2's address),
#   unless +allow_nonadjacent+ is set
# * b1's only to_normal target is b2, and b2 has a single from_normal source
#   (that handles delay_slot)
# * neither side has subfuncret or indirect edges on the joined end
#
# On merge, b2's instructions are appended to b1, b1 takes over b2's
# to_normal, b2's list is emptied and the @addrs_done entries whose first
# element normalizes to b2's address are dropped.
# Returns true if merged, nil otherwise.
def merge_blocks(b1, b2, allow_nonadjacent = false)
  # resolve addresses to blocks
  if b1 && !b1.kind_of?(InstructionBlock)
    b1 = block_at(b1)
    return unless b1
  end
  if b2 && !b2.kind_of?(InstructionBlock)
    b2 = block_at(b2)
    return unless b2
  end
  return unless b1 && b2

  return unless allow_nonadjacent || b1.list.last.next_addr == b2.address
  return unless b1.to_normal.to_a == [b2.address] && b2.from_normal.to_a.length == 1
  return unless b1.to_subfuncret.to_a.empty? && b2.from_subfuncret.to_a.empty?
  return unless b1.to_indirect.to_a.empty? && b2.from_indirect.to_a.empty?

  b2.list.each { |insn| b1.add_di insn }
  b1.to_normal = b2.to_normal
  b2.list.clear
  @addrs_done.delete_if { |entry| normalize(entry[0]) == b2.address }
  true
end
|
|
666
|
+
|
|
667
|
+
# Computes the binding of a code sequence.
# Plain forwarder: calls @cpu.code_binding with this disassembler followed by
# every argument received, and returns its result.
def code_binding(*args)
  @cpu.code_binding(self, *args)
end
|
|
672
|
+
|
|
673
|
+
# Returns an array of instructions/labels that, once parsed and assembled,
# should give something equivalent to the code reachable from the entrypoint(s)
# given, according to the @decoded dasm graph.
#
# entry::           one address or an Array of addresses (each is normalized)
# include_subfunc:: when false, targets reported by each_to_otherfunc are not
#                   followed
#
# Assumes all jump targets have a matching label in @prog_binding.
# May add unconditional jumps to the listing to preserve the code flow.
def flatten_graph(entry, include_subfunc=true)
  ret = []
  entry = [entry] if not entry.kind_of? Array
  # worklist: addresses are popped from the end
  todo = entry.map { |a| normalize(a) }
  done = []
  # address => label name
  inv_binding = @prog_binding.invert
  while addr = todo.pop
    next if done.include? addr or not di_at(addr)
    done << addr
    b = @decoded[addr].block

    # emit the block: its label (if any) then its instructions
    ret << Label.new(inv_binding[addr]) if inv_binding[addr]
    ret.concat b.list.map { |di| di.instruction }

    # other-function targets go to the front of the worklist (handled last)
    b.each_to_otherfunc(self) { |to|
      to = normalize to
      todo.unshift to if include_subfunc
    }
    # same-function targets go to the end of the worklist (handled first)
    b.each_to_samefunc(self) { |to|
      to = normalize to
      todo << to
    }

    # the block may continue at the next address: its ending instruction
    # (accounting for the delay slot) is missing, does not stop execution, or saves ip
    if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip]
      to = b.list.last.next_addr
      if todo.include? to
        if done.include? to or not di_at(to)
          # the following code cannot be emitted right here (already emitted,
          # or not decoded): jump to its label instead, creating one if needed
          if not to_l = inv_binding[to]
            to_l = auto_label_at(to, 'loc')
            # the target is already in the listing: insert the new label before it
            if done.include? to and idx = ret.index(@decoded[to].block.list.first.instruction)
              ret.insert(idx, Label.new(to_l))
            end
          end
          ret << @cpu.instr_uncond_jump_to(to_l)
        else
          todo << to	# ensure it's next in the listing
        end
      end
    end
  end

  ret
end
|
|
721
|
+
|
|
722
|
+
# Returns a (partially) demangled C++ name.
# Operator table from wgcc-2.2.2/undecorate.cpp.
#
# Only names of the form '??<code>...' with a known operator code are handled:
# backquoted descriptions are returned without their quotes (eg "vftable"),
# other operators are returned as "op_<operator>" (eg "op_new").
# Any other name is returned unchanged.
# TODO demangle the rest of the name
def demangle_cppname(name)
  # not a '??' special name: nothing to do
  return name unless (name[0] == ??) and (name[1] == ??)

  encoded = name[2..-1]
  # operator codes are one char long, or two chars when starting with '_'
  code = encoded[0, 1]
  code = encoded[0, 2] if code == '_'

  text = {
    '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=",
    'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&",
    'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",",
    'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=",
    'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=",
    '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'",
    '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'",
    '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'",
    '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'",
    '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'",
    '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]",
    '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[code]
  # unknown operator code: keep the mangled name
  return name unless text

  # TODO
  text[0] == ?` ? text[1..-2] : "op_#{text}"
end
|
|
753
|
+
|
|
754
|
+
# Scans the raw content of every section for a pattern.
#
# pat::     a Regexp, or a String (matched literally)
# chunksz:: passed to each section's pattern_scan, defaults to 4MB; sections
#           are scanned chunk by chunk, so this should work (slowly) on 4GB
#           sections (eg debugger VM)
# margin::  passed along too, defaults to 65536: extra bytes added to each
#           chunk to find the pattern over chunk boundaries
#
# Returns the array of matching addresses. When a block is given, each
# address is yielded and only kept in the result if the block returns a true
# value.
def pattern_scan(pat, chunksz=nil, margin=nil)
  chunksz = 4*1024*1024 if not chunksz
  margin = 65536 if not margin
  pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String

  matches = []
  @sections.each { |base, edata|
    edata.pattern_scan(pat, chunksz, margin) { |rel|
      addr = base + rel
      wanted = (not block_given? or yield(addr))
      matches << addr if wanted
      # we keep our own list: tell the section scanner not to collect anything
      false
    }
  }
  matches
end
|
|
774
|
+
|
|
775
|
+
# Finds printable strings (bytes in \x20-\x7e) of at least +minlen+ chars,
# using pattern_scan with a 1024 margin.
#
# Returns an array of [addr, string]. When a block is given, (addr, string) is
# yielded and the pair is only kept if the block returns a true value.
# The string is re-read from the section data at the match address, looking
# at no more than 1024 chars.
# A match is ignored unless its address is above the end of the previously
# reported string.
def strings_scan(minlen=6)
  found = []
  resume_at = 0
  printable = /[\x20-\x7e]{#{minlen},}/m
  pattern_scan(printable, nil, 1024) { |o|
    # still inside (or before the end of) the last string seen
    next unless o - resume_at > 0
    next unless e = get_edata_at(o)
    str = e.data[e.ptr, 1024][printable]
    found << [o, str] if not block_given? or yield(o, str)
    resume_at = o + str.length
  }
  found
end
|
|
789
|
+
|
|
790
|
+
# Exports the addr => symbol map (see load_map).
# Returns one "<addr> <type> <label>" string per @prog_binding entry: Integer
# addresses are shown as 8+ digit zero-padded hex, type is 'c' when an
# instruction is decoded at the address and 'd' otherwise.
def save_map
  @prog_binding.map { |label, addr|
    kind = di_at(addr) ? 'c' : 'd'	# XXX
    shown = addr.kind_of?(::Integer) ? addr.to_s(16).rjust(8, '0') : addr
    "#{shown} #{kind} #{label}"
  }
end
|
|
798
|
+
|
|
799
|
+
# Loads a map file (addr => symbol) and defines a label for each entry.
#
# str:: either the map itself, or the filename of the map. An argument with no
#       newline is first tried as a filename; if that file cannot be read, the
#       argument is parsed as the map itself (so an empty map or a map holding
#       a single entry is accepted)
# off:: optional offset to add to every address found (eg for rebased binaries)
#
# Understands:
# * standard map files (eg linux-kernel.map: <addr> <type> <name>,
#   e.g. 'c01001ba t setup_idt')
# * ida map files (<sectionidx>:<sectionoffset> <name>)
# Lines matching neither format are ignored.
def load_map(str, off=0)
  if not str.index("\n")
    content = begin
      File.read(str)
    rescue StandardError
      # not a readable file: the argument is the map content
      nil
    end
    str = content if content
  end
  sks = @sections.keys.sort
  str.each_line { |l|
    case l.strip
    when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i	# kernel.map style
      set_label_at($1.to_i(16)+off, $3)
    when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i	# IDA style
      # we do not have section load order, let's just hope that the addresses are sorted (and sortable..)
      #  could check the 1st part of the file, with section sizes, but it is not very convenient
      # the regexp is so that we skip the 1st part with section descriptions
      # in the file, section 1 is the 1st section ; we have an additional section (exe header) which fixes the 0-index
      secidx, secoff, name = $1.to_i(16), $2.to_i(16), $3
      if not base = sks[secidx]
        # the entry refers to a section we do not have
        puts "load_map: unknown section in #{l.inspect}" if $VERBOSE
        next
      end
      set_label_at(base + secoff + off, name)
    end
  }
end
|
|
821
|
+
|
|
822
|
+
# Saves the dasm state in a file.
#
# The state is first written (through save_io) to "<file>.tmp", which is then
# renamed to +file+, so an existing +file+ is only replaced by a complete dump.
# If writing or renaming fails, the temporary file is removed and the
# exception is propagated.
def save_file(file)
  tmpfile = file + '.tmp'
  begin
    File.open(tmpfile, 'wb') { |fd| save_io(fd) }
    File.rename tmpfile, file
  ensure
    # after a successful rename the temp file is gone; otherwise drop the partial dump
    File.unlink(tmpfile) if File.exist?(tmpfile)
  end
end
|
|
828
|
+
|
|
829
|
+
# Saves the dasm state to an IO.
#
# The output starts with a 'Metasm.dasm' line, followed by a sequence of
# records. Each record is a header line "<type> <length>" (length being
# String#length of the payload) followed by the payload itself.
# Records written, in order: binarypath or cpu, one section per @sections
# entry, map, decoded, comment, blocks, funcs, xrefs, c.
def save_io(fd)
  fd.puts 'Metasm.dasm'

  if @program.filename
    # the binary will be reloaded from this path
    t = @program.filename.to_s
    fd.puts "binarypath #{t.length}", t
  else
    # no file: only remember the cpu class name (without namespace), size and endianness
    t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}"
    fd.puts "cpu #{t.length}", t
    # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE
  end

  @sections.each { |a, e|
    # forget edata exports/relocs
    # dump at most 16MB (2**24) of data per section
    # payload: "<address> <edata length>" line, then the base64-encoded raw data
    t = "#{Expression[a]} #{e.length}\n" +
        [e.data[0, 2**24].to_str].pack('m*')
    fd.puts "section #{t.length}", t
  }

  # labels, in save_map format
  t = save_map.join("\n")
  fd.puts "map #{t.length}", t

  # one line per decoded instruction: "<addr>,<bin_length> <instruction>[ ; <comments>]"
  t = @decoded.map { |a, d|
    next if not d.kind_of? DecodedInstruction
    "#{Expression[a]},#{d.bin_length} #{d.instruction}#{" ; #{d.comment.join(' ')}" if d.comment}"
  }.compact.sort.join("\n")
  fd.puts "decoded #{t.length}", t

  # one line per comment line: "<addr> <text>" (multiline comments are split)
  t = @comment.map { |a, c|
    c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" }
  }.join("\n")
  fd.puts "comment #{t.length}", t

  # the blocks are found through the decoded instructions that head one
  bl = @decoded.values.map { |d|
    d.block if d.kind_of? DecodedInstruction and d.block_head?
  }.compact
  # one line per block, ';'-separated fields, each field a ','-separated address list:
  # address; instruction addresses; to_normal; to_subfuncret; to_indirect;
  # from_normal; from_subfuncret; from_indirect
  t = bl.map { |b|
    [Expression[b.address],
     b.list.map { |d| Expression[d.address] }.join(','),
     b.to_normal.to_a.map { |t_| Expression[t_] }.join(','),
     b.to_subfuncret.to_a.map { |t_| Expression[t_] }.join(','),
     b.to_indirect.to_a.map { |t_| Expression[t_] }.join(','),
     b.from_normal.to_a.map { |t_| Expression[t_] }.join(','),
     b.from_subfuncret.to_a.map { |t_| Expression[t_] }.join(','),
     b.from_indirect.to_a.map { |t_| Expression[t_] }.join(','),
    ].join(';')
  }.sort.join("\n")
  fd.puts "blocks #{t.length}", t

  # one line per function whose address has a @decoded entry: "<addr>[,<return address>...]"
  t = @function.map { |a, f|
    next if not @decoded[a]
    [a, *f.return_address.to_a].map { |e| Expression[e] }.join(',')
  }.compact.sort.join("\n")
  # TODO binding ?
  fd.puts "funcs #{t.length}", t

  # one line per xref: "<addr>,<type>,<len>,<origin>" ; the special keys
  # :default and Expression::Unknown are written as ':default' / ':unknown'
  t = @xrefs.map { |a, x|
    a = ':default' if a == :default
    a = ':unknown' if a == Expression::Unknown
    # XXX origin
    case x
    when nil
    when Xref
      [Expression[a], x.type, x.len, (Expression[x.origin] if x.origin)].join(',')
    when Array
      x.map { |x_| [Expression[a], x_.type, x_.len, (Expression[x_.origin] if x_.origin)].join(',') }
    end
  }.compact.join("\n")
  fd.puts "xrefs #{t.length}", t

  # string form of the C parser
  t = @c_parser.to_s
  fd.puts "c #{t.length}", t

  #t = bl.map { |b| b.backtracked_for }
  #fd.puts "trace #{t.length}" , t
end
|
|
907
|
+
|
|
908
|
+
# Loads a disassembler from the content of a saved file.
# Creates a new instance with (nil, nil), restores the saved state +str+ into
# it through #load (the optional block is forwarded) and returns the instance.
def self.load(str, &b)
  dasm = new(nil, nil)
  dasm.load(str, &b)
  dasm
end
|
|
914
|
+
|
|
915
|
+
# Loads the dasm state from a savefile content.
#
# str:: the content of the savefile: a 'Metasm.dasm' line followed by records,
#       each made of a "<type> <length>" header line and <length> chars of data
#
# If a block is given, it is yielded (type, data):
# * for a 'binarypath' record whose file does not exist; the block result is
#   used as the path of the binary
# * for every record of unknown type
#
# Raises if +str+ does not start with the 'Metasm.dasm' signature, or if a
# 'cpu' record names something that is not a CPU subclass. Malformed lines in
# the decoded/blocks/funcs/comment/xrefs records are skipped (reported only
# when $VERBOSE is set).
def load(str)
  raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm'
  off = 12
  pp = Preprocessor.new
  app = AsmPreprocessor.new
  while off < str.length
    # record header line: "<type> <length>"
    i = str.index("\n", off) || str.length
    type, len = str[off..i].chomp.split
    off = i+1
    # record payload
    data = str[off, len.to_i]
    off += len.to_i
    case type
    when nil, ''
      # empty line between two records
    when 'binarypath'
      # reload the program from the original binary
      data = yield(type, data) if not File.exist? data and block_given?
      reinitialize AutoExe.decode_file(data)
      @program.disassembler = self
      @program.init_disassembler
    when 'cpu'
      # "<cpu class name> <size> <endianness>": use a Shellcode with this cpu
      cpuname, size, endianness = data.split
      cpu = Metasm.const_get(cpuname)
      raise 'invalid cpu' if not cpu < CPU
      cpu = cpu.new
      cpu.size = size.to_i
      cpu.endianness = endianness.to_sym
      reinitialize Shellcode.new(cpu)
      @program.disassembler = self
      @program.init_disassembler
    when 'section'
      # first line is "<address> <length>", the rest is the base64-encoded data
      info = data[0, data.index("\n") || data.length]
      data = data[info.length, data.length]
      pp.feed!(info)
      addr = Expression.parse(pp).reduce
      len = Expression.parse(pp).reduce
      edata = EncodedData.new(data.unpack('m*').first, :virtsize => len)
      add_section(addr, edata)
    when 'map'
      load_map data
    when 'decoded'
      # lines are "<addr>,<bin_length> <instruction>[; <comment>]"
      data.each_line { |l|
        begin
          next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/
          a, len, instr, cmt = $1, $2, $3, $4
          a = Expression.parse(pp.feed!(a)).reduce
          instr = @cpu.parse_instruction(app.feed!(instr))
          di = DecodedInstruction.new(instr, a)
          di.bin_length = len.to_i
          di.add_comment cmt if cmt
          @decoded[a] = di
        rescue
          puts "load: bad di #{l.inspect}" if $VERBOSE
        end
      }
    when 'blocks'
      # lines are ';'-separated fields, each field a ','-separated address list:
      # block address; instruction addresses; then the to_*/from_* lists
      data.each_line { |l|
        bla = l.chomp.split(';').map { |sl| sl.split(',') }
        begin
          a = Expression.parse(pp.feed!(bla.shift[0])).reduce
          b = InstructionBlock.new(a, get_section_at(a).to_a[0])
          # the instructions must already be in @decoded
          bla.shift.each { |e|
            a = Expression.parse(pp.feed!(e)).reduce
            b.add_di(@decoded[a])
          }
          # remaining fields, in order; empty lists leave the attribute untouched
          bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s|
            b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty?
          }
        rescue
          puts "load: bad block #{l.inspect}" if $VERBOSE
        end
      }
    when 'funcs'
      # lines are "<function address>[,<return address>...]"
      data.each_line { |l|
        begin
          a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce }
          @function[a] = DecodedFunction.new
          @function[a].return_address = r if not r.empty?
          @function[a].finalized = true
          # TODO
        rescue
          puts "load: bad function #{l.inspect} #$!" if $VERBOSE
        end
      }
    when 'comment'
      # lines are "<addr> <comment text>"
      data.each_line { |l|
        begin
          a, c = l.split(' ', 2)
          a = Expression.parse(pp.feed!(a)).reduce
          @comment[a] ||= []
          # do not store the same comment twice
          @comment[a] |= [c]
        rescue
          puts "load: bad comment #{l.inspect} #$!" if $VERBOSE
        end
      }
    when 'c'
      begin
        # TODO parse_invalid_c, split per function, whatever
        parse_c('')
        @c_parser.allow_bad_c = true
        parse_c(data, 'savefile#c')
      rescue
        puts "load: bad C: #$!", $!.backtrace if $VERBOSE
      end
      # consume whatever the parser has not read
      @c_parser.readtok until @c_parser.eos? if @c_parser
    when 'xrefs'
      # lines are "<addr>,<type>,<len>,<origin>"
      data.each_line { |l|
        begin
          a, t, len, o = l.chomp.split(',')
          case a
          when ':default'; a = :default
          when ':unknown'; a = Expression::Unknown
          else a = Expression.parse(pp.feed!(a)).reduce
          end
          t = (t.empty? ? nil : t.to_sym)
          len = (len != '' ? len.to_i : nil)
          o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil)	# :default/:unknown ?
          add_xref(a, Xref.new(t, o, len))
        rescue
          puts "load: bad xref #{l.inspect} #$!" if $VERBOSE
        end
      }
    #when 'trace'
    else
      # unknown record type: let the caller handle it
      if block_given?
        yield(type, data)
      else
        puts "load: unsupported section #{type.inspect}" if $VERBOSE
      end
    end
  end
end
|
|
1047
|
+
|
|
1048
|
+
# Changes the base address of the loaded binary: the lowest section address
# becomes +newaddr+.
# Better done early (before disassembling anything).
# Returns the delta, as returned by rebase_delta.
def rebase(newaddr)
  lowest = @sections.keys.min
  rebase_delta(newaddr - lowest)
end
|
|
1054
|
+
|
|
1055
|
+
# Shifts every Integer address known to the disassembler by +delta+.
#
# Updates the keys of @sections, @decoded, @xrefs, @function and @comment,
# the content of @addrs_todo and @addrs_done, the values of @prog_binding and
# @old_prog_binding, the decoded instructions and their blocks, the function
# return addresses and the xref origins. Resets @label_alias_cache.
# Only Integer values are shifted: anything else (apart from the containers
# handled by +fix+) is left as is.
# Returns +delta+.
def rebase_delta(delta)
  # returns the rebased version of its argument
  # Arrays and Hashes are also updated in place (for a Hash, only the keys)
  fix = lambda { |a|
    case a
    when Array
      a.map! { |e| fix[e] }
    when Hash
      tmp = {}
      a.each { |k, v| tmp[fix[k]] = v }
      a.replace tmp
    when Integer
      # not in place: the caller has to store the returned value
      a += delta
    when BacktraceTrace
      a.origin = fix[a.origin]
      a.address = fix[a.address]
    end
    a
  }

  fix[@sections]
  fix[@decoded]
  fix[@xrefs]
  fix[@function]
  fix[@addrs_todo]
  fix[@addrs_done]
  fix[@comment]
  @prog_binding.each_key { |k| @prog_binding[k] = fix[@prog_binding[k]] }
  @old_prog_binding.each_key { |k| @old_prog_binding[k] = fix[@old_prog_binding[k]] }
  @label_alias_cache = nil

  @decoded.values.grep(DecodedInstruction).each { |di|
    # each block is handled once, through the instruction heading it
    if di.block_head?
      b = di.block
      b.address += delta
      fix[b.to_normal]
      fix[b.to_subfuncret]
      fix[b.to_indirect]
      fix[b.from_normal]
      fix[b.from_subfuncret]
      fix[b.from_indirect]
      fix[b.backtracked_for]
    end
    di.address = fix[di.address]
    di.next_addr = fix[di.next_addr]
  }
  @function.each_value { |f|
    f.return_address = fix[f.return_address]
    fix[f.backtracked_for]
  }
  @xrefs.values.flatten.compact.each { |x| x.origin = fix[x.origin] }
  delta
end
|
|
1106
|
+
|
|
1107
|
+
# Toggles the display of integers as char constants for every expression of
# the object +o+: the :char render_info of each expression is set to the cpu
# endianness when unset, and cleared when set.
# Does nothing (returns nil) if +o+ is not a Renderable.
def toggle_expr_char(o)
  return unless o.kind_of? Renderable
  o.each_expr { |expr|
    expr.render_info = {} if not expr.render_info
    flags = expr.render_info
    flags[:char] = flags[:char] ? nil : @cpu.endianness
  }
end
|
|
1115
|
+
|
|
1116
|
+
# Patches the Expressions of the object +o+, handling both sides (lexpr then
# rexpr) of each expression:
# * a side that is a key of @prog_binding is replaced by the bound value
# * otherwise an Integer side that has a label (get_label_at) is replaced by
#   the label; an :addr xref from o.address to that address is also added when
#   +o+ responds to :address
# Does nothing (returns nil) if +o+ is not a Renderable.
# XXX should we also create labels ?
def toggle_expr_offset(o)
  return unless o.kind_of? Renderable
  o.each_expr { |e|
    [:lexpr, :rexpr].each { |side|
      cur = e.send(side)
      if bound = @prog_binding[cur]
        e.send("#{side}=", bound)
      elsif cur.kind_of? ::Integer and label = get_label_at(cur)
        add_xref(normalize(cur), Xref.new(:addr, o.address)) if o.respond_to? :address
        e.send("#{side}=", label)
      end
    }
  }
end
|
|
1135
|
+
|
|
1136
|
+
# Call this on a function entrypoint +o+ when the function is in fact a
# __noreturn.
# For every :x xref to +o+ whose origin is a decoded instruction having the
# :saveip property, cuts the to_subfuncret edges of the calling block: the
# caller is removed from the from_subfuncret list of each return block (the
# list becomes nil once empty), then the block's to_subfuncret is set to nil.
def fix_noreturn(o)
  each_xref(o, :x) { |xr|
    origin = normalize(xr.origin)
    di = di_at(origin)
    next unless di and di.opcode.props[:saveip]
    # XXX should check if caller also becomes __noreturn
    caller_block = di.block
    caller_block.each_to_subfuncret { |ret_addr|
      tdi = di_at(ret_addr)
      next unless tdi and tdi.block.from_subfuncret
      tdi.block.from_subfuncret.delete_if { |src| normalize(src) == di.address }
      tdi.block.from_subfuncret = nil if tdi.block.from_subfuncret.empty?
    }
    caller_block.to_subfuncret = nil
  }
end
|
|
1151
|
+
|
|
1152
|
+
# Finds the addresses of the calls calling +funcaddr+, handles thunks.
#
# For each :x xref to +funcaddr+, walks backward from the xref origin until an
# instruction having the :saveip property is found; its address is a call
# site. Returns the array of call sites found, without nil nor duplicates.
def call_sites(funcaddr)
  # returns the address of the :saveip instruction reached by walking
  # backward from a, or nil if the walk ends without finding one
  find_call_site = proc { |a|
    until not di = di_at(a)
      if di.opcode.props[:saveip]
        cs = di.address
        break
      end
      if di.block.from_subfuncret.to_a.first
        # step back over returning subfunction calls, as long as there is a
        # single from_subfuncret source, landing on the head of the source block
        while di.block.from_subfuncret.to_a.length == 1
          a = di.block.from_subfuncret[0]
          break if not di_at(a)
          a = @decoded[a].block.list.first.address
          di = @decoded[a]
        end
      end
      # give up if the block still has a from_subfuncret source
      break if di.block.from_subfuncret.to_a.first
      # otherwise only follow an unambiguous from_normal edge
      break if di.block.from_normal.to_a.length != 1
      a = di.block.from_normal.first
    end
    # nil unless a :saveip instruction was found
    cs
  }
  ret = []
  each_xref(normalize(funcaddr), :x) { |a|
    ret << find_call_site[a.origin]
  }
  ret.compact.uniq
end
|
|
1180
|
+
|
|
1181
|
+
# Loads a disassembler plugin script.
# The script is simply read and instance_eval()ed in the disassembler; the
# result of the evaluation is returned.
#
# If +plugin_filename+ does not exist, these are tried in order:
# * the filename with a '.rb' suffix
# * when Metasmdir is defined, the filename (then the filename with a '.rb'
#   suffix) in the Metasmdir/samples/dasm-plugins subdirectory
# If none exists, the original filename is read anyway (and File.read raises).
def load_plugin(plugin_filename)
  unless File.exist?(plugin_filename)
    if File.exist?(plugin_filename+'.rb')
      plugin_filename += '.rb'
    elsif defined? Metasmdir
      # try autocomplete
      base = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename)
      found = [base, base + '.rb'].find { |cand| File.exist? cand }
      plugin_filename = found if found
    end
  end

  instance_eval File.read(plugin_filename)
end
|
|
1202
|
+
|
|
1203
|
+
# Same as load_plugin, but hides the @gui attribute while loading, preventing
# the plugin from popping up stuff.
# This is useful to load a plugin from another plugin, to enhance that
# plugin's functionality.
# @gui is restored afterwards, even if the plugin raises.
# XXX this also prevents setting up kbd_callbacks etc..
def load_plugin_nogui(plugin_filename)
  begin
    saved_gui = gui
    @gui = nil
    load_plugin(plugin_filename)
  ensure
    @gui = saved_gui
  end
end
|
|
1213
|
+
|
|
1214
|
+
# Composes two code/instruction backtrace_bindings.
# Assumes bd1 is followed by bd2 in the code flow.
#
# bd1, bd2:: binding Hashes, or DecodedInstructions (their backtrace_binding is
#            used, computed through cpu.get_backtrace_binding and stored in
#            the instruction when not already set)
#
# Returns a new Hash, eg inc edi + push edi =>
#  { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] }
#
# XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to
# revert the change made by bd2, which only works with simple ptr addition now
# XXX unhandled situations may be resolved using :unknown, or by returning incorrect values
def compose_bt_binding(bd1, bd2)
  if bd1.kind_of? DecodedInstruction
    bd1 = bd1.backtrace_binding ||= cpu.get_backtrace_binding(bd1)
  end
  if bd2.kind_of? DecodedInstruction
    bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2)
  end

  # reduces its argument and wraps the result back in an Expression
  reduce = lambda { |e| Expression[Expression[e].reduce] }

  bd = {}

  # the effects of bd2, with its inputs expressed through bd1
  bd2.each { |k, v|
    bd[k] = reduce[v.bind(bd1)]
  }

  # for each pointer appearing in keys of bd1, we must infer from bd2 what final
  # pointers should appear in bd
  # eg 'mov [eax], 0  mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax }
  bd1.each { |k, v|
    if k.kind_of? Indirection
      # set once some value of bd2 involves an external of the pointer
      done = false
      k.pointer.externals.each { |e|
        # XXX this will break on nontrivial pointers or bd2
        bd2.each { |k2, v2|
          # we dont want to invert computation of flag_zero/carry etc (booh)
          next if k2.to_s =~ /flag/

          # discard indirection etc, result would be too complex / not useful
          next if not Expression[v2].expr_externals.include? e

          done = true

          # try to reverse the computation made upon 'e'
          # only simple addition handled here
          ptr = reduce[k.pointer.bind(e => Expression[[k2, :-, v2], :+, e])]

          # if bd2 does not rewrite e, duplicate the original pointer
          if not bd2[e]
            bd[k] ||= reduce[v]

            # here we should not see 'e' in ptr anymore
            ptr = Expression::Unknown if ptr.externals.include? e
          else
            # cant check if add reversion was successful..
          end

          # entries already set (eg by bd2) take precedence
          bd[Indirection[reduce[ptr], k.len]] ||= reduce[v]
        }
      }
      # nothing in bd2 involves the pointer: keep the entry as is
      bd[k] ||= reduce[v] if not done
    else
      # not a memory write: kept unless bd2 overwrites the same key
      bd[k] ||= reduce[v]
    end
  }

  bd
end
|
|
1279
|
+
end
|
|
1280
|
+
end
|