metasm 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
@@ -0,0 +1,2068 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/decode'
|
8
|
+
|
9
|
+
|
10
|
+
module Metasm
|
11
|
+
# Holds the information attached to one decoded instruction: the Instruction
# itself, the opcode it matches, its address and length, and a pointer to the
# InstructionBlock that contains it.
class DecodedInstruction
  # the instance of InstructionBlock this di is into
  attr_accessor :block
  # our offset (in bytes) from the start of the block, used only for hexdump
  attr_accessor :block_offset
  # the address of the instruction's first byte in memory
  attr_accessor :address
  # the disassembled data
  attr_accessor :instruction, :opcode
  # our length in bytes
  attr_accessor :bin_length
  # array of arbitrary strings
  attr_accessor :comment
  # a cache of the binding used by the backtracker to emulate this instruction
  attr_accessor :backtrace_binding

  # Creates a new DecodedInstruction.
  #
  # arg::  either an existing Instruction (stored as-is, and its opcode is
  #        looked up by name in the cpu's opcode_list when the instruction has
  #        a cpu), or any other value, which is handed to Instruction.new.
  # addr:: optional address of the instruction; @address is only set when given.
  def initialize(arg, addr=nil)
    if arg.kind_of?(Instruction)
      @instruction = arg
      if @instruction.cpu
        @opcode = @instruction.cpu.opcode_list.find { |op| op.name == @instruction.opname }
      end
    else
      @instruction = Instruction.new(arg)
    end
    @bin_length = 0
    @address = addr if addr
  end

  # Overrides the address of the instruction following this one.
  def next_addr=(a)
    @next_addr = a
  end

  # Returns the explicitly assigned next address if any, else address + bin_length.
  # Returns nil when the instruction has no address.
  def next_addr
    return unless address
    (@next_addr ||= nil) || (address + @bin_length)
  end

  # Returns a one-line textual dump of the instruction.
  # When the di belongs to a block, the line also carries the address, a hex
  # dump of the raw bytes (truncated when longer than 12 bytes) and up to 6
  # sorted comments. Otherwise only the instruction and its comments are shown.
  #
  # NOTE(review): the spacing inside the format strings is reproduced exactly as
  # it appears in the extracted text; runs of spaces may have been collapsed by
  # the extraction - confirm against the upstream file.
  def show
    unless block
      return "#{@instruction}#{' ; ' + @comment.join(' ') if comment}"
    end

    raw = @block.edata.data[@block.edata_ptr + @block_offset, @bin_length]
    bin = raw.unpack('C*').map { |c| '%02x' % c }.join
    bin = bin[0, 20] + "..<+#{@bin_length - 10}>" if @bin_length > 12
    " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{bin} #{@comment.sort[0, 6].join(' ') if comment}"
  end

  include Renderable

  # Returns the list of elements to render: the address (when known), the
  # instruction, and the comment array (when present, pushed as one element).
  def render
    ret = []
    ret << Expression[address] << ' ' if address
    ret << @instruction
    ret << ' ; ' << @comment if comment
    ret
  end

  # Adds the comment c to the instruction, unless it is already listed.
  def add_comment(c)
    @comment ||= []
    @comment |= [c]
  end

  # returns a copy of the DecInstr, with duplicated #instruction ("deep_copy")
  def dup
    copy = super()
    copy.instruction = @instruction.dup
    copy
  end
end
|
79
|
+
|
80
|
+
# holds information on a backtracked expression near begin and end of instruction blocks (#backtracked_for)
class BacktraceTrace
  # address of the instruction in the block from which rebacktrace should start (use with from_subfuncret bool)
  # address is nil if the backtrace is from block start
  # exclude_instr is a bool saying if the backtrace should start at address or at the preceding instruction
  # these are optional: if absent, expr is to be rebacktracked when a new codepath arrives at the beginning of the block
  attr_accessor :address, :from_subfuncret, :exclude_instr
  # address of the instruction that initiated the backtrace
  attr_accessor :origin
  # the Expression to backtrace at this point
  attr_accessor :expr
  # the original backtracked Expression
  attr_accessor :orig_expr
  # length of r/w xref (in bytes)
  attr_accessor :len
  # :r/:w/:x
  attr_accessor :type
  # bool: true if this maps to a :x that should not have a from when resolved
  attr_accessor :detached
  # maxdepth at the point of the object creation
  attr_accessor :maxdepth

  # Stores the four mandatory fields; len and maxdepth are only set when given.
  def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil)
    @expr = expr
    @origin = origin
    @orig_expr = orig_expr
    @type = type
    @len = len if len
    @maxdepth = maxdepth if maxdepth
  end

  # Hash value, computed from origin and expr only.
  #
  # NOTE(review): eql? below does not compare expr, and compares fields that
  # are not part of this hash, so two traces that are eql? may hash
  # differently. Confirm this is intended before relying on hash-based
  # deduplication (Hash keys, Array#uniq, Array#|).
  def hash
    [origin, expr].hash
  end

  # Two traces are equal when they have the same class and the same address,
  # from_subfuncret, exclude_instr, origin, orig_expr, len, type and detached.
  # expr and maxdepth are not compared.
  def eql?(o)
    return false unless o.class == self.class

    mine   = [address, from_subfuncret, exclude_instr, origin, orig_expr, len, type, detached]
    theirs = [o.address, o.from_subfuncret, o.exclude_instr, o.origin, o.orig_expr, o.len, o.type, o.detached]
    mine == theirs
  end
  alias == eql?
end
|
116
|
+
|
117
|
+
# a cross-reference, tracks read/write/execute memory accesses by decoded instructions
class Xref
  # :r/:w/:x
  attr_accessor :type
  # length of r/w (in bytes)
  attr_accessor :len
  # address of the instruction responsible of the xref
  attr_accessor :origin
  # XXX list of instructions intervening in the backtrace ?

  # type::   kind of access
  # origin:: address of the instruction responsible for the access
  # len::    optional access length; @len is only set when given
  def initialize(type, origin, len=nil)
    @type = type
    @origin = origin
    @len = len if len
  end

  # Hash value, derived from origin only.
  def hash
    @origin.hash
  end

  # Two xrefs are equal when they have the same class, type, len and origin.
  def eql?(o)
    return false unless o.class == self.class
    [type, len, origin] == [o.type, o.len, o.origin]
  end
  alias == eql?
end
|
136
|
+
|
137
|
+
# holds a list of contiguous decoded instructions, forming an uninterrupted block (except for eg CPU exceptions)
# most attributes are either a value or an array of values, use the associated iterator.
class InstructionBlock
  # address of the first instruction
  attr_accessor :address
  # pointer to raw data
  attr_accessor :edata, :edata_ptr
  # list of DecodedInstructions
  attr_accessor :list
  # address of instructions giving control directly to us
  # includes addr of normal instruction when call flow continues to us past the end of the preceding block
  # does not include addresses of subfunction return instructions
  # may be nil or an array
  attr_accessor :from_normal
  # address of instructions called/jumped to
  attr_accessor :to_normal
  # address of an instruction that calls a subfunction which returns to us
  attr_accessor :from_subfuncret
  # address of instruction executed after a called subfunction returns
  attr_accessor :to_subfuncret
  # address of instructions executed indirectly through us (callback in a subfunction, SEH...)
  # XXX from_indirect is not populated for now
  attr_accessor :from_indirect, :to_indirect
  # array of BacktraceTrace
  # when a new code path comes to us, it should be backtracked for the values of :r/:w/:x using btt with no address
  # for internal use only (block splitting): btt with an address
  attr_accessor :backtracked_for

  # Creates a new InstructionBlock.
  #
  # arg0::      the base address of the block, or a DecodedInstruction, or an
  #             Array of DecodedInstructions to initialize from (the block
  #             address is then the address of the first one)
  # edata::     the raw data the block is decoded from (optional)
  # edata_ptr:: offset of the block start inside edata; defaults to edata.ptr,
  #             or 0 when there is no edata
  def initialize(arg0, edata=nil, edata_ptr=nil)
    @list = []
    case arg0
    when DecodedInstruction
      @address = arg0.address
      add_di(arg0)
    when Array
      @address = arg0.first.address unless arg0.empty?
      arg0.each { |di| add_di(di) }
    else
      @address = arg0
    end
    @edata = edata
    @edata_ptr = edata_ptr || (edata ? edata.ptr : 0)
    @backtracked_for = []
  end

  # Length of the block in bytes: end offset of the last instruction, 0 if empty.
  def bin_length
    last = @list.last
    last ? last.block_offset + last.bin_length : 0
  end

  # splits the current block into a new one with all di from address addr to end
  # caller is responsible for rebacktracing new.bt_for to regenerate correct old.btt/new.btt
  #
  # Raises if no instruction of the block starts at addr, or if addr is the
  # first instruction (nothing would remain in this block).
  # Returns the new block. add_from/add_to are not defined in this part of the
  # file and are expected to be provided elsewhere.
  def split(addr)
    idx = @list.index { |di| di.address == addr }
    raise "invalid split @#{Expression[addr]}" if !idx || idx == 0

    new_b = self.class.new(addr, @edata, @edata_ptr + @list[idx].block_offset)
    # move the tail instructions one by one; add_di recomputes their block/offset
    new_b.add_di(@list.delete_at(idx)) while @list[idx]

    # hand our outgoing edges over to the new block, and take its (fresh) ones
    new_b.to_normal, @to_normal = to_normal, new_b.to_normal
    new_b.to_subfuncret, @to_subfuncret = to_subfuncret, new_b.to_subfuncret
    # then link the two halves together
    new_b.add_from @list.last.address
    add_to new_b.address

    # traces anchored on an instruction that moved follow it to the new block
    moved_addrs = new_b.list.map { |di| di.address }
    @backtracked_for.delete_if do |btt|
      next false unless btt.address && moved_addrs.include?(btt.address)
      new_b.backtracked_for << btt
      true
    end

    new_b
  end

  # adds a decodedinstruction to the block list, updates di.block and di.block_offset
  # di.address is derived from the block address when it is not already set
  def add_di(di)
    di.block = self
    di.block_offset = bin_length
    di.address ||= @address + di.block_offset
    @list << di
  end
end
|
216
|
+
|
217
|
+
# a factorized subfunction as seen by the disassembler
class DecodedFunction
  # when backtracking an instruction that calls us, use this binding and then the instruction's
  # the binding is lazily filled up for non-external functions, register by register, when
  # a backtraced expression depends on it
  attr_accessor :backtrace_binding
  # same as InstructionBlock#backtracked_for
  # includes the expression responsible of the function return (eg [esp] on ia32)
  attr_accessor :backtracked_for
  # addresses of instruction causing the function to return
  attr_accessor :return_address
  # a lambda called for dynamic backtrace_binding generation
  attr_accessor :btbind_callback
  # a lambda called for dynamic backtracked_for
  attr_accessor :btfor_callback
  # bool, if false the function is actually being disassembled
  attr_accessor :finalized
  # bool, if true the function does not return (eg exit() or ExitProcess())
  attr_accessor :noreturn

  # Starts with an empty backtracked_for list and an empty backtrace_binding.
  def initialize
    @backtracked_for = []
    @backtrace_binding = {}
  end

  # Returns the binding to use when backtracking through a call to this function.
  #
  # * if btbind_callback is defined, returns the result of calling it with
  #   [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
  # * else if the binding has a :thunk entry naming a function known to dasm,
  #   delegates to that function
  # * else asks dasm.cpu to lazily fill the binding for the symbols found in
  #   expr.externals that are not yet bound (:unknown excluded), and returns
  #   backtrace_binding
  def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
    if btbind_callback
      return @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth]
    end

    if backtrace_binding && (dest = @backtrace_binding[:thunk]) && (target = dasm.function[dest])
      return target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
    end

    unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown]
    unless unk_regs.empty?
      dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs)
    end
    @backtrace_binding
  end

  # Returns the list of traces to backtrack when this function is called.
  #
  # * if btfor_callback is defined, returns the result of calling it with
  #   [dasm, bt_for, funcaddr, calladdr]
  # * else if the binding has a :thunk entry naming a function known to dasm,
  #   delegates to that function
  # * else returns backtracked_for
  def get_backtracked_for(dasm, funcaddr, calladdr)
    if btfor_callback
      return @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr]
    end

    if backtrace_binding && (dest = @backtrace_binding[:thunk]) && (target = dasm.function[dest])
      return target.get_backtracked_for(dasm, funcaddr, calladdr)
    end

    @backtracked_for
  end
end
|
268
|
+
|
269
|
+
# Disassembler-related hooks of the CPU class.
# Several methods here have an empty body and return nil; presumably they are
# meant to be overridden by the CPU-specific classes (not visible here).
# get_backtrace_binding(di) is called below but is not defined in this part of
# the file.
class CPU
  # return the thing to backtrace to find +value+ before the execution of this instruction
  # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1]
  # (the value of :eax after 'inc eax' is the value of :eax before plus 1)
  # may return Expression::Unknown
  # The binding of di is computed once and cached in di.backtrace_binding.
  def backtrace_emu(di, value)
    binding = (di.backtrace_binding ||= get_backtrace_binding(di))
    Expression[Expression[value].bind(binding).reduce]
  end

  # returns a list of Expressions/Integer to backtrace to find an execution target
  # (default implementation: empty body, returns nil)
  def get_xrefs_x(dasm, di)
  end

  # returns a list of [type, address, len]
  # (the read accesses tagged :r, followed by the write accesses tagged :w)
  def get_xrefs_rw(dasm, di)
    reads  = get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] }
    writes = get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] }
    reads + writes
  end

  # returns a list [addr, len]
  # Read accesses are the indirections found in the values of the instruction
  # binding and in the result of get_xrefs_x (when it returns something).
  def get_xrefs_r(dasm, di)
    binding = (di.backtrace_binding ||= get_backtrace_binding(di))
    sources = binding.values
    exec = get_xrefs_x(dasm, di)
    sources |= exec if exec
    nested = sources.grep(Expression).map { |e| e.expr_indirections }.flatten
    (sources.grep(Indirection) + nested).map { |ind| [ind.target, ind.len] }
  end

  # returns a list [addr, len]
  # Write accesses are the indirections found in the keys of the instruction binding.
  def get_xrefs_w(dasm, di)
    binding = (di.backtrace_binding ||= get_backtrace_binding(di))
    dests = binding.keys
    nested = dests.grep(Expression).map { |e| e.expr_indirections }.flatten
    (dests.grep(Indirection) + nested).map { |ind| [ind.target, ind.len] }
  end

  # checks if the expression corresponds to a function return value with the instruction
  # (eg di == 'call something' and expr == [esp])
  # (default implementation: empty body, returns nil)
  def backtrace_is_function_return(expr, di=nil)
  end

  # updates f.backtrace_binding when a new return address has been found
  # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ?
  # (default implementation: empty body, returns nil)
  def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  end

  # returns if the expression is an address on the stack
  # (to avoid trying to backtrace its absolute address until we found function boundaries)
  # (default implementation: empty body, returns nil)
  def backtrace_is_stack_address(expr)
  end

  # updates the instruction arguments: replace an expression with another (eg when a label is renamed)
  # Only arguments that are Expressions are rewritten (in place, in i.args);
  # every other argument is kept untouched.
  def replace_instr_arg_immediate(i, old, new)
    i.args.map! do |arg|
      if arg.kind_of?(Expression)
        Expression[arg.bind(old => new).reduce]
      else
        arg
      end
    end
  end

  # a callback called whenever a backtrace is successful
  # di is the decodedinstruction at the backtrace's origin
  # (default implementation: empty body, returns nil)
  def backtrace_found_result(dasm, di, expr, type, len)
  end
end
|
333
|
+
|
334
|
+
# Disassembler-related helpers of the ExeFormat class.
class ExeFormat
  # returns a string containing asm-style section declaration
  # (edata is not used by this default implementation)
  def dump_section_header(addr, edata)
    "\n// section at #{Expression[addr]}"
  end

  # returns an array of expressions that may be executed by this instruction
  # (delegated to @cpu)
  def get_xrefs_x(dasm, di)
    @cpu.get_xrefs_x(dasm, di)
  end

  # returns an array of [type, expression, length] that may be accessed by this instruction (type is :r/:w, len is in bytes)
  # (delegated to @cpu)
  def get_xrefs_rw(dasm, di)
    @cpu.get_xrefs_rw(dasm, di)
  end
end
|
346
|
+
|
347
|
+
# a disassembler class
|
348
|
+
# holds a copy of a program sections, a list of decoded instructions, xrefs
|
349
|
+
# is able to backtrace an expression from an address following the call flow (backwards)
|
350
|
+
class Disassembler
|
351
|
+
attr_accessor :program, :cpu
|
352
|
+
# binding (union of @sections.values.exports)
|
353
|
+
attr_accessor :prog_binding
|
354
|
+
# hash addr => edata
|
355
|
+
attr_accessor :sections
|
356
|
+
# hash addr => DecodedInstruction
|
357
|
+
attr_accessor :decoded
|
358
|
+
# hash addr => DecodedFunction (includes 'imported' functions)
|
359
|
+
attr_accessor :function
|
360
|
+
# hash addr => (array of) xrefs - access with +add_xref+/+each_xref+
|
361
|
+
attr_accessor :xrefs
|
362
|
+
# bool, true to check write xrefs on each instr disasm (default true)
|
363
|
+
attr_accessor :check_smc
|
364
|
+
# list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return]
|
365
|
+
attr_accessor :addrs_todo
|
366
|
+
# hash address => binding
|
367
|
+
attr_accessor :address_binding
|
368
|
+
# number of blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default)
|
369
|
+
attr_accessor :backtrace_maxblocks
|
370
|
+
# maximum backtrace length for :r/:w, defaults to backtrace_maxblocks
|
371
|
+
attr_accessor :backtrace_maxblocks_data
|
372
|
+
# max bt length for backtrace_fast blocks, default=0
|
373
|
+
attr_accessor :backtrace_maxblocks_fast
|
374
|
+
# max complexity for an Expr during backtrace before abort
|
375
|
+
attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data
|
376
|
+
# maximum number of instructions inside a basic block, split past this limit
|
377
|
+
attr_accessor :disassemble_maxblocklength
|
378
|
+
# a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to
|
379
|
+
attr_accessor :c_parser
|
380
|
+
# hash address => array of strings
|
381
|
+
# default dasm dump will only show comments at beginning of code blocks
|
382
|
+
attr_accessor :comment
|
383
|
+
# bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr)
|
384
|
+
attr_accessor :funcs_stdabi
|
385
|
+
# callback called whenever an instruction will backtrace :x (before the backtrace is started)
|
386
|
+
# arguments: |addr of origin, array of exprs to backtrace|
|
387
|
+
# must return the replacement array, nil == []
|
388
|
+
attr_accessor :callback_newaddr
|
389
|
+
# called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction
|
390
|
+
# returns the new di to consider (nil to end block)
|
391
|
+
attr_accessor :callback_newinstr
|
392
|
+
# called whenever the disassembler tries to disassemble an address that has been written to. arg: the address
|
393
|
+
attr_accessor :callback_selfmodifying
|
394
|
+
# called when the disassembler stops (stopexec/undecodable instruction)
|
395
|
+
attr_accessor :callback_stopaddr
|
396
|
+
# callback called before each backtrace that may take some time
|
397
|
+
attr_accessor :callback_prebacktrace
|
398
|
+
# callback called once all addresses have been disassembled
|
399
|
+
attr_accessor :callback_finished
|
400
|
+
# pointer to the gui widget we're displayed in
|
401
|
+
attr_accessor :gui
|
402
|
+
|
403
|
+
@@backtrace_maxblocks = 50
|
404
|
+
|
405
|
+
# creates a new disassembler
# program:: the program to disassemble
# cpu::     the cpu to use, defaults to program.cpu
# All the state setup is done by reinitialize.
def initialize(program, cpu=program.cpu)
  reinitialize(program, cpu)
end
|
409
|
+
|
410
|
+
# resets the program
# Stores program and cpu, empties every table of the disassembler (sections,
# decoded instructions, xrefs, functions, bindings, todo/done lists, comments)
# and restores the default limits.
def reinitialize(program, cpu=program.cpu)
  @program = program
  @cpu = cpu

  # disassembly state
  @sections = {}
  @decoded = {}
  @xrefs = {}
  @function = {}
  @check_smc = true
  @prog_binding = {}
  @old_prog_binding = {} # same as prog_binding, but keep old var names
  @addrs_todo = []
  @addrs_done = []
  @address_binding = {}

  # limits
  @backtrace_maxblocks = @@backtrace_maxblocks
  @backtrace_maxblocks_fast = 0
  @backtrace_maxcomplexity = 40
  @backtrace_maxcomplexity_data = 5
  @disassemble_maxblocklength = 100

  @comment = {}
  @funcs_stdabi = true
end
|
432
|
+
|
433
|
+
# adds a section, updates prog_binding
# base addr is an Integer or a String (label name for offset 0)
#
# The two arguments may be given in either order: they are swapped when base
# is an EncodedData.
# Raises when a String base is already exported at a non-zero offset of the
# section, when it is already bound to something else in prog_binding, or when
# base is neither a String nor an Integer.
# Returns self.
def add_section(encoded, base)
  encoded, base = base, encoded if base.kind_of? EncodedData

  case base
  when ::Integer
    # nothing to check for a numeric base
  when ::String
    if encoded.export[base] && encoded.export[base] != 0
      raise "invalid section base #{base.inspect} - not at section start"
    end
    if @prog_binding[base] && @prog_binding[base] != Expression[base]
      raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}"
    end
    encoded.add_export base, 0
  else
    raise "invalid section base #{base.inspect} - expected string or integer"
  end

  @sections[base] = encoded
  @label_alias_cache = nil
  encoded.binding(base).each do |label, value|
    @old_prog_binding[label] = @prog_binding[label] = value.reduce
  end

  # update section_edata.reloc
  # label -> list of relocs that refers to it
  # (rebuilt from scratch over all known sections)
  @inv_section_reloc = {}
  @sections.each do |sec_base, sec_edata|
    sec_edata.reloc.each do |off, rel|
      rel.target.externals.grep(::String).each do |ext|
        (@inv_section_reloc[ext] ||= []) << [sec_base, sec_edata, off, rel]
      end
    end
  end

  self
end
|
463
|
+
|
464
|
+
# Records the xref x for the address addr.
# @xrefs[addr] holds nothing, a single xref, or an array of xrefs:
# * first xref for the address: stored as-is
# * x matches (===) the value already stored: nothing to do
# * an array is already stored: x is added unless already included
# * another single xref is stored: both are wrapped in a new array
def add_xref(addr, x)
  case @xrefs[addr]
  when nil
    @xrefs[addr] = x
  when x
    # already known
  when ::Array
    @xrefs[addr] |= [x]
  else
    @xrefs[addr] = [@xrefs[addr], x]
  end
end
|
472
|
+
|
473
|
+
# yields each xref to a given address, optionally restricted to a type
#
# addr is normalized first. The stored xref list is never modified: the
# filtering and the additions below work on a copy.
# When type is nil or :reloc and the address has a label, a pseudo-xref of type
# :reloc is added for each section relocation that refers to this label.
# get_label_at and di_at are not defined in this part of the file.
def each_xref(addr, type=nil)
  addr = normalize addr

  stored = @xrefs[addr]
  list = case stored
         when nil then []
         when ::Array then stored.dup
         else [stored]
         end

  list.delete_if { |xr| xr.type != type } if type

  # add pseudo-xrefs for exe relocs
  if (!type || type == :reloc) && (label = get_label_at(addr)) && (relocs = @inv_section_reloc[label])
    relocs.each do |base, _edata, off, _reloc|
      reloc_addr = Expression[base] + off
      # ignore relocs embedded in an already-listed instr
      embedded = list.find do |xr|
        next unless xr.origin && di_at(xr.origin)
        # fall back to a large distance when the two addresses cannot be subtracted
        distance = begin
          reloc_addr - xr.origin
        rescue StandardError
          50
        end
        distance < @decoded[xr.origin].bin_length
      end
      list << Xref.new(:reloc, reloc_addr) unless embedded
    end
  end

  list.each { |xr| yield xr }
end
|
500
|
+
|
501
|
+
# parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow
# The file content is read and handed to parse_c, with the path as filename.
def parse_c_file(file)
  source = File.read(file)
  parse_c(source, file)
end
|
505
|
+
|
506
|
+
# parses a C string for function prototypes
# The C parser is created on first use from @cpu.new_cparser and kept in
# @c_parser; the macro __METASM__DECODE__ is weakly defined in its lexer
# before each parse.
# Returns the result of the parser's parse method.
def parse_c(str, filename=nil, lineno=1)
  @c_parser ||= @cpu.new_cparser
  @c_parser.lexer.define_weak('__METASM__DECODE__')
  @c_parser.parse(str, filename, lineno)
end
|
512
|
+
|
513
|
+
# returns the canonical form of addr (absolute address integer or label of start of section + section offset)
#
# * nil/false and :default are returned unchanged
# * anything that is not an Integer is resolved through the old program binding
# * Integers are wrapped modulo 2**max(cpu.size, 32) when a cpu is set
def normalize(addr)
  return addr if !addr || addr == :default

  addr = Expression[addr].bind(@old_prog_binding).reduce unless addr.kind_of? Integer
  if @cpu && addr.kind_of?(Integer)
    addr %= 1 << [@cpu.size, 32].max
  end
  addr
end
|
520
|
+
|
521
|
+
# returns [edata, edata_base] or nil
# edata.ptr points to addr
#
# addr is normalized first.
# * Integer address: looks for a section with an Integer base that contains
#   addr; failing that, for a section that ends exactly at addr (end label)
# * Expression of the form 'label + offset' (offset >= 0) or a bare 'label':
#   looks for the section registered under that label
# When memcheck is true and the section data responds to page_invalid?, nil is
# returned if the page at the resulting ptr is invalid (edata.ptr has already
# been updated at that point).
def get_section_at(addr, memcheck=true)
  case addr = normalize(addr)
  when ::Integer
    found = @sections.find { |b, e| b.kind_of?(::Integer) && addr >= b && addr < b + e.length } ||
            @sections.find { |b, e| b.kind_of?(::Integer) && addr == b + e.length } # end label
    return unless found

    base, edata = found
    edata.ptr = addr - base
    return if memcheck && edata.data.respond_to?(:page_invalid?) && edata.data.page_invalid?(edata.ptr)
    [edata, base]
  when Expression
    if addr.op == :+ && addr.rexpr.kind_of?(::Integer) && addr.rexpr >= 0 &&
       addr.lexpr.kind_of?(::String) && (edata = @sections[addr.lexpr])
      # label + offset
      edata.ptr = addr.rexpr
      return if memcheck && edata.data.respond_to?(:page_invalid?) && edata.data.page_invalid?(edata.ptr)
      [edata, Expression[addr.lexpr]]
    elsif addr.op == :+ && addr.rexpr.kind_of?(::String) && !addr.lexpr && (edata = @sections[addr.rexpr])
      # bare label: start of the section
      edata.ptr = 0
      return if memcheck && edata.data.respond_to?(:page_invalid?) && edata.data.page_invalid?(edata.ptr)
      [edata, addr.rexpr]
    end
  end
end
|
544
|
+
|
545
|
+
# returns the label at the specified address, creates it if needed using "prefix_addr"
|
546
|
+
# renames the existing label if it is in the form rewritepfx_addr
|
547
|
+
# returns nil if the address is not known and is not a string
|
548
|
+
def auto_label_at(addr, base='xref', *rewritepfx)
  addr = Expression[addr].reduce
  addrstr = "#{base}_#{Expression[addr]}"
  # the candidate label must be a plain identifier
  return if addrstr !~ /^\w+$/

  edata, edata_base = get_section_at(addr)
  if not edata
    # unmapped address: use the name it reduces to, or the generated name
    # for expressions that have no Symbol externals
    resolved = Expression[addr].reduce_rec
    label = resolved if resolved.kind_of?(::String)
    label ||= addrstr if addr.kind_of?(Expression) && addr.externals.grep(::Symbol).empty?
  else
    label = edata.inv_export[edata.ptr]
    if not label
      # no label exported at this offset yet: create and register one
      label = @program.new_label(addrstr)
      edata.add_export label, edata.ptr
      @label_alias_cache = nil
      @old_prog_binding[label] = @prog_binding[label] = edata_base + edata.ptr
    elsif rewritepfx.find { |pfx| base != pfx && addrstr.sub(base, pfx) == label }
      # existing label is an auto-generated one with a rewritable prefix: rename it
      newl = addrstr
      newl = @program.new_label(newl) unless @old_prog_binding[newl] and @old_prog_binding[newl] == @prog_binding[label]	# avoid _uuid when a -> b -> a
      rename_label label, newl
      label = newl
    end
  end
  label
end
|
569
|
+
|
570
|
+
# returns a hash associating addr => list of labels at this addr
|
571
|
+
def label_alias
  # the cache is reset (set to nil) by whoever changes the label bindings
  return @label_alias_cache if @label_alias_cache

  aliases = @label_alias_cache = {}
  @prog_binding.each { |label, addr| (aliases[addr] ||= []) << label }
  aliases
end
|
580
|
+
|
581
|
+
# decodes instructions from an entrypoint, (tries to) follows code flow
|
582
|
+
def disassemble(*entrypoints)
  # keep iterating until disassemble_mainiter reports there is nothing left to do
  more = true
  more = disassemble_mainiter(entrypoints) while more
  self
end
|
586
|
+
|
587
|
+
# list of entrypoints recorded so far (filled by disassemble_mainiter and disassemble_fast_deep)
attr_accessor :entrypoints
|
588
|
+
|
589
|
+
# do one operation relevant to disassembling
|
590
|
+
# returns false once done, true otherwise
|
591
|
+
def disassemble_mainiter(entrypoints=[])
  @entrypoints ||= []

  # pending addresses take priority over new entrypoints
  unless @addrs_todo.empty?
    disassemble_step
    return true
  end

  if entrypoints.empty?
    # nothing left at all: run the final pass and notify
    post_disassemble
    puts 'disassembly finished' if $VERBOSE
    @callback_finished[] if callback_finished
    return false
  end

  # start from the next entrypoint
  ep = entrypoints.shift
  l = auto_label_at(normalize(ep), 'entrypoint')
  puts "start disassemble from #{l} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty?
  @entrypoints << l
  @addrs_todo << [ep]
  true
end
|
609
|
+
|
610
|
+
def post_disassemble
  # tag :saveip instructions whose block has no subfuncret successor as 'noreturn'
  @decoded.each_value { |di|
    next unless di.kind_of?(DecodedInstruction)
    next if not di.opcode or not di.opcode.props[:saveip]
    next if di.block.to_subfuncret
    di.add_comment 'noreturn'
    # there is no need to re-loop on all :saveip as check_noret is transitive
    di.block.each_to_normal { |fa| check_noreturn_function(fa) }
  }

  # finalize the decoded functions and comment their binding
  @function.each { |addr, f|
    next if not @decoded[addr]

    unless f.finalized
      f.finalized = true
      puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, addr, f, f.return_address)
      detect_function_thunk(addr) if not f.return_address
    end

    @comment[addr] ||= []
    # bindings that actually change something and have a known value
    bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown }
    # all keys bound to Unknown are grouped under a single entry
    unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact
    bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty?
    @comment[addr] |= ["function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')]
    @comment[addr] |= ["function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')] if f.return_address
  }
end
|
638
|
+
|
639
|
+
# disassembles one block from addrs_todo
|
640
|
+
# adds next addresses to handle to addrs_todo
|
641
|
+
# if @function[:default] exists, jumps to unknown locations are interpreted as jumps to @function[:default]
|
642
|
+
def disassemble_step
  todo = @addrs_todo.pop
  return if not todo or @addrs_done.include? todo
  @addrs_done << todo if todo[1]

  # a todo entry is [addr, from, from_subfuncret]
  # from_subfuncret is true if from is the address of a function call that returns to addr
  addr, from, from_subfuncret = todo

  return if from == Expression::Unknown

  puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG

  addr = normalize(addr)
  from_type = from_subfuncret ? :subfuncret : :normal

  # coming back from a call: finalize the called subfunctions
  if from and from_subfuncret and di_at(from)
    @decoded[from].block.each_to_normal { |subfunc|
      subfunc = normalize(subfunc)
      next if not f = @function[subfunc] or f.finalized
      f.finalized = true
      puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address)
      detect_function_thunk(subfunc) if not f.return_address
    }
  end

  if di = @decoded[addr]
    # already known address
    if di.kind_of? DecodedInstruction
      split_block(di.block, di.address) if not di.block_head?	# this updates di.block
      di.block.add_from(from, from_type) if from and from != :default
      bf = di.block
    elsif di == true
      bf = @function[addr]
    end
  elsif bf = @function[addr]
    # known function without decoded code
    detect_function_thunk_noreturn(from) if bf.noreturn
  elsif s = get_section_at(addr)
    # mapped address: decode a new block
    block = InstructionBlock.new(normalize(addr), s[0])
    block.add_from(from, from_type) if from and from != :default
    disassemble_block(block)
  elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and
      s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function
    # external symbol with a known C prototype
    bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s)
    detect_function_thunk_noreturn(from) if bf.noreturn
  elsif from
    if bf = @function[:default]
      puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG
      if name = Expression[addr].reduce_rec and name.kind_of? ::String
        @function[addr] = @function[:default].dup
      else
        addr = :default
      end
      @decoded[from].block.add_to addr if @decoded[from]
    else
      puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG
    end
    if from != :default
      add_xref(addr, Xref.new(:x, from))
      add_xref(Expression::Unknown, Xref.new(:x, from))
    end
  else
    puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE
  end

  # collect the backtraces that were waiting on the target
  bff = nil
  if bf and from and from != :default
    bff = if bf.kind_of? DecodedFunction
      bf.get_backtracked_for(self, addr, from)
    else
      bf.backtracked_for
    end
  end

  # replay them from the new predecessor
  bff.each { |btt|
    next if btt.address
    if @decoded[from].kind_of? DecodedInstruction and @decoded[from].opcode.props[:saveip] and not from_subfuncret and not @function[addr]
      backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached)
    end
    next if backtrace_check_funcret(btt, addr, from)
    backtrace(btt.expr, from,
      :include_start => true, :from_subfuncret => from_subfuncret,
      :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type,
      :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth)
  } if bff
end
|
727
|
+
|
728
|
+
# splits an InstructionBlock, updates the blocks backtracked_for
|
729
|
+
def split_block(block, address=nil)
  unless address
    # invoked as split_block(0x401012): the single argument is the split address
    return if not @decoded[block].kind_of? DecodedInstruction
    block, address = @decoded[block].block, block
  end

  # nothing to do when splitting at the block start
  return block if address == block.address

  tail = block.split address
  # redo the backtraces recorded on the new block, limited to the head block
  tail.backtracked_for.dup.each { |btt|
    backtrace(btt.expr, btt.address,
      :only_upto => block.list.last.address,
      :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
      :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
      :detached => btt.detached, :maxdepth => btt.maxdepth)
  }
  tail
end
|
745
|
+
|
746
|
+
# disassembles a new instruction block at block.address (must be normalized)
|
747
|
+
def disassemble_block(block)
  raise if not block.list.empty?

  di_addr = block.address
  delay_slot = nil
  di = nil

  # try not to run for too long
  # loop usage: break if the block continues to the following instruction, else return
  @disassemble_maxblocklength.times {
    # check collision into a known block
    break if @decoded[di_addr]

    # check self-modifying code (only exact-address writes are checked)
    if @check_smc
      waddr = di_addr
      each_xref(waddr, :w) { |x|
        puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE
        @comment[di_addr] ||= []
        @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"]
        @callback_selfmodifying[di_addr] if callback_selfmodifying
        return
      }
    end

    # decode instruction
    block.edata.ptr = di_addr - block.address + block.edata_ptr
    di = @cpu.decode_instruction(block.edata, di_addr)
    if not di
      ed = block.edata
      puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE
      return
    end

    @decoded[di_addr] = di
    block.add_di di
    puts di if $DEBUG

    # the callback may replace or drop the instruction
    di = @callback_newinstr[di] if callback_newinstr
    return if not di
    block = di.block

    di_addr = di.next_addr

    backtrace_xrefs_di_rw(di)

    if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
      # do not backtrace until delay slot is finished (eg MIPS: di is a
      # ret and the delay slot holds stack fixup needed to calc func_binding)
      # XXX if the delay slot is also xref_x or :stopexec it is ignored
      delay_slot ||= [di, @cpu.delay_slot(di)]
    end

    if delay_slot
      di, delay = delay_slot
      if delay == 0 or not di_addr
        backtrace_xrefs_di_x(di)
        if di.opcode.props[:stopexec] or not di_addr
          return
        else
          break
        end
      end
      delay_slot[1] = delay - 1
    end
  }

  # the block falls through to di_addr
  ar = [di_addr]
  ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr
  ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) }

  block
end
|
819
|
+
|
820
|
+
# retrieve the list of execution crossrefs due to the decodedinstruction
|
821
|
+
# returns a list of symbolic expressions
|
822
|
+
def get_xrefs_x(di)
  # the program object computes the execution crossrefs, this is a plain delegation
  @program.get_xrefs_x(self, di)
end
|
825
|
+
|
826
|
+
# retrieve the list of data r/w crossrefs due to the decodedinstruction
|
827
|
+
# returns a list of [type, symbolic expression, length]
|
828
|
+
def get_xrefs_rw(di)
  # the program object computes the data crossrefs, this is a plain delegation
  @program.get_xrefs_rw(self, di)
end
|
831
|
+
|
832
|
+
# disassembles_fast from a list of entrypoints, also dasm subfunctions
|
833
|
+
def disassemble_fast_deep(*entrypoints)
  # remember the entrypoints (without duplicates)
  @entrypoints = (@entrypoints || []) | entrypoints

  entrypoints.each do |ep|
    do_disassemble_fast_deep(normalize(ep))
  end
end
|
839
|
+
|
840
|
+
def do_disassemble_fast_deep(ep)
  disassemble_fast(ep) do |fa, di|
    # recurse into each subfunction yielded by disassemble_fast
    fa = normalize(fa)
    do_disassemble_fast_deep(fa)
    # link the subfunction's first block back to the calling instruction
    callee_di = di && di_at(fa)
    callee_di.block.add_from_normal(di.address) if callee_di
  end
end
|
849
|
+
|
850
|
+
# disassembles fast from a list of entrypoints
|
851
|
+
# see disassemble_fast_step
|
852
|
+
def disassemble_fast(entrypoint, maxdepth=-1, &b)
  pending = [entrypoint]
  while not pending.empty?
    disassemble_fast_step(pending, &b)
    maxdepth -= 1
    # once the depth budget is spent, drop the targets that are not decoded yet
    if maxdepth == 0
      pending.delete_if { |a| not @decoded[normalize(a[0])] }
    end
  end
  check_noreturn_function(entrypoint)
end
|
861
|
+
|
862
|
+
# disassembles one block from the ary, see disassemble_fast_block
|
863
|
+
def disassemble_fast_step(todo, &b)
  entry = todo.pop
  return if not entry
  addr, from, from_subfuncret = entry

  addr = normalize(addr)
  from_type = from_subfuncret ? :subfuncret : :normal

  if di = @decoded[addr]
    # already decoded: make addr a block head and record the new predecessor
    if di.kind_of? DecodedInstruction
      split_block(di.block, di.address) if not di.block_head?
      di.block.add_from(from, from_type) if from and from != :default
    end
  elsif s = get_section_at(addr)
    # mapped address: decode a new block and queue its successors
    block = InstructionBlock.new(normalize(addr), s[0])
    block.add_from(from, from_type) if from and from != :default
    todo.concat disassemble_fast_block(block, &b)
  elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr]
    # named external target: use its C prototype if known, else the default function
    if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function
      @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s)
      detect_function_thunk_noreturn(from) if @function[addr].noreturn
    elsif @function[:default]
      @function[addr] = @function[:default].dup
    end
  end

  disassemble_fast_checkfunc(addr)
end
|
889
|
+
|
890
|
+
# check if an addr has an xref :x from a :saveip, if so mark as Function
|
891
|
+
def disassemble_fast_checkfunc(addr)
  return unless @decoded[addr].kind_of?(DecodedInstruction)
  return if @function[addr]

  # is there an :x xref to addr coming from a :saveip instruction ?
  called = false
  each_xref(addr, :x) { |x_|
    called = true if odi = di_at(x_.origin) and odi.opcode.props[:saveip]
  }
  return unless called

  auto_label_at(addr, 'sub', 'loc', 'xref')
  # XXX use default_btbind_callback ?
  @function[addr] = DecodedFunction.new
  @function[addr].finalized = true
  detect_function_thunk(addr)
  puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
end
|
907
|
+
|
908
|
+
# disassembles fast a new instruction block at block.address (must be normalized)
|
909
|
+
# does not recurse into subfunctions
|
910
|
+
# assumes all :saveip returns, except those pointing to a subfunc with noreturn
|
911
|
+
# yields subfunction addresses (targets of :saveip)
|
912
|
+
# only backtrace for :x with maxdepth 1 (ie handles only basic push+ret)
|
913
|
+
# returns a todo-style ary
|
914
|
+
# assumes @addrs_todo is empty
|
915
|
+
def disassemble_fast_block(block, &b)
  # block may be an InstructionBlock or a raw address
  if not block.kind_of? InstructionBlock
    s = get_section_at(block)
    # unmapped address: nothing to decode, return an empty todo list
    # (previously raised NoMethodError on nil)
    return [] if not s
    block = InstructionBlock.new(normalize(block), s[0])
  end

  di_addr = block.address
  delay_slot = nil
  di = nil
  ret = []

  return ret if @decoded[di_addr]

  @disassemble_maxblocklength.times {
    # stop when running into already decoded code
    break if @decoded[di_addr]

    # decode instruction
    block.edata.ptr = di_addr - block.address + block.edata_ptr
    if not di = @cpu.decode_instruction(block.edata, di_addr)
      return ret
    end

    @decoded[di_addr] = di
    block.add_di di
    puts di if $DEBUG

    # the callback may replace or drop the instruction
    di = @callback_newinstr[di] if callback_newinstr
    return ret if not di

    di_addr = di.next_addr

    if di.opcode.props[:stopexec] or di.opcode.props[:setip]
      if di.opcode.props[:setip]
        # backtrace fills @addrs_todo with the resolved targets
        @addrs_todo = []
        @program.get_xrefs_x(self, di).each { |expr|
          backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
        }
      end
      if di.opcode.props[:saveip]
        # call: the targets are handled by disassemble_fast_block_subfunc
        @addrs_todo = []
        ret.concat disassemble_fast_block_subfunc(di, &b)
      else
        ret.concat @addrs_todo
        @addrs_todo = []
      end
      delay_slot ||= [di, @cpu.delay_slot(di)]
    end

    if delay_slot
      if delay_slot[1] <= 0
        return ret if delay_slot[0].opcode.props[:stopexec]
        break
      end
      delay_slot[1] -= 1
    end
  }

  # the block continues to the following instruction
  di.block.add_to_normal(di_addr)
  ret << [di_addr, di.address]
end
|
971
|
+
|
972
|
+
# handles when disassemble_fast encounters a call to a subfunction
|
973
|
+
def disassemble_fast_block_subfunc(di)
  funcs = di.block.to_normal.to_a
  # with no known target, assume the call returns
  do_ret = funcs.empty?
  ret = []
  # address where the subfunction should return to (after the delay slots)
  na = di.next_addr + di.bin_length * @cpu.delay_slot(di)

  funcs.each { |fa|
    fa = normalize(fa)
    disassemble_fast_checkfunc(fa)
    yield fa, di if block_given?

    if f = @function[fa] and bf = f.get_backtracked_for(self, fa, di.address) and not bf.empty?
      # this includes retaddr unless f is noreturn
      bf.each { |btt|
        next if btt.type != :x
        bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max)
        if btt.detached
          ret.concat bt	# callback argument
        elsif bt.find { |a| normalize(a) == na }
          do_ret = true
        end
      }
    elsif not f or not f.noreturn
      do_ret = true
    end
  }

  if do_ret
    di.block.add_to_subfuncret(na)
    ret << [na, di.address, true]
    di.block.add_to_normal :default if not di.block.to_normal and @function[:default]
  end

  ret
end
|
1004
|
+
|
1005
|
+
# trace whose xrefs this di is responsible of
|
1006
|
+
def backtrace_xrefs_di_rw(di)
  get_xrefs_rw(di).each do |type, ptr, len|
    resolved = backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len)
    resolved.each do |xaddr|
      next if xaddr == Expression::Unknown
      # self-modifying code check: only for writes, only at the exact address
      next unless @check_smc && type == :w

      wdi = di_at(xaddr)
      next unless wdi

      puts "W: disasm: #{di} overwrites #{wdi}" if $VERBOSE
      wdi.add_comment "overwritten by #{di}"
    end
  end
end
|
1022
|
+
|
1023
|
+
# trace xrefs for execution
|
1024
|
+
def backtrace_xrefs_di_x(di)
  targets = @program.get_xrefs_x(self, di)
  # the callback may replace the target list; a nil/false result keeps the original
  targets = @callback_newaddr[di.address, targets] || targets if callback_newaddr
  targets.each do |expr|
    backtrace(expr, di.address, :origin => di.address, :type => :x)
  end
end
|
1029
|
+
|
1030
|
+
# checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc])
|
1031
|
+
# the argument must be the address of a decodedinstruction that is the first of a function,
|
1032
|
+
# which must not have return_addresses
|
1033
|
+
# returns the new thunk name if it was changed
|
1034
|
+
def detect_function_thunk(funcaddr)
  # check thunk linearity (no conditional branch etc)
  addr = funcaddr
  count = 0
  while b = block_at(addr)
    count += 1
    # a thunk is short: at most 5 blocks of at most 4 instructions
    return if count > 5 or b.list.length > 4
    if b.to_subfuncret and not b.to_subfuncret.empty?
      return if b.to_subfuncret.length != 1
      addr = normalize(b.to_subfuncret.first)
      return if not b.to_normal or b.to_normal.length != 1
      # check that the subfunction is simple (eg get_eip)
      return if not sf = @function[normalize(b.to_normal.first)]
      return if not btb = sf.backtrace_binding
      btb = btb.dup
      btb.delete_if { |k, v| Expression[k] == Expression[v] }
      return if btb.length > 2 or btb.values.include? Expression::Unknown
    else
      return if not bt = b.to_normal
      if bt.include? :default
        addr = :default
        break
      elsif bt.length != 1
        return
      end
      addr = normalize(bt.first)
    end
  end

  # addr is now the first address that is not a decoded block: the thunk target
  fname = Expression[addr].reduce_rec
  if funcaddr != addr and f = @function[funcaddr]
    # forward get_backtrace_binding to target
    f.backtrace_binding = { :thunk => addr }
    f.noreturn = true if @function[addr] and @function[addr].noreturn
  end
  return if not fname.kind_of? ::String

  l = auto_label_at(funcaddr, 'sub', 'loc')
  # auto_label_at may return nil; only auto-generated sub_ labels are renamed
  return if not l or l[0, 4] != 'sub_'
  puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG
  rename_label(l, @program.new_label("thunk_#{fname}"))
end
|
1074
|
+
|
1075
|
+
# this is called when reaching a noreturn function call, with the call address
|
1076
|
+
# it is responsible for detecting the actual 'call' instruction leading to this
|
1077
|
+
# noreturn function, and eventually mark the call target as a thunk
|
1078
|
+
def detect_function_thunk_noreturn(addr)
  # walk back at most 5 instructions/blocks looking for the call instruction
  5.times {
    di = di_at(addr)
    return if not di

    if di.opcode.props[:saveip] and not di.block.to_subfuncret
      # found a call that does not return: its single decoded target may be a thunk
      break if di.block.to_normal.to_a.length != 1
      taddr = normalize(di.block.to_normal.first)
      break if not di_at(taddr)
      @function[taddr] ||= DecodedFunction.new
      return detect_function_thunk(taddr)
    end

    # otherwise step to the predecessor, only if there is exactly one
    preds = di.block.from_normal.to_a + di.block.from_subfuncret.to_a
    break if preds.length != 1
    addr = preds.first
  }
end
|
1099
|
+
|
1100
|
+
# given an address, detect if it may be a noreturn function
|
1101
|
+
# it is if all its end blocks are calls to noreturn functions
|
1102
|
+
# if it is, create a @function[fa] with noreturn = true
|
1103
|
+
# should only be called with fa = target of a call
|
1104
|
+
def check_noreturn_function(fa)
  fb = function_blocks(fa, false, false)
  # end blocks are those with an empty successor list
  lasts = fb.keys.find_all { |k| fb[k] == [] }
  return if lasts.empty?

  all_noreturn = lasts.all? { |la|
    b = block_at(la)
    next if not di = b.list.last
    # either a call whose targets are all noreturn functions,
    # or an instruction that stops execution without setting ip
    (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa|
      tf = function_at(tfa) and tf.noreturn
    }) or (di.opcode.props[:stopexec] and not di.opcode.props[:setip])
  }
  return unless all_noreturn

  # yay
  @function[fa] ||= DecodedFunction.new
  @function[fa].noreturn = true
end
|
1120
|
+
|
1121
|
+
|
1122
|
+
# walks the backtrace tree from an address, passing along an object
|
1123
|
+
#
|
1124
|
+
# the steps are (1st = event, followed by hash keys)
|
1125
|
+
#
|
1126
|
+
# for each decoded instruction encountered:
|
1127
|
+
# :di :di
|
1128
|
+
#
|
1129
|
+
# when backtracking to a block through a decodedfunction:
|
1130
|
+
# (yield for each of the block's subfunctions)
|
1131
|
+
# (the decodedinstruction responsible for the call will be yield next)
|
1132
|
+
# :func :func, :funcaddr, :addr, :depth
|
1133
|
+
#
|
1134
|
+
# when jumping from one block to another (excluding :loop): # XXX include :loops ?
|
1135
|
+
# :up :from, :to, :sfret
|
1136
|
+
#
|
1137
|
+
# when the backtrack has nothing to backtrack to (eg program entrypoint):
|
1138
|
+
# :end :addr
|
1139
|
+
#
|
1140
|
+
# when the backtrack stops by taking too long to complete:
|
1141
|
+
# :maxdepth :addr
|
1142
|
+
#
|
1143
|
+
# when the backtrack stops for encountering the specified stop address:
|
1144
|
+
# :stopaddr :addr
|
1145
|
+
#
|
1146
|
+
# when rebacktracking a block already seen in the current branch:
|
1147
|
+
# (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest)
|
1148
|
+
# :loop :looptrace
|
1149
|
+
#
|
1150
|
+
# when the address does not match a known instruction/function:
|
1151
|
+
# :unknown_addr :addr
|
1152
|
+
#
|
1153
|
+
# the block return value is used as follow for :di, :func, :up and :loop:
|
1154
|
+
# false => the backtrace stops for the branch
|
1155
|
+
# nil => the backtrace continues with the current object
|
1156
|
+
# anything else => the backtrace continues with this object
|
1157
|
+
#
|
1158
|
+
# method arguments:
|
1159
|
+
# obj is the initial value of the object
|
1160
|
+
# addr is the address where the backtrace starts
|
1161
|
+
# include_start is a bool specifying if the backtrace should start at addr or just before
|
1162
|
+
# from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction
|
1163
|
+
# stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it
|
1164
|
+
# maxdepth is the maximum depth (in blocks) for each backtrace branch.
|
1165
|
+
# (defaults to dasm.backtrace_maxblocks, which defaults to Dasm.backtrace_maxblocks)
|
1166
|
+
def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth)
  start_addr = normalize(addr)
  stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array

  # array of [obj, addr, from_subfuncret, loopdetect]
  # loopdetect is an array of [obj, addr, from_type] of each end of block encountered
  todo = []

  # array of [obj, blockaddr]
  # avoids rewalking the same value
  done = []

  # updates todo with the addresses to backtrace next
  walk_up = lambda { |w_obj, w_addr, w_loopdetect|
    if w_loopdetect.length > maxdepth
      yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
    elsif stopaddr and stopaddr.include?(w_addr)
      yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
    elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address
      # in the middle of a block: step to the previous instruction
      prevdi = w_di.block.list[w_di.block.list.index(w_di)-1]
      todo << [w_obj, prevdi.address, :normal, w_loopdetect]
    elsif w_di
      # at a block head: follow every non-indirect predecessor
      next if done.include? [w_obj, w_addr]
      done << [w_obj, w_addr]
      hadsomething = false
      w_di.block.each_from { |f_addr, f_type|
        next if f_type == :indirect
        hadsomething = true
        o_f_addr = f_addr
        f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction	# delay slot
        if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type }
          f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
          if f_obj and f_obj != w_obj	# should avoid infinite loops
            f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)]
          end
        else
          f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr)
        end
        next if f_obj == false
        f_obj ||= w_obj
        f_loopdetect ||= w_loopdetect
        # only count non-trivial paths in loopdetect (ignore linear links)
        add_detect = [[f_obj, f_addr, f_type]]
        add_detect = [] if @decoded[f_addr].kind_of? DecodedInstruction and tmp = @decoded[f_addr].block and
          ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and
            tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or
           (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address]))
        todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ]
      }
      yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething
    elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown
      # at a decodedfunction: follow every :x xref to it
      next if done.include? [w_obj, w_addr]
      oldlen = todo.length
      each_xref(w_addr, :x) { |x|
        f_addr = x.origin
        o_f_addr = f_addr
        f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction	# delay slot
        if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr }
          f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
          if f_obj and f_obj != w_obj
            f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)]
          end
        else
          f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr)
        end
        next if f_obj == false
        f_obj ||= w_obj
        f_loopdetect ||= w_loopdetect
        todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ]
      }
      yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen
    else
      yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
    end
  }

  if include_start
    todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []]
  else
    walk_up[obj, start_addr, []]
  end

  until todo.empty?
    obj, addr, type, loopdetect = todo.pop
    di = @decoded[addr]
    if di and type == :subfuncret
      # backtracking through a call: yield :func for each subfunction,
      # then requeue the call instruction itself as :normal
      di.block.each_to_normal { |sf|
        next if not f = @function[normalize(sf)]
        s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect)
        next if s_obj == false
        s_obj ||= obj
        if l = loopdetect.find { |l_obj, l_addr, l_type| addr == l_addr and l_type == :normal }
          l_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(l)..-1], :loopdetect => loopdetect)
          if l_obj and l_obj != s_obj
            s_loopdetect = loopdetect[0...loopdetect.index(l)]
          end
          next if l_obj == false
          s_obj = l_obj if l_obj
        end
        s_loopdetect ||= loopdetect
        todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ]
      }
    elsif di
      # walk the instructions of the block backwards, starting at di
      # XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ?
      di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_|
        di = di_	# XXX not sure..
        if stopaddr and ea = di.next_addr and stopaddr.include?(ea)
          yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect
          break
        end
        ex_obj = obj
        obj = yield(:di, obj, :di => di, :loopdetect => loopdetect)
        break if obj == false
        obj ||= ex_obj
      }
      walk_up[obj, di.block.address, loopdetect] if obj
    elsif @function[addr] and addr != :default and addr != Expression::Unknown
      ex_obj = obj
      obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect)
      next if obj == false
      obj ||= ex_obj
      walk_up[obj, addr, loopdetect]
    else
      yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect
    end
  end
end
|
1293
|
+
|
1294
|
+
# holds a backtrace result until a snapshot_addr is encountered
|
1295
|
+
class StoppedExpr
	# the values found so far, kept aside while the walk goes on
	attr_accessor :exprs

	# e: the list of values to hold
	def initialize(e)
		@exprs = e
	end
end
|
1299
|
+
|
1300
|
+
|
1301
|
+
# when set, the backtrace methods print their progress with puts
attr_accessor :debug_backtrace
|
1302
|
+
|
1303
|
+
# backtraces the value of an expression from start_addr
|
1304
|
+
# updates blocks backtracked_for if type is set
|
1305
|
+
# uses backtrace_walk
|
1306
|
+
# all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified
|
1307
|
+
# options:
|
1308
|
+
# :include_start => start backtracking including start_addr
|
1309
|
+
# :from_subfuncret =>
|
1310
|
+
# :origin => origin to set for xrefs when resolution is successful
|
1311
|
+
# :orig_expr => initial expression
|
1312
|
+
# :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo
|
1313
|
+
# :len => xref len (for :r/:w)
|
1314
|
+
# :snapshot_addr => addr (or array of) where the backtracker should stop
|
1315
|
+
# if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end)
|
1316
|
+
# :maxdepth => maximum number of blocks to backtrace
|
1317
|
+
# :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo
|
1318
|
+
# :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace
|
1319
|
+
# :log => Array, will be updated with the backtrace evolution
|
1320
|
+
# :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto
|
1321
|
+
# :no_check => don't use backtrace_check_found (will not backtrace indirection static values)
|
1322
|
+
# :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check)
|
1323
|
+
def backtrace(expr, start_addr, nargs={})
	# returns an Array of the values found for expr (Expression/Integer)
	# raises ArgumentError if nargs holds a key that is not a known option

	# work on a private copy: the options are consumed with delete, and the
	# caller's hash must stay usable for another call
	nargs = nargs.dup
	include_start   = nargs.delete :include_start
	from_subfuncret = nargs.delete :from_subfuncret
	origin          = nargs.delete :origin
	origexpr        = nargs.delete :orig_expr
	type            = nargs.delete :type
	len             = nargs.delete :len
	snapshot_addr   = nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr)
	maxdepth        = nargs.delete(:maxdepth) || @backtrace_maxblocks
	detached        = nargs.delete :detached
	max_complexity  = nargs.delete(:max_complexity) || @backtrace_maxcomplexity
	# the _data variant has its own key (it used to re-read :max_complexity, which
	# was already consumed on the line above, so the option could never be set)
	max_complexity_data = nargs.delete(:max_complexity_data) || @backtrace_maxcomplexity_data
	bt_log          = nargs.delete :log		# array to receive the ongoing backtrace info
	only_upto       = nargs.delete :only_upto
	no_check        = nargs.delete :no_check
	terminals       = nargs.delete(:terminals) || []
	# anything left over is an unknown option
	raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty?

	expr = Expression[expr]

	origexpr = expr if origin == start_addr

	start_addr = normalize(start_addr)
	di = @decoded[start_addr]

	if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
		puts " not backtracking stack address #{expr}" if debug_backtrace
		return []
	end

	# data accesses (:r/:w) use their own complexity and depth limits
	if type == :r or type == :w
		max_complexity = max_complexity_data
		maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data
	end

	# the expression may already be resolved before walking anything
	if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
			di, origin, type, len, maxdepth, detached))
		# no need to update backtracked_for
		return vals
	elsif maxdepth <= 0
		return [Expression::Unknown]
	end

	# create initial backtracked_for
	if type and origin == start_addr and di
		btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1)
		btt.address = di.address
		btt.exclude_instr = true if not include_start
		btt.from_subfuncret = true if from_subfuncret and include_start
		btt.detached = true if detached
		di.block.backtracked_for |= [btt]
	end

	@callback_prebacktrace[] if callback_prebacktrace

	# list of Expression/Integer
	result = []

	puts "backtracking #{type} #{expr} from #{di || Expression[start_addr || 0]} for #{@decoded[origin]}" if debug_backtrace or $DEBUG
	bt_log << [:start, expr, start_addr] if bt_log
	# the block value tells backtrace_walk how to go on: false drops the current
	# path, any other non-nil value becomes the object carried along that path
	backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h|
		expr = expr_
		case ev
		when :unknown_addr, :maxdepth
			puts " backtrace end #{ev} #{expr}" if debug_backtrace
			result |= [expr] if not snapshot_addr
			@addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin
		when :end
			if not expr.kind_of? StoppedExpr
				oldexpr = expr
				expr = backtrace_emu_blockup(h[:addr], expr)
				puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
				bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
				if expr != oldexpr and not snapshot_addr and vals = (no_check ?
						(!need_backtrace(expr, terminals) and [expr]) :
						backtrace_check_found(expr, nil, origin, type, len,
							maxdepth-h[:loopdetect].length, detached))
					result |= vals
					next
				end
			end
			puts " backtrace end #{ev} #{expr}" if debug_backtrace
			if not snapshot_addr
				result |= [expr]

				btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
				btt.detached = true if detached
				@decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]]
				@function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default
				@addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin
			end
		when :stopaddr
			if not expr.kind_of? StoppedExpr
				oldexpr = expr
				expr = backtrace_emu_blockup(h[:addr], expr)
				puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
				bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
			end
			puts " backtrace end #{ev} #{expr}" if debug_backtrace
			# values held in a StoppedExpr are released now that the snapshot address is reached
			result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr])
		when :loop
			next false if expr.kind_of? StoppedExpr
			t = h[:looptrace]
			oldexpr = t[0][0]
			next false if expr == oldexpr	# unmodifying loop
			puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace
			false
		when :up
			next false if only_upto and h[:to] != only_upto
			next expr if expr.kind_of? StoppedExpr
			oldexpr = expr
			expr = backtrace_emu_blockup(h[:from], expr)
			puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
			bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log

			if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
					backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
						maxdepth-h[:loopdetect].length, detached))
				if snapshot_addr
					# keep the values aside until the snapshot address is reached
					expr = StoppedExpr.new vals
					next expr
				else
					result |= vals
					bt_log << [:found, vals, h[:from]] if bt_log
					next false
				end
			end

			if origin and type
				# update backtracked_for
				update_btf = lambda { |btf, new_btt|
					# returns true if btf was modified
					if i = btf.index(new_btt)
						btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth
					else
						btf << new_btt
					end
				}

				btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
				btt.detached = true if detached
				if x = di_at(h[:from])
					update_btf[x.block.backtracked_for, btt]
				end
				if x = @function[h[:from]] and h[:from] != :default
					update_btf[x.backtracked_for, btt]
				end
				if x = di_at(h[:to])
					btt = btt.dup
					btt.address = x.address
					btt.from_subfuncret = true if h[:sfret] == :subfuncret
					if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to])
						puts " function returns to caller" if debug_backtrace
						next false
					end
					if not update_btf[x.block.backtracked_for, btt]
						puts " already backtraced" if debug_backtrace
						next false
					end
				end
			end
			expr
		when :di, :func
			next if expr.kind_of? StoppedExpr
			if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
				puts " not backtracking stack address #{expr}" if debug_backtrace
				next false
			end

			oldexpr = expr
			case ev
			when :di
				h[:addr] = h[:di].address
				expr = backtrace_emu_instr(h[:di], expr)
				bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr
			when :func
				expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
				if snapshot_addr and snapshot_addr == h[:funcaddr]
					# XXX recursiveness detection needs to be fixed
					puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
					next false
				end
				bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr
			end
			puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
			if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
					h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached))
				if snapshot_addr
					expr = StoppedExpr.new vals
				else
					result |= vals
					bt_log << [:found, vals, h[:addr]] if bt_log
					next false
				end
			elsif expr.complexity > max_complexity
				puts " backtrace aborting, expr too complex" if debug_backtrace
				next false
			end
			expr
		else raise ev.inspect
		end
	}

	puts ' backtrace result: ' + result.map { |r| Expression[r] }.join(', ') if debug_backtrace

	result
end
|
1530
|
+
|
1531
|
+
# checks if the BacktraceTrace is a call to a known subfunction
|
1532
|
+
# returns true and updates self.addrs_todo
|
1533
|
+
def backtrace_check_funcret(btt, funcaddr, instraddr)
	# returns nil unless btt is an :x trace (not coming from a subfuncret) whose
	# expression is a function return, going through the decoded instruction at
	# instraddr into the known function at funcaddr
	di = @decoded[instraddr]
	return unless di and @function[funcaddr] and btt.type == :x and
		not btt.from_subfuncret and
		@cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin])

	# the return address must be fully resolved by the instruction at instraddr
	retaddr = backtrace_emu_instr(di, btt.expr)
	return unless retaddr and not need_backtrace(retaddr)

	puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace
	di.block.add_to_subfuncret normalize(retaddr)

	if @decoded[funcaddr].kind_of? DecodedInstruction
		# check that all callers :saveip returns (eg recursive call that was resolved
		# before we found funcaddr was a function)
		@decoded[funcaddr].block.each_from_normal { |fm|
			caller_di = di_at(fm)
			if caller_di and caller_di.opcode.props[:saveip] and not caller_di.block.to_subfuncret
				backtrace_check_funcret(btt, funcaddr, fm)
			end
		}
	end

	if @function[funcaddr].finalized
		@addrs_todo << [retaddr, instraddr, true]
	else
		# the function is not fully disassembled: arrange for the retaddr to be
		# disassembled only after the subfunction is finished
		# for that we walk the code from the call, mark each block start, and insert the sfret
		# just before the 1st function block address in @addrs_todo (which is pop()ed by dasm_step)
		visited = []
		pending = []
		di.block.each_to_normal { |t| pending << normalize(t) }
		while cur = pending.pop
			next if visited.include? cur or not get_section_at(cur)
			visited << cur
			if @decoded[cur].kind_of? DecodedInstruction
				@decoded[cur].block.each_to_samefunc(self) { |t| pending << normalize(t) }
			end
		end

		first_in_func = @addrs_todo.find { |r, i, sfr| visited.include? normalize(r) }
		# -1 appends when no entry of the function is queued
		idx = @addrs_todo.index(first_in_func) || -1
		@addrs_todo.insert(idx, [retaddr, instraddr, true])
	end

	true
end
|
1574
|
+
|
1575
|
+
# applies one decodedinstruction to an expression
|
1576
|
+
def backtrace_emu_instr(di, expr)
	# delegated to the cpu: returns whatever @cpu.backtrace_emu gives for (di, expr)
	cpu = @cpu
	cpu.backtrace_emu(di, expr)
end
|
1579
|
+
|
1580
|
+
# applies one subfunction to an expression
|
1581
|
+
def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth)
	# ask the function object for its binding, then substitute it in expr and reduce
	func_binding = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth)
	bound = expr.bind(func_binding)
	Expression[bound.reduce]
end
|
1585
|
+
|
1586
|
+
# applies a location binding
|
1587
|
+
def backtrace_emu_blockup(addr, expr)
	# expr is returned untouched when no binding is registered for addr
	ab = @address_binding[addr]
	return expr if not ab
	Expression[expr.bind(ab).reduce]
end
|
1590
|
+
|
1591
|
+
# static resolution of indirections
|
1592
|
+
def resolve(expr)
	# starts from @old_prog_binding and adds, for each indirection of expr, the
	# value decoded from the section holding its (recursively resolved) target
	# expr is returned as is when a target does not fall in a section
	binding = @old_prog_binding
	Expression[expr].expr_indirections.each { |ind|
		e, = get_section_at(resolve(ind.target))
		return expr if not e
		binding = binding.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ]
	}
	Expression[expr].bind(binding).reduce
end
|
1600
|
+
|
1601
|
+
# returns true if the expression needs more backtrace
|
1602
|
+
# it checks for the presence of a symbol (not :unknown), which means it depends on some register value
|
1603
|
+
def need_backtrace(expr, terminals=[])
	# an Integer is already a final value (returns nil)
	return if expr.kind_of? ::Integer
	# symbols left once :unknown and the terminals are discarded
	pending = expr.externals.grep(::Symbol) - [:unknown] - terminals
	not pending.empty?
end
|
1607
|
+
|
1608
|
+
# returns an array of expressions, or nil if expr needs more backtrace
|
1609
|
+
# it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value)
|
1610
|
+
# if it needs no more backtrace, expr's indirections are recursively resolved
|
1611
|
+
# xrefs are created, and di args are updated (immediate => label)
|
1612
|
+
# if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value
|
1613
|
+
#
|
1614
|
+
# expr indirection are solved by first finding the value of the pointer, and then rebacktracking for write-type access
|
1615
|
+
# detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback)
|
1616
|
+
# if the backtrace ends pre entrypoint, returns the value encoded in the raw binary
|
1617
|
+
# XXX global variable (modified by another function), exported data, multithreaded app..
|
1618
|
+
# TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax])
|
1619
|
+
# TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
|
1620
|
+
# TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
|
1621
|
+
# eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
|
1622
|
+
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
	# only entrypoints or block starts called by a :saveip are checked for being a function
	# want to execute [esp] from a block start
	funcstart = (type == :x and di and di == di.block.list.first and
		@cpu.backtrace_is_function_return(expr, @decoded[origin]))
	if funcstart and (di.block.from_normal or di.block.from_subfuncret)
		# not an entrypoint: need at least one :saveip instruction leading here
		funcstart = false
		di.block.each_from_normal { |fn|
			funcstart = true if @decoded[fn] and @decoded[fn].opcode.props[:saveip]
		}
	end

	if funcstart
		# now we can mark the current address a function start
		# the actual return address will be found later (we tell the caller to continue the backtrace)
		addr = di.address
		l = auto_label_at(addr, 'sub', 'loc', 'xref')
		f = @function[addr]
		if not f
			f = @function[addr] = DecodedFunction.new
			puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE
		end
		f.finalized = false

		if @decoded[origin]
			f.return_address ||= []
			f.return_address |= [origin]
			@decoded[origin].add_comment "endsub #{l}"
			# TODO add_xref (to update the comment on rename_label)
		end

		f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address }
	end

	# nil tells the caller to go on backtracking
	return if need_backtrace(expr)

	puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
	result = backtrace_value(expr, maxdepth)
	# keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
	result << expr if not type
	result.uniq!

	# create xrefs/labels
	if type and origin
		result.each { |e| backtrace_found_result(e, di, type, origin, len, detached) }
	end

	result
end
|
1666
|
+
|
1667
|
+
# returns an array of expressions with Indirections resolved (recursive with backtrace_indirection)
|
1668
|
+
def backtrace_value(expr, maxdepth)
	start = Expression[expr.reduce]

	# solve each indirection in turn: every partial result is cloned once per
	# value found for that indirection (cross-product)
	solved = start.expr_indirections.uniq.inject([start]) { |partial, ind|
		backtrace_indirection(ind, maxdepth).inject([]) { |nxt, val|
			nxt | partial.map { |e| Expression[e.bind(ind => val).reduce] }
		}
	}

	solved.uniq
end
|
1683
|
+
|
1684
|
+
# returns the array of values pointed by the indirection at its invocation (ind.origin)
|
1685
|
+
# first resolves the pointer using backtrace_value, if it does not point in edata keep the original pointer
|
1686
|
+
# then backtraces from ind.origin until it finds an :w xref origin
|
1687
|
+
# if no :w access is found, returns the value encoded in the raw section data
|
1688
|
+
# TODO handle unaligned (partial?) writes
|
1689
|
+
def backtrace_indirection(ind, maxdepth)
	if not ind.origin
		puts "backtrace_ind: no origin for #{ind}" if $VERBOSE
		return [ind]
	end

	ret = []

	# value of len bytes encoded in the section holding addr, Unknown if there is none
	read_static = lambda { |addr, len|
		edata, = get_section_at(addr)
		if edata
			Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
		else
			Expression::Unknown
		end
	}

	# true when e has no indirection and all its externals are program labels or functions
	settled = lambda { |e|
		e.expr_externals.all? { |x| @prog_binding[x] or @function[normalize(x)] } and e.expr_indirections.empty?
	}

	# resolve pointers (they may include Indirections)
	backtrace_value(ind.target, maxdepth).each { |ptr|
		# addresses of the decoded instructions writing to ptr
		writers = []
		if ptr != Expression::Unknown
			each_xref(ptr, :w) { |x|
				# XXX should be rebacktracked on new xref
				next if not @decoded[x.origin]
				writers |= [x.origin]
			}
		end

		if not writers.empty?
			# wait until we find a write xref, then backtrace the written value
			initval = true
		elsif get_section_at(ptr)
			# static data, never written : return encoded value
			ret |= [read_static[ptr, ind.len]]
			next
		else
			# unknown pointer : backtrace the indirection, hope it solves itself
			initval = ind
		end

		# wait until we arrive at an xref'ing instruction, then backtrace the written value
		backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h|
			case ev
			when :unknown_addr, :maxdepth, :stopaddr
				puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace
				ret |= [Expression::Unknown]
			when :end
				if not writers.empty? and (expr == true or not need_backtrace(expr))
					if expr == true
						# found a path avoiding the :w xrefs, read the encoded initial value
						ret |= [read_static[ptr, ind.len]]
					else
						bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => read_static[i.target, i.len] }
						ret |= [Expression[expr.bind(bd).reduce]]
					end
				else
					# unknown pointer, backtrace did not resolve...
					ret |= [Expression::Unknown]
				end
			when :di
				di = h[:di]
				if expr == true
					# still looking for the writing instruction
					next true if not writers.include? di.address
					# find the expression to backtrace: assume this is the :w xref from this di
					writes = get_xrefs_rw(di).find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len }
					if writes.length != 1
						puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG
						ret |= [Expression::Unknown]
						next false
					end
					expr = Indirection.new(writes[0][1], ind.len, di.address)
				end
				expr = backtrace_emu_instr(di, expr)
				# may have new indirections... recall bt_value ?
				if settled[expr]
					ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
					false
				else
					expr
				end
			when :func
				next true if expr == true	# XXX
				expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length)
				if settled[expr]
					ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
					false
				else
					expr
				end
			end
		}
	}

	ret
end
|
1788
|
+
|
1789
|
+
# creates xrefs, updates addrs_todo, updates instr args
|
1790
|
+
def backtrace_found_result(expr, di, type, origin, len, detached)
	# expr: one resolved value, di: instruction where it was found (may be nil),
	# type/len: xref type and length, origin: address the xrefs are created from,
	# detached: for :x, queue the target without origin and link it as an indirect target
	n = normalize(expr)
	fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr	# delay_slot
	add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough
	unk = true if n == Expression::Unknown

	add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk
	# label prefix depends on the access length
	base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref'
	base = 'sub' if @function[n]
	n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough
	n = Expression[n]

	# update instr args
	# TODO trace expression evolution to allow handling of
	#  mov eax, 28 ; add eax, 4 ; jmp eax
	#  => mov eax, (loc_xx-4)
	if di and not unk # and di.address == origin
		@cpu.replace_instr_arg_immediate(di.instruction, expr, n)
	end
	if @decoded[origin] and not unk
		@cpu.backtrace_found_result(self, @decoded[origin], expr, type, len)
	end

	# add comment
	if type and @decoded[origin] # and not @decoded[origin].instruction.args.include? n
		@decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough
	end

	# check if target is a string
	if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n)
		l = s[0].inv_export[s[0].ptr]
		case len
		when 1; str = s[0].read(32).unpack('C*')
		when 2; str = s[0].read(64).unpack('v*')
		end
		str = str.inject('') { |str_, c|
			case c
			# c is an Integer: compare against integer codes (\n, \r, \t), a ?\n
			# literal is a String on ruby >= 1.9 and would never match
			when 0x20..0x7e, 0x0a, 0x0d, 0x09; str_ << c
			else break str_
			end
		}
		if str.length >= 4
			di.add_comment "#{'L' if len == 2}#{str.inspect}"
			str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12]
			# only auto-generated byte_ labels are renamed; l is nil when the address has no label
			if str.length >= 8 and l and l[0, 5] == 'byte_'
				rename_label(l, @program.new_label(str))
			end
		end
	end

	# XXX all this should be done in backtrace() { <here> }
	if type == :x and origin
		if detached
			o = @decoded[origin] ? origin : di ? di.address : nil	# lib function callback have origin == libfuncname, so we must find a block somewhere else
			origin = nil
			@decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk
		else
			@decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk
		end
		@addrs_todo << [n, origin]
	end
end
|
1852
|
+
|
1853
|
+
def to_s
	# every line yielded by dump, each followed by a newline
	text = ''
	dump { |line| text << line << "\n" }
	text
end
|
1858
|
+
|
1859
|
+
# dumps the source, optionally including data
|
1860
|
+
# yields (defaults puts) each line
|
1861
|
+
def dump(dump_data=true, &b)
	b ||= lambda { |l| puts l }
	# sections with a non-Integer base are sorted as if they were at 0
	ordered = @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }
	ordered.each { |addr, edata|
		addr = Expression[addr] if addr.kind_of? ::String
		# offsets, relative to this section, of the decoded block heads it contains
		heads = @decoded.values.grep(DecodedInstruction).map { |di|
			Expression[di.block.address, :-, addr].reduce if di.block_head?
		}
		blockoffs = heads.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length }
		b[@program.dump_section_header(addr, edata)]
		if not dump_data and edata.length > 16*1024 and blockoffs.empty?
			b["// [#{edata.length} data bytes]"]
			next
		end
		cur = 0		# last off displayed
		while cur < edata.length
			if cur == blockoffs.first
				blockoffs.shift
				di = @decoded[addr+cur]
				if cur != di.block.edata_ptr
					b["\n// ------ overlap (#{cur-di.block.edata_ptr}) ------"]
				elsif di.block.from_normal.kind_of? ::Array
					b["\n"]
				end
				dump_block(di.block, &b)
				cur += [di.block.bin_length, 1].max
				# do not jump over the next block head
				cur = blockoffs.first if blockoffs.first and cur > blockoffs.first
			else
				next_off = blockoffs.first || edata.length
				if dump_data or next_off - cur < 16
					cur = dump_data(addr + cur, edata, cur, &b)
				else
					b["// [#{next_off - cur} data bytes]"]
					cur = next_off
				end
			end
		end
	}
end
|
1897
|
+
|
1898
|
+
# dumps a block of decoded instructions
|
1899
|
+
def dump_block(block, &b)
	b ||= lambda { |l| puts l }
	# block may also be given as the address of a decoded instruction
	di = @decoded[block]
	block = di.block if di
	dump_block_header(block, &b)
	block.list.each { |insn| b[insn.show] }
end
|
1905
|
+
|
1906
|
+
# shows the xrefs/labels at block start
|
1907
|
+
def dump_block_header(block, &b)
	b ||= lambda { |l| puts l }
	# collect the :x, :r and :w xrefs to the block start
	xr = []
	each_xref(block.address) { |x|
		if x.type == :x
			xr << Expression[x.origin]
		elsif x.type == :r or x.type == :w
			xr << "#{x.type}#{x.len}:#{Expression[x.origin]}"
		end
	}
	# at most 8 xrefs are shown
	b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"] unless xr.empty?
	if block.edata.inv_export[block.edata_ptr]
		b["\n"] if xr.empty?
		label_alias[block.address].each { |name| b["#{name}:"] }
	end
	if c = @comment[block.address]
		c = c.join("\n") if c.kind_of? ::Array
		c.each_line { |l| b["// #{l}"] }
	end
end
|
1928
|
+
|
1929
|
+
# dumps data/labels, honours @xrefs.len if exists
|
1930
|
+
# dumps one line only
|
1931
|
+
# stops on end of edata/@decoded/@xref
|
1932
|
+
# returns the next offset to display
|
1933
|
+
# TODO array-style data access
|
1934
|
+
def dump_data(addr, edata, off, &b)
	b ||= lambda { |l| puts l }

	# start of the line: the label (if any) padded to 16 chars
	# the other aliases of the address are yielded on their own lines
	line = ''
	if lbl = edata.inv_export[off]
		names = label_alias[addr].to_a.sort
		lbl = names.pop || lbl
		names.each { |nm| b["#{nm}:"] }
		line = (lbl + ' ').ljust(16)
	end

	elemlen = 1		# size of each element we dump (db by default)
	dumplen = -off % 16	# number of octets to dump
	dumplen = 16 if dumplen == 0
	cmt = []
	each_xref(addr) { |x|
		dumplen = elemlen = x.len if x.len == 2 or x.len == 4
		cmt << " #{x.type}#{x.len}:#{Expression[x.origin]}"
	}
	cmt = " ; @#{Expression[addr]}" + cmt.sort[0, 6].join
	# a relocation forces the element size
	if r = edata.reloc[off]
		dumplen = elemlen = r.type.to_s[1..-1].to_i/8
	end
	dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen]
	if not dataspec
		dataspec = 'db '
		elemlen = 1
	end
	line << dataspec

	# dup(?)
	if off >= edata.data.length
		dups = edata.virtsize - off
		# stop before the nearest label or xref found after addr
		shrink = lambda { |a|
			tmp = Expression[a, :-, addr].reduce
			dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups
		}
		@prog_binding.each_value { |a| shrink[a] }
		@xrefs.each_key { |a| shrink[a] }
		dups /= elemlen
		dups = 1 if dups < 1
		b[(line + "#{dups} dup(?)").ljust(48) << cmt]
		return off + dups*elemlen
	end

	vals = []
	edata.ptr = off
	dups = dumplen/elemlen
	elemsym = "u#{elemlen*8}".to_sym
	while edata.ptr < edata.data.length
		if vals.length > dups and vals.last != vals.first
			# we have a dup(), unread the last element which is different
			vals.pop
			addr = Expression[addr, :-, elemlen].reduce
			edata.ptr -= elemlen
			break
		end
		break if vals.length == dups and vals.uniq.length > 1
		vals << edata.decode_imm(elemsym, @cpu.endianness)
		addr += elemlen
		# stop on an xref/instruction/reloc/label located in or right after the element just read
		stop = (1-elemlen..0).find { |delta|
			t = addr + delta
			@xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+delta] or edata.inv_export[edata.ptr+delta]
		}
		if stop
			# stop <= 0
			edata.ptr += stop
			addr += stop
			break
		end
		break if edata.reloc[edata.ptr-elemlen]
	end

	# line of repeated value => dup()
	if vals.length > 8 and vals.uniq.length == 1
		b[(line << "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) << cmt]
		return edata.ptr
	end

	# recognize strings: consecutive printable values are merged in one String
	vals = vals.inject([]) { |acc, value|
		printable = false
		if elemlen == 1 or elemlen == 2
			printable = case value
				when 0x20..0x7e, 0x0a, 0x0d; true
				else false
				end
		end
		if not printable
			acc << value
		elsif acc.last.kind_of? ::String
			acc.last << value
			acc
		else
			acc << value.chr
		end
	}

	vals.map! { |value|
		if not value.kind_of? ::String
			Expression[value]
		elsif value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care
			value.inspect
		else
			# too short for a string: show the individual values
			value.unpack('C*').map { |c| Expression[c] }
		end
	}
	vals.flatten!

	b[(line << vals.join(', ')).ljust(48) << cmt]

	edata.ptr
end
|
2045
|
+
|
2046
|
+
# Returns the Decompiler attached to this object, creating it on first use.
# When c_parser is not set yet, an empty C source is parsed first
# (parse_c '') before the Decompiler is built.
def decompiler
	parse_c('') unless c_parser
	# build once, then keep handing back the cached instance
	@decompiler = Decompiler.new(self) unless @decompiler
	@decompiler
end
|
2050
|
+
# Replaces the cached decompiler instance with +dc+.
def decompiler=(dc)
	@decompiler = dc
end
|
2053
|
+
# Forwards all the given addresses to the decompiler's #decompile
# and returns whatever it returns.
def decompile(*addr)
	dcmp = decompiler
	dcmp.decompile(*addr)
end
|
2056
|
+
# Forwards +addr+ to the decompiler's #decompile_func
# and returns whatever it returns.
def decompile_func(addr)
	dcmp = decompiler
	dcmp.decompile_func(addr)
end
|
2059
|
+
|
2060
|
+
# Hook that lets this class be loaded through AutoExe:
# calls load with +f+ (and the optional block), then returns
# the #program of the object load produced.
def self.autoexe_load(f, &b)
	load(f, &b).program
end
|
2065
|
+
end
|
2066
|
+
end
|
2067
|
+
|
2068
|
+
require 'metasm/disassemble_api'
|