metasm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
|
@@ -0,0 +1,2068 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
require 'metasm/decode'
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
module Metasm
|
|
11
|
+
# holds information for decoded instructions: the original opcode, a pointer to the InstructionBlock, etc
|
|
12
|
+
class DecodedInstruction
  # InstructionBlock that contains this instruction
  attr_accessor :block
  # byte offset of this instruction from the start of its block, used only for hexdump
  attr_accessor :block_offset
  # memory address of the first byte of the instruction
  attr_accessor :address
  # the disassembled data
  attr_accessor :instruction, :opcode
  # our length in bytes
  attr_accessor :bin_length
  # array of arbitrary strings
  attr_accessor :comment
  # cache of the binding used by the backtracker to emulate this instruction
  attr_accessor :backtrace_binding
  # explicit successor address, takes precedence over address + bin_length in #next_addr
  attr_writer :next_addr

  # arg is either an existing Instruction (reused as-is, its opcode is looked up
  # by name in the cpu opcode list when the instruction has a cpu), or anything
  # accepted by Instruction.new
  # addr, when given, becomes the address of the instruction
  def initialize(arg, addr=nil)
    if arg.kind_of?(Instruction)
      @instruction = arg
      @opcode = @instruction.cpu.opcode_list.find { |op| op.name == @instruction.opname } if @instruction.cpu
    else
      @instruction = Instruction.new(arg)
    end
    @bin_length = 0
    @address = addr if addr
  end

  # address following this instruction: the value set through next_addr= if any,
  # else address + bin_length
  # returns nil when the instruction has no address
  def next_addr
    return unless address
    (@next_addr ||= nil) || (address + @bin_length)
  end

  # returns a one-line textual form of the instruction
  # when attached to a block, includes the address, a hexdump of the raw bytes
  # (truncated past 12 bytes) and up to 6 sorted comments
  def show
    unless block
      return "#{@instruction}#{' ; ' + @comment.join(' ') if comment}"
    end

    raw = @block.edata.data[@block.edata_ptr+@block_offset, @bin_length]
    bin = raw.unpack('C*').map { |c| '%02x' % c }.join
    # keep the first 10 bytes (20 hex digits), and note how many were dropped
    bin = bin[0, 20] + "..<+#{@bin_length-10}>" if @bin_length > 12
    " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{bin} #{@comment.sort[0,6].join(' ') if comment}"
  end

  include Renderable
  # returns the list of elements to render: optional address, the instruction, optional comments
  def render
    parts = []
    parts.push(Expression[address], ' ') if address
    parts.push(@instruction)
    parts.push(' ; ', @comment) if comment
    parts
  end

  # appends c to the comment list, unless already present
  # returns the updated list
  def add_comment(c)
    @comment = (@comment || []) | [c]
  end

  # returns a copy of the DecInstr, with duplicated #instruction ("deep_copy")
  def dup
    copy = super()
    copy.instruction = @instruction.dup
    copy
  end
end
|
|
79
|
+
|
|
80
|
+
# holds information on a backtracked expression near begin and end of instruction blocks (#backtracked_for)
|
|
81
|
+
class BacktraceTrace
  # address of the instruction in the block from which rebacktrace should start (use with from_subfuncret bool)
  # address is nil if the backtrace is from block start
  # exclude_instr is a bool saying if the backtrace should start at address or at the preceding instruction
  # these are optional: if absent, expr is to be rebacktracked when a new codepath arrives at the beginning of the block
  attr_accessor :address, :from_subfuncret, :exclude_instr
  # address of the instruction that initiated the backtrace
  attr_accessor :origin
  # the Expression to backtrace at this point
  attr_accessor :expr
  # the original backtracked Expression
  attr_accessor :orig_expr
  # length of r/w xref (in bytes)
  attr_accessor :len
  # :r/:w/:x
  attr_accessor :type
  # bool: true if this maps to a :x that should not have a from when resolved
  attr_accessor :detached
  # maxdepth at the point of the object creation
  attr_accessor :maxdepth

  # len and maxdepth are optional, and are left unset when not given
  def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil)
    @expr, @origin, @orig_expr, @type = expr, origin, orig_expr, type
    @len = len if len
    @maxdepth = maxdepth if maxdepth
  end

  # hash is derived from origin and expr; both are also compared by eql?,
  # so that objects that are eql? always share the same hash
  def hash ; [origin, expr].hash ; end

  # two traces are equal when they have the same class and the same
  # address/from_subfuncret/exclude_instr/expr/origin/orig_expr/len/type/detached
  # maxdepth is not part of the comparison
  # expr must be compared here: it contributes to #hash, and leaving it out
  # would make traces with distinct expr compare equal while hashing differently
  def eql?(o)
    o.class == self.class and
    [  address,   from_subfuncret,   exclude_instr,   expr,   origin,   orig_expr,   len,   type,   detached] ==
    [o.address, o.from_subfuncret, o.exclude_instr, o.expr, o.origin, o.orig_expr, o.len, o.type, o.detached]
  end
  alias == eql?
end
|
|
116
|
+
|
|
117
|
+
# a cross-reference, tracks read/write/execute memory accesses by decoded instructions
|
|
118
|
+
class Xref
  # access kind: :r/:w/:x
  attr_accessor :type
  # size of the r/w access, in bytes
  attr_accessor :len
  # address of the instruction that generated the xref
  attr_accessor :origin
  # XXX list of instructions intervening in the backtrace ?

  # len is optional, and is left unset when not given
  def initialize(type, origin, len=nil)
    @origin = origin
    @type = type
    @len = len if len
  end

  # xrefs are hashed by origin only
  def hash
    @origin.hash
  end

  # equal when o is of the same class with the same type, len and origin
  def eql?(o)
    return false unless o.class == self.class
    [type, len, origin] == [o.type, o.len, o.origin]
  end
  alias == eql?
end
|
|
136
|
+
|
|
137
|
+
# holds a list of contiguous decoded instructions, forming an uninterrupted block (except for eg CPU exceptions)
|
|
138
|
+
# most attributes are either a value or an array of values, use the associated iterator.
|
|
139
|
+
class InstructionBlock
  # address of the first instruction
  attr_accessor :address
  # raw data holding the block, and the offset of the block start in it
  attr_accessor :edata, :edata_ptr
  # list of DecodedInstructions
  attr_accessor :list
  # address of instructions giving control directly to us
  # includes addr of normal instruction when code flow continues to us past the end of the preceding block
  # does not include addresses of subfunction return instructions
  # may be nil or an array
  attr_accessor :from_normal
  # address of instructions called/jumped to
  attr_accessor :to_normal
  # address of an instruction that calls a subfunction which returns to us
  attr_accessor :from_subfuncret
  # address of instruction executed after a called subfunction returns
  attr_accessor :to_subfuncret
  # address of instructions executed indirectly through us (callback in a subfunction, SEH...)
  # XXX from_indirect is not populated for now
  attr_accessor :from_indirect, :to_indirect
  # array of BacktraceTrace
  # when a new code path comes to us, it should be backtracked for the values of :r/:w/:x using btt with no address
  # for internal use only (block splitting): btt with an address
  attr_accessor :backtracked_for

  # arg0 is the base address of the block, or a DecodedInstruction / an Array
  # of DecodedInstruction to fill the block with (the address is then taken
  # from the first instruction)
  # edata_ptr defaults to edata.ptr, or 0 when no edata is given
  def initialize(arg0, edata=nil, edata_ptr=nil)
    @list = []
    if arg0.kind_of?(DecodedInstruction)
      @address = arg0.address
      add_di(arg0)
    elsif arg0.kind_of?(Array)
      @address = arg0.first.address unless arg0.empty?
      arg0.each { |di| add_di(di) }
    else
      @address = arg0
    end
    @edata = edata
    @edata_ptr = edata_ptr || (edata ? edata.ptr : 0)
    @backtracked_for = []
  end

  # length of the block in bytes: end offset of the last instruction, 0 for an empty block
  def bin_length
    last = @list.last
    last ? last.block_offset + last.bin_length : 0
  end

  # splits the current block into a new one with all di from address addr to end
  # caller is responsible for rebacktracing new.bt_for to regenerate correct old.btt/new.btt
  # raises if addr is not the address of an instruction of the block, or is its first instruction
  # returns the new block
  def split(addr)
    idx = @list.index { |di| di.address == addr }
    raise "invalid split @#{Expression[addr]}" if not idx or idx == 0

    tail = self.class.new(addr, @edata, @edata_ptr + @list[idx].block_offset)
    # move the trailing instructions; add_di recomputes their block/block_offset
    @list.slice!(idx..-1).each { |di| tail.add_di(di) }

    # outgoing links now belong to the tail, we inherit whatever the fresh block had
    ours = to_normal
    @to_normal = tail.to_normal
    tail.to_normal = ours
    ours = to_subfuncret
    @to_subfuncret = tail.to_subfuncret
    tail.to_subfuncret = ours

    # chain both halves together
    tail.add_from @list.last.address
    add_to tail.address

    # hand over the traces anchored on an instruction that moved
    @backtracked_for.delete_if { |btt|
      next unless btt.address
      next unless tail.list.find { |di| di.address == btt.address }
      tail.backtracked_for << btt
      true
    }

    tail
  end

  # adds a decodedinstruction to the block list, updates di.block and di.block_offset
  # di.address is set from the block address only if it has none yet
  def add_di(di)
    offset = bin_length
    di.block = self
    di.block_offset = offset
    di.address ||= @address + offset
    @list << di
  end
end
|
|
216
|
+
|
|
217
|
+
# a factorized subfunction as seen by the disassembler
|
|
218
|
+
class DecodedFunction
  # binding applied when backtracking through an instruction that calls us (used before the instruction's own binding)
  # for non-external functions it is filled lazily, register by register, when
  # a backtraced expression depends on it
  attr_accessor :backtrace_binding
  # same as InstructionBlock#backtracked_for
  # includes the expression responsible of the function return (eg [esp] on ia32)
  attr_accessor :backtracked_for
  # addresses of instruction causing the function to return
  attr_accessor :return_address
  # a lambda called for dynamic backtrace_binding generation
  attr_accessor :btbind_callback
  # a lambda called for dynamic backtracked_for
  attr_accessor :btfor_callback
  # bool, if false the function is actually being disassembled
  attr_accessor :finalized
  # bool, if true the function does not return (eg exit() or ExitProcess())
  attr_accessor :noreturn

  # starts with no backtracked expression and an empty binding
  def initialize
    @backtracked_for = []
    @backtrace_binding = {}
  end

  # returns the binding to use when backtracking expr through a call to this function
  # - with a btbind_callback: returns the callback result, called with
  #   [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
  # - when the binding has a :thunk entry naming a known function: forwards to that function
  # - else asks the cpu to update the binding for the Symbol externals of expr
  #   that are not bound yet (:unknown excluded), and returns backtrace_binding
  def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
    if btbind_callback
      return @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth]
    end

    dest = @backtrace_binding[:thunk] if backtrace_binding
    target = dasm.function[dest] if dest
    return target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) if target

    unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown]
    unless unk_regs.empty?
      dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs)
    end
    @backtrace_binding
  end

  # returns the list of expressions to backtrace on a call to this function
  # - with a btfor_callback: returns the callback result, called with [dasm, bt_for, funcaddr, calladdr]
  # - when the binding has a :thunk entry naming a known function: forwards to that function
  # - else returns backtracked_for
  def get_backtracked_for(dasm, funcaddr, calladdr)
    return @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr] if btfor_callback

    dest = @backtrace_binding[:thunk] if backtrace_binding
    target = dasm.function[dest] if dest
    return target.get_backtracked_for(dasm, funcaddr, calladdr) if target

    @backtracked_for
  end
end
|
|
268
|
+
|
|
269
|
+
class CPU
  # return the thing to backtrace to find +value+ before the execution of this instruction
  # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1]
  # (the value of :eax after 'inc eax' is the value of :eax before plus 1)
  # may return Expression::Unknown
  # the binding of di is computed once and cached in di.backtrace_binding
  def backtrace_emu(di, value)
    bound = Expression[value].bind(di.backtrace_binding ||= get_backtrace_binding(di))
    Expression[bound.reduce]
  end

  # returns a list of Expressions/Integer to backtrace to find an execution target
  # this default implementation returns nil
  def get_xrefs_x(dasm, di)
    nil
  end

  # returns a list of [type, address, len], reads (:r) first, then writes (:w)
  def get_xrefs_rw(dasm, di)
    reads = get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] }
    writes = get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] }
    reads + writes
  end

  # returns a list [addr, len]
  # collects the Indirections found in the values of the instruction binding
  # and in its execution xrefs
  def get_xrefs_r(dasm, di)
    sources = (di.backtrace_binding ||= get_backtrace_binding(di)).values
    exec = get_xrefs_x(dasm, di)
    sources |= exec if exec
    direct = sources.grep(Indirection)
    nested = sources.grep(Expression).map { |e| e.expr_indirections }.flatten
    (direct + nested).map { |ind| [ind.target, ind.len] }
  end

  # returns a list [addr, len]
  # collects the Indirections found in the keys of the instruction binding
  def get_xrefs_w(dasm, di)
    dests = (di.backtrace_binding ||= get_backtrace_binding(di)).keys
    direct = dests.grep(Indirection)
    nested = dests.grep(Expression).map { |e| e.expr_indirections }.flatten
    (direct + nested).map { |ind| [ind.target, ind.len] }
  end

  # checks if the expression corresponds to a function return value with the instruction
  # (eg di == 'call something' and expr == [esp])
  # this default implementation returns nil
  def backtrace_is_function_return(expr, di=nil)
    nil
  end

  # updates f.backtrace_binding when a new return address has been found
  # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ?
  # this default implementation does nothing and returns nil
  def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
    nil
  end

  # returns if the expression is an address on the stack
  # (to avoid trying to backtrace its absolute address until we found function boundaries)
  # this default implementation returns nil
  def backtrace_is_stack_address(expr)
    nil
  end

  # updates the instruction arguments: replace an expression with another (eg when a label is renamed)
  # only Expression arguments are rewritten, i.args is modified in place
  def replace_instr_arg_immediate(i, old, new)
    i.args.map! { |arg|
      arg.kind_of?(Expression) ? Expression[arg.bind(old => new).reduce] : arg
    }
  end

  # a callback called whenever a backtrace is successful
  # di is the decodedinstruction at the backtrace's origin
  # this default implementation does nothing and returns nil
  def backtrace_found_result(dasm, di, expr, type, len)
    nil
  end
end
|
|
333
|
+
|
|
334
|
+
class ExeFormat
  # returns a string containing asm-style section declaration
  # this default version emits a comment with the section address, edata is not used
  def dump_section_header(addr, edata)
    "\n// section at #{Expression[addr]}"
  end

  # returns an array of expressions that may be executed by this instruction
  # delegates to the cpu
  def get_xrefs_x(dasm, di)
    @cpu.get_xrefs_x(dasm, di)
  end

  # returns an array of [type, expression, length] that may be accessed by this instruction (type is :r/:w, len is in bytes)
  # delegates to the cpu
  def get_xrefs_rw(dasm, di)
    @cpu.get_xrefs_rw(dasm, di)
  end
end
|
|
346
|
+
|
|
347
|
+
# a disassembler class
|
|
348
|
+
# holds a copy of a program's sections, a list of decoded instructions, xrefs
|
|
349
|
+
# is able to backtrace an expression from an address following the call flow (backwards)
|
|
350
|
+
class Disassembler
|
|
351
|
+
attr_accessor :program, :cpu
|
|
352
|
+
# binding (union of @sections.values.exports)
|
|
353
|
+
attr_accessor :prog_binding
|
|
354
|
+
# hash addr => edata
|
|
355
|
+
attr_accessor :sections
|
|
356
|
+
# hash addr => DecodedInstruction
|
|
357
|
+
attr_accessor :decoded
|
|
358
|
+
# hash addr => DecodedFunction (includes 'imported' functions)
|
|
359
|
+
attr_accessor :function
|
|
360
|
+
# hash addr => (array of) xrefs - access with +add_xref+/+each_xref+
|
|
361
|
+
attr_accessor :xrefs
|
|
362
|
+
# bool, true to check write xrefs on each instr disasm (default true)
|
|
363
|
+
attr_accessor :check_smc
|
|
364
|
+
# list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return]
|
|
365
|
+
attr_accessor :addrs_todo
|
|
366
|
+
# hash address => binding
|
|
367
|
+
attr_accessor :address_binding
|
|
368
|
+
# number of blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default)
|
|
369
|
+
attr_accessor :backtrace_maxblocks
|
|
370
|
+
# maximum backtrace length for :r/:w, defaults to backtrace_maxblocks
|
|
371
|
+
attr_accessor :backtrace_maxblocks_data
|
|
372
|
+
# max bt length for backtrace_fast blocks, default=0
|
|
373
|
+
attr_accessor :backtrace_maxblocks_fast
|
|
374
|
+
# max complexity for an Expr during backtrace before abort
|
|
375
|
+
attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data
|
|
376
|
+
# maximum number of instructions inside a basic block, split past this limit
|
|
377
|
+
attr_accessor :disassemble_maxblocklength
|
|
378
|
+
# a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to
|
|
379
|
+
attr_accessor :c_parser
|
|
380
|
+
# hash address => array of strings
|
|
381
|
+
# default dasm dump will only show comments at beginning of code blocks
|
|
382
|
+
attr_accessor :comment
|
|
383
|
+
# bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr)
|
|
384
|
+
attr_accessor :funcs_stdabi
|
|
385
|
+
# callback called whenever an instruction will backtrace :x (before the backtrace is started)
|
|
386
|
+
# arguments: |addr of origin, array of exprs to backtrace|
|
|
387
|
+
# must return the replacement array, nil == []
|
|
388
|
+
attr_accessor :callback_newaddr
|
|
389
|
+
# called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction
|
|
390
|
+
# returns the new di to consider (nil to end block)
|
|
391
|
+
attr_accessor :callback_newinstr
|
|
392
|
+
# called whenever the disassembler tries to disassemble an address that has been written to. arg: the address
|
|
393
|
+
attr_accessor :callback_selfmodifying
|
|
394
|
+
# called when the disassembler stops (stopexec/undecodable instruction)
|
|
395
|
+
attr_accessor :callback_stopaddr
|
|
396
|
+
# callback called before each backtrace that may take some time
|
|
397
|
+
attr_accessor :callback_prebacktrace
|
|
398
|
+
# callback called once all addresses have been disassembled
|
|
399
|
+
attr_accessor :callback_finished
|
|
400
|
+
# pointer to the gui widget we're displayed in
|
|
401
|
+
attr_accessor :gui
|
|
402
|
+
|
|
403
|
+
@@backtrace_maxblocks = 50
|
|
404
|
+
|
|
405
|
+
# creates a new disassembler
|
|
406
|
+
# cpu defaults to the cpu of the program
# the whole setup is done by #reinitialize
def initialize(program, cpu = program.cpu)
  reinitialize(program, cpu)
end
|
|
409
|
+
|
|
410
|
+
# resets the program
|
|
411
|
+
# drops all sections, decoded instructions, functions, xrefs, comments,
# bindings and pending addresses, and restores the default tunables
# cpu defaults to the cpu of the program
def reinitialize(program, cpu = program.cpu)
  @program, @cpu = program, cpu

  # disassembly results
  @sections = {}
  @decoded = {}
  @xrefs = {}
  @function = {}
  @comment = {}

  # label/address bindings
  @prog_binding = {}
  @old_prog_binding = {}  # same as prog_binding, but keep old var names
  @address_binding = {}

  # work lists
  @addrs_todo = []
  @addrs_done = []

  # backtrace/disassembly limits
  @backtrace_maxblocks = @@backtrace_maxblocks
  @backtrace_maxblocks_fast = 0
  @backtrace_maxcomplexity = 40
  @backtrace_maxcomplexity_data = 5
  @disassemble_maxblocklength = 100

  # behaviour switches
  @check_smc = true
  @funcs_stdabi = true
end
|
|
432
|
+
|
|
433
|
+
# adds a section, updates prog_binding
|
|
434
|
+
# base addr is an Integer or a String (label name for offset 0)
|
|
435
|
+
# registers encoded as the section based at base, and merges its labels in the program binding
# the two arguments may be given in either order
# a String base is exported as a label at offset 0 of the section; raises if
# that label already exists elsewhere in the section or in the program
# raises if base is neither a String nor an Integer
# returns self
def add_section(encoded, base)
  encoded, base = base, encoded if base.kind_of? EncodedData

  if base.kind_of?(::String)
    raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0
    raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}" if @prog_binding[base] and @prog_binding[base] != Expression[base]
    encoded.add_export base, 0
  elsif not base.kind_of?(::Integer)
    raise "invalid section base #{base.inspect} - expected string or integer"
  end

  @sections[base] = encoded
  @label_alias_cache = nil
  encoded.binding(base).each { |label, value|
    reduced = value.reduce
    @prog_binding[label] = reduced
    @old_prog_binding[label] = reduced
  }

  # rebuild the reverse reloc index for all sections:
  # label -> list of [section base, section edata, reloc offset, reloc] referring to it
  @inv_section_reloc = {}
  @sections.each { |sec_base, sec_edata|
    sec_edata.reloc.each { |off, rel|
      rel.target.externals.grep(::String).each { |label|
        (@inv_section_reloc[label] ||= []) << [sec_base, sec_edata, off, rel]
      }
    }
  }

  self
end
|
|
463
|
+
|
|
464
|
+
# records the xref x for addr
# @xrefs[addr] holds a single xref, and is turned into an array
# as soon as a second distinct xref is added
def add_xref(addr, x)
  current = @xrefs[addr]
  if current.nil?
    @xrefs[addr] = x
  elsif x === current
    # already the only xref recorded: nothing to do
  elsif current.kind_of?(::Array)
    @xrefs[addr] |= [x]
  else
    @xrefs[addr] = [current, x]
  end
end
|
|
472
|
+
|
|
473
|
+
# yields each xref to a given address, optionally restricted to a type
|
|
474
|
+
# addr is normalized first
# works on a copy of the recorded xrefs, so @xrefs is not modified
# when type is nil or :reloc, also yields one pseudo-xref of type :reloc for
# each relocation referring to the label at addr
def each_xref(addr, type=nil)
  addr = normalize addr

  known = @xrefs[addr]
  list = if known.nil?
    []
  elsif known.kind_of?(::Array)
    known.dup
  else
    [known]
  end

  list.keep_if { |xr| xr.type == type } if type

  # add pseudo-xrefs for exe relocs
  if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l]
    a.each { |b, e, o, r|
      reloc_addr = Expression[b]+o
      # ignore relocs embedded in an already-listed instr
      embedded = list.find { |xr|
        next if not xr.origin or not di_at(xr.origin)
        (reloc_addr - xr.origin rescue 50) < @decoded[xr.origin].bin_length
      }
      list << Xref.new(:reloc, reloc_addr) if not embedded
    }
  end

  list.each { |xr| yield xr }
end
|
|
500
|
+
|
|
501
|
+
# parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow
|
|
502
|
+
# reads the whole file, and hands its content to #parse_c with the file name
def parse_c_file(file)
  source = File.read(file)
  parse_c(source, file)
end
|
|
505
|
+
|
|
506
|
+
# parses a C string for function prototypes
|
|
507
|
+
# the C parser is created from the cpu on first use, and reused afterwards
# __METASM__DECODE__ is weakly defined in the lexer before each parse
# returns the result of the parser's parse
def parse_c(str, filename=nil, lineno=1)
  @c_parser ||= @cpu.new_cparser
  parser = @c_parser
  parser.lexer.define_weak('__METASM__DECODE__')
  parser.parse(str, filename, lineno)
end
|
|
512
|
+
|
|
513
|
+
# returns the canonical form of addr (absolute address integer or label of start of section + section offset)
|
|
514
|
+
# nil, false and :default are returned untouched
# anything that is not an Integer is resolved through the old program binding and reduced
# when a cpu is set, an Integer result is wrapped to max(cpu.size, 32) bits
def normalize(addr)
  return addr if not addr or addr == :default

  unless addr.kind_of? Integer
    addr = Expression[addr].bind(@old_prog_binding).reduce
  end

  if @cpu and addr.kind_of? Integer
    width = [@cpu.size, 32].max
    addr %= 1 << width
  end

  addr
end
|
|
520
|
+
|
|
521
|
+
# returns [edata, edata_base] or nil
|
|
522
|
+
# edata.ptr points to addr
|
|
523
|
+
def get_section_at(addr, memcheck=true)
  # returns [edata, edata_base] with edata.ptr set to addr, or nil
  # when memcheck is set, nil is also returned if the backing data reports the page as invalid
  addr = normalize(addr)

  invalid_page = lambda { |edata|
    memcheck and edata.data.respond_to?(:page_invalid?) and edata.data.page_invalid?(edata.ptr)
  }

  if addr.kind_of?(::Integer)
    # look for a section containing addr, then for one ending exactly at addr (end label)
    hit = @sections.find { |base, edata| base.kind_of?(::Integer) and addr >= base and addr < base + edata.length }
    hit ||= @sections.find { |base, edata| base.kind_of?(::Integer) and addr == base + edata.length }
    return if not hit
    base, edata = hit
    edata.ptr = addr - base
    return if invalid_page[edata]
    [edata, base]
  elsif addr.kind_of?(Expression)
    return if addr.op != :+
    if addr.rexpr.kind_of?(::Integer) and addr.rexpr >= 0 and addr.lexpr.kind_of?(::String) and edata = @sections[addr.lexpr]
      # section label + offset
      edata.ptr = addr.rexpr
      return if invalid_page[edata]
      [edata, Expression[addr.lexpr]]
    elsif addr.rexpr.kind_of?(::String) and not addr.lexpr and edata = @sections[addr.rexpr]
      # bare section label
      edata.ptr = 0
      return if invalid_page[edata]
      [edata, addr.rexpr]
    end
  end
end
|
|
544
|
+
|
|
545
|
+
# returns the label at the specified address, creates it if needed using "prefix_addr"
|
|
546
|
+
# renames the existing label if it is in the form rewritepfx_addr
|
|
547
|
+
# returns nil if the address is not known and is not a string
|
|
548
|
+
def auto_label_at(addr, base='xref', *rewritepfx)
  # returns the label at addr, creating "<base>_<addr>" when the address is in a known section
  # an existing label built from one of rewritepfx is renamed to use base instead
  addr = Expression[addr].reduce
  wanted = "#{base}_#{Expression[addr]}"
  return unless wanted =~ /^\w+$/

  edata, edata_base = get_section_at(addr)

  if not edata
    # not in any section: only a symbolic name (or a symbol-free expression) yields a label
    sym = Expression[addr].reduce_rec
    label = sym if sym.kind_of? ::String
    label ||= wanted if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty?
    return label
  end

  label = edata.inv_export[edata.ptr]
  if not label
    label = @program.new_label(wanted)
    edata.add_export(label, edata.ptr)
    # the alias cache is derived from @prog_binding, which changes just below
    @label_alias_cache = nil
    @old_prog_binding[label] = @prog_binding[label] = edata_base + edata.ptr
  elsif rewritepfx.find { |pfx| base != pfx and wanted.sub(base, pfx) == label }
    renamed = wanted
    # avoid _uuid when a -> b -> a
    if not (@old_prog_binding[renamed] and @old_prog_binding[renamed] == @prog_binding[label])
      renamed = @program.new_label(renamed)
    end
    rename_label(label, renamed)
    label = renamed
  end

  label
end
|
|
569
|
+
|
|
570
|
+
# returns a hash associating addr => list of labels at this addr
|
|
571
|
+
def label_alias
  # memoized in @label_alias_cache; other code resets that ivar to nil to force a rebuild
  return @label_alias_cache if @label_alias_cache

  cache = @label_alias_cache = {}
  @prog_binding.each_pair { |label, target| (cache[target] ||= []).push(label) }
  cache
end
|
|
580
|
+
|
|
581
|
+
# decodes instructions from an entrypoint, (tries to) follows code flow
|
|
582
|
+
def disassemble(*entrypoints)
  # keeps running single disassembly operations until disassemble_mainiter reports completion
  more = true
  more = disassemble_mainiter(entrypoints) while more
  self
end
|
|
586
|
+
|
|
587
|
+
attr_accessor :entrypoints
|
|
588
|
+
|
|
589
|
+
# do one operation relevant to disassembling
|
|
590
|
+
# returns false once done, true otherwise
|
|
591
|
+
def disassemble_mainiter(entrypoints=[])
  # performs exactly one of: a disassemble_step, queueing the next entrypoint, or finishing
  # note: entrypoints is consumed (shifted) by this method
  @entrypoints ||= []

  # pending addresses always take priority over new entrypoints
  if not @addrs_todo.empty?
    disassemble_step
    return true
  end

  # nothing left at all: run the post-processing pass and signal completion
  if entrypoints.empty?
    post_disassemble
    puts 'disassembly finished' if $VERBOSE
    @callback_finished[] if callback_finished
    return false
  end

  ep = entrypoints.shift
  label = auto_label_at(normalize(ep), 'entrypoint')
  puts "start disassemble from #{label} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty?
  @entrypoints << label
  @addrs_todo << [ep]
  true
end
|
|
609
|
+
|
|
610
|
+
def post_disassemble
  # pass 1: every :saveip instruction whose block has no subfuncret edge is flagged 'noreturn'
  @decoded.each_value { |di|
    next unless di.kind_of? DecodedInstruction
    next unless (di.opcode and di.opcode.props[:saveip])
    next if di.block.to_subfuncret
    di.add_comment 'noreturn'
    # there is no need to re-loop on all :saveip as check_noret is transitive
    di.block.each_to_normal { |fa| check_noreturn_function(fa) }
  }

  # pass 2: finalize decoded functions and attach their binding summary as comments
  @function.each { |addr, f|
    next if not @decoded[addr]

    unless f.finalized
      f.finalized = true
      puts "  finalize subfunc #{Expression[addr]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, addr, f, f.return_address)
      detect_function_thunk(addr) if not f.return_address
    end

    @comment[addr] ||= []

    # show only bindings that actually change something; unknown ones are grouped under a single key
    shown = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown }
    lost = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact
    if not lost.empty?
      lost_key = lost.map { |u| Expression[u].to_s }.sort.join(',')
      shown[lost_key] = Expression::Unknown
    end

    binding_note = "function binding: " + shown.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')
    @comment[addr] |= [binding_note]

    if f.return_address
      end_note = "function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')
      @comment[addr] |= [end_note]
    end
  }
end
|
|
638
|
+
|
|
639
|
+
# disassembles one block from addrs_todo
|
|
640
|
+
# adds next addresses to handle to addrs_todo
|
|
641
|
+
# if @function[:default] exists, jumps to unknown locations are interpreted as to @function[:default]
|
|
642
|
+
def disassemble_step
  # takes one [addr, from, from_subfuncret] entry off @addrs_todo and handles it
  todo = @addrs_todo.pop
  return if !todo || @addrs_done.include?(todo)
  @addrs_done << todo if todo[1]

  # from_subfuncret is true if from is the address of a function call that returns to addr
  addr, from, from_subfuncret = todo

  return if from == Expression::Unknown

  puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG

  addr = normalize(addr)
  from_type = from_subfuncret ? :subfuncret : :normal
  # true when there is a real origin address to record (neither missing nor :default)
  has_origin = (from && from != :default)

  # we come back from a subfunction call: finalize the functions called by the 'from' block
  if from and from_subfuncret and di_at(from)
    @decoded[from].block.each_to_normal { |subfunc|
      subfunc = normalize(subfunc)
      f = @function[subfunc]
      next if !f || f.finalized
      f.finalized = true
      puts "  finalize subfunc #{Expression[subfunc]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address)
      detect_function_thunk(subfunc) if not f.return_address
    }
  end

  if di = @decoded[addr]
    # address already handled
    if di.kind_of? DecodedInstruction
      split_block(di.block, di.address) if not di.block_head?	# this updates di.block
      di.block.add_from(from, from_type) if has_origin
      bf = di.block
    elsif di == true
      bf = @function[addr]
    end
  elsif bf = @function[addr]
    # known function without decoded code
    detect_function_thunk_noreturn(from) if bf.noreturn
  elsif section = get_section_at(addr)
    # fresh code: decode a new block
    block = InstructionBlock.new(normalize(addr), section[0])
    block.add_from(from, from_type) if has_origin
    disassemble_block(block)
  elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and
      sym = c_parser.toplevel.symbol[name] and sym.type.untypedef.kind_of? C::Function
    # external symbol with a C prototype known to the parser
    bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, sym)
    detect_function_thunk_noreturn(from) if bf.noreturn
  elsif from
    if bf = @function[:default]
      puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG
      name = Expression[addr].reduce_rec
      if name.kind_of? ::String
        @function[addr] = @function[:default].dup
      else
        addr = :default
      end
      from_di = @decoded[from]
      from_di.block.add_to(addr) if from_di
    else
      puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG
    end
    if has_origin
      add_xref(addr, Xref.new(:x, from))
      add_xref(Expression::Unknown, Xref.new(:x, from))
    end
  else
    puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE
  end

  # replay the backtraces that were waiting on the target, now from the new origin
  if bf and has_origin
    bff = if bf.kind_of? DecodedFunction
      bf.get_backtracked_for(self, addr, from)
    else
      bf.backtracked_for
    end
  end
  return unless bff

  bff.each { |btt|
    next if btt.address
    from_di = @decoded[from]
    if from_di.kind_of? DecodedInstruction and from_di.opcode.props[:saveip] and not from_subfuncret and not @function[addr]
      backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached)
    end
    next if backtrace_check_funcret(btt, addr, from)
    backtrace(btt.expr, from,
      :include_start => true, :from_subfuncret => from_subfuncret,
      :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type,
      :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth)
  }
end
|
|
727
|
+
|
|
728
|
+
# splits an InstructionBlock, updates the blocks backtracked_for
|
|
729
|
+
def split_block(block, address=nil)
  # accepts either (block, address) or a single address, eg split_block(0x401012)
  # returns the block starting at address, or nil if a lone address holds no decoded instruction
  unless address
    di = @decoded[block]
    return unless di.kind_of? DecodedInstruction
    address = block
    block = di.block
  end

  # already a block head: nothing to split
  return block if address == block.address

  tail = block.split(address)
  # replay the backtraces attached to the new block, restricted to the part left in the old one
  tail.backtracked_for.dup.each { |btt|
    backtrace(btt.expr, btt.address,
      :only_upto => block.list.last.address,
      :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
      :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
      :detached => btt.detached, :maxdepth => btt.maxdepth)
  }
  tail
end
|
|
745
|
+
|
|
746
|
+
# disassembles a new instruction block at block.address (must be normalized)
|
|
747
|
+
def disassemble_block(block)
  # the block must be brand new: decoded instructions are appended to it
  raise unless block.list.empty?

  di_addr = block.address
  di = nil
  # [flow-changing instruction, remaining delay slot count], set when such an instruction is met
  pending = nil

  # try not to run for too long
  # loop usage: break if the block continues to the following instruction, else return
  @disassemble_maxblocklength.times {
    # check collision into a known block
    break if @decoded[di_addr]

    # check self-modifying code: a known write xref on this address stops the decoding
    if @check_smc
      each_xref(di_addr, :w) { |wx|
        puts "W: disasm: self-modifying code at #{Expression[di_addr]}" if $VERBOSE
        @comment[di_addr] ||= []
        @comment[di_addr] |= ["overwritten by #{@decoded[wx.origin]}"]
        @callback_selfmodifying[di_addr] if callback_selfmodifying
        return
      }
    end

    # decode instruction
    block.edata.ptr = di_addr - block.address + block.edata_ptr
    di = @cpu.decode_instruction(block.edata, di_addr)
    if not di
      if $VERBOSE
        ed = block.edata
        reason = if ed.ptr >= ed.length
          "end of section reached"
        else
          "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"
        end
        puts "#{reason} at #{Expression[di_addr]}"
      end
      return
    end

    @decoded[di_addr] = di
    block.add_di di
    puts di if $DEBUG

    # the callback may replace the instruction, or return nil/false to stop here
    di = @callback_newinstr[di] if callback_newinstr
    return if not di
    block = di.block

    di_addr = di.next_addr

    backtrace_xrefs_di_rw(di)

    if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
      # do not backtrace until delay slot is finished (eg MIPS: di is a
      # ret and the delay slot holds stack fixup needed to calc func_binding)
      # XXX if the delay slot is also xref_x or :stopexec it is ignored
      pending ||= [di, @cpu.delay_slot(di)]
    end

    if pending
      # from here on di designates the flow-changing instruction, not the delay slot one
      di, delay = pending
      if delay == 0 or not di_addr
        backtrace_xrefs_di_x(di)
        return if di.opcode.props[:stopexec] or not di_addr
        break
      end
      pending[1] = delay - 1
    end
  }

  # the flow continues to di_addr (possibly rewritten by the newaddr callback)
  targets = [di_addr]
  if callback_newaddr
    targets = @callback_newaddr[block.list.last.address, targets] || targets
  end
  targets.each { |taddr| backtrace(taddr, di.address, :origin => di.address, :type => :x) }

  block
end
|
|
819
|
+
|
|
820
|
+
# retrieve the list of execution crossrefs due to the decodedinstruction
|
|
821
|
+
# returns a list of symbolic expressions
|
|
822
|
+
def get_xrefs_x(di)
  # delegated to the program (exe format), which receives the disassembler as context
  program = @program
  program.get_xrefs_x(self, di)
end
|
|
825
|
+
|
|
826
|
+
# retrieve the list of data r/w crossrefs due to the decodedinstruction
|
|
827
|
+
# returns a list of [type, symbolic expression, length]
|
|
828
|
+
def get_xrefs_rw(di)
  # delegated to the program (exe format), which receives the disassembler as context
  program = @program
  program.get_xrefs_rw(self, di)
end
|
|
831
|
+
|
|
832
|
+
# disassembles_fast from a list of entrypoints, also dasm subfunctions
|
|
833
|
+
def disassemble_fast_deep(*entrypoints)
  # records the new entrypoints (without duplicates), then disassembles each one recursively
  @entrypoints = (@entrypoints || []) | entrypoints

  entrypoints.each do |ep|
    do_disassemble_fast_deep(normalize(ep))
  end
end
|
|
839
|
+
|
|
840
|
+
def do_disassemble_fast_deep(ep)
  # disassemble_fast yields each subfunction address (and the calling instruction): recurse into it
  disassemble_fast(ep) do |fa, di|
    target = normalize(fa)
    do_disassemble_fast_deep(target)
    # link the callee's first block back to the call site when both are decoded
    callee = di && di_at(target)
    callee.block.add_from_normal(di.address) if callee
  end
end
|
|
849
|
+
|
|
850
|
+
# disassembles fast from a list of entrypoints
|
|
851
|
+
# see disassemble_fast_step
|
|
852
|
+
def disassemble_fast(entrypoint, maxdepth=-1, &b)
  # work list consumed by disassemble_fast_step, which pushes the follow-up addresses onto it
  pending = [entrypoint]
  while not pending.empty?
    disassemble_fast_step(pending, &b)
    maxdepth -= 1
    next unless maxdepth == 0
    # depth budget exhausted: keep only the entries whose address is already decoded
    pending.reject! { |item| !@decoded[normalize(item[0])] }
  end
  check_noreturn_function(entrypoint)
end
|
|
861
|
+
|
|
862
|
+
# disassembles one block from the ary, see disassemble_fast_block
|
|
863
|
+
def disassemble_fast_step(todo, &b)
  # todo holds [addr, from, from_subfuncret] entries; follow-up entries are appended to it
  entry = todo.pop
  return if not entry
  addr, from, from_subfuncret = entry

  addr = normalize(addr)
  from_type = from_subfuncret ? :subfuncret : :normal
  # true when there is a real origin address to record (neither missing nor :default)
  has_origin = (from && from != :default)

  if di = @decoded[addr]
    # already decoded: make addr a block head and record the incoming edge
    if di.kind_of? DecodedInstruction
      split_block(di.block, di.address) if not di.block_head?
      di.block.add_from(from, from_type) if has_origin
    end
  elsif section = get_section_at(addr)
    block = InstructionBlock.new(normalize(addr), section[0])
    block.add_from(from, from_type) if has_origin
    todo.concat disassemble_fast_block(block, &b)
  elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr]
    # external symbol: use its C prototype if the parser knows it, else the default function
    if c_parser and sym = c_parser.toplevel.symbol[name] and sym.type.untypedef.kind_of? C::Function
      @function[addr] = @cpu.decode_c_function_prototype(@c_parser, sym)
      detect_function_thunk_noreturn(from) if @function[addr].noreturn
    elsif @function[:default]
      @function[addr] = @function[:default].dup
    end
  end

  disassemble_fast_checkfunc(addr)
end
|
|
889
|
+
|
|
890
|
+
# check if an addr has an xref :x from a :saveip, if so mark as Function
|
|
891
|
+
def disassemble_fast_checkfunc(addr)
  # only decoded instructions not yet registered as a function are candidates
  return unless @decoded[addr].kind_of? DecodedInstruction
  return if @function[addr]

  # the address is a function start if at least one :x xref comes from a :saveip instruction
  called = false
  each_xref(addr, :x) { |xr|
    caller_di = di_at(xr.origin)
    called = true if caller_di and caller_di.opcode.props[:saveip]
  }
  return unless called

  auto_label_at(addr, 'sub', 'loc', 'xref')
  # XXX use default_btbind_callback ?
  func = DecodedFunction.new
  @function[addr] = func
  func.finalized = true
  detect_function_thunk(addr)
  puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
end
|
|
907
|
+
|
|
908
|
+
# disassembles fast a new instruction block at block.address (must be normalized)
|
|
909
|
+
# does not recurse into subfunctions
|
|
910
|
+
# assumes all :saveip returns, except those pointing to a subfunc with noreturn
|
|
911
|
+
# yields subfunction addresses (targets of :saveip)
|
|
912
|
+
# only backtrace for :x with maxdepth 1 (ie handles only basic push+ret)
|
|
913
|
+
# returns a todo-style ary
|
|
914
|
+
# assumes @addrs_todo is empty
|
|
915
|
+
def disassemble_fast_block(block, &b)
  # block is either an InstructionBlock or a raw address
  # returns a todo-style array of [addr, from(, from_subfuncret)] entries to handle next
  # an empty array is returned when the address is in no known section or was already decoded
  unless block.kind_of? InstructionBlock
    addr = normalize(block)
    section = get_section_at(block)
    # address outside of any known section: nothing to decode
    return [] if not section
    block = InstructionBlock.new(addr, section[0])
  end

  di_addr = block.address
  # [flow-changing instruction, remaining delay slot count]
  delay_slot = nil
  di = nil
  ret = []

  return ret if @decoded[di_addr]

  # loop usage: break if the flow continues to di_addr, return ret if it stops here
  @disassemble_maxblocklength.times {
    break if @decoded[di_addr]

    # decode instruction
    block.edata.ptr = di_addr - block.address + block.edata_ptr
    if not di = @cpu.decode_instruction(block.edata, di_addr)
      return ret
    end

    @decoded[di_addr] = di
    block.add_di di
    puts di if $DEBUG

    # the callback may replace the instruction, or return nil/false to stop here
    di = @callback_newinstr[di] if callback_newinstr
    return ret if not di

    di_addr = di.next_addr

    if di.opcode.props[:stopexec] or di.opcode.props[:setip]
      if di.opcode.props[:setip]
        # backtrace appends the resolved targets to @addrs_todo: start from a clean list
        @addrs_todo = []
        @program.get_xrefs_x(self, di).each { |expr|
          backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
        }
      end
      if di.opcode.props[:saveip]
        # call: the targets are handled by disassemble_fast_block_subfunc, not followed directly
        @addrs_todo = []
        ret.concat disassemble_fast_block_subfunc(di, &b)
      else
        ret.concat @addrs_todo
        @addrs_todo = []
      end
      delay_slot ||= [di, @cpu.delay_slot(di)]
    end

    if delay_slot
      if delay_slot[1] <= 0
        return ret if delay_slot[0].opcode.props[:stopexec]
        break
      end
      delay_slot[1] -= 1
    end
  }

  # nothing was decoded at all (eg @disassemble_maxblocklength is 0)
  return ret if not di

  # the flow continues to the next address
  di.block.add_to_normal(di_addr)
  ret << [di_addr, di.address]
end
|
|
971
|
+
|
|
972
|
+
# handles when disassemble_fast encounters a call to a subfunction
|
|
973
|
+
def disassemble_fast_block_subfunc(di)
  # di is the call instruction; returns a todo-style array
  callees = di.block.to_normal.to_a
  # with no known target, the call is assumed to return
  returns = callees.empty?
  todo = []
  # return address: next instruction, after the delay slots if any
  retaddr = di.next_addr + di.bin_length * @cpu.delay_slot(di)

  callees.each { |fa|
    fa = normalize(fa)
    disassemble_fast_checkfunc(fa)
    yield fa, di if block_given?
    if f = @function[fa] and bf = f.get_backtracked_for(self, fa, di.address) and not bf.empty?
      # this includes retaddr unless f is noreturn
      bf.each { |btt|
        next if btt.type != :x
        found = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max)
        if btt.detached
          todo.concat found	# callback argument
        elsif found.find { |a| normalize(a) == retaddr }
          returns = true
        end
      }
    elsif not f or not f.noreturn
      returns = true
    end
  }

  if returns
    di.block.add_to_subfuncret(retaddr)
    todo << [retaddr, di.address, true]
    di.block.add_to_normal :default if not di.block.to_normal and @function[:default]
  end

  todo
end
|
|
1004
|
+
|
|
1005
|
+
# trace whose xrefs this di is responsible of
|
|
1006
|
+
def backtrace_xrefs_di_rw(di)
  # each data xref is [type, pointer expression, length]; backtrace resolves the pointer
  get_xrefs_rw(di).each do |type, ptr, len|
    touched = backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len)
    touched.each do |xaddr|
      next if xaddr == Expression::Unknown
      # self-modifying code check: a write landing on an already decoded instruction
      next unless @check_smc and type == :w
      victim = di_at(xaddr)
      next unless victim
      puts "W: disasm: #{di} overwrites #{victim}" if $VERBOSE
      victim.add_comment "overwritten by #{di}"
    end
  end
end
|
|
1022
|
+
|
|
1023
|
+
# trace xrefs for execution
|
|
1024
|
+
def backtrace_xrefs_di_x(di)
  targets = @program.get_xrefs_x(self, di)
  # the newaddr callback may substitute its own target list; a nil/false result keeps ours
  if callback_newaddr
    targets = @callback_newaddr[di.address, targets] || targets
  end
  targets.each do |expr|
    backtrace(expr, di.address, :origin => di.address, :type => :x)
  end
end
|
|
1029
|
+
|
|
1030
|
+
# checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc])
|
|
1031
|
+
# the argument must be the address of a decodedinstruction that is the first of a function,
|
|
1032
|
+
# which must not have return_addresses
|
|
1033
|
+
# returns the new thunk name if it was changed
|
|
1034
|
+
def detect_function_thunk(funcaddr)
  # follows the blocks starting at funcaddr; gives up (returns nil) as soon as the code does
  # not look like a short linear sequence ending on a single target
  # check thunk linearity (no conditional branch etc)
  addr = funcaddr
  count = 0
  while b = block_at(addr)
    count += 1
    # at most 5 blocks of at most 4 instructions each
    return if count > 5 or b.list.length > 4
    if b.to_subfuncret and not b.to_subfuncret.empty?
      # the block ends with a call: continue at its single return address
      return if b.to_subfuncret.length != 1
      addr = normalize(b.to_subfuncret.first)
      return if not b.to_normal or b.to_normal.length != 1
      # check that the subfunction is simple (eg get_eip)
      return if not sf = @function[normalize(b.to_normal.first)]
      return if not btb = sf.backtrace_binding
      btb = btb.dup
      btb.delete_if { |k, v| Expression[k] == Expression[v] }
      return if btb.length > 2 or btb.values.include? Expression::Unknown
    else
      return if not bt = b.to_normal
      if bt.include? :default
        addr = :default
        break
      elsif bt.length != 1
        return
      end
      addr = normalize(bt.first)
    end
  end

  # addr is now the first address with no decoded block: the thunk target
  fname = Expression[addr].reduce_rec
  if funcaddr != addr and f = @function[funcaddr]
    # forward get_backtrace_binding to target
    f.backtrace_binding = { :thunk => addr }
    f.noreturn = true if @function[addr] and @function[addr].noreturn
  end

  # only a symbolic target gives the thunk a name
  return if not fname.kind_of? ::String
  l = auto_label_at(funcaddr, 'sub', 'loc')
  # auto_label_at may return nil; labels not in the sub_ form are left alone
  return if not l or l[0, 4] != 'sub_'
  puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG
  rename_label(l, @program.new_label("thunk_#{fname}"))
end
|
|
1074
|
+
|
|
1075
|
+
# this is called when reaching a noreturn function call, with the call address
|
|
1076
|
+
# it is responsible for detecting the actual 'call' instruction leading to this
|
|
1077
|
+
# noreturn function, and eventually mark the call target as a thunk
|
|
1078
|
+
def detect_function_thunk_noreturn(addr)
  # walks back through at most 5 single-predecessor blocks, looking for the call instruction
  5.times {
    di = di_at(addr)
    return if not di
    blk = di.block

    if di.opcode.props[:saveip] and not blk.to_subfuncret
      # found a call that never returns: if it has a single decoded target, check it for a thunk
      break if blk.to_normal.to_a.length != 1
      taddr = normalize(blk.to_normal.first)
      break if not di_at(taddr)
      @function[taddr] ||= DecodedFunction.new
      return detect_function_thunk(taddr)
    end

    # not the call: move to the unique predecessor, or give up
    callers = blk.from_normal.to_a + blk.from_subfuncret.to_a
    break if callers.length != 1
    addr = callers.first
  }
end
|
|
1099
|
+
|
|
1100
|
+
# given an address, detect if it may be a noreturn function
|
|
1101
|
+
# it is if all its end blocks are calls to noreturn functions
|
|
1102
|
+
# if it is, create a @function[fa] with noreturn = true
|
|
1103
|
+
# should only be called with fa = target of a call
|
|
1104
|
+
def check_noreturn_function(fa)
  # function_blocks maps each block address to its successors: end blocks are those with none
  graph = function_blocks(fa, false, false)
  leaves = graph.keys.select { |k| graph[k] == [] }
  return if leaves.empty?

  # every end block must finish on a call to noreturn functions only,
  # or on an instruction that stops execution without setting the ip
  dead_end = leaves.all? { |la|
    blk = block_at(la)
    last_di = blk.list.last
    next if not last_di
    props = last_di.opcode.props
    (props[:saveip] && blk.to_normal.to_a.all? { |tfa|
      tf = function_at(tfa)
      tf && tf.noreturn
    }) || (props[:stopexec] && !props[:setip])
  }
  return unless dead_end

  # yay
  @function[fa] ||= DecodedFunction.new
  @function[fa].noreturn = true
end
|
|
1120
|
+
|
|
1121
|
+
|
|
1122
|
+
# walks the backtrace tree from an address, passing along an object
|
|
1123
|
+
#
|
|
1124
|
+
# the steps are (1st = event, followed by hash keys)
|
|
1125
|
+
#
|
|
1126
|
+
# for each decoded instruction encountered:
|
|
1127
|
+
# :di :di
|
|
1128
|
+
#
|
|
1129
|
+
# when backtracking to a block through a decodedfunction:
|
|
1130
|
+
# (yield for each of the block's subfunctions)
|
|
1131
|
+
# (the decodedinstruction responsible for the call will be yield next)
|
|
1132
|
+
# :func :func, :funcaddr, :addr, :depth
|
|
1133
|
+
#
|
|
1134
|
+
# when jumping from one block to another (excluding :loop): # XXX include :loops ?
|
|
1135
|
+
# :up :from, :to, :sfret
|
|
1136
|
+
#
|
|
1137
|
+
# when the backtrack has nothing to backtrack to (eg program entrypoint):
|
|
1138
|
+
# :end :addr
|
|
1139
|
+
#
|
|
1140
|
+
# when the backtrack stops by taking too long to complete:
|
|
1141
|
+
# :maxdepth :addr
|
|
1142
|
+
#
|
|
1143
|
+
# when the backtrack stops for encountering the specified stop address:
|
|
1144
|
+
# :stopaddr :addr
|
|
1145
|
+
#
|
|
1146
|
+
# when rebacktracking a block already seen in the current branch:
|
|
1147
|
+
# (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest)
|
|
1148
|
+
# :loop :looptrace
|
|
1149
|
+
#
|
|
1150
|
+
# when the address does not match a known instruction/function:
|
|
1151
|
+
# :unknown_addr :addr
|
|
1152
|
+
#
|
|
1153
|
+
# the block return value is used as follow for :di, :func, :up and :loop:
|
|
1154
|
+
# false => the backtrace stops for the branch
|
|
1155
|
+
# nil => the backtrace continues with the current object
|
|
1156
|
+
# anything else => the backtrace continues with this object
|
|
1157
|
+
#
|
|
1158
|
+
# method arguments:
|
|
1159
|
+
# obj is the initial value of the object
|
|
1160
|
+
# addr is the address where the backtrace starts
|
|
1161
|
+
# include_start is a bool specifying if the backtrace should start at addr or just before
|
|
1162
|
+
# from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction
|
|
1163
|
+
# stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it
|
|
1164
|
+
# maxdepth is the maximum depth (in blocks) for each backtrace branch.
|
|
1165
|
+
# (defaults to dasm.backtrace_maxblocks, which defaults to Dasm.backtrace_maxblocks)
|
|
1166
|
+
def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth)
|
|
1167
|
+
start_addr = normalize(addr)
|
|
1168
|
+
stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array
|
|
1169
|
+
|
|
1170
|
+
# array of [obj, addr, from_subfuncret, loopdetect]
|
|
1171
|
+
# loopdetect is an array of [obj, addr, from_type] of each end of block encountered
|
|
1172
|
+
todo = []
|
|
1173
|
+
|
|
1174
|
+
# array of [obj, blockaddr]
|
|
1175
|
+
# avoids rewalking the same value
|
|
1176
|
+
done = []
|
|
1177
|
+
|
|
1178
|
+
# updates todo with the addresses to backtrace next
|
|
1179
|
+
walk_up = lambda { |w_obj, w_addr, w_loopdetect|
|
|
1180
|
+
if w_loopdetect.length > maxdepth
|
|
1181
|
+
yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
|
|
1182
|
+
elsif stopaddr and stopaddr.include?(w_addr)
|
|
1183
|
+
yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
|
|
1184
|
+
elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address
|
|
1185
|
+
prevdi = w_di.block.list[w_di.block.list.index(w_di)-1]
|
|
1186
|
+
todo << [w_obj, prevdi.address, :normal, w_loopdetect]
|
|
1187
|
+
elsif w_di
|
|
1188
|
+
next if done.include? [w_obj, w_addr]
|
|
1189
|
+
done << [w_obj, w_addr]
|
|
1190
|
+
hadsomething = false
|
|
1191
|
+
w_di.block.each_from { |f_addr, f_type|
|
|
1192
|
+
next if f_type == :indirect
|
|
1193
|
+
hadsomething = true
|
|
1194
|
+
o_f_addr = f_addr
|
|
1195
|
+
f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
|
|
1196
|
+
if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type }
|
|
1197
|
+
f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
|
|
1198
|
+
if f_obj and f_obj != w_obj # should avoid infinite loops
|
|
1199
|
+
f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)]
|
|
1200
|
+
end
|
|
1201
|
+
else
|
|
1202
|
+
f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr)
|
|
1203
|
+
end
|
|
1204
|
+
next if f_obj == false
|
|
1205
|
+
f_obj ||= w_obj
|
|
1206
|
+
f_loopdetect ||= w_loopdetect
|
|
1207
|
+
# only count non-trivial paths in loopdetect (ignore linear links)
|
|
1208
|
+
add_detect = [[f_obj, f_addr, f_type]]
|
|
1209
|
+
add_detect = [] if @decoded[f_addr].kind_of? DecodedInstruction and tmp = @decoded[f_addr].block and
|
|
1210
|
+
((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and
|
|
1211
|
+
tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or
|
|
1212
|
+
(w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address]))
|
|
1213
|
+
todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ]
|
|
1214
|
+
}
|
|
1215
|
+
yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething
|
|
1216
|
+
elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown
|
|
1217
|
+
next if done.include? [w_obj, w_addr]
|
|
1218
|
+
oldlen = todo.length
|
|
1219
|
+
each_xref(w_addr, :x) { |x|
|
|
1220
|
+
f_addr = x.origin
|
|
1221
|
+
o_f_addr = f_addr
|
|
1222
|
+
f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
|
|
1223
|
+
if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr }
|
|
1224
|
+
f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
|
|
1225
|
+
if f_obj and f_obj != w_obj
|
|
1226
|
+
f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)]
|
|
1227
|
+
end
|
|
1228
|
+
else
|
|
1229
|
+
f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr)
|
|
1230
|
+
end
|
|
1231
|
+
next if f_obj == false
|
|
1232
|
+
f_obj ||= w_obj
|
|
1233
|
+
f_loopdetect ||= w_loopdetect
|
|
1234
|
+
todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ]
|
|
1235
|
+
}
|
|
1236
|
+
yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen
|
|
1237
|
+
else
|
|
1238
|
+
yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
|
|
1239
|
+
end
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1242
|
+
if include_start
|
|
1243
|
+
todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []]
|
|
1244
|
+
else
|
|
1245
|
+
walk_up[obj, start_addr, []]
|
|
1246
|
+
end
|
|
1247
|
+
|
|
1248
|
+
while not todo.empty?
|
|
1249
|
+
obj, addr, type, loopdetect = todo.pop
|
|
1250
|
+
di = @decoded[addr]
|
|
1251
|
+
if di and type == :subfuncret
|
|
1252
|
+
di.block.each_to_normal { |sf|
|
|
1253
|
+
next if not f = @function[normalize(sf)]
|
|
1254
|
+
s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect)
|
|
1255
|
+
next if s_obj == false
|
|
1256
|
+
s_obj ||= obj
|
|
1257
|
+
if l = loopdetect.find { |l_obj, l_addr, l_type| addr == l_addr and l_type == :normal }
|
|
1258
|
+
l_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(l)..-1], :loopdetect => loopdetect)
|
|
1259
|
+
if l_obj and l_obj != s_obj
|
|
1260
|
+
s_loopdetect = loopdetect[0...loopdetect.index(l)]
|
|
1261
|
+
end
|
|
1262
|
+
next if l_obj == false
|
|
1263
|
+
s_obj = l_obj if l_obj
|
|
1264
|
+
end
|
|
1265
|
+
s_loopdetect ||= loopdetect
|
|
1266
|
+
todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ]
|
|
1267
|
+
}
|
|
1268
|
+
elsif di
|
|
1269
|
+
# XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ?
|
|
1270
|
+
di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_|
|
|
1271
|
+
di = di_ # XXX not sure..
|
|
1272
|
+
if stopaddr and ea = di.next_addr and stopaddr.include?(ea)
|
|
1273
|
+
yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect
|
|
1274
|
+
break
|
|
1275
|
+
end
|
|
1276
|
+
ex_obj = obj
|
|
1277
|
+
obj = yield(:di, obj, :di => di, :loopdetect => loopdetect)
|
|
1278
|
+
break if obj == false
|
|
1279
|
+
obj ||= ex_obj
|
|
1280
|
+
}
|
|
1281
|
+
walk_up[obj, di.block.address, loopdetect] if obj
|
|
1282
|
+
elsif @function[addr] and addr != :default and addr != Expression::Unknown
|
|
1283
|
+
ex_obj = obj
|
|
1284
|
+
obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect)
|
|
1285
|
+
next if obj == false
|
|
1286
|
+
obj ||= ex_obj
|
|
1287
|
+
walk_up[obj, addr, loopdetect]
|
|
1288
|
+
else
|
|
1289
|
+
yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect
|
|
1290
|
+
end
|
|
1291
|
+
end
|
|
1292
|
+
end
|
|
1293
|
+
|
|
1294
|
+
# holds a backtrace result until a snapshot_addr is encountered
|
|
1295
|
+
class StoppedExpr
|
|
1296
|
+
attr_accessor :exprs
|
|
1297
|
+
def initialize(e) @exprs = e end
|
|
1298
|
+
end
|
|
1299
|
+
|
|
1300
|
+
|
|
1301
|
+
attr_accessor :debug_backtrace
|
|
1302
|
+
|
|
1303
|
+
# backtraces the value of an expression from start_addr
|
|
1304
|
+
# updates blocks backtracked_for if type is set
|
|
1305
|
+
# uses backtrace_walk
|
|
1306
|
+
# all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified
|
|
1307
|
+
# options:
|
|
1308
|
+
# :include_start => start backtracking including start_addr
|
|
1309
|
+
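# :from_subfuncret => true if start_addr points to a decodedinstruction that calls a subfunction (passed to backtrace_walk)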
|
|
1310
|
+
# :origin => origin to set for xrefs when resolution is successful
|
|
1311
|
+
# :orig_expr => initial expression
|
|
1312
|
+
# :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo
|
|
1313
|
+
# :len => xref len (for :r/:w)
|
|
1314
|
+
# :snapshot_addr => addr (or array of) where the backtracker should stop
|
|
1315
|
+
# if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end)
|
|
1316
|
+
# :maxdepth => maximum number of blocks to backtrace
|
|
1317
|
+
# :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo
|
|
1318
|
+
# :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace
|
|
1319
|
+
# :log => Array, will be updated with the backtrace evolution
|
|
1320
|
+
# :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto
|
|
1321
|
+
# :no_check => don't use backtrace_check_found (will not backtrace indirection static values)
|
|
1322
|
+
# :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check)
|
|
1323
|
+
def backtrace(expr, start_addr, nargs={})
|
|
1324
|
+
include_start = nargs.delete :include_start
|
|
1325
|
+
from_subfuncret = nargs.delete :from_subfuncret
|
|
1326
|
+
origin = nargs.delete :origin
|
|
1327
|
+
origexpr = nargs.delete :orig_expr
|
|
1328
|
+
type = nargs.delete :type
|
|
1329
|
+
len = nargs.delete :len
|
|
1330
|
+
snapshot_addr = nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr)
|
|
1331
|
+
maxdepth = nargs.delete(:maxdepth) || @backtrace_maxblocks
|
|
1332
|
+
detached = nargs.delete :detached
|
|
1333
|
+
max_complexity = nargs.delete(:max_complexity) || @backtrace_maxcomplexity
|
|
1334
|
+
max_complexity_data = nargs.delete(:max_complexity_data) || @backtrace_maxcomplexity_data # complexity limit applied to :r/:w backtraces
|
|
1335
|
+
bt_log = nargs.delete :log # array to receive the ongoing backtrace info
|
|
1336
|
+
only_upto = nargs.delete :only_upto
|
|
1337
|
+
no_check = nargs.delete :no_check
|
|
1338
|
+
terminals = nargs.delete(:terminals) || []
|
|
1339
|
+
raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty?
|
|
1340
|
+
|
|
1341
|
+
expr = Expression[expr]
|
|
1342
|
+
|
|
1343
|
+
origexpr = expr if origin == start_addr
|
|
1344
|
+
|
|
1345
|
+
start_addr = normalize(start_addr)
|
|
1346
|
+
di = @decoded[start_addr]
|
|
1347
|
+
|
|
1348
|
+
if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
|
|
1349
|
+
puts " not backtracking stack address #{expr}" if debug_backtrace
|
|
1350
|
+
return []
|
|
1351
|
+
end
|
|
1352
|
+
|
|
1353
|
+
if type == :r or type == :w
|
|
1354
|
+
max_complexity = max_complexity_data
|
|
1355
|
+
maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data
|
|
1356
|
+
end
|
|
1357
|
+
|
|
1358
|
+
if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
|
|
1359
|
+
di, origin, type, len, maxdepth, detached))
|
|
1360
|
+
# no need to update backtracked_for
|
|
1361
|
+
return vals
|
|
1362
|
+
elsif maxdepth <= 0
|
|
1363
|
+
return [Expression::Unknown]
|
|
1364
|
+
end
|
|
1365
|
+
|
|
1366
|
+
# create initial backtracked_for
|
|
1367
|
+
if type and origin == start_addr and di
|
|
1368
|
+
btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1)
|
|
1369
|
+
btt.address = di.address
|
|
1370
|
+
btt.exclude_instr = true if not include_start
|
|
1371
|
+
btt.from_subfuncret = true if from_subfuncret and include_start
|
|
1372
|
+
btt.detached = true if detached
|
|
1373
|
+
di.block.backtracked_for |= [btt]
|
|
1374
|
+
end
|
|
1375
|
+
|
|
1376
|
+
@callback_prebacktrace[] if callback_prebacktrace
|
|
1377
|
+
|
|
1378
|
+
# list of Expression/Integer
|
|
1379
|
+
result = []
|
|
1380
|
+
|
|
1381
|
+
puts "backtracking #{type} #{expr} from #{di || Expression[start_addr || 0]} for #{@decoded[origin]}" if debug_backtrace or $DEBUG
|
|
1382
|
+
bt_log << [:start, expr, start_addr] if bt_log
|
|
1383
|
+
backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h|
|
|
1384
|
+
expr = expr_
|
|
1385
|
+
case ev
|
|
1386
|
+
when :unknown_addr, :maxdepth
|
|
1387
|
+
puts " backtrace end #{ev} #{expr}" if debug_backtrace
|
|
1388
|
+
result |= [expr] if not snapshot_addr
|
|
1389
|
+
@addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin
|
|
1390
|
+
when :end
|
|
1391
|
+
if not expr.kind_of? StoppedExpr
|
|
1392
|
+
oldexpr = expr
|
|
1393
|
+
expr = backtrace_emu_blockup(h[:addr], expr)
|
|
1394
|
+
puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
|
|
1395
|
+
bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
|
|
1396
|
+
if expr != oldexpr and not snapshot_addr and vals = (no_check ?
|
|
1397
|
+
(!need_backtrace(expr, terminals) and [expr]) :
|
|
1398
|
+
backtrace_check_found(expr, nil, origin, type, len,
|
|
1399
|
+
maxdepth-h[:loopdetect].length, detached))
|
|
1400
|
+
result |= vals
|
|
1401
|
+
next
|
|
1402
|
+
end
|
|
1403
|
+
end
|
|
1404
|
+
puts " backtrace end #{ev} #{expr}" if debug_backtrace
|
|
1405
|
+
if not snapshot_addr
|
|
1406
|
+
result |= [expr]
|
|
1407
|
+
|
|
1408
|
+
btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
|
|
1409
|
+
btt.detached = true if detached
|
|
1410
|
+
@decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]]
|
|
1411
|
+
@function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default
|
|
1412
|
+
@addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin
|
|
1413
|
+
end
|
|
1414
|
+
when :stopaddr
|
|
1415
|
+
if not expr.kind_of? StoppedExpr
|
|
1416
|
+
oldexpr = expr
|
|
1417
|
+
expr = backtrace_emu_blockup(h[:addr], expr)
|
|
1418
|
+
puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
|
|
1419
|
+
bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
|
|
1420
|
+
end
|
|
1421
|
+
puts " backtrace end #{ev} #{expr}" if debug_backtrace
|
|
1422
|
+
result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr])
|
|
1423
|
+
when :loop
|
|
1424
|
+
next false if expr.kind_of? StoppedExpr
|
|
1425
|
+
t = h[:looptrace]
|
|
1426
|
+
oldexpr = t[0][0]
|
|
1427
|
+
next false if expr == oldexpr # unmodifying loop
|
|
1428
|
+
puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace
|
|
1429
|
+
false
|
|
1430
|
+
when :up
|
|
1431
|
+
next false if only_upto and h[:to] != only_upto
|
|
1432
|
+
next expr if expr.kind_of? StoppedExpr
|
|
1433
|
+
oldexpr = expr
|
|
1434
|
+
expr = backtrace_emu_blockup(h[:from], expr)
|
|
1435
|
+
puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
|
|
1436
|
+
bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log
|
|
1437
|
+
|
|
1438
|
+
if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
|
|
1439
|
+
backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
|
|
1440
|
+
maxdepth-h[:loopdetect].length, detached))
|
|
1441
|
+
if snapshot_addr
|
|
1442
|
+
expr = StoppedExpr.new vals
|
|
1443
|
+
next expr
|
|
1444
|
+
else
|
|
1445
|
+
result |= vals
|
|
1446
|
+
bt_log << [:found, vals, h[:from]] if bt_log
|
|
1447
|
+
next false
|
|
1448
|
+
end
|
|
1449
|
+
end
|
|
1450
|
+
|
|
1451
|
+
if origin and type
|
|
1452
|
+
# update backtracked_for
|
|
1453
|
+
update_btf = lambda { |btf, new_btt|
|
|
1454
|
+
# returns true if btf was modified
|
|
1455
|
+
if i = btf.index(new_btt)
|
|
1456
|
+
btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth
|
|
1457
|
+
else
|
|
1458
|
+
btf << new_btt
|
|
1459
|
+
end
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
|
|
1463
|
+
btt.detached = true if detached
|
|
1464
|
+
if x = di_at(h[:from])
|
|
1465
|
+
update_btf[x.block.backtracked_for, btt]
|
|
1466
|
+
end
|
|
1467
|
+
if x = @function[h[:from]] and h[:from] != :default
|
|
1468
|
+
update_btf[x.backtracked_for, btt]
|
|
1469
|
+
end
|
|
1470
|
+
if x = di_at(h[:to])
|
|
1471
|
+
btt = btt.dup
|
|
1472
|
+
btt.address = x.address
|
|
1473
|
+
btt.from_subfuncret = true if h[:sfret] == :subfuncret
|
|
1474
|
+
if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to])
|
|
1475
|
+
puts " function returns to caller" if debug_backtrace
|
|
1476
|
+
next false
|
|
1477
|
+
end
|
|
1478
|
+
if not update_btf[x.block.backtracked_for, btt]
|
|
1479
|
+
puts " already backtraced" if debug_backtrace
|
|
1480
|
+
next false
|
|
1481
|
+
end
|
|
1482
|
+
end
|
|
1483
|
+
end
|
|
1484
|
+
expr
|
|
1485
|
+
when :di, :func
|
|
1486
|
+
next if expr.kind_of? StoppedExpr
|
|
1487
|
+
if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
|
|
1488
|
+
puts " not backtracking stack address #{expr}" if debug_backtrace
|
|
1489
|
+
next false
|
|
1490
|
+
end
|
|
1491
|
+
|
|
1492
|
+
oldexpr = expr
|
|
1493
|
+
case ev
|
|
1494
|
+
when :di
|
|
1495
|
+
h[:addr] = h[:di].address
|
|
1496
|
+
expr = backtrace_emu_instr(h[:di], expr)
|
|
1497
|
+
bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr
|
|
1498
|
+
when :func
|
|
1499
|
+
expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
|
|
1500
|
+
if snapshot_addr and snapshot_addr == h[:funcaddr]
|
|
1501
|
+
# XXX recursiveness detection needs to be fixed
|
|
1502
|
+
puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
|
|
1503
|
+
next false
|
|
1504
|
+
end
|
|
1505
|
+
bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr
|
|
1506
|
+
end
|
|
1507
|
+
puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
|
|
1508
|
+
if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
|
|
1509
|
+
h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached))
|
|
1510
|
+
if snapshot_addr
|
|
1511
|
+
expr = StoppedExpr.new vals
|
|
1512
|
+
else
|
|
1513
|
+
result |= vals
|
|
1514
|
+
bt_log << [:found, vals, h[:addr]] if bt_log
|
|
1515
|
+
next false
|
|
1516
|
+
end
|
|
1517
|
+
elsif expr.complexity > max_complexity
|
|
1518
|
+
puts " backtrace aborting, expr too complex" if debug_backtrace
|
|
1519
|
+
next false
|
|
1520
|
+
end
|
|
1521
|
+
expr
|
|
1522
|
+
else raise ev.inspect
|
|
1523
|
+
end
|
|
1524
|
+
}
|
|
1525
|
+
|
|
1526
|
+
puts ' backtrace result: ' + result.map { |r| Expression[r] }.join(', ') if debug_backtrace
|
|
1527
|
+
|
|
1528
|
+
result
|
|
1529
|
+
end
|
|
1530
|
+
|
|
1531
|
+
# checks if the BacktraceTrace is a call to a known subfunction
|
|
1532
|
+
# returns true and updates self.addrs_todo
|
|
1533
|
+
def backtrace_check_funcret(btt, funcaddr, instraddr)
|
|
1534
|
+
if di = @decoded[instraddr] and @function[funcaddr] and btt.type == :x and
|
|
1535
|
+
not btt.from_subfuncret and
|
|
1536
|
+
@cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin]) and
|
|
1537
|
+
retaddr = backtrace_emu_instr(di, btt.expr) and
|
|
1538
|
+
not need_backtrace(retaddr)
|
|
1539
|
+
puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace
|
|
1540
|
+
di.block.add_to_subfuncret normalize(retaddr)
|
|
1541
|
+
if @decoded[funcaddr].kind_of? DecodedInstruction
|
|
1542
|
+
# check that all callers :saveip returns (eg recursive call that was resolved
|
|
1543
|
+
# before we found funcaddr was a function)
|
|
1544
|
+
@decoded[funcaddr].block.each_from_normal { |fm|
|
|
1545
|
+
if fdi = di_at(fm) and fdi.opcode.props[:saveip] and not fdi.block.to_subfuncret
|
|
1546
|
+
backtrace_check_funcret(btt, funcaddr, fm)
|
|
1547
|
+
end
|
|
1548
|
+
}
|
|
1549
|
+
end
|
|
1550
|
+
if not @function[funcaddr].finalized
|
|
1551
|
+
# the function is not fully disassembled: arrange for the retaddr to be
|
|
1552
|
+
# disassembled only after the subfunction is finished
|
|
1553
|
+
# for that we walk the code from the call, mark each block start, and insert the sfret
|
|
1554
|
+
# just before the 1st function block address in @addrs_todo (which is pop()ed by dasm_step)
|
|
1555
|
+
faddrlist = []
|
|
1556
|
+
todo = []
|
|
1557
|
+
di.block.each_to_normal { |t| todo << normalize(t) }
|
|
1558
|
+
while a = todo.pop
|
|
1559
|
+
next if faddrlist.include? a or not get_section_at(a)
|
|
1560
|
+
faddrlist << a
|
|
1561
|
+
if @decoded[a].kind_of? DecodedInstruction
|
|
1562
|
+
@decoded[a].block.each_to_samefunc(self) { |t| todo << normalize(t) }
|
|
1563
|
+
end
|
|
1564
|
+
end
|
|
1565
|
+
|
|
1566
|
+
idx = @addrs_todo.index(@addrs_todo.find { |r, i, sfr| faddrlist.include? normalize(r) }) || -1
|
|
1567
|
+
@addrs_todo.insert(idx, [retaddr, instraddr, true])
|
|
1568
|
+
else
|
|
1569
|
+
@addrs_todo << [retaddr, instraddr, true]
|
|
1570
|
+
end
|
|
1571
|
+
true
|
|
1572
|
+
end
|
|
1573
|
+
end
|
|
1574
|
+
|
|
1575
|
+
# applies one decodedinstruction to an expression
|
|
1576
|
+
def backtrace_emu_instr(di, expr)
|
|
1577
|
+
@cpu.backtrace_emu(di, expr)
|
|
1578
|
+
end
|
|
1579
|
+
|
|
1580
|
+
# applies one subfunction to an expression
|
|
1581
|
+
def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth)
|
|
1582
|
+
bind = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth)
|
|
1583
|
+
Expression[expr.bind(bind).reduce]
|
|
1584
|
+
end
|
|
1585
|
+
|
|
1586
|
+
# applies a location binding
|
|
1587
|
+
def backtrace_emu_blockup(addr, expr)
|
|
1588
|
+
(ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr
|
|
1589
|
+
end
|
|
1590
|
+
|
|
1591
|
+
# static resolution of indirections
|
|
1592
|
+
def resolve(expr)
|
|
1593
|
+
binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind|
|
|
1594
|
+
e, b = get_section_at(resolve(ind.target))
|
|
1595
|
+
return expr if not e
|
|
1596
|
+
binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ]
|
|
1597
|
+
}
|
|
1598
|
+
Expression[expr].bind(binding).reduce
|
|
1599
|
+
end
|
|
1600
|
+
|
|
1601
|
+
# returns true if the expression needs more backtrace
|
|
1602
|
+
# it checks for the presence of a symbol (not :unknown), which means it depends on some register value
|
|
1603
|
+
def need_backtrace(expr, terminals=[])
|
|
1604
|
+
return if expr.kind_of? ::Integer
|
|
1605
|
+
!(expr.externals.grep(::Symbol) - [:unknown] - terminals).empty?
|
|
1606
|
+
end
|
|
1607
|
+
|
|
1608
|
+
# returns an array of expressions, or nil if expr needs more backtrace
|
|
1609
|
+
# it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value)
|
|
1610
|
+
# if it needs no more backtrace, expr's indirections are recursively resolved
|
|
1611
|
+
# xrefs are created, and di args are updated (immediate => label)
|
|
1612
|
+
# if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value
|
|
1613
|
+
#
|
|
1614
|
+
# expr indirections are solved by first finding the value of the pointer, and then rebacktracking for write-type access
|
|
1615
|
+
# detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback)
|
|
1616
|
+
# if the backtrace ends pre entrypoint, returns the value encoded in the raw binary
|
|
1617
|
+
# XXX global variable (modified by another function), exported data, multithreaded app..
|
|
1618
|
+
# TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax])
|
|
1619
|
+
# TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
|
|
1620
|
+
# TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
|
|
1621
|
+
# eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
|
|
1622
|
+
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
|
|
1623
|
+
# only entrypoints or block starts called by a :saveip are checked for being a function
|
|
1624
|
+
# want to execute [esp] from a block start
|
|
1625
|
+
if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and (
|
|
1626
|
+
# which is an entrypoint..
|
|
1627
|
+
(not di.block.from_normal and not di.block.from_subfuncret) or
|
|
1628
|
+
# ..or called from a saveip
|
|
1629
|
+
(bool = false ; di.block.each_from_normal { |fn| bool = true if @decoded[fn] and @decoded[fn].opcode.props[:saveip] } ; bool))
|
|
1630
|
+
|
|
1631
|
+
# now we can mark the current address a function start
|
|
1632
|
+
# the actual return address will be found later (we tell the caller to continue the backtrace)
|
|
1633
|
+
addr = di.address
|
|
1634
|
+
l = auto_label_at(addr, 'sub', 'loc', 'xref')
|
|
1635
|
+
if not f = @function[addr]
|
|
1636
|
+
f = @function[addr] = DecodedFunction.new
|
|
1637
|
+
puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE
|
|
1638
|
+
end
|
|
1639
|
+
f.finalized = false
|
|
1640
|
+
|
|
1641
|
+
if @decoded[origin]
|
|
1642
|
+
f.return_address ||= []
|
|
1643
|
+
f.return_address |= [origin]
|
|
1644
|
+
@decoded[origin].add_comment "endsub #{l}"
|
|
1645
|
+
# TODO add_xref (to update the comment on rename_label)
|
|
1646
|
+
end
|
|
1647
|
+
|
|
1648
|
+
f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address }
|
|
1649
|
+
end
|
|
1650
|
+
|
|
1651
|
+
return if need_backtrace(expr)
|
|
1652
|
+
|
|
1653
|
+
puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
|
|
1654
|
+
result = backtrace_value(expr, maxdepth)
|
|
1655
|
+
# keep the original pointer in the results to emulate volatile memory (eg decompiler prefers this)
|
|
1656
|
+
result << expr if not type
|
|
1657
|
+
result.uniq!
|
|
1658
|
+
|
|
1659
|
+
# create xrefs/labels
|
|
1660
|
+
result.each { |e|
|
|
1661
|
+
backtrace_found_result(e, di, type, origin, len, detached)
|
|
1662
|
+
} if type and origin
|
|
1663
|
+
|
|
1664
|
+
result
|
|
1665
|
+
end
|
|
1666
|
+
|
|
1667
|
+
# returns an array of expressions with Indirections resolved (recursive with backtrace_indirection)
|
|
1668
|
+
def backtrace_value(expr, maxdepth)
|
|
1669
|
+
# array of expression with all indirections resolved
|
|
1670
|
+
result = [Expression[expr.reduce]]
|
|
1671
|
+
|
|
1672
|
+
# solve each indirection sequentially, clone expr for each value (aka cross-product)
|
|
1673
|
+
result.first.expr_indirections.uniq.each { |i|
|
|
1674
|
+
next_result = []
|
|
1675
|
+
backtrace_indirection(i, maxdepth).each { |rr|
|
|
1676
|
+
next_result |= result.map { |e| Expression[e.bind(i => rr).reduce] }
|
|
1677
|
+
}
|
|
1678
|
+
result = next_result
|
|
1679
|
+
}
|
|
1680
|
+
|
|
1681
|
+
result.uniq
|
|
1682
|
+
end
|
|
1683
|
+
|
|
1684
|
+
# returns the array of values pointed by the indirection at its invocation (ind.origin)
|
|
1685
|
+
# first resolves the pointer using backtrace_value, if it does not point in edata keep the original pointer
|
|
1686
|
+
# then backtraces from ind.origin until it finds an :w xref origin
|
|
1687
|
+
# if no :w access is found, returns the value encoded in the raw section data
|
|
1688
|
+
# TODO handle unaligned (partial?) writes
|
|
1689
|
+
def backtrace_indirection(ind, maxdepth)
|
|
1690
|
+
if not ind.origin
|
|
1691
|
+
puts "backtrace_ind: no origin for #{ind}" if $VERBOSE
|
|
1692
|
+
return [ind]
|
|
1693
|
+
end
|
|
1694
|
+
|
|
1695
|
+
ret = []
|
|
1696
|
+
|
|
1697
|
+
decode_imm = lambda { |addr, len|
|
|
1698
|
+
edata, foo = get_section_at(addr)
|
|
1699
|
+
if edata
|
|
1700
|
+
Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
|
|
1701
|
+
else
|
|
1702
|
+
Expression::Unknown
|
|
1703
|
+
end
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
# resolve pointers (they may include Indirections)
|
|
1707
|
+
backtrace_value(ind.target, maxdepth).each { |ptr|
|
|
1708
|
+
# find write xrefs to the ptr
|
|
1709
|
+
refs = []
|
|
1710
|
+
each_xref(ptr, :w) { |x|
|
|
1711
|
+
# XXX should be rebacktracked on new xref
|
|
1712
|
+
next if not @decoded[x.origin]
|
|
1713
|
+
refs |= [x.origin]
|
|
1714
|
+
} if ptr != Expression::Unknown
|
|
1715
|
+
|
|
1716
|
+
if refs.empty?
|
|
1717
|
+
if get_section_at(ptr)
|
|
1718
|
+
# static data, never written : return encoded value
|
|
1719
|
+
ret |= [decode_imm[ptr, ind.len]]
|
|
1720
|
+
next
|
|
1721
|
+
else
|
|
1722
|
+
# unknown pointer : backtrace the indirection, hope it solves itself
|
|
1723
|
+
initval = ind
|
|
1724
|
+
end
|
|
1725
|
+
else
|
|
1726
|
+
# wait until we find a write xref, then backtrace the written value
|
|
1727
|
+
initval = true
|
|
1728
|
+
end
|
|
1729
|
+
|
|
1730
|
+
# wait until we arrive at an xref'ing instruction, then backtrace the written value
|
|
1731
|
+
backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h|
|
|
1732
|
+
case ev
|
|
1733
|
+
when :unknown_addr, :maxdepth, :stopaddr
|
|
1734
|
+
puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace
|
|
1735
|
+
ret |= [Expression::Unknown]
|
|
1736
|
+
when :end
|
|
1737
|
+
if not refs.empty? and (expr == true or not need_backtrace(expr))
|
|
1738
|
+
if expr == true
|
|
1739
|
+
# found a path avoiding the :w xrefs, read the encoded initial value
|
|
1740
|
+
ret |= [decode_imm[ptr, ind.len]]
|
|
1741
|
+
else
|
|
1742
|
+
bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => decode_imm[i.target, i.len] }
|
|
1743
|
+
ret |= [Expression[expr.bind(bd).reduce]]
|
|
1744
|
+
end
|
|
1745
|
+
else
|
|
1746
|
+
# unknown pointer, backtrace did not resolve...
|
|
1747
|
+
ret |= [Expression::Unknown]
|
|
1748
|
+
end
|
|
1749
|
+
when :di
|
|
1750
|
+
di = h[:di]
|
|
1751
|
+
if expr == true
|
|
1752
|
+
next true if not refs.include? di.address
|
|
1753
|
+
# find the expression to backtrace: assume this is the :w xref from this di
|
|
1754
|
+
writes = get_xrefs_rw(di)
|
|
1755
|
+
writes = writes.find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len }
|
|
1756
|
+
if writes.length != 1
|
|
1757
|
+
puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG
|
|
1758
|
+
ret |= [Expression::Unknown]
|
|
1759
|
+
next false
|
|
1760
|
+
end
|
|
1761
|
+
expr = Indirection.new(writes[0][1], ind.len, di.address)
|
|
1762
|
+
end
|
|
1763
|
+
expr = backtrace_emu_instr(di, expr)
|
|
1764
|
+
# may have new indirections... recall bt_value ?
|
|
1765
|
+
#if not need_backtrace(expr)
|
|
1766
|
+
if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty?
|
|
1767
|
+
ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
|
|
1768
|
+
false
|
|
1769
|
+
else
|
|
1770
|
+
expr
|
|
1771
|
+
end
|
|
1772
|
+
when :func
|
|
1773
|
+
next true if expr == true # XXX
|
|
1774
|
+
expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length)
|
|
1775
|
+
#if not need_backtrace(expr)
|
|
1776
|
+
if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty?
|
|
1777
|
+
ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
|
|
1778
|
+
false
|
|
1779
|
+
else
|
|
1780
|
+
expr
|
|
1781
|
+
end
|
|
1782
|
+
end
|
|
1783
|
+
}
|
|
1784
|
+
}
|
|
1785
|
+
|
|
1786
|
+
ret
|
|
1787
|
+
end
|
|
1788
|
+
|
|
1789
|
+
# Records the result of a successful backtrace: creates xrefs, updates
# addrs_todo and updates the instruction arguments.
#
# expr::     the resolved expression (normalized internally)
# di::       the decoded instruction the backtrace went through (may be nil)
# type::     xref type (:x, :r, :w) or nil
# origin::   address the backtrace started from (:default, Expression::Unknown
#            and nil receive special handling below)
# len::      access length in bytes, used for the xref and for label naming
# detached:: when true, an :x result is registered as an indirect block link
#            and queued in @addrs_todo without an origin
def backtrace_found_result(expr, di, type, origin, len, detached)
  n = normalize(expr)
  fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot
  add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough
  unk = true if n == Expression::Unknown

  add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk
  base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref'
  base = 'sub' if @function[n]
  n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough
  n = Expression[n]

  # update instr args
  # TODO trace expression evolution to allow handling of
  #  mov eax, 28 ; add eax, 4 ; jmp eax
  #  => mov eax, (loc_xx-4)
  if di and not unk # and di.address == origin
    @cpu.replace_instr_arg_immediate(di.instruction, expr, n)
  end
  if @decoded[origin] and not unk
    @cpu.backtrace_found_result(self, @decoded[origin], expr, type, len)
  end

  # add comment
  if type and @decoded[origin] # and not @decoded[origin].instruction.args.include? n
    @decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough
  end

  # check if target is a string
  if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n)
    l = s[0].inv_export[s[0].ptr]
    case len
    when 1; str = s[0].read(32).unpack('C*')
    when 2; str = s[0].read(64).unpack('v*')
    end
    # keep the leading run of printable characters; c is an Integer code, so
    # compare against integer codes (a ?\n literal is a String on ruby >= 1.9
    # and would never match, truncating the string at the first newline/tab)
    str = str.inject(''.dup) { |str_, c|
      case c
      when 0x20..0x7e, 0x0a, 0x0d, 0x09; str_ << c
      else break str_
      end
    }
    if str.length >= 4
      di.add_comment "#{'L' if len == 2}#{str.inspect}"
      str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12]
      # only rename auto-generated byte_ labels; skip when no label exists here
      if str.length >= 8 and l and l[0, 5] == 'byte_'
        rename_label(l, @program.new_label(str))
      end
    end
  end

  # XXX all this should be done in backtrace() { <here> }
  if type == :x and origin
    if detached
      o = @decoded[origin] ? origin : di ? di.address : nil # lib function callback have origin == libfuncname, so we must find a block somewhere else
      origin = nil
      @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk
    else
      @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk
    end
    @addrs_todo << [n, origin]
  end
end
|
|
1852
|
+
|
|
1853
|
+
# Returns the whole dump as a single String, one dumped line per text line.
def to_s
  listing = ''
  dump do |line|
    listing << line
    listing << "\n"
  end
  listing
end
|
|
1858
|
+
|
|
1859
|
+
# Dumps the source, section by section, optionally including raw data.
# Each output line is yielded to the block (puts is used when none is given).
# When dump_data is false, data runs of 16 bytes or more are summarized as a
# "// [N data bytes]" line instead of being listed.
def dump(dump_data=true, &b)
  b ||= lambda { |line| puts line }
  ordered = @sections.sort_by { |base, _| base.kind_of?(::Integer) ? base : 0 }
  ordered.each do |addr, edata|
    addr = Expression[addr] if addr.kind_of?(::String)

    # section-relative offsets of every decoded block head inside this section
    heads = @decoded.values.grep(DecodedInstruction).select { |di| di.block_head? }
    blockoffs = heads.map { |di| Expression[di.block.address, :-, addr].reduce }
    blockoffs = blockoffs.grep(::Integer).sort.select { |o| o >= 0 and o < edata.length }

    b.call(@program.dump_section_header(addr, edata))
    if !dump_data && edata.length > 16*1024 && blockoffs.empty?
      b.call("// [#{edata.length} data bytes]")
      next
    end

    unk_off = 0 # last offset displayed
    until unk_off >= edata.length
      if unk_off == blockoffs.first
        blockoffs.shift
        blk = @decoded[addr+unk_off].block
        if unk_off != blk.edata_ptr
          b.call("\n// ------ overlap (#{unk_off-blk.edata_ptr}) ------")
        elsif blk.from_normal.kind_of?(::Array)
          b.call("\n")
        end
        dump_block(blk, &b)
        unk_off += [blk.bin_length, 1].max
        # never skip over the start of the next block
        upcoming = blockoffs.first
        unk_off = upcoming if upcoming and unk_off > upcoming
      else
        next_off = blockoffs.first || edata.length
        if dump_data or next_off - unk_off < 16
          unk_off = dump_data(addr + unk_off, edata, unk_off, &b)
        else
          b.call("// [#{next_off - unk_off} data bytes]")
          unk_off = next_off
        end
      end
    end
  end
end
|
|
1897
|
+
|
|
1898
|
+
# Dumps a block of decoded instructions: its header, then one line per
# instruction. block may also be a key of @decoded, in which case the block
# of that decoded entry is dumped.
# Lines are yielded to the block (puts is used when none is given).
def dump_block(block, &b)
  b ||= lambda { |line| puts line }
  decoded = @decoded[block]
  block = decoded.block if decoded
  dump_block_header(block, &b)
  block.list.each { |di| b.call(di.show) }
end
|
|
1905
|
+
|
|
1906
|
+
# Shows what precedes a block's instructions: the xrefs to the block start
# (at most 8 listed), the labels defined there, and the comment attached to
# the block address. Lines are yielded to the block (puts when none is given).
def dump_block_header(block, &b)
  b ||= lambda { |line| puts line }
  address = block.address

  refs = []
  each_xref(address) do |xref|
    if xref.type == :x
      refs << Expression[xref.origin]
    elsif xref.type == :r or xref.type == :w
      refs << "#{xref.type}#{xref.len}:#{Expression[xref.origin]}"
    end
  end
  unless refs.empty?
    suffix = refs.length > 8 ? ' ...' : ''
    b.call("\n// Xrefs: #{refs[0, 8].join(' ')}#{suffix}")
  end

  if block.edata.inv_export[block.edata_ptr]
    # blank separator only when the xref line did not already provide one
    b.call("\n") if refs.empty?
    label_alias[address].each { |name| b.call("#{name}:") }
  end

  comment = @comment[address]
  if comment
    comment = comment.join("\n") if comment.kind_of?(::Array)
    comment.each_line { |line| b.call("// #{line}") }
  end
end
|
|
1928
|
+
|
|
1929
|
+
# Dumps one line of data (with its labels) starting at offset off of edata,
# addr being the matching address. The element size follows the length of the
# xrefs to addr (2 or 4) or the relocation found at off, and defaults to bytes.
# Reading stops at the end of the initialized data, or on the next xref,
# decoded instruction, relocation or label.
# Lines are yielded to the block (puts when none is given).
# Returns the next offset to display.
# TODO array-style data access
def dump_data(addr, edata, off, &b)
  b ||= lambda { |line| puts line }

  # labels: the last alias (in sort order) prefixes the data line,
  # the others get a line of their own
  label = edata.inv_export[off]
  if label
    aliases = label_alias[addr].to_a.sort
    label = aliases.pop || label
    aliases.each { |name| b.call("#{name}:") }
    line = (label + ' ').ljust(16)
  else
    line = ''
  end

  elemlen = 1 # size of each element we dump (db by default)
  dumplen = (-off) % 16 # number of octets to dump
  dumplen = 16 if dumplen == 0

  notes = []
  each_xref(addr) do |xref|
    dumplen = elemlen = xref.len if [2, 4].include?(xref.len)
    notes << " #{xref.type}#{xref.len}:#{Expression[xref.origin]}"
  end
  cmt = " ; @#{Expression[addr]}" + notes.sort[0, 6].join

  reloc = edata.reloc[off]
  dumplen = elemlen = reloc.type.to_s[1..-1].to_i / 8 if reloc

  dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen]
  unless dataspec
    dataspec = 'db '
    elemlen = 1
  end
  line += dataspec

  # past the initialized data: dup(?) up to the next known address
  if off >= edata.data.length
    span = edata.virtsize - off
    shrink = lambda { |target|
      dist = Expression[target, :-, addr].reduce
      span = dist if dist.kind_of?(::Integer) and dist > 0 and dist < span
    }
    @prog_binding.each_value(&shrink)
    @xrefs.each_key(&shrink)
    span /= elemlen
    span = 1 if span < 1
    b.call((line + "#{span} dup(?)").ljust(48) + cmt)
    return off + span * elemlen
  end

  vals = []
  edata.ptr = off
  dups = dumplen / elemlen
  elemsym = :"u#{elemlen * 8}"
  until edata.ptr >= edata.data.length
    if vals.length > dups and vals.last != vals.first
      # we have a dup(), unread the last element which is different
      vals.pop
      addr = Expression[addr, :-, elemlen].reduce
      edata.ptr -= elemlen
      break
    end
    break if vals.length == dups and vals.uniq.length > 1
    vals << edata.decode_imm(elemsym, @cpu.endianness)
    addr += elemlen
    # stop (and step back) if something starts within the element just read
    backoff = (1 - elemlen..0).find { |delta|
      probe = addr + delta
      @xrefs[probe] || @decoded[probe] || edata.reloc[edata.ptr + delta] || edata.inv_export[edata.ptr + delta]
    }
    if backoff
      edata.ptr += backoff
      addr += backoff
      break
    end
    break if edata.reloc[edata.ptr - elemlen]
  end

  # line of repeated value => dup()
  if vals.length > 8 and vals.uniq.length == 1
    b.call((line + "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) + cmt)
    return edata.ptr
  end

  # recognize strings: merge runs of printable 8/16-bit values
  textual = (elemlen == 1 or elemlen == 2)
  grouped = []
  vals.each do |value|
    printable = textual && ((0x20..0x7e) === value || 0x0a == value || 0x0d == value)
    if printable and grouped.last.kind_of?(::String)
      grouped.last << value
    elsif printable
      grouped << value.chr
    else
      grouped << value
    end
  end

  # strings of 1 or 2 characters are shown as plain values
  rendered = grouped.map { |item|
    if !item.kind_of?(::String)
      Expression[item]
    elsif item.length > 2
      item.inspect
    else
      item.unpack('C*').map { |c| Expression[c] }
    end
  }.flatten

  b.call((line + rendered.join(', ')).ljust(48) + cmt)

  edata.ptr
end
|
|
2045
|
+
|
|
2046
|
+
# Returns the Decompiler bound to this object, creating it on first use.
# An empty C source is parsed first when no c_parser is available yet.
def decompiler
  parse_c('') unless c_parser
  @decompiler ||= Decompiler.new(self)
end
|
|
2050
|
+
# Replaces the decompiler instance returned by #decompiler.
def decompiler=(dc)
  @decompiler = dc
end
|
|
2053
|
+
# Forwards all arguments to the decompiler's #decompile and returns its result.
def decompile(*addr)
  dc = decompiler
  dc.decompile(*addr)
end
|
|
2056
|
+
# Forwards addr to the decompiler's #decompile_func and returns its result.
def decompile_func(addr)
  dc = decompiler
  dc.decompile_func(addr)
end
|
|
2059
|
+
|
|
2060
|
+
# Allows us to be AutoExe.loaded: loads f through load (forwarding the block)
# and returns the program of the loaded object.
def self.autoexe_load(f, &b)
  load(f, &b).program
end
|
|
2065
|
+
end
|
|
2066
|
+
end
|
|
2067
|
+
|
|
2068
|
+
require 'metasm/disassemble_api'
|