metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,2068 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/decode'
8
+
9
+
10
+ module Metasm
11
+ # holds information for decoded instructions: the original opcode, a pointer to the InstructionBlock, etc
12
class DecodedInstruction
  # the InstructionBlock that contains this instruction
  attr_accessor :block
  # offset (in bytes) of this instruction from the start of its block, used only for hexdump
  attr_accessor :block_offset
  # address of the first byte of the instruction in memory
  attr_accessor :address
  # the disassembled Instruction, and the opcode whose name matches it
  attr_accessor :instruction, :opcode
  # length of the instruction, in bytes
  attr_accessor :bin_length
  # array of arbitrary strings
  attr_accessor :comment
  # cache of the binding used by the backtracker to emulate this instruction
  attr_accessor :backtrace_binding

  # arg is either an existing Instruction (kept as-is, its opcode is looked up
  # by name in its cpu's opcode_list when it has a cpu), or anything else, which
  # is handed to Instruction.new
  # addr, when given, becomes the instruction address
  def initialize(arg, addr=nil)
    if arg.kind_of?(Instruction)
      @instruction = arg
      if @instruction.cpu
        @opcode = @instruction.cpu.opcode_list.find { |op| op.name == @instruction.opname }
      end
    else
      @instruction = Instruction.new(arg)
    end
    @bin_length = 0
    @address = addr if addr
  end

  # forces the value returned by #next_addr
  def next_addr=(a)
    @next_addr = a
  end

  # the explicitly assigned next address if any, else address + bin_length
  # nil when the instruction has no address
  def next_addr
    return unless address
    (@next_addr ||= nil) || (address + @bin_length)
  end

  # returns a one-line textual dump of the instruction
  # when the instruction belongs to a block, the line also holds the address, the
  # raw bytes in hex (abbreviated past 12 bytes) and the first 6 sorted comments
  def show
    unless block
      return "#{@instruction}#{' ; ' + @comment.join(' ') if comment}"
    end

    raw = @block.edata.data[@block.edata_ptr+@block_offset, @bin_length]
    bin = raw.unpack('C*').map { |c| '%02x' % c }.join
    bin = bin[0, 20] + "..<+#{@bin_length-10}>" if @bin_length > 12
    " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{bin} #{@comment.sort[0,6].join(' ') if comment}"
  end

  include Renderable
  # returns the list of elements to render: the address (if any), the
  # instruction, and the comment array (if any)
  def render
    parts = []
    parts.push(Expression[address], ' ') if address
    parts.push(@instruction)
    parts.push(' ; ', @comment) if comment
    parts
  end

  # appends c to the comment list unless it is already there
  def add_comment(c)
    @comment = (@comment || []) | [c]
  end

  # returns a copy of the DecInstr, with its own copy of #instruction
  def dup
    copy = super()
    copy.instruction = @instruction.dup
    copy
  end
end
79
+
80
+ # holds information on a backtracked expression near begin and end of instruction blocks (#backtracked_for)
81
class BacktraceTrace
  # address of the instruction in the block from which rebacktrace should start (use with from_subfuncret bool)
  # address is nil if the backtrace is from block start
  # exclude_instr is a bool saying if the backtrace should start at address or at the preceding instruction
  # these are optional: if absent, expr is to be rebacktracked when a new codepath arrives at the beginning of the block
  attr_accessor :address, :from_subfuncret, :exclude_instr
  # address of the instruction that initiated the backtrace
  attr_accessor :origin
  # the Expression to backtrace at this point
  attr_accessor :expr
  # the original backtracked Expression
  attr_accessor :orig_expr
  # length of r/w xref (in bytes)
  attr_accessor :len
  # :r/:w/:x
  attr_accessor :type
  # bool: true if this maps to a :x that should not have a from when resolved
  attr_accessor :detached
  # maxdepth at the point of the object creation
  attr_accessor :maxdepth

  # len and maxdepth are only stored when given
  def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil)
    @expr, @origin, @orig_expr, @type = expr, origin, orig_expr, type
    @len = len if len
    @maxdepth = maxdepth if maxdepth
  end

  # hash is computed from a subset of the fields compared by #eql?
  def hash ; [origin, expr].hash ; end

  # two traces are equal when they are of the same class and agree on every
  # field except maxdepth
  # expr takes part in the comparison: #hash depends on it, so leaving it out
  # would make ==/include? disagree with hash-based operations (Array#|, #uniq,
  # Hash keys) on which traces are duplicates
  def eql?(o)
    o.class == self.class and
    [ address, from_subfuncret, exclude_instr, expr, origin, orig_expr, len, type, detached] ==
    [o.address, o.from_subfuncret, o.exclude_instr, o.expr, o.origin, o.orig_expr, o.len, o.type, o.detached]
  end
  alias == eql?
end
116
+
117
+ # a cross-reference, tracks read/write/execute memory accesses by decoded instructions
118
class Xref
  # kind of access: :r/:w/:x
  attr_accessor :type
  # length of the r/w access (in bytes)
  attr_accessor :len
  # address of the instruction responsible for the xref
  attr_accessor :origin
  # XXX list of instructions intervening in the backtrace ?

  # len is only stored when given
  def initialize(type, origin, len=nil)
    @origin = origin
    @type = type
    @len = len if len
  end

  # xrefs are hashed by origin only
  def hash
    @origin.hash
  end

  # same class, same type, same len, same origin
  def eql?(o)
    return false if o.class != self.class
    mine = [type, len, origin]
    theirs = [o.type, o.len, o.origin]
    mine == theirs
  end
  alias == eql?
end
136
+
137
+ # holds a list of contiguous decoded instructions, forming an uninterrupted block (except for eg CPU exceptions)
138
+ # most attributes are either a value or an array of values, use the associated iterator.
139
class InstructionBlock
  # address of the first instruction
  attr_accessor :address
  # pointer to raw data
  attr_accessor :edata, :edata_ptr
  # list of DecodedInstructions
  attr_accessor :list
  # address of instructions giving control directly to us
  # includes addr of normal instruction when call flow continues to us past the end of the preceding block
  # does not include addresses of subfunction return instructions
  # may be nil or an array
  attr_accessor :from_normal
  # address of instructions called/jumped to
  attr_accessor :to_normal
  # address of an instruction that calls a subfunction which returns to us
  attr_accessor :from_subfuncret
  # address of instruction executed after a called subfunction returns
  attr_accessor :to_subfuncret
  # address of instructions executed indirectly through us (callback in a subfunction, SEH...)
  # XXX from_indirect is not populated for now
  attr_accessor :from_indirect, :to_indirect
  # array of BacktraceTrace
  # when a new code path comes to us, it should be backtracked for the values of :r/:w/:x using btt with no address
  # for internal use only (block splitting): btt with an address
  attr_accessor :backtracked_for

  # arg0 is the block address, a DecodedInstruction, or an Array of them
  # when instructions are given, the block address is the address of the first
  # one and all of them are appended through #add_di
  # edata_ptr defaults to edata.ptr, or 0 without edata
  def initialize(arg0, edata=nil, edata_ptr=nil)
    @list = []
    if arg0.kind_of?(DecodedInstruction)
      @address = arg0.address
      add_di(arg0)
    elsif arg0.kind_of?(Array)
      @address = arg0.first.address unless arg0.empty?
      arg0.each { |di| add_di(di) }
    else
      @address = arg0
    end
    edata_ptr ||= (edata ? edata.ptr : 0)
    @edata = edata
    @edata_ptr = edata_ptr
    @backtracked_for = []
  end

  # length of the block in bytes: end offset of the last instruction, 0 for an empty block
  def bin_length
    last = @list.last
    last ? last.block_offset + last.bin_length : 0
  end

  # splits the current block into a new one with all di from address addr to end
  # caller is responsible for rebacktracing new.bt_for to regenerate correct old.btt/new.btt
  # raises if addr is not the address of an instruction of the block, or is the first one
  def split(addr)
    idx = @list.index(@list.find { |di| di.address == addr })
    raise "invalid split @#{Expression[addr]}" if not idx or idx == 0

    off = @list[idx].block_offset
    new_b = self.class.new(addr, @edata, @edata_ptr + off)
    # move every instruction from idx onwards to the new block
    while @list[idx]
      new_b.add_di(@list.delete_at(idx))
    end
    # our outgoing edges now belong to the new block
    new_b.to_normal, @to_normal = to_normal, new_b.to_normal
    new_b.to_subfuncret, @to_subfuncret = to_subfuncret, new_b.to_subfuncret
    # link both halves together
    new_b.add_from @list.last.address
    add_to new_b.address
    # hand over the traces anchored on an instruction that moved
    @backtracked_for.delete_if { |btt|
      moved = btt.address && new_b.list.any? { |di| di.address == btt.address }
      new_b.backtracked_for << btt if moved
      moved
    }
    new_b
  end

  # adds a decodedinstruction to the block list, updates di.block and di.block_offset
  # an instruction without address gets block address + block_offset
  def add_di(di)
    di.block = self
    di.block_offset = bin_length
    di.address ||= @address + di.block_offset
    @list.push(di)
  end
end
216
+
217
+ # a factorized subfunction as seen by the disassembler
218
class DecodedFunction
  # when backtracking an instruction that calls us, use this binding and then the instruction's
  # the binding is lazily filled up for non-external functions, register by register, when
  # a backtraced expression depends on it
  attr_accessor :backtrace_binding
  # same as InstructionBlock#backtracked_for
  # includes the expression responsible of the function return (eg [esp] on ia32)
  attr_accessor :backtracked_for
  # addresses of instruction causing the function to return
  attr_accessor :return_address
  # a lambda called for dynamic backtrace_binding generation
  attr_accessor :btbind_callback
  # a lambda called for dynamic backtracked_for
  attr_accessor :btfor_callback
  # bool, if false the function is actually being disassembled
  attr_accessor :finalized
  # bool, if true the function does not return (eg exit() or ExitProcess())
  attr_accessor :noreturn

  # returns the binding to use when backtracking expr through a call to this function
  # - with a btbind_callback: its result for [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
  # - when the binding has a :thunk entry naming a function known to dasm: that function's answer
  # - otherwise our own binding, after asking the cpu to fill in the symbols of
  #   expr.externals that are not bound yet (:unknown excepted)
  def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
    if btbind_callback
      return @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth]
    end

    if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
      return target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
    end

    unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown]
    unless unk_regs.empty?
      dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs)
    end
    @backtrace_binding
  end

  # returns the list of traces to backtrace when this function is called
  # - with a btfor_callback: its result for [dasm, bt_for, funcaddr, calladdr]
  # - when the binding has a :thunk entry naming a function known to dasm: that function's answer
  # - otherwise backtracked_for
  def get_backtracked_for(dasm, funcaddr, calladdr)
    if btfor_callback
      return @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr]
    end

    if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
      return target.get_backtracked_for(dasm, funcaddr, calladdr)
    end

    @backtracked_for
  end

  # starts with no trace and an empty binding
  def initialize
    @backtracked_for = []
    @backtrace_binding = {}
  end
end
268
+
269
class CPU
  # return the thing to backtrace to find +value+ before the execution of this instruction
  # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1]
  # (the value of :eax after 'inc eax' is the value of :eax before plus 1)
  # may return Expression::Unknown
  # the instruction binding is computed once and cached in di.backtrace_binding
  def backtrace_emu(di, value)
    wanted = Expression[value]
    bd = (di.backtrace_binding ||= get_backtrace_binding(di))
    Expression[wanted.bind(bd).reduce]
  end

  # returns a list of Expressions/Integer to backtrace to find an execution target
  # this default implementation returns nil
  def get_xrefs_x(dasm, di)
    nil
  end

  # returns a list of [type, address, len]: the reads (:r) first, then the writes (:w)
  def get_xrefs_rw(dasm, di)
    reads = get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] }
    writes = get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] }
    reads + writes
  end

  # returns a list [addr, len]
  # looks for Indirections among the values of the instruction binding and the
  # execution targets of get_xrefs_x, either directly or nested in an Expression
  def get_xrefs_r(dasm, di)
    bd = (di.backtrace_binding ||= get_backtrace_binding(di))
    sources = bd.values
    targets = get_xrefs_x(dasm, di)
    sources |= targets if targets
    direct = sources.grep(Indirection)
    nested = sources.grep(Expression).map { |e| e.expr_indirections }.flatten
    (direct + nested).map { |ind| [ind.target, ind.len] }
  end

  # returns a list [addr, len]
  # looks for Indirections among the keys of the instruction binding, either
  # directly or nested in an Expression
  def get_xrefs_w(dasm, di)
    bd = (di.backtrace_binding ||= get_backtrace_binding(di))
    dests = bd.keys
    direct = dests.grep(Indirection)
    nested = dests.grep(Expression).map { |e| e.expr_indirections }.flatten
    (direct + nested).map { |ind| [ind.target, ind.len] }
  end

  # checks if the expression corresponds to a function return value with the instruction
  # (eg di == 'call something' and expr == [esp])
  # this default implementation returns nil
  def backtrace_is_function_return(expr, di=nil)
    nil
  end

  # updates f.backtrace_binding when a new return address has been found
  # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ?
  # this default implementation does nothing
  def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
    nil
  end

  # returns if the expression is an address on the stack
  # (to avoid trying to backtrace its absolute address until we found function boundaries)
  # this default implementation returns nil
  def backtrace_is_stack_address(expr)
    nil
  end

  # updates the instruction arguments: replace an expression with another (eg when a label is renamed)
  # only Expression arguments are rewritten, i.args is modified in place
  def replace_instr_arg_immediate(i, old, new)
    i.args.map! { |a|
      a.kind_of?(Expression) ? Expression[a.bind(old => new).reduce] : a
    }
  end

  # a callback called whenever a backtrace is successful
  # di is the decodedinstruction at the backtrace's origin
  # this default implementation does nothing
  def backtrace_found_result(dasm, di, expr, type, len)
    nil
  end
end
333
+
334
class ExeFormat
  # returns a string containing asm-style section declaration
  # edata is not used by this default implementation
  def dump_section_header(addr, edata)
    "\n// section at #{Expression[addr]}"
  end

  # returns an array of expressions that may be executed by this instruction
  # (delegated to the cpu)
  def get_xrefs_x(dasm, di)
    @cpu.get_xrefs_x(dasm, di)
  end

  # returns an array of [type, expression, length] that may be accessed by this instruction (type is :r/:w, len is in bytes)
  # (delegated to the cpu)
  def get_xrefs_rw(dasm, di)
    @cpu.get_xrefs_rw(dasm, di)
  end
end
346
+
347
+ # a disassembler class
348
+ # holds a copy of a program sections, a list of decoded instructions, xrefs
349
+ # is able to backtrace an expression from an address following the call flow (backwards)
350
+ class Disassembler
351
+ attr_accessor :program, :cpu
352
+ # binding (union of @sections.values.exports)
353
+ attr_accessor :prog_binding
354
+ # hash addr => edata
355
+ attr_accessor :sections
356
+ # hash addr => DecodedInstruction
357
+ attr_accessor :decoded
358
+ # hash addr => DecodedFunction (includes 'imported' functions)
359
+ attr_accessor :function
360
+ # hash addr => (array of) xrefs - access with +add_xref+/+each_xref+
361
+ attr_accessor :xrefs
362
+ # bool, true to check write xrefs on each instr disasm (default true)
363
+ attr_accessor :check_smc
364
+ # list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return]
365
+ attr_accessor :addrs_todo
366
+ # hash address => binding
367
+ attr_accessor :address_binding
368
+ # number of blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default)
369
+ attr_accessor :backtrace_maxblocks
370
+ # maximum backtrace length for :r/:w, defaults to backtrace_maxblocks
371
+ attr_accessor :backtrace_maxblocks_data
372
+ # max bt length for backtrace_fast blocks, default=0
373
+ attr_accessor :backtrace_maxblocks_fast
374
+ # max complexity for an Expr during backtrace before abort
375
+ attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data
376
+ # maximum number of instructions inside a basic block, split past this limit
377
+ attr_accessor :disassemble_maxblocklength
378
+ # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to
379
+ attr_accessor :c_parser
380
+ # hash address => array of strings
381
+ # default dasm dump will only show comments at beginning of code blocks
382
+ attr_accessor :comment
383
+ # bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr)
384
+ attr_accessor :funcs_stdabi
385
+ # callback called whenever an instruction will backtrace :x (before the backtrace is started)
386
+ # arguments: |addr of origin, array of exprs to backtrace|
387
+ # must return the replacement array, nil == []
388
+ attr_accessor :callback_newaddr
389
+ # called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction
390
+ # returns the new di to consider (nil to end block)
391
+ attr_accessor :callback_newinstr
392
+ # called whenever the disassembler tries to disassemble an address that has been written to. arg: the address
393
+ attr_accessor :callback_selfmodifying
394
+ # called when the disassembler stops (stopexec/undecodable instruction)
395
+ attr_accessor :callback_stopaddr
396
+ # callback called before each backtrace that may take some time
397
+ attr_accessor :callback_prebacktrace
398
+ # callback called once all addresses have been disassembled
399
+ attr_accessor :callback_finished
400
+ # pointer to the gui widget we're displayed in
401
+ attr_accessor :gui
402
+
403
+ @@backtrace_maxblocks = 50
404
+
405
+ # creates a new disassembler
406
# program: the program to disassemble
# cpu: the cpu used for decoding, defaults to the program's
# all the setup is done by #reinitialize
def initialize(program, cpu=program.cpu)
  reinitialize(program, cpu)
end
409
+
410
+ # resets the program
411
# forgets everything known about the previous program (sections, decoded
# instructions, xrefs, functions, bindings, pending addresses, comments) and
# restores the default tunables
# program: the new program, cpu: its cpu (defaults to program.cpu)
def reinitialize(program, cpu=program.cpu)
  @program = program
  @cpu = cpu
  @sections = {}
  @decoded = {}
  @xrefs = {}
  @function = {}
  @check_smc = true
  @prog_binding = {}
  @old_prog_binding = {} # same as prog_binding, but keep old var names
  # caches derived from @prog_binding / @sections: they must not outlive them,
  # otherwise label_alias and each_xref would keep serving data from the
  # previous program until the next add_section
  @label_alias_cache = nil
  @inv_section_reloc = {}
  @addrs_todo = []
  @addrs_done = []
  @address_binding = {}
  @backtrace_maxblocks = @@backtrace_maxblocks
  @backtrace_maxblocks_fast = 0
  @backtrace_maxcomplexity = 40
  @backtrace_maxcomplexity_data = 5
  @disassemble_maxblocklength = 100
  @comment = {}
  @funcs_stdabi = true
end
432
+
433
+ # adds a section, updates prog_binding
434
+ # base addr is an Integer or a String (label name for offset 0)
435
# encoded: the section content, base: its base address
# the two arguments may be given in either order
# a String base is exported as a label at offset 0 of the section; it is refused
# if the section already exports it elsewhere, or if the program already binds
# it to something other than itself
# also rebuilds the label => relocations index over all sections
# returns self
def add_section(encoded, base)
  encoded, base = base, encoded if base.kind_of?(EncodedData)

  if base.kind_of?(::Integer)
    # nothing to check for a numeric base
  elsif base.kind_of?(::String)
    raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0
    raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}" if @prog_binding[base] and @prog_binding[base] != Expression[base]
    encoded.add_export(base, 0)
  else
    raise "invalid section base #{base.inspect} - expected string or integer"
  end

  @sections[base] = encoded
  @label_alias_cache = nil
  encoded.binding(base).each { |name, value|
    reduced = value.reduce
    @prog_binding[name] = reduced
    @old_prog_binding[name] = reduced
  }

  # update section_edata.reloc
  # label -> list of relocs that refers to it
  @inv_section_reloc = {}
  @sections.each { |sbase, edata|
    edata.reloc.each { |off, rel|
      rel.target.externals.grep(::String).each { |ext|
        (@inv_section_reloc[ext] ||= []) << [sbase, edata, off, rel]
      }
    }
  }

  self
end
463
+
464
# records the xref x to addr
# @xrefs[addr] holds nothing, a single xref, or an array of them: the entry is
# only turned into an array when a second, different xref shows up
def add_xref(addr, x)
  cur = @xrefs[addr]
  if cur.nil?
    @xrefs[addr] = x
  elsif x === cur
    # already the one and only xref there
    nil
  elsif cur.kind_of?(::Array)
    @xrefs[addr] = cur | [x]
  else
    @xrefs[addr] = [cur, x]
  end
end
472
+
473
+ # yields each xref to a given address, optionally restricted to a type
474
# addr is normalized first
# with a type, only the xrefs of that type are yielded
# unless a type other than :reloc is requested, a pseudo-xref of type :reloc is
# added for each relocation referring to the label at addr
def each_xref(addr, type=nil)
  addr = normalize(addr)

  known = @xrefs[addr]
  list = if known.nil?
           []
         elsif known.kind_of?(::Array)
           known.dup
         else
           [known]
         end

  list.reject! { |xr| xr.type != type } if type

  # add pseudo-xrefs for exe relocs
  if (not type or type == :reloc) and label = get_label_at(addr) and relocs = @inv_section_reloc[label]
    relocs.each { |sbase, _edata, off, _rel|
      reloc_addr = Expression[sbase] + off
      # ignore relocs embedded in an already-listed instr
      # NOTE(review): the distance has no lower bound, so a reloc located
      # before the instruction looks embedded too - confirm this is intended
      embedded = list.find { |xr|
        next if not xr.origin or not di_at(xr.origin)
        (reloc_addr - xr.origin rescue 50) < @decoded[xr.origin].bin_length
      }
      list << Xref.new(:reloc, reloc_addr) if not embedded
    }
  end

  list.each { |xr| yield xr }
end
500
+
501
+ # parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow
502
# file: path of the header, also used as the filename passed to #parse_c
def parse_c_file(file)
  source = File.read(file)
  parse_c(source, file)
end
505
+
506
+ # parses a C string for function prototypes
507
# the C parser is created from the cpu on first use and then reused
# '__METASM__DECODE__' is weakly defined in its lexer before each parse
# filename and lineno are passed on to the parser
# returns whatever the parser's parse returns
def parse_c(str, filename=nil, lineno=1)
  @c_parser = @cpu.new_cparser if not @c_parser
  parser = @c_parser
  parser.lexer.define_weak('__METASM__DECODE__')
  parser.parse(str, filename, lineno)
end
512
+
513
+ # returns the canonical form of addr (absolute address integer or label of start of section + section offset)
514
# nil, false and :default are returned untouched
# anything that is not an Integer is resolved against the old program binding and reduced
# an Integer result is wrapped to the cpu size (at least 32 bits) when a cpu is set
def normalize(addr)
  return addr unless addr and addr != :default

  unless addr.kind_of?(Integer)
    addr = Expression[addr].bind(@old_prog_binding).reduce
  end

  if @cpu and addr.kind_of?(Integer)
    width = [@cpu.size, 32].max
    addr %= 1 << width
  end

  addr
end
520
+
521
+ # returns [edata, edata_base] or nil
522
+ # edata.ptr points to addr
523
# addr is normalized first; the ptr of the section found is moved to addr
# Integer address: looks for the Integer-based section containing it, or
# failing that the one ending exactly at it (end label)
# Expression address: handles "section label + non-negative offset" and a
# bare section label
# with memcheck, returns nil when the section data answers page_invalid? for that offset
def get_section_at(addr, memcheck=true)
  addr = normalize(addr)
  unmapped = lambda { |ed| memcheck and ed.data.respond_to?(:page_invalid?) and ed.data.page_invalid?(ed.ptr) }

  if addr.kind_of?(::Integer)
    hit = @sections.find { |sbase, ed| sbase.kind_of?(::Integer) and addr >= sbase and addr < sbase + ed.length }
    hit ||= @sections.find { |sbase, ed| sbase.kind_of?(::Integer) and addr == sbase + ed.length } # end label
    return if not hit
    sbase, ed = hit[0], hit[1]
    ed.ptr = addr - sbase
    return if unmapped[ed]
    [ed, sbase]
  elsif addr.kind_of?(Expression)
    if addr.op == :+ and addr.rexpr.kind_of?(::Integer) and addr.rexpr >= 0 and addr.lexpr.kind_of?(::String) and ed = @sections[addr.lexpr]
      ed.ptr = addr.rexpr
      return if unmapped[ed]
      [ed, Expression[addr.lexpr]]
    elsif addr.op == :+ and addr.rexpr.kind_of?(::String) and not addr.lexpr and ed = @sections[addr.rexpr]
      ed.ptr = 0
      return if unmapped[ed]
      [ed, addr.rexpr]
    end
  end
end
544
+
545
+ # returns the label at the specified address, creates it if needed using "prefix_addr"
546
+ # renames the existing label if it is in the form rewritepfx_addr
547
+ # returns nil if the address is not known and is not a string
548
# addr: the address to label, base: prefix of the generated name ("base_addr")
# rewritepfx: prefixes of generated names that may be replaced by a name built from base
# returns nil when the generated name would not be a plain word
# outside of any section, returns the address itself if it reduces to a String,
# or the generated name for an Expression without Symbol externals (nothing is registered)
def auto_label_at(addr, base='xref', *rewritepfx)
  addr = Expression[addr].reduce
  addrstr = "#{base}_#{Expression[addr]}"
  return if addrstr !~ /^\w+$/

  edata, sbase = get_section_at(addr)
  if not edata
    label = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of?(::String)
    label ||= addrstr if addr.kind_of?(Expression) and addr.externals.grep(::Symbol).empty?
    return label
  end

  label = edata.inv_export[edata.ptr]
  if not label
    # no label there yet: create one and register it
    label = @program.new_label(addrstr)
    edata.add_export(label, edata.ptr)
    @label_alias_cache = nil
    @old_prog_binding[label] = @prog_binding[label] = sbase + edata.ptr
  elsif rewritepfx.find { |pfx| base != pfx and addrstr.sub(base, pfx) == label }
    # existing label was generated with a rewritable prefix: rename it
    newl = addrstr
    unless @old_prog_binding[newl] and @old_prog_binding[newl] == @prog_binding[label] # avoid _uuid when a -> b -> a
      newl = @program.new_label(newl)
    end
    rename_label(label, newl)
    label = newl
  end
  label
end
569
+
570
+ # returns a hash associating addr => list of labels at this addr
571
# the hash is built from @prog_binding on first use and cached in
# @label_alias_cache until that cache is cleared
def label_alias
  return @label_alias_cache if @label_alias_cache

  @label_alias_cache = cache = {}
  @prog_binding.each { |label, addr|
    (cache[addr] ||= []) << label
  }
  cache
end
580
+
581
# decodes instructions starting from the given entrypoints, following code flow
# keeps calling disassemble_mainiter until it reports completion
# returns self
def disassemble(*entrypoints)
  more = true
  more = disassemble_mainiter(entrypoints) while more
  self
end
586
+
587
+ attr_accessor :entrypoints
588
+
589
# performs one unit of disassembly work:
#  - processes one pending @addrs_todo entry if there is any,
#  - otherwise schedules the next entrypoint,
#  - otherwise runs post_disassemble and the finished callback
# returns true while there may be more work, false once everything is done
def disassemble_mainiter(entrypoints=[])
  @entrypoints ||= []

  if not @addrs_todo.empty?
    disassemble_step
    return true
  end

  if entrypoints.empty?
    post_disassemble
    puts 'disassembly finished' if $VERBOSE
    @callback_finished[] if callback_finished
    return false
  end

  ep = entrypoints.shift
  label = auto_label_at(normalize(ep), 'entrypoint')
  puts "start disassemble from #{label} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty?
  @entrypoints << label
  @addrs_todo << [ep]
  true
end
609
+
610
# final pass run once no address is left to disassemble:
#  - comments every :saveip instruction whose block has no to_subfuncret as
#    'noreturn' and rechecks its targets with check_noreturn_function
#  - finalizes every decoded function that is not finalized yet
#  - records each function's binding (and return addresses) in @comment
def post_disassemble
  @decoded.each_value do |di|
    next unless di.kind_of?(DecodedInstruction)
    next unless di.opcode and di.opcode.props[:saveip]
    next if di.block.to_subfuncret
    di.add_comment 'noreturn'
    # there is no need to re-loop on all :saveip as check_noret is transitive
    di.block.each_to_normal { |callee| check_noreturn_function(callee) }
  end

  @function.each do |addr, func|
    next unless @decoded[addr]

    unless func.finalized
      func.finalized = true
      puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, addr, func, func.return_address)
      detect_function_thunk(addr) unless func.return_address
    end

    @comment[addr] ||= []

    # bindings that actually change something and have a known value
    known = func.backtrace_binding.reject { |reg, val|
      Expression[reg] == Expression[val] or Expression[val] == Expression::Unknown
    }
    # all keys bound to Unknown are folded into one comma-separated entry
    unknown = func.backtrace_binding.map { |reg, val| reg if val == Expression::Unknown }.compact
    unless unknown.empty?
      unknown_key = unknown.map { |reg| Expression[reg].to_s }.sort.join(',')
      known[unknown_key] = Expression::Unknown
    end

    binding_text = known.map { |reg, val| "#{reg} -> #{val}" }.sort.join(', ')
    @comment[addr] |= ["function binding: " + binding_text]

    if func.return_address
      ends_text = func.return_address.map { |ra| Expression[ra] }.join(', ')
      @comment[addr] |= ["function ends at " + ends_text]
    end
  end
end
638
+
639
# disassembles one block taken from @addrs_todo
# addresses found while doing so are queued back into @addrs_todo
# if @function[:default] exists, jumps to unknown locations are interpreted as
# jumps to @function[:default]
def disassemble_step
  job = @addrs_todo.pop
  return if not job or @addrs_done.include?(job)
  @addrs_done << job if job[1]

  # from_subfuncret is true if from is the address of a function call that returns to addr
  addr, from, from_subfuncret = job

  return if from == Expression::Unknown

  puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG

  addr = normalize(addr)

  # returning from a call: finalize the functions the calling block points to
  if from and from_subfuncret and di_at(from)
    @decoded[from].block.each_to_normal do |callee|
      callee = normalize(callee)
      func = @function[callee]
      next if not func or func.finalized
      func.finalized = true
      puts " finalize subfunc #{Expression[callee]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, callee, func, func.return_address)
      detect_function_thunk(callee) unless func.return_address
    end
  end

  # target ends up holding the block or function reached at addr, if any
  target = nil
  if (known = @decoded[addr])
    if known.kind_of?(DecodedInstruction)
      split_block(known.block, known.address) unless known.block_head?	# this updates known.block
      known.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
      target = known.block
    elsif known == true
      target = @function[addr]
    end
  elsif (target = @function[addr])
    detect_function_thunk_noreturn(from) if target.noreturn
  elsif (section = get_section_at(addr))
    fresh = InstructionBlock.new(normalize(addr), section[0])
    fresh.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
    disassemble_block(fresh)
  elsif from and c_parser and (name = Expression[addr].reduce_rec) and name.kind_of?(::String) and
        (proto = c_parser.toplevel.symbol[name]) and proto.type.untypedef.kind_of?(C::Function)
    target = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, proto)
    detect_function_thunk_noreturn(from) if target.noreturn
  elsif from
    if (target = @function[:default])
      puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG
      if (name = Expression[addr].reduce_rec) and name.kind_of?(::String)
        @function[addr] = @function[:default].dup
      else
        addr = :default
      end
      @decoded[from].block.add_to addr if @decoded[from]
    else
      puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG
    end
    if from != :default
      add_xref(addr, Xref.new(:x, from))
      add_xref(Expression::Unknown, Xref.new(:x, from))
    end
  else
    puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE
  end

  return unless target and from and from != :default

  pending = if target.kind_of?(DecodedFunction)
    target.get_backtracked_for(self, addr, from)
  else
    target.backtracked_for
  end
  return unless pending

  # propagate the pending backtraces of the target through the new 'from' link
  pending.each do |btt|
    next if btt.address
    if @decoded[from].kind_of?(DecodedInstruction) and @decoded[from].opcode.props[:saveip] and
       not from_subfuncret and not @function[addr]
      backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached)
    end
    next if backtrace_check_funcret(btt, addr, from)
    backtrace(btt.expr, from,
              :include_start => true, :from_subfuncret => from_subfuncret,
              :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type,
              :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth)
  end
end
727
+
728
# splits an InstructionBlock at address and re-runs the backtraces recorded in
# the new block's backtracked_for, limited to the end of the first half
# may also be invoked with a single address, eg split_block(0x401012)
# returns the block starting at address (nil if the single-address form does
# not designate a decoded instruction)
def split_block(block, address=nil)
  unless address
    di = @decoded[block]
    return unless di.kind_of?(DecodedInstruction)
    address = block
    block = di.block
  end

  return block if address == block.address

  tail = block.split(address)
  tail.backtracked_for.dup.each do |btt|
    backtrace(btt.expr, btt.address,
              :only_upto => block.list.last.address,
              :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
              :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
              :detached => btt.detached, :maxdepth => btt.maxdepth)
  end
  tail
end
745
+
746
+ # disassembles a new instruction block at block.address (must be normalized); decodes at most @disassemble_maxblocklength instructions into the block, registers each in @decoded, and returns the block (nil when decoding stops inside the loop)
746
+ def disassemble_block(block)
747
+ raise if not block.list.empty? # only an empty block is accepted
748
+ di_addr = block.address # address of the next instruction to decode
749
+ delay_slot = nil # becomes [branching di, remaining delay count] once a branch is seen
750
+ di = nil
751
+
752
+ # try not to run for too long
753
+ # loop usage: break if the block continues to the following instruction, else return
754
+ @disassemble_maxblocklength.times {
755
+ # check collision into a known block
756
+ break if @decoded[di_addr]
757
+
758
+ # check self-modifying code
759
+ if @check_smc
760
+ #(-7...di.bin_length).each { |off| # uncomment to check for unaligned rewrites
761
+ waddr = di_addr #di_addr + off
762
+ each_xref(waddr, :w) { |x|
763
+ #next if off + x.len < 0
764
+ puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE
765
+ @comment[di_addr] ||= []
766
+ @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"]
767
+ @callback_selfmodifying[di_addr] if callback_selfmodifying
768
+ return # a write xref targets this address: stop here, the method returns nil
769
+ }
770
+ #}
771
+ end
772
+
773
+ # decode instruction
774
+ block.edata.ptr = di_addr - block.address + block.edata_ptr # position edata on the current instruction
775
+ if not di = @cpu.decode_instruction(block.edata, di_addr)
776
+ ed = block.edata
777
+ puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE
778
+ return
779
+ end
780
+
781
+ @decoded[di_addr] = di
782
+ block.add_di di
783
+ puts di if $DEBUG
784
+
785
+ di = @callback_newinstr[di] if callback_newinstr # the callback result replaces di
786
+ return if not di
787
+ block = di.block # continue with whatever block the (possibly replaced) di belongs to
788
+
789
+ di_addr = di.next_addr
790
+
791
+ backtrace_xrefs_di_rw(di)
792
+
793
+ if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
794
+ # do not backtrace until delay slot is finished (eg MIPS: di is a
795
+ # ret and the delay slot holds stack fixup needed to calc func_binding)
796
+ # XXX if the delay slot is also xref_x or :stopexec it is ignored
797
+ delay_slot ||= [di, @cpu.delay_slot(di)]
798
+ end
799
+
800
+ if delay_slot
801
+ di, delay = delay_slot # di is switched back to the instruction that opened the delay slot
802
+ if delay == 0 or not di_addr
803
+ backtrace_xrefs_di_x(di)
804
+ if di.opcode.props[:stopexec] or not di_addr; return
805
+ else break
806
+ end
807
+ end
808
+ delay_slot[1] = delay - 1 # one more delay-slot instruction has been consumed
809
+ end
810
+ }
811
+
812
+ ar = [di_addr] # reached by break or loop exhaustion: the block continues at di_addr
813
+ ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr
814
+ ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) }
815
+
816
+ block
817
+ end
819
+
820
# lists the execution cross-references generated by a decoded instruction
# the work is delegated to @program
# returns a list of symbolic expressions
def get_xrefs_x(di)
  exec_targets = @program.get_xrefs_x(self, di)
  exec_targets
end
825
+
826
# lists the data read/write cross-references generated by a decoded instruction
# the work is delegated to @program
# returns a list of [type, symbolic expression, length]
def get_xrefs_rw(di)
  data_refs = @program.get_xrefs_rw(self, di)
  data_refs
end
831
+
832
# runs disassemble_fast from each entrypoint, also disassembling subfunctions
# the entrypoints are merged into @entrypoints
def disassemble_fast_deep(*entrypoints)
  @entrypoints = (@entrypoints || []) | entrypoints

  entrypoints.each do |ep|
    do_disassemble_fast_deep(normalize(ep))
  end
end
839
+
840
# recursive worker for disassemble_fast_deep
# fast-disassembles ep, recursing into every subfunction address yielded, then
# links the calling instruction as a normal 'from' of the subfunction's block
def do_disassemble_fast_deep(ep)
  disassemble_fast(ep) do |callee, call_di|
    callee = normalize(callee)
    do_disassemble_fast_deep(callee)
    callee_di = di_at(callee) if call_di
    callee_di.block.add_from_normal(call_di.address) if callee_di
  end
end
849
+
850
# fast disassembly starting at entrypoint, see disassemble_fast_step
# one queue entry is processed per iteration; when the maxdepth countdown
# reaches 0, queued addresses that are not decoded yet are dropped
# (the default -1 never reaches 0)
# finishes with check_noreturn_function on the entrypoint
def disassemble_fast(entrypoint, maxdepth=-1, &b)
  queue = [entrypoint]
  while not queue.empty?
    disassemble_fast_step(queue, &b)
    maxdepth -= 1
    if maxdepth == 0
      queue.delete_if { |entry| not @decoded[normalize(entry[0])] }
    end
  end
  check_noreturn_function(entrypoint)
end
861
+
862
# pops one [addr, from, from_subfuncret] entry from todo and handles it:
#  - already decoded: split the block at addr if needed and record the link
#  - inside a section: disassemble a new block (see disassemble_fast_block)
#    and queue what it returns
#  - named external address: register a function from its C prototype, or a
#    copy of @function[:default]
# finishes by checking whether addr should be marked as a function
def disassemble_fast_step(todo, &b)
  entry = todo.pop
  return unless entry
  addr, from, from_subfuncret = entry

  addr = normalize(addr)

  if (known = @decoded[addr])
    if known.kind_of?(DecodedInstruction)
      split_block(known.block, known.address) unless known.block_head?
      known.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
    end
  elsif (section = get_section_at(addr))
    fresh = InstructionBlock.new(normalize(addr), section[0])
    fresh.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
    todo.concat disassemble_fast_block(fresh, &b)
  elsif (name = Expression[addr].reduce_rec) and name.kind_of?(::String) and not @function[addr]
    if c_parser and (proto = c_parser.toplevel.symbol[name]) and proto.type.untypedef.kind_of?(C::Function)
      @function[addr] = @cpu.decode_c_function_prototype(@c_parser, proto)
      detect_function_thunk_noreturn(from) if @function[addr].noreturn
    elsif @function[:default]
      @function[addr] = @function[:default].dup
    end
  end

  disassemble_fast_checkfunc(addr)
end
889
+
890
# marks addr as a function when it is a decoded instruction, is not a known
# function yet, and has an :x xref coming from a :saveip instruction
def disassemble_fast_checkfunc(addr)
  return unless @decoded[addr].kind_of?(DecodedInstruction) and not @function[addr]

  called = false
  each_xref(addr, :x) do |xr|
    caller_di = di_at(xr.origin)
    called = true if caller_di and caller_di.opcode.props[:saveip]
  end
  return unless called

  auto_label_at(addr, 'sub', 'loc', 'xref')
  # XXX use default_btbind_callback ?
  @function[addr] = DecodedFunction.new
  @function[addr].finalized = true
  detect_function_thunk(addr)
  puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
end
907
+
908
# fast-disassembles a new instruction block at block.address (must be normalized)
# block may also be a bare address, in which case the InstructionBlock is built here
# does not recurse into subfunctions
# assumes all :saveip return, except those pointing to a subfunc with noreturn
# yields subfunction addresses (targets of :saveip)
# only backtraces for :x, with maxdepth @backtrace_maxblocks_fast
# returns a todo-style ary
# assumes @addrs_todo is empty
def disassemble_fast_block(block, &b)
  unless block.kind_of?(InstructionBlock)
    block = InstructionBlock.new(normalize(block), get_section_at(block)[0])
  end
  cur_addr = block.address
  slot = nil		# [branching di, remaining delay count]
  di = nil
  todo = []

  return todo if @decoded[cur_addr]

  @disassemble_maxblocklength.times do
    break if @decoded[cur_addr]

    # decode instruction
    block.edata.ptr = cur_addr - block.address + block.edata_ptr
    di = @cpu.decode_instruction(block.edata, cur_addr)
    return todo if not di

    @decoded[cur_addr] = di
    block.add_di di
    puts di if $DEBUG

    di = @callback_newinstr[di] if callback_newinstr
    return todo if not di

    cur_addr = di.next_addr

    props = di.opcode.props
    if props[:stopexec] or props[:setip]
      if props[:setip]
        # backtrace() queues the targets it finds into @addrs_todo
        @addrs_todo = []
        @program.get_xrefs_x(self, di).each do |expr|
          backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
        end
      end
      if props[:saveip]
        @addrs_todo = []
        todo.concat disassemble_fast_block_subfunc(di, &b)
      else
        todo.concat @addrs_todo
        @addrs_todo = []
      end
      slot ||= [di, @cpu.delay_slot(di)]
    end

    if slot
      if slot[1] <= 0
        return todo if slot[0].opcode.props[:stopexec]
        break
      end
      slot[1] -= 1
    end
  end

  # the block falls through to cur_addr
  di.block.add_to_normal(cur_addr)
  todo << [cur_addr, di.address]
end
971
+
972
# handles a call to a subfunction encountered by disassemble_fast
# di is the calling instruction; each call target is yielded with di
# returns a todo-style ary: addresses found by detached backtraces, plus the
# return address (flagged from_subfuncret) when the call is assumed to return
def disassemble_fast_block_subfunc(di)
  callees = di.block.to_normal.to_a
  returns = callees.empty?
  todo = []
  retaddr = di.next_addr + di.bin_length * @cpu.delay_slot(di)

  callees.each do |callee|
    callee = normalize(callee)
    disassemble_fast_checkfunc(callee)
    yield callee, di if block_given?

    if (func = @function[callee]) and (traces = func.get_backtracked_for(self, callee, di.address)) and not traces.empty?
      # this includes retaddr unless func is noreturn
      traces.each do |btt|
        next if btt.type != :x
        found = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max)
        if btt.detached
          todo.concat found	# callback argument
        elsif found.find { |a| normalize(a) == retaddr }
          returns = true
        end
      end
    elsif not func or not func.noreturn
      returns = true
    end
  end

  if returns
    di.block.add_to_subfuncret(retaddr)
    todo << [retaddr, di.address, true]
    di.block.add_to_normal :default if not di.block.to_normal and @function[:default]
  end
  todo
end
1004
+
1005
# backtraces every data read/write xref generated by di
# with @check_smc set, a write landing on a decoded instruction gets an
# "overwritten by" comment on that instruction
def backtrace_xrefs_di_rw(di)
  get_xrefs_rw(di).each do |type, ptr, len|
    resolved = backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len)
    resolved.each do |xaddr|
      next if xaddr == Expression::Unknown
      next unless @check_smc and type == :w
      # unaligned writes are not checked
      victim = di_at(xaddr)
      next unless victim
      puts "W: disasm: #{di} overwrites #{victim}" if $VERBOSE
      victim.add_comment "overwritten by #{di}"
    end
  end
end
1022
+
1023
# backtraces every execution xref generated by di
# when callback_newaddr is set, it may replace the list of target expressions
def backtrace_xrefs_di_x(di)
  targets = @program.get_xrefs_x(self, di)
  if callback_newaddr
    replaced = @callback_newaddr[di.address, targets]
    targets = replaced if replaced
  end
  targets.each do |expr|
    backtrace(expr, di.address, :origin => di.address, :type => :x)
  end
end
1029
+
1030
# checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc])
# the argument must be the address of a decodedinstruction that is the first of a function,
# which must not have return_addresses
# follows at most 5 blocks of at most 4 instructions each; every block must have
# a single successor (a call is tolerated only to a function whose binding
# changes at most 2 values, none of them unknown)
# when the chain ends on another address, @function[funcaddr] is rebound to
# { :thunk => target } and inherits the target's noreturn flag
# when the target is a named symbol and funcaddr carries a 'sub_' label, the
# label is renamed to thunk_<name>; the value of rename_label is returned
# returns nil in every other case
def detect_function_thunk(funcaddr)
  # check thunk linearity (no conditional branch etc)
  addr = funcaddr
  count = 0
  while b = block_at(addr)
    count += 1
    return if count > 5 or b.list.length > 4
    if b.to_subfuncret and not b.to_subfuncret.empty?
      return if b.to_subfuncret.length != 1
      addr = normalize(b.to_subfuncret.first)
      return if not b.to_normal or b.to_normal.length != 1
      # check that the subfunction is simple (eg get_eip)
      return if not sf = @function[normalize(b.to_normal.first)]
      return if not btb = sf.backtrace_binding
      btb = btb.dup
      btb.delete_if { |k, v| Expression[k] == Expression[v] }
      return if btb.length > 2 or btb.values.include? Expression::Unknown
    else
      return if not bt = b.to_normal
      if bt.include? :default
        addr = :default
        break
      elsif bt.length != 1
        return
      end
      addr = normalize(bt.first)
    end
  end

  fname = Expression[addr].reduce_rec
  if funcaddr != addr and f = @function[funcaddr]
    # forward get_backtrace_binding to target
    f.backtrace_binding = { :thunk => addr }
    f.noreturn = true if @function[addr] and @function[addr].noreturn
  end
  return if not fname.kind_of? ::String

  l = auto_label_at(funcaddr, 'sub', 'loc')
  # auto_label_at returns nil when it cannot name the address: nothing to rename
  return if not l or l[0, 4] != 'sub_'
  puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG
  rename_label(l, @program.new_label("thunk_#{fname}"))
end
1074
+
1075
# this is called when reaching a noreturn function call, with the call address
# it walks back (at most 5 steps, single predecessors only) to the actual
# 'call' instruction leading to this noreturn function, and runs
# detect_function_thunk on the call target if that target is decoded
def detect_function_thunk_noreturn(addr)
  5.times do
    di = di_at(addr)
    return unless di

    if di.opcode.props[:saveip] and not di.block.to_subfuncret
      # found the call: only a single, decoded target can be a thunk
      if di.block.to_normal.to_a.length == 1
        callee = normalize(di.block.to_normal.first)
        if di_at(callee)
          @function[callee] ||= DecodedFunction.new
          return detect_function_thunk(callee)
        end
      end
      break
    end

    # not a call yet: step to the predecessor if it is unique
    preds = di.block.from_normal.to_a + di.block.from_subfuncret.to_a
    break if preds.length != 1
    addr = preds.first
  end
end
1099
+
1100
# given an address, detect if it may be a noreturn function
# it is if every end block either calls only noreturn functions, or ends on a
# :stopexec instruction that is not :setip
# if it is, create a @function[fa] with noreturn = true
# should only be called with fa = target of a call
def check_noreturn_function(fa)
  blocks = function_blocks(fa, false, false)
  end_blocks = blocks.keys.find_all { |baddr| blocks[baddr] == [] }
  return if end_blocks.empty?

  never_returns = end_blocks.all? do |baddr|
    blk = block_at(baddr)
    last_di = blk.list.last
    next if not last_di
    props = last_di.opcode.props
    calls_noreturn = (props[:saveip] and blk.to_normal.to_a.all? { |callee|
      func = function_at(callee) and func.noreturn
    })
    calls_noreturn or (props[:stopexec] and not props[:setip])
  end
  return unless never_returns

  @function[fa] ||= DecodedFunction.new
  @function[fa].noreturn = true
end
1120
+
1121
+
1122
+ # walks the backtrace tree from an address, passing along an object
1123
+ #
1124
+ # the steps are (1st = event, followed by hash keys)
1125
+ #
1126
+ # for each decoded instruction encountered:
1127
+ # :di :di
1128
+ #
1129
+ # when backtracking to a block through a decodedfunction:
1130
+ # (yield for each of the block's subfunctions)
1131
+ # (the decodedinstruction responsible for the call will be yield next)
1132
+ # :func :func, :funcaddr, :addr, :depth
1133
+ #
1134
+ # when jumping from one block to another (excluding :loop): # XXX include :loops ?
1135
+ # :up :from, :to, :sfret
1136
+ #
1137
+ # when the backtrack has nothing to backtrack to (eg program entrypoint):
1138
+ # :end :addr
1139
+ #
1140
+ # when the backtrack stops by taking too long to complete:
1141
+ # :maxdepth :addr
1142
+ #
1143
+ # when the backtrack stops for encountering the specified stop address:
1144
+ # :stopaddr :addr
1145
+ #
1146
+ # when rebacktracking a block already seen in the current branch:
1147
+ # (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest)
1148
+ # :loop :looptrace
1149
+ #
1150
+ # when the address does not match a known instruction/function:
1151
+ # :unknown_addr :addr
1152
+ #
1153
+ # the block return value is used as follow for :di, :func, :up and :loop:
1154
+ # false => the backtrace stops for the branch
1155
+ # nil => the backtrace continues with the current object
1156
+ # anything else => the backtrace continues with this object
1157
+ #
1158
+ # method arguments:
1159
+ # obj is the initial value of the object
1160
+ # addr is the address where the backtrace starts
1161
+ # include_start is a bool specifying if the backtrace should start at addr or just before
1162
+ # from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction
1163
+ # stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it
1164
+ # maxdepth is the maximum depth (in blocks) for each backtrace branch.
1165
+ # (defaults to dasm.backtrace_maxblocks, which defaults do Dasm.backtrace_maxblocks)
1166
+ def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth) # events are reported by yielding (event, obj, hash) to the caller's block
1166
+ start_addr = normalize(addr)
1167
+ stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array # accept a single address as well as an array
1168
+
1169
+ # array of [obj, addr, from_subfuncret, loopdetect]
1170
+ # loopdetect is an array of [obj, addr, from_type] of each end of block encountered
1171
+ todo = []
1172
+
1173
+ # array of [obj, blockaddr]
1174
+ # avoids rewalking the same value
1175
+ done = []
1176
+
1177
+ # updates todo with the addresses to backtrace next
1178
+ walk_up = lambda { |w_obj, w_addr, w_loopdetect|
1179
+ if w_loopdetect.length > maxdepth
1180
+ yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
1181
+ elsif stopaddr and stopaddr.include?(w_addr)
1182
+ yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
1183
+ elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address
1184
+ prevdi = w_di.block.list[w_di.block.list.index(w_di)-1] # not at the block head: step to the previous instruction of the same block
1185
+ todo << [w_obj, prevdi.address, :normal, w_loopdetect]
1186
+ elsif w_di
1187
+ next if done.include? [w_obj, w_addr] # 'next' leaves the lambda: this (obj, block) pair was already walked
1188
+ done << [w_obj, w_addr]
1189
+ hadsomething = false
1190
+ w_di.block.each_from { |f_addr, f_type|
1191
+ next if f_type == :indirect
1192
+ hadsomething = true
1193
+ o_f_addr = f_addr
1194
+ f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
1195
+ if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type }
1196
+ f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
1197
+ if f_obj and f_obj != w_obj # should avoid infinite loops
1198
+ f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)]
1199
+ end
1200
+ else
1201
+ f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr)
1202
+ end
1203
+ next if f_obj == false
1204
+ f_obj ||= w_obj
1205
+ f_loopdetect ||= w_loopdetect
1206
+ # only count non-trivial paths in loopdetect (ignore linear links)
1207
+ add_detect = [[f_obj, f_addr, f_type]]
1208
+ add_detect = [] if @decoded[f_addr].kind_of? DecodedInstruction and tmp = @decoded[f_addr].block and
1209
+ ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and
1210
+ tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or
1211
+ (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address]))
1212
+ todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ]
1213
+ }
1214
+ yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething
1215
+ elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown
1216
+ next if done.include? [w_obj, w_addr] # NOTE(review): unlike the branch above, this branch never appends to done - confirm this is intended
1217
+ oldlen = todo.length # used below to tell whether any xref was queued
1218
+ each_xref(w_addr, :x) { |x|
1219
+ f_addr = x.origin
1220
+ o_f_addr = f_addr
1221
+ f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
1222
+ if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr }
1223
+ f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
1224
+ if f_obj and f_obj != w_obj
1225
+ f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)]
1226
+ end
1227
+ else
1228
+ f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr)
1229
+ end
1230
+ next if f_obj == false
1231
+ f_obj ||= w_obj
1232
+ f_loopdetect ||= w_loopdetect
1233
+ todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ]
1234
+ }
1235
+ yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen
1236
+ else
1237
+ yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
1238
+ end
1239
+ }
1240
+
1241
+ if include_start
1242
+ todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []]
1243
+ else
1244
+ walk_up[obj, start_addr, []] # skip the start instruction: begin with whatever precedes it
1245
+ end
1246
+
1247
+ while not todo.empty?
1248
+ obj, addr, type, loopdetect = todo.pop # todo is used as a stack (last queued is handled first)
1249
+ di = @decoded[addr]
1250
+ if di and type == :subfuncret
1251
+ di.block.each_to_normal { |sf|
1252
+ next if not f = @function[normalize(sf)]
1253
+ s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect)
1254
+ next if s_obj == false
1255
+ s_obj ||= obj
1256
+ if l = loopdetect.find { |l_obj, l_addr, l_type| addr == l_addr and l_type == :normal }
1257
+ l_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(l)..-1], :loopdetect => loopdetect)
1258
+ if l_obj and l_obj != s_obj
1259
+ s_loopdetect = loopdetect[0...loopdetect.index(l)]
1260
+ end
1261
+ next if l_obj == false
1262
+ s_obj = l_obj if l_obj
1263
+ end
1264
+ s_loopdetect ||= loopdetect
1265
+ todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ]
1266
+ }
1267
+ elsif di
1268
+ # XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ?
1269
+ di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_|
1270
+ di = di_ # XXX not sure..
1271
+ if stopaddr and ea = di.next_addr and stopaddr.include?(ea)
1272
+ yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect
1273
+ break
1274
+ end
1275
+ ex_obj = obj
1276
+ obj = yield(:di, obj, :di => di, :loopdetect => loopdetect)
1277
+ break if obj == false
1278
+ obj ||= ex_obj
1279
+ }
1280
+ walk_up[obj, di.block.address, loopdetect] if obj # obj is false here when the :di callback stopped this branch
1281
+ elsif @function[addr] and addr != :default and addr != Expression::Unknown
1282
+ ex_obj = obj
1283
+ obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect)
1284
+ next if obj == false
1285
+ obj ||= ex_obj
1286
+ walk_up[obj, addr, loopdetect]
1287
+ else
1288
+ yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect
1289
+ end
1290
+ end
1291
+ end
1293
+
1294
# holds a backtrace result until a snapshot_addr is encountered
class StoppedExpr
  # the wrapped value, exactly as given to the constructor
  attr_accessor :exprs

  def initialize(e)
    @exprs = e
  end
end
1299
+
1300
+
1301
+ attr_accessor :debug_backtrace
1302
+
1303
+ # backtraces the value of an expression from start_addr
1304
+ # updates blocks backtracked_for if type is set
1305
+ # uses backtrace_walk
1306
+ # all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified
1307
+ # options:
1308
+ # :include_start => start backtracking including start_addr
1309
+ # :from_subfuncret =>
1310
+ # :origin => origin to set for xrefs when resolution is successful
1311
+ # :orig_expr => initial expression
1312
+ # :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo
1313
+ # :len => xref len (for :r/:w)
1314
+ # :snapshot_addr => addr (or array of) where the backtracker should stop
1315
+ # if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end)
1316
+ # :maxdepth => maximum number of blocks to backtrace
1317
+ # :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo
1318
+ # :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace
1319
+ # :log => Array, will be updated with the backtrace evolution
1320
+ # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto
1321
+ # :no_check => don't use backtrace_check_found (will not backtrace indirection static values)
1322
+ # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check)
1323
def backtrace(expr, start_addr, nargs={})
  # Walks the code backwards from start_addr, applying the effect of every
  # instruction / subfunction / address binding met on the way to expr, and
  # returns the array of values collected for it.
  #
  # Options are consumed from a private copy of nargs, so the caller's hash is
  # left untouched. Any key left over once all known options have been read
  # raises ArgumentError.
  # :max_complexity_data overrides @backtrace_maxcomplexity_data, the limit used
  # when type is :r or :w.
  nargs = nargs.dup
  include_start = nargs.delete :include_start
  from_subfuncret = nargs.delete :from_subfuncret
  origin = nargs.delete :origin
  origexpr = nargs.delete :orig_expr
  type = nargs.delete :type
  len = nargs.delete :len
  snapshot_addr = nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr)
  maxdepth = nargs.delete(:maxdepth) || @backtrace_maxblocks
  detached = nargs.delete :detached
  max_complexity = nargs.delete(:max_complexity) || @backtrace_maxcomplexity
  # was reading :max_complexity a second time (already deleted above), so the
  # data limit could never be set by the caller
  max_complexity_data = nargs.delete(:max_complexity_data) || @backtrace_maxcomplexity_data
  bt_log = nargs.delete :log # array to receive the ongoing backtrace info
  only_upto = nargs.delete :only_upto
  no_check = nargs.delete :no_check
  terminals = nargs.delete(:terminals) || []
  raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty?

  expr = Expression[expr]

  origexpr = expr if origin == start_addr

  start_addr = normalize(start_addr)
  di = @decoded[start_addr]

  if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
    puts " not backtracking stack address #{expr}" if debug_backtrace
    return []
  end

  if type == :r or type == :w
    max_complexity = max_complexity_data
    maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data
  end

  # single place for the "is this expression resolved ?" test
  # returns an array of values, or a false value if more backtrace is needed
  # with :no_check, only need_backtrace is consulted (no xref/label side effects)
  check_found = lambda { |cur_expr, cur_di, depth|
    if no_check
      (!need_backtrace(cur_expr, terminals) and [cur_expr])
    else
      backtrace_check_found(cur_expr, cur_di, origin, type, len, depth, detached)
    end
  }

  if vals = check_found[expr, di, maxdepth]
    # no need to update backtracked_for
    return vals
  elsif maxdepth <= 0
    return [Expression::Unknown]
  end

  # create initial backtracked_for
  if type and origin == start_addr and di
    btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1)
    btt.address = di.address
    btt.exclude_instr = true if not include_start
    btt.from_subfuncret = true if from_subfuncret and include_start
    btt.detached = true if detached
    di.block.backtracked_for |= [btt]
  end

  @callback_prebacktrace[] if callback_prebacktrace

  # list of Expression/Integer
  result = []

  puts "backtracking #{type} #{expr} from #{di || Expression[start_addr || 0]} for #{@decoded[origin]}" if debug_backtrace or $DEBUG
  bt_log << [:start, expr, start_addr] if bt_log
  backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h|
    expr = expr_
    case ev
    when :unknown_addr, :maxdepth
      puts " backtrace end #{ev} #{expr}" if debug_backtrace
      result |= [expr] if not snapshot_addr
      @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin
    when :end
      if not expr.kind_of? StoppedExpr
        oldexpr = expr
        expr = backtrace_emu_blockup(h[:addr], expr)
        puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
        bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
        if expr != oldexpr and not snapshot_addr and vals = check_found[expr, nil, maxdepth-h[:loopdetect].length]
          result |= vals
          next
        end
      end
      puts " backtrace end #{ev} #{expr}" if debug_backtrace
      if not snapshot_addr
        result |= [expr]

        btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
        btt.detached = true if detached
        @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]]
        @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default
        @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin
      end
    when :stopaddr
      if not expr.kind_of? StoppedExpr
        oldexpr = expr
        expr = backtrace_emu_blockup(h[:addr], expr)
        puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
        bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
      end
      puts " backtrace end #{ev} #{expr}" if debug_backtrace
      result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr])
    when :loop
      next false if expr.kind_of? StoppedExpr
      t = h[:looptrace]
      oldexpr = t[0][0]
      next false if expr == oldexpr # unmodifying loop
      puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace
      false
    when :up
      next false if only_upto and h[:to] != only_upto
      next expr if expr.kind_of? StoppedExpr
      oldexpr = expr
      expr = backtrace_emu_blockup(h[:from], expr)
      puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
      bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log

      if expr != oldexpr and vals = check_found[expr, @decoded[h[:from]], maxdepth-h[:loopdetect].length]
        if snapshot_addr
          expr = StoppedExpr.new vals
          next expr
        else
          result |= vals
          bt_log << [:found, vals, h[:from]] if bt_log
          next false
        end
      end

      if origin and type
        # update backtracked_for
        update_btf = lambda { |btf, new_btt|
          # returns true if btf was modified
          if i = btf.index(new_btt)
            btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth
          else
            btf << new_btt
          end
        }

        btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
        btt.detached = true if detached
        if x = di_at(h[:from])
          update_btf[x.block.backtracked_for, btt]
        end
        if x = @function[h[:from]] and h[:from] != :default
          update_btf[x.backtracked_for, btt]
        end
        if x = di_at(h[:to])
          btt = btt.dup
          btt.address = x.address
          btt.from_subfuncret = true if h[:sfret] == :subfuncret
          if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to])
            puts " function returns to caller" if debug_backtrace
            next false
          end
          if not update_btf[x.block.backtracked_for, btt]
            puts " already backtraced" if debug_backtrace
            next false
          end
        end
      end
      expr
    when :di, :func
      next if expr.kind_of? StoppedExpr
      if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
        puts " not backtracking stack address #{expr}" if debug_backtrace
        next false
      end

      oldexpr = expr
      case ev
      when :di
        h[:addr] = h[:di].address
        expr = backtrace_emu_instr(h[:di], expr)
        bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr
      when :func
        expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
        if snapshot_addr and snapshot_addr == h[:funcaddr]
          # XXX recursiveness detection needs to be fixed
          puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
          next false
        end
        bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr
      end
      puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
      if vals = check_found[expr, h[:di], maxdepth-h[:loopdetect].length]
        if snapshot_addr
          expr = StoppedExpr.new vals
        else
          result |= vals
          bt_log << [:found, vals, h[:addr]] if bt_log
          next false
        end
      elsif expr.complexity > max_complexity
        puts " backtrace aborting, expr too complex" if debug_backtrace
        next false
      end
      expr
    else raise ev.inspect
    end
  }

  puts ' backtrace result: ' + result.map { |r| Expression[r] }.join(', ') if debug_backtrace

  result
end
1530
+
1531
+ # checks if the BacktraceTrace is a call to a known subfunction
1532
+ # returns true and updates self.addrs_todo
1533
def backtrace_check_funcret(btt, funcaddr, instraddr)
  # Only an :x trace that did not already come through a subfunction return,
  # whose expression is a function return for the cpu, and which the
  # instruction at instraddr resolves to a value needing no more backtrace is
  # handled here. In every other case the method returns nil.
  di = @decoded[instraddr]
  return if not di
  return if not @function[funcaddr]
  return if not btt.type == :x
  return if btt.from_subfuncret
  return if not @cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin])
  retaddr = backtrace_emu_instr(di, btt.expr)
  return if not retaddr
  return if need_backtrace(retaddr)

  puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace
  di.block.add_to_subfuncret normalize(retaddr)

  func_head = @decoded[funcaddr]
  if func_head.kind_of? DecodedInstruction
    # make sure every :saveip caller gets its return address too (eg a
    # recursive call resolved before funcaddr was known to be a function)
    func_head.block.each_from_normal { |caller_addr|
      caller_di = di_at(caller_addr)
      if caller_di and caller_di.opcode.props[:saveip] and not caller_di.block.to_subfuncret
        backtrace_check_funcret(btt, funcaddr, caller_addr)
      end
    }
  end

  if @function[funcaddr].finalized
    @addrs_todo << [retaddr, instraddr, true]
  else
    # the function is not fully disassembled yet: collect the block addresses
    # reachable from the call, and queue the return address just before the
    # first of them found in @addrs_todo (which is pop()ed by dasm_step), so
    # that it is handled only after the subfunction is finished
    seen = []
    pending = []
    di.block.each_to_normal { |t| pending << normalize(t) }
    while cur = pending.pop
      next if seen.include?(cur) or not get_section_at(cur)
      seen << cur
      if @decoded[cur].kind_of? DecodedInstruction
        @decoded[cur].block.each_to_samefunc(self) { |t| pending << normalize(t) }
      end
    end

    first_hit = @addrs_todo.find { |r, i, sfr| seen.include? normalize(r) }
    idx = @addrs_todo.index(first_hit) || -1
    @addrs_todo.insert(idx, [retaddr, instraddr, true])
  end

  true
end
1574
+
1575
+ # applies one decodedinstruction to an expression
1576
def backtrace_emu_instr(di, expr)
  # the per-instruction emulation is entirely delegated to the cpu object
  cpu = @cpu
  cpu.backtrace_emu(di, expr)
end
1579
+
1580
+ # applies one subfunction to an expression
1581
def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth)
  # ask the function object for its binding at this call site, then apply it
  func_binding = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth)
  reduced = expr.bind(func_binding).reduce
  Expression[reduced]
end
1585
+
1586
+ # applies a location binding
1587
def backtrace_emu_blockup(addr, expr)
  # without a binding registered for addr, the expression goes through unchanged
  addr_binding = @address_binding[addr]
  return expr if not addr_binding
  Expression[expr.bind(addr_binding).reduce]
end
1590
+
1591
+ # static resolution of indirections
1592
def resolve(expr)
  # start from @old_prog_binding and add one entry per indirection found in
  # expr, each mapped to the value decoded from the section its (recursively
  # resolved) target points into
  full_binding = @old_prog_binding
  Expression[expr].expr_indirections.each { |ind|
    edata, _ = get_section_at(resolve(ind.target))
    # a pointer outside of any section: give up and hand back expr untouched
    return expr if not edata
    imm_type = "u#{8*ind.len}".to_sym
    full_binding = full_binding.merge(ind => Expression[edata.decode_imm(imm_type, @cpu.endianness)])
  }
  Expression[expr].bind(full_binding).reduce
end
1600
+
1601
+ # returns true if the expression needs more backtrace
1602
+ # it checks for the presence of a symbol (not :unknown), which means it depends on some register value
1603
def need_backtrace(expr, terminals=[])
  # a plain Integer has no externals: answer nil right away
  return nil if expr.kind_of?(::Integer)
  # symbols that do not count as "still to be backtracked"
  ignored = [:unknown] + terminals
  expr.externals.any? { |ext| ext.kind_of?(::Symbol) and not ignored.include?(ext) }
end
1607
+
1608
+ # returns an array of expressions, or nil if expr needs more backtrace
1609
+ # it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value)
1610
+ # if it needs no more backtrace, expr's indirections are recursively resolved
1611
+ # xrefs are created, and di args are updated (immediate => label)
1612
+ # if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value
1613
+ #
1614
+ # expr indirections are solved by first finding the value of the pointer, and then rebacktracking for write-type access
1615
+ # detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback)
1616
+ # if the backtrace ends pre entrypoint, returns the value encoded in the raw binary
1617
+ # XXX global variable (modified by another function), exported data, multithreaded app..
1618
+ # TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax])
1619
+ # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
1620
+ # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
1621
+ # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
1622
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
  # step 1: function start detection
  # candidate = an :x backtrace of a function-return expression, sitting on the
  # first instruction of its block
  func_start = false
  if type == :x and di and di == di.block.list.first and
      @cpu.backtrace_is_function_return(expr, @decoded[origin])
    blk = di.block
    if not blk.from_normal and not blk.from_subfuncret
      # the block is an entrypoint..
      func_start = true
    else
      # ..or it is called from a :saveip instruction
      blk.each_from_normal { |fn|
        caller_di = @decoded[fn]
        func_start = true if caller_di and caller_di.opcode.props[:saveip]
      }
    end
  end

  if func_start
    # mark the current address as a function start
    # the actual return address will be found later (the caller is told to
    # continue the backtrace)
    addr = di.address
    l = auto_label_at(addr, 'sub', 'loc', 'xref')
    f = @function[addr]
    if not f
      f = @function[addr] = DecodedFunction.new
      puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE
    end
    f.finalized = false

    if @decoded[origin]
      f.return_address ||= []
      f.return_address |= [origin]
      @decoded[origin].add_comment "endsub #{l}"
      # TODO add_xref (to update the comment on rename_label)
    end

    f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address }
  end

  # step 2: nothing found yet if the expression still depends on a register
  return if need_backtrace(expr)

  puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
  found = backtrace_value(expr, maxdepth)
  # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
  found << expr if not type
  found.uniq!

  # step 3: create xrefs/labels for each value
  if type and origin
    found.each { |value| backtrace_found_result(value, di, type, origin, len, detached) }
  end

  found
end
1666
+
1667
+ # returns an array of expressions with Indirections resolved (recursive with backtrace_indirection)
1668
def backtrace_value(expr, maxdepth)
  initial = Expression[expr.reduce]

  # fold over the distinct indirections of the reduced expression: each one
  # replaces the candidate list by its cross-product with the values found for
  # that indirection
  resolved = initial.expr_indirections.uniq.inject([initial]) { |candidates, ind|
    backtrace_indirection(ind, maxdepth).inject([]) { |acc, value|
      acc | candidates.map { |cand| Expression[cand.bind(ind => value).reduce] }
    }
  }

  resolved.uniq
end
1683
+
1684
+ # returns the array of values pointed by the indirection at its invocation (ind.origin)
1685
+ # first resolves the pointer using backtrace_value, if it does not point in edata keep the original pointer
1686
+ # then backtraces from ind.origin until it finds an :w xref origin
1687
+ # if no :w access is found, returns the value encoded in the raw section data
1688
+ # TODO handle unaligned (partial?) writes
1689
def backtrace_indirection(ind, maxdepth)
  if not ind.origin
    puts "backtrace_ind: no origin for #{ind}" if $VERBOSE
    return [ind]
  end

  values = []

  # value encoded in the raw section data at addr, Unknown outside of any section
  read_static = lambda { |addr, len|
    edata, _ = get_section_at(addr)
    if edata
      Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
    else
      Expression::Unknown
    end
  }

  # true when the expression has no indirection left and all its externals are
  # either in @prog_binding or the address of a known function
  settled = lambda { |e|
    e.expr_externals.all? { |ext| @prog_binding[ext] or @function[normalize(ext)] } and e.expr_indirections.empty?
  }

  # resolve pointers (they may include Indirections)
  backtrace_value(ind.target, maxdepth).each { |ptr|
    # addresses of the decoded instructions having a write xref to the ptr
    writers = []
    if ptr != Expression::Unknown
      each_xref(ptr, :w) { |x|
        # XXX should be rebacktracked on new xref
        next if not @decoded[x.origin]
        writers |= [x.origin]
      }
    end

    if not writers.empty?
      # wait until we find a write xref, then backtrace the written value
      initval = true
    elsif get_section_at(ptr)
      # static data, never written : return encoded value
      values |= [read_static[ptr, ind.len]]
      next
    else
      # unknown pointer : backtrace the indirection, hope it solves itself
      initval = ind
    end

    # wait until we arrive at an xref'ing instruction, then backtrace the written value
    backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h|
      case ev
      when :unknown_addr, :maxdepth, :stopaddr
        puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace
        values |= [Expression::Unknown]
      when :end
        if not writers.empty? and (expr == true or not need_backtrace(expr))
          if expr == true
            # found a path avoiding the :w xrefs, read the encoded initial value
            values |= [read_static[ptr, ind.len]]
          else
            bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => read_static[i.target, i.len] }
            values |= [Expression[expr.bind(bd).reduce]]
          end
        else
          # unknown pointer, backtrace did not resolve...
          values |= [Expression::Unknown]
        end
      when :di
        di = h[:di]
        if expr == true
          # still looking for a writer: skip any other instruction
          next true if not writers.include? di.address
          # find the expression to backtrace: assume this is the :w xref from this di
          candidates = get_xrefs_rw(di).find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len }
          if candidates.length != 1
            puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG
            values |= [Expression::Unknown]
            next false
          end
          expr = Indirection.new(candidates[0][1], ind.len, di.address)
        end
        expr = backtrace_emu_instr(di, expr)
        # may have new indirections... recall bt_value ?
        if settled[expr]
          values |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
          false
        else
          expr
        end
      when :func
        next true if expr == true # XXX
        expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length)
        if settled[expr]
          values |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
          false
        else
          expr
        end
      end
    }
  }

  values
end
1788
+
1789
+ # creates xrefs, updates addrs_todo, updates instr args
1790
def backtrace_found_result(expr, di, type, origin, len, detached)
  # Registers one resolved value: adds the xrefs, creates or reuses a label at
  # the target, patches the instruction arguments, comments the origin
  # instruction, tries to recognize a string at the target of 1/2-byte reads,
  # and for :x values links the blocks and queues the target in @addrs_todo.
  n = normalize(expr)

  # an :x target equal to the address following the origin block (and the
  # origin does not stop execution) is a plain fallthrough: no xref/label/comment
  fallthrough = nil
  if type == :x and (o = di_at(origin)) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot
    fallthrough = true
  end
  add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough
  unk = true if n == Expression::Unknown

  add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk
  base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref'
  base = 'sub' if @function[n]
  n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough
  n = Expression[n]

  # update instr args
  # TODO trace expression evolution to allow handling of
  # mov eax, 28 ; add eax, 4 ; jmp eax
  # => mov eax, (loc_xx-4)
  if di and not unk # and di.address == origin
    @cpu.replace_instr_arg_immediate(di.instruction, expr, n)
  end
  if @decoded[origin] and not unk
    @cpu.backtrace_found_result(self, @decoded[origin], expr, type, len)
  end

  # add comment
  if type and @decoded[origin] # and not @decoded[origin].instruction.args.include? n
    @decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough
  end

  # check if target is a string
  if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n)
    l = s[0].inv_export[s[0].ptr]
    case len
    when 1; str = s[0].read(32).unpack('C*')
    when 2; str = s[0].read(64).unpack('v*')
    end
    # keep the leading run of printable characters
    # the code units are Integers: compare against integer codes, as ?\n-style
    # literals are Strings on ruby >= 1.9 and would never match
    str = str.inject('') { |str_, c|
      case c
      when 0x20..0x7e, 0x0a, 0x0d, 0x09; str_ << c
      else break str_
      end
    }
    if str.length >= 4
      di.add_comment "#{'L' if len == 2}#{str.inspect}"
      str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12]
      # only rename autogenerated byte_ labels (l is nil when no label is
      # exported at this offset)
      if str.length >= 8 and l and l[0, 5] == 'byte_'
        rename_label(l, @program.new_label(str))
      end
    end
  end

  # XXX all this should be done in backtrace() { <here> }
  if type == :x and origin
    if detached
      o = @decoded[origin] ? origin : di ? di.address : nil # lib function callback have origin == libfuncname, so we must find a block somewhere else
      origin = nil
      @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk
    else
      @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk
    end
    @addrs_todo << [n, origin]
  end
end
1852
+
1853
def to_s
  # accumulate every line yielded by dump, newline-terminated
  text = ''
  dump { |line|
    text << line
    text << "\n"
  }
  text
end
1858
+
1859
+ # dumps the source, optionally including data
1860
+ # yields (defaults puts) each line
1861
def dump(dump_data=true, &b)
  b ||= lambda { |l| puts l }
  # sections in address order (non-Integer addresses sort as 0)
  ordered = @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }
  ordered.each { |addr, edata|
    addr = Expression[addr] if addr.kind_of? ::String
    # section-relative offsets of the decoded block heads lying in this section
    head_offs = @decoded.values.grep(DecodedInstruction).map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }
    blockoffs = head_offs.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length }
    b[@program.dump_section_header(addr, edata)]
    if not dump_data and edata.length > 16*1024 and blockoffs.empty?
      # big section with no code and data dump disabled: one summary line
      b["// [#{edata.length} data bytes]"]
      next
    end
    cur_off = 0 # last off displayed
    while cur_off < edata.length
      if cur_off != blockoffs.first
        # data up to the next block (or the end of the section)
        next_off = blockoffs.first || edata.length
        if dump_data or next_off - cur_off < 16
          cur_off = dump_data(addr + cur_off, edata, cur_off, &b)
        else
          b["// [#{next_off - cur_off} data bytes]"]
          cur_off = next_off
        end
      else
        # a decoded block starts here
        blockoffs.shift
        di = @decoded[addr+cur_off]
        if cur_off != di.block.edata_ptr
          b["\n// ------ overlap (#{cur_off-di.block.edata_ptr}) ------"]
        elsif di.block.from_normal.kind_of? ::Array
          b["\n"]
        end
        dump_block(di.block, &b)
        cur_off += [di.block.bin_length, 1].max
        # never skip over the start of the next block
        cur_off = blockoffs.first if blockoffs.first and cur_off > blockoffs.first
      end
    end
  }
end
1897
+
1898
+ # dumps a block of decoded instructions
1899
def dump_block(block, &b)
  b ||= lambda { |l| puts l }
  # accept an address too: use the block of the instruction decoded there
  di_here = @decoded[block]
  block = di_here.block if di_here
  dump_block_header(block, &b)
  block.list.each { |di| b.call(di.show) }
end
1905
+
1906
+ # shows the xrefs/labels at block start
1907
def dump_block_header(block, &b)
  b ||= lambda { |l| puts l }

  # one entry per :x/:r/:w xref to the block address
  refs = []
  each_xref(block.address) { |x|
    kind = x.type
    if kind == :x
      refs << Expression[x.origin]
    elsif kind == :r or kind == :w
      refs << "#{x.type}#{x.len}:#{Expression[x.origin]}"
    end
  }
  has_refs = !refs.empty?
  # show at most 8 of them
  b["\n// Xrefs: #{refs[0, 8].join(' ')}#{' ...' if refs.length > 8}"] if has_refs

  # labels, if one is exported at the block start
  if block.edata.inv_export[block.edata_ptr]
    b["\n"] if not has_refs
    label_alias[block.address].each { |name| b["#{name}:"] }
  end

  # comment attached to the address, one output line per comment line
  if c = @comment[block.address]
    c = c.join("\n") if c.kind_of? ::Array
    c.each_line { |l| b["// #{l}"] }
  end
end
1928
+
1929
+ # dumps data/labels, honours @xrefs.len if exists
1930
+ # dumps one line only
1931
+ # stops on end of edata/@decoded/@xref
1932
+ # returns the next offset to display
1933
+ # TODO array-style data access
1934
def dump_data(addr, edata, off, &b)
  b ||= lambda { |l| puts l }

  # line prefix: the label (padded), every other alias goes on its own line
  if l = edata.inv_export[off]
    aliases = label_alias[addr].to_a.sort
    l = aliases.pop || l
    aliases.each { |other| b["#{other}:"] }
    l = (l + ' ').ljust(16)
  else
    l = ''
  end

  elemlen = 1 # size of each element we dump (db by default)
  dumplen = (-off) % 16 # number of octets to dump
  dumplen = 16 if dumplen == 0

  # xrefs to addr give both the element size and the trailing comment
  notes = []
  each_xref(addr) { |x|
    dumplen = elemlen = x.len if x.len == 2 or x.len == 4
    notes << " #{x.type}#{x.len}:#{Expression[x.origin]}"
  }
  cmt = " ; @#{Expression[addr]}" + notes.sort[0, 6].join

  # a relocation here overrides the element size
  if r = edata.reloc[off]
    dumplen = elemlen = r.type.to_s[1..-1].to_i/8
  end

  dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen]
  if not dataspec
    dataspec = 'db '
    elemlen = 1
  end
  l << dataspec

  # dup(?)
  if off >= edata.data.length
    dups = edata.virtsize - off
    # stop at the closest following label/xref address
    clamp = lambda { |a|
      dist = Expression[a, :-, addr].reduce
      dups = dist if dist.kind_of?(::Integer) and dist > 0 and dist < dups
    }
    @prog_binding.each_value { |a| clamp[a] }
    @xrefs.each_key { |a| clamp[a] }
    dups /= elemlen
    dups = 1 if dups < 1
    b[(l + "#{dups} dup(?)").ljust(48) << cmt]
    return off + dups*elemlen
  end

  vals = []
  edata.ptr = off
  dups = dumplen/elemlen
  elemsym = "u#{elemlen*8}".to_sym
  while edata.ptr < edata.data.length
    if vals.length > dups and vals.last != vals.first
      # we have a dup(), unread the last element which is different
      vals.pop
      addr = Expression[addr, :-, elemlen].reduce
      edata.ptr -= elemlen
      break
    end
    break if vals.length == dups and vals.uniq.length > 1
    vals << edata.decode_imm(elemsym, @cpu.endianness)
    addr += elemlen
    # stop on an xref/decoded instruction/reloc/label found in the element just
    # read or right after it
    back = (1-elemlen..0).find { |i_|
      t = addr + i_
      @xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+i_] or edata.inv_export[edata.ptr+i_]
    }
    if back
      # back <= 0
      edata.ptr += back
      addr += back
      break
    end
    break if edata.reloc[edata.ptr-elemlen]
  end

  # line of repeated value => dup()
  if vals.length > 8 and vals.uniq.length == 1
    b[(l << "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) << cmt]
    return edata.ptr
  end

  # recognize strings: merge runs of printable values into String chunks
  text_like = (elemlen == 1 or elemlen == 2)
  chunks = []
  vals.each { |value|
    printable = case value
                when 0x20..0x7e, 0x0a, 0x0d then true
                else false
                end
    if text_like and printable
      if chunks.last.kind_of? ::String
        chunks.last << value
      else
        chunks << value.chr
      end
    else
      chunks << value
    end
  }

  # strings longer than 2 chars are shown quoted, anything else as numbers
  rendered = chunks.map { |value|
    if not value.kind_of? ::String
      Expression[value]
    elsif value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care
      value.inspect
    else
      value.unpack('C*').map { |c| Expression[c] }
    end
  }.flatten

  b[(l << rendered.join(', ')).ljust(48) << cmt]

  edata.ptr
end
2045
+
2046
def decompiler
  # make sure a C parser exists first
  parse_c('') unless c_parser
  # the Decompiler is created on first use and then reused
  @decompiler = Decompiler.new(self) if not @decompiler
  @decompiler
end
2050
def decompiler=(dc)
  # replaces the decompiler object returned by #decompiler
  @decompiler = dc
end
2053
def decompile(*addr)
  # forwards all the addresses to the decompiler object
  dc = decompiler
  dc.decompile(*addr)
end
2056
def decompile_func(addr)
  # forwards the address to the decompiler object
  dc = decompiler
  dc.decompile_func(addr)
end
2059
+
2060
+ # allows us to be AutoExe.loaded
2061
def self.autoexe_load(f, &b)
  # load through the class loader, hand back the program of the loaded object
  load(f, &b).program
end
2065
+ end
2066
+ end
2067
+
2068
+ require 'metasm/disassemble_api'