metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,2068 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/decode'
8
+
9
+
10
+ module Metasm
11
+ # holds information for decoded instructions: the original opcode, a pointer to the InstructionBlock, etc
12
class DecodedInstruction
  # One disassembled instruction, together with the bookkeeping the
  # disassembler attaches to it (owning block, address, length, comments).

  # the InstructionBlock that contains this instruction
  attr_accessor :block
  # byte offset from the start of the block (only used for the hexdump in #show)
  attr_accessor :block_offset
  # address of the first byte of the instruction in memory
  attr_accessor :address
  # the disassembled Instruction, and the opcode entry matching its name
  attr_accessor :instruction, :opcode
  # length of the instruction, in bytes
  attr_accessor :bin_length
  # array of arbitrary strings (unset until a comment is added)
  attr_accessor :comment
  # cached binding used by the backtracker to emulate this instruction
  attr_accessor :backtrace_binding

  # arg is either an existing Instruction (wrapped as-is, its opcode is looked
  # up by name in the cpu opcode list when the instruction has a cpu), or an
  # argument forwarded to Instruction.new
  # addr, when given, is stored as #address
  def initialize(arg, addr=nil)
    if arg.kind_of?(Instruction)
      @instruction = arg
      cpu = @instruction.cpu
      @opcode = cpu.opcode_list.find { |op| op.name == @instruction.opname } if cpu
    else
      @instruction = Instruction.new(arg)
    end
    @bin_length = 0
    @address = addr if addr
  end

  # forces the value returned by #next_addr
  def next_addr=(a)
    @next_addr = a
  end

  # the forced next address if one was set, else address + bin_length
  # returns nil when the instruction has no address
  def next_addr
    return unless address
    @next_addr ||= nil
    @next_addr || (address + @bin_length)
  end

  # returns a one-line textual dump of the instruction
  # when the instruction belongs to a block, the line also holds the address,
  # a hexdump of the raw bytes and up to 6 (sorted) comments
  def show
    unless block
      return "#{@instruction}#{' ; ' + @comment.join(' ') if comment}"
    end

    raw = @block.edata.data[@block.edata_ptr+@block_offset, @bin_length]
    hex = raw.unpack('C*').map { |byte| '%02x' % byte }.join
    # long instructions: keep the first 10 bytes (20 hex digits) and note how many were dropped
    hex = hex[0, 20] + "..<+#{@bin_length-10}>" if @bin_length > 12
    notes = comment ? @comment.sort[0,6].join(' ') : nil
    " #{@instruction.to_s.ljust(44)} ; @#{Expression[address]} #{hex} #{notes}"
  end

  include Renderable
  # returns the array of elements to render: address (if any), instruction, comments (if any)
  def render
    parts = []
    parts.push(Expression[address], ' ') if address
    parts.push(@instruction)
    parts.push(' ; ', @comment) if comment
    parts
  end

  # appends c to the comment list unless it is already there
  # returns the updated comment array
  def add_comment(c)
    @comment = (@comment || []) | [c]
  end

  # returns a copy of the DecodedInstruction whose #instruction is itself a copy
  def dup
    copy = super()
    copy.instruction = @instruction.dup
    copy
  end
end
79
+
80
+ # holds information on a backtracked expression near begin and end of instruction blocks (#backtracked_for)
81
class BacktraceTrace
  # Records an expression being backtracked at the begin/end of an instruction block.

  # address: instruction of the block from which the rebacktrace should start
  #   (nil when the backtrace is from block start), used with from_subfuncret
  # exclude_instr: bool, start at the preceding instruction instead of at address
  # all three are optional
  attr_accessor :address, :from_subfuncret, :exclude_instr
  # address of the instruction that initiated the backtrace
  attr_accessor :origin
  # the Expression to backtrace at this point
  attr_accessor :expr
  # the Expression originally backtracked
  attr_accessor :orig_expr
  # length of the r/w xref, in bytes
  attr_accessor :len
  # :r/:w/:x
  attr_accessor :type
  # bool: true if this maps to a :x that should not have a from when resolved
  attr_accessor :detached
  # maxdepth at the time the object was created
  attr_accessor :maxdepth

  # len and maxdepth are only stored when given
  def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil)
    @expr = expr
    @origin = origin
    @orig_expr = orig_expr
    @type = type
    @len = len if len
    @maxdepth = maxdepth if maxdepth
  end

  # NOTE(review): the hash depends on expr while eql? does not compare expr;
  # kept as-is since hash-based dedup of traces relies on it - confirm this is intended
  def hash
    [origin, expr].hash
  end

  # two traces are equal when they have the same class and the same
  # address/from_subfuncret/exclude_instr/origin/orig_expr/len/type/detached
  # (expr and maxdepth are not compared)
  def eql?(o)
    return false unless o.class == self.class
    mine   = [  address,   from_subfuncret,   exclude_instr,   origin,   orig_expr,   len,   type,   detached]
    theirs = [o.address, o.from_subfuncret, o.exclude_instr, o.origin, o.orig_expr, o.len, o.type, o.detached]
    mine == theirs
  end
  alias == eql?
end
116
+
117
+ # a cross-reference, tracks read/write/execute memory accesses by decoded instructions
118
class Xref
  # A cross-reference: one read/write/execute memory access made by a decoded instruction.

  # :r/:w/:x
  attr_accessor :type
  # length of the r/w access, in bytes
  attr_accessor :len
  # address of the instruction responsible for the xref
  attr_accessor :origin
  # XXX list of instructions intervening in the backtrace ?

  # len is only stored when given
  def initialize(type, origin, len=nil)
    @type = type
    @origin = origin
    @len = len if len
  end

  # hashes on the origin only
  def hash
    @origin.hash
  end

  # same class, same type, len and origin
  def eql?(o)
    return false unless o.class == self.class
    [type, len, origin] == [o.type, o.len, o.origin]
  end
  alias == eql?
end
136
+
137
+ # holds a list of contiguous decoded instructions, forming an uninterrupted block (except for eg CPU exceptions)
138
+ # most attributes are either a value or an array of values, use the associated iterator.
139
class InstructionBlock
  # A run of contiguous decoded instructions.
  # Most from_*/to_* attributes are either a single value or an array of values.

  # address of the first instruction
  attr_accessor :address
  # raw data the block was decoded from, and the offset of the block in it
  attr_accessor :edata, :edata_ptr
  # list of DecodedInstruction
  attr_accessor :list
  # address of the instructions giving control directly to us
  # includes the preceding instruction when the flow simply continues into us
  # does not include addresses of subfunction return instructions
  # may be nil or an array
  attr_accessor :from_normal
  # address of the instructions called/jumped to
  attr_accessor :to_normal
  # address of an instruction calling a subfunction which returns to us
  attr_accessor :from_subfuncret
  # address of the instruction executed after a called subfunction returns
  attr_accessor :to_subfuncret
  # address of instructions executed indirectly through us (callback in a subfunction, SEH...)
  # XXX from_indirect is not populated for now
  attr_accessor :from_indirect, :to_indirect
  # array of BacktraceTrace
  # a btt without address is to be backtracked when a new code path reaches the block
  # a btt with an address is for internal use (block splitting)
  attr_accessor :backtracked_for

  # arg0 is the base address of the block, or a DecodedInstruction, or an Array
  # of DecodedInstruction to initialize from (the address is then taken from the first one)
  # edata_ptr defaults to edata.ptr, or 0 without edata
  def initialize(arg0, edata=nil, edata_ptr=nil)
    @list = []
    if arg0.kind_of?(DecodedInstruction)
      @address = arg0.address
      add_di(arg0)
    elsif arg0.kind_of?(Array)
      @address = arg0.first.address unless arg0.empty?
      arg0.each { |di| add_di(di) }
    else
      @address = arg0
    end
    edata_ptr ||= (edata ? edata.ptr : 0)
    @edata = edata
    @edata_ptr = edata_ptr
    @backtracked_for = []
  end

  # total length of the block in bytes: end offset of the last instruction, 0 when empty
  def bin_length
    last = @list.last
    return 0 unless last
    last.block_offset + last.bin_length
  end

  # moves every instruction from address addr to the end into a new block, and returns it
  # raises if addr is not the address of an instruction of the block, or is the first one
  # the caller is responsible for rebacktracing new.bt_for to regenerate correct old.btt/new.btt
  def split(addr)
    idx = @list.index { |di| di.address == addr }
    raise "invalid split @#{Expression[addr]}" if not idx or idx == 0

    tail_offset = @list[idx].block_offset
    new_b = self.class.new(addr, @edata, @edata_ptr + tail_offset)
    @list.slice!(idx..-1).each { |di| new_b.add_di(di) }

    # the outgoing links now belong to the tail block
    old_normal, fresh_normal = to_normal, new_b.to_normal
    new_b.to_normal = old_normal
    @to_normal = fresh_normal
    old_subret, fresh_subret = to_subfuncret, new_b.to_subfuncret
    new_b.to_subfuncret = old_subret
    @to_subfuncret = fresh_subret

    # link both halves together
    new_b.add_from @list.last.address
    add_to new_b.address

    # traces anchored on a moved instruction follow it
    moved, kept = @backtracked_for.partition { |btt|
      btt.address and new_b.list.any? { |di| di.address == btt.address }
    }
    new_b.backtracked_for.concat(moved)
    @backtracked_for.replace(kept)

    new_b
  end

  # appends a DecodedInstruction to the block, updates di.block and di.block_offset
  # di.address is computed from the block address when unset
  def add_di(di)
    di.block = self
    di.block_offset = bin_length
    di.address = @address + di.block_offset unless di.address
    @list.push(di)
  end
end
216
+
217
+ # a factorized subfunction as seen by the disassembler
218
class DecodedFunction
  # A factorized subfunction, as seen by the disassembler.

  # binding applied (before the instruction's own) when backtracking an instruction calling us
  # for non-external functions it is filled lazily, register by register,
  # when a backtraced expression depends on it
  attr_accessor :backtrace_binding
  # same as InstructionBlock#backtracked_for
  # includes the expression responsible for the function return (eg [esp] on ia32)
  attr_accessor :backtracked_for
  # addresses of the instructions causing the function to return
  attr_accessor :return_address
  # a lambda called for dynamic backtrace_binding generation
  attr_accessor :btbind_callback
  # a lambda called for dynamic backtracked_for
  attr_accessor :btfor_callback
  # bool, if false the function is actually being disassembled
  attr_accessor :finalized
  # bool, if true the function does not return (eg exit() or ExitProcess())
  attr_accessor :noreturn

  # returns the binding to use when backtracking expr through a call to this function
  # - with a btbind_callback: the result of calling it with
  #   [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
  # - when the binding has a :thunk entry naming a function known to dasm: that function's answer
  # - else asks the cpu to update the binding for the Symbol externals of expr
  #   that are not yet bound (:unknown excluded), and returns backtrace_binding
  def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
    if btbind_callback
      return @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth]
    end

    if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
      return target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
    end

    missing = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown]
    unless missing.empty?
      dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *missing)
    end
    @backtrace_binding
  end

  # returns the list of traces to backtrack when this function is called
  # - with a btfor_callback: the result of calling it with [dasm, bt_for, funcaddr, calladdr]
  # - when the binding has a :thunk entry naming a function known to dasm: that function's answer
  # - else backtracked_for
  def get_backtracked_for(dasm, funcaddr, calladdr)
    if btfor_callback
      return @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr]
    end

    if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
      return target.get_backtracked_for(dasm, funcaddr, calladdr)
    end

    @backtracked_for
  end

  # starts with no trace and an empty binding
  def initialize
    @backtracked_for = []
    @backtrace_binding = {}
  end
end
268
+
269
class CPU
  # Disassembler-side hooks of a CPU. Several are empty here and return nil.

  # returns the thing to backtrace to find +value+ before the execution of this instruction
  # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1]
  # the instruction binding is computed once and cached in di.backtrace_binding
  # may return Expression::Unknown
  def backtrace_emu(di, value)
    wanted = Expression[value]
    bd = (di.backtrace_binding ||= get_backtrace_binding(di))
    Expression[wanted.bind(bd).reduce]
  end

  # returns a list of Expressions/Integer to backtrace to find an execution target
  # empty here: returns nil
  def get_xrefs_x(dasm, di)
  end

  # returns a list of [type, address, len]: reads (:r) first, then writes (:w)
  def get_xrefs_rw(dasm, di)
    reads  = get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] }
    writes = get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] }
    reads + writes
  end

  # returns a list of [addr, len]: the indirections found in the values of the
  # instruction binding and in the execution targets from get_xrefs_x
  def get_xrefs_r(dasm, di)
    bd = (di.backtrace_binding ||= get_backtrace_binding(di))
    sources = bd.values
    targets = get_xrefs_x(dasm, di)
    sources |= targets if targets
    direct = sources.grep(Indirection)
    nested = sources.grep(Expression).map { |e| e.expr_indirections }.flatten
    (direct + nested).map { |ind| [ind.target, ind.len] }
  end

  # returns a list of [addr, len]: the indirections found in the keys of the instruction binding
  def get_xrefs_w(dasm, di)
    bd = (di.backtrace_binding ||= get_backtrace_binding(di))
    dests = bd.keys
    direct = dests.grep(Indirection)
    nested = dests.grep(Expression).map { |e| e.expr_indirections }.flatten
    (direct + nested).map { |ind| [ind.target, ind.len] }
  end

  # checks if the expression corresponds to a function return value with the instruction
  # (eg di == 'call something' and expr == [esp])
  # empty here: returns nil
  def backtrace_is_function_return(expr, di=nil)
  end

  # updates f.backtrace_binding when a new return address has been found
  # TODO update also when anything changes inside the function (new loop found etc) - use backtracked_for ?
  # empty here: returns nil
  def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  end

  # returns if the expression is an address on the stack
  # (to avoid trying to backtrace its absolute address until we found function boundaries)
  # empty here: returns nil
  def backtrace_is_stack_address(expr)
  end

  # updates the instruction arguments in place: in every Expression argument,
  # old is bound to new (eg when a label is renamed); other arguments are left untouched
  def replace_instr_arg_immediate(i, old, new)
    i.args.map! { |arg|
      arg.kind_of?(Expression) ? Expression[arg.bind(old => new).reduce] : arg
    }
  end

  # a callback called whenever a backtrace is successful
  # di is the decodedinstruction at the backtrace's origin
  # empty here: returns nil
  def backtrace_found_result(dasm, di, expr, type, len)
  end
end
333
+
334
class ExeFormat
  # returns a string containing an asm-style section declaration for the section at addr
  def dump_section_header(addr, edata)
    header = "\n// section at #{Expression[addr]}"
    header
  end

  # returns an array of expressions that may be executed by this instruction
  # delegates to the cpu
  def get_xrefs_x(dasm, di)
    @cpu.get_xrefs_x(dasm, di)
  end

  # returns an array of [type, expression, length] that may be accessed by this instruction
  # (type is :r/:w, len is in bytes); delegates to the cpu
  def get_xrefs_rw(dasm, di)
    @cpu.get_xrefs_rw(dasm, di)
  end
end
346
+
347
+ # a disassembler class
348
+ # holds a copy of a program sections, a list of decoded instructions, xrefs
349
+ # is able to backtrace an expression from an address following the call flow (backwards)
350
+ class Disassembler
351
+ attr_accessor :program, :cpu
352
+ # binding (jointure of @sections.values.exports)
353
+ attr_accessor :prog_binding
354
+ # hash addr => edata
355
+ attr_accessor :sections
356
+ # hash addr => DecodedInstruction
357
+ attr_accessor :decoded
358
+ # hash addr => DecodedFunction (includes 'imported' functions)
359
+ attr_accessor :function
360
+ # hash addr => (array of) xrefs - access with +add_xref+/+each_xref+
361
+ attr_accessor :xrefs
362
+ # bool, true to check write xrefs on each instr disasm (default true)
363
+ attr_accessor :check_smc
364
+ # list of [addr to disassemble, (optional)who jumped to it, (optional)got there by a subfunction return]
365
+ attr_accessor :addrs_todo
366
+ # hash address => binding
367
+ attr_accessor :address_binding
368
+ # number of blocks to backtrace before aborting if no result is found (defaults to class.backtrace_maxblocks, 50 by default)
369
+ attr_accessor :backtrace_maxblocks
370
+ # maximum backtrace length for :r/:w, defaults to backtrace_maxblocks
371
+ attr_accessor :backtrace_maxblocks_data
372
+ # max bt length for backtrace_fast blocks, default=0
373
+ attr_accessor :backtrace_maxblocks_fast
374
+ # max complexity for an Expr during backtrace before abort
375
+ attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data
376
+ # maximum number of instructions inside a basic block, split past this limit
377
+ attr_accessor :disassemble_maxblocklength
378
+ # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to
379
+ attr_accessor :c_parser
380
+ # hash address => array of strings
381
+ # default dasm dump will only show comments at beginning of code blocks
382
+ attr_accessor :comment
383
+ # bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr)
384
+ attr_accessor :funcs_stdabi
385
+ # callback called whenever an instruction will backtrace :x (before the backtrace is started)
386
+ # arguments: |addr of origin, array of exprs to backtrace|
387
+ # must return the replacement array, nil == []
388
+ attr_accessor :callback_newaddr
389
+ # called whenever an instruction is decoded and added to an instruction block. arg: the new decoded instruction
390
+ # returns the new di to consider (nil to end block)
391
+ attr_accessor :callback_newinstr
392
+ # called whenever the disassembler tries to disassemble an address that has been written to. arg: the address
393
+ attr_accessor :callback_selfmodifying
394
+ # called when the disassembler stops (stopexec/undecodable instruction)
395
+ attr_accessor :callback_stopaddr
396
+ # callback called before each backtrace that may take some time
397
+ attr_accessor :callback_prebacktrace
398
+ # callback called once all addresses have been disassembled
399
+ attr_accessor :callback_finished
400
+ # pointer to the gui widget we're displayed in
401
+ attr_accessor :gui
402
+
403
+ @@backtrace_maxblocks = 50
404
+
405
+ # creates a new disassembler
406
def initialize(program, cpu=program.cpu)
  # all the state setup lives in reinitialize, so that an existing
  # disassembler can be reset the same way it is created
  reinitialize(program, cpu)
end
409
+
410
+ # resets the program
411
def reinitialize(program, cpu=program.cpu)
  # Resets the disassembler to work on program: every table is emptied and
  # every tunable is set back to its default value.
  # cpu defaults to the cpu of the program.
  @program = program
  @cpu = cpu

  # disassembly results
  @sections = {}
  @decoded = {}
  @xrefs = {}
  @function = {}
  @comment = {}

  # label bindings
  @prog_binding = {}
  @old_prog_binding = {} # same as prog_binding, but keep old var names
  @address_binding = {}

  # caches derived from the sections/bindings above: they must not survive a
  # reset, otherwise label_alias would keep answering for the previous
  # program, and each_xref would index a nil @inv_section_reloc when no
  # section has been added yet
  @label_alias_cache = nil
  @inv_section_reloc = {}

  # work queues
  @addrs_todo = []
  @addrs_done = []

  # tunables
  @check_smc = true
  @backtrace_maxblocks = @@backtrace_maxblocks
  @backtrace_maxblocks_fast = 0
  @backtrace_maxcomplexity = 40
  @backtrace_maxcomplexity_data = 5
  @disassemble_maxblocklength = 100
  @funcs_stdabi = true
end
432
+
433
+ # adds a section, updates prog_binding
434
+ # base addr is an Integer or a String (label name for offset 0)
435
def add_section(encoded, base)
  # Registers a section and merges its labels into prog_binding.
  # base is an Integer address, or a String (name of the label at offset 0).
  # The two arguments may be given in either order.
  # Raises on an invalid base. Returns self.
  encoded, base = base, encoded if base.kind_of? EncodedData

  if base.kind_of?(::Integer)
    # nothing to check for a numeric base
  elsif base.kind_of?(::String)
    exported = encoded.export[base]
    if exported and exported != 0
      raise "invalid section base #{base.inspect} - not at section start"
    end
    known = @prog_binding[base]
    if known and known != Expression[base]
      raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}"
    end
    encoded.add_export base, 0
  else
    raise "invalid section base #{base.inspect} - expected string or integer"
  end

  @sections[base] = encoded
  @label_alias_cache = nil
  encoded.binding(base).each { |label, value|
    reduced = value.reduce
    @prog_binding[label] = reduced
    @old_prog_binding[label] = reduced
  }

  # rebuild the inverse relocation table for all the sections:
  # label -> list of [section base, section edata, reloc offset, reloc] referring to it
  @inv_section_reloc = {}
  @sections.each { |sec_base, sec_edata|
    sec_edata.reloc.each { |off, rel|
      rel.target.externals.grep(::String).each { |ext|
        (@inv_section_reloc[ext] ||= []) << [sec_base, sec_edata, off, rel]
      }
    }
  }

  self
end
463
+
464
def add_xref(addr, x)
  # Records the xref x for addr.
  # @xrefs[addr] holds a single xref while there is only one,
  # and is turned into an array (without duplicates) past that.
  current = @xrefs[addr]
  if current.nil?
    @xrefs[addr] = x
  elsif x === current
    # already the one recorded xref: nothing to do
  elsif current.kind_of?(::Array)
    @xrefs[addr] = current | [x]
  else
    @xrefs[addr] = [current, x]
  end
end
472
+
473
+ # yields each xref to a given address, optionally restricted to a type
474
def each_xref(addr, type=nil)
  # Yields every xref recorded for addr (normalized first), restricted to
  # the given type if any.
  # Unless a type other than :reloc is requested, pseudo-xrefs of type :reloc
  # are also yielded for the section relocations referring to the label at addr.
  addr = normalize addr

  recorded = @xrefs[addr]
  found = if recorded.nil?
            []
          elsif recorded.kind_of?(::Array)
            recorded.dup
          else
            [recorded]
          end

  found.delete_if { |xr| xr.type != type } if type

  # add pseudo-xrefs for exe relocs
  if (not type or type == :reloc) and label = get_label_at(addr) and relocs = @inv_section_reloc[label]
    relocs.each { |sec_base, sec_edata, off, rel|
      reloc_addr = Expression[sec_base]+off
      # ignore relocs embedded in an already-listed instr
      embedded = found.any? { |xr|
        next if not xr.origin or not di_at(xr.origin)
        # a distance that cannot be computed counts as 50 bytes
        distance = (reloc_addr - xr.origin rescue 50)
        distance < @decoded[xr.origin].bin_length
      }
      found << Xref.new(:reloc, reloc_addr) unless embedded
    }
  end

  found.each { |xr| yield xr }
end
500
+
501
+ # parses a C header file, from which function prototypes will be converted to DecodedFunction when found in the code flow
502
def parse_c_file(file)
  # reads the whole file and hands it to parse_c, with the path as filename
  source = File.read(file)
  parse_c(source, file)
end
505
+
506
+ # parses a C string for function prototypes
507
def parse_c(str, filename=nil, lineno=1)
  # Parses the C source str with the disassembler's C parser.
  # The parser is created from the cpu on first use and reused afterwards.
  # '__METASM__DECODE__' is weakly defined on its lexer before each parse.
  @c_parser = @cpu.new_cparser unless @c_parser
  parser = @c_parser
  parser.lexer.define_weak('__METASM__DECODE__')
  parser.parse(str, filename, lineno)
end
512
+
513
+ # returns the canonical form of addr (absolute address integer or label of start of section + section offset)
514
def normalize(addr)
  # Returns the canonical form of addr.
  # nil/false and :default are returned untouched.
  # Anything that is not an Integer is bound against @old_prog_binding and reduced.
  # When a cpu is set, an Integer result is wrapped modulo 2**max(cpu.size, 32).
  return addr if not addr or addr == :default

  unless addr.kind_of?(Integer)
    addr = Expression[addr].bind(@old_prog_binding).reduce
  end

  if @cpu and addr.kind_of?(Integer)
    modulus = 1 << [@cpu.size, 32].max
    addr %= modulus
  end

  addr
end
520
+
521
+ # returns [edata, edata_base] or nil
522
+ # edata.ptr points to addr
523
def get_section_at(addr, memcheck=true)
  # Returns [edata, edata_base] for the section holding addr, or nil.
  # On success edata.ptr is set to the offset of addr in the section.
  # With memcheck, nil is returned when the section data responds to
  # page_invalid? and reports the page at that offset as invalid.
  page_missing = lambda { |ed|
    memcheck and ed.data.respond_to?(:page_invalid?) and ed.data.page_invalid?(ed.ptr)
  }

  addr = normalize(addr)
  if addr.kind_of?(::Integer)
    # integer-based section holding the address, else one ending right at it (end label)
    hit = @sections.find { |b, e| b.kind_of? ::Integer and addr >= b and addr < b + e.length }
    hit ||= @sections.find { |b, e| b.kind_of? ::Integer and addr == b + e.length }
    return unless hit

    sec_base, edata = hit
    edata.ptr = addr - sec_base
    return if page_missing[edata]
    [edata, sec_base]
  elsif addr.kind_of?(Expression)
    return unless addr.op == :+
    lhs, rhs = addr.lexpr, addr.rexpr

    if rhs.kind_of?(::Integer) and rhs >= 0 and lhs.kind_of?(::String) and (edata = @sections[lhs])
      # 'section_label' + offset
      edata.ptr = rhs
      return if page_missing[edata]
      [edata, Expression[lhs]]
    elsif rhs.kind_of?(::String) and not lhs and (edata = @sections[rhs])
      # bare 'section_label'
      edata.ptr = 0
      return if page_missing[edata]
      [edata, rhs]
    end
  end
end
544
+
545
+ # returns the label at the specified address, creates it if needed using "prefix_addr"
546
+ # renames the existing label if it is in the form rewritepfx_addr
547
+ # returns nil if the address is not known and is not a string
548
def auto_label_at(addr, base='xref', *rewritepfx)
  # Returns the label at addr, creating it as "<base>_<addr>" if needed.
  # An existing label of the form "<pfx>_<addr>", pfx being one of rewritepfx
  # (other than base), is renamed.
  # Returns nil when the generated name is not a plain word, or when the
  # address is in no section and no name can be derived from it.
  addr = Expression[addr].reduce
  candidate = "#{base}_#{Expression[addr]}"
  return unless candidate =~ /^\w+$/

  edata, sec_base = get_section_at(addr)
  unless edata
    # not in a section: the address may itself reduce to a label name
    reduced = Expression[addr].reduce_rec
    return reduced if reduced.kind_of? ::String
    return candidate if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty?
    return nil
  end

  label = edata.inv_export[edata.ptr]
  if not label
    label = @program.new_label(candidate)
    edata.add_export label, edata.ptr
    @label_alias_cache = nil
    @old_prog_binding[label] = @prog_binding[label] = sec_base + edata.ptr
  elsif rewritepfx.find { |pfx| base != pfx and candidate.sub(base, pfx) == label }
    previous = @old_prog_binding[candidate]
    # avoid _uuid when a -> b -> a
    renamed = (previous and previous == @prog_binding[label]) ? candidate : @program.new_label(candidate)
    rename_label label, renamed
    label = renamed
  end
  label
end
569
+
570
# Returns a hash mapping each address to the list of labels bound to it.
# The hash is built from @prog_binding on first use and memoized in
# @label_alias_cache until that cache is reset to nil.
def label_alias
  return @label_alias_cache if @label_alias_cache

  @label_alias_cache = {}
  @prog_binding.each do |name, bound_addr|
    @label_alias_cache[bound_addr] ||= []
    @label_alias_cache[bound_addr] << name
  end
  @label_alias_cache
end
580
+
581
# Decodes instructions starting from the given entrypoints, (trying to)
# follow the code flow. Runs disassemble_mainiter until it reports that
# nothing is left to do, then returns self.
def disassemble(*entrypoints)
  while disassemble_mainiter(entrypoints)
    # each call performs one unit of disassembly work
  end
  self
end
586
+
587
# entrypoints recorded so far: disassemble_mainiter appends the label it
# obtained for each entrypoint, disassemble_fast_deep merges its arguments as given
attr_reader :entrypoints
attr_writer :entrypoints
588
+
589
# Performs one operation relevant to disassembling:
# - if addresses are pending in @addrs_todo, runs one disassemble_step
# - otherwise takes the next entrypoint (shifted off the +entrypoints+
#   array, which is therefore modified) and queues it
# - otherwise runs post_disassemble and the optional finished callback
# Returns true while there was work to do, false once done.
def disassemble_mainiter(entrypoints=[])
  @entrypoints ||= []

  unless @addrs_todo.empty?
    disassemble_step
    return true
  end

  if entrypoints.empty?
    post_disassemble
    puts 'disassembly finished' if $VERBOSE
    @callback_finished[] if callback_finished
    return false
  end

  ep = entrypoints.shift
  label = auto_label_at(normalize(ep), 'entrypoint')
  if $VERBOSE and not entrypoints.empty?
    puts "start disassemble from #{label} (#{entrypoints.length})"
  end
  @entrypoints << label
  @addrs_todo << [ep]
  true
end
609
+
610
# Final pass, invoked by disassemble_mainiter when nothing is left to do.
# 1. every decoded :saveip instruction whose block has no to_subfuncret gets
#    a 'noreturn' comment, and its targets are checked with check_noreturn_function
# 2. every decoded function not yet finalized is finalized, and comments
#    describing its binding and return addresses are attached to its address
def post_disassemble
  @decoded.each_value do |di|
    next unless di.kind_of? DecodedInstruction
    next unless di.opcode and di.opcode.props[:saveip]
    next if di.block.to_subfuncret

    di.add_comment 'noreturn'
    # there is no need to re-loop on all :saveip as check_noret is transitive
    di.block.each_to_normal { |fa| check_noreturn_function(fa) }
  end

  @function.each do |addr, f|
    next unless @decoded[addr]

    unless f.finalized
      f.finalized = true
      puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, addr, f, f.return_address)
      detect_function_thunk(addr) unless f.return_address
    end

    @comment[addr] ||= []

    # bindings that actually change something and have a known value
    known = f.backtrace_binding.reject { |k, v|
      Expression[k] == Expression[v] or Expression[v] == Expression::Unknown
    }
    # all keys bound to Unknown are folded into a single comma-separated entry
    unknown_keys = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact
    unless unknown_keys.empty?
      folded = unknown_keys.map { |u| Expression[u].to_s }.sort.join(',')
      known[folded] = Expression::Unknown
    end

    binding_desc = known.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')
    @comment[addr] |= ["function binding: " + binding_desc]
    if f.return_address
      ends_desc = f.return_address.map { |ra| Expression[ra] }.join(', ')
      @comment[addr] |= ["function ends at " + ends_desc]
    end
  end
end
638
+
639
# Disassembles one block taken from @addrs_todo and adds the next addresses
# to handle to @addrs_todo.
# If @function[:default] exists, jumps to unknown locations are interpreted
# as jumps to @function[:default].
#
# A todo entry is [addr, from, from_subfuncret]; entries with a +from+ are
# remembered in @addrs_done so they are handled only once.
def disassemble_step
  todo = @addrs_todo.pop
  return if not todo or @addrs_done.include? todo
  @addrs_done << todo if todo[1]

  # from_subfuncret is true if from is the address of a function call that returns to addr
  addr, from, from_subfuncret = todo

  return if from == Expression::Unknown

  if $DEBUG
    puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})"
  end

  addr = normalize(addr)

  # we come back from a subfunction call: finalize the callee(s) first
  if from and from_subfuncret and di_at(from)
    @decoded[from].block.each_to_normal { |subfunc|
      subfunc = normalize(subfunc)
      f = @function[subfunc]
      next if not f or f.finalized
      f.finalized = true
      puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
      @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address)
      detect_function_thunk(subfunc) if not f.return_address
    }
  end

  # bf ends up holding the block or function whose backtracked_for must be replayed
  if di = @decoded[addr]
    if di.kind_of? DecodedInstruction
      # jumping in the middle of a known block: split it (this updates di.block)
      split_block(di.block, di.address) if not di.block_head?
      if from and from != :default
        di.block.add_from(from, from_subfuncret ? :subfuncret : :normal)
      end
      bf = di.block
    elsif di == true
      bf = @function[addr]
    end
  elsif bf = @function[addr]
    detect_function_thunk_noreturn(from) if bf.noreturn
  elsif s = get_section_at(addr)
    # undecoded address inside a known section: decode a new block
    block = InstructionBlock.new(normalize(addr), s[0])
    if from and from != :default
      block.add_from(from, from_subfuncret ? :subfuncret : :normal)
    end
    disassemble_block(block)
  elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and
      s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function
    # named external whose prototype is known to the C parser
    bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s)
    detect_function_thunk_noreturn(from) if bf.noreturn
  elsif from
    if bf = @function[:default]
      puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG
      if name = Expression[addr].reduce_rec and name.kind_of? ::String
        @function[addr] = @function[:default].dup
      else
        addr = :default
      end
      @decoded[from].block.add_to addr if @decoded[from]
    else
      puts "not disassembling unknown address #{Expression[addr]} from #{Expression[from]}" if $DEBUG
    end
    if from != :default
      add_xref(addr, Xref.new(:x, from))
      add_xref(Expression::Unknown, Xref.new(:x, from))
    end
  else
    puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE
  end

  bff = nil
  if bf and from and from != :default
    bff = if bf.kind_of? DecodedFunction
      bf.get_backtracked_for(self, addr, from)
    else
      bf.backtracked_for
    end
  end
  return if not bff

  # replay the pending backtraces through the new incoming edge
  bff.each { |btt|
    next if btt.address
    if @decoded[from].kind_of? DecodedInstruction and @decoded[from].opcode.props[:saveip] and
        not from_subfuncret and not @function[addr]
      backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached)
    end
    next if backtrace_check_funcret(btt, addr, from)
    backtrace(btt.expr, from,
      :include_start => true, :from_subfuncret => from_subfuncret,
      :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type,
      :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth)
  }
end
727
+
728
# Splits an InstructionBlock at +address+ and re-runs the backtraces recorded
# in the new block's backtracked_for so that they also cover the first half.
# May also be invoked with a single address, eg split_block(0x401012); in
# that form returns nil unless a DecodedInstruction is known at that address.
# Returns the block starting at +address+.
def split_block(block, address=nil)
  unless address
    target = block
    return unless @decoded[target].kind_of? DecodedInstruction
    block = @decoded[target].block
    address = target
  end

  # already a block boundary, nothing to split
  return block if address == block.address

  tail = block.split address
  # iterate over a copy of the list
  tail.backtracked_for.dup.each do |btt|
    backtrace(btt.expr, btt.address,
      :only_upto => block.list.last.address,
      :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
      :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
      :detached => btt.detached, :maxdepth => btt.maxdepth)
  end
  tail
end
745
+
746
# Disassembles a new instruction block at block.address (must be normalized).
# The block must be empty on entry (raises otherwise).
#
# Decodes at most @disassemble_maxblocklength instructions. Inside the loop,
# +break+ means the block continues to the following instruction (the
# fall-through address is then backtraced as an :x xref after the loop),
# whereas +return+ ends the method at once with nil.
# Returns the block when the loop was left normally.
def disassemble_block(block)
  raise unless block.list.empty?
  di_addr = block.address
  delay_slot = nil
  di = nil

  # try not to run for too long
  @disassemble_maxblocklength.times {
    # check collision into a known block
    break if @decoded[di_addr]

    # check self-modifying code
    # (only the exact instruction address is checked, not unaligned rewrites)
    if @check_smc
      waddr = di_addr
      each_xref(waddr, :w) { |x|
        puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE
        @comment[di_addr] ||= []
        @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"]
        @callback_selfmodifying[di_addr] if callback_selfmodifying
        return
      }
    end

    # decode instruction
    block.edata.ptr = di_addr - block.address + block.edata_ptr
    di = @cpu.decode_instruction(block.edata, di_addr)
    if not di
      if $VERBOSE
        ed = block.edata
        reason = if ed.ptr >= ed.length
          "end of section reached"
        else
          "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"
        end
        puts "#{reason} at #{Expression[di_addr]}"
      end
      return
    end

    @decoded[di_addr] = di
    block.add_di di
    puts di if $DEBUG

    # the callback may substitute the instruction, or veto it by returning nil/false
    di = @callback_newinstr[di] if callback_newinstr
    return if not di
    block = di.block

    di_addr = di.next_addr

    backtrace_xrefs_di_rw(di)

    if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
      # do not backtrace until delay slot is finished (eg MIPS: di is a
      # ret and the delay slot holds stack fixup needed to calc func_binding)
      # XXX if the delay slot is also xref_x or :stopexec it is ignored
      delay_slot ||= [di, @cpu.delay_slot(di)]
    end

    if delay_slot
      # note: di is rebound to the instruction that opened the delay slot
      di, delay = delay_slot
      if delay == 0 or not di_addr
        backtrace_xrefs_di_x(di)
        return if di.opcode.props[:stopexec] or not di_addr
        break
      end
      delay_slot[1] = delay - 1
    end
  }

  next_addrs = [di_addr]
  if callback_newaddr
    next_addrs = @callback_newaddr[block.list.last.address, next_addrs] || next_addrs
  end
  next_addrs.each { |na| backtrace(na, di.address, :origin => di.address, :type => :x) }

  block
end
819
+
820
# Retrieves the list of execution crossrefs due to the decodedinstruction,
# as computed by @program.
# Returns a list of symbolic expressions.
def get_xrefs_x(di)
  xrefs = @program.get_xrefs_x(self, di)
  xrefs
end
825
+
826
# Retrieves the list of data read/write crossrefs due to the
# decodedinstruction, as computed by @program.
# Returns a list of [type, symbolic expression, length].
def get_xrefs_rw(di)
  xrefs = @program.get_xrefs_rw(self, di)
  xrefs
end
831
+
832
# Runs disassemble_fast from each of the given entrypoints, also
# disassembling the subfunctions found along the way.
# The entrypoints are merged into @entrypoints.
def disassemble_fast_deep(*entrypoints)
  @entrypoints = (@entrypoints || []) | entrypoints

  entrypoints.each do |ep|
    do_disassemble_fast_deep(normalize(ep))
  end
end
839
+
840
# Worker for disassemble_fast_deep: disassembles fast from +ep+ and recurses
# into every subfunction address yielded by disassemble_fast. Once a
# subfunction is decoded, the calling instruction is recorded as a normal
# 'from' of the subfunction's first block.
def do_disassemble_fast_deep(ep)
  disassemble_fast(ep) do |fa, di|
    subfunc = normalize(fa)
    do_disassemble_fast_deep(subfunc)
    next unless di
    callee_di = di_at(subfunc)
    callee_di.block.add_from_normal(di.address) if callee_di
  end
end
849
+
850
# Disassembles fast from one entrypoint, see disassemble_fast_step.
# +maxdepth+ is decremented after each step; when it reaches exactly 0, the
# pending entries whose address is not already decoded are dropped.
# The block, if given, is forwarded to disassemble_fast_step.
# Finishes by running check_noreturn_function on the entrypoint.
def disassemble_fast(entrypoint, maxdepth=-1, &b)
  pending = [entrypoint]
  while not pending.empty?
    disassemble_fast_step(pending, &b)
    maxdepth -= 1
    if maxdepth == 0
      pending.delete_if { |entry| not @decoded[normalize(entry[0])] }
    end
  end
  check_noreturn_function(entrypoint)
end
861
+
862
# Disassembles one block popped from the +todo+ array, see
# disassemble_fast_block. Entries are [addr, from, from_subfuncret].
# New addresses to handle are appended to +todo+.
# Does nothing when +todo+ is empty.
def disassemble_fast_step(todo, &b)
  entry = todo.pop
  return if not entry
  addr, from, from_subfuncret = entry

  addr = normalize(addr)

  if di = @decoded[addr]
    if di.kind_of? DecodedInstruction
      # landing in the middle of a known block: split it there
      split_block(di.block, di.address) if not di.block_head?
      if from and from != :default
        di.block.add_from(from, from_subfuncret ? :subfuncret : :normal)
      end
    end
  elsif s = get_section_at(addr)
    block = InstructionBlock.new(normalize(addr), s[0])
    if from and from != :default
      block.add_from(from, from_subfuncret ? :subfuncret : :normal)
    end
    todo.concat disassemble_fast_block(block, &b)
  elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr]
    # named address outside of known sections: use the C prototype if
    # available, else a copy of the default function
    if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function
      @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s)
      detect_function_thunk_noreturn(from) if @function[addr].noreturn
    elsif @function[:default]
      @function[addr] = @function[:default].dup
    end
  end

  disassemble_fast_checkfunc(addr)
end
889
+
890
# Checks if +addr+ has an :x xref coming from a :saveip instruction; if so,
# marks it as a function: labels it 'sub_...', registers a finalized
# DecodedFunction and runs the thunk detection.
# Does nothing unless +addr+ holds a DecodedInstruction with no function yet.
def disassemble_fast_checkfunc(addr)
  return unless @decoded[addr].kind_of? DecodedInstruction and not @function[addr]

  called = false
  each_xref(addr, :x) do |xr|
    caller_di = di_at(xr.origin)
    called = true if caller_di and caller_di.opcode.props[:saveip]
  end
  return unless called

  auto_label_at(addr, 'sub', 'loc', 'xref')
  # XXX use default_btbind_callback ?
  @function[addr] = DecodedFunction.new
  @function[addr].finalized = true
  detect_function_thunk(addr)
  puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
end
907
+
908
# Disassembles fast a new instruction block at block.address (must be normalized).
# +block+ may also be an address, in which case the InstructionBlock is created here.
# - does not recurse into subfunctions
# - assumes all :saveip returns, except those pointing to a subfunc with noreturn
# - yields subfunction addresses (targets of :saveip)
# - only backtraces for :x with maxdepth @backtrace_maxblocks_fast
#   (ie handles only basic push+ret)
# - assumes @addrs_todo is empty (it is reset here)
# Returns a todo-style array of [addr, from(, from_subfuncret)].
def disassemble_fast_block(block, &b)
  if not block.kind_of? InstructionBlock
    block = InstructionBlock.new(normalize(block), get_section_at(block)[0])
  end
  di_addr = block.address
  delay_slot = nil
  di = nil
  ret = []

  return ret if @decoded[di_addr]

  @disassemble_maxblocklength.times {
    # collision with an already decoded instruction
    break if @decoded[di_addr]

    # decode instruction
    block.edata.ptr = di_addr - block.address + block.edata_ptr
    di = @cpu.decode_instruction(block.edata, di_addr)
    return ret if not di

    @decoded[di_addr] = di
    block.add_di di
    puts di if $DEBUG

    # the callback may substitute the instruction, or veto it by returning nil/false
    di = @callback_newinstr[di] if callback_newinstr
    return ret if not di

    di_addr = di.next_addr

    if di.opcode.props[:stopexec] or di.opcode.props[:setip]
      if di.opcode.props[:setip]
        # backtrace pushes the resolved targets into @addrs_todo
        @addrs_todo = []
        @program.get_xrefs_x(self, di).each { |expr|
          backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
        }
      end
      if di.opcode.props[:saveip]
        # call: targets are handled by disassemble_fast_block_subfunc
        @addrs_todo = []
        ret.concat disassemble_fast_block_subfunc(di, &b)
      else
        ret.concat @addrs_todo
        @addrs_todo = []
      end
      delay_slot ||= [di, @cpu.delay_slot(di)]
    end

    if delay_slot
      if delay_slot[1] <= 0
        return ret if delay_slot[0].opcode.props[:stopexec]
        break
      end
      delay_slot[1] -= 1
    end
  }

  # the block falls through to di_addr
  di.block.add_to_normal(di_addr)
  ret << [di_addr, di.address]
end
971
+
972
# Handles the case where disassemble_fast encounters a call to a subfunction.
# +di+ is the calling instruction. Each call target is checked with
# disassemble_fast_checkfunc and yielded (with +di+) when a block is given.
# Returns a todo-style array: the addresses found by backtracking detached
# entries of the callee's backtracked_for, plus the return address (flagged
# from_subfuncret) unless every callee is known not to return to it.
def disassemble_fast_block_subfunc(di)
  targets = di.block.to_normal.to_a
  do_ret = targets.empty?
  ret = []
  # address execution resumes at after the call (past any delay slot)
  na = di.next_addr + di.bin_length * @cpu.delay_slot(di)

  targets.each do |fa|
    fa = normalize(fa)
    disassemble_fast_checkfunc(fa)
    yield fa, di if block_given?

    f = @function[fa]
    bf = f.get_backtracked_for(self, fa, di.address) if f
    if f and bf and not bf.empty?
      # this includes retaddr unless f is noreturn
      bf.each do |btt|
        next if btt.type != :x
        bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max)
        if btt.detached
          ret.concat bt # callback argument
        elsif bt.find { |a| normalize(a) == na }
          do_ret = true
        end
      end
    elsif not f or not f.noreturn
      do_ret = true
    end
  end

  if do_ret
    di.block.add_to_subfuncret(na)
    ret << [na, di.address, true]
    di.block.add_to_normal :default if not di.block.to_normal and @function[:default]
  end
  ret
end
1004
+
1005
# Backtraces the data read/write xrefs this instruction is responsible for.
# When @check_smc is set, a write landing exactly on the address of a decoded
# instruction adds an "overwritten by" comment to that instruction
# (unaligned overwrites are not checked).
def backtrace_xrefs_di_rw(di)
  get_xrefs_rw(di).each do |type, ptr, len|
    resolved = backtrace(ptr, di.address, :origin => di.address, :type => type, :len => len)
    resolved.each do |xaddr|
      next if xaddr == Expression::Unknown
      next unless @check_smc and type == :w

      waddr = xaddr
      if wdi = di_at(waddr)
        puts "W: disasm: #{di} overwrites #{wdi}" if $VERBOSE
        wdi.add_comment "overwritten by #{di}"
      end
    end
  end
end
1022
+
1023
# Backtraces the execution xrefs of the instruction.
# The list of target expressions comes from @program and may be replaced by
# the optional newaddr callback (its result is used unless nil/false).
def backtrace_xrefs_di_x(di)
  targets = @program.get_xrefs_x(self, di)
  if callback_newaddr
    targets = @callback_newaddr[di.address, targets] || targets
  end
  targets.each do |expr|
    backtrace(expr, di.address, :origin => di.address, :type => :x)
  end
end
1029
+
1030
# Checks if the function starting at funcaddr is an external function thunk
# (eg jmp [SomeExtFunc]).
# The argument must be the address of a decodedinstruction that is the first
# of a function, which must not have return_addresses.
#
# Walks at most 5 blocks of at most 4 instructions each, requiring a linear
# flow. When the walk ends on an address different from funcaddr, the
# function's backtrace_binding is forwarded to it (:thunk) and noreturn is
# inherited. When that address reduces to a String name and the function
# label has the 'sub_' form, the label is renamed to "thunk_<name>".
#
# Returns the value of rename_label when a rename is done, nil otherwise
# (including when no label can be obtained for funcaddr).
def detect_function_thunk(funcaddr)
  # check thunk linearity (no conditional branch etc)
  addr = funcaddr
  count = 0
  while b = block_at(addr)
    count += 1
    return if count > 5 or b.list.length > 4
    if b.to_subfuncret and not b.to_subfuncret.empty?
      return if b.to_subfuncret.length != 1
      addr = normalize(b.to_subfuncret.first)
      return if not b.to_normal or b.to_normal.length != 1
      # check that the subfunction is simple (eg get_eip)
      return if not sf = @function[normalize(b.to_normal.first)]
      return if not btb = sf.backtrace_binding
      btb = btb.dup
      btb.delete_if { |k, v| Expression[k] == Expression[v] }
      return if btb.length > 2 or btb.values.include? Expression::Unknown
    else
      return if not bt = b.to_normal
      if bt.include? :default
        addr = :default
        break
      elsif bt.length != 1
        return
      end
      addr = normalize(bt.first)
    end
  end

  fname = Expression[addr].reduce_rec
  if funcaddr != addr and f = @function[funcaddr]
    # forward get_backtrace_binding to target
    f.backtrace_binding = { :thunk => addr }
    f.noreturn = true if @function[addr] and @function[addr].noreturn
  end
  return if not fname.kind_of? ::String

  l = auto_label_at(funcaddr, 'sub', 'loc')
  # auto_label_at returns nil when it cannot build a label for the address:
  # without this guard l[0, 4] would raise NoMethodError on nil
  return if not l or l[0, 4] != 'sub_'
  puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG
  rename_label(l, @program.new_label("thunk_#{fname}"))
end
1074
+
1075
# Called when reaching a noreturn function call, with the call address.
# It is responsible for detecting the actual 'call' instruction leading to
# this noreturn function, and eventually marking the call target as a thunk.
#
# Starting from +addr+, follows up to 5 instructions backwards as long as
# the current block has exactly one predecessor. When a :saveip instruction
# whose block has no to_subfuncret is found and it has a single decoded
# target, that target is registered as a function and checked with
# detect_function_thunk.
def detect_function_thunk_noreturn(addr)
  5.times {
    di = di_at(addr)
    return if not di

    if di.opcode.props[:saveip] and not di.block.to_subfuncret
      # found the call instruction
      if di.block.to_normal.to_a.length == 1
        taddr = normalize(di.block.to_normal.first)
        if di_at(taddr)
          @function[taddr] ||= DecodedFunction.new
          return detect_function_thunk(taddr)
        end
      end
      break
    end

    # not the call: step to the unique predecessor, if any
    preds = di.block.from_normal.to_a + di.block.from_subfuncret.to_a
    break if preds.length != 1
    addr = preds.first
  }
end
1099
+
1100
# Given an address, detects if it may be a noreturn function.
# It is one if each of its end blocks (blocks with no successor listed by
# function_blocks) finishes either with a :saveip instruction whose targets
# are all noreturn functions, or with a :stopexec instruction that is not :setip.
# If so, creates @function[fa] if needed and sets noreturn = true on it.
# Should only be called with fa = target of a call.
def check_noreturn_function(fa)
  fb = function_blocks(fa, false, false)
  end_blocks = fb.keys.find_all { |k| fb[k] == [] }
  return if end_blocks.empty?

  never_returns = end_blocks.all? do |la|
    b = block_at(la)
    di = b.list.last
    next if not di
    calls_noreturn = (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa|
      tf = function_at(tfa) and tf.noreturn
    })
    calls_noreturn or (di.opcode.props[:stopexec] and not di.opcode.props[:setip])
  end
  return if not never_returns

  @function[fa] ||= DecodedFunction.new
  @function[fa].noreturn = true
end
1120
+
1121
+
1122
# walks the backtrace tree from an address, passing along an object
#
# the block is called with (event, object, hash); every hash also carries
# :loopdetect. the events are (1st = event, followed by hash keys)
#
# for each decoded instruction encountered:
#  :di   :di
#
# when backtracking to a block through a decodedfunction:
# (yielded for each of the block's subfunctions)
# (the decodedinstruction responsible for the call will be yielded next)
#  :func   :func, :funcaddr, :addr
#
# when jumping from one block to another (excluding :loop): # XXX include :loops ?
#  :up   :from, :to, :sfret, :real_to
#
# when the backtrack has nothing to backtrack to (eg program entrypoint):
#  :end   :addr
#
# when the backtrack stops by taking too long to complete:
#  :maxdepth   :addr
#
# when the backtrack stops for encountering the specified stop address:
#  :stopaddr   :addr
#
# when rebacktracking a block already seen in the current branch:
# (looptrace is an array of [obj, block end addr, from_subfuncret], from oldest to newest)
#  :loop   :looptrace
#
# when the address does not match a known instruction/function:
#  :unknown_addr   :addr
#
# the block return value is used as follows for :di, :func, :up and :loop:
#  false => the backtrace stops for the branch
#  nil => the backtrace continues with the current object
#  anything else => the backtrace continues with this object
#
# method arguments:
#  obj is the initial value of the object
#  addr is the address where the backtrace starts
#  include_start is a bool specifying if the backtrace should start at addr or just before
#  from_subfuncret is a bool specifying if addr points to a decodedinstruction that calls a subfunction
#  stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it
#  maxdepth is the maximum depth (in blocks) for each backtrace branch.
#  (defaults to dasm.backtrace_maxblocks, which defaults to Dasm.backtrace_maxblocks)
def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth)
  start_addr = normalize(addr)
  stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array

  # pending work: array of [obj, addr, from_type, loopdetect]
  # loopdetect is an array of [obj, addr, from_type] of each end of block encountered
  todo = []

  # array of [obj, blockaddr] already walked up from
  # avoids rewalking the same value
  done = []

  # updates todo with the addresses to backtrace next
  walk_up = lambda { |w_obj, w_addr, w_loopdetect|
    if w_loopdetect.length > maxdepth
      yield :maxdepth, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
    elsif stopaddr and stopaddr.include?(w_addr)
      yield :stopaddr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
    elsif w_di = @decoded[w_addr] and w_di != w_di.block.list.first and w_di.address != w_di.block.address
      # in the middle of a block: simply step to the previous instruction
      prevdi = w_di.block.list[w_di.block.list.index(w_di)-1]
      todo << [w_obj, prevdi.address, :normal, w_loopdetect]
    elsif w_di
      # at a block head: follow every incoming edge
      next if done.include? [w_obj, w_addr]
      done << [w_obj, w_addr]
      hadsomething = false
      w_di.block.each_from { |f_addr, f_type|
        next if f_type == :indirect
        hadsomething = true
        o_f_addr = f_addr
        f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
        seen = w_loopdetect.find { |_s_obj, s_addr, s_type| s_addr == f_addr and s_type == f_type }
        if seen
          f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(seen)..-1], :loopdetect => w_loopdetect)
          if f_obj and f_obj != w_obj # should avoid infinite loops
            f_loopdetect = w_loopdetect[0...w_loopdetect.index(seen)]
          end
        else
          f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => f_type, :loopdetect => w_loopdetect, :real_to => o_f_addr)
        end
        next if f_obj == false
        f_obj ||= w_obj
        f_loopdetect ||= w_loopdetect
        # only count non-trivial paths in loopdetect (ignore linear links)
        add_detect = [[f_obj, f_addr, f_type]]
        if @decoded[f_addr].kind_of? DecodedInstruction and f_block = @decoded[f_addr].block and
            ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and
              f_block.to_normal == [w_di.address] and f_block.to_subfuncret.to_a == []) or
             (w_di.block.from_subfuncret == [f_addr] and f_block.to_subfuncret == [w_di.address]))
          add_detect = []
        end
        todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ]
      }
      yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if not hadsomething
    elsif @function[w_addr] and w_addr != :default and w_addr != Expression::Unknown
      # undecoded function: follow the :x xrefs leading to it
      next if done.include? [w_obj, w_addr]
      oldlen = todo.length
      each_xref(w_addr, :x) { |x|
        f_addr = x.origin
        o_f_addr = f_addr
        f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
        seen = w_loopdetect.find { |_s_obj, s_addr, _s_type| s_addr == w_addr }
        if seen
          f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(seen)..-1], :loopdetect => w_loopdetect)
          if f_obj and f_obj != w_obj
            f_loopdetect = w_loopdetect[0...w_loopdetect.index(seen)]
          end
        else
          f_obj = yield(:up, w_obj, :from => w_addr, :to => f_addr, :sfret => :normal, :loopdetect => w_loopdetect, :real_to => o_f_addr)
        end
        next if f_obj == false
        f_obj ||= w_obj
        f_loopdetect ||= w_loopdetect
        todo << [f_obj, f_addr, :normal, f_loopdetect + [[f_obj, f_addr, :normal]] ]
      }
      yield :end, w_obj, :addr => w_addr, :loopdetect => w_loopdetect if todo.length == oldlen
    else
      yield :unknown_addr, w_obj, :addr => w_addr, :loopdetect => w_loopdetect
    end
  }

  if include_start
    todo << [obj, start_addr, from_subfuncret ? :subfuncret : :normal, []]
  else
    walk_up[obj, start_addr, []]
  end

  until todo.empty?
    obj, addr, type, loopdetect = todo.pop
    di = @decoded[addr]
    if di and type == :subfuncret
      # returning from a subfunction call: let the caller see each callee first
      di.block.each_to_normal { |sf|
        f = @function[normalize(sf)]
        next if not f
        s_obj = yield(:func, obj, :func => f, :funcaddr => sf, :addr => addr, :loopdetect => loopdetect)
        next if s_obj == false
        s_obj ||= obj
        seen = loopdetect.find { |_s_obj, s_addr, s_type| addr == s_addr and s_type == :normal }
        if seen
          loop_obj = yield(:loop, s_obj, :looptrace => loopdetect[loopdetect.index(seen)..-1], :loopdetect => loopdetect)
          if loop_obj and loop_obj != s_obj
            s_loopdetect = loopdetect[0...loopdetect.index(seen)]
          end
          next if loop_obj == false
          s_obj = loop_obj if loop_obj
        end
        s_loopdetect ||= loopdetect
        todo << [s_obj, addr, :normal, s_loopdetect + [[s_obj, addr, :normal]] ]
      }
    elsif di
      # walk the instructions of the block backwards, from di to the block head
      # XXX should interpolate index if di is not in block.list, but what if the addresses are not Comparable ?
      di.block.list[0..(di.block.list.index(di) || -1)].reverse_each { |di_|
        di = di_ # XXX not sure..
        if stopaddr and ea = di.next_addr and stopaddr.include?(ea)
          yield :stopaddr, obj, :addr => ea, :loopdetect => loopdetect
          break
        end
        ex_obj = obj
        obj = yield(:di, obj, :di => di, :loopdetect => loopdetect)
        break if obj == false
        obj ||= ex_obj
      }
      walk_up[obj, di.block.address, loopdetect] if obj
    elsif @function[addr] and addr != :default and addr != Expression::Unknown
      ex_obj = obj
      obj = yield(:func, obj, :func => @function[addr], :funcaddr => addr, :addr => addr, :loopdetect => loopdetect)
      next if obj == false
      obj ||= ex_obj
      walk_up[obj, addr, loopdetect]
    else
      yield :unknown_addr, obj, :addr => addr, :loopdetect => loopdetect
    end
  end
end
1293
+
1294
# Holds a backtrace result until a snapshot_addr is encountered.
# Plain wrapper around the value passed to the constructor, exposed
# read/write as +exprs+.
class StoppedExpr
  attr_reader :exprs
  attr_writer :exprs

  def initialize(e)
    @exprs = e
  end
end
1299
+
1300
+
1301
# when set, the backtrace-related methods print progress messages with puts
attr_reader :debug_backtrace
attr_writer :debug_backtrace
1302
+
1303
+ # backtraces the value of an expression from start_addr
1304
+ # updates blocks backtracked_for if type is set
1305
+ # uses backtrace_walk
1306
+ # all values returned are from backtrace_check_found (which may generate xrefs, labels, addrs to dasm) unless :no_check is specified
1307
+ # options:
1308
+ # :include_start => start backtracking including start_addr
1309
+ # :from_subfuncret =>
1310
+ # :origin => origin to set for xrefs when resolution is successful
1311
+ # :orig_expr => initial expression
1312
+ # :type => xref type (:r, :w, :x, :addr) when :x, the results are added to #addrs_todo
1313
+ # :len => xref len (for :r/:w)
1314
+ # :snapshot_addr => addr (or array of) where the backtracker should stop
1315
+ # if a snapshot_addr is given, values found are ignored if continuing the backtrace does not get to it (eg maxdepth/unk_addr/end)
1316
+ # :maxdepth => maximum number of blocks to backtrace
1317
+ # :detached => true if backtracking type :x and the result should not have from = origin set in @addrs_todo
1318
+ # :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace
1319
+ # :log => Array, will be updated with the backtrace evolution
1320
+ # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto
1321
+ # :no_check => don't use backtrace_check_found (will not backtrace indirection static values)
1322
+ # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check)
1323
# :max_complexity_data => complexity limit used in place of :max_complexity when type is :r or :w
#
# walks the code backwards from start_addr, applying each instruction/subfunction/address
# binding to expr, until the expression is resolved or a limit is reached
# returns an array of Expression/Integer (possibly containing Expression::Unknown)
# raises ArgumentError if nargs holds a key that is not a known option
# note: the recognized options are removed from the nargs hash passed by the caller
def backtrace(expr, start_addr, nargs={})
  include_start = nargs.delete :include_start
  from_subfuncret = nargs.delete :from_subfuncret
  origin = nargs.delete :origin
  origexpr = nargs.delete :orig_expr
  type = nargs.delete :type
  len = nargs.delete :len
  snapshot_addr = nargs.delete(:snapshot_addr) || nargs.delete(:stopaddr)
  maxdepth = nargs.delete(:maxdepth) || @backtrace_maxblocks
  detached = nargs.delete :detached
  max_complexity = nargs.delete(:max_complexity) || @backtrace_maxcomplexity
  # the data limit has its own key: :max_complexity was already consumed by the line above,
  # so re-reading it here would always yield nil and make the limit impossible to override
  max_complexity_data = nargs.delete(:max_complexity_data) || @backtrace_maxcomplexity_data
  bt_log = nargs.delete :log  # array to receive the ongoing backtrace info
  only_upto = nargs.delete :only_upto
  no_check = nargs.delete :no_check
  terminals = nargs.delete(:terminals) || []
  raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty?

  expr = Expression[expr]

  origexpr = expr if origin == start_addr

  start_addr = normalize(start_addr)
  di = @decoded[start_addr]

  if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
    puts " not backtracking stack address #{expr}" if debug_backtrace
    return []
  end

  # data accesses use their own (separately configured) limits
  if type == :r or type == :w
    max_complexity = max_complexity_data
    maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data
  end

  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
      di, origin, type, len, maxdepth, detached))
    # no need to update backtracked_for
    return vals
  elsif maxdepth <= 0
    return [Expression::Unknown]
  end

  # create initial backtracked_for
  if type and origin == start_addr and di
    btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1)
    btt.address = di.address
    btt.exclude_instr = true if not include_start
    btt.from_subfuncret = true if from_subfuncret and include_start
    btt.detached = true if detached
    di.block.backtracked_for |= [btt]
  end

  @callback_prebacktrace[] if callback_prebacktrace

  # list of Expression/Integer
  result = []

  puts "backtracking #{type} #{expr} from #{di || Expression[start_addr || 0]} for #{@decoded[origin]}" if debug_backtrace or $DEBUG
  bt_log << [:start, expr, start_addr] if bt_log
  backtrace_walk(expr, start_addr, include_start, from_subfuncret, snapshot_addr, maxdepth) { |ev, expr_, h|
    expr = expr_
    case ev
    when :unknown_addr, :maxdepth
      puts " backtrace end #{ev} #{expr}" if debug_backtrace
      result |= [expr] if not snapshot_addr
      @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin
    when :end
      if not expr.kind_of? StoppedExpr
        oldexpr = expr
        expr = backtrace_emu_blockup(h[:addr], expr)
        puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
        bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
        if expr != oldexpr and not snapshot_addr and vals = (no_check ?
            (!need_backtrace(expr, terminals) and [expr]) :
            backtrace_check_found(expr, nil, origin, type, len,
              maxdepth-h[:loopdetect].length, detached))
          result |= vals
          next
        end
      end
      puts " backtrace end #{ev} #{expr}" if debug_backtrace
      if not snapshot_addr
        result |= [expr]

        btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
        btt.detached = true if detached
        @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]]
        @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default
        @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin
      end
    when :stopaddr
      if not expr.kind_of? StoppedExpr
        oldexpr = expr
        expr = backtrace_emu_blockup(h[:addr], expr)
        puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
        bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr
      end
      puts " backtrace end #{ev} #{expr}" if debug_backtrace
      result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr])
    when :loop
      next false if expr.kind_of? StoppedExpr
      t = h[:looptrace]
      oldexpr = t[0][0]
      next false if expr == oldexpr  # unmodifying loop
      puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace
      false
    when :up
      next false if only_upto and h[:to] != only_upto
      next expr if expr.kind_of? StoppedExpr
      oldexpr = expr
      expr = backtrace_emu_blockup(h[:from], expr)
      puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
      bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log

      if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
          backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
            maxdepth-h[:loopdetect].length, detached))
        if snapshot_addr
          expr = StoppedExpr.new vals
          next expr
        else
          result |= vals
          bt_log << [:found, vals, h[:from]] if bt_log
          next false
        end
      end

      if origin and type
        # update backtracked_for
        update_btf = lambda { |btf, new_btt|
          # returns true if btf was modified
          if i = btf.index(new_btt)
            btf[i] = new_btt if btf[i].maxdepth < new_btt.maxdepth
          else
            btf << new_btt
          end
        }

        btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
        btt.detached = true if detached
        if x = di_at(h[:from])
          update_btf[x.block.backtracked_for, btt]
        end
        if x = @function[h[:from]] and h[:from] != :default
          update_btf[x.backtracked_for, btt]
        end
        if x = di_at(h[:to])
          btt = btt.dup
          btt.address = x.address
          btt.from_subfuncret = true if h[:sfret] == :subfuncret
          if backtrace_check_funcret(btt, h[:from], h[:real_to] || h[:to])
            puts " function returns to caller" if debug_backtrace
            next false
          end
          if not update_btf[x.block.backtracked_for, btt]
            puts " already backtraced" if debug_backtrace
            next false
          end
        end
      end
      expr
    when :di, :func
      next if expr.kind_of? StoppedExpr
      if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
        puts " not backtracking stack address #{expr}" if debug_backtrace
        next false
      end

      oldexpr = expr
      case ev
      when :di
        h[:addr] = h[:di].address
        expr = backtrace_emu_instr(h[:di], expr)
        bt_log << [ev, expr, oldexpr, h[:di], h[:addr]] if bt_log and expr != oldexpr
      when :func
        expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
        if snapshot_addr and snapshot_addr == h[:funcaddr]
          # XXX recursiveness detection needs to be fixed
          puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
          next false
        end
        bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr
      end
      puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
      if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
          h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached))
        if snapshot_addr
          expr = StoppedExpr.new vals
        else
          result |= vals
          bt_log << [:found, vals, h[:addr]] if bt_log
          next false
        end
      elsif expr.complexity > max_complexity
        puts " backtrace aborting, expr too complex" if debug_backtrace
        next false
      end
      expr
    else raise ev.inspect
    end
  }

  puts ' backtrace result: ' + result.map { |r| Expression[r] }.join(', ') if debug_backtrace

  result
end
1530
+
1531
# tells whether the BacktraceTrace corresponds to a return from a known subfunction
# when it does, the return address is registered as a subfuncret target of the calling
# block, queued in addrs_todo, and true is returned ; otherwise returns nil
def backtrace_check_funcret(btt, funcaddr, instraddr)
  di = @decoded[instraddr]
  return unless di and @function[funcaddr]
  return unless (btt.type == :x) and (not btt.from_subfuncret)
  return unless @cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin])
  retaddr = backtrace_emu_instr(di, btt.expr)
  return if not retaddr or need_backtrace(retaddr)

  puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace
  di.block.add_to_subfuncret normalize(retaddr)

  func_head = @decoded[funcaddr]
  if func_head.kind_of? DecodedInstruction
    # make sure every :saveip caller gets a return address (eg a recursive call that
    # was resolved before funcaddr was known to be a function)
    func_head.block.each_from_normal { |caller_addr|
      caller_di = di_at(caller_addr)
      if caller_di and caller_di.opcode.props[:saveip] and not caller_di.block.to_subfuncret
        backtrace_check_funcret(btt, funcaddr, caller_addr)
      end
    }
  end

  if @function[funcaddr].finalized
    @addrs_todo << [retaddr, instraddr, true]
  else
    # the function is not fully disassembled yet: the return address must be handled
    # only once the subfunction is done. Walk the code reachable from the call,
    # collect the block addresses, and insert the subfuncret entry just before the
    # first of those addresses found in @addrs_todo (which is pop()ed by dasm_step)
    seen = []
    pending = []
    di.block.each_to_normal { |t| pending << normalize(t) }
    while a = pending.pop
      next if seen.include?(a) or not get_section_at(a)
      seen << a
      target = @decoded[a]
      if target.kind_of? DecodedInstruction
        target.block.each_to_samefunc(self) { |t| pending << normalize(t) }
      end
    end

    first_entry = @addrs_todo.find { |r, i, sfr| seen.include? normalize(r) }
    idx = @addrs_todo.index(first_entry) || -1
    @addrs_todo.insert(idx, [retaddr, instraddr, true])
  end

  true
end
1574
+
1575
# runs expr backwards through a single decoded instruction
# the work is delegated to the cpu's backtrace_emu, whose result is returned as is
def backtrace_emu_instr(di, expr)
  emulated = @cpu.backtrace_emu(di, expr)
  emulated
end
1579
+
1580
# runs expr backwards through a whole subfunction
# the function's backtrace binding is fetched, applied to expr, and the reduced
# result is wrapped in an Expression
def backtrace_emu_subfunc(func, funcaddr, calladdr, expr, origin, maxdepth)
  func_binding = func.get_backtrace_binding(self, funcaddr, calladdr, expr, origin, maxdepth)
  bound = expr.bind(func_binding)
  Expression[bound.reduce]
end
1585
+
1586
# applies the binding registered for addr in @address_binding, if any
# returns expr untouched when no binding exists for this address
def backtrace_emu_blockup(addr, expr)
  ab = @address_binding[addr]
  if ab
    Expression[expr.bind(ab).reduce]
  else
    expr
  end
end
1590
+
1591
# static resolution of indirections
# every indirection of expr is replaced by the value decoded from the section data
# at its (recursively resolved) target ; expr is returned as is as soon as one
# target does not belong to a known section
def resolve(expr)
  bd = @old_prog_binding
  Expression[expr].expr_indirections.each { |ind|
    edata, _ = get_section_at(resolve(ind.target))
    return expr if not edata
    value = edata.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness)
    # merge builds a new hash, @old_prog_binding itself is never modified
    bd = bd.merge ind => Expression[ value ]
  }
  Expression[expr].bind(bd).reduce
end
1600
+
1601
# tells whether the expression still needs to be backtracked
# that is the case when its externals include a Symbol other than :unknown or one
# of the terminals (such a symbol stands for a register value)
# returns nil for an Integer, true/false otherwise
def need_backtrace(expr, terminals=[])
  return nil if expr.kind_of?(::Integer)
  symbols = expr.externals.grep(::Symbol)
  unresolved = symbols - [:unknown] - terminals
  not unresolved.empty?
end
1607
+
1608
# returns an array of expressions, or nil if expr needs more backtrace
# expr needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value)
# if it needs no more backtrace, expr's indirections are recursively resolved
# xrefs are created, and di args are updated (immediate => label)
# if type is :x, addrs_todo is updated, and if di starts a block, expr is checked to see if it may be a subfunction return value
#
# expr indirections are solved by first finding the value of the pointer, and then rebacktracking for write-type access
# detached is true if type is :x and from should not be set in addrs_todo (indirect call flow, eg external function callback)
# if the backtrace ends pre entrypoint, returns the value encoded in the raw binary
# XXX global variable (modified by another function), exported data, multithreaded app..
# TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax])
# TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
# TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
#  eg. mov eax, 42 ; add eax, 4 ; jmp eax  =>  mov eax, some_label-4
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
  # only entrypoints or block starts called by a :saveip are checked for being a function
  # (we want to execute [esp] from a block start)
  func_start = false
  if type == :x and di and di == di.block.list.first and
      @cpu.backtrace_is_function_return(expr, @decoded[origin])
    if not di.block.from_normal and not di.block.from_subfuncret
      # the block is an entrypoint
      func_start = true
    else
      # the block is a function start if one of its callers is a :saveip
      di.block.each_from_normal { |fn|
        func_start = true if @decoded[fn] and @decoded[fn].opcode.props[:saveip]
      }
    end
  end

  if func_start
    # mark the current address as a function start
    # the actual return address will be found later (the caller is told to continue the backtrace)
    addr = di.address
    l = auto_label_at(addr, 'sub', 'loc', 'xref')
    f = @function[addr]
    if not f
      f = @function[addr] = DecodedFunction.new
      puts "found new function #{l} at #{Expression[addr]}" if $VERBOSE
    end
    f.finalized = false

    if @decoded[origin]
      f.return_address ||= []
      f.return_address |= [origin]
      @decoded[origin].add_comment "endsub #{l}"
      # TODO add_xref (to update the comment on rename_label)
    end

    f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address }
  end

  return if need_backtrace(expr)

  puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
  result = backtrace_value(expr, maxdepth)
  # keep the original pointer in the results to emulate volatile memory (eg decompiler prefers this)
  result << expr if not type
  result.uniq!

  # create xrefs/labels
  if type and origin
    result.each { |e| backtrace_found_result(e, di, type, origin, len, detached) }
  end

  result
end
1666
+
1667
# returns an array of expressions with all Indirections resolved
# (recursive with backtrace_indirection)
def backtrace_value(expr, maxdepth)
  # candidate expressions, refined one indirection at a time
  resolved = [Expression[expr.reduce]]

  # each indirection may have several values: every candidate is cloned for
  # each of them (cross-product), duplicates being dropped along the way
  resolved.first.expr_indirections.uniq.each { |ind|
    resolved = backtrace_indirection(ind, maxdepth).inject([]) { |acc, val|
      acc | resolved.map { |e| Expression[e.bind(ind => val).reduce] }
    }
  }

  resolved.uniq
end
1683
+
1684
# returns the array of values pointed by the indirection at its invocation (ind.origin)
# the pointer is first resolved using backtrace_value, then the code is walked back
# from ind.origin until an instruction with a :w xref to the pointer is found, and the
# written value is backtracked
# if no :w access is known for a pointer inside a section, the value encoded in the raw
# section data is returned
# TODO handle unaligned (partial?) writes
def backtrace_indirection(ind, maxdepth)
  if not ind.origin
    puts "backtrace_ind: no origin for #{ind}" if $VERBOSE
    return [ind]
  end

  values = []

  # reads len bytes of raw section data at addr, Unknown if addr is not in a section
  read_static = lambda { |addr, len|
    edata, _ = get_section_at(addr)
    if edata
      Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
    else
      Expression::Unknown
    end
  }

  # shared end of the :di and :func events: once the expression only depends on
  # program labels/functions and has no indirection left, store its values and stop
  # the walk (false), otherwise keep walking with the expression
  settle = lambda { |e, h|
    if e.expr_externals.all? { |ext| @prog_binding[ext] or @function[normalize(ext)] } and e.expr_indirections.empty?
      values |= backtrace_value(e, maxdepth-1-h[:loopdetect].length)
      false
    else
      e
    end
  }

  # resolve pointers (they may include Indirections)
  backtrace_value(ind.target, maxdepth).each { |ptr|
    # find write xrefs to the ptr
    writers = []
    if ptr != Expression::Unknown
      each_xref(ptr, :w) { |x|
        # XXX should be rebacktracked on new xref
        next if not @decoded[x.origin]
        writers |= [x.origin]
      }
    end

    if not writers.empty?
      # wait until we find a write xref, then backtrace the written value
      initval = true
    elsif get_section_at(ptr)
      # static data, never written: return the encoded value
      values |= [read_static[ptr, ind.len]]
      next
    else
      # unknown pointer: backtrace the indirection, hope it solves itself
      initval = ind
    end

    # wait until we arrive at an xref'ing instruction, then backtrace the written value
    backtrace_walk(initval, ind.origin, true, false, nil, maxdepth-1) { |ev, expr, h|
      case ev
      when :unknown_addr, :maxdepth, :stopaddr
        puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace
        values |= [Expression::Unknown]
      when :end
        if not writers.empty? and (expr == true or not need_backtrace(expr))
          if expr == true
            # found a path avoiding the :w xrefs, read the encoded initial value
            values |= [read_static[ptr, ind.len]]
          else
            bd = expr.expr_indirections.inject({}) { |h_, i| h_.update i => read_static[i.target, i.len] }
            values |= [Expression[expr.bind(bd).reduce]]
          end
        else
          # unknown pointer, backtrace did not resolve...
          values |= [Expression::Unknown]
        end
      when :di
        di = h[:di]
        if expr == true
          next true if not writers.include? di.address
          # find the expression to backtrace: assume this is the :w xref from this di
          writes = get_xrefs_rw(di).find_all { |x_type, x_ptr, x_len| x_type == :w and x_len == ind.len }
          if writes.length != 1
            puts "backtrace_ind: incompatible xrefs to #{ptr} from #{di}" if $DEBUG
            values |= [Expression::Unknown]
            next false
          end
          expr = Indirection.new(writes[0][1], ind.len, di.address)
        end
        expr = backtrace_emu_instr(di, expr)
        # may have new indirections... recall bt_value ?
        settle[expr, h]
      when :func
        next true if expr == true  # XXX
        expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length)
        settle[expr, h]
      end
    }
  }

  values
end
1788
+
1789
# records one resolved backtrace result
# creates the xrefs and the label for the target, rewrites the immediate in di's
# instruction, comments the origin instruction, names the target after the string it
# points to (byte/word reads only), and for :x results links the blocks and queues the
# target in addrs_todo
#  expr:     the resolved value
#  di:       instruction where the value was found (may be nil)
#  type:     access type (:x, :r, :w)
#  origin:   address of the instruction that started the backtrace
#  len:      size of the access (nil for :x)
#  detached: for :x, true if origin must not be recorded as the source in addrs_todo
def backtrace_found_result(expr, di, type, origin, len, detached)
  n = normalize(expr)
  fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr  # delay_slot
  add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough
  unk = true if n == Expression::Unknown

  add_xref(n, Xref.new(:addr, di.address)) if di and di.address != origin and not unk
  base = { nil => 'loc', 1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword' }[len] || 'xref'
  base = 'sub' if @function[n]
  n = Expression[auto_label_at(n, base, 'xref') || n] if not fallthrough
  n = Expression[n]

  # update instr args
  # TODO trace expression evolution to allow handling of
  #  mov eax, 28 ; add eax, 4 ; jmp eax
  #  => mov eax, (loc_xx-4)
  if di and not unk  # and di.address == origin
    @cpu.replace_instr_arg_immediate(di.instruction, expr, n)
  end
  if @decoded[origin] and not unk
    @cpu.backtrace_found_result(self, @decoded[origin], expr, type, len)
  end

  # add comment
  if type and @decoded[origin]  # and not @decoded[origin].instruction.args.include? n
    @decoded[origin].add_comment "#{type}#{len}:#{n}" if not fallthrough
  end

  # check if target is a string
  if di and type == :r and (len == 1 or len == 2) and s = get_section_at(n)
    l = s[0].inv_export[s[0].ptr]
    case len
    when 1; str = s[0].read(32).unpack('C*')
    when 2; str = s[0].read(64).unpack('v*')
    end
    str = str.inject('') { |str_, c|
      case c
      # c is an Integer code unit: compare against integer codes. The ?\n style
      # literals are Strings since ruby 1.9 and would never match here
      when 0x20..0x7e, 0x0a, 0x0d, 0x09; str_ << c
      else break str_
      end
    }
    if str.length >= 4
      di.add_comment "#{'L' if len == 2}#{str.inspect}"
      str = 'a_' + str.downcase.delete('^a-z0-9')[0, 12]
      # l is nil when no label is exported at the target: nothing to rename then
      if str.length >= 8 and l and l[0, 5] == 'byte_'
        rename_label(l, @program.new_label(str))
      end
    end
  end

  # XXX all this should be done in backtrace() { <here> }
  if type == :x and origin
    if detached
      o = @decoded[origin] ? origin : di ? di.address : nil  # lib function callback have origin == libfuncname, so we must find a block somewhere else
      origin = nil
      @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk
    else
      @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk
    end
    @addrs_todo << [n, origin]
  end
end
1852
+
1853
# returns the whole listing produced by dump as a single string,
# each line being terminated by a newline
def to_s
  listing = ''
  dump { |line|
    listing << line
    listing << "\n"
  }
  listing
end
1858
+
1859
# dumps the source, optionally including data
# yields each line to the block (the default block puts it)
# sections are handled by increasing integer address (non-integer addresses sort as 0)
def dump(dump_data=true, &b)
  b ||= lambda { |l| puts l }
  ordered = @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }
  ordered.each { |addr, edata|
    addr = Expression[addr] if addr.kind_of? ::String

    # sorted offsets, relative to this section, of the decoded block heads it holds
    instrs = @decoded.values.grep(DecodedInstruction)
    offs = instrs.map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }
    blockoffs = offs.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length }

    b[@program.dump_section_header(addr, edata)]
    if not dump_data and edata.length > 16*1024 and blockoffs.empty?
      # big section without any code: only show a summary
      b["// [#{edata.length} data bytes]"]
      next
    end

    unk_off = 0  # last offset displayed
    while unk_off < edata.length
      if unk_off != blockoffs.first
        # data up to the next block (or the end of the section)
        next_off = blockoffs.first || edata.length
        if dump_data or next_off - unk_off < 16
          unk_off = dump_data(addr + unk_off, edata, unk_off, &b)
        else
          b["// [#{next_off - unk_off} data bytes]"]
          unk_off = next_off
        end
      else
        blockoffs.shift
        di = @decoded[addr+unk_off]
        if unk_off != di.block.edata_ptr
          b["\n// ------ overlap (#{unk_off-di.block.edata_ptr}) ------"]
        elsif di.block.from_normal.kind_of? ::Array
          b["\n"]
        end
        dump_block(di.block, &b)
        # always move forward, even past an empty block
        unk_off += [di.block.bin_length, 1].max
        # do not skip a block starting inside the one just shown
        unk_off = blockoffs.first if blockoffs.first and unk_off > blockoffs.first
      end
    end
  }
end
1897
+
1898
# dumps a block of decoded instructions: its header, then every instruction
# block may be given as an address, in which case the block of the instruction
# decoded there is used
# yields each line to the block (the default block puts it)
def dump_block(block, &b)
  b ||= lambda { |l| puts l }
  decoded = @decoded[block]
  block = decoded.block if decoded
  dump_block_header(block, &b)
  block.list.each { |di|
    b[di.show]
  }
end
1905
+
1906
# shows what precedes a block: the xrefs to its address (at most 8 are listed),
# its labels, and the comment attached to its address
# yields each line to the block (the default block puts it)
def dump_block_header(block, &b)
  b ||= lambda { |l| puts l }

  refs = []
  each_xref(block.address) { |x|
    if x.type == :x
      refs << Expression[x.origin]
    elsif x.type == :r or x.type == :w
      refs << "#{x.type}#{x.len}:#{Expression[x.origin]}"
    end
  }
  b["\n// Xrefs: #{refs[0, 8].join(' ')}#{' ...' if refs.length > 8}"] if not refs.empty?

  if block.edata.inv_export[block.edata_ptr]
    # the blank separator is only needed when the xref line did not provide one
    b["\n"] if refs.empty?
    label_alias[block.address].each { |name| b["#{name}:"] }
  end

  if c = @comment[block.address]
    c = c.join("\n") if c.kind_of? ::Array
    c.each_line { |l| b["// #{l}"] }
  end
end
1928
+
1929
# dumps data/labels, honours the xref lengths if any
# dumps one line only
# stops on end of edata/@decoded/@xref
# returns the next offset to display
# TODO array-style data access
def dump_data(addr, edata, off, &b)
  b ||= lambda { |l| puts l }

  # line prefix: the last label (sorted order) goes on the data line, the others
  # are shown on their own line
  l = edata.inv_export[off]
  if l
    aliases = label_alias[addr].to_a.sort
    l = aliases.pop || l
    aliases.each { |name| b["#{name}:"] }
    l = (l + ' ').ljust(16)
  else
    l = ''
  end

  elemlen = 1            # size of each element we dump (db by default)
  dumplen = -off % 16    # number of octets to dump
  dumplen = 16 if dumplen == 0

  cmt = []
  each_xref(addr) { |x|
    dumplen = elemlen = x.len if x.len == 2 or x.len == 4
    cmt << " #{x.type}#{x.len}:#{Expression[x.origin]}"
  }
  cmt = " ; @#{Expression[addr]}" + cmt.sort[0, 6].join

  r = edata.reloc[off]
  if r
    # a relocation fixes the element size (derived from its type name)
    dumplen = elemlen = r.type.to_s[1..-1].to_i/8
  end

  dataspec = { 1 => 'db ', 2 => 'dw ', 4 => 'dd ', 8 => 'dq ' }[elemlen]
  if not dataspec
    dataspec = 'db '
    elemlen = 1
  end
  l << dataspec

  # dup(?) : past the end of the initialized data
  if off >= edata.data.length
    dups = edata.virtsize - off
    # stop at the closest following label or xref
    shrink = lambda { |a|
      tmp = Expression[a, :-, addr].reduce
      dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups
    }
    @prog_binding.each_value { |a| shrink[a] }
    @xrefs.each_key { |a| shrink[a] }
    dups /= elemlen
    dups = 1 if dups < 1
    b[(l + "#{dups} dup(?)").ljust(48) << cmt]
    return off + dups*elemlen
  end

  vals = []
  edata.ptr = off
  dups = dumplen/elemlen
  elemsym = "u#{elemlen*8}".to_sym
  while edata.ptr < edata.data.length
    if vals.length > dups and vals.last != vals.first
      # we have a dup(), unread the last element which is different
      vals.pop
      addr = Expression[addr, :-, elemlen].reduce
      edata.ptr -= elemlen
      break
    end
    break if vals.length == dups and vals.uniq.length > 1
    vals << edata.decode_imm(elemsym, @cpu.endianness)
    addr += elemlen
    # stop right before anything noteworthy (xref, code, reloc, label) located
    # inside or just after the element we read
    i = (1-elemlen..0).find { |i_|
      t = addr + i_
      @xrefs[t] or @decoded[t] or edata.reloc[edata.ptr+i_] or edata.inv_export[edata.ptr+i_]
    }
    if i
      # i <= 0
      edata.ptr += i
      addr += i
      break
    end
    break if edata.reloc[edata.ptr-elemlen]
  end

  # line of repeated value => dup()
  if vals.length > 8 and vals.uniq.length == 1
    b[(l << "#{vals.length} dup(#{Expression[vals.first]})").ljust(48) << cmt]
    return edata.ptr
  end

  # recognize strings: merge runs of printable values into String elements
  merged = []
  vals.each { |value|
    printable = ((elemlen == 1 or elemlen == 2) and
                 ((0x20..0x7e) === value or 0x0a === value or 0x0d === value))
    if not printable
      merged << value
    elsif merged.last.kind_of? ::String
      merged.last << value
    else
      merged << value.chr
    end
  }
  vals = merged

  vals.map! { |value|
    if not value.kind_of? ::String
      Expression[value]
    elsif value.length > 2  # or value == vals.first or value == vals.last # if there is no xref, don't care
      value.inspect
    else
      # too short to be shown as a string: back to individual values
      value.unpack('C*').map { |c| Expression[c] }
    end
  }
  vals.flatten!

  b[(l << vals.join(', ')).ljust(48) << cmt]

  edata.ptr
end
2045
+
2046
# returns the Decompiler bound to this disassembler, creating it on first use
# an empty C source is parsed beforehand when no c_parser exists yet
def decompiler
  parse_c('') unless c_parser
  @decompiler = Decompiler.new(self) if not @decompiler
  @decompiler
end
2050
# replaces the decompiler object returned by #decompiler
def decompiler=(dc)
  instance_variable_set(:@decompiler, dc)
end
2053
# forwards all the addresses to Decompiler#decompile, returns its result
def decompile(*addr)
  dc = decompiler
  dc.decompile(*addr)
end
2056
# forwards the address to Decompiler#decompile_func, returns its result
def decompile_func(addr)
  dc = decompiler
  dc.decompile_func(addr)
end
2059
+
2060
# allows us to be AutoExe.loaded
# loads f (the block is passed along to load) and returns the program of the
# loaded object
def self.autoexe_load(f, &b)
  load(f, &b).program
end
2065
+ end
2066
+ end
2067
+
2068
+ require 'metasm/disassemble_api'