metasm 1.0.0 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (276) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +3 -0
  4. data/.gitignore +3 -0
  5. data/.hgtags +3 -0
  6. data/Gemfile +3 -0
  7. data/INSTALL +61 -0
  8. data/LICENCE +458 -0
  9. data/README +29 -21
  10. data/Rakefile +10 -0
  11. data/TODO +10 -12
  12. data/doc/code_organisation.txt +3 -1
  13. data/doc/core/DynLdr.txt +247 -0
  14. data/doc/core/ExeFormat.txt +43 -0
  15. data/doc/core/Expression.txt +220 -0
  16. data/doc/core/GNUExports.txt +27 -0
  17. data/doc/core/Ia32.txt +236 -0
  18. data/doc/core/SerialStruct.txt +108 -0
  19. data/doc/core/VirtualString.txt +145 -0
  20. data/doc/core/WindowsExports.txt +61 -0
  21. data/doc/core/index.txt +1 -0
  22. data/doc/style.css +6 -3
  23. data/doc/usage/debugger.txt +327 -0
  24. data/doc/usage/index.txt +1 -0
  25. data/doc/use_cases.txt +2 -2
  26. data/metasm.gemspec +23 -0
  27. data/{lib/metasm.rb → metasm.rb} +15 -3
  28. data/{lib/metasm → metasm}/compile_c.rb +15 -9
  29. data/metasm/cpu/arc.rb +8 -0
  30. data/metasm/cpu/arc/decode.rb +404 -0
  31. data/metasm/cpu/arc/main.rb +191 -0
  32. data/metasm/cpu/arc/opcodes.rb +588 -0
  33. data/metasm/cpu/arm.rb +14 -0
  34. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  35. data/{lib/metasm → metasm/cpu}/arm/decode.rb +15 -18
  36. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  37. data/{lib/metasm → metasm/cpu}/arm/main.rb +3 -6
  38. data/metasm/cpu/arm/opcodes.rb +324 -0
  39. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  40. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  41. data/metasm/cpu/arm64.rb +15 -0
  42. data/metasm/cpu/arm64/debug.rb +38 -0
  43. data/metasm/cpu/arm64/decode.rb +285 -0
  44. data/metasm/cpu/arm64/encode.rb +41 -0
  45. data/metasm/cpu/arm64/main.rb +105 -0
  46. data/metasm/cpu/arm64/opcodes.rb +232 -0
  47. data/metasm/cpu/arm64/parse.rb +20 -0
  48. data/metasm/cpu/arm64/render.rb +95 -0
  49. data/{lib/metasm/mips/compile_c.rb → metasm/cpu/bpf.rb} +4 -2
  50. data/metasm/cpu/bpf/decode.rb +110 -0
  51. data/metasm/cpu/bpf/main.rb +60 -0
  52. data/metasm/cpu/bpf/opcodes.rb +81 -0
  53. data/metasm/cpu/bpf/render.rb +30 -0
  54. data/{lib/metasm/ppc.rb → metasm/cpu/cy16.rb} +2 -4
  55. data/metasm/cpu/cy16/decode.rb +247 -0
  56. data/metasm/cpu/cy16/main.rb +63 -0
  57. data/metasm/cpu/cy16/opcodes.rb +78 -0
  58. data/metasm/cpu/cy16/render.rb +30 -0
  59. data/metasm/cpu/dalvik.rb +11 -0
  60. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +34 -34
  61. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +71 -4
  62. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +21 -12
  63. data/{lib/metasm/mips.rb → metasm/cpu/ebpf.rb} +3 -4
  64. data/metasm/cpu/ebpf/debug.rb +61 -0
  65. data/metasm/cpu/ebpf/decode.rb +142 -0
  66. data/metasm/cpu/ebpf/main.rb +58 -0
  67. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  68. data/metasm/cpu/ebpf/render.rb +36 -0
  69. data/metasm/cpu/ia32.rb +17 -0
  70. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +23 -9
  71. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +44 -6
  72. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +342 -128
  73. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +75 -53
  74. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  75. data/{lib/metasm → metasm/cpu}/ia32/main.rb +66 -8
  76. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  77. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +55 -17
  78. data/{lib/metasm → metasm/cpu}/ia32/render.rb +32 -5
  79. data/metasm/cpu/mcs51.rb +8 -0
  80. data/metasm/cpu/mcs51/decode.rb +99 -0
  81. data/metasm/cpu/mcs51/main.rb +87 -0
  82. data/metasm/cpu/mcs51/opcodes.rb +120 -0
  83. data/metasm/cpu/mips.rb +14 -0
  84. data/metasm/cpu/mips/debug.rb +42 -0
  85. data/{lib/metasm → metasm/cpu}/mips/decode.rb +59 -38
  86. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  87. data/{lib/metasm → metasm/cpu}/mips/main.rb +13 -6
  88. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +87 -18
  89. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  90. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  91. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  92. data/metasm/cpu/msp430/decode.rb +243 -0
  93. data/metasm/cpu/msp430/main.rb +62 -0
  94. data/metasm/cpu/msp430/opcodes.rb +101 -0
  95. data/metasm/cpu/openrisc.rb +11 -0
  96. data/metasm/cpu/openrisc/debug.rb +106 -0
  97. data/metasm/cpu/openrisc/decode.rb +182 -0
  98. data/metasm/cpu/openrisc/decompile.rb +350 -0
  99. data/metasm/cpu/openrisc/main.rb +70 -0
  100. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  101. data/metasm/cpu/openrisc/render.rb +37 -0
  102. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  103. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  104. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  105. data/metasm/cpu/ppc.rb +11 -0
  106. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -37
  107. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  108. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  109. data/{lib/metasm → metasm/cpu}/ppc/main.rb +23 -18
  110. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -6
  111. data/metasm/cpu/ppc/parse.rb +55 -0
  112. data/metasm/cpu/python.rb +8 -0
  113. data/metasm/cpu/python/decode.rb +116 -0
  114. data/metasm/cpu/python/main.rb +36 -0
  115. data/metasm/cpu/python/opcodes.rb +180 -0
  116. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  117. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +50 -23
  118. data/{lib/metasm → metasm/cpu}/sh4/main.rb +38 -27
  119. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  120. data/metasm/cpu/st20.rb +9 -0
  121. data/metasm/cpu/st20/decode.rb +173 -0
  122. data/metasm/cpu/st20/decompile.rb +283 -0
  123. data/metasm/cpu/st20/main.rb +37 -0
  124. data/metasm/cpu/st20/opcodes.rb +140 -0
  125. data/{lib/metasm/arm.rb → metasm/cpu/webasm.rb} +4 -5
  126. data/metasm/cpu/webasm/debug.rb +31 -0
  127. data/metasm/cpu/webasm/decode.rb +321 -0
  128. data/metasm/cpu/webasm/decompile.rb +386 -0
  129. data/metasm/cpu/webasm/encode.rb +104 -0
  130. data/metasm/cpu/webasm/main.rb +81 -0
  131. data/metasm/cpu/webasm/opcodes.rb +214 -0
  132. data/metasm/cpu/x86_64.rb +15 -0
  133. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +40 -25
  134. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  135. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +58 -15
  136. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +59 -28
  137. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +18 -6
  138. data/metasm/cpu/x86_64/opcodes.rb +138 -0
  139. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +12 -4
  140. data/metasm/cpu/x86_64/render.rb +35 -0
  141. data/metasm/cpu/z80.rb +9 -0
  142. data/metasm/cpu/z80/decode.rb +286 -0
  143. data/metasm/cpu/z80/main.rb +67 -0
  144. data/metasm/cpu/z80/opcodes.rb +224 -0
  145. data/metasm/cpu/z80/render.rb +48 -0
  146. data/{lib/metasm/os/main.rb → metasm/debug.rb} +201 -407
  147. data/{lib/metasm → metasm}/decode.rb +104 -24
  148. data/{lib/metasm → metasm}/decompile.rb +804 -478
  149. data/{lib/metasm → metasm}/disassemble.rb +385 -170
  150. data/{lib/metasm → metasm}/disassemble_api.rb +684 -105
  151. data/{lib/metasm → metasm}/dynldr.rb +231 -138
  152. data/{lib/metasm → metasm}/encode.rb +20 -5
  153. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  154. data/{lib/metasm → metasm}/exe_format/autoexe.rb +3 -0
  155. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  156. data/{lib/metasm → metasm}/exe_format/coff.rb +35 -7
  157. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +70 -23
  158. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +24 -22
  159. data/{lib/metasm → metasm}/exe_format/dex.rb +26 -8
  160. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  161. data/{lib/metasm → metasm}/exe_format/elf.rb +108 -58
  162. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +202 -36
  163. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +126 -32
  164. data/metasm/exe_format/gb.rb +65 -0
  165. data/metasm/exe_format/javaclass.rb +424 -0
  166. data/{lib/metasm → metasm}/exe_format/macho.rb +218 -16
  167. data/{lib/metasm → metasm}/exe_format/main.rb +28 -3
  168. data/{lib/metasm → metasm}/exe_format/mz.rb +2 -0
  169. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  170. data/{lib/metasm → metasm}/exe_format/pe.rb +96 -11
  171. data/metasm/exe_format/pyc.rb +167 -0
  172. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  173. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  174. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  175. data/metasm/exe_format/swf.rb +205 -0
  176. data/metasm/exe_format/wasm.rb +402 -0
  177. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  178. data/metasm/exe_format/zip.rb +335 -0
  179. data/metasm/gui.rb +13 -0
  180. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  181. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  182. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +177 -114
  183. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  184. data/metasm/gui/dasm_graph.rb +1754 -0
  185. data/{lib/metasm → metasm}/gui/dasm_hex.rb +16 -12
  186. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  187. data/{lib/metasm → metasm}/gui/dasm_main.rb +360 -77
  188. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  189. data/{lib/metasm → metasm}/gui/debug.rb +109 -34
  190. data/{lib/metasm → metasm}/gui/gtk.rb +174 -44
  191. data/{lib/metasm → metasm}/gui/qt.rb +14 -4
  192. data/{lib/metasm → metasm}/gui/win32.rb +180 -43
  193. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  194. data/{lib/metasm → metasm}/main.rb +421 -286
  195. data/metasm/os/emulator.rb +175 -0
  196. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  197. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  198. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  199. data/metasm/os/main.rb +335 -0
  200. data/{lib/metasm → metasm}/os/windows.rb +151 -58
  201. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  202. data/{lib/metasm → metasm}/parse.rb +49 -36
  203. data/{lib/metasm → metasm}/parse_c.rb +405 -246
  204. data/{lib/metasm → metasm}/preprocessor.rb +71 -41
  205. data/{lib/metasm → metasm}/render.rb +14 -38
  206. data/misc/hexdump.rb +4 -3
  207. data/misc/lint.rb +58 -0
  208. data/misc/objdiff.rb +4 -1
  209. data/misc/objscan.rb +1 -1
  210. data/misc/openrisc-parser.rb +79 -0
  211. data/misc/txt2html.rb +9 -7
  212. data/samples/bindiff.rb +3 -4
  213. data/samples/dasm-plugins/bindiff.rb +15 -0
  214. data/samples/dasm-plugins/bookmark.rb +133 -0
  215. data/samples/dasm-plugins/c_constants.rb +57 -0
  216. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  217. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  218. data/samples/dasm-plugins/dasm_all.rb +70 -0
  219. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  220. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  221. data/samples/dasm-plugins/dump_text.rb +35 -0
  222. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  223. data/samples/dasm-plugins/findgadget.rb +75 -0
  224. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  225. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  226. data/samples/dasm-plugins/imm2off.rb +34 -0
  227. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  228. data/samples/dasm-plugins/patch_file.rb +95 -0
  229. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  230. data/samples/dasm-plugins/scanxrefs.rb +29 -0
  231. data/samples/dasm-plugins/selfmodify.rb +197 -0
  232. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  233. data/samples/dasmnavig.rb +1 -1
  234. data/samples/dbg-apihook.rb +24 -9
  235. data/samples/dbg-plugins/heapscan.rb +283 -0
  236. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  237. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  238. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  239. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  240. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  241. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  242. data/samples/dbg-plugins/trace_func.rb +214 -0
  243. data/samples/disassemble-gui.rb +48 -7
  244. data/samples/disassemble.rb +31 -6
  245. data/samples/dump_upx.rb +24 -12
  246. data/samples/dynamic_ruby.rb +35 -27
  247. data/samples/elfencode.rb +15 -0
  248. data/samples/emubios.rb +251 -0
  249. data/samples/emudbg.rb +127 -0
  250. data/samples/exeencode.rb +6 -5
  251. data/samples/factorize-headers-peimports.rb +1 -1
  252. data/samples/lindebug.rb +186 -391
  253. data/samples/metasm-shell.rb +68 -57
  254. data/samples/peldr.rb +2 -2
  255. data/tests/all.rb +1 -1
  256. data/tests/arc.rb +26 -0
  257. data/tests/dynldr.rb +22 -4
  258. data/tests/expression.rb +57 -0
  259. data/tests/graph_layout.rb +285 -0
  260. data/tests/ia32.rb +80 -26
  261. data/tests/mcs51.rb +27 -0
  262. data/tests/mips.rb +10 -3
  263. data/tests/preprocessor.rb +18 -0
  264. data/tests/x86_64.rb +66 -18
  265. metadata +465 -219
  266. metadata.gz.sig +2 -0
  267. data/lib/metasm/arm/opcodes.rb +0 -177
  268. data/lib/metasm/gui.rb +0 -23
  269. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  270. data/lib/metasm/ia32.rb +0 -14
  271. data/lib/metasm/ia32/opcodes.rb +0 -872
  272. data/lib/metasm/ppc/parse.rb +0 -52
  273. data/lib/metasm/x86_64.rb +0 -12
  274. data/lib/metasm/x86_64/opcodes.rb +0 -118
  275. data/samples/gdbclient.rb +0 -583
  276. data/samples/rubstop.rb +0 -399
@@ -24,6 +24,10 @@ class DecodedInstruction
24
24
  attr_accessor :comment
25
25
  # a cache of the binding used by the backtracker to emulate this instruction
26
26
  attr_accessor :backtrace_binding
27
+ # used during fixed-size instruction decoding to hold the decoded raw opcode
28
+ attr_accessor :raw_data
29
+ # arbitrary data used during decoding, architecture-specific
30
+ attr_accessor :misc
27
31
 
28
32
  # create a new DecodedInstruction with an Instruction whose cpu is the argument
29
33
  # can take an existing Instruction as argument
@@ -60,7 +64,13 @@ class DecodedInstruction
60
64
  ret = []
61
65
  ret << Expression[address] << ' ' if address
62
66
  ret << @instruction
63
- ret << ' ; ' << @comment if comment
67
+ if comment
68
+ ret << ' ; '
69
+ @comment.each { |c|
70
+ ret << c << ' '
71
+ }
72
+ ret.pop
73
+ end
64
74
  ret
65
75
  end
66
76
 
@@ -98,11 +108,11 @@ class BacktraceTrace
98
108
  attr_accessor :detached
99
109
  # maxdepth at the point of the object creation
100
110
  attr_accessor :maxdepth
111
+ # disassembler cpu_context
112
+ attr_accessor :cpu_context
101
113
 
102
- def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil)
103
- @expr, @origin, @orig_expr, @type = expr, origin, orig_expr, type
104
- @len = len if len
105
- @maxdepth = maxdepth if maxdepth
114
+ def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil, cpu_context=nil)
115
+ @expr, @origin, @orig_expr, @type, @len, @maxdepth, @cpu_context = expr, origin, orig_expr, type, len, maxdepth, cpu_context
106
116
  end
107
117
 
108
118
  def hash ; [origin, expr].hash ; end
@@ -233,14 +243,19 @@ class DecodedFunction
233
243
  attr_accessor :finalized
234
244
  # bool, if true the function does not return (eg exit() or ExitProcess())
235
245
  attr_accessor :noreturn
246
+ # hash stackoff => varname
247
+ # varname is a single String object shared by all ExpressionStrings (to allow renames)
248
+ attr_accessor :localvars
249
+ # hash stack offset => di address
250
+ attr_accessor :localvars_xrefs
236
251
 
237
252
  # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
238
253
  # else update lazily the binding from expr.externals, and return backtrace_binding
239
254
  def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
240
- if btbind_callback
241
- @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth]
242
- elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
255
+ if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
243
256
  target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth)
257
+ elsif btbind_callback
258
+ @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth]
244
259
  else
245
260
  unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown]
246
261
  dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs) if not unk_regs.empty?
@@ -251,10 +266,10 @@ class DecodedFunction
251
266
  # if btfor_callback is defined, calls it with args [dasm, bt_for, funcaddr, calladdr]
252
267
  # else return backtracked_for
253
268
  def get_backtracked_for(dasm, funcaddr, calladdr)
254
- if btfor_callback
255
- @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr]
256
- elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
269
+ if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest]
257
270
  target.get_backtracked_for(dasm, funcaddr, calladdr)
271
+ elsif btfor_callback
272
+ @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr]
258
273
  else
259
274
  @backtracked_for
260
275
  end
@@ -264,9 +279,30 @@ class DecodedFunction
264
279
  @backtracked_for = []
265
280
  @backtrace_binding = {}
266
281
  end
282
+
283
+ def get_localvar_stackoff(off, di=nil, str=nil)
284
+ if di
285
+ @localvars_xrefs ||= {}
286
+ @localvars_xrefs[off] ||= []
287
+ @localvars_xrefs[off] |= [di.address]
288
+ end
289
+ @localvars ||= {}
290
+ @localvars[off] ||= (str || (off > 0 ? 'arg_%X' % off : 'var_%X' % -off))
291
+ end
267
292
  end
268
293
 
269
294
  class CPU
295
+ # decode an instruction with a dasm context
296
+ # context is a hash, should be modified inplace by the CPU
297
+ # will be passed to the next instruction(s) in the code flow
298
+ def decode_instruction_context(dasm, edata, di_addr, context)
299
+ decode_instruction(edata, di_addr)
300
+ end
301
+
302
+ # return the initial context for the disassembler, starts disassembling from addr
303
+ def disassemble_init_context(dasm, addr)
304
+ end
305
+
270
306
  # return the thing to backtrace to find +value+ before the execution of this instruction
271
307
  # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1]
272
308
  # (the value of :eax after 'inc eax' is the value of :eax before plus 1)
@@ -275,8 +311,10 @@ class CPU
275
311
  Expression[Expression[value].bind(di.backtrace_binding ||= get_backtrace_binding(di)).reduce]
276
312
  end
277
313
 
278
- # returns a list of Expressions/Integer to backtrace to find an execution target
314
+ # return the list of jump targets for instructions modifying the control flow
279
315
  def get_xrefs_x(dasm, di)
316
+ return [] if not di.opcode.props[:setip]
317
+ [symbolic(di.instruction.args.last, di)]
280
318
  end
281
319
 
282
320
  # returns a list of [type, address, len]
@@ -319,7 +357,7 @@ class CPU
319
357
  def replace_instr_arg_immediate(i, old, new)
320
358
  i.args.map! { |a|
321
359
  case a
322
- when Expression; Expression[a.bind(old => new).reduce]
360
+ when Expression; a == old ? new : Expression[a.bind(old => new).reduce]
323
361
  else a
324
362
  end
325
363
  }
@@ -377,6 +415,8 @@ class Disassembler
377
415
  attr_accessor :disassemble_maxblocklength
378
416
  # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to
379
417
  attr_accessor :c_parser
418
+ # if false, disassembler skips internal functions with a prototype defined in a C header (eg static libraries)
419
+ attr_accessor :disassemble_known_functions
380
420
  # hash address => array of strings
381
421
  # default dasm dump will only show comments at beginning of code blocks
382
422
  attr_accessor :comment
@@ -399,6 +439,8 @@ class Disassembler
399
439
  attr_accessor :callback_finished
400
440
  # pointer to the gui widget we're displayed in
401
441
  attr_accessor :gui
442
+ # arbitrary data stored by other objects
443
+ attr_accessor :misc
402
444
 
403
445
  @@backtrace_maxblocks = 50
404
446
 
@@ -433,12 +475,14 @@ class Disassembler
433
475
  # adds a section, updates prog_binding
434
476
  # base addr is an Integer or a String (label name for offset 0)
435
477
  def add_section(encoded, base)
436
- encoded, base = base, encoded if base.kind_of? EncodedData
478
+ encoded, base = base, encoded if base.kind_of?(EncodedData)
437
479
  case base
438
480
  when ::Integer
439
481
  when ::String
440
482
  raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0
441
- raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}" if @prog_binding[base] and @prog_binding[base] != Expression[base]
483
+ if ed = get_edata_at(base)
484
+ ed.del_export(base)
485
+ end
442
486
  encoded.add_export base, 0
443
487
  else raise "invalid section base #{base.inspect} - expected string or integer"
444
488
  end
@@ -451,7 +495,7 @@ class Disassembler
451
495
 
452
496
  # update section_edata.reloc
453
497
  # label -> list of relocs that refers to it
454
- @inv_section_reloc = {}
498
+ @inv_section_reloc ||= {}
455
499
  @sections.each { |b, e|
456
500
  e.reloc.each { |o, r|
457
501
  r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] }
@@ -470,7 +514,7 @@ class Disassembler
470
514
  end
471
515
  end
472
516
 
473
- # yields each xref to a given address, optionnaly restricted to a type
517
+ # yields each xref to a given address, optionally restricted to a type
474
518
  def each_xref(addr, type=nil)
475
519
  addr = normalize addr
476
520
 
@@ -485,14 +529,16 @@ class Disassembler
485
529
 
486
530
  # add pseudo-xrefs for exe relocs
487
531
  if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l]
532
+ x_more = []
488
533
  a.each { |b, e, o, r|
489
534
  addr = Expression[b]+o
490
535
  # ignore relocs embedded in an already-listed instr
491
- x << Xref.new(:reloc, addr) if not x.find { |x_|
536
+ x_more << Xref.new(:reloc, addr) if not x.find { |x_|
492
537
  next if not x_.origin or not di_at(x_.origin)
493
- (addr - x_.origin rescue 50) < @decoded[x_.origin].bin_length
538
+ (addr - x_.origin) < @decoded[x_.origin].bin_length rescue false
494
539
  }
495
540
  }
541
+ x.concat x_more
496
542
  end
497
543
 
498
544
  x.each { |x_| yield x_ }
@@ -505,16 +551,24 @@ class Disassembler
505
551
 
506
552
  # parses a C string for function prototypes
507
553
  def parse_c(str, filename=nil, lineno=1)
554
+ @c_parser_constcache = nil
508
555
  @c_parser ||= @cpu.new_cparser
509
556
  @c_parser.lexer.define_weak('__METASM__DECODE__')
510
557
  @c_parser.parse(str, filename, lineno)
558
+ rescue ParseError
559
+ @c_parser.lexer.feed! ''
560
+ raise
561
+ end
562
+
563
+ # list the constants ([name, integer value]) defined in the C code (#define / enums)
564
+ def c_constants
565
+ @c_parser_constcache ||= @c_parser.numeric_constants
511
566
  end
512
567
 
513
568
  # returns the canonical form of addr (absolute address integer or label of start of section + section offset)
514
569
  def normalize(addr)
515
570
  return addr if not addr or addr == :default
516
- addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of? Integer
517
- addr %= 1 << [@cpu.size, 32].max if @cpu and addr.kind_of? Integer
571
+ addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of?(Integer)
518
572
  addr
519
573
  end
520
574
 
@@ -523,18 +577,18 @@ class Disassembler
523
577
  def get_section_at(addr, memcheck=true)
524
578
  case addr = normalize(addr)
525
579
  when ::Integer
526
- if s = @sections.find { |b, e| b.kind_of? ::Integer and addr >= b and addr < b + e.length } ||
527
- @sections.find { |b, e| b.kind_of? ::Integer and addr == b + e.length } # end label
580
+ if s = @sections.find { |b, e| b.kind_of?(::Integer) and addr >= b and addr < b + e.length } ||
581
+ @sections.find { |b, e| b.kind_of?(::Integer) and addr == b + e.length } # end label
528
582
  s[1].ptr = addr - s[0]
529
583
  return if memcheck and s[1].data.respond_to?(:page_invalid?) and s[1].data.page_invalid?(s[1].ptr)
530
584
  [s[1], s[0]]
531
585
  end
532
586
  when Expression
533
- if addr.op == :+ and addr.rexpr.kind_of? ::Integer and addr.rexpr >= 0 and addr.lexpr.kind_of? ::String and e = @sections[addr.lexpr]
587
+ if addr.op == :+ and addr.rexpr.kind_of?(::Integer) and addr.rexpr >= 0 and addr.lexpr.kind_of?(::String) and e = @sections[addr.lexpr]
534
588
  e.ptr = addr.rexpr
535
589
  return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr)
536
590
  [e, Expression[addr.lexpr]]
537
- elsif addr.op == :+ and addr.rexpr.kind_of? ::String and not addr.lexpr and e = @sections[addr.rexpr]
591
+ elsif addr.op == :+ and addr.rexpr.kind_of?(::String) and not addr.lexpr and e = @sections[addr.rexpr]
538
592
  e.ptr = 0
539
593
  return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr)
540
594
  [e, addr.rexpr]
@@ -551,12 +605,14 @@ class Disassembler
551
605
  return if addrstr !~ /^\w+$/
552
606
  e, b = get_section_at(addr)
553
607
  if not e
554
- l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of? ::String
555
- l ||= addrstr if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty?
608
+ l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of?(::String)
609
+ l ||= addrstr if addr.kind_of?(Expression) and addr.externals.grep(::Symbol).empty?
556
610
  elsif not l = e.inv_export[e.ptr]
557
611
  l = @program.new_label(addrstr)
558
612
  e.add_export l, e.ptr
559
- @label_alias_cache = nil
613
+ if @label_alias_cache ||= nil
614
+ (@label_alias_cache[b + e.ptr] ||= []) << l
615
+ end
560
616
  @old_prog_binding[l] = @prog_binding[l] = b + e.ptr
561
617
  elsif rewritepfx.find { |p| base != p and addrstr.sub(base, p) == l }
562
618
  newl = addrstr
@@ -568,6 +624,7 @@ class Disassembler
568
624
  end
569
625
 
570
626
  # returns a hash associating addr => list of labels at this addr
627
+ # label_alias[a] may be nil if a new label is created elsewhere in the edata with the same name
571
628
  def label_alias
572
629
  if not @label_alias_cache
573
630
  @label_alias_cache = {}
@@ -597,19 +654,24 @@ class Disassembler
597
654
  return false
598
655
  elsif @addrs_todo.empty?
599
656
  ep = entrypoints.shift
600
- l = auto_label_at(normalize(ep), 'entrypoint')
657
+ cpu_context = get_initial_cpu_context(ep)
658
+ l = auto_label_at(normalize(ep), 'entrypoint') || normalize(ep)
601
659
  puts "start disassemble from #{l} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty?
602
660
  @entrypoints << l
603
- @addrs_todo << [ep]
661
+ @addrs_todo << { :addr => ep, :cpu_context => cpu_context }
604
662
  else
605
663
  disassemble_step
606
664
  end
607
665
  true
608
666
  end
609
667
 
668
+ def get_initial_cpu_context(addr)
669
+ @cpu.disassemble_init_context(self, addr)
670
+ end
671
+
610
672
  def post_disassemble
611
673
  @decoded.each_value { |di|
612
- next if not di.kind_of? DecodedInstruction
674
+ next if not di.kind_of?(DecodedInstruction)
613
675
  next if not di.opcode or not di.opcode.props[:saveip]
614
676
  if not di.block.to_subfuncret
615
677
  di.add_comment 'noreturn'
@@ -622,17 +684,16 @@ class Disassembler
622
684
  if not f.finalized
623
685
  f.finalized = true
624
686
  puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
625
- @cpu.backtrace_update_function_binding(self, addr, f, f.return_address)
687
+ backtrace_update_function_binding(addr, f)
626
688
  if not f.return_address
627
689
  detect_function_thunk(addr)
628
690
  end
629
691
  end
630
- @comment[addr] ||= []
631
692
  bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown }
632
693
  unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact
633
694
  bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty?
634
- @comment[addr] |= ["function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')]
635
- @comment[addr] |= ["function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')] if f.return_address
695
+ add_comment(addr, "function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', '))
696
+ add_comment(addr, "function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')) if f.return_address
636
697
  }
637
698
  end
638
699
 
@@ -640,25 +701,26 @@ puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
640
701
  # adds next addresses to handle to addrs_todo
641
702
  # if @function[:default] exists, jumps to unknown locations are interpreted as to @function[:default]
642
703
  def disassemble_step
643
- return if not todo = @addrs_todo.pop or @addrs_done.include? todo
644
- @addrs_done << todo if todo[1]
704
+ return if not x = @addrs_todo.pop or @addrs_done.include?(x)
705
+ @addrs_done << x if x[:from]
645
706
 
646
- # from_sfret is true if from is the address of a function call that returns to addr
647
- addr, from, from_subfuncret = todo
707
+ addr = x[:addr]
708
+ from = x[:from]
709
+ # from_subfuncret is true if from is the address of a function call that returns to addr
648
710
 
649
711
  return if from == Expression::Unknown
650
712
 
651
- puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG
713
+ puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{x[:from_subfuncret]} (/#{@addrs_todo.length})" if $DEBUG
652
714
 
653
715
  addr = normalize(addr)
654
716
 
655
- if from and from_subfuncret and di_at(from)
717
+ if from and x[:from_subfuncret] and di_at(from)
656
718
  @decoded[from].block.each_to_normal { |subfunc|
657
719
  subfunc = normalize(subfunc)
658
720
  next if not f = @function[subfunc] or f.finalized
659
721
  f.finalized = true
660
722
  puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
661
- @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address)
723
+ backtrace_update_function_binding(subfunc, f)
662
724
  if not f.return_address
663
725
  detect_function_thunk(subfunc)
664
726
  end
@@ -666,27 +728,36 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
666
728
  end
667
729
 
668
730
  if di = @decoded[addr]
669
- if di.kind_of? DecodedInstruction
670
- split_block(di.block, di.address) if not di.block_head? # this updates di.block
671
- di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
731
+ if di.kind_of?(DecodedInstruction)
732
+ split_block(di.block, di.address, true) if not di.block_head? # this updates di.block
733
+ di.block.add_from(from, x[:from_subfuncret] ? :subfuncret : :normal) if from and from != :default
672
734
  bf = di.block
673
735
  elsif di == true
674
736
  bf = @function[addr]
675
737
  end
676
- elsif bf = @function[addr]
738
+ elsif from and bf = @function[addr]
677
739
  detect_function_thunk_noreturn(from) if bf.noreturn
678
740
  elsif s = get_section_at(addr)
679
- block = InstructionBlock.new(normalize(addr), s[0])
680
- block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
681
- disassemble_block(block)
682
- elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and
683
- s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function
684
- bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s)
741
+ if from and c_parser and not disassemble_known_functions and name = get_all_labels_at(addr).find { |n|
742
+ cs = c_parser.toplevel.symbol[n] and cs.type.untypedef.kind_of?(C::Function) }
743
+ # do not disassemble internal function for which we have a prototype (eg static library)
744
+ puts "found known function #{name} at #{Expression[addr]}" if $VERBOSE
745
+ bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, c_parser.toplevel.symbol[name])
746
+ detect_function_thunk_noreturn(from) if bf.noreturn
747
+ else
748
+ block = InstructionBlock.new(normalize(addr), s[0])
749
+ block.add_from(from, x[:from_subfuncret] ? :subfuncret : :normal) if from and from != :default
750
+ disassemble_block(block, x[:cpu_context])
751
+ end
752
+ elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of?(::String) and
753
+ cs = c_parser.toplevel.symbol[name] and cs.type.untypedef.kind_of?(C::Function)
754
+ # use C header prototype for external functions if available
755
+ bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, cs)
685
756
  detect_function_thunk_noreturn(from) if bf.noreturn
686
- elsif from
757
+ elsif from and not @function[addr]
687
758
  if bf = @function[:default]
688
759
  puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG
689
- if name = Expression[addr].reduce_rec and name.kind_of? ::String
760
+ if name = Expression[addr].reduce_rec and name.kind_of?(::String)
690
761
  @function[addr] = @function[:default].dup
691
762
  else
692
763
  addr = :default
@@ -706,7 +777,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
706
777
  end
707
778
 
708
779
  if bf and from and from != :default
709
- if bf.kind_of? DecodedFunction
780
+ if bf.kind_of?(DecodedFunction)
710
781
  bff = bf.get_backtracked_for(self, addr, from)
711
782
  else
712
783
  bff = bf.backtracked_for
@@ -714,37 +785,39 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
714
785
  end
715
786
  bff.each { |btt|
716
787
  next if btt.address
717
- if @decoded[from].kind_of? DecodedInstruction and @decoded[from].opcode.props[:saveip] and not from_subfuncret and not @function[addr]
718
- backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached)
788
+ if @decoded[from].kind_of?(DecodedInstruction) and @decoded[from].opcode.props[:saveip] and not x[:from_subfuncret] and not @function[addr]
789
+ backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached, btt.cpu_context)
719
790
  end
720
791
  next if backtrace_check_funcret(btt, addr, from)
721
792
  backtrace(btt.expr, from,
722
- :include_start => true, :from_subfuncret => from_subfuncret,
793
+ :include_start => true, :from_subfuncret => x[:from_subfuncret],
723
794
  :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type,
724
- :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth)
795
+ :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth, :cpu_context => btt.cpu_context)
725
796
  } if bff
726
797
  end
727
798
 
728
799
  # splits an InstructionBlock, updates the blocks backtracked_for
729
- def split_block(block, address=nil)
800
+ def split_block(block, address=nil, rebacktrace=false)
730
801
  if not address # invoked as split_block(0x401012)
731
- return if not @decoded[block].kind_of? DecodedInstruction
802
+ return if not @decoded[block].kind_of?(DecodedInstruction)
732
803
  block, address = @decoded[block].block, block
733
804
  end
734
805
  return block if address == block.address
735
806
  new_b = block.split address
736
- new_b.backtracked_for.dup.each { |btt|
737
- backtrace(btt.expr, btt.address,
738
- :only_upto => block.list.last.address,
739
- :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
740
- :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
741
- :detached => btt.detached, :maxdepth => btt.maxdepth)
742
- }
807
+ if rebacktrace
808
+ new_b.backtracked_for.dup.each { |btt|
809
+ backtrace(btt.expr, btt.address,
810
+ :only_upto => block.list.last.address,
811
+ :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
812
+ :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
813
+ :detached => btt.detached, :maxdepth => btt.maxdepth, :cpu_context => btt.cpu_context)
814
+ }
815
+ end
743
816
  new_b
744
817
  end
745
818
 
746
819
  # disassembles a new instruction block at block.address (must be normalized)
747
- def disassemble_block(block)
820
+ def disassemble_block(block, cpu_context)
748
821
  raise if not block.list.empty?
749
822
  di_addr = block.address
750
823
  delay_slot = nil
@@ -763,8 +836,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
763
836
  each_xref(waddr, :w) { |x|
764
837
  #next if off + x.len < 0
765
838
  puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE
766
- @comment[di_addr] ||= []
767
- @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"]
839
+ add_comment(di_addr, "overwritten by #{@decoded[x.origin]}")
768
840
  @callback_selfmodifying[di_addr] if callback_selfmodifying
769
841
  return
770
842
  }
@@ -773,9 +845,11 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
773
845
 
774
846
  # decode instruction
775
847
  block.edata.ptr = di_addr - block.address + block.edata_ptr
776
- if not di = @cpu.decode_instruction(block.edata, di_addr)
848
+ cpu_context = cpu_context.dup if cpu_context
849
+ if not di = @cpu.decode_instruction_context(self, block.edata, di_addr, cpu_context)
777
850
  ed = block.edata
778
- puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE
851
+ break if ed.ptr >= ed.length and get_section_at(di_addr) and di = block.list.last
852
+ puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*').first}"} at #{Expression[di_addr]}" if $VERBOSE
779
853
  return
780
854
  end
781
855
 
@@ -783,7 +857,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
783
857
  block.add_di di
784
858
  puts di if $DEBUG
785
859
 
786
- di = @callback_newinstr[di] if callback_newinstr
860
+ if callback_newinstr
861
+ ndi = @callback_newinstr[di]
862
+ if not ndi or not ndi.block
863
+ block.list.delete di
864
+ if ndi
865
+ block.add_di ndi
866
+ ndi.bin_length = di.bin_length if ndi.bin_length == 0
867
+ @decoded[di_addr] = ndi
868
+ end
869
+ end
870
+ di = ndi
871
+ end
787
872
  return if not di
788
873
  block = di.block
789
874
 
@@ -793,7 +878,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
793
878
 
794
879
  if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
795
880
  # do not backtrace until delay slot is finished (eg MIPS: di is a
796
- # ret and the delay slot holds stack fixup needed to calc func_binding)
881
+ # ret and the delay slot holds stack fixup needed to calc func_binding)
797
882
  # XXX if the delay slot is also xref_x or :stopexec it is ignored
798
883
  delay_slot ||= [di, @cpu.delay_slot(di)]
799
884
  end
@@ -801,18 +886,23 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
801
886
  if delay_slot
802
887
  di, delay = delay_slot
803
888
  if delay == 0 or not di_addr
804
- backtrace_xrefs_di_x(di)
889
+ backtrace_xrefs_di_x(di, cpu_context)
805
890
  if di.opcode.props[:stopexec] or not di_addr; return
806
891
  else break
807
892
  end
808
893
  end
809
894
  delay_slot[1] = delay - 1
810
895
  end
896
+
897
+ if block.edata.inv_export[di_addr - block.address + block.edata_ptr]
898
+ # ensure there is a block split if we have a label defined
899
+ break
900
+ end
811
901
  }
812
902
 
813
903
  ar = [di_addr]
814
904
  ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr
815
- ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) }
905
+ ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x, :cpu_context => cpu_context) }
816
906
 
817
907
  block
818
908
  end
@@ -834,51 +924,60 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
834
924
  @entrypoints ||= []
835
925
  @entrypoints |= entrypoints
836
926
 
837
- entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) }
927
+ entrypoints.each { |ep| do_disassemble_fast_deep(:addr => normalize(ep)) }
928
+
929
+ @callback_finished[] if callback_finished
838
930
  end
839
931
 
840
932
  def do_disassemble_fast_deep(ep)
841
933
  disassemble_fast(ep) { |fa, di|
842
- fa = normalize(fa)
843
- do_disassemble_fast_deep(fa)
844
- if di and ndi = di_at(fa)
845
- ndi.block.add_from_normal(di.address)
846
- end
934
+ do_disassemble_fast_deep(:addr => normalize(fa), :from => di.address)
847
935
  }
848
936
  end
849
937
 
850
938
  # disassembles fast from a list of entrypoints
851
939
  # see disassemble_fast_step
852
940
  def disassemble_fast(entrypoint, maxdepth=-1, &b)
853
- ep = [entrypoint]
854
- until ep.empty?
855
- disassemble_fast_step(ep, &b)
941
+ td = entrypoint
942
+ td = { :addr => entrypoint } unless td.kind_of?(::Hash)
943
+ td[:cpu_context] ||= get_initial_cpu_context(td[:addr])
944
+ todo = [td]
945
+ until todo.empty?
946
+ disassemble_fast_step(todo, &b)
856
947
  maxdepth -= 1
857
- ep.delete_if { |a| not @decoded[normalize(a[0])] } if maxdepth == 0
948
+ todo.delete_if { |a| not @decoded[normalize(a[:addr])] } if maxdepth == 0
858
949
  end
859
- check_noreturn_function(entrypoint)
950
+ check_noreturn_function(td[:addr])
860
951
  end
861
952
 
862
953
  # disassembles one block from the ary, see disassemble_fast_block
863
954
  def disassemble_fast_step(todo, &b)
864
955
  return if not x = todo.pop
865
- addr, from, from_subfuncret = x
866
956
 
867
- addr = normalize(addr)
957
+ addr = normalize(x[:addr])
868
958
 
869
959
  if di = @decoded[addr]
870
- if di.kind_of? DecodedInstruction
960
+ if di.kind_of?(DecodedInstruction)
871
961
  split_block(di.block, di.address) if not di.block_head?
872
- di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
962
+ di.block.add_from(x[:from], x[:from_subfuncret] ? :subfuncret : :normal) if x[:from] and x[:from] != :default
873
963
  end
964
+ elsif @function[addr] and x[:from]
874
965
  elsif s = get_section_at(addr)
875
- block = InstructionBlock.new(normalize(addr), s[0])
876
- block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
877
- todo.concat disassemble_fast_block(block, &b)
878
- elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr]
879
- if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function
880
- @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s)
881
- detect_function_thunk_noreturn(from) if @function[addr].noreturn
966
+ if x[:from] and c_parser and not disassemble_known_functions and name = get_all_labels_at(addr).find { |n|
967
+ cs = c_parser.toplevel.symbol[n] and cs.type.untypedef.kind_of?(C::Function) }
968
+ # do not disassemble internal function for which we have a prototype (eg static library)
969
+ puts "found known function #{name} at #{Expression[addr]}" if $VERBOSE
970
+ @function[addr] = @cpu.decode_c_function_prototype(@c_parser, c_parser.toplevel.symbol[name])
971
+ detect_function_thunk_noreturn(x[:from]) if @function[addr].noreturn
972
+ else
973
+ block = InstructionBlock.new(addr, s[0])
974
+ block.add_from(x[:from], x[:from_subfuncret] ? :subfuncret : :normal) if x[:from] and x[:from] != :default
975
+ todo.concat disassemble_fast_block(block, x[:cpu_context], &b)
976
+ end
977
+ elsif name = Expression[addr].reduce_rec and name.kind_of?(::String) and not @function[addr]
978
+ if c_parser and cs = c_parser.toplevel.symbol[name] and cs.type.untypedef.kind_of?(C::Function)
979
+ @function[addr] = @cpu.decode_c_function_prototype(@c_parser, cs)
980
+ detect_function_thunk_noreturn(x[:from]) if @function[addr].noreturn
882
981
  elsif @function[:default]
883
982
  @function[addr] = @function[:default].dup
884
983
  end
@@ -889,15 +988,14 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
889
988
 
890
989
  # check if an addr has an xref :x from a :saveip, if so mark as Function
891
990
  def disassemble_fast_checkfunc(addr)
892
- if @decoded[addr].kind_of? DecodedInstruction and not @function[addr]
991
+ if @decoded[addr].kind_of?(DecodedInstruction) and not @function[addr]
893
992
  func = false
894
993
  each_xref(addr, :x) { |x_|
895
994
  func = true if odi = di_at(x_.origin) and odi.opcode.props[:saveip]
896
995
  }
897
996
  if func
898
997
  auto_label_at(addr, 'sub', 'loc', 'xref')
899
- # XXX use default_btbind_callback ?
900
- @function[addr] = DecodedFunction.new
998
+ @function[addr] = (@function[:default] || DecodedFunction.new).dup
901
999
  @function[addr].finalized = true
902
1000
  detect_function_thunk(addr)
903
1001
  puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
@@ -909,11 +1007,11 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
909
1007
  # does not recurse into subfunctions
910
1008
  # assumes all :saveip returns, except those pointing to a subfunc with noreturn
911
1009
  # yields subfunction addresses (targets of :saveip)
912
- # only backtrace for :x with maxdepth 1 (ie handles only basic push+ret)
1010
+ # no backtrace for :x (change with backtrace_maxblocks_fast)
913
1011
  # returns a todo-style ary
914
1012
  # assumes @addrs_todo is empty
915
- def disassemble_fast_block(block, &b)
916
- block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of? InstructionBlock
1013
+ def disassemble_fast_block(block, cpu_context, &b)
1014
+ block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of?(InstructionBlock)
917
1015
  di_addr = block.address
918
1016
  delay_slot = nil
919
1017
  di = nil
@@ -926,7 +1024,9 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
926
1024
 
927
1025
  # decode instruction
928
1026
  block.edata.ptr = di_addr - block.address + block.edata_ptr
929
- if not di = @cpu.decode_instruction(block.edata, di_addr)
1027
+ cpu_context = cpu_context.dup if cpu_context
1028
+ if not di = @cpu.decode_instruction_context(self, block.edata, di_addr, cpu_context)
1029
+ break if block.edata.ptr >= block.edata.length and get_section_at(di_addr) and di = block.list.last
930
1030
  return ret
931
1031
  end
932
1032
 
@@ -934,7 +1034,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
934
1034
  block.add_di di
935
1035
  puts di if $DEBUG
936
1036
 
937
- di = @callback_newinstr[di] if callback_newinstr
1037
+ if callback_newinstr
1038
+ ndi = @callback_newinstr[di]
1039
+ if not ndi or not ndi.block
1040
+ block.list.delete di
1041
+ if ndi
1042
+ block.add_di ndi
1043
+ ndi.bin_length = di.bin_length if ndi.bin_length == 0
1044
+ @decoded[di_addr] = ndi
1045
+ end
1046
+ end
1047
+ di = ndi
1048
+ end
938
1049
  return ret if not di
939
1050
 
940
1051
  di_addr = di.next_addr
@@ -942,13 +1053,15 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
942
1053
  if di.opcode.props[:stopexec] or di.opcode.props[:setip]
943
1054
  if di.opcode.props[:setip]
944
1055
  @addrs_todo = []
945
- @program.get_xrefs_x(self, di).each { |expr|
946
- backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
1056
+ ar = @program.get_xrefs_x(self, di)
1057
+ ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr
1058
+ ar.each { |expr|
1059
+ backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast, :cpu_context => cpu_context)
947
1060
  }
948
1061
  end
949
1062
  if di.opcode.props[:saveip]
950
1063
  @addrs_todo = []
951
- ret.concat disassemble_fast_block_subfunc(di, &b)
1064
+ ret.concat disassemble_fast_block_subfunc(di, cpu_context, &b)
952
1065
  else
953
1066
  ret.concat @addrs_todo
954
1067
  @addrs_todo = []
@@ -965,12 +1078,17 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
965
1078
  end
966
1079
  }
967
1080
 
968
- di.block.add_to_normal(di_addr)
969
- ret << [di_addr, di.address]
1081
+ ar = [di_addr]
1082
+ ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr
1083
+ ar.each { |a|
1084
+ di.block.add_to_normal(a)
1085
+ ret << { :addr => a, :from => di.address, :cpu_context => cpu_context }
1086
+ }
1087
+ ret
970
1088
  end
971
1089
 
972
1090
  # handles when disassemble_fast encounters a call to a subfunction
973
- def disassemble_fast_block_subfunc(di)
1091
+ def disassemble_fast_block_subfunc(di, cpu_context)
974
1092
  funcs = di.block.to_normal.to_a
975
1093
  do_ret = funcs.empty?
976
1094
  ret = []
@@ -983,10 +1101,10 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
983
1101
  # this includes retaddr unless f is noreturn
984
1102
  bf.each { |btt|
985
1103
  next if btt.type != :x
986
- bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max)
1104
+ bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max, :cpu_context => cpu_context)
987
1105
  if btt.detached
988
- ret.concat bt # callback argument
989
- elsif bt.find { |a| normalize(a) == na }
1106
+ ret.concat bt.map { |a| { :addr => a } } # callback argument
1107
+ elsif not f.noreturn and bt.find { |a| normalize(a) == na }
990
1108
  do_ret = true
991
1109
  end
992
1110
  }
@@ -996,9 +1114,10 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
996
1114
  }
997
1115
  if do_ret
998
1116
  di.block.add_to_subfuncret(na)
999
- ret << [na, di.address, true]
1117
+ ret << { :addr => na, :from => di.address, :from_subfuncret => true, :cpu_context => cpu_context }
1000
1118
  di.block.add_to_normal :default if not di.block.to_normal and @function[:default]
1001
1119
  end
1120
+ di.add_comment 'noreturn' if ret.empty?
1002
1121
  ret
1003
1122
  end
1004
1123
 
@@ -1021,10 +1140,10 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1021
1140
  end
1022
1141
 
1023
1142
  # trace xrefs for execution
1024
- def backtrace_xrefs_di_x(di)
1143
+ def backtrace_xrefs_di_x(di, cpu_context)
1025
1144
  ar = @program.get_xrefs_x(self, di)
1026
1145
  ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr
1027
- ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x) }
1146
+ ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x, :cpu_context => cpu_context) }
1028
1147
  end
1029
1148
 
1030
1149
  # checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc])
@@ -1032,12 +1151,12 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1032
1151
  # which must not have return_addresses
1033
1152
  # returns the new thunk name if it was changed
1034
1153
  def detect_function_thunk(funcaddr)
1035
- # check thunk linearity (no conditionnal branch etc)
1154
+ # check thunk linearity (no conditional branch etc)
1036
1155
  addr = funcaddr
1037
1156
  count = 0
1038
1157
  while b = block_at(addr)
1039
1158
  count += 1
1040
- return if count > 5 or b.list.length > 4
1159
+ return if count > 5 or b.list.length > 5
1041
1160
  if b.to_subfuncret and not b.to_subfuncret.empty?
1042
1161
  return if b.to_subfuncret.length != 1
1043
1162
  addr = normalize(b.to_subfuncret.first)
@@ -1047,7 +1166,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1047
1166
  return if not btb = sf.backtrace_binding
1048
1167
  btb = btb.dup
1049
1168
  btb.delete_if { |k, v| Expression[k] == Expression[v] }
1050
- return if btb.length > 2 or btb.values.include? Expression::Unknown
1169
+ return if btb.length > 2 or btb.values.include? Expression::Unknown
1051
1170
  else
1052
1171
  return if not bt = b.to_normal
1053
1172
  if bt.include? :default
@@ -1065,7 +1184,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1065
1184
  f.backtrace_binding = { :thunk => addr }
1066
1185
  f.noreturn = true if @function[addr] and @function[addr].noreturn
1067
1186
  end
1068
- return if not fname.kind_of? ::String
1187
+ return if not fname.kind_of?(::String)
1069
1188
  l = auto_label_at(funcaddr, 'sub', 'loc')
1070
1189
  return if l[0, 4] != 'sub_'
1071
1190
  puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG
@@ -1103,14 +1222,14 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1103
1222
  # should only be called with fa = target of a call
1104
1223
  def check_noreturn_function(fa)
1105
1224
  fb = function_blocks(fa, false, false)
1225
+ return if fb.empty?
1106
1226
  lasts = fb.keys.find_all { |k| fb[k] == [] }
1107
- return if lasts.empty?
1108
1227
  if lasts.all? { |la|
1109
1228
  b = block_at(la)
1110
1229
  next if not di = b.list.last
1111
1230
  (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa|
1112
1231
  tf = function_at(tfa) and tf.noreturn
1113
- }) or (di.opcode.props[:stopexec] and not di.opcode.props[:setip])
1232
+ }) or (di.opcode.props[:stopexec] and not (di.opcode.props[:setip] or not get_xrefs_x(di).empty?))
1114
1233
  }
1115
1234
  # yay
1116
1235
  @function[fa] ||= DecodedFunction.new
@@ -1165,7 +1284,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1165
1284
  # (defaults to dasm.backtrace_maxblocks, which defaults to Dasm.backtrace_maxblocks)
1166
1285
  def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth)
1167
1286
  start_addr = normalize(addr)
1168
- stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array
1287
+ stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of?(::Array)
1169
1288
 
1170
1289
  # array of [obj, addr, from_subfuncret, loopdetect]
1171
1290
  # loopdetect is an array of [obj, addr, from_type] of each end of block encountered
@@ -1192,7 +1311,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1192
1311
  next if f_type == :indirect
1193
1312
  hadsomething = true
1194
1313
  o_f_addr = f_addr
1195
- f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
1314
+ f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of?(DecodedInstruction) # delay slot
1196
1315
  if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type }
1197
1316
  f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
1198
1317
  if f_obj and f_obj != w_obj # should avoid infinite loops
@@ -1206,7 +1325,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1206
1325
  f_loopdetect ||= w_loopdetect
1207
1326
  # only count non-trivial paths in loopdetect (ignore linear links)
1208
1327
  add_detect = [[f_obj, f_addr, f_type]]
1209
- add_detect = [] if @decoded[f_addr].kind_of? DecodedInstruction and tmp = @decoded[f_addr].block and
1328
+ add_detect = [] if @decoded[f_addr].kind_of?(DecodedInstruction) and tmp = @decoded[f_addr].block and
1210
1329
  ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and
1211
1330
  tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or
1212
1331
  (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address]))
@@ -1219,7 +1338,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1219
1338
  each_xref(w_addr, :x) { |x|
1220
1339
  f_addr = x.origin
1221
1340
  o_f_addr = f_addr
1222
- f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot
1341
+ f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of?(DecodedInstruction) # delay slot
1223
1342
  if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr }
1224
1343
  f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect)
1225
1344
  if f_obj and f_obj != w_obj
@@ -1291,6 +1410,88 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1291
1410
  end
1292
1411
  end
1293
1412
 
1413
# iterates over all instructions of a function from a given entrypoint
# carries an object while walking, the object is yielded for every instruction
# every block is walked only once, after all previous blocks are done (if possible)
# on a 'jz', a [:clone] event is yielded for every path beside the first
# on a junction (eg a -> b -> d, a -> c -> d), a [:merge] event occurs if froms have different objs
# event list:
#  [:di, <addr>, <decoded_instruction>, <object>]
#  [:clone, <newaddr>, <oldaddr>, <object>]
#  [:merge, <newaddr>, {<oldaddr1> => <object1>, <oldaddr2> => <object2>, ...}, <object1>]
#  [:subfunc, <subfunc_addr>, <call_addr>, <object>]
# all events should return an object
# :merge has a copy of object1 at the end so that uninterested callers can always return args[-1]
# if an event returns false, the trace stops for the current branch
#
# addr_start is the address of the first instruction to walk, obj_start the object
# handed to the first event
def function_walk(addr_start, obj_start)
  # addresses of instrs already seen => obj carried when the instr was reached
  done = {}
  # work list of [addr, obj]; a third element set to true marks an entry
  # that was already postponed once (see below)
  todo = [[addr_start, obj_start]]

  while hop = todo.pop
    addr, obj = hop
    # an address may be queued several times (eg postponed entry + another path):
    # never reprocess it, this would yield a duplicate :merge event
    next if done.has_key?(addr)

    di = di_at(addr)
    next if not di

    if done.empty?
      # very first hop: start from the entry instruction, which may not be the block head
      dilist = di.block.list[di.block.list.index(di)..-1]
    else
      # new block, check all 'from' have been seen
      if not hop[2]
        # may retry later
        all_ok = true
        di.block.each_from_samefunc(self) { |fa| all_ok = false unless done.has_key?(fa) }
        if not all_ok
          # requeue at the other end of the list, flagged so that it is not postponed twice
          todo.unshift([addr, obj, true])
          next
        end
      end

      froms = {}
      di.block.each_from_samefunc(self) { |fa| froms[fa] = done[fa] if done[fa] }
      if froms.values.uniq.length > 1
        obj = yield([:merge, addr, froms, froms.values.first])
        next if obj == false
      end

      dilist = di.block.list
    end

    # Array#each returns the array (truthy) unless interrupted by a break (nil):
    # the body of this 'if' only runs when the whole list was walked
    if dilist.each { |_di|
      break if done.has_key?(_di.address)	# looped back into addr_start
      done[_di.address] = obj
      obj = yield([:di, _di.address, _di, obj])
      break if obj == false	# also return false for the previous 'if'
    }

      from = dilist.last.address

      if di.block.to_normal and di.block.to_normal[0] and
          di.block.to_subfuncret and di.block.to_subfuncret[0]
        # current instruction block calls into a subfunction
        obj = di.block.to_normal.map { |subf|
          yield([:subfunc, subf, from, obj])
        }.first	# propagate 1st subfunc result
        next if obj == false
      end

      wantclone = false
      di.block.each_to_samefunc(self) { |ta|
        if wantclone
          nobj = yield([:clone, ta, from, obj])
          # the :clone handler refused this path: do not queue it
          next if nobj == false
          todo << [ta, nobj]
        else
          todo << [ta, obj]
          wantclone = true
        end
      }
    end
  end
end
+ end
1494
+
1294
1495
  # holds a backtrace result until a snapshot_addr is encountered
1295
1496
  class StoppedExpr
1296
1497
  attr_accessor :exprs
@@ -1320,6 +1521,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1320
1521
  # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto
1321
1522
  # :no_check => don't use backtrace_check_found (will not backtrace indirection static values)
1322
1523
  # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check)
1524
+ # :cpu_context => disassembler cpu_context
1323
1525
  def backtrace(expr, start_addr, nargs={})
1324
1526
  include_start = nargs.delete :include_start
1325
1527
  from_subfuncret = nargs.delete :from_subfuncret
@@ -1336,6 +1538,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1336
1538
  only_upto = nargs.delete :only_upto
1337
1539
  no_check = nargs.delete :no_check
1338
1540
  terminals = nargs.delete(:terminals) || []
1541
+ cpu_context = nargs.delete :cpu_context
1339
1542
  raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty?
1340
1543
 
1341
1544
  expr = Expression[expr]
@@ -1356,7 +1559,7 @@ puts " not backtracking stack address #{expr}" if debug_backtrace
1356
1559
  end
1357
1560
 
1358
1561
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1359
- di, origin, type, len, maxdepth, detached))
1562
+ di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr))
1360
1563
  # no need to update backtracked_for
1361
1564
  return vals
1362
1565
  elsif maxdepth <= 0
@@ -1365,7 +1568,7 @@ puts " not backtracking stack address #{expr}" if debug_backtrace
1365
1568
 
1366
1569
  # create initial backtracked_for
1367
1570
  if type and origin == start_addr and di
1368
- btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1)
1571
+ btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1, cpu_context)
1369
1572
  btt.address = di.address
1370
1573
  btt.exclude_instr = true if not include_start
1371
1574
  btt.from_subfuncret = true if from_subfuncret and include_start
@@ -1386,9 +1589,9 @@ puts "backtracking #{type} #{expr} from #{di || Expression[start_addr || 0]} for
1386
1589
  when :unknown_addr, :maxdepth
1387
1590
  puts " backtrace end #{ev} #{expr}" if debug_backtrace
1388
1591
  result |= [expr] if not snapshot_addr
1389
- @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin
1592
+ @addrs_todo << { :addr => expr, :from => (detached ? nil : origin), :cpu_context => cpu_context } if not snapshot_addr and type == :x and origin
1390
1593
  when :end
1391
- if not expr.kind_of? StoppedExpr
1594
+ if not expr.kind_of?(StoppedExpr)
1392
1595
  oldexpr = expr
1393
1596
  expr = backtrace_emu_blockup(h[:addr], expr)
1394
1597
  puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
@@ -1396,7 +1599,7 @@ puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr
1396
1599
  if expr != oldexpr and not snapshot_addr and vals = (no_check ?
1397
1600
  (!need_backtrace(expr, terminals) and [expr]) :
1398
1601
  backtrace_check_found(expr, nil, origin, type, len,
1399
- maxdepth-h[:loopdetect].length, detached))
1602
+ maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
1400
1603
  result |= vals
1401
1604
  next
1402
1605
  end
@@ -1405,14 +1608,14 @@ puts " backtrace end #{ev} #{expr}" if debug_backtrace
1405
1608
  if not snapshot_addr
1406
1609
  result |= [expr]
1407
1610
 
1408
- btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
1611
+ btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1, cpu_context)
1409
1612
  btt.detached = true if detached
1410
1613
  @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]]
1411
1614
  @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default
1412
- @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin
1615
+ @addrs_todo << { :addr => expr, :from => (detached ? nil : origin), :cpu_context => cpu_context } if type == :x and origin
1413
1616
  end
1414
1617
  when :stopaddr
1415
- if not expr.kind_of? StoppedExpr
1618
+ if not expr.kind_of?(StoppedExpr)
1416
1619
  oldexpr = expr
1417
1620
  expr = backtrace_emu_blockup(h[:addr], expr)
1418
1621
  puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
@@ -1421,15 +1624,16 @@ puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr
1421
1624
  puts " backtrace end #{ev} #{expr}" if debug_backtrace
1422
1625
  result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr])
1423
1626
  when :loop
1424
- next false if expr.kind_of? StoppedExpr
1627
+ next false if expr.kind_of?(StoppedExpr)
1425
1628
  t = h[:looptrace]
1426
1629
  oldexpr = t[0][0]
1427
1630
  next false if expr == oldexpr # unmodifying loop
1428
1631
  puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace
1632
+ bt_log << [:loop, expr, oldexpr, t.map { |z| z[1] }] if bt_log
1429
1633
  false
1430
1634
  when :up
1431
1635
  next false if only_upto and h[:to] != only_upto
1432
- next expr if expr.kind_of? StoppedExpr
1636
+ next expr if expr.kind_of?(StoppedExpr)
1433
1637
  oldexpr = expr
1434
1638
  expr = backtrace_emu_blockup(h[:from], expr)
1435
1639
  puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace
@@ -1437,7 +1641,7 @@ puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#
1437
1641
 
1438
1642
  if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
1439
1643
  backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
1440
- maxdepth-h[:loopdetect].length, detached))
1644
+ maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
1441
1645
  if snapshot_addr
1442
1646
  expr = StoppedExpr.new vals
1443
1647
  next expr
@@ -1459,7 +1663,7 @@ puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#
1459
1663
  end
1460
1664
  }
1461
1665
 
1462
- btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1)
1666
+ btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1, cpu_context)
1463
1667
  btt.detached = true if detached
1464
1668
  if x = di_at(h[:from])
1465
1669
  update_btf[x.block.backtracked_for, btt]
@@ -1483,7 +1687,7 @@ puts " already backtraced" if debug_backtrace
1483
1687
  end
1484
1688
  expr
1485
1689
  when :di, :func
1486
- next if expr.kind_of? StoppedExpr
1690
+ next if expr.kind_of?(StoppedExpr)
1487
1691
  if not snapshot_addr and @cpu.backtrace_is_stack_address(expr)
1488
1692
  puts " not backtracking stack address #{expr}" if debug_backtrace
1489
1693
  next false
@@ -1498,7 +1702,7 @@ oldexpr = expr
1498
1702
  when :func
1499
1703
  expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
1500
1704
  if snapshot_addr and snapshot_addr == h[:funcaddr]
1501
- # XXX recursiveness detection needs to be fixed
1705
+ # XXX recursiveness detection needs to be fixed
1502
1706
  puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
1503
1707
  next false
1504
1708
  end
@@ -1506,7 +1710,7 @@ puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_back
1506
1710
  end
1507
1711
  puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
1508
1712
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1509
- h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached))
1713
+ h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
1510
1714
  if snapshot_addr
1511
1715
  expr = StoppedExpr.new vals
1512
1716
  else
@@ -1538,7 +1742,7 @@ puts ' backtrace result: ' + result.map { |r| Expression[r] }.join(', ') if deb
1538
1742
  not need_backtrace(retaddr)
1539
1743
  puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace
1540
1744
  di.block.add_to_subfuncret normalize(retaddr)
1541
- if @decoded[funcaddr].kind_of? DecodedInstruction
1745
+ if @decoded[funcaddr].kind_of?(DecodedInstruction)
1542
1746
  # check that all callers :saveip returns (eg recursive call that was resolved
1543
1747
  # before we found funcaddr was a function)
1544
1748
  @decoded[funcaddr].block.each_from_normal { |fm|
@@ -1556,17 +1760,17 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1556
1760
  todo = []
1557
1761
  di.block.each_to_normal { |t| todo << normalize(t) }
1558
1762
  while a = todo.pop
1559
- next if faddrlist.include? a or not get_section_at(a)
1763
+ next if faddrlist.include?(a) or not get_section_at(a)
1560
1764
  faddrlist << a
1561
- if @decoded[a].kind_of? DecodedInstruction
1765
+ if @decoded[a].kind_of?(DecodedInstruction)
1562
1766
  @decoded[a].block.each_to_samefunc(self) { |t| todo << normalize(t) }
1563
1767
  end
1564
1768
  end
1565
1769
 
1566
- idx = @addrs_todo.index(@addrs_todo.find { |r, i, sfr| faddrlist.include? normalize(r) }) || -1
1567
- @addrs_todo.insert(idx, [retaddr, instraddr, true])
1770
+ idx = @addrs_todo.index(@addrs_todo.find { |aa| faddrlist.include? normalize(aa[:addr]) }) || -1
1771
+ @addrs_todo.insert(idx, { :addr => retaddr, :from => instraddr, :from_subfuncret => true, :cpu_context => btt.cpu_context })
1568
1772
  else
1569
- @addrs_todo << [retaddr, instraddr, true]
1773
+ @addrs_todo << { :addr => retaddr, :from => instraddr, :from_subfuncret => true, :cpu_context => btt.cpu_context }
1570
1774
  end
1571
1775
  true
1572
1776
  end
@@ -1588,10 +1792,14 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1588
1792
  (ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr
1589
1793
  end
1590
1794
 
1795
+ def backtrace_update_function_binding(addr, func=@function[addr], retaddrs=func.return_address)
1796
+ @cpu.backtrace_update_function_binding(self, addr, func, retaddrs)
1797
+ end
1798
+
1591
1799
  # static resolution of indirections
1592
1800
  def resolve(expr)
1593
1801
  binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind|
1594
- e, b = get_section_at(resolve(ind.target))
1802
+ e = get_edata_at(resolve(ind.target))
1595
1803
  return expr if not e
1596
1804
  binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ]
1597
1805
  }
@@ -1601,7 +1809,7 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1601
1809
  # returns true if the expression needs more backtrace
1602
1810
  # it checks for the presence of a symbol (not :unknown), which means it depends on some register value
1603
1811
  def need_backtrace(expr, terminals=[])
1604
- return if expr.kind_of? ::Integer
1812
+ return if expr.kind_of?(::Integer)
1605
1813
  !(expr.externals.grep(::Symbol) - [:unknown] - terminals).empty?
1606
1814
  end
1607
1815
 
@@ -1619,7 +1827,7 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1619
1827
  # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
1620
1828
  # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
1621
1829
  # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
1622
- def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
1830
+ def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr=nil)
1623
1831
  # only entrypoints or block starts called by a :saveip are checked for being a function
1624
1832
  # want to execute [esp] from a block start
1625
1833
  if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and (
@@ -1649,16 +1857,19 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1649
1857
  end
1650
1858
 
1651
1859
  return if need_backtrace(expr)
1860
+ if snapshot_addr
1861
+ return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) }
1862
+ end
1652
1863
 
1653
1864
  puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
1654
1865
  result = backtrace_value(expr, maxdepth)
1655
1866
  # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
1656
- result << expr if not type
1867
+ #result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler
1657
1868
  result.uniq!
1658
1869
 
1659
1870
  # create xrefs/labels
1660
1871
  result.each { |e|
1661
- backtrace_found_result(e, di, type, origin, len, detached)
1872
+ backtrace_found_result(e, di, type, origin, len, detached, cpu_context)
1662
1873
  } if type and origin
1663
1874
 
1664
1875
  result
@@ -1695,7 +1906,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
1695
1906
  ret = []
1696
1907
 
1697
1908
  decode_imm = lambda { |addr, len|
1698
- edata, foo = get_section_at(addr)
1909
+ edata = get_edata_at(addr)
1699
1910
  if edata
1700
1911
  Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
1701
1912
  else
@@ -1787,7 +1998,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1787
1998
  end
1788
1999
 
1789
2000
  # creates xrefs, updates addrs_todo, updates instr args
1790
- def backtrace_found_result(expr, di, type, origin, len, detached)
2001
+ def backtrace_found_result(expr, di, type, origin, len, detached, cpu_context)
1791
2002
  n = normalize(expr)
1792
2003
  fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot
1793
2004
  add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough
@@ -1803,7 +2014,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1803
2014
  # TODO trace expression evolution to allow handling of
1804
2015
  # mov eax, 28 ; add eax, 4 ; jmp eax
1805
2016
  # => mov eax, (loc_xx-4)
1806
- if di and not unk # and di.address == origin
2017
+ if di and not unk and expr != n # and di.address == origin
1807
2018
  @cpu.replace_instr_arg_immediate(di.instruction, expr, n)
1808
2019
  end
1809
2020
  if @decoded[origin] and not unk
@@ -1846,22 +2057,26 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1846
2057
  else
1847
2058
  @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk
1848
2059
  end
1849
- @addrs_todo << [n, origin]
2060
+ @addrs_todo << { :addr => n, :from => origin, :cpu_context => cpu_context }
1850
2061
  end
1851
2062
  end
1852
2063
 
2064
+ def inspect
2065
+ "<Metasm::Disassembler @%x>" % object_id
2066
+ end
2067
+
1853
2068
  def to_s
1854
2069
  a = ''
1855
2070
  dump { |l| a << l << "\n" }
1856
2071
  a
1857
2072
  end
1858
2073
 
1859
- # dumps the source, optionnally including data
2074
+ # dumps the source, optionally including data
1860
2075
  # yields (defaults puts) each line
1861
2076
  def dump(dump_data=true, &b)
1862
2077
  b ||= lambda { |l| puts l }
1863
2078
  @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }.each { |addr, edata|
1864
- addr = Expression[addr] if addr.kind_of? ::String
2079
+ addr = Expression[addr] if addr.kind_of?(::String)
1865
2080
  blockoffs = @decoded.values.grep(DecodedInstruction).map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length }
1866
2081
  b[@program.dump_section_header(addr, edata)]
1867
2082
  if not dump_data and edata.length > 16*1024 and blockoffs.empty?
@@ -1876,7 +2091,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1876
2091
  di = @decoded[addr+unk_off]
1877
2092
  if unk_off != di.block.edata_ptr
1878
2093
  b["\n// ------ overlap (#{unk_off-di.block.edata_ptr}) ------"]
1879
- elsif di.block.from_normal.kind_of? ::Array
2094
+ elsif di.block.from_normal.kind_of?(::Array)
1880
2095
  b["\n"]
1881
2096
  end
1882
2097
  dump_block(di.block, &b)
@@ -1916,12 +2131,12 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1916
2131
  if not xr.empty?
1917
2132
  b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"]
1918
2133
  end
1919
- if block.edata.inv_export[block.edata_ptr]
2134
+ if block.edata.inv_export[block.edata_ptr] and label_alias[block.address]
1920
2135
  b["\n"] if xr.empty?
1921
2136
  label_alias[block.address].each { |name| b["#{name}:"] }
1922
2137
  end
1923
2138
  if c = @comment[block.address]
1924
- c = c.join("\n") if c.kind_of? ::Array
2139
+ c = c.join("\n") if c.kind_of?(::Array)
1925
2140
  c.each_line { |l| b["// #{l}"] }
1926
2141
  end
1927
2142
  end
@@ -1933,8 +2148,8 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1933
2148
  # TODO array-style data access
1934
2149
  def dump_data(addr, edata, off, &b)
1935
2150
  b ||= lambda { |l| puts l }
1936
- if l = edata.inv_export[off]
1937
- l_list = label_alias[addr].to_a.sort
2151
+ if l = edata.inv_export[off] and label_alias[addr]
2152
+ l_list = label_alias[addr].sort
1938
2153
  l = l_list.pop || l
1939
2154
  l_list.each { |ll|
1940
2155
  b["#{ll}:"]
@@ -1966,11 +2181,11 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1966
2181
  dups = edata.virtsize - off
1967
2182
  @prog_binding.each_value { |a|
1968
2183
  tmp = Expression[a, :-, addr].reduce
1969
- dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups
2184
+ dups = tmp if tmp.kind_of?(::Integer) and tmp > 0 and tmp < dups
1970
2185
  }
1971
2186
  @xrefs.each_key { |a|
1972
2187
  tmp = Expression[a, :-, addr].reduce
1973
- dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups
2188
+ dups = tmp if tmp.kind_of?(::Integer) and tmp > 0 and tmp < dups
1974
2189
  }
1975
2190
  dups /= elemlen
1976
2191
  dups = 1 if dups < 1
@@ -2016,7 +2231,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
2016
2231
  if (elemlen == 1 or elemlen == 2)
2017
2232
  case value
2018
2233
  when 0x20..0x7e, 0x0a, 0x0d
2019
- if vals_.last.kind_of? ::String; vals_.last << value ; vals_
2234
+ if vals_.last.kind_of?(::String); vals_.last << value ; vals_
2020
2235
  else vals_ << value.chr
2021
2236
  end
2022
2237
  else vals_ << value
@@ -2026,7 +2241,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
2026
2241
  }
2027
2242
 
2028
2243
  vals.map! { |value|
2029
- if value.kind_of? ::String
2244
+ if value.kind_of?(::String)
2030
2245
  if value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care
2031
2246
  value.inspect
2032
2247
  else