metasm 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -134,9 +134,10 @@ class EncodedData
134
134
  # bytes from rawsize to virtsize are returned as zeroes
135
135
  # ignores self.relocations
136
136
  def read(len=@virtsize-@ptr)
137
- len = @virtsize-@ptr if len > @virtsize-@ptr
138
- str = (@ptr < @data.length) ? @data[@ptr, len] : ''
139
- str = str.to_str.ljust(len, "\0") if str.length < len
137
+ vlen = len
138
+ vlen = @virtsize-@ptr if len > @virtsize-@ptr
139
+ str = (@ptr < @data.length) ? @data[@ptr, vlen] : ''
140
+ str = str.to_str.ljust(vlen, "\0") if str.length < vlen
140
141
  @ptr += len
141
142
  str
142
143
  end
@@ -182,7 +183,7 @@ class CPU
182
183
  # returns a DecodedInstruction or nil
183
184
  def decode_instruction(edata, addr)
184
185
  @bin_lookaside ||= build_bin_lookaside
185
- di = decode_findopcode edata
186
+ di = decode_findopcode edata if edata.ptr <= edata.length
186
187
  di.address = addr if di
187
188
  di = decode_instr_op(edata, di) if di
188
189
  decode_instr_interpret(di, addr) if di
@@ -209,5 +210,35 @@ class CPU
209
210
  def delay_slot(di=nil)
210
211
  0
211
212
  end
213
+
214
+ def disassembler_default_func
215
+ DecodedFunction.new
216
+ end
217
+
218
+ # return something like backtrace_binding in the forward direction
219
+ # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
220
+ def get_fwdemu_binding(di, pc_reg=nil)
221
+ fdi = di.backtrace_binding ||= get_backtrace_binding(di)
222
+ fdi = fix_fwdemu_binding(di, fdi)
223
+ if pc_reg
224
+ if di.opcode.props[:setip]
225
+ xr = get_xrefs_x(nil, di)
226
+ if xr and xr.length == 1
227
+ fdi[pc_reg] = xr[0]
228
+ else
229
+ fdi[:incomplete_binding] = Expression[1]
230
+ end
231
+ else
232
+ fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
233
+ end
234
+ end
235
+ fdi
236
+ end
237
+
238
+ # patch a forward binding from the backtrace binding
239
+ # useful only on specific instructions that update a register *and* dereference that register (eg push)
240
+ def fix_fwdemu_binding(di, fbd)
241
+ fbd
242
+ end
212
243
  end
213
244
  end
@@ -69,7 +69,7 @@ class Decompiler
69
69
  @c_parser.toplevel.symbol.delete func.name
70
70
  decompile_func(entry)
71
71
  @recurse = pre_recurse
72
- if not dcl = @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
72
+ if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
73
73
  @c_parser.toplevel.statements << C::Declaration.new(func)
74
74
  end
75
75
  end
@@ -208,7 +208,7 @@ class Decompiler
208
208
  @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name }
209
209
  aoff = 1
210
210
  ptype.args.to_a.each { |a|
211
- aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
211
+ aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
212
212
  f.decompdata[:stackoff_type][aoff] ||= a.type
213
213
  f.decompdata[:stackoff_name][aoff] ||= a.name if a.name
214
214
  aoff += sizeof(a) # ary ?
@@ -293,7 +293,7 @@ class Decompiler
293
293
  @dasm.function[ta] = DecodedFunction.new
294
294
  puts "autofunc #{Expression[ta]}" if $VERBOSE
295
295
  end
296
-
296
+
297
297
  if @dasm.function[ta] and type != :subfuncret
298
298
  f = dasm.auto_label_at(ta, 'func')
299
299
  ta = dasm.normalize($1) if f =~ /^thunk_(.*)/
@@ -350,7 +350,7 @@ class Decompiler
350
350
  :include_start => i_s, :no_check => true, :terminals => [:frameptr])
351
351
  if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or
352
352
  (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)))
353
- ee
353
+ ee
354
354
  else e
355
355
  end
356
356
  end
@@ -602,12 +602,12 @@ class Decompiler
602
602
  when C::If
603
603
  patch_test[ce.test]
604
604
  if ce.bthen.kind_of? C::Block
605
- case ce.bthen.statements.length
605
+ case ce.bthen.statements.length
606
606
  when 1
607
607
  walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen }
608
608
  ce.bthen = ce.bthen.statements.first
609
609
  when 0
610
- if not ce.belse and i = ce.bthen.outer.statements.index(ce)
610
+ if not ce.belse and i = ce.bthen.outer.statements.index(ce)
611
611
  ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts
612
612
  end
613
613
  end
@@ -1521,7 +1521,7 @@ class Decompiler
1521
1521
  tabidx = off / sizeof(st)
1522
1522
  off -= tabidx * sizeof(st)
1523
1523
  ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array
1524
- return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list
1524
+ return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list
1525
1525
  (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and
1526
1526
  not s.type.untypedef.kind_of? C::Union))
1527
1527
 
@@ -1656,13 +1656,12 @@ class Decompiler
1656
1656
  ce.rexpr = p if ce.rexpr == v1
1657
1657
  }
1658
1658
  }
1659
-
1660
1659
  }
1661
1660
  end
1662
1661
 
1663
1662
  # to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types
1664
1663
  # will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..')
1665
- # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
1664
+ # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
1666
1665
  # remove useless casts ('(int)i' with 'int i;' => 'i')
1667
1666
  def optimize(scope)
1668
1667
  optimize_code(scope)
@@ -1681,7 +1680,7 @@ class Decompiler
1681
1680
  t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function
1682
1681
  t1 == t2 or
1683
1682
  (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
1684
- t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
1683
+ t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
1685
1684
  (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
1686
1685
  (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
1687
1686
  }
@@ -1871,7 +1870,7 @@ class Decompiler
1871
1870
  when ::Array; exp.any? { |_e| sideeffect _e, scope }
1872
1871
  when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile
1873
1872
  when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or
1874
- sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
1873
+ sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
1875
1874
  else true # failsafe
1876
1875
  end
1877
1876
  end
@@ -2009,7 +2008,7 @@ class Decompiler
2009
2008
  }.compact
2010
2009
 
2011
2010
  tw = to - [:write]
2012
- if to.include? :split or tw.length > 1
2011
+ if to.include? :split or tw.length > 1
2013
2012
  :split
2014
2013
  elsif tw.length == 1
2015
2014
  tw.first
@@ -2089,7 +2088,7 @@ class Decompiler
2089
2088
  if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and
2090
2089
  scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile
2091
2090
  next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and
2092
- !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
2091
+ !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
2093
2092
  next if oe.op == :& and not oe.lexpr # no &(++eax)
2094
2093
 
2095
2094
  # merge pre/postincrement into next/prev var usage
@@ -2221,7 +2220,7 @@ class Decompiler
2221
2220
  }
2222
2221
  case cnt
2223
2222
  when 0
2224
- break if bad
2223
+ break if bad
2225
2224
  next
2226
2225
  when 1 # good
2227
2226
  break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable
@@ -2443,7 +2442,7 @@ class Decompiler
2443
2442
  end
2444
2443
  # compare type.type cause var is an Array and the cast is a Pointer
2445
2444
  countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and
2446
- sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
2445
+ sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
2447
2446
  }
2448
2447
  vars.each { |n|
2449
2448
  if countref[n] == countderef[n]
@@ -2453,7 +2452,7 @@ class Decompiler
2453
2452
  v.initializer = v.initializer.first if v.initializer.kind_of? ::Array
2454
2453
  walk_ce(tl) { |ce|
2455
2454
  if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
2456
- ce.op = :'.'
2455
+ ce.op = :'.'
2457
2456
  elsif ce.lexpr == target
2458
2457
  ce.lexpr = v
2459
2458
  end
@@ -24,6 +24,8 @@ class DecodedInstruction
24
24
  attr_accessor :comment
25
25
  # a cache of the binding used by the backtracker to emulate this instruction
26
26
  attr_accessor :backtrace_binding
27
+ # used during fixed-size instruction decoding to hold the decoded raw opcode
28
+ attr_accessor :raw_data
27
29
 
28
30
  # create a new DecodedInstruction with an Instruction whose cpu is the argument
29
31
  # can take an existing Instruction as argument
@@ -233,6 +235,11 @@ class DecodedFunction
233
235
  attr_accessor :finalized
234
236
  # bool, if true the function does not return (eg exit() or ExitProcess())
235
237
  attr_accessor :noreturn
238
+ # hash stackoff => varname
239
+ # varname is a single String object shared by all ExpressionStrings (to allow renames)
240
+ attr_accessor :localvars
241
+ # hash stack offset => di address
242
+ attr_accessor :localvars_xrefs
236
243
 
237
244
  # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
238
245
  # else update lazily the binding from expr.externals, and return backtrace_binding
@@ -264,6 +271,16 @@ class DecodedFunction
264
271
  @backtracked_for = []
265
272
  @backtrace_binding = {}
266
273
  end
274
+
275
+ def get_localvar_stackoff(off, di=nil, str=nil)
276
+ if di
277
+ @localvars_xrefs ||= {}
278
+ @localvars_xrefs[off] ||= []
279
+ @localvars_xrefs[off] |= [di.address]
280
+ end
281
+ @localvars ||= {}
282
+ @localvars[off] ||= (str || (off > 0 ? 'arg_%X' % off : 'var_%X' % -off))
283
+ end
267
284
  end
268
285
 
269
286
  class CPU
@@ -438,7 +455,9 @@ class Disassembler
438
455
  when ::Integer
439
456
  when ::String
440
457
  raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0
441
- raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}" if @prog_binding[base] and @prog_binding[base] != Expression[base]
458
+ if ed = get_edata_at(base)
459
+ ed.del_export(base)
460
+ end
442
461
  encoded.add_export base, 0
443
462
  else raise "invalid section base #{base.inspect} - expected string or integer"
444
463
  end
@@ -451,7 +470,7 @@ class Disassembler
451
470
 
452
471
  # update section_edata.reloc
453
472
  # label -> list of relocs that refers to it
454
- @inv_section_reloc = {}
473
+ @inv_section_reloc ||= {}
455
474
  @sections.each { |b, e|
456
475
  e.reloc.each { |o, r|
457
476
  r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] }
@@ -485,14 +504,16 @@ class Disassembler
485
504
 
486
505
  # add pseudo-xrefs for exe relocs
487
506
  if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l]
507
+ x_more = []
488
508
  a.each { |b, e, o, r|
489
509
  addr = Expression[b]+o
490
510
  # ignore relocs embedded in an already-listed instr
491
- x << Xref.new(:reloc, addr) if not x.find { |x_|
511
+ x_more << Xref.new(:reloc, addr) if not x.find { |x_|
492
512
  next if not x_.origin or not di_at(x_.origin)
493
- (addr - x_.origin rescue 50) < @decoded[x_.origin].bin_length
513
+ (addr - x_.origin) < @decoded[x_.origin].bin_length rescue false
494
514
  }
495
515
  }
516
+ x.concat x_more
496
517
  end
497
518
 
498
519
  x.each { |x_| yield x_ }
@@ -505,9 +526,18 @@ class Disassembler
505
526
 
506
527
  # parses a C string for function prototypes
507
528
  def parse_c(str, filename=nil, lineno=1)
529
+ @c_parser_constcache = nil
508
530
  @c_parser ||= @cpu.new_cparser
509
531
  @c_parser.lexer.define_weak('__METASM__DECODE__')
510
532
  @c_parser.parse(str, filename, lineno)
533
+ rescue ParseError
534
+ @c_parser.lexer.feed! ''
535
+ raise
536
+ end
537
+
538
+ # list the constants ([name, integer value]) defined in the C code (#define / enums)
539
+ def c_constants
540
+ @c_parser_constcache ||= @c_parser.numeric_constants
511
541
  end
512
542
 
513
543
  # returns the canonical form of addr (absolute address integer or label of start of section + section offset)
@@ -568,6 +598,7 @@ class Disassembler
568
598
  end
569
599
 
570
600
  # returns a hash associating addr => list of labels at this addr
601
+ # label_alias[a] may be nil if a new label is created elsewhere in the edata with the same name
571
602
  def label_alias
572
603
  if not @label_alias_cache
573
604
  @label_alias_cache = {}
@@ -622,17 +653,16 @@ class Disassembler
622
653
  if not f.finalized
623
654
  f.finalized = true
624
655
  puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
625
- @cpu.backtrace_update_function_binding(self, addr, f, f.return_address)
656
+ backtrace_update_function_binding(addr, f)
626
657
  if not f.return_address
627
658
  detect_function_thunk(addr)
628
659
  end
629
660
  end
630
- @comment[addr] ||= []
631
661
  bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown }
632
662
  unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact
633
663
  bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty?
634
- @comment[addr] |= ["function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')]
635
- @comment[addr] |= ["function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')] if f.return_address
664
+ add_comment(addr, "function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', '))
665
+ add_comment(addr, "function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')) if f.return_address
636
666
  }
637
667
  end
638
668
 
@@ -658,7 +688,7 @@ puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
658
688
  next if not f = @function[subfunc] or f.finalized
659
689
  f.finalized = true
660
690
  puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
661
- @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address)
691
+ backtrace_update_function_binding(subfunc, f)
662
692
  if not f.return_address
663
693
  detect_function_thunk(subfunc)
664
694
  end
@@ -667,7 +697,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
667
697
 
668
698
  if di = @decoded[addr]
669
699
  if di.kind_of? DecodedInstruction
670
- split_block(di.block, di.address) if not di.block_head? # this updates di.block
700
+ split_block(di.block, di.address, true) if not di.block_head? # this updates di.block
671
701
  di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
672
702
  bf = di.block
673
703
  elsif di == true
@@ -726,20 +756,22 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
726
756
  end
727
757
 
728
758
  # splits an InstructionBlock, updates the blocks backtracked_for
729
- def split_block(block, address=nil)
759
+ def split_block(block, address=nil, rebacktrace=false)
730
760
  if not address # invoked as split_block(0x401012)
731
761
  return if not @decoded[block].kind_of? DecodedInstruction
732
762
  block, address = @decoded[block].block, block
733
763
  end
734
764
  return block if address == block.address
735
765
  new_b = block.split address
736
- new_b.backtracked_for.dup.each { |btt|
737
- backtrace(btt.expr, btt.address,
738
- :only_upto => block.list.last.address,
739
- :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
740
- :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
741
- :detached => btt.detached, :maxdepth => btt.maxdepth)
742
- }
766
+ if rebacktrace
767
+ new_b.backtracked_for.dup.each { |btt|
768
+ backtrace(btt.expr, btt.address,
769
+ :only_upto => block.list.last.address,
770
+ :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
771
+ :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
772
+ :detached => btt.detached, :maxdepth => btt.maxdepth)
773
+ }
774
+ end
743
775
  new_b
744
776
  end
745
777
 
@@ -763,8 +795,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
763
795
  each_xref(waddr, :w) { |x|
764
796
  #next if off + x.len < 0
765
797
  puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE
766
- @comment[di_addr] ||= []
767
- @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"]
798
+ add_comment(di_addr, "overwritten by #{@decoded[x.origin]}")
768
799
  @callback_selfmodifying[di_addr] if callback_selfmodifying
769
800
  return
770
801
  }
@@ -775,7 +806,8 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
775
806
  block.edata.ptr = di_addr - block.address + block.edata_ptr
776
807
  if not di = @cpu.decode_instruction(block.edata, di_addr)
777
808
  ed = block.edata
778
- puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE
809
+ break if ed.ptr >= ed.length and get_section_at(di_addr) and di = block.list.last
810
+ puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*').first}"} at #{Expression[di_addr]}" if $VERBOSE
779
811
  return
780
812
  end
781
813
 
@@ -783,7 +815,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
783
815
  block.add_di di
784
816
  puts di if $DEBUG
785
817
 
786
- di = @callback_newinstr[di] if callback_newinstr
818
+ if callback_newinstr
819
+ ndi = @callback_newinstr[di]
820
+ if not ndi or not ndi.block
821
+ block.list.delete di
822
+ if ndi
823
+ block.add_di ndi
824
+ ndi.bin_length = di.bin_length if ndi.bin_length == 0
825
+ @decoded[di_addr] = ndi
826
+ end
827
+ end
828
+ di = ndi
829
+ end
787
830
  return if not di
788
831
  block = di.block
789
832
 
@@ -793,7 +836,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
793
836
 
794
837
  if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
795
838
  # do not backtrace until delay slot is finished (eg MIPS: di is a
796
- # ret and the delay slot holds stack fixup needed to calc func_binding)
839
+ # ret and the delay slot holds stack fixup needed to calc func_binding)
797
840
  # XXX if the delay slot is also xref_x or :stopexec it is ignored
798
841
  delay_slot ||= [di, @cpu.delay_slot(di)]
799
842
  end
@@ -835,6 +878,8 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
835
878
  @entrypoints |= entrypoints
836
879
 
837
880
  entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) }
881
+
882
+ @callback_finished[] if callback_finished
838
883
  end
839
884
 
840
885
  def do_disassemble_fast_deep(ep)
@@ -896,8 +941,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
896
941
  }
897
942
  if func
898
943
  auto_label_at(addr, 'sub', 'loc', 'xref')
899
- # XXX use default_btbind_callback ?
900
- @function[addr] = DecodedFunction.new
944
+ @function[addr] = (@function[:default] || DecodedFunction.new).dup
901
945
  @function[addr].finalized = true
902
946
  detect_function_thunk(addr)
903
947
  puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
@@ -909,7 +953,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
909
953
  # does not recurse into subfunctions
910
954
  # assumes all :saveip returns, except those pointing to a subfunc with noreturn
911
955
  # yields subfunction addresses (targets of :saveip)
912
- # only backtrace for :x with maxdepth 1 (ie handles only basic push+ret)
956
+ # no backtrace for :x (change with backtrace_maxblocks_fast)
913
957
  # returns a todo-style ary
914
958
  # assumes @addrs_todo is empty
915
959
  def disassemble_fast_block(block, &b)
@@ -927,6 +971,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
927
971
  # decode instruction
928
972
  block.edata.ptr = di_addr - block.address + block.edata_ptr
929
973
  if not di = @cpu.decode_instruction(block.edata, di_addr)
974
+ break if block.edata.ptr >= block.edata.length and get_section_at(di_addr) and di = block.list.last
930
975
  return ret
931
976
  end
932
977
 
@@ -934,7 +979,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
934
979
  block.add_di di
935
980
  puts di if $DEBUG
936
981
 
937
- di = @callback_newinstr[di] if callback_newinstr
982
+ if callback_newinstr
983
+ ndi = @callback_newinstr[di]
984
+ if not ndi or not ndi.block
985
+ block.list.delete di
986
+ if ndi
987
+ block.add_di ndi
988
+ ndi.bin_length = di.bin_length if ndi.bin_length == 0
989
+ @decoded[di_addr] = ndi
990
+ end
991
+ end
992
+ di = ndi
993
+ end
938
994
  return ret if not di
939
995
 
940
996
  di_addr = di.next_addr
@@ -942,7 +998,9 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
942
998
  if di.opcode.props[:stopexec] or di.opcode.props[:setip]
943
999
  if di.opcode.props[:setip]
944
1000
  @addrs_todo = []
945
- @program.get_xrefs_x(self, di).each { |expr|
1001
+ ar = @program.get_xrefs_x(self, di)
1002
+ ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr
1003
+ ar.each { |expr|
946
1004
  backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
947
1005
  }
948
1006
  end
@@ -965,8 +1023,13 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
965
1023
  end
966
1024
  }
967
1025
 
968
- di.block.add_to_normal(di_addr)
969
- ret << [di_addr, di.address]
1026
+ ar = [di_addr]
1027
+ ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr
1028
+ ar.each { |a|
1029
+ di.block.add_to_normal(a)
1030
+ ret << [a, di.address]
1031
+ }
1032
+ ret
970
1033
  end
971
1034
 
972
1035
  # handles when disassemble_fast encounters a call to a subfunction
@@ -1037,7 +1100,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1037
1100
  count = 0
1038
1101
  while b = block_at(addr)
1039
1102
  count += 1
1040
- return if count > 5 or b.list.length > 4
1103
+ return if count > 5 or b.list.length > 5
1041
1104
  if b.to_subfuncret and not b.to_subfuncret.empty?
1042
1105
  return if b.to_subfuncret.length != 1
1043
1106
  addr = normalize(b.to_subfuncret.first)
@@ -1047,7 +1110,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1047
1110
  return if not btb = sf.backtrace_binding
1048
1111
  btb = btb.dup
1049
1112
  btb.delete_if { |k, v| Expression[k] == Expression[v] }
1050
- return if btb.length > 2 or btb.values.include? Expression::Unknown
1113
+ return if btb.length > 2 or btb.values.include? Expression::Unknown
1051
1114
  else
1052
1115
  return if not bt = b.to_normal
1053
1116
  if bt.include? :default
@@ -1291,6 +1354,88 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1291
1354
  end
1292
1355
  end
1293
1356
 
1357
+ # iterates over all instructions of a function from a given entrypoint
1358
+ # carries an object while walking, the object is yielded every instruction
1359
+ # every block is walked only once, after all previous blocks are done (if possible)
1360
+ # on a 'jz', a [:clone] event is yielded for every path except the first
1361
+ # on a junction (eg a -> b -> d, a -> c -> d), a [:merge] event occurs if froms have different objs
1362
+ # event list:
1363
+ # [:di, <addr>, <decoded_instruction>, <object>]
1364
+ # [:clone, <newaddr>, <oldaddr>, <object>]
1365
+ # [:merge, <newaddr>, {<oldaddr1> => <object1>, <oldaddr2> => <object2>, ...}, <object1>]
1366
+ # [:subfunc, <subfunc_addr>, <call_addr>, <object>]
1367
+ # all events should return an object
1368
+ # :merge has a copy of object1 at the end so that uninterested callers can always return args[-1]
1369
+ # if an event returns false, the trace stops for the current branch
1370
+ def function_walk(addr_start, obj_start)
1371
+ # addresses of instrs already seen => obj
1372
+ done = {}
1373
+ todo = [[addr_start, obj_start]]
1374
+
1375
+ while hop = todo.pop
1376
+ addr, obj = hop
1377
+ next if done.has_key?(done)
1378
+
1379
+ di = di_at(addr)
1380
+ next if not di
1381
+
1382
+ if done.empty?
1383
+ dilist = di.block.list[di.block.list.index(di)..-1]
1384
+ else
1385
+ # new block, check all 'from' have been seen
1386
+ if not hop[2]
1387
+ # may retry later
1388
+ all_ok = true
1389
+ di.block.each_from_samefunc(self) { |fa| all_ok = false unless done.has_key?(fa) }
1390
+ if not all_ok
1391
+ todo.unshift([addr, obj, true])
1392
+ next
1393
+ end
1394
+ end
1395
+
1396
+ froms = {}
1397
+ di.block.each_from_samefunc(self) { |fa| froms[fa] = done[fa] if done[fa] }
1398
+ if froms.values.uniq.length > 1
1399
+ obj = yield([:merge, addr, froms, froms.values.first])
1400
+ next if obj == false
1401
+ end
1402
+
1403
+ dilist = di.block.list
1404
+ end
1405
+
1406
+ if dilist.each { |_di|
1407
+ break if done.has_key?(_di.address) # looped back into addr_start
1408
+ done[_di.address] = obj
1409
+ obj = yield([:di, _di.address, _di, obj])
1410
+ break if obj == false # also return false for the previous 'if'
1411
+ }
1412
+
1413
+ from = dilist.last.address
1414
+
1415
+ if di.block.to_normal and di.block.to_normal[0] and
1416
+ di.block.to_subfuncret and di.block.to_subfuncret[0]
1417
+ # current instruction block calls into a subfunction
1418
+ obj = di.block.to_normal.map { |subf|
1419
+ yield([:subfunc, subf, from, obj])
1420
+ }.first # propagate 1st subfunc result
1421
+ next if obj == false
1422
+ end
1423
+
1424
+ wantclone = false
1425
+ di.block.each_to_samefunc(self) { |ta|
1426
+ if wantclone
1427
+ nobj = yield([:clone, ta, from, obj])
1428
+ next if obj == false
1429
+ todo << [ta, nobj]
1430
+ else
1431
+ todo << [ta, obj]
1432
+ wantclone = true
1433
+ end
1434
+ }
1435
+ end
1436
+ end
1437
+ end
1438
+
1294
1439
  # holds a backtrace result until a snapshot_addr is encountered
1295
1440
  class StoppedExpr
1296
1441
  attr_accessor :exprs
@@ -1356,7 +1501,7 @@ puts " not backtracking stack address #{expr}" if debug_backtrace
1356
1501
  end
1357
1502
 
1358
1503
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1359
- di, origin, type, len, maxdepth, detached))
1504
+ di, origin, type, len, maxdepth, detached, snapshot_addr))
1360
1505
  # no need to update backtracked_for
1361
1506
  return vals
1362
1507
  elsif maxdepth <= 0
@@ -1396,7 +1541,7 @@ puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr
1396
1541
  if expr != oldexpr and not snapshot_addr and vals = (no_check ?
1397
1542
  (!need_backtrace(expr, terminals) and [expr]) :
1398
1543
  backtrace_check_found(expr, nil, origin, type, len,
1399
- maxdepth-h[:loopdetect].length, detached))
1544
+ maxdepth-h[:loopdetect].length, detached, snapshot_addr))
1400
1545
  result |= vals
1401
1546
  next
1402
1547
  end
@@ -1437,7 +1582,7 @@ puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#
1437
1582
 
1438
1583
  if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
1439
1584
  backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
1440
- maxdepth-h[:loopdetect].length, detached))
1585
+ maxdepth-h[:loopdetect].length, detached, snapshot_addr))
1441
1586
  if snapshot_addr
1442
1587
  expr = StoppedExpr.new vals
1443
1588
  next expr
@@ -1498,7 +1643,7 @@ oldexpr = expr
1498
1643
  when :func
1499
1644
  expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
1500
1645
  if snapshot_addr and snapshot_addr == h[:funcaddr]
1501
- # XXX recursiveness detection needs to be fixed
1646
+ # XXX recursiveness detection needs to be fixed
1502
1647
  puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
1503
1648
  next false
1504
1649
  end
@@ -1506,7 +1651,7 @@ puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_back
1506
1651
  end
1507
1652
  puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
1508
1653
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1509
- h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached))
1654
+ h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, snapshot_addr))
1510
1655
  if snapshot_addr
1511
1656
  expr = StoppedExpr.new vals
1512
1657
  else
@@ -1588,10 +1733,14 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1588
1733
  (ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr
1589
1734
  end
1590
1735
 
1736
+ def backtrace_update_function_binding(addr, func=@function[addr], retaddrs=func.return_address)
1737
+ @cpu.backtrace_update_function_binding(self, addr, func, retaddrs)
1738
+ end
1739
+
1591
1740
  # static resolution of indirections
1592
1741
  def resolve(expr)
1593
1742
  binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind|
1594
- e, b = get_section_at(resolve(ind.target))
1743
+ e = get_edata_at(resolve(ind.target))
1595
1744
  return expr if not e
1596
1745
  binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ]
1597
1746
  }
@@ -1619,7 +1768,7 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1619
1768
  # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
1620
1769
  # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
1621
1770
  # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
1622
- def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
1771
+ def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, snapshot_addr=nil)
1623
1772
  # only entrypoints or block starts called by a :saveip are checked for being a function
1624
1773
  # want to execute [esp] from a block start
1625
1774
  if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and (
@@ -1649,11 +1798,14 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1649
1798
  end
1650
1799
 
1651
1800
  return if need_backtrace(expr)
1801
+ if snapshot_addr
1802
+ return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) }
1803
+ end
1652
1804
 
1653
1805
  puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
1654
1806
  result = backtrace_value(expr, maxdepth)
1655
1807
  # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
1656
- result << expr if not type
1808
+ #result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler
1657
1809
  result.uniq!
1658
1810
 
1659
1811
  # create xrefs/labels
@@ -1695,7 +1847,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
1695
1847
  ret = []
1696
1848
 
1697
1849
  decode_imm = lambda { |addr, len|
1698
- edata, foo = get_section_at(addr)
1850
+ edata = get_edata_at(addr)
1699
1851
  if edata
1700
1852
  Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
1701
1853
  else
@@ -1803,7 +1955,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1803
1955
  # TODO trace expression evolution to allow handling of
1804
1956
  # mov eax, 28 ; add eax, 4 ; jmp eax
1805
1957
  # => mov eax, (loc_xx-4)
1806
- if di and not unk # and di.address == origin
1958
+ if di and not unk and expr != n # and di.address == origin
1807
1959
  @cpu.replace_instr_arg_immediate(di.instruction, expr, n)
1808
1960
  end
1809
1961
  if @decoded[origin] and not unk
@@ -1850,6 +2002,10 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1850
2002
  end
1851
2003
  end
1852
2004
 
2005
+ def inspect
2006
+ "<Metasm::Disassembler @%x>" % object_id
2007
+ end
2008
+
1853
2009
  def to_s
1854
2010
  a = ''
1855
2011
  dump { |l| a << l << "\n" }
@@ -1916,7 +2072,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1916
2072
  if not xr.empty?
1917
2073
  b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"]
1918
2074
  end
1919
- if block.edata.inv_export[block.edata_ptr]
2075
+ if block.edata.inv_export[block.edata_ptr] and label_alias[block.address]
1920
2076
  b["\n"] if xr.empty?
1921
2077
  label_alias[block.address].each { |name| b["#{name}:"] }
1922
2078
  end
@@ -1933,8 +2089,8 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1933
2089
  # TODO array-style data access
1934
2090
  def dump_data(addr, edata, off, &b)
1935
2091
  b ||= lambda { |l| puts l }
1936
- if l = edata.inv_export[off]
1937
- l_list = label_alias[addr].to_a.sort
2092
+ if l = edata.inv_export[off] and label_alias[addr]
2093
+ l_list = label_alias[addr].sort
1938
2094
  l = l_list.pop || l
1939
2095
  l_list.each { |ll|
1940
2096
  b["#{ll}:"]