metasm 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -134,9 +134,10 @@ class EncodedData
134
134
  # bytes from rawsize to virtsize are returned as zeroes
135
135
  # ignores self.relocations
136
136
  def read(len=@virtsize-@ptr)
137
- len = @virtsize-@ptr if len > @virtsize-@ptr
138
- str = (@ptr < @data.length) ? @data[@ptr, len] : ''
139
- str = str.to_str.ljust(len, "\0") if str.length < len
137
+ vlen = len
138
+ vlen = @virtsize-@ptr if len > @virtsize-@ptr
139
+ str = (@ptr < @data.length) ? @data[@ptr, vlen] : ''
140
+ str = str.to_str.ljust(vlen, "\0") if str.length < vlen
140
141
  @ptr += len
141
142
  str
142
143
  end
@@ -182,7 +183,7 @@ class CPU
182
183
  # returns a DecodedInstruction or nil
183
184
  def decode_instruction(edata, addr)
184
185
  @bin_lookaside ||= build_bin_lookaside
185
- di = decode_findopcode edata
186
+ di = decode_findopcode edata if edata.ptr <= edata.length
186
187
  di.address = addr if di
187
188
  di = decode_instr_op(edata, di) if di
188
189
  decode_instr_interpret(di, addr) if di
@@ -209,5 +210,35 @@ class CPU
209
210
  def delay_slot(di=nil)
210
211
  0
211
212
  end
213
+
214
+ def disassembler_default_func
215
+ DecodedFunction.new
216
+ end
217
+
218
+ # return something like backtrace_binding in the forward direction
219
+ # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
220
+ def get_fwdemu_binding(di, pc_reg=nil)
221
+ fdi = di.backtrace_binding ||= get_backtrace_binding(di)
222
+ fdi = fix_fwdemu_binding(di, fdi)
223
+ if pc_reg
224
+ if di.opcode.props[:setip]
225
+ xr = get_xrefs_x(nil, di)
226
+ if xr and xr.length == 1
227
+ fdi[pc_reg] = xr[0]
228
+ else
229
+ fdi[:incomplete_binding] = Expression[1]
230
+ end
231
+ else
232
+ fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
233
+ end
234
+ end
235
+ fdi
236
+ end
237
+
238
+ # patch a forward binding from the backtrace binding
239
+ # useful only on specific instructions that update a register *and* dereference that register (eg push)
240
+ def fix_fwdemu_binding(di, fbd)
241
+ fbd
242
+ end
212
243
  end
213
244
  end
@@ -69,7 +69,7 @@ class Decompiler
69
69
  @c_parser.toplevel.symbol.delete func.name
70
70
  decompile_func(entry)
71
71
  @recurse = pre_recurse
72
- if not dcl = @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
72
+ if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
73
73
  @c_parser.toplevel.statements << C::Declaration.new(func)
74
74
  end
75
75
  end
@@ -208,7 +208,7 @@ class Decompiler
208
208
  @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name }
209
209
  aoff = 1
210
210
  ptype.args.to_a.each { |a|
211
- aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
211
+ aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
212
212
  f.decompdata[:stackoff_type][aoff] ||= a.type
213
213
  f.decompdata[:stackoff_name][aoff] ||= a.name if a.name
214
214
  aoff += sizeof(a) # ary ?
@@ -293,7 +293,7 @@ class Decompiler
293
293
  @dasm.function[ta] = DecodedFunction.new
294
294
  puts "autofunc #{Expression[ta]}" if $VERBOSE
295
295
  end
296
-
296
+
297
297
  if @dasm.function[ta] and type != :subfuncret
298
298
  f = dasm.auto_label_at(ta, 'func')
299
299
  ta = dasm.normalize($1) if f =~ /^thunk_(.*)/
@@ -350,7 +350,7 @@ class Decompiler
350
350
  :include_start => i_s, :no_check => true, :terminals => [:frameptr])
351
351
  if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or
352
352
  (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)))
353
- ee
353
+ ee
354
354
  else e
355
355
  end
356
356
  end
@@ -602,12 +602,12 @@ class Decompiler
602
602
  when C::If
603
603
  patch_test[ce.test]
604
604
  if ce.bthen.kind_of? C::Block
605
- case ce.bthen.statements.length
605
+ case ce.bthen.statements.length
606
606
  when 1
607
607
  walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen }
608
608
  ce.bthen = ce.bthen.statements.first
609
609
  when 0
610
- if not ce.belse and i = ce.bthen.outer.statements.index(ce)
610
+ if not ce.belse and i = ce.bthen.outer.statements.index(ce)
611
611
  ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts
612
612
  end
613
613
  end
@@ -1521,7 +1521,7 @@ class Decompiler
1521
1521
  tabidx = off / sizeof(st)
1522
1522
  off -= tabidx * sizeof(st)
1523
1523
  ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array
1524
- return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list
1524
+ return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list
1525
1525
  (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and
1526
1526
  not s.type.untypedef.kind_of? C::Union))
1527
1527
 
@@ -1656,13 +1656,12 @@ class Decompiler
1656
1656
  ce.rexpr = p if ce.rexpr == v1
1657
1657
  }
1658
1658
  }
1659
-
1660
1659
  }
1661
1660
  end
1662
1661
 
1663
1662
  # to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types
1664
1663
  # will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..')
1665
- # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
1664
+ # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
1666
1665
  # remove useless casts ('(int)i' with 'int i;' => 'i')
1667
1666
  def optimize(scope)
1668
1667
  optimize_code(scope)
@@ -1681,7 +1680,7 @@ class Decompiler
1681
1680
  t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function
1682
1681
  t1 == t2 or
1683
1682
  (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
1684
- t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
1683
+ t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
1685
1684
  (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
1686
1685
  (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
1687
1686
  }
@@ -1871,7 +1870,7 @@ class Decompiler
1871
1870
  when ::Array; exp.any? { |_e| sideeffect _e, scope }
1872
1871
  when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile
1873
1872
  when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or
1874
- sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
1873
+ sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
1875
1874
  else true # failsafe
1876
1875
  end
1877
1876
  end
@@ -2009,7 +2008,7 @@ class Decompiler
2009
2008
  }.compact
2010
2009
 
2011
2010
  tw = to - [:write]
2012
- if to.include? :split or tw.length > 1
2011
+ if to.include? :split or tw.length > 1
2013
2012
  :split
2014
2013
  elsif tw.length == 1
2015
2014
  tw.first
@@ -2089,7 +2088,7 @@ class Decompiler
2089
2088
  if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and
2090
2089
  scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile
2091
2090
  next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and
2092
- !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
2091
+ !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
2093
2092
  next if oe.op == :& and not oe.lexpr # no &(++eax)
2094
2093
 
2095
2094
  # merge pre/postincrement into next/prev var usage
@@ -2221,7 +2220,7 @@ class Decompiler
2221
2220
  }
2222
2221
  case cnt
2223
2222
  when 0
2224
- break if bad
2223
+ break if bad
2225
2224
  next
2226
2225
  when 1 # good
2227
2226
  break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable
@@ -2443,7 +2442,7 @@ class Decompiler
2443
2442
  end
2444
2443
  # compare type.type cause var is an Array and the cast is a Pointer
2445
2444
  countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and
2446
- sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
2445
+ sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
2447
2446
  }
2448
2447
  vars.each { |n|
2449
2448
  if countref[n] == countderef[n]
@@ -2453,7 +2452,7 @@ class Decompiler
2453
2452
  v.initializer = v.initializer.first if v.initializer.kind_of? ::Array
2454
2453
  walk_ce(tl) { |ce|
2455
2454
  if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
2456
- ce.op = :'.'
2455
+ ce.op = :'.'
2457
2456
  elsif ce.lexpr == target
2458
2457
  ce.lexpr = v
2459
2458
  end
@@ -24,6 +24,8 @@ class DecodedInstruction
24
24
  attr_accessor :comment
25
25
  # a cache of the binding used by the backtracker to emulate this instruction
26
26
  attr_accessor :backtrace_binding
27
+ # used during fixed-size instruction decoding to hold the decoded raw opcode
28
+ attr_accessor :raw_data
27
29
 
28
30
  # create a new DecodedInstruction with an Instruction whose cpu is the argument
29
31
  # can take an existing Instruction as argument
@@ -233,6 +235,11 @@ class DecodedFunction
233
235
  attr_accessor :finalized
234
236
  # bool, if true the function does not return (eg exit() or ExitProcess())
235
237
  attr_accessor :noreturn
238
+ # hash stackoff => varname
239
+ # varname is a single String object shared by all ExpressionStrings (to allow renames)
240
+ attr_accessor :localvars
241
+ # hash stack offset => di address
242
+ attr_accessor :localvars_xrefs
236
243
 
237
244
  # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth]
238
245
  # else update lazily the binding from expr.externals, and return backtrace_binding
@@ -264,6 +271,16 @@ class DecodedFunction
264
271
  @backtracked_for = []
265
272
  @backtrace_binding = {}
266
273
  end
274
+
275
+ def get_localvar_stackoff(off, di=nil, str=nil)
276
+ if di
277
+ @localvars_xrefs ||= {}
278
+ @localvars_xrefs[off] ||= []
279
+ @localvars_xrefs[off] |= [di.address]
280
+ end
281
+ @localvars ||= {}
282
+ @localvars[off] ||= (str || (off > 0 ? 'arg_%X' % off : 'var_%X' % -off))
283
+ end
267
284
  end
268
285
 
269
286
  class CPU
@@ -438,7 +455,9 @@ class Disassembler
438
455
  when ::Integer
439
456
  when ::String
440
457
  raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0
441
- raise "invalid section base #{base.inspect} - already seen at #{@prog_binding[base]}" if @prog_binding[base] and @prog_binding[base] != Expression[base]
458
+ if ed = get_edata_at(base)
459
+ ed.del_export(base)
460
+ end
442
461
  encoded.add_export base, 0
443
462
  else raise "invalid section base #{base.inspect} - expected string or integer"
444
463
  end
@@ -451,7 +470,7 @@ class Disassembler
451
470
 
452
471
  # update section_edata.reloc
453
472
  # label -> list of relocs that refers to it
454
- @inv_section_reloc = {}
473
+ @inv_section_reloc ||= {}
455
474
  @sections.each { |b, e|
456
475
  e.reloc.each { |o, r|
457
476
  r.target.externals.grep(::String).each { |ext| (@inv_section_reloc[ext] ||= []) << [b, e, o, r] }
@@ -485,14 +504,16 @@ class Disassembler
485
504
 
486
505
  # add pseudo-xrefs for exe relocs
487
506
  if (not type or type == :reloc) and l = get_label_at(addr) and a = @inv_section_reloc[l]
507
+ x_more = []
488
508
  a.each { |b, e, o, r|
489
509
  addr = Expression[b]+o
490
510
  # ignore relocs embedded in an already-listed instr
491
- x << Xref.new(:reloc, addr) if not x.find { |x_|
511
+ x_more << Xref.new(:reloc, addr) if not x.find { |x_|
492
512
  next if not x_.origin or not di_at(x_.origin)
493
- (addr - x_.origin rescue 50) < @decoded[x_.origin].bin_length
513
+ (addr - x_.origin) < @decoded[x_.origin].bin_length rescue false
494
514
  }
495
515
  }
516
+ x.concat x_more
496
517
  end
497
518
 
498
519
  x.each { |x_| yield x_ }
@@ -505,9 +526,18 @@ class Disassembler
505
526
 
506
527
  # parses a C string for function prototypes
507
528
  def parse_c(str, filename=nil, lineno=1)
529
+ @c_parser_constcache = nil
508
530
  @c_parser ||= @cpu.new_cparser
509
531
  @c_parser.lexer.define_weak('__METASM__DECODE__')
510
532
  @c_parser.parse(str, filename, lineno)
533
+ rescue ParseError
534
+ @c_parser.lexer.feed! ''
535
+ raise
536
+ end
537
+
538
+ # list the constants ([name, integer value]) defined in the C code (#define / enums)
539
+ def c_constants
540
+ @c_parser_constcache ||= @c_parser.numeric_constants
511
541
  end
512
542
 
513
543
  # returns the canonical form of addr (absolute address integer or label of start of section + section offset)
@@ -568,6 +598,7 @@ class Disassembler
568
598
  end
569
599
 
570
600
  # returns a hash associating addr => list of labels at this addr
601
+ # label_alias[a] may be nil if a new label is created elsewhere in the edata with the same name
571
602
  def label_alias
572
603
  if not @label_alias_cache
573
604
  @label_alias_cache = {}
@@ -622,17 +653,16 @@ class Disassembler
622
653
  if not f.finalized
623
654
  f.finalized = true
624
655
  puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
625
- @cpu.backtrace_update_function_binding(self, addr, f, f.return_address)
656
+ backtrace_update_function_binding(addr, f)
626
657
  if not f.return_address
627
658
  detect_function_thunk(addr)
628
659
  end
629
660
  end
630
- @comment[addr] ||= []
631
661
  bd = f.backtrace_binding.reject { |k, v| Expression[k] == Expression[v] or Expression[v] == Expression::Unknown }
632
662
  unk = f.backtrace_binding.map { |k, v| k if v == Expression::Unknown }.compact
633
663
  bd[unk.map { |u| Expression[u].to_s }.sort.join(',')] = Expression::Unknown if not unk.empty?
634
- @comment[addr] |= ["function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', ')]
635
- @comment[addr] |= ["function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')] if f.return_address
664
+ add_comment(addr, "function binding: " + bd.map { |k, v| "#{k} -> #{v}" }.sort.join(', '))
665
+ add_comment(addr, "function ends at " + f.return_address.map { |ra| Expression[ra] }.join(', ')) if f.return_address
636
666
  }
637
667
  end
638
668
 
@@ -658,7 +688,7 @@ puts " finalize subfunc #{Expression[addr]}" if debug_backtrace
658
688
  next if not f = @function[subfunc] or f.finalized
659
689
  f.finalized = true
660
690
  puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
661
- @cpu.backtrace_update_function_binding(self, subfunc, f, f.return_address)
691
+ backtrace_update_function_binding(subfunc, f)
662
692
  if not f.return_address
663
693
  detect_function_thunk(subfunc)
664
694
  end
@@ -667,7 +697,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
667
697
 
668
698
  if di = @decoded[addr]
669
699
  if di.kind_of? DecodedInstruction
670
- split_block(di.block, di.address) if not di.block_head? # this updates di.block
700
+ split_block(di.block, di.address, true) if not di.block_head? # this updates di.block
671
701
  di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default
672
702
  bf = di.block
673
703
  elsif di == true
@@ -726,20 +756,22 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
726
756
  end
727
757
 
728
758
  # splits an InstructionBlock, updates the blocks backtracked_for
729
- def split_block(block, address=nil)
759
+ def split_block(block, address=nil, rebacktrace=false)
730
760
  if not address # invoked as split_block(0x401012)
731
761
  return if not @decoded[block].kind_of? DecodedInstruction
732
762
  block, address = @decoded[block].block, block
733
763
  end
734
764
  return block if address == block.address
735
765
  new_b = block.split address
736
- new_b.backtracked_for.dup.each { |btt|
737
- backtrace(btt.expr, btt.address,
738
- :only_upto => block.list.last.address,
739
- :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
740
- :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
741
- :detached => btt.detached, :maxdepth => btt.maxdepth)
742
- }
766
+ if rebacktrace
767
+ new_b.backtracked_for.dup.each { |btt|
768
+ backtrace(btt.expr, btt.address,
769
+ :only_upto => block.list.last.address,
770
+ :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret,
771
+ :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len,
772
+ :detached => btt.detached, :maxdepth => btt.maxdepth)
773
+ }
774
+ end
743
775
  new_b
744
776
  end
745
777
 
@@ -763,8 +795,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
763
795
  each_xref(waddr, :w) { |x|
764
796
  #next if off + x.len < 0
765
797
  puts "W: disasm: self-modifying code at #{Expression[waddr]}" if $VERBOSE
766
- @comment[di_addr] ||= []
767
- @comment[di_addr] |= ["overwritten by #{@decoded[x.origin]}"]
798
+ add_comment(di_addr, "overwritten by #{@decoded[x.origin]}")
768
799
  @callback_selfmodifying[di_addr] if callback_selfmodifying
769
800
  return
770
801
  }
@@ -775,7 +806,8 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
775
806
  block.edata.ptr = di_addr - block.address + block.edata_ptr
776
807
  if not di = @cpu.decode_instruction(block.edata, di_addr)
777
808
  ed = block.edata
778
- puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*')}"} at #{Expression[di_addr]}" if $VERBOSE
809
+ break if ed.ptr >= ed.length and get_section_at(di_addr) and di = block.list.last
810
+ puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*').first}"} at #{Expression[di_addr]}" if $VERBOSE
779
811
  return
780
812
  end
781
813
 
@@ -783,7 +815,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
783
815
  block.add_di di
784
816
  puts di if $DEBUG
785
817
 
786
- di = @callback_newinstr[di] if callback_newinstr
818
+ if callback_newinstr
819
+ ndi = @callback_newinstr[di]
820
+ if not ndi or not ndi.block
821
+ block.list.delete di
822
+ if ndi
823
+ block.add_di ndi
824
+ ndi.bin_length = di.bin_length if ndi.bin_length == 0
825
+ @decoded[di_addr] = ndi
826
+ end
827
+ end
828
+ di = ndi
829
+ end
787
830
  return if not di
788
831
  block = di.block
789
832
 
@@ -793,7 +836,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
793
836
 
794
837
  if not di_addr or di.opcode.props[:stopexec] or not @program.get_xrefs_x(self, di).empty?
795
838
  # do not backtrace until delay slot is finished (eg MIPS: di is a
796
- # ret and the delay slot holds stack fixup needed to calc func_binding)
839
+ # ret and the delay slot holds stack fixup needed to calc func_binding)
797
840
  # XXX if the delay slot is also xref_x or :stopexec it is ignored
798
841
  delay_slot ||= [di, @cpu.delay_slot(di)]
799
842
  end
@@ -835,6 +878,8 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
835
878
  @entrypoints |= entrypoints
836
879
 
837
880
  entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) }
881
+
882
+ @callback_finished[] if callback_finished
838
883
  end
839
884
 
840
885
  def do_disassemble_fast_deep(ep)
@@ -896,8 +941,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
896
941
  }
897
942
  if func
898
943
  auto_label_at(addr, 'sub', 'loc', 'xref')
899
- # XXX use default_btbind_callback ?
900
- @function[addr] = DecodedFunction.new
944
+ @function[addr] = (@function[:default] || DecodedFunction.new).dup
901
945
  @function[addr].finalized = true
902
946
  detect_function_thunk(addr)
903
947
  puts "found new function #{get_label_at(addr)} at #{Expression[addr]}" if $VERBOSE
@@ -909,7 +953,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
909
953
  # does not recurse into subfunctions
910
954
  # assumes all :saveip returns, except those pointing to a subfunc with noreturn
911
955
  # yields subfunction addresses (targets of :saveip)
912
- # only backtrace for :x with maxdepth 1 (ie handles only basic push+ret)
956
+ # no backtrace for :x (change with backtrace_maxblocks_fast)
913
957
  # returns a todo-style ary
914
958
  # assumes @addrs_todo is empty
915
959
  def disassemble_fast_block(block, &b)
@@ -927,6 +971,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
927
971
  # decode instruction
928
972
  block.edata.ptr = di_addr - block.address + block.edata_ptr
929
973
  if not di = @cpu.decode_instruction(block.edata, di_addr)
974
+ break if block.edata.ptr >= block.edata.length and get_section_at(di_addr) and di = block.list.last
930
975
  return ret
931
976
  end
932
977
 
@@ -934,7 +979,18 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
934
979
  block.add_di di
935
980
  puts di if $DEBUG
936
981
 
937
- di = @callback_newinstr[di] if callback_newinstr
982
+ if callback_newinstr
983
+ ndi = @callback_newinstr[di]
984
+ if not ndi or not ndi.block
985
+ block.list.delete di
986
+ if ndi
987
+ block.add_di ndi
988
+ ndi.bin_length = di.bin_length if ndi.bin_length == 0
989
+ @decoded[di_addr] = ndi
990
+ end
991
+ end
992
+ di = ndi
993
+ end
938
994
  return ret if not di
939
995
 
940
996
  di_addr = di.next_addr
@@ -942,7 +998,9 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
942
998
  if di.opcode.props[:stopexec] or di.opcode.props[:setip]
943
999
  if di.opcode.props[:setip]
944
1000
  @addrs_todo = []
945
- @program.get_xrefs_x(self, di).each { |expr|
1001
+ ar = @program.get_xrefs_x(self, di)
1002
+ ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr
1003
+ ar.each { |expr|
946
1004
  backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast)
947
1005
  }
948
1006
  end
@@ -965,8 +1023,13 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
965
1023
  end
966
1024
  }
967
1025
 
968
- di.block.add_to_normal(di_addr)
969
- ret << [di_addr, di.address]
1026
+ ar = [di_addr]
1027
+ ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr
1028
+ ar.each { |a|
1029
+ di.block.add_to_normal(a)
1030
+ ret << [a, di.address]
1031
+ }
1032
+ ret
970
1033
  end
971
1034
 
972
1035
  # handles when disassemble_fast encounters a call to a subfunction
@@ -1037,7 +1100,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1037
1100
  count = 0
1038
1101
  while b = block_at(addr)
1039
1102
  count += 1
1040
- return if count > 5 or b.list.length > 4
1103
+ return if count > 5 or b.list.length > 5
1041
1104
  if b.to_subfuncret and not b.to_subfuncret.empty?
1042
1105
  return if b.to_subfuncret.length != 1
1043
1106
  addr = normalize(b.to_subfuncret.first)
@@ -1047,7 +1110,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1047
1110
  return if not btb = sf.backtrace_binding
1048
1111
  btb = btb.dup
1049
1112
  btb.delete_if { |k, v| Expression[k] == Expression[v] }
1050
- return if btb.length > 2 or btb.values.include? Expression::Unknown
1113
+ return if btb.length > 2 or btb.values.include? Expression::Unknown
1051
1114
  else
1052
1115
  return if not bt = b.to_normal
1053
1116
  if bt.include? :default
@@ -1291,6 +1354,88 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1291
1354
  end
1292
1355
  end
1293
1356
 
1357
# iterates over all instructions of a function from a given entrypoint
# carries an object while walking, the object is yielded every instruction
# every block is walked only once, after all previous blocks are done (if possible)
# on a 'jz', a [:clone] event is yielded for every path beside the first
# on a junction (eg a -> b -> d, a -> c -> d), a [:merge] event occurs if froms have different objs
# event list:
#  [:di, <addr>, <decoded_instruction>, <object>]
#  [:clone, <newaddr>, <oldaddr>, <object>]
#  [:merge, <newaddr>, {<oldaddr1> => <object1>, <oldaddr2> => <object2>, ...}, <object1>]
#  [:subfunc, <subfunc_addr>, <call_addr>, <object>]
# all events should return an object
# :merge has a copy of object1 at the end so that uninterested callers can always return args[-1]
# if an event returns false, the trace stops for the current branch
#
# addr_start: address of the first instruction to walk (looked up through di_at)
# obj_start:  object carried into the first :di event
# requires a block (every event is passed to it through yield)
def function_walk(addr_start, obj_start)
  # address of every instr already seen => obj as it was when the instr was
  # reached (ie before its :di event was yielded)
  done = {}
  # pending hops: [addr, obj] ; a truthy 3rd element marks a hop that has
  # already been postponed once and must not be postponed again
  todo = [[addr_start, obj_start]]

  while hop = todo.pop
    addr, obj = hop
    # a junction is queued once per predecessor (and once more when it is
    # postponed): skip it when it has already been walked
    next if done.has_key?(addr)

    di = di_at(addr)
    next if not di

    if done.empty?
      # very first hop: start at the entrypoint instr, even in the middle of its block
      dilist = di.block.list[di.block.list.index(di)..-1]
    else
      # new block, check all 'from' have been seen
      if not hop[2]
        all_ok = true
        di.block.each_from_samefunc(self) { |fa| all_ok = false unless done.has_key?(fa) }
        if not all_ok
          # may retry later: requeue at the other end of the todo list
          todo.unshift([addr, obj, true])
          next
        end
      end

      # collect the objs recorded for the predecessors already walked
      froms = {}
      di.block.each_from_samefunc(self) { |fa| froms[fa] = done[fa] if done[fa] }
      if froms.values.uniq.length > 1
        obj = yield([:merge, addr, froms, froms.values.first])
        next if obj == false
      end

      dilist = di.block.list
    end

    # Array#each returns its receiver (truthy) when it runs to completion and
    # nil when interrupted by a bare 'break'
    walked = dilist.each { |_di|
      break if done.has_key?(_di.address)	# looped back into addr_start
      done[_di.address] = obj
      obj = yield([:di, _di.address, _di, obj])
      break if obj == false
    }
    next if not walked

    from = dilist.last.address

    if di.block.to_normal and di.block.to_normal[0] and
       di.block.to_subfuncret and di.block.to_subfuncret[0]
      # current instruction block calls into a subfunction
      obj = di.block.to_normal.map { |subf|
        yield([:subfunc, subf, from, obj])
      }.first	# propagate 1st subfunc result
      next if obj == false
    end

    # the first successor inherits obj, every other one gets its own :clone
    wantclone = false
    di.block.each_to_samefunc(self) { |ta|
      if wantclone
        nobj = yield([:clone, ta, from, obj])
        # a :clone event returning false stops the trace for that branch
        next if nobj == false
        todo << [ta, nobj]
      else
        todo << [ta, obj]
        wantclone = true
      end
    }
  end
end
1438
+
1294
1439
  # holds a backtrace result until a snapshot_addr is encountered
1295
1440
  class StoppedExpr
1296
1441
  attr_accessor :exprs
@@ -1356,7 +1501,7 @@ puts " not backtracking stack address #{expr}" if debug_backtrace
1356
1501
  end
1357
1502
 
1358
1503
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1359
- di, origin, type, len, maxdepth, detached))
1504
+ di, origin, type, len, maxdepth, detached, snapshot_addr))
1360
1505
  # no need to update backtracked_for
1361
1506
  return vals
1362
1507
  elsif maxdepth <= 0
@@ -1396,7 +1541,7 @@ puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr
1396
1541
  if expr != oldexpr and not snapshot_addr and vals = (no_check ?
1397
1542
  (!need_backtrace(expr, terminals) and [expr]) :
1398
1543
  backtrace_check_found(expr, nil, origin, type, len,
1399
- maxdepth-h[:loopdetect].length, detached))
1544
+ maxdepth-h[:loopdetect].length, detached, snapshot_addr))
1400
1545
  result |= vals
1401
1546
  next
1402
1547
  end
@@ -1437,7 +1582,7 @@ puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#
1437
1582
 
1438
1583
  if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
1439
1584
  backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
1440
- maxdepth-h[:loopdetect].length, detached))
1585
+ maxdepth-h[:loopdetect].length, detached, snapshot_addr))
1441
1586
  if snapshot_addr
1442
1587
  expr = StoppedExpr.new vals
1443
1588
  next expr
@@ -1498,7 +1643,7 @@ oldexpr = expr
1498
1643
  when :func
1499
1644
  expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, origin, maxdepth-h[:loopdetect].length)
1500
1645
  if snapshot_addr and snapshot_addr == h[:funcaddr]
1501
- # XXX recursiveness detection needs to be fixed
1646
+ # XXX recursiveness detection needs to be fixed
1502
1647
  puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_backtrace
1503
1648
  next false
1504
1649
  end
@@ -1506,7 +1651,7 @@ puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_back
1506
1651
  end
1507
1652
  puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
1508
1653
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1509
- h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached))
1654
+ h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, snapshot_addr))
1510
1655
  if snapshot_addr
1511
1656
  expr = StoppedExpr.new vals
1512
1657
  else
@@ -1588,10 +1733,14 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1588
1733
  (ab = @address_binding[addr]) ? Expression[expr.bind(ab).reduce] : expr
1589
1734
  end
1590
1735
 
1736
# forwards the function binding update to the cpu, passing this disassembler as first argument
# func defaults to @function[addr], retaddrs defaults to func.return_address
# returns whatever the cpu method returns
def backtrace_update_function_binding(addr, func=@function[addr], retaddrs=func.return_address)
  cpu = @cpu
  cpu.backtrace_update_function_binding(self, addr, func, retaddrs)
end
1739
+
1591
1740
  # static resolution of indirections
1592
1741
  def resolve(expr)
1593
1742
  binding = Expression[expr].expr_indirections.inject(@old_prog_binding) { |binding_, ind|
1594
- e, b = get_section_at(resolve(ind.target))
1743
+ e = get_edata_at(resolve(ind.target))
1595
1744
  return expr if not e
1596
1745
  binding_.merge ind => Expression[ e.decode_imm("u#{8*ind.len}".to_sym, @cpu.endianness) ]
1597
1746
  }
@@ -1619,7 +1768,7 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1619
1768
  # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
1620
1769
  # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
1621
1770
  # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
1622
- def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached)
1771
+ def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, snapshot_addr=nil)
1623
1772
  # only entrypoints or block starts called by a :saveip are checked for being a function
1624
1773
  # want to execute [esp] from a block start
1625
1774
  if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and (
@@ -1649,11 +1798,14 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1649
1798
  end
1650
1799
 
1651
1800
  return if need_backtrace(expr)
1801
+ if snapshot_addr
1802
+ return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) }
1803
+ end
1652
1804
 
1653
1805
  puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
1654
1806
  result = backtrace_value(expr, maxdepth)
1655
1807
  # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
1656
- result << expr if not type
1808
+ #result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler
1657
1809
  result.uniq!
1658
1810
 
1659
1811
  # create xrefs/labels
@@ -1695,7 +1847,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
1695
1847
  ret = []
1696
1848
 
1697
1849
  decode_imm = lambda { |addr, len|
1698
- edata, foo = get_section_at(addr)
1850
+ edata = get_edata_at(addr)
1699
1851
  if edata
1700
1852
  Expression[ edata.decode_imm("u#{8*len}".to_sym, @cpu.endianness) ]
1701
1853
  else
@@ -1803,7 +1955,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1803
1955
  # TODO trace expression evolution to allow handling of
1804
1956
  # mov eax, 28 ; add eax, 4 ; jmp eax
1805
1957
  # => mov eax, (loc_xx-4)
1806
- if di and not unk # and di.address == origin
1958
+ if di and not unk and expr != n # and di.address == origin
1807
1959
  @cpu.replace_instr_arg_immediate(di.instruction, expr, n)
1808
1960
  end
1809
1961
  if @decoded[origin] and not unk
@@ -1850,6 +2002,10 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1850
2002
  end
1851
2003
  end
1852
2004
 
2005
# short fixed-size description: class name followed by the object_id in hexadecimal
# (does not dump any instance variable)
def inspect
  format("<Metasm::Disassembler @%x>", object_id)
end
2008
+
1853
2009
  def to_s
1854
2010
  a = ''
1855
2011
  dump { |l| a << l << "\n" }
@@ -1916,7 +2072,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1916
2072
  if not xr.empty?
1917
2073
  b["\n// Xrefs: #{xr[0, 8].join(' ')}#{' ...' if xr.length > 8}"]
1918
2074
  end
1919
- if block.edata.inv_export[block.edata_ptr]
2075
+ if block.edata.inv_export[block.edata_ptr] and label_alias[block.address]
1920
2076
  b["\n"] if xr.empty?
1921
2077
  label_alias[block.address].each { |name| b["#{name}:"] }
1922
2078
  end
@@ -1933,8 +2089,8 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1933
2089
  # TODO array-style data access
1934
2090
  def dump_data(addr, edata, off, &b)
1935
2091
  b ||= lambda { |l| puts l }
1936
- if l = edata.inv_export[off]
1937
- l_list = label_alias[addr].to_a.sort
2092
+ if l = edata.inv_export[off] and label_alias[addr]
2093
+ l_list = label_alias[addr].sort
1938
2094
  l = l_list.pop || l
1939
2095
  l_list.each { |ll|
1940
2096
  b["#{ll}:"]