metasm 1.0.0 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (276) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +3 -0
  4. data/.gitignore +3 -0
  5. data/.hgtags +3 -0
  6. data/Gemfile +3 -0
  7. data/INSTALL +61 -0
  8. data/LICENCE +458 -0
  9. data/README +29 -21
  10. data/Rakefile +10 -0
  11. data/TODO +10 -12
  12. data/doc/code_organisation.txt +3 -1
  13. data/doc/core/DynLdr.txt +247 -0
  14. data/doc/core/ExeFormat.txt +43 -0
  15. data/doc/core/Expression.txt +220 -0
  16. data/doc/core/GNUExports.txt +27 -0
  17. data/doc/core/Ia32.txt +236 -0
  18. data/doc/core/SerialStruct.txt +108 -0
  19. data/doc/core/VirtualString.txt +145 -0
  20. data/doc/core/WindowsExports.txt +61 -0
  21. data/doc/core/index.txt +1 -0
  22. data/doc/style.css +6 -3
  23. data/doc/usage/debugger.txt +327 -0
  24. data/doc/usage/index.txt +1 -0
  25. data/doc/use_cases.txt +2 -2
  26. data/metasm.gemspec +23 -0
  27. data/{lib/metasm.rb → metasm.rb} +15 -3
  28. data/{lib/metasm → metasm}/compile_c.rb +15 -9
  29. data/metasm/cpu/arc.rb +8 -0
  30. data/metasm/cpu/arc/decode.rb +404 -0
  31. data/metasm/cpu/arc/main.rb +191 -0
  32. data/metasm/cpu/arc/opcodes.rb +588 -0
  33. data/metasm/cpu/arm.rb +14 -0
  34. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  35. data/{lib/metasm → metasm/cpu}/arm/decode.rb +15 -18
  36. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  37. data/{lib/metasm → metasm/cpu}/arm/main.rb +3 -6
  38. data/metasm/cpu/arm/opcodes.rb +324 -0
  39. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  40. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  41. data/metasm/cpu/arm64.rb +15 -0
  42. data/metasm/cpu/arm64/debug.rb +38 -0
  43. data/metasm/cpu/arm64/decode.rb +285 -0
  44. data/metasm/cpu/arm64/encode.rb +41 -0
  45. data/metasm/cpu/arm64/main.rb +105 -0
  46. data/metasm/cpu/arm64/opcodes.rb +232 -0
  47. data/metasm/cpu/arm64/parse.rb +20 -0
  48. data/metasm/cpu/arm64/render.rb +95 -0
  49. data/{lib/metasm/mips/compile_c.rb → metasm/cpu/bpf.rb} +4 -2
  50. data/metasm/cpu/bpf/decode.rb +110 -0
  51. data/metasm/cpu/bpf/main.rb +60 -0
  52. data/metasm/cpu/bpf/opcodes.rb +81 -0
  53. data/metasm/cpu/bpf/render.rb +30 -0
  54. data/{lib/metasm/ppc.rb → metasm/cpu/cy16.rb} +2 -4
  55. data/metasm/cpu/cy16/decode.rb +247 -0
  56. data/metasm/cpu/cy16/main.rb +63 -0
  57. data/metasm/cpu/cy16/opcodes.rb +78 -0
  58. data/metasm/cpu/cy16/render.rb +30 -0
  59. data/metasm/cpu/dalvik.rb +11 -0
  60. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +34 -34
  61. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +71 -4
  62. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +21 -12
  63. data/{lib/metasm/mips.rb → metasm/cpu/ebpf.rb} +3 -4
  64. data/metasm/cpu/ebpf/debug.rb +61 -0
  65. data/metasm/cpu/ebpf/decode.rb +142 -0
  66. data/metasm/cpu/ebpf/main.rb +58 -0
  67. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  68. data/metasm/cpu/ebpf/render.rb +36 -0
  69. data/metasm/cpu/ia32.rb +17 -0
  70. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +23 -9
  71. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +44 -6
  72. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +342 -128
  73. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +75 -53
  74. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  75. data/{lib/metasm → metasm/cpu}/ia32/main.rb +66 -8
  76. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  77. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +55 -17
  78. data/{lib/metasm → metasm/cpu}/ia32/render.rb +32 -5
  79. data/metasm/cpu/mcs51.rb +8 -0
  80. data/metasm/cpu/mcs51/decode.rb +99 -0
  81. data/metasm/cpu/mcs51/main.rb +87 -0
  82. data/metasm/cpu/mcs51/opcodes.rb +120 -0
  83. data/metasm/cpu/mips.rb +14 -0
  84. data/metasm/cpu/mips/debug.rb +42 -0
  85. data/{lib/metasm → metasm/cpu}/mips/decode.rb +59 -38
  86. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  87. data/{lib/metasm → metasm/cpu}/mips/main.rb +13 -6
  88. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +87 -18
  89. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  90. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  91. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  92. data/metasm/cpu/msp430/decode.rb +243 -0
  93. data/metasm/cpu/msp430/main.rb +62 -0
  94. data/metasm/cpu/msp430/opcodes.rb +101 -0
  95. data/metasm/cpu/openrisc.rb +11 -0
  96. data/metasm/cpu/openrisc/debug.rb +106 -0
  97. data/metasm/cpu/openrisc/decode.rb +182 -0
  98. data/metasm/cpu/openrisc/decompile.rb +350 -0
  99. data/metasm/cpu/openrisc/main.rb +70 -0
  100. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  101. data/metasm/cpu/openrisc/render.rb +37 -0
  102. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  103. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  104. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  105. data/metasm/cpu/ppc.rb +11 -0
  106. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -37
  107. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  108. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  109. data/{lib/metasm → metasm/cpu}/ppc/main.rb +23 -18
  110. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -6
  111. data/metasm/cpu/ppc/parse.rb +55 -0
  112. data/metasm/cpu/python.rb +8 -0
  113. data/metasm/cpu/python/decode.rb +116 -0
  114. data/metasm/cpu/python/main.rb +36 -0
  115. data/metasm/cpu/python/opcodes.rb +180 -0
  116. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  117. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +50 -23
  118. data/{lib/metasm → metasm/cpu}/sh4/main.rb +38 -27
  119. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  120. data/metasm/cpu/st20.rb +9 -0
  121. data/metasm/cpu/st20/decode.rb +173 -0
  122. data/metasm/cpu/st20/decompile.rb +283 -0
  123. data/metasm/cpu/st20/main.rb +37 -0
  124. data/metasm/cpu/st20/opcodes.rb +140 -0
  125. data/{lib/metasm/arm.rb → metasm/cpu/webasm.rb} +4 -5
  126. data/metasm/cpu/webasm/debug.rb +31 -0
  127. data/metasm/cpu/webasm/decode.rb +321 -0
  128. data/metasm/cpu/webasm/decompile.rb +386 -0
  129. data/metasm/cpu/webasm/encode.rb +104 -0
  130. data/metasm/cpu/webasm/main.rb +81 -0
  131. data/metasm/cpu/webasm/opcodes.rb +214 -0
  132. data/metasm/cpu/x86_64.rb +15 -0
  133. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +40 -25
  134. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  135. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +58 -15
  136. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +59 -28
  137. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +18 -6
  138. data/metasm/cpu/x86_64/opcodes.rb +138 -0
  139. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +12 -4
  140. data/metasm/cpu/x86_64/render.rb +35 -0
  141. data/metasm/cpu/z80.rb +9 -0
  142. data/metasm/cpu/z80/decode.rb +286 -0
  143. data/metasm/cpu/z80/main.rb +67 -0
  144. data/metasm/cpu/z80/opcodes.rb +224 -0
  145. data/metasm/cpu/z80/render.rb +48 -0
  146. data/{lib/metasm/os/main.rb → metasm/debug.rb} +201 -407
  147. data/{lib/metasm → metasm}/decode.rb +104 -24
  148. data/{lib/metasm → metasm}/decompile.rb +804 -478
  149. data/{lib/metasm → metasm}/disassemble.rb +385 -170
  150. data/{lib/metasm → metasm}/disassemble_api.rb +684 -105
  151. data/{lib/metasm → metasm}/dynldr.rb +231 -138
  152. data/{lib/metasm → metasm}/encode.rb +20 -5
  153. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  154. data/{lib/metasm → metasm}/exe_format/autoexe.rb +3 -0
  155. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  156. data/{lib/metasm → metasm}/exe_format/coff.rb +35 -7
  157. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +70 -23
  158. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +24 -22
  159. data/{lib/metasm → metasm}/exe_format/dex.rb +26 -8
  160. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  161. data/{lib/metasm → metasm}/exe_format/elf.rb +108 -58
  162. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +202 -36
  163. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +126 -32
  164. data/metasm/exe_format/gb.rb +65 -0
  165. data/metasm/exe_format/javaclass.rb +424 -0
  166. data/{lib/metasm → metasm}/exe_format/macho.rb +218 -16
  167. data/{lib/metasm → metasm}/exe_format/main.rb +28 -3
  168. data/{lib/metasm → metasm}/exe_format/mz.rb +2 -0
  169. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  170. data/{lib/metasm → metasm}/exe_format/pe.rb +96 -11
  171. data/metasm/exe_format/pyc.rb +167 -0
  172. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  173. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  174. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  175. data/metasm/exe_format/swf.rb +205 -0
  176. data/metasm/exe_format/wasm.rb +402 -0
  177. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  178. data/metasm/exe_format/zip.rb +335 -0
  179. data/metasm/gui.rb +13 -0
  180. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  181. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  182. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +177 -114
  183. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  184. data/metasm/gui/dasm_graph.rb +1754 -0
  185. data/{lib/metasm → metasm}/gui/dasm_hex.rb +16 -12
  186. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  187. data/{lib/metasm → metasm}/gui/dasm_main.rb +360 -77
  188. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  189. data/{lib/metasm → metasm}/gui/debug.rb +109 -34
  190. data/{lib/metasm → metasm}/gui/gtk.rb +174 -44
  191. data/{lib/metasm → metasm}/gui/qt.rb +14 -4
  192. data/{lib/metasm → metasm}/gui/win32.rb +180 -43
  193. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  194. data/{lib/metasm → metasm}/main.rb +421 -286
  195. data/metasm/os/emulator.rb +175 -0
  196. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  197. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  198. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  199. data/metasm/os/main.rb +335 -0
  200. data/{lib/metasm → metasm}/os/windows.rb +151 -58
  201. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  202. data/{lib/metasm → metasm}/parse.rb +49 -36
  203. data/{lib/metasm → metasm}/parse_c.rb +405 -246
  204. data/{lib/metasm → metasm}/preprocessor.rb +71 -41
  205. data/{lib/metasm → metasm}/render.rb +14 -38
  206. data/misc/hexdump.rb +4 -3
  207. data/misc/lint.rb +58 -0
  208. data/misc/objdiff.rb +4 -1
  209. data/misc/objscan.rb +1 -1
  210. data/misc/openrisc-parser.rb +79 -0
  211. data/misc/txt2html.rb +9 -7
  212. data/samples/bindiff.rb +3 -4
  213. data/samples/dasm-plugins/bindiff.rb +15 -0
  214. data/samples/dasm-plugins/bookmark.rb +133 -0
  215. data/samples/dasm-plugins/c_constants.rb +57 -0
  216. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  217. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  218. data/samples/dasm-plugins/dasm_all.rb +70 -0
  219. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  220. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  221. data/samples/dasm-plugins/dump_text.rb +35 -0
  222. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  223. data/samples/dasm-plugins/findgadget.rb +75 -0
  224. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  225. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  226. data/samples/dasm-plugins/imm2off.rb +34 -0
  227. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  228. data/samples/dasm-plugins/patch_file.rb +95 -0
  229. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  230. data/samples/dasm-plugins/scanxrefs.rb +29 -0
  231. data/samples/dasm-plugins/selfmodify.rb +197 -0
  232. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  233. data/samples/dasmnavig.rb +1 -1
  234. data/samples/dbg-apihook.rb +24 -9
  235. data/samples/dbg-plugins/heapscan.rb +283 -0
  236. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  237. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  238. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  239. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  240. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  241. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  242. data/samples/dbg-plugins/trace_func.rb +214 -0
  243. data/samples/disassemble-gui.rb +48 -7
  244. data/samples/disassemble.rb +31 -6
  245. data/samples/dump_upx.rb +24 -12
  246. data/samples/dynamic_ruby.rb +35 -27
  247. data/samples/elfencode.rb +15 -0
  248. data/samples/emubios.rb +251 -0
  249. data/samples/emudbg.rb +127 -0
  250. data/samples/exeencode.rb +6 -5
  251. data/samples/factorize-headers-peimports.rb +1 -1
  252. data/samples/lindebug.rb +186 -391
  253. data/samples/metasm-shell.rb +68 -57
  254. data/samples/peldr.rb +2 -2
  255. data/tests/all.rb +1 -1
  256. data/tests/arc.rb +26 -0
  257. data/tests/dynldr.rb +22 -4
  258. data/tests/expression.rb +57 -0
  259. data/tests/graph_layout.rb +285 -0
  260. data/tests/ia32.rb +80 -26
  261. data/tests/mcs51.rb +27 -0
  262. data/tests/mips.rb +10 -3
  263. data/tests/preprocessor.rb +18 -0
  264. data/tests/x86_64.rb +66 -18
  265. metadata +465 -219
  266. metadata.gz.sig +2 -0
  267. data/lib/metasm/arm/opcodes.rb +0 -177
  268. data/lib/metasm/gui.rb +0 -23
  269. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  270. data/lib/metasm/ia32.rb +0 -14
  271. data/lib/metasm/ia32/opcodes.rb +0 -872
  272. data/lib/metasm/ppc/parse.rb +0 -52
  273. data/lib/metasm/x86_64.rb +0 -12
  274. data/lib/metasm/x86_64/opcodes.rb +0 -118
  275. data/samples/gdbclient.rb +0 -583
  276. data/samples/rubstop.rb +0 -399
@@ -4,17 +4,26 @@
4
4
  # Licence is LGPL, see LICENCE in the top-level directory
5
5
 
6
6
 
7
- require 'metasm/ia32/main'
7
+ require 'metasm/cpu/ia32/main'
8
8
 
9
9
  module Metasm
10
10
  class Ia32
11
11
  # temporarily setup dasm.address_binding so that backtracking
12
12
  # stack-related offsets resolve in :frameptr (relative to func start)
13
13
  def decompile_makestackvars(dasm, funcstart, blocks)
14
+ esp = register_symbols[4]
14
15
  oldfuncbd = dasm.address_binding[funcstart]
15
- dasm.address_binding[funcstart] = { :esp => :frameptr } # this would suffice, the rest here is just optimisation
16
-
16
+ dasm.address_binding[funcstart] = { esp => :frameptr }
17
17
  patched_binding = [funcstart] # list of addresses to cleanup later
18
+
19
+ if blocks.length <= 12
20
+ blocks.each { |block| yield block }
21
+ return
22
+ end
23
+
24
+ # for large function, pre-trace and cache esp/ebp for every block start to improve decompilation time
25
+
26
+ ebp = register_symbols[5]
18
27
  ebp_frame = true
19
28
 
20
29
  # pretrace esp and ebp for each function block (cleared later)
@@ -24,16 +33,16 @@ class Ia32
24
33
  if not dasm.address_binding[blockstart]
25
34
  patched_binding << blockstart
26
35
  dasm.address_binding[blockstart] = {}
27
- foo = dasm.backtrace(:esp, blockstart, :snapshot_addr => funcstart)
36
+ foo = dasm.backtrace(esp, blockstart, :snapshot_addr => funcstart)
28
37
  if foo.length == 1 and ee = foo.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or
29
38
  (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer))
30
- dasm.address_binding[blockstart][:esp] = ee
39
+ dasm.address_binding[blockstart][esp] = ee
31
40
  end
32
41
  if ebp_frame
33
- foo = dasm.backtrace(:ebp, blockstart, :snapshot_addr => funcstart)
42
+ foo = dasm.backtrace(ebp, blockstart, :snapshot_addr => funcstart)
34
43
  if foo.length == 1 and ee = foo.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or
35
44
  (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer))
36
- dasm.address_binding[blockstart][:ebp] = ee
45
+ dasm.address_binding[blockstart][ebp] = ee
37
46
  else
38
47
  ebp_frame = false # func does not use ebp as frame ptr, no need to bt for later blocks
39
48
  end
@@ -48,11 +57,16 @@ class Ia32
48
57
  dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd
49
58
  end
50
59
 
60
+ # add di-specific registry written/accessed
61
+ def decompile_func_finddeps_di(dcmp, func, di, a, w)
62
+ a << register_symbols[0] if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
63
+ end
64
+
51
65
  # list variable dependency for each block, remove useless writes
52
66
  # returns { blockaddr => [list of vars that are needed by a following block] }
53
67
  def decompile_func_finddeps(dcmp, blocks, func)
54
68
  deps_r = {} ; deps_w = {} ; deps_to = {}
55
- deps_subfunc = {} # things read/written by subfuncs
69
+ deps_subfunc = {} # things read/written by subfuncs
56
70
 
57
71
  # find read/writes by each block
58
72
  blocks.each { |b, to|
@@ -69,8 +83,8 @@ class Ia32
69
83
  else a |= Expression[k].externals # if dword [eax] <- 42, eax is read
70
84
  end
71
85
  }
72
- a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
73
-
86
+ decompile_func_finddeps_di(dcmp, func, di, a, w)
87
+
74
88
  deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b]
75
89
  deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
76
90
  }
@@ -97,7 +111,7 @@ class Ia32
97
111
  }
98
112
  if stackoff # last block instr == subfunction call
99
113
  deps_r[b] |= deps_subfunc[b] - deps_w[b]
100
- deps_w[b] |= [:eax, :ecx, :edx] # standard ABI
114
+ deps_w[b] |= register_symbols[0, 3] # standard ABI
101
115
  end
102
116
  }
103
117
 
@@ -120,13 +134,13 @@ class Ia32
120
134
  else a |= Expression[k].externals # if dword [eax] <- 42, eax is read
121
135
  end
122
136
  }
123
- a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
124
-
137
+ decompile_func_finddeps_di(dcmp, func, di, a, w)
138
+
125
139
  next true if (a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - bw).include? r
126
140
  bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
127
141
  false
128
142
  }
129
- if r == :eax and (rdi || blk.list.last).opcode.name == 'ret'
143
+ if r == register_symbols[0] and (rdi || blk.list.last).opcode.name == 'ret'
130
144
  func.type.type = C::BaseType.new(:void)
131
145
  false
132
146
  elsif rdi and rdi.backtrace_binding[r]
@@ -180,15 +194,18 @@ class Ia32
180
194
  end
181
195
 
182
196
  def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
197
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
198
+ ebx, esp, ebp = ebx, esp, ebp # fix ruby unused var warning
183
199
  scope = func.initializer
184
200
  func.type.args.each { |a| scope.symbol[a.name] = a }
185
201
  stmts = scope.statements
186
202
  blocks_toclean = myblocks.dup
187
203
  func_entry = myblocks.first[0]
204
+ di_addr = nil
188
205
  until myblocks.empty?
189
206
  b, to = myblocks.shift
190
207
  if l = dcmp.dasm.get_label_at(b)
191
- stmts << C::Label.new(l)
208
+ stmts << C::Label.new(l).with_misc(:di_addr => b)
192
209
  end
193
210
 
194
211
  # list of assignments [[dest reg, expr assigned]]
@@ -196,7 +213,11 @@ class Ia32
196
213
  # reg binding (reg => value, values.externals = regs at block start)
197
214
  binding = {}
198
215
  # Expr => CExpr
199
- ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
216
+ ce = lambda { |*e|
217
+ ret = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
218
+ dcmp.walk_ce(ret) { |ee| ee.with_misc(:di_addr => di_addr) } if di_addr
219
+ ret
220
+ }
200
221
  # Expr => Expr.bind(binding) => CExpr
201
222
  ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }
202
223
 
@@ -221,6 +242,7 @@ class Ia32
221
242
  # returns an array to use as funcall arguments
222
243
  get_func_args = lambda { |di, f|
223
244
  # XXX see remarks in #finddeps
245
+ # TODO x64
224
246
  bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
225
247
  stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil
226
248
  args_todo = f.type.args.to_a.dup
@@ -269,19 +291,20 @@ class Ia32
269
291
 
270
292
  # go !
271
293
  dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
294
+ di_addr = di.address
272
295
  a = di.instruction.args
273
296
  if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
274
297
  # conditional jump
275
298
  commit[]
276
299
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
277
300
  if di.opcode.name =~ /^loop(.+)?/
278
- cx = C::CExpression[:'--', ceb[:ecx]]
301
+ cx = C::CExpression[:'--', ceb[ecx]]
279
302
  cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx
280
303
  else
281
304
  cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
282
305
  end
283
306
  # XXX switch/indirect/multiple jmp
284
- stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
307
+ stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
285
308
  to.delete dcmp.dasm.normalize(n)
286
309
  next
287
310
  end
@@ -290,7 +313,7 @@ class Ia32
290
313
  # mov cr0 etc
291
314
  a1, a2 = di.instruction.args
292
315
  case a1
293
- when Ia32::CtrlReg, Ia32::DbgReg, Ia32::SegReg
316
+ when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg
294
317
  sz = a1.kind_of?(Ia32::SegReg) ? 16 : 32
295
318
  if not dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
296
319
  dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});")
@@ -298,11 +321,11 @@ class Ia32
298
321
  f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
299
322
  a2 = a2.symbolic(di)
300
323
  a2 = [a2, :&, 0xffff] if sz == 16
301
- stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type)
324
+ stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type).with_misc(:di_addr => di_addr)
302
325
  next
303
326
  end
304
327
  case a2
305
- when Ia32::CtrlReg, Ia32::DbgReg, Ia32::SegReg
328
+ when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg
306
329
  if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
307
330
  sz = a2.kind_of?(Ia32::SegReg) ? 16 : 32
308
331
  dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);")
@@ -310,7 +333,7 @@ class Ia32
310
333
  f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
311
334
  t = f.type.type
312
335
  binding.delete a1.symbolic(di)
313
- stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t)
336
+ stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t).with_misc(:di_addr => di_addr), t).with_misc(:di_addr => di_addr)
314
337
  next
315
338
  end
316
339
  end
@@ -319,8 +342,8 @@ class Ia32
319
342
  when 'ret'
320
343
  commit[]
321
344
  ret = nil
322
- ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
323
- stmts << C::Return.new(ret)
345
+ ret = C::CExpression[ceb[eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
346
+ stmts << C::Return.new(ret).with_misc(:di_addr => di_addr)
324
347
  when 'call' # :saveip
325
348
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
326
349
  args = []
@@ -353,9 +376,9 @@ class Ia32
353
376
  end
354
377
  end
355
378
  commit[]
356
- binding.delete :eax
357
- e = C::CExpression[f, :funcall, args]
358
- e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void)
379
+ binding.delete eax
380
+ e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di_addr)
381
+ e = C::CExpression[ce[eax], :'=', e, f.type.type].with_misc(:di_addr => di_addr) if deps[b].include? eax and f.type.type != C::BaseType.new(:void)
359
382
  stmts << e
360
383
  when 'jmp'
361
384
  #if di.comment.to_a.include? 'switch'
@@ -369,12 +392,12 @@ class Ia32
369
392
  # to.delete addr
370
393
  # next if not l = dcmp.dasm.get_label_at(addr)
371
394
  # sw.body.statements << C::Goto.new(l)
372
- # }
395
+ # }
373
396
  # stmts << sw
374
397
  a = di.instruction.args.first
375
398
  if a.kind_of? Expression
376
399
  elsif not a.respond_to? :symbolic
377
- stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
400
+ stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil).with_misc(:di_addr => di_addr)
378
401
  else
379
402
  n = di.instruction.args.first.symbolic(di)
380
403
  fptr = ceb[n]
@@ -385,12 +408,10 @@ class Ia32
385
408
  args = get_func_args[di, fptr.type]
386
409
  else
387
410
  proto = C::Function.new(C::BaseType.new(:void))
388
- fptr = C::CExpression[[fptr], C::Pointer.new(proto)]
411
+ fptr = C::CExpression[[fptr], C::Pointer.new(proto)].with_misc(:di_addr => di_addr)
389
412
  args = []
390
413
  end
391
- ret = C::Return.new(C::CExpression[fptr, :funcall, args])
392
- class << ret ; attr_accessor :from_instr end
393
- ret.from_instr = di
414
+ ret = C::Return.new(C::CExpression[fptr, :funcall, args].with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
394
415
  stmts << ret
395
416
  to = []
396
417
  end
@@ -404,7 +425,7 @@ class Ia32
404
425
  end
405
426
  # need a way to transform arg => :frameptr+12
406
427
  arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
407
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
428
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
408
429
  when 'lidt'
409
430
  if not dcmp.c_parser.toplevel.struct['interrupt_descriptor']
410
431
  dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
@@ -414,29 +435,29 @@ class Ia32
414
435
  dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
415
436
  end
416
437
  arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
417
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
438
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
418
439
  when 'ltr', 'lldt'
419
440
  if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
420
441
  dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
421
442
  end
422
443
  arg = di.backtrace_binding.keys.first
423
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void))
444
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
424
445
  when 'out'
425
446
  sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
426
447
  if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
427
448
  dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
428
449
  end
429
- port = di.instruction.args.grep(Expression).first || :edx
430
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void))
450
+ port = di.instruction.args.grep(Expression).first || edx
451
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[eax]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
431
452
  when 'in'
432
453
  sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
433
454
  if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
434
455
  dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
435
456
  end
436
- port = di.instruction.args.grep(Expression).first || :edx
457
+ port = di.instruction.args.grep(Expression).first || edx
437
458
  f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
438
- binding.delete :eax
439
- stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type)
459
+ binding.delete eax
460
+ stmts << C::CExpression.new(ce[eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type).with_misc(:di_addr => di_addr)
440
461
  when 'sti', 'cli'
441
462
  stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
442
463
  when /^(mov|sto|lod)s([bwdq])/
@@ -448,15 +469,15 @@ class Ia32
448
469
  blk = C::Block.new(scope)
449
470
  case op
450
471
  when 'mov'
451
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]]
452
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
453
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
472
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
473
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
474
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
454
475
  when 'sto'
455
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]]
456
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
476
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', ceb[eax]].with_misc(:di_addr => di_addr)
477
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
457
478
  when 'lod'
458
- blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]]
459
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
479
+ blk.statements << C::CExpression[ceb[eax], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
480
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
460
481
  #when 'sca'
461
482
  #when 'cmp'
462
483
  end
@@ -465,8 +486,8 @@ class Ia32
465
486
  when nil
466
487
  stmts.concat blk.statements
467
488
  when 'rep'
468
- blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]]
469
- stmts << C::While.new(C::CExpression[ceb[:ecx]], blk)
489
+ blk.statements << C::CExpression[ceb[ecx], :'=', [ceb[ecx], :-, [1]]].with_misc(:di_addr => di_addr)
490
+ stmts << C::While.new(C::CExpression[ceb[ecx]], blk).with_misc(:di_addr => di_addr)
470
491
  #when 'repz' # sca/cmp only
471
492
  #when 'repnz'
472
493
  end
@@ -475,7 +496,7 @@ class Ia32
475
496
  bd = get_fwdemu_binding(di)
476
497
  if di.backtrace_binding[:incomplete_binding]
477
498
  commit[]
478
- stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
499
+ stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di_addr)
479
500
  else
480
501
  update = {}
481
502
  bd.each { |k, v|
@@ -490,6 +511,7 @@ class Ia32
490
511
  binding.update update
491
512
  end
492
513
  end
514
+ di_addr = nil
493
515
  }
494
516
  commit[]
495
517
 
@@ -512,9 +534,9 @@ class Ia32
512
534
  dcmp.dasm.decoded[b_].block.list.each { |di|
513
535
  di.backtrace_binding = nil
514
536
  }
515
- }
537
+ }
516
538
  end
517
-
539
+
518
540
  def decompile_check_abi(dcmp, entry, func)
519
541
  a = func.type.args || []
520
542
  a.delete_if { |arg| arg.has_attribute_var('register') and arg.has_attribute('unused') }
@@ -4,7 +4,7 @@
4
4
  # Licence is LGPL, see LICENCE in the top-level directory
5
5
 
6
6
 
7
- require 'metasm/ia32/opcodes'
7
+ require 'metasm/cpu/ia32/opcodes'
8
8
  require 'metasm/encode'
9
9
 
10
10
  module Metasm
@@ -195,27 +195,28 @@ class Ia32
195
195
  case k
196
196
  when :jmp; {:jmp => 0x3e, :nojmp => 0x2e}[v]
197
197
  when :lock; 0xf0
198
- when :rep; {'repnz' => 0xf2, 'repz' => 0xf3, 'rep' => 0xf2}[v] # TODO
198
+ when :rep; {'repnz' => 0xf2, 'repz' => 0xf3, 'rep' => 0xf2}[v]
199
+ when :jmphint; {'hintjmp' => 0x3e, 'hintnojmp' => 0x2e}[v]
200
+ when :seg; [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][v.val]
199
201
  end
200
202
  }.compact.pack 'C*'
201
- pfx << op.props[:needpfx] if op.props[:needpfx]
202
203
 
203
204
  if op.name == 'movsx' or op.name == 'movzx'
204
205
  pfx << 0x66 if size == 48-i.args[0].sz
206
+ elsif op.name == 'crc32'
207
+ pfx << 0x66 if size == 48-i.args[1].sz
205
208
  else
206
209
  opsz = op.props[:argsz]
207
210
  oi.each { |oa, ia|
208
211
  case oa
209
- when :reg, :reg_eax, :modrm, :modrmA, :mrm_imm
212
+ when :reg, :reg_eax, :modrm, :mrm_imm
210
213
  raise EncodeError, "Incompatible arg size in #{i}" if ia.sz and opsz and opsz != ia.sz
211
214
  opsz = ia.sz
212
215
  end
213
216
  }
214
- pfx << 0x66 if (not op.props[:argsz] or opsz != op.props[:argsz]) and (
215
- (opsz and size == 48 - opsz) or (op.props[:opsz] and op.props[:opsz] != size))
216
- if op.props[:opsz] and size == 48 - op.props[:opsz]
217
- opsz = op.props[:opsz]
218
- end
217
+ pfx << 0x66 if (op.props[:opsz] and size == 48 - op.props[:opsz]) or
218
+ (not op.props[:argsz] and opsz and size == 48 - opsz)
219
+ opsz ||= op.props[:opsz]
219
220
  end
220
221
  opsz ||= size
221
222
 
@@ -226,7 +227,7 @@ class Ia32
226
227
  adsz ||= size
227
228
  # addrsize override / segment override
228
229
  if mrm = i.args.grep(ModRM).first
229
- if not op.props[:adsz] and ((mrm.b and mrm.b.sz != adsz) or (mrm.i and mrm.i.sz != adsz))
230
+ if not op.props[:adsz] and ((mrm.b and mrm.b.sz == 48 - adsz) or (mrm.i and mrm.i.sz == 48 - adsz))
230
231
  pfx << 0x67
231
232
  adsz = 48 - adsz
232
233
  end
@@ -240,10 +241,12 @@ class Ia32
240
241
  postponed = []
241
242
  oi.each { |oa, ia|
242
243
  case oa
243
- when :reg, :seg3, :seg3A, :seg2, :seg2A, :eeec, :eeed, :regfp, :regmmx, :regxmm
244
+ when :reg, :seg3, :seg3A, :seg2, :seg2A, :eeec, :eeed, :eeet, :regfp, :regmmx, :regxmm, :regymm
244
245
  # field arg
245
246
  set_field[oa, ia.val]
246
247
  pfx << 0x66 if oa == :regmmx and op.props[:xmmx] and ia.sz == 128
248
+ when :vexvreg, :vexvxmm, :vexvymm
249
+ set_field[:vex_vvvv, ia.val ^ 0xf]
247
250
  when :imm_val1, :imm_val3, :reg_cl, :reg_eax, :reg_dx, :regfp0
248
251
  # implicit
249
252
  else
@@ -251,7 +254,7 @@ class Ia32
251
254
  end
252
255
  }
253
256
 
254
- if !(op.args & [:modrm, :modrmA, :modrmxmm, :modrmmmx]).empty?
257
+ if !(op.args & [:modrm, :modrmmmx, :modrmxmm, :modrmymm]).empty?
255
258
  # reg field of modrm
256
259
  regval = (base[-1] >> 3) & 7
257
260
  base.pop
@@ -265,6 +268,8 @@ class Ia32
265
268
  postponed.first[1] = Expression[target, :-, postlabel]
266
269
  end
267
270
 
271
+ pfx << op.props[:needpfx] if op.props[:needpfx]
272
+
268
273
  #
269
274
  # append other arguments
270
275
  #
@@ -273,7 +278,7 @@ class Ia32
273
278
  postponed.each { |oa, ia|
274
279
  case oa
275
280
  when :farptr; ed = ia.encode(@endianness, "a#{opsz}".to_sym)
276
- when :modrm, :modrmA, :modrmmmx, :modrmxmm
281
+ when :modrm, :modrmmmx, :modrmxmm, :modrmymm
277
282
  if ia.kind_of? ModRM
278
283
  ed = ia.encode(regval, @endianness)
279
284
  if ed.kind_of?(::Array)
@@ -295,6 +300,7 @@ class Ia32
295
300
  when :mrm_imm; ed = ia.imm.encode("a#{adsz}".to_sym, @endianness)
296
301
  when :i8, :u8, :u16; ed = ia.encode(oa, @endianness)
297
302
  when :i; ed = ia.encode("a#{opsz}".to_sym, @endianness)
303
+ when :i4xmm, :i4ymm; ed = ia.val << 4 # u8
298
304
  else raise SyntaxError, "Internal error: want to encode field #{oa.inspect} as arg in #{i}"
299
305
  end
300
306