metasm 1.0.0 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (276) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +3 -0
  4. data/.gitignore +3 -0
  5. data/.hgtags +3 -0
  6. data/Gemfile +3 -0
  7. data/INSTALL +61 -0
  8. data/LICENCE +458 -0
  9. data/README +29 -21
  10. data/Rakefile +10 -0
  11. data/TODO +10 -12
  12. data/doc/code_organisation.txt +3 -1
  13. data/doc/core/DynLdr.txt +247 -0
  14. data/doc/core/ExeFormat.txt +43 -0
  15. data/doc/core/Expression.txt +220 -0
  16. data/doc/core/GNUExports.txt +27 -0
  17. data/doc/core/Ia32.txt +236 -0
  18. data/doc/core/SerialStruct.txt +108 -0
  19. data/doc/core/VirtualString.txt +145 -0
  20. data/doc/core/WindowsExports.txt +61 -0
  21. data/doc/core/index.txt +1 -0
  22. data/doc/style.css +6 -3
  23. data/doc/usage/debugger.txt +327 -0
  24. data/doc/usage/index.txt +1 -0
  25. data/doc/use_cases.txt +2 -2
  26. data/metasm.gemspec +23 -0
  27. data/{lib/metasm.rb → metasm.rb} +15 -3
  28. data/{lib/metasm → metasm}/compile_c.rb +15 -9
  29. data/metasm/cpu/arc.rb +8 -0
  30. data/metasm/cpu/arc/decode.rb +404 -0
  31. data/metasm/cpu/arc/main.rb +191 -0
  32. data/metasm/cpu/arc/opcodes.rb +588 -0
  33. data/metasm/cpu/arm.rb +14 -0
  34. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  35. data/{lib/metasm → metasm/cpu}/arm/decode.rb +15 -18
  36. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  37. data/{lib/metasm → metasm/cpu}/arm/main.rb +3 -6
  38. data/metasm/cpu/arm/opcodes.rb +324 -0
  39. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  40. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  41. data/metasm/cpu/arm64.rb +15 -0
  42. data/metasm/cpu/arm64/debug.rb +38 -0
  43. data/metasm/cpu/arm64/decode.rb +285 -0
  44. data/metasm/cpu/arm64/encode.rb +41 -0
  45. data/metasm/cpu/arm64/main.rb +105 -0
  46. data/metasm/cpu/arm64/opcodes.rb +232 -0
  47. data/metasm/cpu/arm64/parse.rb +20 -0
  48. data/metasm/cpu/arm64/render.rb +95 -0
  49. data/{lib/metasm/mips/compile_c.rb → metasm/cpu/bpf.rb} +4 -2
  50. data/metasm/cpu/bpf/decode.rb +110 -0
  51. data/metasm/cpu/bpf/main.rb +60 -0
  52. data/metasm/cpu/bpf/opcodes.rb +81 -0
  53. data/metasm/cpu/bpf/render.rb +30 -0
  54. data/{lib/metasm/ppc.rb → metasm/cpu/cy16.rb} +2 -4
  55. data/metasm/cpu/cy16/decode.rb +247 -0
  56. data/metasm/cpu/cy16/main.rb +63 -0
  57. data/metasm/cpu/cy16/opcodes.rb +78 -0
  58. data/metasm/cpu/cy16/render.rb +30 -0
  59. data/metasm/cpu/dalvik.rb +11 -0
  60. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +34 -34
  61. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +71 -4
  62. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +21 -12
  63. data/{lib/metasm/mips.rb → metasm/cpu/ebpf.rb} +3 -4
  64. data/metasm/cpu/ebpf/debug.rb +61 -0
  65. data/metasm/cpu/ebpf/decode.rb +142 -0
  66. data/metasm/cpu/ebpf/main.rb +58 -0
  67. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  68. data/metasm/cpu/ebpf/render.rb +36 -0
  69. data/metasm/cpu/ia32.rb +17 -0
  70. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +23 -9
  71. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +44 -6
  72. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +342 -128
  73. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +75 -53
  74. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  75. data/{lib/metasm → metasm/cpu}/ia32/main.rb +66 -8
  76. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  77. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +55 -17
  78. data/{lib/metasm → metasm/cpu}/ia32/render.rb +32 -5
  79. data/metasm/cpu/mcs51.rb +8 -0
  80. data/metasm/cpu/mcs51/decode.rb +99 -0
  81. data/metasm/cpu/mcs51/main.rb +87 -0
  82. data/metasm/cpu/mcs51/opcodes.rb +120 -0
  83. data/metasm/cpu/mips.rb +14 -0
  84. data/metasm/cpu/mips/debug.rb +42 -0
  85. data/{lib/metasm → metasm/cpu}/mips/decode.rb +59 -38
  86. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  87. data/{lib/metasm → metasm/cpu}/mips/main.rb +13 -6
  88. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +87 -18
  89. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  90. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  91. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  92. data/metasm/cpu/msp430/decode.rb +243 -0
  93. data/metasm/cpu/msp430/main.rb +62 -0
  94. data/metasm/cpu/msp430/opcodes.rb +101 -0
  95. data/metasm/cpu/openrisc.rb +11 -0
  96. data/metasm/cpu/openrisc/debug.rb +106 -0
  97. data/metasm/cpu/openrisc/decode.rb +182 -0
  98. data/metasm/cpu/openrisc/decompile.rb +350 -0
  99. data/metasm/cpu/openrisc/main.rb +70 -0
  100. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  101. data/metasm/cpu/openrisc/render.rb +37 -0
  102. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  103. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  104. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  105. data/metasm/cpu/ppc.rb +11 -0
  106. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -37
  107. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  108. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  109. data/{lib/metasm → metasm/cpu}/ppc/main.rb +23 -18
  110. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -6
  111. data/metasm/cpu/ppc/parse.rb +55 -0
  112. data/metasm/cpu/python.rb +8 -0
  113. data/metasm/cpu/python/decode.rb +116 -0
  114. data/metasm/cpu/python/main.rb +36 -0
  115. data/metasm/cpu/python/opcodes.rb +180 -0
  116. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  117. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +50 -23
  118. data/{lib/metasm → metasm/cpu}/sh4/main.rb +38 -27
  119. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  120. data/metasm/cpu/st20.rb +9 -0
  121. data/metasm/cpu/st20/decode.rb +173 -0
  122. data/metasm/cpu/st20/decompile.rb +283 -0
  123. data/metasm/cpu/st20/main.rb +37 -0
  124. data/metasm/cpu/st20/opcodes.rb +140 -0
  125. data/{lib/metasm/arm.rb → metasm/cpu/webasm.rb} +4 -5
  126. data/metasm/cpu/webasm/debug.rb +31 -0
  127. data/metasm/cpu/webasm/decode.rb +321 -0
  128. data/metasm/cpu/webasm/decompile.rb +386 -0
  129. data/metasm/cpu/webasm/encode.rb +104 -0
  130. data/metasm/cpu/webasm/main.rb +81 -0
  131. data/metasm/cpu/webasm/opcodes.rb +214 -0
  132. data/metasm/cpu/x86_64.rb +15 -0
  133. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +40 -25
  134. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  135. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +58 -15
  136. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +59 -28
  137. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +18 -6
  138. data/metasm/cpu/x86_64/opcodes.rb +138 -0
  139. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +12 -4
  140. data/metasm/cpu/x86_64/render.rb +35 -0
  141. data/metasm/cpu/z80.rb +9 -0
  142. data/metasm/cpu/z80/decode.rb +286 -0
  143. data/metasm/cpu/z80/main.rb +67 -0
  144. data/metasm/cpu/z80/opcodes.rb +224 -0
  145. data/metasm/cpu/z80/render.rb +48 -0
  146. data/{lib/metasm/os/main.rb → metasm/debug.rb} +201 -407
  147. data/{lib/metasm → metasm}/decode.rb +104 -24
  148. data/{lib/metasm → metasm}/decompile.rb +804 -478
  149. data/{lib/metasm → metasm}/disassemble.rb +385 -170
  150. data/{lib/metasm → metasm}/disassemble_api.rb +684 -105
  151. data/{lib/metasm → metasm}/dynldr.rb +231 -138
  152. data/{lib/metasm → metasm}/encode.rb +20 -5
  153. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  154. data/{lib/metasm → metasm}/exe_format/autoexe.rb +3 -0
  155. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  156. data/{lib/metasm → metasm}/exe_format/coff.rb +35 -7
  157. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +70 -23
  158. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +24 -22
  159. data/{lib/metasm → metasm}/exe_format/dex.rb +26 -8
  160. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  161. data/{lib/metasm → metasm}/exe_format/elf.rb +108 -58
  162. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +202 -36
  163. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +126 -32
  164. data/metasm/exe_format/gb.rb +65 -0
  165. data/metasm/exe_format/javaclass.rb +424 -0
  166. data/{lib/metasm → metasm}/exe_format/macho.rb +218 -16
  167. data/{lib/metasm → metasm}/exe_format/main.rb +28 -3
  168. data/{lib/metasm → metasm}/exe_format/mz.rb +2 -0
  169. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  170. data/{lib/metasm → metasm}/exe_format/pe.rb +96 -11
  171. data/metasm/exe_format/pyc.rb +167 -0
  172. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  173. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  174. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  175. data/metasm/exe_format/swf.rb +205 -0
  176. data/metasm/exe_format/wasm.rb +402 -0
  177. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  178. data/metasm/exe_format/zip.rb +335 -0
  179. data/metasm/gui.rb +13 -0
  180. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  181. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  182. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +177 -114
  183. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  184. data/metasm/gui/dasm_graph.rb +1754 -0
  185. data/{lib/metasm → metasm}/gui/dasm_hex.rb +16 -12
  186. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  187. data/{lib/metasm → metasm}/gui/dasm_main.rb +360 -77
  188. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  189. data/{lib/metasm → metasm}/gui/debug.rb +109 -34
  190. data/{lib/metasm → metasm}/gui/gtk.rb +174 -44
  191. data/{lib/metasm → metasm}/gui/qt.rb +14 -4
  192. data/{lib/metasm → metasm}/gui/win32.rb +180 -43
  193. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  194. data/{lib/metasm → metasm}/main.rb +421 -286
  195. data/metasm/os/emulator.rb +175 -0
  196. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  197. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  198. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  199. data/metasm/os/main.rb +335 -0
  200. data/{lib/metasm → metasm}/os/windows.rb +151 -58
  201. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  202. data/{lib/metasm → metasm}/parse.rb +49 -36
  203. data/{lib/metasm → metasm}/parse_c.rb +405 -246
  204. data/{lib/metasm → metasm}/preprocessor.rb +71 -41
  205. data/{lib/metasm → metasm}/render.rb +14 -38
  206. data/misc/hexdump.rb +4 -3
  207. data/misc/lint.rb +58 -0
  208. data/misc/objdiff.rb +4 -1
  209. data/misc/objscan.rb +1 -1
  210. data/misc/openrisc-parser.rb +79 -0
  211. data/misc/txt2html.rb +9 -7
  212. data/samples/bindiff.rb +3 -4
  213. data/samples/dasm-plugins/bindiff.rb +15 -0
  214. data/samples/dasm-plugins/bookmark.rb +133 -0
  215. data/samples/dasm-plugins/c_constants.rb +57 -0
  216. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  217. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  218. data/samples/dasm-plugins/dasm_all.rb +70 -0
  219. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  220. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  221. data/samples/dasm-plugins/dump_text.rb +35 -0
  222. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  223. data/samples/dasm-plugins/findgadget.rb +75 -0
  224. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  225. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  226. data/samples/dasm-plugins/imm2off.rb +34 -0
  227. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  228. data/samples/dasm-plugins/patch_file.rb +95 -0
  229. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  230. data/samples/dasm-plugins/scanxrefs.rb +29 -0
  231. data/samples/dasm-plugins/selfmodify.rb +197 -0
  232. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  233. data/samples/dasmnavig.rb +1 -1
  234. data/samples/dbg-apihook.rb +24 -9
  235. data/samples/dbg-plugins/heapscan.rb +283 -0
  236. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  237. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  238. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  239. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  240. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  241. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  242. data/samples/dbg-plugins/trace_func.rb +214 -0
  243. data/samples/disassemble-gui.rb +48 -7
  244. data/samples/disassemble.rb +31 -6
  245. data/samples/dump_upx.rb +24 -12
  246. data/samples/dynamic_ruby.rb +35 -27
  247. data/samples/elfencode.rb +15 -0
  248. data/samples/emubios.rb +251 -0
  249. data/samples/emudbg.rb +127 -0
  250. data/samples/exeencode.rb +6 -5
  251. data/samples/factorize-headers-peimports.rb +1 -1
  252. data/samples/lindebug.rb +186 -391
  253. data/samples/metasm-shell.rb +68 -57
  254. data/samples/peldr.rb +2 -2
  255. data/tests/all.rb +1 -1
  256. data/tests/arc.rb +26 -0
  257. data/tests/dynldr.rb +22 -4
  258. data/tests/expression.rb +57 -0
  259. data/tests/graph_layout.rb +285 -0
  260. data/tests/ia32.rb +80 -26
  261. data/tests/mcs51.rb +27 -0
  262. data/tests/mips.rb +10 -3
  263. data/tests/preprocessor.rb +18 -0
  264. data/tests/x86_64.rb +66 -18
  265. metadata +465 -219
  266. metadata.gz.sig +2 -0
  267. data/lib/metasm/arm/opcodes.rb +0 -177
  268. data/lib/metasm/gui.rb +0 -23
  269. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  270. data/lib/metasm/ia32.rb +0 -14
  271. data/lib/metasm/ia32/opcodes.rb +0 -872
  272. data/lib/metasm/ppc/parse.rb +0 -52
  273. data/lib/metasm/x86_64.rb +0 -12
  274. data/lib/metasm/x86_64/opcodes.rb +0 -118
  275. data/samples/gdbclient.rb +0 -583
  276. data/samples/rubstop.rb +0 -399
@@ -99,6 +99,28 @@ class InstructionBlock
99
99
  yield to if type == :indirect or dasm.function[to] or not dasm.decoded[to]
100
100
  }
101
101
  end
102
+
103
+ # returns the array used in each_from_samefunc
104
+ def from_samefunc(dasm)
105
+ ary = []
106
+ each_from_samefunc(dasm) { |a| ary << a }
107
+ ary
108
+ end
109
+ def from_otherfunc(dasm)
110
+ ary = []
111
+ each_from_otherfunc(dasm) { |a| ary << a }
112
+ ary
113
+ end
114
+ def to_samefunc(dasm)
115
+ ary = []
116
+ each_to_samefunc(dasm) { |a| ary << a }
117
+ ary
118
+ end
119
+ def to_otherfunc(dasm)
120
+ ary = []
121
+ each_to_otherfunc(dasm) { |a| ary << a }
122
+ ary
123
+ end
102
124
  end
103
125
 
104
126
  class DecodedInstruction
@@ -108,59 +130,21 @@ class DecodedInstruction
108
130
  end
109
131
  end
110
132
 
111
- class CPU
112
- # compat alias, for scripts using older version of metasm
113
- def get_backtrace_binding(di) backtrace_binding(di) end
114
-
115
- # return something like backtrace_binding in the forward direction
116
- # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
117
- def get_fwdemu_binding(di, pc_reg=nil)
118
- fdi = di.backtrace_binding ||= get_backtrace_binding(di)
119
- # find self-updated regs & revert them in simultaneous affectations
120
- # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42 jz foobar)
121
- fdi.keys.grep(::Symbol).each { |s|
122
- val = Expression[fdi[s]]
123
- next if val.lexpr != s or (val.op != :+ and val.op != :-) #or not val.rexpr.kind_of? ::Integer
124
- fwd = { s => val }
125
- inv = { s => val.dup }
126
- inv[s].op = ((inv[s].op == :+) ? :- : :+)
127
- nxt = {}
128
- fdi.each { |k, v|
129
- if k == s
130
- nxt[k] = v
131
- else
132
- k = k.bind(fwd).reduce_rec if k.kind_of? Indirection
133
- nxt[k] = Expression[Expression[v].bind(inv).reduce_rec]
134
- end
135
- }
136
- fdi = nxt
137
- }
138
- if pc_reg
139
- if di.opcode.props[:setip]
140
- xr = get_xrefs_x(nil, di)
141
- if xr and xr.length == 1
142
- fdi[pc_reg] = xr[0]
143
- else
144
- fdi[:incomplete_binding] = Expression[1]
145
- end
146
- else
147
- fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
148
- end
149
- end
150
- fdi
151
- end
152
- end
153
-
154
133
  class Disassembler
155
134
  # access the default value for @@backtrace_maxblocks for newly created Disassemblers
156
135
  def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end
157
136
  def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end
158
137
 
159
- # returns the dasm section's edata containing addr
160
- # its #ptr points to addr
161
- # returns the 1st element of #get_section_at
162
- def get_edata_at(addr)
163
- if s = get_section_at(addr)
138
+ # adds a comment at the given address
139
+ # comments are stored in the hash @comment: {addr => [list of strings]}
140
+ def add_comment(addr, cmt)
141
+ @comment[addr] ||= []
142
+ @comment[addr] |= [cmt]
143
+ end
144
+
145
+ # returns the 1st element of #get_section_at (ie the edata at a given address) or nil
146
+ def get_edata_at(*a)
147
+ if s = get_section_at(*a)
164
148
  s[0]
165
149
  end
166
150
  end
@@ -209,20 +193,20 @@ class Disassembler
209
193
 
210
194
  # yields every InstructionBlock
211
195
  # returns the list of IBlocks
212
- def each_instructionblock
196
+ def each_instructionblock(&b)
213
197
  ret = []
214
198
  @decoded.each { |addr, di|
215
199
  next if not di.kind_of? DecodedInstruction or not di.block_head?
216
200
  ret << di.block
217
- yield di.block if block_given?
201
+ b.call(di.block) if b
218
202
  }
219
203
  ret
220
204
  end
221
205
  alias instructionblocks each_instructionblock
222
206
 
223
207
  # return a backtrace_binding reversed (akin to code emulation) (but not really)
224
- def get_fwdemu_binding(di, pc=nil)
225
- @cpu.get_fwdemu_binding(di, pc)
208
+ def get_fwdemu_binding(di, pc=nil, dbg_ctx=nil)
209
+ @cpu.get_fwdemu_binding(di, pc, dbg_ctx)
226
210
  end
227
211
 
228
212
  # reads len raw bytes from the mmaped address space
@@ -287,24 +271,32 @@ class Disassembler
287
271
  if b = block_at(from_addr)
288
272
  b.add_to_normal(addr)
289
273
  end
290
- @addrs_todo << [addr, from_addr]
274
+ @addrs_todo << { :addr => addr, :from => from_addr }
291
275
  disassemble
292
276
  end
293
277
 
294
278
  # returns the label associated to an addr, or nil if none exist
295
279
  def get_label_at(addr)
296
- e, b = get_section_at(addr, false)
280
+ e = get_edata_at(addr, false)
297
281
  e.inv_export[e.ptr] if e
298
282
  end
299
283
 
284
+ # return the array of all labels associated to an addr
285
+ def get_all_labels_at(addr)
286
+ addr = normalize(addr)
287
+ label_alias[addr].to_a
288
+ end
289
+
300
290
  # sets the label for the specified address
301
291
  # returns nil if the address is not mapped
302
292
  # memcheck is passed to get_section_at to validate that the address is mapped
303
- def set_label_at(addr, name, memcheck=true)
293
+ # keep existing label if 'overwrite' is false
294
+ def set_label_at(addr, name, memcheck=true, overwrite=true)
304
295
  addr = Expression[addr].reduce
305
296
  e, b = get_section_at(addr, memcheck)
306
297
  if not e
307
- elsif not l = e.inv_export[e.ptr]
298
+ elsif not l = e.inv_export[e.ptr] or (!overwrite and l != name)
299
+ split_block(addr)
308
300
  l = @program.new_label(name)
309
301
  e.add_export l, e.ptr
310
302
  @label_alias_cache = nil
@@ -317,7 +309,7 @@ class Disassembler
317
309
 
318
310
  # remove a label at address addr
319
311
  def del_label_at(addr, name=get_label_at(addr))
320
- ed, b = get_section_at(addr)
312
+ ed = get_edata_at(addr)
321
313
  if ed and ed.inv_export[ed.ptr]
322
314
  ed.del_export name, ed.ptr
323
315
  @label_alias_cache = nil
@@ -325,6 +317,7 @@ class Disassembler
325
317
  each_xref(addr) { |xr|
326
318
  next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable
327
319
  o.each_expr { |e|
320
+ next unless e.kind_of?(Expression)
328
321
  e.lexpr = addr if e.lexpr == name
329
322
  e.rexpr = addr if e.rexpr == name
330
323
  }
@@ -337,12 +330,14 @@ class Disassembler
337
330
  # returns the new label
338
331
  # the new label must be program-uniq (see @program.new_label)
339
332
  def rename_label(old, new)
333
+ return new if old == new
334
+ raise "label #{new.inspect} exists" if @prog_binding[new]
340
335
  each_xref(normalize(old)) { |x|
341
336
  next if not di = @decoded[x.origin]
342
337
  @cpu.replace_instr_arg_immediate(di.instruction, old, new)
343
338
  di.comment.to_a.each { |c| c.gsub!(old, new) }
344
339
  }
345
- e, l = get_section_at(old, false)
340
+ e = get_edata_at(old, false)
346
341
  if e
347
342
  e.add_export new, e.export.delete(old), true
348
343
  end
@@ -499,12 +494,12 @@ class Disassembler
499
494
  # if from..to spans multiple blocks
500
495
  # to.block is splitted after to
501
496
  # all paths from 'from' are replaced by a single link to after 'to', be careful !
502
- # (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
497
+ # (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
503
498
  # all instructions are stuffed in the first block
504
499
  # paths are only walked using from/to_normal
505
500
  # 'by' may be empty
506
501
  # returns the block containing the new instrs (nil if empty)
507
- def replace_instrs(from, to, by)
502
+ def replace_instrs(from, to, by, patch_by=false)
508
503
  raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi)
509
504
  raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi)
510
505
 
@@ -520,14 +515,28 @@ class Disassembler
520
515
  wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length }
521
516
  ldi = by.last
522
517
  ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction
523
- wantlen = by.grep(Instruction).length if wantlen < 0 or (ldi and ldi.opcode.props[:setip])
524
- by.map! { |di|
525
- if di.kind_of? Instruction
526
- di = DecodedInstruction.new(di)
527
- wantlen -= di.bin_length = wantlen / by.grep(Instruction).length
528
- end
529
- di
530
- }
518
+ nb_i = by.grep(Instruction).length
519
+ wantlen = nb_i if wantlen < 0 or (ldi and ldi.opcode.props[:setip])
520
+ if patch_by
521
+ by.map! { |di|
522
+ if di.kind_of? Instruction
523
+ di = DecodedInstruction.new(di)
524
+ wantlen -= di.bin_length = wantlen / by.grep(Instruction).length
525
+ nb_i -= 1
526
+ end
527
+ di
528
+ }
529
+ else
530
+ by = by.map { |di|
531
+ if di.kind_of? Instruction
532
+ di = DecodedInstruction.new(di)
533
+ wantlen -= (di.bin_length = wantlen / nb_i)
534
+ nb_i -= 1
535
+ end
536
+ di
537
+ }
538
+ end
539
+
531
540
 
532
541
  #puts " ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip]
533
542
  by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip]
@@ -649,8 +658,8 @@ class Disassembler
649
658
  if b1 and not b1.kind_of? InstructionBlock
650
659
  return if not b1 = block_at(b1)
651
660
  end
652
- if b2 and not b2.kind_of? InstructionBlock
653
- return if not b2 = block_at(b2)
661
+ if b2 and not b2.kind_of? InstructionBlock
662
+ return if not b2 = block_at(b2)
654
663
  end
655
664
  if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and
656
665
  b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot
@@ -658,6 +667,8 @@ class Disassembler
658
667
  b1.to_indirect.to_a == [] and b2.from_indirect.to_a == []
659
668
  b2.list.each { |di| b1.add_di di }
660
669
  b1.to_normal = b2.to_normal
670
+ b1.to_subfuncret = b2.to_subfuncret
671
+ b1.to_indirect = b2.to_indirect
661
672
  b2.list.clear
662
673
  @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address }
663
674
  true
@@ -674,7 +685,7 @@ class Disassembler
674
685
  # give something equivalent to the code accessible from the (list of) entrypoints given
675
686
  # from the @decoded dasm graph
676
687
  # assume all jump targets have a matching label in @prog_binding
677
- # may add inconditionnal jumps in the listing to preserve the code flow
688
+ # may add unconditional jumps in the listing to preserve the code flow
678
689
  def flatten_graph(entry, include_subfunc=true)
679
690
  ret = []
680
691
  entry = [entry] if not entry.kind_of? Array
@@ -682,11 +693,16 @@ class Disassembler
682
693
  done = []
683
694
  inv_binding = @prog_binding.invert
684
695
  while addr = todo.pop
685
- next if done.include? addr or not di_at(addr)
696
+ next if done.include?(addr)
686
697
  done << addr
687
- b = @decoded[addr].block
688
698
 
689
699
  ret << Label.new(inv_binding[addr]) if inv_binding[addr]
700
+ if not di_at(addr)
701
+ ret << @cpu.instr_jump_stop
702
+ next
703
+ end
704
+
705
+ b = @decoded[addr].block
690
706
  ret.concat b.list.map { |di| di.instruction }
691
707
 
692
708
  b.each_to_otherfunc(self) { |to|
@@ -700,8 +716,8 @@ class Disassembler
700
716
 
701
717
  if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip]
702
718
  to = b.list.last.next_addr
703
- if todo.include? to
704
- if done.include? to or not di_at(to)
719
+ if todo.include?(to) and di_at(to)
720
+ if done.include?(to)
705
721
  if not to_l = inv_binding[to]
706
722
  to_l = auto_label_at(to, 'loc')
707
723
  if done.include? to and idx = ret.index(@decoded[to].block.list.first.instruction)
@@ -712,6 +728,8 @@ class Disassembler
712
728
  else
713
729
  todo << to # ensure it's next in the listing
714
730
  end
731
+ else
732
+ ret << @cpu.instr_jump_stop
715
733
  end
716
734
  end
717
735
  end
@@ -720,17 +738,23 @@ class Disassembler
720
738
  end
721
739
 
722
740
  # returns a demangled C++ name
723
- # from wgcc-2.2.2/undecorate.cpp
724
- # TODO
725
741
  def demangle_cppname(name)
726
- ret = name
727
- if name[0] == ??
742
+ case name[0]
743
+ when ?? # MSVC
728
744
  name = name[1..-1]
729
- if name[0] == ??
730
- name = name[1..-1]
731
- op = name[0, 1]
732
- op = name[0, 2] if op == '_'
733
- if op = {
745
+ demangle_msvc(name[1..-1]) if name[0] == ??
746
+ when ?_
747
+ name = name.sub(/_GLOBAL__[ID]_/, '')
748
+ demangle_gcc(name[2..-1][/\S*/]) if name[0, 2] == '_Z'
749
+ end
750
+ end
751
+
752
+ # from wgcc-2.2.2/undecorate.cpp
753
+ # TODO
754
+ def demangle_msvc(name)
755
+ op = name[0, 1]
756
+ op = name[0, 2] if op == '_'
757
+ if op = {
734
758
  '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=",
735
759
  'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&",
736
760
  'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",",
@@ -743,11 +767,157 @@ class Disassembler
743
767
  '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'",
744
768
  '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]",
745
769
  '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[op]
746
- ret = op[0] == ?` ? op[1..-2] : "op_#{op}"
770
+ op[0] == ?` ? op[1..-2] : "op_#{op}"
771
+ end
772
+ end
773
+
774
+ # from http://www.codesourcery.com/public/cxx-abi/abi.html
775
+ def demangle_gcc(name)
776
+ subs = []
777
+ ret = ''
778
+ decode_tok = lambda {
779
+ name ||= ''
780
+ case name[0]
781
+ when nil
782
+ ret = nil
783
+ when ?N
784
+ name = name[1..-1]
785
+ decode_tok[]
786
+ until name[0] == ?E
787
+ break if not ret
788
+ ret << '::'
789
+ decode_tok[]
790
+ end
791
+ name = name[1..-1]
792
+ when ?I
793
+ name = name[1..-1]
794
+ ret = ret[0..-3] if ret[-2, 2] == '::'
795
+ ret << '<'
796
+ decode_tok[]
797
+ until name[0] == ?E
798
+ break if not ret
799
+ ret << ', '
800
+ decode_tok[]
801
+ end
802
+ ret << ' ' if ret and ret[-1] == ?>
803
+ ret << '>' if ret
804
+ name = name[1..-1]
805
+ when ?T
806
+ case name[1]
807
+ when ?T; ret << 'vtti('
808
+ when ?V; ret << 'vtable('
809
+ when ?I; ret << 'typeinfo('
810
+ when ?S; ret << 'typename('
811
+ else ret = nil
812
+ end
813
+ name = name[2..-1].to_s
814
+ decode_tok[] if ret
815
+ ret << ')' if ret
816
+ name = name[1..-1] if name[0] == ?E
817
+ when ?C
818
+ name = name[2..-1]
819
+ base = ret[/([^:]*)(<.*|::)?$/, 1]
820
+ ret << base
821
+ when ?D
822
+ name = name[2..-1]
823
+ base = ret[/([^:]*)(<.*|::)?$/, 1]
824
+ ret << '~' << base
825
+ when ?0..?9
826
+ nr = name[/^[0-9]+/]
827
+ name = name[nr.length..-1].to_s
828
+ ret << name[0, nr.to_i]
829
+ name = name[nr.to_i..-1]
830
+ subs << ret[/[\w:]*$/]
831
+ when ?S
832
+ name = name[1..-1]
833
+ case name[0]
834
+ when ?_, ?0..?9, ?A..?Z
835
+ case name[0]
836
+ when ?_; idx = 0 ; name = name[1..-1]
837
+ when ?0..?9; idx = name[0, 1].unpack('C')[0] - 0x30 + 1 ; name = name[2..-1]
838
+ when ?A..?Z; idx = name[0, 1].unpack('C')[0] - 0x41 + 11 ; name = name[2..-1]
839
+ end
840
+ if not subs[idx]
841
+ ret = nil
842
+ else
843
+ ret << subs[idx]
844
+ end
845
+ when ?t
846
+ ret << 'std::'
847
+ name = name[1..-1]
848
+ decode_tok[]
849
+ else
850
+ std = { ?a => 'std::allocator',
851
+ ?b => 'std::basic_string',
852
+ ?s => 'std::string', # 'std::basic_string < char, std::char_traits<char>, std::allocator<char> >',
853
+ ?i => 'std::istream', # 'std::basic_istream<char, std::char_traits<char> >',
854
+ ?o => 'std::ostream', # 'std::basic_ostream<char, std::char_traits<char> >',
855
+ ?d => 'std::iostream', # 'std::basic_iostream<char, std::char_traits<char> >'
856
+ }[name[0]]
857
+ if not std
858
+ ret = nil
859
+ else
860
+ ret << std
861
+ end
862
+ name = name[1..-1]
863
+ end
864
+ when ?P, ?R, ?r, ?V, ?K
865
+ attr = { ?P => '*', ?R => '&', ?r => ' restrict', ?V => ' volatile', ?K => ' const' }[name[0]]
866
+ name = name[1..-1]
867
+ rl = ret.length
868
+ decode_tok[]
869
+ if ret
870
+ ret << attr
871
+ subs << ret[rl..-1]
872
+ end
873
+ else
874
+ if ret =~ /[(<]/ and ty = {
875
+ ?v => 'void', ?w => 'wchar_t', ?b => 'bool', ?c => 'char', ?a => 'signed char',
876
+ ?h => 'unsigned char', ?s => 'short', ?t => 'unsigned short', ?i => 'int',
877
+ ?j => 'unsigned int', ?l => 'long', ?m => 'unsigned long', ?x => '__int64',
878
+ ?y => 'unsigned __int64', ?n => '__int128', ?o => 'unsigned __int128', ?f => 'float',
879
+ ?d => 'double', ?e => 'long double', ?g => '__float128', ?z => '...'
880
+ }[name[0]]
881
+ name = name[1..-1]
882
+ ret << ty
883
+ else
884
+ fu = name[0, 2]
885
+ name = name[2..-1]
886
+ if op = {
887
+ 'nw' => ' new', 'na' => ' new[]', 'dl' => ' delete', 'da' => ' delete[]',
888
+ 'ps' => '+', 'ng' => '-', 'ad' => '&', 'de' => '*', 'co' => '~', 'pl' => '+',
889
+ 'mi' => '-', 'ml' => '*', 'dv' => '/', 'rm' => '%', 'an' => '&', 'or' => '|',
890
+ 'eo' => '^', 'aS' => '=', 'pL' => '+=', 'mI' => '-=', 'mL' => '*=', 'dV' => '/=',
891
+ 'rM' => '%=', 'aN' => '&=', 'oR' => '|=', 'eO' => '^=', 'ls' => '<<', 'rs' => '>>',
892
+ 'lS' => '<<=', 'rS' => '>>=', 'eq' => '==', 'ne' => '!=', 'lt' => '<', 'gt' => '>',
893
+ 'le' => '<=', 'ge' => '>=', 'nt' => '!', 'aa' => '&&', 'oo' => '||', 'pp' => '++',
894
+ 'mm' => '--', 'cm' => ',', 'pm' => '->*', 'pt' => '->', 'cl' => '()', 'ix' => '[]',
895
+ 'qu' => '?', 'st' => ' sizeof', 'sz' => ' sizeof', 'at' => ' alignof', 'az' => ' alignof'
896
+ }[fu]
897
+ ret << "operator#{op}"
898
+ elsif fu == 'cv'
899
+ ret << "cast<"
900
+ decode_tok[]
901
+ ret << ">" if ret
902
+ else
903
+ ret = nil
904
+ end
747
905
  end
748
906
  end
907
+ name ||= ''
908
+ }
909
+
910
+ decode_tok[]
911
+ subs.pop
912
+ if ret and name != ''
913
+ ret << '('
914
+ decode_tok[]
915
+ while ret and name != ''
916
+ ret << ', '
917
+ decode_tok[]
918
+ end
919
+ ret << ')' if ret
749
920
  end
750
- # TODO
751
921
  ret
752
922
  end
753
923
 
@@ -755,7 +925,8 @@ class Disassembler
755
925
  # return/yields all the addresses matching
756
926
  # if yield returns nil/false, do not include the addr in the final result
757
927
  # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM)
758
- def pattern_scan(pat, chunksz=nil, margin=nil)
928
+ # with addr_start/length, symbol-based sections are skipped
929
+ def pattern_scan(pat, addr_start=nil, length=nil, chunksz=nil, margin=nil, &b)
759
930
  chunksz ||= 4*1024*1024 # scan 4MB at a time
760
931
  margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries
761
932
 
@@ -763,9 +934,27 @@ class Disassembler
763
934
 
764
935
  found = []
765
936
  @sections.each { |sec_addr, e|
937
+ if addr_start
938
+ length ||= 0x1000_0000
939
+ begin
940
+ if sec_addr < addr_start
941
+ next if sec_addr+e.length <= addr_start
942
+ e = e[addr_start-sec_addr, e.length]
943
+ sec_addr = addr_start
944
+ end
945
+ if sec_addr+e.length > addr_start+length
946
+ next if sec_addr > addr_start+length
947
+ e = e[0, sec_addr+e.length-(addr_start+length)]
948
+ end
949
+ rescue
950
+ puts $!, $!.message, $!.backtrace if $DEBUG
951
+ # catch arithmetic error with symbol-based section
952
+ next
953
+ end
954
+ end
766
955
  e.pattern_scan(pat, chunksz, margin) { |eo|
767
956
  match_addr = sec_addr + eo
768
- found << match_addr if not block_given? or yield(match_addr)
957
+ found << match_addr if not b or b.call(match_addr)
769
958
  false
770
959
  }
771
960
  }
@@ -773,14 +962,14 @@ class Disassembler
773
962
  end
774
963
 
775
964
  # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/
776
- def strings_scan(minlen=6)
965
+ def strings_scan(minlen=6, &b)
777
966
  ret = []
778
967
  nexto = 0
779
968
  pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o|
780
969
  if o - nexto > 0
781
970
  next unless e = get_edata_at(o)
782
971
  str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m]
783
- ret << [o, str] if not block_given? or yield(o, str)
972
+ ret << [o, str] if not b or b.call(o, str)
784
973
  nexto = o + str.length
785
974
  end
786
975
  }
@@ -797,7 +986,7 @@ class Disassembler
797
986
  end
798
987
 
799
988
  # loads a map file (addr => symbol)
800
- # off is an optionnal offset to add to every address found (for eg rebased binaries)
989
+ # off is an optional offset to add to every address found (for eg rebased binaries)
801
990
  # understands:
802
991
  # standard map files (eg linux-kernel.map: <addr> <type> <name>, e.g. 'c01001ba t setup_idt')
803
992
  # ida map files (<sectionidx>:<sectionoffset> <name>)
@@ -805,18 +994,24 @@ class Disassembler
805
994
  def load_map(str, off=0)
806
995
  str = File.read(str) rescue nil if not str.index("\n")
807
996
  sks = @sections.keys.sort
997
+ seen = {}
808
998
  str.each_line { |l|
809
999
  case l.strip
810
1000
  when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style
811
- set_label_at($1.to_i(16)+off, $3)
1001
+ addr = $1.to_i(16)+off
1002
+ set_label_at(addr, $3, false, !seen[addr])
1003
+ seen[addr] = true
812
1004
  when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style
813
1005
  # we do not have section load order, let's just hope that the addresses are sorted (and sortable..)
814
1006
  # could check the 1st part of the file, with section sizes, but it is not very convenient
815
1007
  # the regexp is so that we skip the 1st part with section descriptions
816
- # in the file, section 1 is the 1st section ; we have an additionnal section (exe header) which fixes the 0-index
817
- set_label_at(sks[$1.to_i(16)] + $2.to_i(16) + off, $3)
1008
+ # in the file, section 1 is the 1st section ; we have an additional section (exe header) which fixes the 0-index
1009
+ # XXX this is PE-specific, TODO fix it for ELF (ida references sections, we reference segments...)
1010
+ addr = sks[$1.to_i(16)] + $2.to_i(16) + off
1011
+ set_label_at(addr, $3, false, !seen[addr])
1012
+ seen[addr] = true
818
1013
  end
819
- }
1014
+ }
820
1015
  end
821
1016
 
822
1017
  # saves the dasm state in a file
@@ -830,13 +1025,14 @@ class Disassembler
830
1025
  def save_io(fd)
831
1026
  fd.puts 'Metasm.dasm'
832
1027
 
833
- if @program.filename
1028
+ if @program.filename and not @program.kind_of?(Shellcode)
834
1029
  t = @program.filename.to_s
835
1030
  fd.puts "binarypath #{t.length}", t
836
1031
  else
837
1032
  t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}"
838
1033
  fd.puts "cpu #{t.length}", t
839
1034
  # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE
1035
+ # do not output binarypath, we'll be loaded as a Shellcode, 'section' will suffice
840
1036
  end
841
1037
 
842
1038
  @sections.each { |a, e|
@@ -857,7 +1053,7 @@ class Disassembler
857
1053
  fd.puts "decoded #{t.length}", t
858
1054
 
859
1055
  t = @comment.map { |a, c|
860
- c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" }
1056
+ c.to_a.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" }
861
1057
  }.join("\n")
862
1058
  fd.puts "comment #{t.length}", t
863
1059
 
@@ -942,6 +1138,7 @@ class Disassembler
942
1138
  reinitialize Shellcode.new(cpu)
943
1139
  @program.disassembler = self
944
1140
  @program.init_disassembler
1141
+ @sections.delete(0) # rm empty section at 0, other real 'section' follow
945
1142
  when 'section'
946
1143
  info = data[0, data.index("\n") || data.length]
947
1144
  data = data[info.length, data.length]
@@ -949,7 +1146,11 @@ class Disassembler
949
1146
  addr = Expression.parse(pp).reduce
950
1147
  len = Expression.parse(pp).reduce
951
1148
  edata = EncodedData.new(data.unpack('m*').first, :virtsize => len)
952
- add_section(addr, edata)
1149
+ # check for an existing section, eg from binarypath
1150
+ existing_section = get_section_at(addr)
1151
+ if not existing_section or existing_section[0].data.to_str != edata.data.to_str
1152
+ add_section(addr, edata)
1153
+ end
953
1154
  when 'map'
954
1155
  load_map data
955
1156
  when 'decoded'
@@ -1030,7 +1231,7 @@ class Disassembler
1030
1231
  len = (len != '' ? len.to_i : nil)
1031
1232
  o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ?
1032
1233
  add_xref(a, Xref.new(t, o, len))
1033
- rescue
1234
+ rescue
1034
1235
  puts "load: bad xref #{l.inspect} #$!" if $VERBOSE
1035
1236
  end
1036
1237
  }
@@ -1104,12 +1305,354 @@ class Disassembler
1104
1305
  delta
1105
1306
  end
1106
1307
 
1308
# dataflow method
# walks a function through function_walk, starting at start_addr, following
# the registers listed in init_state (a hash { :reg => initial value })
# each time an instruction uses a traced register (or a value derived from
# one), yields [di, used_register, reg_value, trace_state]:
#  reg_value is the Expression for the register relative to the initial
#  values (nil when the register is only referenced, eg call [eax])
#  trace_state is the state before the instruction is applied
# a nil/false answer from the block stops the trace of that register,
# any other answer becomes its new traced value
def trace_function_register(start_addr, init_state)
  function_walk(start_addr, init_state) do |args|
    trace_state = args.last

    case args.first
    when :di
      di = args[2]
      update = {}

      get_fwdemu_binding(di).each do |r, v|
        if v.kind_of?(Expression) and v.externals.find { |e| trace_state[e] }
          # value computed from a traced register
          # XXX may mix old (from trace) and current (from v) registers
          update[r] = yield(di, r, v.bind(trace_state), trace_state)
        elsif r.kind_of?(ExpressionType) and rr = r.externals.find { |e| trace_state[e] }
          # reg dereferenced in a write (eg mov [esp], 42)
          next if update.has_key?(rr)	# already yielded
          update[rr] = false if yield(di, rr, trace_state[rr], trace_state) == false
        elsif trace_state[r]
          # traced register overwritten with something unrelated
          # (unless we started on mov reg, foo)
          next if di.address == start_addr
          update[r] = false
        end
      end

      # directly walk the instruction argument list for registers not appearing in the binding
      @cpu.instr_args_memoryptr(di).each do |ind|
        base = @cpu.instr_args_memoryptr_getbase(ind)
        next unless base
        base = base.symbolic
        next unless base
        yield(di, base, nil, trace_state) unless update.has_key?(base)
      end
      @cpu.instr_args_regs(di).each do |r|
        sym = r.symbolic
        yield(di, sym, nil, trace_state) unless update.has_key?(sym)
      end

      # apply the collected changes on a copy of the state
      update.each do |r, v|
        trace_state = trace_state.dup
        if not v
          trace_state.delete r
        elsif r.kind_of?(::Symbol)
          # cannot follow non-registers, or we would have to emulate every single
          # instruction (try following [esp+4] across a __stdcall..)
          trace_state[r] = v
        end
      end

    when :subfunc
      faddr = args[1]
      f = @function[faddr]
      f = @function[f.backtrace_binding[:thunk]] if f and f.backtrace_binding[:thunk]
      if f
        fbinding = f.backtrace_binding
        if fbinding.empty?
          backtrace_update_function_binding(faddr)
          fbinding = f.backtrace_binding
        end
        # XXX fwdemu_binding ?
        fbinding.each do |r, v|
          if v.externals.find { |e| trace_state[e] }
            if r.kind_of?(::Symbol)
              trace_state = trace_state.dup
              trace_state[r] = Expression[v.bind(trace_state)].reduce
            end
          elsif trace_state[r]
            # the subfunction overwrites a traced register
            trace_state = trace_state.dup
            trace_state.delete r
          end
        end
      end

    when :merge
      # when merging paths, keep the smallest common state subset
      # XXX may have unexplored froms
      conflicts = args[2]
      trace_state = trace_state.dup
      conflicts.each do |_addr, st|
        trace_state.delete_if { |k, v| st[k] != v }
      end
    end

    # an empty state is reported as false
    trace_state.empty? ? false : trace_state
  end
end
1403
+
1404
# define a register as a pointer to a structure
# starting at addr, reg is traced as 'structname + structoff'; every
# [reg+off] memory argument met in the function gets its offset replaced
# by an ExpressionString of type :structoff naming the struct member
# registers assigned from pointer members are traced as well
def trace_update_reg_structptr(addr, reg, structname, structoff=0)
  # sname/soff hold the result of the last split_expr call,
  # ctx is the trace state split_expr consults for symbolic offsets
  sname = soff = ctx = nil

  # true for an Expression of the form (@cpu.size/8) * something
  scaled_idx = lambda { |x| x.kind_of?(Expression) and x.op == :* and x.lexpr == @cpu.size/8 }

  # split 'a + b' into sname (left part, or right part if there is no left)
  # and soff ; anything which is not an addition sets sname to nil
  split_expr = lambda do |expr|
    if expr.kind_of?(Expression) and expr.op == :+
      sname = expr.lexpr || expr.rexpr
      soff = (expr.lexpr ? expr.rexpr : 0)

      if soff.kind_of?(Expression)
        # ignore index in ptr array
        if scaled_idx[soff]
          soff = 0
        elsif scaled_idx[soff.rexpr]
          soff = soff.lexpr
        elsif scaled_idx[soff.lexpr]
          soff = soff.rexpr
        end
      elsif soff.kind_of?(::Symbol)
        # array with 1 byte elements / pre-scaled idx?
        soff = 0 if not ctx[soff]
      end
    else
      sname = nil
    end
  end

  lastdi = nil
  trace_function_register(addr, reg => Expression[structname, :+, structoff]) do |di, r, val, trace|
    next if r.to_s =~ /flag/	# XXX maybe too ia32-specific?

    ctx = trace
    # patch the memory arguments only once per instruction
    if lastdi != di.address
      @cpu.instr_args_memoryptr(di).each do |ind|
        # find the structure dereference in di
        base = @cpu.instr_args_memoryptr_getbase(ind)
        base = base.symbolic if base
        next unless trace[base]
        imm = @cpu.instr_args_memoryptr_getoffset(ind) || 0

        # check expr has the form 'traced_struct_reg + off'
        split_expr[trace[base] + imm]	# Expr#+ calls Expr#reduce
        next unless sname.kind_of?(::String) and soff.kind_of?(::Integer)
        st = c_parser.toplevel.struct[sname]
        next unless st and st.kind_of?(C::Union)

        # ignore lea esi, [esi+0]
        next if soff == 0 and not di.backtrace_binding.find { |k, v| v-k != 0 }

        # TODO if trace[base] offset != 0, we had a lea reg, [struct+substruct_off], tweak path accordingly

        # resolve struct + off into struct.membername
        path = st.name.dup
        mb = st.expand_member_offset(c_parser, soff, path)
        # patch di, reusing the raw offset if it was already renamed
        imm = imm.rexpr if imm.kind_of?(Expression) and not imm.lexpr and imm.rexpr.kind_of?(ExpressionString)
        imm = imm.expr if imm.kind_of?(ExpressionString)
        @cpu.instr_args_memoryptr_setoffset(ind, ExpressionString.new(imm, path, :structoff))

        # check if the type is an enum/bitfield, patch instruction immediates
        trace_update_reg_structptr_arg_enum(di, ind, mb, path) if mb
      end
    end
    lastdi = di.address

    next Expression[structname, :+, structoff] if di.address == addr and r == reg

    # check if we need to trace 'r' further
    val = val.reduce_rec if val.kind_of?(Expression)
    val = Expression[val] if val.kind_of?(::String)
    case val
    when Expression
      # only trace trivial structptr+off expressions
      split_expr[val]
      Expression[sname, :+, soff] if sname.kind_of?(::String) and soff.kind_of?(::Integer)

    when Indirection
      # di is mov reg, [ptr+struct.offset]
      # check if the target member is a pointer to a struct, if so, trace it
      split_expr[val.pointer.reduce]

      next unless sname.kind_of?(::String) and soff.kind_of?(::Integer)

      if st = c_parser.toplevel.struct[sname] and st.kind_of?(C::Union)
        member = st.expand_member_offset(c_parser, soff, '')
        member = member.untypedef if member
        if member.kind_of?(C::Pointer)
          target = member.type.untypedef
          # one '*' for each additional pointer level
          stars = ''
          while target.kind_of?(C::Pointer)
            stars << '*'
            target = target.type.untypedef
          end
          Expression[target.name + stars] if target.kind_of?(C::Union) and target.name
        end

      elsif soff == 0 and sname[-1] == ?*
        # XXX pointer to pointer to struct
        # full C type support would be better, but harder to fit in an Expr
        Expression[sname[0...-1]]
      end
      # in other cases, stop trace
    end
  end
end
1515
+
1516
# found a special member of a struct, check if we can apply
# bitfield/enum name to other constants in the di
# di is the instruction dereferencing the member, ind its memory argument,
# mb the member and str the 'struct.member' string built for it
# (str is edited in place when it ends with a '+<n>' byte offset)
# the first Expression argument of the instruction, when it reduces to an
# Integer, is rewritten as an ExpressionString of type :enum or :bitfield ;
# the register loaded from ind, if any, is then traced to patch the
# instructions using it
def trace_update_reg_structptr_arg_enum(di, ind, mb, str)
  # first Expression argument of an instruction with its Integer value, or nil
  int_arg = lambda { |_di|
    if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer)
      [num, num_i]
    end
  }
  # replace the content of num by a named version of its value
  rename = lambda { |num, num_i, name, type|
    num.lexpr = nil
    num.op = :+
    num.rexpr = ExpressionString.new(Expression[num_i], name, type)
  }
  # register argument of di whose binding is exactly the memory argument
  loaded_reg = lambda {
    @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 }
  }

  if ename = mb.has_attribute_var('enum') and enum = c_parser.toplevel.struct[ename] and enum.kind_of?(C::Enum)
    # handle enums: struct moo { int __attribute__((enum(bla))) fld; };
    doit = lambda { |_di|
      num, num_i = int_arg[_di]
      next if not num
      # handle enum values on tagged structs
      members = enum.members
      next if not members
      # value => name lookup ; Hash#index was removed in ruby 3.0, use
      # Hash#key where it exists
      # NOTE(review): members is assumed to map names to values - confirm
      name = (members.respond_to?(:key) ? members.key(num_i) : members.index(num_i))
      if name
        rename[num, num_i, name, :enum]
        _di.add_comment "enum::#{ename}" if _di.address != di.address
      end
    }

    doit[di]

    # mov eax, [ptr+struct.enumfield] => trace eax
    if reg = loaded_reg[]
      reg = reg.symbolic
      trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace|
        next if r != reg and val != Expression[reg]
        doit[_di]
        val
      }
    end

  elsif mb.untypedef.kind_of?(C::Struct)
    # handle bitfields

    byte_off = 0
    if str =~ /\+(\d+)$/
      # test byte [bitfield+1], 0x1 => test dword [bitfield], 0x100
      # XXX little-endian only
      byte_off = $1.to_i
      str[/\+\d+$/] = ''
    end
    # comment text: the last 2 components of the member path, if any
    cmt = str.split('.')[-2, 2].join('.') if str.count('.') > 1

    doit = lambda { |_di, add|
      num, num_i = int_arg[_di]
      next if not num
      # TODO handle ~num_i
      num_left = num_i << add
      s_or = []
      mb.untypedef.members.each { |mm|
        if bo = mb.bitoffsetof(c_parser, mm)
          boff, blen = bo
          # name every single-bit field which is set in the constant
          if mm.name && blen == 1 && ((num_left >> boff) & 1) > 0
            s_or << mm.name
            num_left &= ~(1 << boff)
          end
        end
      }
      if s_or.first
        # bits which match no named field are shown in hex
        s_or << ('0x%X' % num_left) if num_left != 0
        rename[num, num_i, s_or.join('|'), :bitfield]
        # cmt is nil when str has a single component: add no comment then
        _di.add_comment cmt if cmt and _di.address != di.address
      end
    }

    doit[di, byte_off*8]

    if reg = loaded_reg[]
      reg = reg.symbolic
      trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace|
        if r.kind_of?(Expression) and r.op == :&
          if r.lexpr == reg
            # test al, 42
            doit[_di, byte_off*8]
          elsif r.lexpr.kind_of?(Expression) and r.lexpr.op == :>> and r.lexpr.lexpr == reg
            # test ah, 42
            doit[_di, byte_off*8+r.lexpr.rexpr]
          end
        end
        next if r != reg and val != Expression[reg]
        doit[_di, byte_off*8]
        _di.address == di.address && r == reg ? Expression[0] : val
      }
    end
  end
end
1605
+
1107
1606
  # change Expression display mode for current object o to display integers as char constants
1108
1607
  def toggle_expr_char(o)
1109
- return if not o.kind_of? Renderable
1608
+ return if not o.kind_of?(Renderable)
1609
+ tochars = lambda { |v|
1610
+ if v.kind_of?(::Integer)
1611
+ a = []
1612
+ vv = v.abs
1613
+ a << (vv & 0xff)
1614
+ vv >>= 8
1615
+ while vv > 0
1616
+ a << (vv & 0xff)
1617
+ vv >>= 8
1618
+ end
1619
+ if a.all? { |b| b < 0x7f }
1620
+ s = a.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1]
1621
+ ExpressionString.new(v, (v > 0 ? "'#{s}'" : "-'#{s}'"), :char)
1622
+ end
1623
+ end
1624
+ }
1625
+ o.each_expr { |e|
1626
+ if e.kind_of?(Expression)
1627
+ if nr = tochars[e.rexpr]
1628
+ e.rexpr = nr
1629
+ elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :char
1630
+ e.rexpr = e.rexpr.expr
1631
+ end
1632
+ if nl = tochars[e.lexpr]
1633
+ e.lexpr = nl
1634
+ elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :char
1635
+ e.lexpr = e.lexpr.expr
1636
+ end
1637
+ end
1638
+ }
1639
+ end
1640
+
1641
# toggle the display of the integers of o between the default and decimal
# o must be a Renderable, otherwise nothing is done
# for every Expression yielded by o.each_expr, an Integer operand becomes
# an ExpressionString of type :decimal holding its to_s, and an operand
# which is already such a string is replaced by its reduced value
def toggle_expr_dec(o)
  return unless o.kind_of?(Renderable)

  o.each_expr do |e|
    if e.kind_of?(Expression)
      # right operand first, then left operand
      [:rexpr, :lexpr].each do |side|
        operand = e.send(side)
        if operand.kind_of?(::Integer)
          e.send("#{side}=", ExpressionString.new(Expression[operand], operand.to_s, :decimal))
        elsif operand.kind_of?(ExpressionString) and operand.type == :decimal
          e.send("#{side}=", operand.reduce)
        end
      end
    end
  end
end
1115
1658
 
@@ -1118,6 +1661,7 @@ class Disassembler
1118
1661
  def toggle_expr_offset(o)
1119
1662
  return if not o.kind_of? Renderable
1120
1663
  o.each_expr { |e|
1664
+ next unless e.kind_of?(Expression)
1121
1665
  if n = @prog_binding[e.lexpr]
1122
1666
  e.lexpr = n
1123
1667
  elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr)
@@ -1133,6 +1677,15 @@ class Disassembler
1133
1677
  }
1134
1678
  end
1135
1679
 
1680
# invert the hide_str flag of every ExpressionString yielded by o.each_expr
# o must be a Renderable, otherwise nothing is done
def toggle_expr_str(o)
  return unless o.kind_of?(Renderable)

  o.each_expr do |e|
    e.hide_str = !e.hide_str if e.kind_of?(ExpressionString)
  end
end
1688
+
1136
1689
  # call this function on a function entrypoint if the function is in fact a __noreturn
1137
1690
  # will cut the to_subfuncret of callers
1138
1691
  def fix_noreturn(o)
@@ -1184,7 +1737,7 @@ class Disassembler
1184
1737
  # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd
1185
1738
  def load_plugin(plugin_filename)
1186
1739
  if not File.exist?(plugin_filename)
1187
- if File.exist?(plugin_filename+'.rb')
1740
+ if File.exist?(plugin_filename+'.rb')
1188
1741
  plugin_filename += '.rb'
1189
1742
  elsif defined? Metasmdir
1190
1743
  # try autocomplete
@@ -1201,7 +1754,7 @@ class Disassembler
1201
1754
  end
1202
1755
 
1203
1756
  # same as load_plugin, but hides the @gui attribute while loading, preventing the plugin from popping up stuff
1204
- # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionnality
1757
+ # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionality
1205
1758
  # XXX this also prevents setting up kbd_callbacks etc..
1206
1759
  def load_plugin_nogui(plugin_filename)
1207
1760
  oldgui = gui
@@ -1225,7 +1778,7 @@ class Disassembler
1225
1778
  if bd2.kind_of? DecodedInstruction
1226
1779
  bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2)
1227
1780
  end
1228
-
1781
+
1229
1782
  reduce = lambda { |e| Expression[Expression[e].reduce] }
1230
1783
 
1231
1784
  bd = {}
@@ -1276,5 +1829,31 @@ class Disassembler
1276
1829
 
1277
1830
  bd
1278
1831
  end
1832
+
1833
# returns whatever the cpu answers to gui_hilight_word_regexp for word
def gui_hilight_word_regexp(word)
  cpu_answer = @cpu.gui_hilight_word_regexp(word)
  cpu_answer
end
1836
+
1837
# return a C::AllocCStruct from c_parser
# decodes structname at the data found for addr by get_edata_at
# returns nil when there is no c_parser or no data at addr
# TODO handle program.class::Header.to_c_struct
def decode_c_struct(structname, addr)
  return nil unless c_parser
  edata = get_edata_at(addr)
  return nil unless edata

  c_parser.decode_c_struct(structname, edata.data, edata.ptr)
end
1844
+
1845
# same as decode_c_struct, for an array: asks c_parser to decode len
# elements of structname at the data found for addr by get_edata_at
# returns nil when there is no c_parser or no data at addr
def decode_c_ary(structname, addr, len)
  return nil unless c_parser
  edata = get_edata_at(addr)
  return nil unless edata

  c_parser.decode_c_ary(structname, len, edata.data, edata.ptr)
end
1850
+
1851
# find the function containing addr, and find & rename stack vars in it
# the work is done by @cpu.name_local_vars ; nothing happens (nil is
# returned) when the cpu has no such method or no function start is found
# a DecodedFunction is registered for the function start if none exists
def name_local_vars(addr)
  return nil unless @cpu.respond_to?(:name_local_vars)
  faddr = find_function_start(addr)
  return nil unless faddr

  @function[faddr] ||= DecodedFunction.new	# XXX
  @cpu.name_local_vars(self, faddr)
end
1279
1858
  end
1280
1859
  end