metasm 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -99,6 +99,28 @@ class InstructionBlock
99
99
  yield to if type == :indirect or dasm.function[to] or not dasm.decoded[to]
100
100
  }
101
101
  end
102
+
103
+ # returns the array used in each_from_samefunc
104
+ def from_samefunc(dasm)
105
+ ary = []
106
+ each_from_samefunc(dasm) { |a| ary << a }
107
+ ary
108
+ end
109
+ def from_otherfunc(dasm)
110
+ ary = []
111
+ each_from_otherfunc(dasm) { |a| ary << a }
112
+ ary
113
+ end
114
+ def to_samefunc(dasm)
115
+ ary = []
116
+ each_to_samefunc(dasm) { |a| ary << a }
117
+ ary
118
+ end
119
+ def to_otherfunc(dasm)
120
+ ary = []
121
+ each_to_otherfunc(dasm) { |a| ary << a }
122
+ ary
123
+ end
102
124
  end
103
125
 
104
126
  class DecodedInstruction
@@ -111,44 +133,6 @@ end
111
133
  class CPU
112
134
  # compat alias, for scripts using older version of metasm
113
135
  def get_backtrace_binding(di) backtrace_binding(di) end
114
-
115
- # return something like backtrace_binding in the forward direction
116
- # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
117
- def get_fwdemu_binding(di, pc_reg=nil)
118
- fdi = di.backtrace_binding ||= get_backtrace_binding(di)
119
- # find self-updated regs & revert them in simultaneous affectations
120
- # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42 jz foobar)
121
- fdi.keys.grep(::Symbol).each { |s|
122
- val = Expression[fdi[s]]
123
- next if val.lexpr != s or (val.op != :+ and val.op != :-) #or not val.rexpr.kind_of? ::Integer
124
- fwd = { s => val }
125
- inv = { s => val.dup }
126
- inv[s].op = ((inv[s].op == :+) ? :- : :+)
127
- nxt = {}
128
- fdi.each { |k, v|
129
- if k == s
130
- nxt[k] = v
131
- else
132
- k = k.bind(fwd).reduce_rec if k.kind_of? Indirection
133
- nxt[k] = Expression[Expression[v].bind(inv).reduce_rec]
134
- end
135
- }
136
- fdi = nxt
137
- }
138
- if pc_reg
139
- if di.opcode.props[:setip]
140
- xr = get_xrefs_x(nil, di)
141
- if xr and xr.length == 1
142
- fdi[pc_reg] = xr[0]
143
- else
144
- fdi[:incomplete_binding] = Expression[1]
145
- end
146
- else
147
- fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
148
- end
149
- end
150
- fdi
151
- end
152
136
  end
153
137
 
154
138
  class Disassembler
@@ -156,11 +140,16 @@ class Disassembler
156
140
  def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end
157
141
  def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end
158
142
 
159
- # returns the dasm section's edata containing addr
160
- # its #ptr points to addr
161
- # returns the 1st element of #get_section_at
162
- def get_edata_at(addr)
163
- if s = get_section_at(addr)
143
+ # adds a commentary at the given address
144
+ # comments are found in the array @comment: {addr => [list of strings]}
145
+ def add_comment(addr, cmt)
146
+ @comment[addr] ||= []
147
+ @comment[addr] |= [cmt]
148
+ end
149
+
150
+ # returns the 1st element of #get_section_at (ie the edata at a given address) or nil
151
+ def get_edata_at(*a)
152
+ if s = get_section_at(*a)
164
153
  s[0]
165
154
  end
166
155
  end
@@ -209,12 +198,12 @@ class Disassembler
209
198
 
210
199
  # yields every InstructionBlock
211
200
  # returns the list of IBlocks
212
- def each_instructionblock
201
+ def each_instructionblock(&b)
213
202
  ret = []
214
203
  @decoded.each { |addr, di|
215
204
  next if not di.kind_of? DecodedInstruction or not di.block_head?
216
205
  ret << di.block
217
- yield di.block if block_given?
206
+ b.call(di.block) if b
218
207
  }
219
208
  ret
220
209
  end
@@ -293,18 +282,19 @@ class Disassembler
293
282
 
294
283
  # returns the label associated to an addr, or nil if none exist
295
284
  def get_label_at(addr)
296
- e, b = get_section_at(addr, false)
285
+ e = get_edata_at(addr, false)
297
286
  e.inv_export[e.ptr] if e
298
287
  end
299
288
 
300
289
  # sets the label for the specified address
301
290
  # returns nil if the address is not mapped
302
291
  # memcheck is passed to get_section_at to validate that the address is mapped
303
- def set_label_at(addr, name, memcheck=true)
292
+ # keep existing label if 'overwrite' is false
293
+ def set_label_at(addr, name, memcheck=true, overwrite=true)
304
294
  addr = Expression[addr].reduce
305
295
  e, b = get_section_at(addr, memcheck)
306
296
  if not e
307
- elsif not l = e.inv_export[e.ptr]
297
+ elsif not l = e.inv_export[e.ptr] or (!overwrite and l != name)
308
298
  l = @program.new_label(name)
309
299
  e.add_export l, e.ptr
310
300
  @label_alias_cache = nil
@@ -317,7 +307,7 @@ class Disassembler
317
307
 
318
308
  # remove a label at address addr
319
309
  def del_label_at(addr, name=get_label_at(addr))
320
- ed, b = get_section_at(addr)
310
+ ed = get_edata_at(addr)
321
311
  if ed and ed.inv_export[ed.ptr]
322
312
  ed.del_export name, ed.ptr
323
313
  @label_alias_cache = nil
@@ -325,6 +315,7 @@ class Disassembler
325
315
  each_xref(addr) { |xr|
326
316
  next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable
327
317
  o.each_expr { |e|
318
+ next unless e.kind_of?(Expression)
328
319
  e.lexpr = addr if e.lexpr == name
329
320
  e.rexpr = addr if e.rexpr == name
330
321
  }
@@ -337,12 +328,14 @@ class Disassembler
337
328
  # returns the new label
338
329
  # the new label must be program-uniq (see @program.new_label)
339
330
  def rename_label(old, new)
331
+ return new if old == new
332
+ raise "label #{new.inspect} exists" if @prog_binding[new]
340
333
  each_xref(normalize(old)) { |x|
341
334
  next if not di = @decoded[x.origin]
342
335
  @cpu.replace_instr_arg_immediate(di.instruction, old, new)
343
336
  di.comment.to_a.each { |c| c.gsub!(old, new) }
344
337
  }
345
- e, l = get_section_at(old, false)
338
+ e = get_edata_at(old, false)
346
339
  if e
347
340
  e.add_export new, e.export.delete(old), true
348
341
  end
@@ -499,12 +492,12 @@ class Disassembler
499
492
  # if from..to spans multiple blocks
500
493
  # to.block is splitted after to
501
494
  # all path from from are replaced by a single link to after 'to', be careful !
502
- # (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
495
+ # (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
503
496
  # all instructions are stuffed in the first block
504
497
  # paths are only walked using from/to_normal
505
498
  # 'by' may be empty
506
499
  # returns the block containing the new instrs (nil if empty)
507
- def replace_instrs(from, to, by)
500
+ def replace_instrs(from, to, by, patch_by=false)
508
501
  raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi)
509
502
  raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi)
510
503
 
@@ -520,14 +513,28 @@ class Disassembler
520
513
  wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length }
521
514
  ldi = by.last
522
515
  ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction
523
- wantlen = by.grep(Instruction).length if wantlen < 0 or (ldi and ldi.opcode.props[:setip])
524
- by.map! { |di|
525
- if di.kind_of? Instruction
526
- di = DecodedInstruction.new(di)
527
- wantlen -= di.bin_length = wantlen / by.grep(Instruction).length
528
- end
529
- di
530
- }
516
+ nb_i = by.grep(Instruction).length
517
+ wantlen = nb_i if wantlen < 0 or (ldi and ldi.opcode.props[:setip])
518
+ if patch_by
519
+ by.map! { |di|
520
+ if di.kind_of? Instruction
521
+ di = DecodedInstruction.new(di)
522
+ wantlen -= di.bin_length = wantlen / by.grep(Instruction).length
523
+ nb_i -= 1
524
+ end
525
+ di
526
+ }
527
+ else
528
+ by = by.map { |di|
529
+ if di.kind_of? Instruction
530
+ di = DecodedInstruction.new(di)
531
+ wantlen -= (di.bin_length = wantlen / nb_i)
532
+ nb_i -= 1
533
+ end
534
+ di
535
+ }
536
+ end
537
+
531
538
 
532
539
  #puts " ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip]
533
540
  by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip]
@@ -649,8 +656,8 @@ class Disassembler
649
656
  if b1 and not b1.kind_of? InstructionBlock
650
657
  return if not b1 = block_at(b1)
651
658
  end
652
- if b2 and not b2.kind_of? InstructionBlock
653
- return if not b2 = block_at(b2)
659
+ if b2 and not b2.kind_of? InstructionBlock
660
+ return if not b2 = block_at(b2)
654
661
  end
655
662
  if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and
656
663
  b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot
@@ -720,17 +727,23 @@ class Disassembler
720
727
  end
721
728
 
722
729
  # returns a demangled C++ name
723
- # from wgcc-2.2.2/undecorate.cpp
724
- # TODO
725
730
  def demangle_cppname(name)
726
- ret = name
727
- if name[0] == ??
731
+ case name[0]
732
+ when ?? # MSVC
728
733
  name = name[1..-1]
729
- if name[0] == ??
730
- name = name[1..-1]
731
- op = name[0, 1]
732
- op = name[0, 2] if op == '_'
733
- if op = {
734
+ demangle_msvc(name[1..-1]) if name[0] == ??
735
+ when ?_
736
+ name = name.sub(/_GLOBAL__[ID]_/, '')
737
+ demangle_gcc(name[2..-1][/\S*/]) if name[0, 2] == '_Z'
738
+ end
739
+ end
740
+
741
+ # from wgcc-2.2.2/undecorate.cpp
742
+ # TODO
743
+ def demangle_msvc(name)
744
+ op = name[0, 1]
745
+ op = name[0, 2] if op == '_'
746
+ if op = {
734
747
  '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=",
735
748
  'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&",
736
749
  'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",",
@@ -743,11 +756,157 @@ class Disassembler
743
756
  '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'",
744
757
  '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]",
745
758
  '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[op]
746
- ret = op[0] == ?` ? op[1..-2] : "op_#{op}"
759
+ op[0] == ?` ? op[1..-2] : "op_#{op}"
760
+ end
761
+ end
762
+
763
+ # from http://www.codesourcery.com/public/cxx-abi/abi.html
764
+ def demangle_gcc(name)
765
+ subs = []
766
+ ret = ''
767
+ decode_tok = lambda {
768
+ name ||= ''
769
+ case name[0]
770
+ when nil
771
+ ret = nil
772
+ when ?N
773
+ name = name[1..-1]
774
+ decode_tok[]
775
+ until name[0] == ?E
776
+ break if not ret
777
+ ret << '::'
778
+ decode_tok[]
747
779
  end
780
+ name = name[1..-1]
781
+ when ?I
782
+ name = name[1..-1]
783
+ ret = ret[0..-3] if ret[-2, 2] == '::'
784
+ ret << '<'
785
+ decode_tok[]
786
+ until name[0] == ?E
787
+ break if not ret
788
+ ret << ', '
789
+ decode_tok[]
790
+ end
791
+ ret << ' ' if ret and ret[-1] == ?>
792
+ ret << '>' if ret
793
+ name = name[1..-1]
794
+ when ?T
795
+ case name[1]
796
+ when ?T; ret << 'vtti('
797
+ when ?V; ret << 'vtable('
798
+ when ?I; ret << 'typeinfo('
799
+ when ?S; ret << 'typename('
800
+ else ret = nil
801
+ end
802
+ name = name[2..-1].to_s
803
+ decode_tok[] if ret
804
+ ret << ')' if ret
805
+ name = name[1..-1] if name[0] == ?E
806
+ when ?C
807
+ name = name[2..-1]
808
+ base = ret[/([^:]*)(<.*|::)?$/, 1]
809
+ ret << base
810
+ when ?D
811
+ name = name[2..-1]
812
+ base = ret[/([^:]*)(<.*|::)?$/, 1]
813
+ ret << '~' << base
814
+ when ?0..?9
815
+ nr = name[/^[0-9]+/]
816
+ name = name[nr.length..-1].to_s
817
+ ret << name[0, nr.to_i]
818
+ name = name[nr.to_i..-1]
819
+ subs << ret[/[\w:]*$/]
820
+ when ?S
821
+ name = name[1..-1]
822
+ case name[0]
823
+ when ?_, ?0..?9, ?A..?Z
824
+ case name[0]
825
+ when ?_; idx = 0 ; name = name[1..-1]
826
+ when ?0..?9; idx = name[0, 1].unpack('C')[0] - 0x30 + 1 ; name = name[2..-1]
827
+ when ?A..?Z; idx = name[0, 1].unpack('C')[0] - 0x41 + 11 ; name = name[2..-1]
828
+ end
829
+ if not subs[idx]
830
+ ret = nil
831
+ else
832
+ ret << subs[idx]
833
+ end
834
+ when ?t
835
+ ret << 'std::'
836
+ name = name[1..-1]
837
+ decode_tok[]
838
+ else
839
+ std = { ?a => 'std::allocator',
840
+ ?b => 'std::basic_string',
841
+ ?s => 'std::string', # 'std::basic_string < char, std::char_traits<char>, std::allocator<char> >',
842
+ ?i => 'std::istream', # 'std::basic_istream<char, std::char_traits<char> >',
843
+ ?o => 'std::ostream', # 'std::basic_ostream<char, std::char_traits<char> >',
844
+ ?d => 'std::iostream', # 'std::basic_iostream<char, std::char_traits<char> >'
845
+ }[name[0]]
846
+ if not std
847
+ ret = nil
848
+ else
849
+ ret << std
850
+ end
851
+ name = name[1..-1]
852
+ end
853
+ when ?P, ?R, ?r, ?V, ?K
854
+ attr = { ?P => '*', ?R => '&', ?r => ' restrict', ?V => ' volatile', ?K => ' const' }[name[0]]
855
+ name = name[1..-1]
856
+ rl = ret.length
857
+ decode_tok[]
858
+ if ret
859
+ ret << attr
860
+ subs << ret[rl..-1]
861
+ end
862
+ else
863
+ if ret =~ /[(<]/ and ty = {
864
+ ?v => 'void', ?w => 'wchar_t', ?b => 'bool', ?c => 'char', ?a => 'signed char',
865
+ ?h => 'unsigned char', ?s => 'short', ?t => 'unsigned short', ?i => 'int',
866
+ ?j => 'unsigned int', ?l => 'long', ?m => 'unsigned long', ?x => '__int64',
867
+ ?y => 'unsigned __int64', ?n => '__int128', ?o => 'unsigned __int128', ?f => 'float',
868
+ ?d => 'double', ?e => 'long double', ?g => '__float128', ?z => '...'
869
+ }[name[0]]
870
+ name = name[1..-1]
871
+ ret << ty
872
+ else
873
+ fu = name[0, 2]
874
+ name = name[2..-1]
875
+ if op = {
876
+ 'nw' => ' new', 'na' => ' new[]', 'dl' => ' delete', 'da' => ' delete[]',
877
+ 'ps' => '+', 'ng' => '-', 'ad' => '&', 'de' => '*', 'co' => '~', 'pl' => '+',
878
+ 'mi' => '-', 'ml' => '*', 'dv' => '/', 'rm' => '%', 'an' => '&', 'or' => '|',
879
+ 'eo' => '^', 'aS' => '=', 'pL' => '+=', 'mI' => '-=', 'mL' => '*=', 'dV' => '/=',
880
+ 'rM' => '%=', 'aN' => '&=', 'oR' => '|=', 'eO' => '^=', 'ls' => '<<', 'rs' => '>>',
881
+ 'lS' => '<<=', 'rS' => '>>=', 'eq' => '==', 'ne' => '!=', 'lt' => '<', 'gt' => '>',
882
+ 'le' => '<=', 'ge' => '>=', 'nt' => '!', 'aa' => '&&', 'oo' => '||', 'pp' => '++',
883
+ 'mm' => '--', 'cm' => ',', 'pm' => '->*', 'pt' => '->', 'cl' => '()', 'ix' => '[]',
884
+ 'qu' => '?', 'st' => ' sizeof', 'sz' => ' sizeof', 'at' => ' alignof', 'az' => ' alignof'
885
+ }[fu]
886
+ ret << "operator#{op}"
887
+ elsif fu == 'cv'
888
+ ret << "cast<"
889
+ decode_tok[]
890
+ ret << ">" if ret
891
+ else
892
+ ret = nil
893
+ end
894
+ end
895
+ end
896
+ name ||= ''
897
+ }
898
+
899
+ decode_tok[]
900
+ subs.pop
901
+ if ret and name != ''
902
+ ret << '('
903
+ decode_tok[]
904
+ while ret and name != ''
905
+ ret << ', '
906
+ decode_tok[]
748
907
  end
908
+ ret << ')' if ret
749
909
  end
750
- # TODO
751
910
  ret
752
911
  end
753
912
 
@@ -755,7 +914,8 @@ class Disassembler
755
914
  # return/yields all the addresses matching
756
915
  # if yield returns nil/false, do not include the addr in the final result
757
916
  # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM)
758
- def pattern_scan(pat, chunksz=nil, margin=nil)
917
+ # with addr_start/length, symbol-based section are skipped
918
+ def pattern_scan(pat, addr_start=nil, length=nil, chunksz=nil, margin=nil, &b)
759
919
  chunksz ||= 4*1024*1024 # scan 4MB at a time
760
920
  margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries
761
921
 
@@ -763,9 +923,27 @@ class Disassembler
763
923
 
764
924
  found = []
765
925
  @sections.each { |sec_addr, e|
926
+ if addr_start
927
+ length ||= 0x1000_0000
928
+ begin
929
+ if sec_addr < addr_start
930
+ next if sec_addr+e.length <= addr_start
931
+ e = e[addr_start-sec_addr, e.length]
932
+ sec_addr = addr_start
933
+ end
934
+ if sec_addr+e.length > addr_start+length
935
+ next if sec_addr > addr_start+length
936
+ e = e[0, sec_addr+e.length-(addr_start+length)]
937
+ end
938
+ rescue
939
+ puts $!, $!.message, $!.backtrace if $DEBUG
940
+ # catch arithmetic error with symbol-based section
941
+ next
942
+ end
943
+ end
766
944
  e.pattern_scan(pat, chunksz, margin) { |eo|
767
945
  match_addr = sec_addr + eo
768
- found << match_addr if not block_given? or yield(match_addr)
946
+ found << match_addr if not b or b.call(match_addr)
769
947
  false
770
948
  }
771
949
  }
@@ -773,14 +951,14 @@ class Disassembler
773
951
  end
774
952
 
775
953
  # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/
776
- def strings_scan(minlen=6)
954
+ def strings_scan(minlen=6, &b)
777
955
  ret = []
778
956
  nexto = 0
779
957
  pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o|
780
958
  if o - nexto > 0
781
959
  next unless e = get_edata_at(o)
782
960
  str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m]
783
- ret << [o, str] if not block_given? or yield(o, str)
961
+ ret << [o, str] if not b or b.call(o, str)
784
962
  nexto = o + str.length
785
963
  end
786
964
  }
@@ -805,18 +983,24 @@ class Disassembler
805
983
  def load_map(str, off=0)
806
984
  str = File.read(str) rescue nil if not str.index("\n")
807
985
  sks = @sections.keys.sort
986
+ seen = {}
808
987
  str.each_line { |l|
809
988
  case l.strip
810
989
  when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style
811
- set_label_at($1.to_i(16)+off, $3)
990
+ addr = $1.to_i(16)+off
991
+ set_label_at(addr, $3, false, !seen[addr])
992
+ seen[addr] = true
812
993
  when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style
813
994
  # we do not have section load order, let's just hope that the addresses are sorted (and sortable..)
814
995
  # could check the 1st part of the file, with section sizes, but it is not very convenient
815
996
  # the regexp is so that we skip the 1st part with section descriptions
816
997
  # in the file, section 1 is the 1st section ; we have an additionnal section (exe header) which fixes the 0-index
817
- set_label_at(sks[$1.to_i(16)] + $2.to_i(16) + off, $3)
998
+ # XXX this is PE-specific, TODO fix it for ELF (ida references sections, we reference segments...)
999
+ addr = sks[$1.to_i(16)] + $2.to_i(16) + off
1000
+ set_label_at(addr, $3, false, !seen[addr])
1001
+ seen[addr] = true
818
1002
  end
819
- }
1003
+ }
820
1004
  end
821
1005
 
822
1006
  # saves the dasm state in a file
@@ -830,13 +1014,14 @@ class Disassembler
830
1014
  def save_io(fd)
831
1015
  fd.puts 'Metasm.dasm'
832
1016
 
833
- if @program.filename
1017
+ if @program.filename and not @program.kind_of?(Shellcode)
834
1018
  t = @program.filename.to_s
835
1019
  fd.puts "binarypath #{t.length}", t
836
1020
  else
837
1021
  t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}"
838
1022
  fd.puts "cpu #{t.length}", t
839
1023
  # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE
1024
+ # do not output binarypath, we'll be loaded as a Shellcode, 'section' will suffice
840
1025
  end
841
1026
 
842
1027
  @sections.each { |a, e|
@@ -942,6 +1127,7 @@ class Disassembler
942
1127
  reinitialize Shellcode.new(cpu)
943
1128
  @program.disassembler = self
944
1129
  @program.init_disassembler
1130
+ @sections.delete(0) # rm empty section at 0, other real 'section' follow
945
1131
  when 'section'
946
1132
  info = data[0, data.index("\n") || data.length]
947
1133
  data = data[info.length, data.length]
@@ -1030,7 +1216,7 @@ class Disassembler
1030
1216
  len = (len != '' ? len.to_i : nil)
1031
1217
  o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ?
1032
1218
  add_xref(a, Xref.new(t, o, len))
1033
- rescue
1219
+ rescue
1034
1220
  puts "load: bad xref #{l.inspect} #$!" if $VERBOSE
1035
1221
  end
1036
1222
  }
@@ -1104,12 +1290,354 @@ class Disassembler
1104
1290
  delta
1105
1291
  end
1106
1292
 
1293
+ # dataflow method
1294
+ # walks a function, starting at addr
1295
+ # follows the usage of registers, computing the evolution from the value they had at start_addr
1296
+ # whenever an instruction references the register (or anything derived from it),
1297
+ # yield [di, used_register, reg_value, trace_state] where reg_value is the Expression holding the value of
1298
+ # the register wrt the initial value at start_addr, and trace_state the value of all registers (reg_value
1299
+ # not yet applied)
1300
+ # reg_value may be nil if used_register is not modified by the function (eg call [eax])
1301
+ # the yield return value is propagated, unless it is nil/false
1302
+ # init_state is a hash { :reg => initial value }
1303
+ def trace_function_register(start_addr, init_state)
1304
+ function_walk(start_addr, init_state) { |args|
1305
+ trace_state = args.last
1306
+ case args.first
1307
+ when :di
1308
+ di = args[2]
1309
+ update = {}
1310
+ get_fwdemu_binding(di).each { |r, v|
1311
+ if v.kind_of?(Expression) and v.externals.find { |e| trace_state[e] }
1312
+ # XXX may mix old (from trace) and current (from v) registers
1313
+ newv = v.bind(trace_state)
1314
+ update[r] = yield(di, r, newv, trace_state)
1315
+ elsif r.kind_of?(ExpressionType) and rr = r.externals.find { |e| trace_state[e] }
1316
+ # reg dereferenced in a write (eg mov [esp], 42)
1317
+ next if update.has_key?(rr) # already yielded
1318
+ if yield(di, rr, trace_state[rr], trace_state) == false
1319
+ update[rr] = false
1320
+ end
1321
+ elsif trace_state[r]
1322
+ # started on mov reg, foo
1323
+ next if di.address == start_addr
1324
+ update[r] = false
1325
+ end
1326
+ }
1327
+
1328
+ # directly walk the instruction argument list for registers not appearing in the binding
1329
+ @cpu.instr_args_memoryptr(di).each { |ind|
1330
+ b = @cpu.instr_args_memoryptr_getbase(ind)
1331
+ if b and b = b.symbolic and not update.has_key?(b)
1332
+ yield(di, b, nil, trace_state)
1333
+ end
1334
+ }
1335
+ @cpu.instr_args_regs(di).each { |r|
1336
+ r = r.symbolic
1337
+ if not update.has_key?(r)
1338
+ yield(di, r, nil, trace_state)
1339
+ end
1340
+ }
1341
+
1342
+ update.each { |r, v|
1343
+ trace_state = trace_state.dup
1344
+ if v
1345
+ # cannot follow non-registers, or we would have to emulate every single
1346
+ # instruction (try following [esp+4] across a __stdcall..)
1347
+ trace_state[r] = v if r.kind_of?(::Symbol)
1348
+ else
1349
+ trace_state.delete r
1350
+ end
1351
+ }
1352
+ when :subfunc
1353
+ faddr = args[1]
1354
+ f = @function[faddr]
1355
+ f = @function[f.backtrace_binding[:thunk]] if f and f.backtrace_binding[:thunk]
1356
+ if f
1357
+ binding = f.backtrace_binding
1358
+ if binding.empty?
1359
+ backtrace_update_function_binding(faddr)
1360
+ binding = f.backtrace_binding
1361
+ end
1362
+ # XXX fwdemu_binding ?
1363
+ binding.each { |r, v|
1364
+ if v.externals.find { |e| trace_state[e] }
1365
+ if r.kind_of?(::Symbol)
1366
+ trace_state = trace_state.dup
1367
+ trace_state[r] = Expression[v.bind(trace_state)].reduce
1368
+ end
1369
+ elsif trace_state[r]
1370
+ trace_state = trace_state.dup
1371
+ trace_state.delete r
1372
+ end
1373
+ }
1374
+ end
1375
+ when :merge
1376
+ # when merging paths, keep the smallest common state subset
1377
+ # XXX may have unexplored froms
1378
+ conflicts = args[2]
1379
+ trace_state = trace_state.dup
1380
+ conflicts.each { |addr, st|
1381
+ trace_state.delete_if { |k, v| st[k] != v }
1382
+ }
1383
+ end
1384
+ trace_state = false if trace_state.empty?
1385
+ trace_state
1386
+ }
1387
+ end
1388
+
1389
+ # define a register as a pointer to a structure
1390
+ # rename all [reg+off] as [reg+struct.member] in current function
1391
+ # also trace assignments of pointer members
1392
+ def trace_update_reg_structptr(addr, reg, structname, structoff=0)
1393
+ sname = soff = ctx = nil
1394
+ expr_to_sname = lambda { |expr|
1395
+ if not expr.kind_of?(Expression) or expr.op != :+
1396
+ sname = nil
1397
+ next
1398
+ end
1399
+
1400
+ sname = expr.lexpr || expr.rexpr
1401
+ soff = (expr.lexpr ? expr.rexpr : 0)
1402
+
1403
+ if soff.kind_of?(Expression)
1404
+ # ignore index in ptr array
1405
+ if soff.op == :* and soff.lexpr == @cpu.size/8
1406
+ soff = 0
1407
+ elsif soff.rexpr.kind_of?(Expression) and soff.rexpr.op == :* and soff.rexpr.lexpr == @cpu.size/8
1408
+ soff = soff.lexpr
1409
+ elsif soff.lexpr.kind_of?(Expression) and soff.lexpr.op == :* and soff.lexpr.lexpr == @cpu.size/8
1410
+ soff = soff.rexpr
1411
+ end
1412
+ elsif soff.kind_of?(::Symbol)
1413
+ # array with 1 byte elements / pre-scaled idx?
1414
+ if not ctx[soff]
1415
+ soff = 0
1416
+ end
1417
+ end
1418
+ }
1419
+
1420
+ lastdi = nil
1421
+ trace_function_register(addr, reg => Expression[structname, :+, structoff]) { |di, r, val, trace|
1422
+
1423
+ next if r.to_s =~ /flag/ # XXX maybe too ia32-specific?
1424
+
1425
+ ctx = trace
1426
+ @cpu.instr_args_memoryptr(di).each { |ind|
1427
+ # find the structure dereference in di
1428
+ b = @cpu.instr_args_memoryptr_getbase(ind)
1429
+ b = b.symbolic if b
1430
+ next unless trace[b]
1431
+ imm = @cpu.instr_args_memoryptr_getoffset(ind) || 0
1432
+
1433
+ # check expr has the form 'traced_struct_reg + off'
1434
+ expr_to_sname[trace[b] + imm] # Expr#+ calls Expr#reduce
1435
+ next unless sname.kind_of?(::String) and soff.kind_of?(::Integer)
1436
+ next if not st = c_parser.toplevel.struct[sname] or not st.kind_of?(C::Union)
1437
+
1438
+ # ignore lea esi, [esi+0]
1439
+ next if soff == 0 and not di.backtrace_binding.find { |k, v| v-k != 0 }
1440
+
1441
+ # TODO if trace[b] offset != 0, we had a lea reg, [struct+substruct_off], tweak str accordingly
1442
+
1443
+ # resolve struct + off into struct.membername
1444
+ str = st.name.dup
1445
+ mb = st.expand_member_offset(c_parser, soff, str)
1446
+ # patch di
1447
+ imm = imm.rexpr if imm.kind_of?(Expression) and not imm.lexpr and imm.rexpr.kind_of?(ExpressionString)
1448
+ imm = imm.expr if imm.kind_of?(ExpressionString)
1449
+ @cpu.instr_args_memoryptr_setoffset(ind, ExpressionString.new(imm, str, :structoff))
1450
+
1451
+ # check if the type is an enum/bitfield, patch instruction immediates
1452
+ trace_update_reg_structptr_arg_enum(di, ind, mb, str) if mb
1453
+ } if lastdi != di.address
1454
+ lastdi = di.address
1455
+
1456
+ next Expression[structname, :+, structoff] if di.address == addr and r == reg
1457
+
1458
+ # check if we need to trace 'r' further
1459
+ val = val.reduce_rec if val.kind_of?(Expression)
1460
+ val = Expression[val] if val.kind_of?(::String)
1461
+ case val
1462
+ when Expression
1463
+ # only trace trivial structptr+off expressions
1464
+ expr_to_sname[val]
1465
+ if sname.kind_of?(::String) and soff.kind_of?(::Integer)
1466
+ Expression[sname, :+, soff]
1467
+ end
1468
+
1469
+ when Indirection
1470
+ # di is mov reg, [ptr+struct.offset]
1471
+ # check if the target member is a pointer to a struct, if so, trace it
1472
+ expr_to_sname[val.pointer.reduce]
1473
+
1474
+ next unless sname.kind_of?(::String) and soff.kind_of?(::Integer)
1475
+
1476
+ if st = c_parser.toplevel.struct[sname] and st.kind_of?(C::Union)
1477
+ pt = st.expand_member_offset(c_parser, soff, '')
1478
+ pt = pt.untypedef if pt
1479
+ if pt.kind_of?(C::Pointer)
1480
+ tt = pt.type.untypedef
1481
+ stars = ''
1482
+ while tt.kind_of?(C::Pointer)
1483
+ stars << '*'
1484
+ tt = tt.type.untypedef
1485
+ end
1486
+ if tt.kind_of?(C::Union) and tt.name
1487
+ Expression[tt.name + stars]
1488
+ end
1489
+ end
1490
+
1491
+ elsif soff == 0 and sname[-1] == ?*
1492
+ # XXX pointer to pointer to struct
1493
+ # full C type support would be better, but harder to fit in an Expr
1494
+ Expression[sname[0...-1]]
1495
+ end
1496
+ # in other cases, stop trace
1497
+ end
1498
+ }
1499
+ end
1500
+
1501
+ # found a special member of a struct, check if we can apply
1502
+ # bitfield/enum name to other constants in the di
1503
+ def trace_update_reg_structptr_arg_enum(di, ind, mb, str)
1504
+ if ename = mb.has_attribute_var('enum') and enum = c_parser.toplevel.struct[ename] and enum.kind_of?(C::Enum)
1505
+ # handle enums: struct moo { int __attribute__((enum(bla))) fld; };
1506
+ doit = lambda { |_di|
1507
+ if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer)
1508
+ # handle enum values on tagged structs
1509
+ if enum.members and name = enum.members.index(num_i)
1510
+ num.lexpr = nil
1511
+ num.op = :+
1512
+ num.rexpr = ExpressionString.new(Expression[num_i], name, :enum)
1513
+ _di.add_comment "enum::#{ename}" if _di.address != di.address
1514
+ end
1515
+ end
1516
+ }
1517
+
1518
+ doit[di]
1519
+
1520
+ # mov eax, [ptr+struct.enumfield] => trace eax
1521
+ if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 }
1522
+ reg = reg.symbolic
1523
+ trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace|
1524
+ next if r != reg and val != Expression[reg]
1525
+ doit[_di]
1526
+ val
1527
+ }
1528
+ end
1529
+
1530
+ elsif mb.untypedef.kind_of?(C::Struct)
1531
+ # handle bitfields
1532
+
1533
+ byte_off = 0
1534
+ if str =~ /\+(\d+)$/
1535
+ # test byte [bitfield+1], 0x1 => test dword [bitfield], 0x100
1536
+ # XXX little-endian only
1537
+ byte_off = $1.to_i
1538
+ str[/\+\d+$/] = ''
1539
+ end
1540
+ cmt = str.split('.')[-2, 2].join('.') if str.count('.') > 1
1541
+
1542
+ doit = lambda { |_di, add|
1543
+ if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer)
1544
+ # TODO handle ~num_i
1545
+ num_left = num_i << add
1546
+ s_or = []
1547
+ mb.untypedef.members.each { |mm|
1548
+ if bo = mb.bitoffsetof(c_parser, mm)
1549
+ boff, blen = bo
1550
+ if mm.name && blen == 1 && ((num_left >> boff) & 1) > 0
1551
+ s_or << mm.name
1552
+ num_left &= ~(1 << boff)
1553
+ end
1554
+ end
1555
+ }
1556
+ if s_or.first
1557
+ if num_left != 0
1558
+ s_or << ('0x%X' % num_left)
1559
+ end
1560
+ s = s_or.join('|')
1561
+ num.lexpr = nil
1562
+ num.op = :+
1563
+ num.rexpr = ExpressionString.new(Expression[num_i], s, :bitfield)
1564
+ _di.add_comment cmt if _di.address != di.address
1565
+ end
1566
+ end
1567
+ }
1568
+
1569
+ doit[di, byte_off*8]
1570
+
1571
+ if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 }
1572
+ reg = reg.symbolic
1573
+ trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace|
1574
+ if r.kind_of?(Expression) and r.op == :&
1575
+ if r.lexpr == reg
1576
+ # test al, 42
1577
+ doit[_di, byte_off*8]
1578
+ elsif r.lexpr.kind_of?(Expression) and r.lexpr.op == :>> and r.lexpr.lexpr == reg
1579
+ # test ah, 42
1580
+ doit[_di, byte_off*8+r.lexpr.rexpr]
1581
+ end
1582
+ end
1583
+ next if r != reg and val != Expression[reg]
1584
+ doit[_di, byte_off*8]
1585
+ _di.address == di.address && r == reg ? Expression[0] : val
1586
+ }
1587
+ end
1588
+ end
1589
+ end
1590
+
1107
1591
  # change Expression display mode for current object o to display integers as char constants
1108
1592
  def toggle_expr_char(o)
1109
- return if not o.kind_of? Renderable
1593
+ return if not o.kind_of?(Renderable)
1594
+ tochars = lambda { |v|
1595
+ if v.kind_of?(::Integer)
1596
+ a = []
1597
+ vv = v.abs
1598
+ a << (vv & 0xff)
1599
+ vv >>= 8
1600
+ while vv > 0
1601
+ a << (vv & 0xff)
1602
+ vv >>= 8
1603
+ end
1604
+ if a.all? { |b| b < 0x7f }
1605
+ s = a.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1]
1606
+ ExpressionString.new(v, (v > 0 ? "'#{s}'" : "-'#{s}'"), :char)
1607
+ end
1608
+ end
1609
+ }
1110
1610
  o.each_expr { |e|
1111
- e.render_info ||= {}
1112
- e.render_info[:char] = e.render_info[:char] ? nil : @cpu.endianness
1611
+ if e.kind_of?(Expression)
1612
+ if nr = tochars[e.rexpr]
1613
+ e.rexpr = nr
1614
+ elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :char
1615
+ e.rexpr = e.rexpr.expr
1616
+ end
1617
+ if nl = tochars[e.lexpr]
1618
+ e.lexpr = nl
1619
+ elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :char
1620
+ e.lexpr = e.lexpr.expr
1621
+ end
1622
+ end
1623
+ }
1624
+ end
1625
+
1626
+ def toggle_expr_dec(o)
1627
+ return if not o.kind_of?(Renderable)
1628
+ o.each_expr { |e|
1629
+ if e.kind_of?(Expression)
1630
+ if e.rexpr.kind_of?(::Integer)
1631
+ e.rexpr = ExpressionString.new(Expression[e.rexpr], e.rexpr.to_s, :decimal)
1632
+ elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :decimal
1633
+ e.rexpr = e.rexpr.reduce
1634
+ end
1635
+ if e.lexpr.kind_of?(::Integer)
1636
+ e.lexpr = ExpressionString.new(Expression[e.lexpr], e.lexpr.to_s, :decimal)
1637
+ elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :decimal
1638
+ e.lexpr = e.lexpr.reduce
1639
+ end
1640
+ end
1113
1641
  }
1114
1642
  end
1115
1643
 
@@ -1118,6 +1646,7 @@ class Disassembler
1118
1646
  def toggle_expr_offset(o)
1119
1647
  return if not o.kind_of? Renderable
1120
1648
  o.each_expr { |e|
1649
+ next unless e.kind_of?(Expression)
1121
1650
  if n = @prog_binding[e.lexpr]
1122
1651
  e.lexpr = n
1123
1652
  elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr)
@@ -1133,6 +1662,15 @@ class Disassembler
1133
1662
  }
1134
1663
  end
1135
1664
 
1665
+ # toggle all ExpressionStrings
1666
+ def toggle_expr_str(o)
1667
+ return if not o.kind_of?(Renderable)
1668
+ o.each_expr { |e|
1669
+ next unless e.kind_of?(ExpressionString)
1670
+ e.hide_str = !e.hide_str
1671
+ }
1672
+ end
1673
+
1136
1674
  # call this function on a function entrypoint if the function is in fact a __noreturn
1137
1675
  # will cut the to_subfuncret of callers
1138
1676
  def fix_noreturn(o)
@@ -1184,7 +1722,7 @@ class Disassembler
1184
1722
  # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd
1185
1723
  def load_plugin(plugin_filename)
1186
1724
  if not File.exist?(plugin_filename)
1187
- if File.exist?(plugin_filename+'.rb')
1725
+ if File.exist?(plugin_filename+'.rb')
1188
1726
  plugin_filename += '.rb'
1189
1727
  elsif defined? Metasmdir
1190
1728
  # try autocomplete
@@ -1225,7 +1763,7 @@ class Disassembler
1225
1763
  if bd2.kind_of? DecodedInstruction
1226
1764
  bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2)
1227
1765
  end
1228
-
1766
+
1229
1767
  reduce = lambda { |e| Expression[Expression[e].reduce] }
1230
1768
 
1231
1769
  bd = {}
@@ -1276,5 +1814,31 @@ class Disassembler
1276
1814
 
1277
1815
  bd
1278
1816
  end
1817
+
1818
+ def gui_hilight_word_regexp(word)
1819
+ @cpu.gui_hilight_word_regexp(word)
1820
+ end
1821
+
1822
+ # return a C::AllocCStruct from c_parser
1823
+ # TODO handle program.class::Header.to_c_struct
1824
+ def decode_c_struct(structname, addr)
1825
+ if c_parser and edata = get_edata_at(addr)
1826
+ c_parser.decode_c_struct(structname, edata.data, edata.ptr)
1827
+ end
1828
+ end
1829
+
1830
+ def decode_c_ary(structname, addr, len)
1831
+ if c_parser and edata = get_edata_at(addr)
1832
+ c_parser.decode_c_ary(structname, len, edata.data, edata.ptr)
1833
+ end
1834
+ end
1835
+
1836
+ # find the function containing addr, and find & rename stack vars in it
1837
+ def name_local_vars(addr)
1838
+ if @cpu.respond_to?(:name_local_vars) and faddr = find_function_start(addr)
1839
+ @function[faddr] ||= DecodedFunction.new # XXX
1840
+ @cpu.name_local_vars(self, faddr)
1841
+ end
1842
+ end
1279
1843
  end
1280
1844
  end