metasm 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -99,6 +99,28 @@ class InstructionBlock
99
99
  yield to if type == :indirect or dasm.function[to] or not dasm.decoded[to]
100
100
  }
101
101
  end
102
+
103
+ # returns an array of all the elements yielded by each_from_samefunc
104
+ def from_samefunc(dasm)
105
+ ary = []
106
+ each_from_samefunc(dasm) { |a| ary << a }
107
+ ary
108
+ end
109
+ def from_otherfunc(dasm)
110
+ ary = []
111
+ each_from_otherfunc(dasm) { |a| ary << a }
112
+ ary
113
+ end
114
+ def to_samefunc(dasm)
115
+ ary = []
116
+ each_to_samefunc(dasm) { |a| ary << a }
117
+ ary
118
+ end
119
+ def to_otherfunc(dasm)
120
+ ary = []
121
+ each_to_otherfunc(dasm) { |a| ary << a }
122
+ ary
123
+ end
102
124
  end
103
125
 
104
126
  class DecodedInstruction
@@ -111,44 +133,6 @@ end
111
133
  class CPU
112
134
  # compat alias, for scripts using older version of metasm
113
135
  def get_backtrace_binding(di) backtrace_binding(di) end
114
-
115
- # return something like backtrace_binding in the forward direction
116
- # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
117
- def get_fwdemu_binding(di, pc_reg=nil)
118
- fdi = di.backtrace_binding ||= get_backtrace_binding(di)
119
- # find self-updated regs & revert them in simultaneous affectations
120
- # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42 jz foobar)
121
- fdi.keys.grep(::Symbol).each { |s|
122
- val = Expression[fdi[s]]
123
- next if val.lexpr != s or (val.op != :+ and val.op != :-) #or not val.rexpr.kind_of? ::Integer
124
- fwd = { s => val }
125
- inv = { s => val.dup }
126
- inv[s].op = ((inv[s].op == :+) ? :- : :+)
127
- nxt = {}
128
- fdi.each { |k, v|
129
- if k == s
130
- nxt[k] = v
131
- else
132
- k = k.bind(fwd).reduce_rec if k.kind_of? Indirection
133
- nxt[k] = Expression[Expression[v].bind(inv).reduce_rec]
134
- end
135
- }
136
- fdi = nxt
137
- }
138
- if pc_reg
139
- if di.opcode.props[:setip]
140
- xr = get_xrefs_x(nil, di)
141
- if xr and xr.length == 1
142
- fdi[pc_reg] = xr[0]
143
- else
144
- fdi[:incomplete_binding] = Expression[1]
145
- end
146
- else
147
- fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
148
- end
149
- end
150
- fdi
151
- end
152
136
  end
153
137
 
154
138
  class Disassembler
@@ -156,11 +140,16 @@ class Disassembler
156
140
  def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end
157
141
  def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end
158
142
 
159
- # returns the dasm section's edata containing addr
160
- # its #ptr points to addr
161
- # returns the 1st element of #get_section_at
162
- def get_edata_at(addr)
163
- if s = get_section_at(addr)
143
+ # adds a comment at the given address
144
+ # comments are stored in the hash @comment: {addr => [list of strings]}
145
+ def add_comment(addr, cmt)
146
+ @comment[addr] ||= []
147
+ @comment[addr] |= [cmt]
148
+ end
149
+
150
+ # returns the 1st element of #get_section_at (ie the edata at a given address) or nil
151
+ def get_edata_at(*a)
152
+ if s = get_section_at(*a)
164
153
  s[0]
165
154
  end
166
155
  end
@@ -209,12 +198,12 @@ class Disassembler
209
198
 
210
199
  # yields every InstructionBlock
211
200
  # returns the list of IBlocks
212
- def each_instructionblock
201
+ def each_instructionblock(&b)
213
202
  ret = []
214
203
  @decoded.each { |addr, di|
215
204
  next if not di.kind_of? DecodedInstruction or not di.block_head?
216
205
  ret << di.block
217
- yield di.block if block_given?
206
+ b.call(di.block) if b
218
207
  }
219
208
  ret
220
209
  end
@@ -293,18 +282,19 @@ class Disassembler
293
282
 
294
283
  # returns the label associated to an addr, or nil if none exist
295
284
  def get_label_at(addr)
296
- e, b = get_section_at(addr, false)
285
+ e = get_edata_at(addr, false)
297
286
  e.inv_export[e.ptr] if e
298
287
  end
299
288
 
300
289
  # sets the label for the specified address
301
290
  # returns nil if the address is not mapped
302
291
  # memcheck is passed to get_section_at to validate that the address is mapped
303
- def set_label_at(addr, name, memcheck=true)
292
+ # keep existing label if 'overwrite' is false
293
+ def set_label_at(addr, name, memcheck=true, overwrite=true)
304
294
  addr = Expression[addr].reduce
305
295
  e, b = get_section_at(addr, memcheck)
306
296
  if not e
307
- elsif not l = e.inv_export[e.ptr]
297
+ elsif not l = e.inv_export[e.ptr] or (!overwrite and l != name)
308
298
  l = @program.new_label(name)
309
299
  e.add_export l, e.ptr
310
300
  @label_alias_cache = nil
@@ -317,7 +307,7 @@ class Disassembler
317
307
 
318
308
  # remove a label at address addr
319
309
  def del_label_at(addr, name=get_label_at(addr))
320
- ed, b = get_section_at(addr)
310
+ ed = get_edata_at(addr)
321
311
  if ed and ed.inv_export[ed.ptr]
322
312
  ed.del_export name, ed.ptr
323
313
  @label_alias_cache = nil
@@ -325,6 +315,7 @@ class Disassembler
325
315
  each_xref(addr) { |xr|
326
316
  next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable
327
317
  o.each_expr { |e|
318
+ next unless e.kind_of?(Expression)
328
319
  e.lexpr = addr if e.lexpr == name
329
320
  e.rexpr = addr if e.rexpr == name
330
321
  }
@@ -337,12 +328,14 @@ class Disassembler
337
328
  # returns the new label
338
329
  # the new label must be program-uniq (see @program.new_label)
339
330
  def rename_label(old, new)
331
+ return new if old == new
332
+ raise "label #{new.inspect} exists" if @prog_binding[new]
340
333
  each_xref(normalize(old)) { |x|
341
334
  next if not di = @decoded[x.origin]
342
335
  @cpu.replace_instr_arg_immediate(di.instruction, old, new)
343
336
  di.comment.to_a.each { |c| c.gsub!(old, new) }
344
337
  }
345
- e, l = get_section_at(old, false)
338
+ e = get_edata_at(old, false)
346
339
  if e
347
340
  e.add_export new, e.export.delete(old), true
348
341
  end
@@ -499,12 +492,12 @@ class Disassembler
499
492
  # if from..to spans multiple blocks
500
493
  # to.block is split after to
501
494
  # all paths from 'from' are replaced by a single link to after 'to', be careful !
502
- # (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
495
+ # (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
503
496
  # all instructions are stuffed in the first block
504
497
  # paths are only walked using from/to_normal
505
498
  # 'by' may be empty
506
499
  # returns the block containing the new instrs (nil if empty)
507
- def replace_instrs(from, to, by)
500
+ def replace_instrs(from, to, by, patch_by=false)
508
501
  raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi)
509
502
  raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi)
510
503
 
@@ -520,14 +513,28 @@ class Disassembler
520
513
  wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length }
521
514
  ldi = by.last
522
515
  ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction
523
- wantlen = by.grep(Instruction).length if wantlen < 0 or (ldi and ldi.opcode.props[:setip])
524
- by.map! { |di|
525
- if di.kind_of? Instruction
526
- di = DecodedInstruction.new(di)
527
- wantlen -= di.bin_length = wantlen / by.grep(Instruction).length
528
- end
529
- di
530
- }
516
+ nb_i = by.grep(Instruction).length
517
+ wantlen = nb_i if wantlen < 0 or (ldi and ldi.opcode.props[:setip])
518
+ if patch_by
519
+ by.map! { |di|
520
+ if di.kind_of? Instruction
521
+ di = DecodedInstruction.new(di)
522
+ wantlen -= di.bin_length = wantlen / by.grep(Instruction).length
523
+ nb_i -= 1
524
+ end
525
+ di
526
+ }
527
+ else
528
+ by = by.map { |di|
529
+ if di.kind_of? Instruction
530
+ di = DecodedInstruction.new(di)
531
+ wantlen -= (di.bin_length = wantlen / nb_i)
532
+ nb_i -= 1
533
+ end
534
+ di
535
+ }
536
+ end
537
+
531
538
 
532
539
  #puts " ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip]
533
540
  by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip]
@@ -649,8 +656,8 @@ class Disassembler
649
656
  if b1 and not b1.kind_of? InstructionBlock
650
657
  return if not b1 = block_at(b1)
651
658
  end
652
- if b2 and not b2.kind_of? InstructionBlock
653
- return if not b2 = block_at(b2)
659
+ if b2 and not b2.kind_of? InstructionBlock
660
+ return if not b2 = block_at(b2)
654
661
  end
655
662
  if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and
656
663
  b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot
@@ -720,17 +727,23 @@ class Disassembler
720
727
  end
721
728
 
722
729
  # returns a demangled C++ name
723
- # from wgcc-2.2.2/undecorate.cpp
724
- # TODO
725
730
  def demangle_cppname(name)
726
- ret = name
727
- if name[0] == ??
731
+ case name[0]
732
+ when ?? # MSVC
728
733
  name = name[1..-1]
729
- if name[0] == ??
730
- name = name[1..-1]
731
- op = name[0, 1]
732
- op = name[0, 2] if op == '_'
733
- if op = {
734
+ demangle_msvc(name[1..-1]) if name[0] == ??
735
+ when ?_
736
+ name = name.sub(/_GLOBAL__[ID]_/, '')
737
+ demangle_gcc(name[2..-1][/\S*/]) if name[0, 2] == '_Z'
738
+ end
739
+ end
740
+
741
+ # from wgcc-2.2.2/undecorate.cpp
742
+ # TODO
743
+ def demangle_msvc(name)
744
+ op = name[0, 1]
745
+ op = name[0, 2] if op == '_'
746
+ if op = {
734
747
  '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=",
735
748
  'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&",
736
749
  'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",",
@@ -743,11 +756,157 @@ class Disassembler
743
756
  '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'",
744
757
  '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]",
745
758
  '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[op]
746
- ret = op[0] == ?` ? op[1..-2] : "op_#{op}"
759
+ op[0] == ?` ? op[1..-2] : "op_#{op}"
760
+ end
761
+ end
762
+
763
+ # from http://www.codesourcery.com/public/cxx-abi/abi.html
764
+ def demangle_gcc(name)
765
+ subs = []
766
+ ret = ''
767
+ decode_tok = lambda {
768
+ name ||= ''
769
+ case name[0]
770
+ when nil
771
+ ret = nil
772
+ when ?N
773
+ name = name[1..-1]
774
+ decode_tok[]
775
+ until name[0] == ?E
776
+ break if not ret
777
+ ret << '::'
778
+ decode_tok[]
747
779
  end
780
+ name = name[1..-1]
781
+ when ?I
782
+ name = name[1..-1]
783
+ ret = ret[0..-3] if ret[-2, 2] == '::'
784
+ ret << '<'
785
+ decode_tok[]
786
+ until name[0] == ?E
787
+ break if not ret
788
+ ret << ', '
789
+ decode_tok[]
790
+ end
791
+ ret << ' ' if ret and ret[-1] == ?>
792
+ ret << '>' if ret
793
+ name = name[1..-1]
794
+ when ?T
795
+ case name[1]
796
+ when ?T; ret << 'vtti('
797
+ when ?V; ret << 'vtable('
798
+ when ?I; ret << 'typeinfo('
799
+ when ?S; ret << 'typename('
800
+ else ret = nil
801
+ end
802
+ name = name[2..-1].to_s
803
+ decode_tok[] if ret
804
+ ret << ')' if ret
805
+ name = name[1..-1] if name[0] == ?E
806
+ when ?C
807
+ name = name[2..-1]
808
+ base = ret[/([^:]*)(<.*|::)?$/, 1]
809
+ ret << base
810
+ when ?D
811
+ name = name[2..-1]
812
+ base = ret[/([^:]*)(<.*|::)?$/, 1]
813
+ ret << '~' << base
814
+ when ?0..?9
815
+ nr = name[/^[0-9]+/]
816
+ name = name[nr.length..-1].to_s
817
+ ret << name[0, nr.to_i]
818
+ name = name[nr.to_i..-1]
819
+ subs << ret[/[\w:]*$/]
820
+ when ?S
821
+ name = name[1..-1]
822
+ case name[0]
823
+ when ?_, ?0..?9, ?A..?Z
824
+ case name[0]
825
+ when ?_; idx = 0 ; name = name[1..-1]
826
+ when ?0..?9; idx = name[0, 1].unpack('C')[0] - 0x30 + 1 ; name = name[2..-1]
827
+ when ?A..?Z; idx = name[0, 1].unpack('C')[0] - 0x41 + 11 ; name = name[2..-1]
828
+ end
829
+ if not subs[idx]
830
+ ret = nil
831
+ else
832
+ ret << subs[idx]
833
+ end
834
+ when ?t
835
+ ret << 'std::'
836
+ name = name[1..-1]
837
+ decode_tok[]
838
+ else
839
+ std = { ?a => 'std::allocator',
840
+ ?b => 'std::basic_string',
841
+ ?s => 'std::string', # 'std::basic_string < char, std::char_traits<char>, std::allocator<char> >',
842
+ ?i => 'std::istream', # 'std::basic_istream<char, std::char_traits<char> >',
843
+ ?o => 'std::ostream', # 'std::basic_ostream<char, std::char_traits<char> >',
844
+ ?d => 'std::iostream', # 'std::basic_iostream<char, std::char_traits<char> >'
845
+ }[name[0]]
846
+ if not std
847
+ ret = nil
848
+ else
849
+ ret << std
850
+ end
851
+ name = name[1..-1]
852
+ end
853
+ when ?P, ?R, ?r, ?V, ?K
854
+ attr = { ?P => '*', ?R => '&', ?r => ' restrict', ?V => ' volatile', ?K => ' const' }[name[0]]
855
+ name = name[1..-1]
856
+ rl = ret.length
857
+ decode_tok[]
858
+ if ret
859
+ ret << attr
860
+ subs << ret[rl..-1]
861
+ end
862
+ else
863
+ if ret =~ /[(<]/ and ty = {
864
+ ?v => 'void', ?w => 'wchar_t', ?b => 'bool', ?c => 'char', ?a => 'signed char',
865
+ ?h => 'unsigned char', ?s => 'short', ?t => 'unsigned short', ?i => 'int',
866
+ ?j => 'unsigned int', ?l => 'long', ?m => 'unsigned long', ?x => '__int64',
867
+ ?y => 'unsigned __int64', ?n => '__int128', ?o => 'unsigned __int128', ?f => 'float',
868
+ ?d => 'double', ?e => 'long double', ?g => '__float128', ?z => '...'
869
+ }[name[0]]
870
+ name = name[1..-1]
871
+ ret << ty
872
+ else
873
+ fu = name[0, 2]
874
+ name = name[2..-1]
875
+ if op = {
876
+ 'nw' => ' new', 'na' => ' new[]', 'dl' => ' delete', 'da' => ' delete[]',
877
+ 'ps' => '+', 'ng' => '-', 'ad' => '&', 'de' => '*', 'co' => '~', 'pl' => '+',
878
+ 'mi' => '-', 'ml' => '*', 'dv' => '/', 'rm' => '%', 'an' => '&', 'or' => '|',
879
+ 'eo' => '^', 'aS' => '=', 'pL' => '+=', 'mI' => '-=', 'mL' => '*=', 'dV' => '/=',
880
+ 'rM' => '%=', 'aN' => '&=', 'oR' => '|=', 'eO' => '^=', 'ls' => '<<', 'rs' => '>>',
881
+ 'lS' => '<<=', 'rS' => '>>=', 'eq' => '==', 'ne' => '!=', 'lt' => '<', 'gt' => '>',
882
+ 'le' => '<=', 'ge' => '>=', 'nt' => '!', 'aa' => '&&', 'oo' => '||', 'pp' => '++',
883
+ 'mm' => '--', 'cm' => ',', 'pm' => '->*', 'pt' => '->', 'cl' => '()', 'ix' => '[]',
884
+ 'qu' => '?', 'st' => ' sizeof', 'sz' => ' sizeof', 'at' => ' alignof', 'az' => ' alignof'
885
+ }[fu]
886
+ ret << "operator#{op}"
887
+ elsif fu == 'cv'
888
+ ret << "cast<"
889
+ decode_tok[]
890
+ ret << ">" if ret
891
+ else
892
+ ret = nil
893
+ end
894
+ end
895
+ end
896
+ name ||= ''
897
+ }
898
+
899
+ decode_tok[]
900
+ subs.pop
901
+ if ret and name != ''
902
+ ret << '('
903
+ decode_tok[]
904
+ while ret and name != ''
905
+ ret << ', '
906
+ decode_tok[]
748
907
  end
908
+ ret << ')' if ret
749
909
  end
750
- # TODO
751
910
  ret
752
911
  end
753
912
 
@@ -755,7 +914,8 @@ class Disassembler
755
914
  # return/yields all the addresses matching
756
915
  # if yield returns nil/false, do not include the addr in the final result
757
916
  # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM)
758
- def pattern_scan(pat, chunksz=nil, margin=nil)
917
+ # with addr_start/length, symbol-based sections are skipped
918
+ def pattern_scan(pat, addr_start=nil, length=nil, chunksz=nil, margin=nil, &b)
759
919
  chunksz ||= 4*1024*1024 # scan 4MB at a time
760
920
  margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries
761
921
 
@@ -763,9 +923,27 @@ class Disassembler
763
923
 
764
924
  found = []
765
925
  @sections.each { |sec_addr, e|
926
+ if addr_start
927
+ length ||= 0x1000_0000
928
+ begin
929
+ if sec_addr < addr_start
930
+ next if sec_addr+e.length <= addr_start
931
+ e = e[addr_start-sec_addr, e.length]
932
+ sec_addr = addr_start
933
+ end
934
+ if sec_addr+e.length > addr_start+length
935
+ next if sec_addr > addr_start+length
936
+ e = e[0, sec_addr+e.length-(addr_start+length)]
937
+ end
938
+ rescue
939
+ puts $!, $!.message, $!.backtrace if $DEBUG
940
+ # catch arithmetic error with symbol-based section
941
+ next
942
+ end
943
+ end
766
944
  e.pattern_scan(pat, chunksz, margin) { |eo|
767
945
  match_addr = sec_addr + eo
768
- found << match_addr if not block_given? or yield(match_addr)
946
+ found << match_addr if not b or b.call(match_addr)
769
947
  false
770
948
  }
771
949
  }
@@ -773,14 +951,14 @@ class Disassembler
773
951
  end
774
952
 
775
953
  # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/
776
- def strings_scan(minlen=6)
954
+ def strings_scan(minlen=6, &b)
777
955
  ret = []
778
956
  nexto = 0
779
957
  pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o|
780
958
  if o - nexto > 0
781
959
  next unless e = get_edata_at(o)
782
960
  str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m]
783
- ret << [o, str] if not block_given? or yield(o, str)
961
+ ret << [o, str] if not b or b.call(o, str)
784
962
  nexto = o + str.length
785
963
  end
786
964
  }
@@ -805,18 +983,24 @@ class Disassembler
805
983
  def load_map(str, off=0)
806
984
  str = File.read(str) rescue nil if not str.index("\n")
807
985
  sks = @sections.keys.sort
986
+ seen = {}
808
987
  str.each_line { |l|
809
988
  case l.strip
810
989
  when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style
811
- set_label_at($1.to_i(16)+off, $3)
990
+ addr = $1.to_i(16)+off
991
+ set_label_at(addr, $3, false, !seen[addr])
992
+ seen[addr] = true
812
993
  when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style
813
994
  # we do not have section load order, let's just hope that the addresses are sorted (and sortable..)
814
995
  # could check the 1st part of the file, with section sizes, but it is not very convenient
815
996
  # the regexp is so that we skip the 1st part with section descriptions
816
997
  # in the file, section 1 is the 1st section ; we have an additional section (exe header) which fixes the 0-index
817
- set_label_at(sks[$1.to_i(16)] + $2.to_i(16) + off, $3)
998
+ # XXX this is PE-specific, TODO fix it for ELF (ida references sections, we reference segments...)
999
+ addr = sks[$1.to_i(16)] + $2.to_i(16) + off
1000
+ set_label_at(addr, $3, false, !seen[addr])
1001
+ seen[addr] = true
818
1002
  end
819
- }
1003
+ }
820
1004
  end
821
1005
 
822
1006
  # saves the dasm state in a file
@@ -830,13 +1014,14 @@ class Disassembler
830
1014
  def save_io(fd)
831
1015
  fd.puts 'Metasm.dasm'
832
1016
 
833
- if @program.filename
1017
+ if @program.filename and not @program.kind_of?(Shellcode)
834
1018
  t = @program.filename.to_s
835
1019
  fd.puts "binarypath #{t.length}", t
836
1020
  else
837
1021
  t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}"
838
1022
  fd.puts "cpu #{t.length}", t
839
1023
  # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE
1024
+ # do not output binarypath, we'll be loaded as a Shellcode, 'section' will suffice
840
1025
  end
841
1026
 
842
1027
  @sections.each { |a, e|
@@ -942,6 +1127,7 @@ class Disassembler
942
1127
  reinitialize Shellcode.new(cpu)
943
1128
  @program.disassembler = self
944
1129
  @program.init_disassembler
1130
+ @sections.delete(0) # rm empty section at 0, other real 'section' follow
945
1131
  when 'section'
946
1132
  info = data[0, data.index("\n") || data.length]
947
1133
  data = data[info.length, data.length]
@@ -1030,7 +1216,7 @@ class Disassembler
1030
1216
  len = (len != '' ? len.to_i : nil)
1031
1217
  o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ?
1032
1218
  add_xref(a, Xref.new(t, o, len))
1033
- rescue
1219
+ rescue
1034
1220
  puts "load: bad xref #{l.inspect} #$!" if $VERBOSE
1035
1221
  end
1036
1222
  }
@@ -1104,12 +1290,354 @@ class Disassembler
1104
1290
  delta
1105
1291
  end
1106
1292
 
1293
+ # dataflow method
1294
+ # walks a function, starting at start_addr
1295
+ # follows the usage of registers, computing the evolution from the value they had at start_addr
1296
+ # whenever an instruction references the register (or anything derived from it),
1297
+ # yield [di, used_register, reg_value, trace_state] where reg_value is the Expression holding the value of
1298
+ # the register wrt the initial value at start_addr, and trace_state the value of all registers (reg_value
1299
+ # not yet applied)
1300
+ # reg_value may be nil if used_register is not modified by the function (eg call [eax])
1301
+ # the yield return value is propagated, unless it is nil/false
1302
+ # init_state is a hash { :reg => initial value }
1303
+ def trace_function_register(start_addr, init_state)
1304
+ function_walk(start_addr, init_state) { |args|
1305
+ trace_state = args.last
1306
+ case args.first
1307
+ when :di
1308
+ di = args[2]
1309
+ update = {}
1310
+ get_fwdemu_binding(di).each { |r, v|
1311
+ if v.kind_of?(Expression) and v.externals.find { |e| trace_state[e] }
1312
+ # XXX may mix old (from trace) and current (from v) registers
1313
+ newv = v.bind(trace_state)
1314
+ update[r] = yield(di, r, newv, trace_state)
1315
+ elsif r.kind_of?(ExpressionType) and rr = r.externals.find { |e| trace_state[e] }
1316
+ # reg dereferenced in a write (eg mov [esp], 42)
1317
+ next if update.has_key?(rr) # already yielded
1318
+ if yield(di, rr, trace_state[rr], trace_state) == false
1319
+ update[rr] = false
1320
+ end
1321
+ elsif trace_state[r]
1322
+ # started on mov reg, foo
1323
+ next if di.address == start_addr
1324
+ update[r] = false
1325
+ end
1326
+ }
1327
+
1328
+ # directly walk the instruction argument list for registers not appearing in the binding
1329
+ @cpu.instr_args_memoryptr(di).each { |ind|
1330
+ b = @cpu.instr_args_memoryptr_getbase(ind)
1331
+ if b and b = b.symbolic and not update.has_key?(b)
1332
+ yield(di, b, nil, trace_state)
1333
+ end
1334
+ }
1335
+ @cpu.instr_args_regs(di).each { |r|
1336
+ r = r.symbolic
1337
+ if not update.has_key?(r)
1338
+ yield(di, r, nil, trace_state)
1339
+ end
1340
+ }
1341
+
1342
+ update.each { |r, v|
1343
+ trace_state = trace_state.dup
1344
+ if v
1345
+ # cannot follow non-registers, or we would have to emulate every single
1346
+ # instruction (try following [esp+4] across a __stdcall..)
1347
+ trace_state[r] = v if r.kind_of?(::Symbol)
1348
+ else
1349
+ trace_state.delete r
1350
+ end
1351
+ }
1352
+ when :subfunc
1353
+ faddr = args[1]
1354
+ f = @function[faddr]
1355
+ f = @function[f.backtrace_binding[:thunk]] if f and f.backtrace_binding[:thunk]
1356
+ if f
1357
+ binding = f.backtrace_binding
1358
+ if binding.empty?
1359
+ backtrace_update_function_binding(faddr)
1360
+ binding = f.backtrace_binding
1361
+ end
1362
+ # XXX fwdemu_binding ?
1363
+ binding.each { |r, v|
1364
+ if v.externals.find { |e| trace_state[e] }
1365
+ if r.kind_of?(::Symbol)
1366
+ trace_state = trace_state.dup
1367
+ trace_state[r] = Expression[v.bind(trace_state)].reduce
1368
+ end
1369
+ elsif trace_state[r]
1370
+ trace_state = trace_state.dup
1371
+ trace_state.delete r
1372
+ end
1373
+ }
1374
+ end
1375
+ when :merge
1376
+ # when merging paths, keep the smallest common state subset
1377
+ # XXX may have unexplored froms
1378
+ conflicts = args[2]
1379
+ trace_state = trace_state.dup
1380
+ conflicts.each { |addr, st|
1381
+ trace_state.delete_if { |k, v| st[k] != v }
1382
+ }
1383
+ end
1384
+ trace_state = false if trace_state.empty?
1385
+ trace_state
1386
+ }
1387
+ end
1388
+
1389
+ # define a register as a pointer to a structure
1390
+ # rename all [reg+off] as [reg+struct.member] in current function
1391
+ # also trace assignments of pointer members
1392
+ def trace_update_reg_structptr(addr, reg, structname, structoff=0)
1393
+ sname = soff = ctx = nil
1394
+ expr_to_sname = lambda { |expr|
1395
+ if not expr.kind_of?(Expression) or expr.op != :+
1396
+ sname = nil
1397
+ next
1398
+ end
1399
+
1400
+ sname = expr.lexpr || expr.rexpr
1401
+ soff = (expr.lexpr ? expr.rexpr : 0)
1402
+
1403
+ if soff.kind_of?(Expression)
1404
+ # ignore index in ptr array
1405
+ if soff.op == :* and soff.lexpr == @cpu.size/8
1406
+ soff = 0
1407
+ elsif soff.rexpr.kind_of?(Expression) and soff.rexpr.op == :* and soff.rexpr.lexpr == @cpu.size/8
1408
+ soff = soff.lexpr
1409
+ elsif soff.lexpr.kind_of?(Expression) and soff.lexpr.op == :* and soff.lexpr.lexpr == @cpu.size/8
1410
+ soff = soff.rexpr
1411
+ end
1412
+ elsif soff.kind_of?(::Symbol)
1413
+ # array with 1 byte elements / pre-scaled idx?
1414
+ if not ctx[soff]
1415
+ soff = 0
1416
+ end
1417
+ end
1418
+ }
1419
+
1420
+ lastdi = nil
1421
+ trace_function_register(addr, reg => Expression[structname, :+, structoff]) { |di, r, val, trace|
1422
+
1423
+ next if r.to_s =~ /flag/ # XXX maybe too ia32-specific?
1424
+
1425
+ ctx = trace
1426
+ @cpu.instr_args_memoryptr(di).each { |ind|
1427
+ # find the structure dereference in di
1428
+ b = @cpu.instr_args_memoryptr_getbase(ind)
1429
+ b = b.symbolic if b
1430
+ next unless trace[b]
1431
+ imm = @cpu.instr_args_memoryptr_getoffset(ind) || 0
1432
+
1433
+ # check expr has the form 'traced_struct_reg + off'
1434
+ expr_to_sname[trace[b] + imm] # Expr#+ calls Expr#reduce
1435
+ next unless sname.kind_of?(::String) and soff.kind_of?(::Integer)
1436
+ next if not st = c_parser.toplevel.struct[sname] or not st.kind_of?(C::Union)
1437
+
1438
+ # ignore lea esi, [esi+0]
1439
+ next if soff == 0 and not di.backtrace_binding.find { |k, v| v-k != 0 }
1440
+
1441
+ # TODO if trace[b] offset != 0, we had a lea reg, [struct+substruct_off], tweak str accordingly
1442
+
1443
+ # resolve struct + off into struct.membername
1444
+ str = st.name.dup
1445
+ mb = st.expand_member_offset(c_parser, soff, str)
1446
+ # patch di
1447
+ imm = imm.rexpr if imm.kind_of?(Expression) and not imm.lexpr and imm.rexpr.kind_of?(ExpressionString)
1448
+ imm = imm.expr if imm.kind_of?(ExpressionString)
1449
+ @cpu.instr_args_memoryptr_setoffset(ind, ExpressionString.new(imm, str, :structoff))
1450
+
1451
+ # check if the type is an enum/bitfield, patch instruction immediates
1452
+ trace_update_reg_structptr_arg_enum(di, ind, mb, str) if mb
1453
+ } if lastdi != di.address
1454
+ lastdi = di.address
1455
+
1456
+ next Expression[structname, :+, structoff] if di.address == addr and r == reg
1457
+
1458
+ # check if we need to trace 'r' further
1459
+ val = val.reduce_rec if val.kind_of?(Expression)
1460
+ val = Expression[val] if val.kind_of?(::String)
1461
+ case val
1462
+ when Expression
1463
+ # only trace trivial structptr+off expressions
1464
+ expr_to_sname[val]
1465
+ if sname.kind_of?(::String) and soff.kind_of?(::Integer)
1466
+ Expression[sname, :+, soff]
1467
+ end
1468
+
1469
+ when Indirection
1470
+ # di is mov reg, [ptr+struct.offset]
1471
+ # check if the target member is a pointer to a struct, if so, trace it
1472
+ expr_to_sname[val.pointer.reduce]
1473
+
1474
+ next unless sname.kind_of?(::String) and soff.kind_of?(::Integer)
1475
+
1476
+ if st = c_parser.toplevel.struct[sname] and st.kind_of?(C::Union)
1477
+ pt = st.expand_member_offset(c_parser, soff, '')
1478
+ pt = pt.untypedef if pt
1479
+ if pt.kind_of?(C::Pointer)
1480
+ tt = pt.type.untypedef
1481
+ stars = ''
1482
+ while tt.kind_of?(C::Pointer)
1483
+ stars << '*'
1484
+ tt = tt.type.untypedef
1485
+ end
1486
+ if tt.kind_of?(C::Union) and tt.name
1487
+ Expression[tt.name + stars]
1488
+ end
1489
+ end
1490
+
1491
+ elsif soff == 0 and sname[-1] == ?*
1492
+ # XXX pointer to pointer to struct
1493
+ # full C type support would be better, but harder to fit in an Expr
1494
+ Expression[sname[0...-1]]
1495
+ end
1496
+ # in other cases, stop trace
1497
+ end
1498
+ }
1499
+ end
1500
+
1501
+ # found a special member of a struct, check if we can apply
1502
+ # bitfield/enum name to other constants in the di
1503
+ def trace_update_reg_structptr_arg_enum(di, ind, mb, str)
1504
+ if ename = mb.has_attribute_var('enum') and enum = c_parser.toplevel.struct[ename] and enum.kind_of?(C::Enum)
1505
+ # handle enums: struct moo { int __attribute__((enum(bla))) fld; };
1506
+ doit = lambda { |_di|
1507
+ if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer)
1508
+ # handle enum values on tagged structs
1509
+ if enum.members and name = enum.members.index(num_i)
1510
+ num.lexpr = nil
1511
+ num.op = :+
1512
+ num.rexpr = ExpressionString.new(Expression[num_i], name, :enum)
1513
+ _di.add_comment "enum::#{ename}" if _di.address != di.address
1514
+ end
1515
+ end
1516
+ }
1517
+
1518
+ doit[di]
1519
+
1520
+ # mov eax, [ptr+struct.enumfield] => trace eax
1521
+ if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 }
1522
+ reg = reg.symbolic
1523
+ trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace|
1524
+ next if r != reg and val != Expression[reg]
1525
+ doit[_di]
1526
+ val
1527
+ }
1528
+ end
1529
+
1530
+ elsif mb.untypedef.kind_of?(C::Struct)
1531
+ # handle bitfields
1532
+
1533
+ byte_off = 0
1534
+ if str =~ /\+(\d+)$/
1535
+ # test byte [bitfield+1], 0x1 => test dword [bitfield], 0x100
1536
+ # XXX little-endian only
1537
+ byte_off = $1.to_i
1538
+ str[/\+\d+$/] = ''
1539
+ end
1540
+ cmt = str.split('.')[-2, 2].join('.') if str.count('.') > 1
1541
+
1542
+ doit = lambda { |_di, add|
1543
+ if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer)
1544
+ # TODO handle ~num_i
1545
+ num_left = num_i << add
1546
+ s_or = []
1547
+ mb.untypedef.members.each { |mm|
1548
+ if bo = mb.bitoffsetof(c_parser, mm)
1549
+ boff, blen = bo
1550
+ if mm.name && blen == 1 && ((num_left >> boff) & 1) > 0
1551
+ s_or << mm.name
1552
+ num_left &= ~(1 << boff)
1553
+ end
1554
+ end
1555
+ }
1556
+ if s_or.first
1557
+ if num_left != 0
1558
+ s_or << ('0x%X' % num_left)
1559
+ end
1560
+ s = s_or.join('|')
1561
+ num.lexpr = nil
1562
+ num.op = :+
1563
+ num.rexpr = ExpressionString.new(Expression[num_i], s, :bitfield)
1564
+ _di.add_comment cmt if _di.address != di.address
1565
+ end
1566
+ end
1567
+ }
1568
+
1569
+ doit[di, byte_off*8]
1570
+
1571
+ if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 }
1572
+ reg = reg.symbolic
1573
+ trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace|
1574
+ if r.kind_of?(Expression) and r.op == :&
1575
+ if r.lexpr == reg
1576
+ # test al, 42
1577
+ doit[_di, byte_off*8]
1578
+ elsif r.lexpr.kind_of?(Expression) and r.lexpr.op == :>> and r.lexpr.lexpr == reg
1579
+ # test ah, 42
1580
+ doit[_di, byte_off*8+r.lexpr.rexpr]
1581
+ end
1582
+ end
1583
+ next if r != reg and val != Expression[reg]
1584
+ doit[_di, byte_off*8]
1585
+ _di.address == di.address && r == reg ? Expression[0] : val
1586
+ }
1587
+ end
1588
+ end
1589
+ end
1590
+
1107
1591
  # change Expression display mode for current object o to display integers as char constants
1108
1592
  def toggle_expr_char(o)
1109
- return if not o.kind_of? Renderable
1593
+ return if not o.kind_of?(Renderable)
1594
+ tochars = lambda { |v|
1595
+ if v.kind_of?(::Integer)
1596
+ a = []
1597
+ vv = v.abs
1598
+ a << (vv & 0xff)
1599
+ vv >>= 8
1600
+ while vv > 0
1601
+ a << (vv & 0xff)
1602
+ vv >>= 8
1603
+ end
1604
+ if a.all? { |b| b < 0x7f }
1605
+ s = a.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1]
1606
+ ExpressionString.new(v, (v > 0 ? "'#{s}'" : "-'#{s}'"), :char)
1607
+ end
1608
+ end
1609
+ }
1110
1610
  o.each_expr { |e|
1111
- e.render_info ||= {}
1112
- e.render_info[:char] = e.render_info[:char] ? nil : @cpu.endianness
1611
+ if e.kind_of?(Expression)
1612
+ if nr = tochars[e.rexpr]
1613
+ e.rexpr = nr
1614
+ elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :char
1615
+ e.rexpr = e.rexpr.expr
1616
+ end
1617
+ if nl = tochars[e.lexpr]
1618
+ e.lexpr = nl
1619
+ elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :char
1620
+ e.lexpr = e.lexpr.expr
1621
+ end
1622
+ end
1623
+ }
1624
+ end
1625
+
1626
+ def toggle_expr_dec(o)
1627
+ return if not o.kind_of?(Renderable)
1628
+ o.each_expr { |e|
1629
+ if e.kind_of?(Expression)
1630
+ if e.rexpr.kind_of?(::Integer)
1631
+ e.rexpr = ExpressionString.new(Expression[e.rexpr], e.rexpr.to_s, :decimal)
1632
+ elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :decimal
1633
+ e.rexpr = e.rexpr.reduce
1634
+ end
1635
+ if e.lexpr.kind_of?(::Integer)
1636
+ e.lexpr = ExpressionString.new(Expression[e.lexpr], e.lexpr.to_s, :decimal)
1637
+ elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :decimal
1638
+ e.lexpr = e.lexpr.reduce
1639
+ end
1640
+ end
1113
1641
  }
1114
1642
  end
1115
1643
 
@@ -1118,6 +1646,7 @@ class Disassembler
1118
1646
  def toggle_expr_offset(o)
1119
1647
  return if not o.kind_of? Renderable
1120
1648
  o.each_expr { |e|
1649
+ next unless e.kind_of?(Expression)
1121
1650
  if n = @prog_binding[e.lexpr]
1122
1651
  e.lexpr = n
1123
1652
  elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr)
@@ -1133,6 +1662,15 @@ class Disassembler
1133
1662
  }
1134
1663
  end
1135
1664
 
1665
+ # toggle all ExpressionStrings
1666
+ def toggle_expr_str(o)
1667
+ return if not o.kind_of?(Renderable)
1668
+ o.each_expr { |e|
1669
+ next unless e.kind_of?(ExpressionString)
1670
+ e.hide_str = !e.hide_str
1671
+ }
1672
+ end
1673
+
1136
1674
  # call this function on a function entrypoint if the function is in fact a __noreturn
1137
1675
  # will cut the to_subfuncret of callers
1138
1676
  def fix_noreturn(o)
@@ -1184,7 +1722,7 @@ class Disassembler
1184
1722
  # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd
1185
1723
  def load_plugin(plugin_filename)
1186
1724
  if not File.exist?(plugin_filename)
1187
- if File.exist?(plugin_filename+'.rb')
1725
+ if File.exist?(plugin_filename+'.rb')
1188
1726
  plugin_filename += '.rb'
1189
1727
  elsif defined? Metasmdir
1190
1728
  # try autocomplete
@@ -1225,7 +1763,7 @@ class Disassembler
1225
1763
  if bd2.kind_of? DecodedInstruction
1226
1764
  bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2)
1227
1765
  end
1228
-
1766
+
1229
1767
  reduce = lambda { |e| Expression[Expression[e].reduce] }
1230
1768
 
1231
1769
  bd = {}
@@ -1276,5 +1814,31 @@ class Disassembler
1276
1814
 
1277
1815
  bd
1278
1816
  end
1817
+
1818
+ def gui_hilight_word_regexp(word)
1819
+ @cpu.gui_hilight_word_regexp(word)
1820
+ end
1821
+
1822
+ # return a C::AllocCStruct from c_parser
1823
+ # TODO handle program.class::Header.to_c_struct
1824
+ def decode_c_struct(structname, addr)
1825
+ if c_parser and edata = get_edata_at(addr)
1826
+ c_parser.decode_c_struct(structname, edata.data, edata.ptr)
1827
+ end
1828
+ end
1829
+
1830
+ def decode_c_ary(structname, addr, len)
1831
+ if c_parser and edata = get_edata_at(addr)
1832
+ c_parser.decode_c_ary(structname, len, edata.data, edata.ptr)
1833
+ end
1834
+ end
1835
+
1836
+ # find the function containing addr, and find & rename stack vars in it
1837
+ def name_local_vars(addr)
1838
+ if @cpu.respond_to?(:name_local_vars) and faddr = find_function_start(addr)
1839
+ @function[faddr] ||= DecodedFunction.new # XXX
1840
+ @cpu.name_local_vars(self, faddr)
1841
+ end
1842
+ end
1279
1843
  end
1280
1844
  end