metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,1280 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # this file complements disassemble.rb, adding misc user-friendly methods
7
+
8
+ module Metasm
9
class InstructionBlock
  # Registers +addr+ as a source of this block.
  # +type+ selects the list to update (:normal, :subfuncret or :indirect) by
  # dispatching to the matching add_from_<type> method.
  def add_from(addr, type=:normal)
    send("add_from_#{type}", addr)
  end

  # Each add_from_* keeps addr only once in its list and returns the updated list.
  def add_from_normal(addr)
    @from_normal = (@from_normal || []) | [addr]
  end

  def add_from_subfuncret(addr)
    @from_subfuncret = (@from_subfuncret || []) | [addr]
  end

  def add_from_indirect(addr)
    @from_indirect = (@from_indirect || []) | [addr]
  end

  # Walks every source address, yielding [address, type] where type is
  # :normal, :subfuncret or :indirect (lists are visited in that order).
  def each_from
    each_from_normal do |src|
      yield src, :normal
    end
    each_from_subfuncret do |src|
      yield src, :subfuncret
    end
    each_from_indirect do |src|
      yield src, :indirect
    end
  end

  # The per-list iterators do nothing (and return nil) when the list is unset.
  def each_from_normal(&blk)
    return unless from_normal
    @from_normal.each(&blk)
  end

  def each_from_subfuncret(&blk)
    return unless from_subfuncret
    @from_subfuncret.each(&blk)
  end

  def each_from_indirect(&blk)
    return unless from_indirect
    @from_indirect.each(&blk)
  end

  # Registers +addr+ as a destination of this block; +type+ works as in add_from.
  def add_to(addr, type=:normal)
    send("add_to_#{type}", addr)
  end

  def add_to_normal(addr)
    @to_normal = (@to_normal || []) | [addr]
  end

  def add_to_subfuncret(addr)
    @to_subfuncret = (@to_subfuncret || []) | [addr]
  end

  def add_to_indirect(addr)
    @to_indirect = (@to_indirect || []) | [addr]
  end

  # Walks every destination address, yielding [address, type] like each_from.
  def each_to
    each_to_normal do |dst|
      yield dst, :normal
    end
    each_to_subfuncret do |dst|
      yield dst, :subfuncret
    end
    each_to_indirect do |dst|
      yield dst, :indirect
    end
  end

  def each_to_normal(&blk)
    return unless to_normal
    @to_normal.each(&blk)
  end

  def each_to_subfuncret(&blk)
    return unless to_subfuncret
    @to_subfuncret.each(&blk)
  end

  def each_to_indirect(&blk)
    return unless to_indirect
    @to_indirect.each(&blk)
  end

  # Yields the sources belonging to the same function: the subfuncret then the
  # normal sources, unless this block's address is registered in dasm.function
  # (in which case nothing is yielded).
  def each_from_samefunc(dasm, &b)
    unless dasm.function[address]
      @from_subfuncret.each(&b) if from_subfuncret
      @from_normal.each(&b) if from_normal
    end
  end

  # Yields the sources that are not in the same subfunction as this block:
  # normal/subfuncret sources only when this block's address is registered in
  # dasm.function, indirect sources always.
  def each_from_otherfunc(dasm, &b)
    @from_normal.each(&b) if from_normal && dasm.function[address]
    @from_subfuncret.each(&b) if from_subfuncret && dasm.function[address]
    @from_indirect.each(&b) if from_indirect
  end

  # Yields the (normalized) normal/subfuncret destinations that are not
  # registered in dasm.function.
  def each_to_samefunc(dasm)
    each_to do |dest, kind|
      next unless kind == :normal || kind == :subfuncret
      dest = dasm.normalize(dest)
      yield dest unless dasm.function[dest]
    end
  end

  # Yields the (normalized) destinations outside this subfunction: indirect
  # ones, those registered in dasm.function, and those absent from dasm.decoded.
  def each_to_otherfunc(dasm)
    each_to do |dest, kind|
      dest = dasm.normalize(dest)
      yield dest if kind == :indirect || dasm.function[dest] || !dasm.decoded[dest]
    end
  end
end
103
+
104
class DecodedInstruction
  # Tells whether this instruction is the first entry in the instruction list
  # of the block it belongs to (@block).
  def block_head?
    head = @block.list.first
    self == head
  end
end
110
+
111
class CPU
  # compat alias, for scripts using an older version of metasm
  def get_backtrace_binding(di)
    backtrace_binding(di)
  end

  # Returns something like backtrace_binding, in the forward direction.
  #
  # di::     the decoded instruction; its backtrace_binding is computed and
  #          cached on the instruction if not already set
  # pc_reg:: optional register name (eg :pc); when given, the effect on the
  #          instruction pointer is added to the result
  #
  # The returned hash is never the cached di.backtrace_binding once pc_reg
  # entries have been added: the cache is copied first, so that forward
  # emulation does not leak pc / :incomplete_binding keys into the binding
  # reused by later backtraces.
  def get_fwdemu_binding(di, pc_reg=nil)
    fdi = di.backtrace_binding ||= get_backtrace_binding(di)
    # find self-updated regs & revert them in simultaneous affectations
    # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42  jz foobar)
    fdi.keys.grep(::Symbol).each { |s|
      val = Expression[fdi[s]]
      next if val.lexpr != s or (val.op != :+ and val.op != :-) #or not val.rexpr.kind_of? ::Integer
      fwd = { s => val }
      inv = { s => val.dup }
      inv[s].op = ((inv[s].op == :+) ? :- : :+)
      nxt = {}
      fdi.each { |k, v|
        if k == s
          nxt[k] = v
        else
          k = k.bind(fwd).reduce_rec if k.kind_of? Indirection
          nxt[k] = Expression[Expression[v].bind(inv).reduce_rec]
        end
      }
      fdi = nxt
    }
    if pc_reg
      # the loop above only builds a fresh hash when a self-updated register
      # was found: otherwise fdi still is the cached binding, copy before writing
      fdi = fdi.dup if fdi.equal?(di.backtrace_binding)
      if di.opcode.props[:setip]
        xr = get_xrefs_x(nil, di)
        if xr and xr.length == 1
          fdi[pc_reg] = xr[0]
        else
          fdi[:incomplete_binding] = Expression[1]
        end
      else
        fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
      end
    end
    fdi
  end
end
153
+
154
+ class Disassembler
155
# class-level reader for @@backtrace_maxblocks
# (described by the original author as the default for newly created Disassemblers)
def self.backtrace_maxblocks
  @@backtrace_maxblocks
end

# class-level writer for @@backtrace_maxblocks
def self.backtrace_maxblocks=(b)
  @@backtrace_maxblocks = b
end
158
+
159
# returns the first element of #get_section_at(addr): the dasm section's edata
# containing addr, whose #ptr points to addr
# returns nil when get_section_at finds nothing
def get_edata_at(addr)
  section = get_section_at(addr)
  section[0] if section
end
167
+
168
# returns the DecodedInstruction stored in @decoded for addr (looked up as-is,
# then normalized), or nil if addr is nil or the entry is not a DecodedInstruction
def di_at(addr)
  found = addr ? (@decoded[addr] || @decoded[normalize(addr)]) : nil
  found.kind_of?(DecodedInstruction) ? found : nil
end
173
+
174
# returns the InstructionBlock (di.block) of the instruction found by di_at(addr)
# returns nil when no instruction is decoded there
def block_at(addr)
  di = di_at(addr)
  return nil unless di
  di.block
end
179
+
180
# returns the DecodedFunction stored in @function for addr (looked up as-is,
# then normalized), or nil if addr is nil or the entry is not a DecodedFunction
def function_at(addr)
  found = addr ? (@function[addr] || @function[normalize(addr)]) : nil
  found.kind_of?(DecodedFunction) ? found : nil
end
185
+
186
# returns the DecodedInstruction covering addr
# scans back from addr, up to 15 bytes, and returns the first instruction whose
# bin_length reaches addr: with overlapping instructions, the one starting
# nearest to addr wins
# returns nil if addr is nil or nothing covers it
def di_including(addr)
  return unless addr
  base = normalize(addr)
  (0...16).each do |delta|
    candidate = @decoded[base - delta]
    return candidate if candidate.kind_of?(DecodedInstruction) && candidate.bin_length > delta
  end
  nil
end
195
+
196
# returns the InstructionBlock containing the byte at addr, ie the block of
# the instruction picked by di_including() (relevant on overlapping instrs)
# returns nil when no instruction covers addr
def block_including(addr)
  di = di_including(addr)
  return nil unless di
  di.block
end
202
+
203
# returns the DecodedFunction including this byte
# the function is the one whose start is found by find_function_start() from
# the covering instruction (relevant when a block is shared by multiple funcs)
# returns nil when no instruction covers addr
def function_including(addr)
  di = di_including(addr)
  return unless di
  function_at(find_function_start(di.address))
end
209
+
210
# yields every InstructionBlock: the block of each DecodedInstruction in
# @decoded that is the head of its block
# the block is optional; the list of IBlocks is returned either way
def each_instructionblock
  blocks = []
  @decoded.each do |_addr, di|
    next unless di.kind_of?(DecodedInstruction) && di.block_head?
    blocks << di.block
    yield di.block if block_given?
  end
  blocks
end
alias instructionblocks each_instructionblock
222
+
223
# return a backtrace_binding reversed (akin to code emulation) (but not really)
# plain forwarder to @cpu.get_fwdemu_binding; pc is handed over unchanged
def get_fwdemu_binding(di, pc=nil)
  @cpu.get_fwdemu_binding(di, pc)
end
227
+
228
# reads len raw bytes from the mmaped address space
# the read is done on the first element returned by get_section_at(addr)
# returns nil when get_section_at finds nothing
def read_raw_data(addr, len)
  section = get_section_at(addr)
  section[0].read(len) if section
end
234
+
235
# read an int of arbitrary type (:u8, :i32, ...)
# an Integer type is taken as a byte count and turned into :u<bits>
# decoding uses @cpu.endianness; returns nil when get_section_at finds nothing
def decode_int(addr, type)
  type = :"u#{type*8}" if type.kind_of?(Integer)
  section = get_section_at(addr)
  section[0].decode_imm(type, @cpu.endianness) if section
end
242
+
243
# read a byte at address addr (decode_int with type :u8)
def decode_byte(addr)
  decode_int(addr, :u8)
end
247
+
248
# read a dword at address addr
# the dword is cpu-sized (eg 32 or 64bits): @cpu.size/8 bytes are decoded
def decode_dword(addr)
  nbytes = @cpu.size/8
  decode_int(addr, nbytes)
end
253
+
254
# read a zero-terminated string from addr (terminator not included)
# at most maxsz bytes are examined; if no terminal 0 is found there, or if
# get_section_at finds nothing, return nil
def decode_strz(addr, maxsz=4096)
  section = get_section_at(addr)
  return unless section
  data = section[0].read(maxsz).to_s
  term = data.index(?\0)
  data[0, term] if term
end
263
+
264
# read a zero-terminated wide string from addr
# the data is scanned as 16-bit little-endian units ('v*'); the raw bytes
# before the first null unit are returned
# return nil if no terminal is found in the first maxsz bytes, or if
# get_section_at finds nothing
def decode_wstrz(addr, maxsz=4096)
  section = get_section_at(addr)
  return unless section
  data = section[0].read(maxsz).to_s
  term = data.unpack('v*').index(0)
  data[0, 2*term] if term
end
273
+
274
# disassembles one instruction at address, through @cpu.decode_instruction
# returns nil if get_section_at finds nothing there
# does not update any internal state of the disassembler, nor reuse the @decoded cache
def disassemble_instruction(addr)
  section = get_section_at(addr)
  return unless section
  @cpu.decode_instruction(section[0], normalize(addr))
end
282
+
283
# disassemble addr as if the code flow came from from_addr
# from_addr may be a DecodedInstruction (its address is used)
# when a block is decoded at from_addr, addr is added to its to_normal list;
# [addr, from_addr] is then queued in @addrs_todo and #disassemble is run
def disassemble_from(addr, from_addr)
  from_addr = from_addr.address if from_addr.kind_of?(DecodedInstruction)
  from_addr = normalize(from_addr)
  src_block = block_at(from_addr)
  src_block.add_to_normal(addr) if src_block
  @addrs_todo << [addr, from_addr]
  disassemble
end
293
+
294
# returns the label associated to an addr, or nil if none exist
# the label is read from the section edata's inv_export at its current ptr
# (get_section_at is called with its second argument set to false)
def get_label_at(addr)
  edata, = get_section_at(addr, false)
  edata.inv_export[edata.ptr] if edata
end
299
+
300
# sets the label for the specified address
# returns nil if the address is not mapped
# memcheck is passed to get_section_at to validate that the address is mapped
# - no label there yet: a new one is created from name (@program.new_label),
#   exported in the section and bound in @prog_binding/@old_prog_binding
# - a different label exists: it is renamed through rename_label
# returns the label now in place
def set_label_at(addr, name, memcheck=true)
  addr = Expression[addr].reduce
  edata, base = get_section_at(addr, memcheck)
  return nil unless edata

  label = edata.inv_export[edata.ptr]
  if !label
    label = @program.new_label(name)
    edata.add_export(label, edata.ptr)
    @label_alias_cache = nil
    @old_prog_binding[label] = @prog_binding[label] = base + edata.ptr
  elsif label != name
    label = rename_label(label, @program.new_label(name))
  end
  label
end
317
+
318
# remove a label at address addr
# name defaults to the label currently found at addr
# - the export is dropped from the section (when one exists at that offset)
# - in every Renderable decoded at the origin of an xref to addr, expression
#   operands equal to name are replaced by addr
# - name is removed from @old_prog_binding and @prog_binding
def del_label_at(addr, name=get_label_at(addr))
  edata, = get_section_at(addr)
  if edata && edata.inv_export[edata.ptr]
    edata.del_export(name, edata.ptr)
    @label_alias_cache = nil
  end
  each_xref(addr) do |xref|
    next unless xref.origin
    user = @decoded[xref.origin]
    next unless user && user.kind_of?(Renderable)
    user.each_expr do |expr|
      expr.lexpr = addr if expr.lexpr == name
      expr.rexpr = addr if expr.rexpr == name
    end
  end
  @old_prog_binding.delete(name)
  @prog_binding.delete(name)
end
335
+
336
# changes a label to another, updates referring instructions etc
# returns the new label
# the new label must be program-uniq (see @program.new_label)
#
# raises a RuntimeError if old is not bound in @prog_binding; the check is
# done before anything is modified, so a failed rename leaves instructions,
# comments and section exports untouched
def rename_label(old, new)
  raise "cant rename nonexisting label #{old}" if not @prog_binding[old]

  # patch the instructions (and their comments) referring to the label
  each_xref(normalize(old)) { |x|
    next if not di = @decoded[x.origin]
    @cpu.replace_instr_arg_immediate(di.instruction, old, new)
    di.comment.to_a.each { |c| c.gsub!(old, new) }
  }

  # move the export in the section holding the label
  e, = get_section_at(old, false)
  if e
    e.add_export new, e.export.delete(old), true
  end

  @label_alias_cache = nil
  @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old)

  # pending disassembly entries may designate the label directly or through an Expression
  @addrs_todo.each { |at|
    case at[0]
    when old; at[0] = new
    when Expression; at[0] = at[0].bind(old => new)
    end
  }

  # relocations targeting the label: fix the instructions overlapping them and the reloc target
  if @inv_section_reloc[old]
    @inv_section_reloc[old].each { |b, e_, o, r|
      (0..16).each { |off|
        if di = @decoded[Expression[b]+o-off] and di.bin_length > off
          @cpu.replace_instr_arg_immediate(di.instruction, old, new)
        end
      }
      r.target = r.target.bind(old => new)
    }
    @inv_section_reloc[new] = @inv_section_reloc.delete(old)
  end

  # keep the C parser toplevel symbol in sync, when there is one
  if c_parser and @c_parser.toplevel.symbol[old]
    @c_parser.toplevel.symbol[new] = @c_parser.toplevel.symbol.delete(old)
    @c_parser.toplevel.symbol[new].name = new
  end

  new
end
378
+
379
# finds the start of a function from the address of an instruction
# (a DecodedInstruction is accepted too)
# walks the same-function predecessors backwards, and returns the address of
# the first block found that is registered in @function or has no
# same-function predecessor
# returns nil when the walk runs out of decoded instructions
def find_function_start(addr)
  addr = addr.address if addr.kind_of?(DecodedInstruction)
  pending = [addr]
  visited = []
  while (cur = pending.pop)
    cur = normalize(cur)
    di = @decoded[cur]
    next if visited.include?(cur) || !di.kind_of?(DecodedInstruction)
    visited << cur
    head = di.block.address
    return head if @function[head]
    preds = []
    di.block.each_from_samefunc(self) { |from| preds << from }
    return head if preds.empty?
    pending.concat(preds)
  end
  nil
end
397
+
398
# iterates over the blocks of a function, yields each func block address
# returns the graph of blocks (block address => [list of samefunc blocks])
#
# addr::            an address, a DecodedInstruction, or a DecodedFunction
#                   (looked up by value in @function)
# incl_subfuncs::   when true, destinations in other functions are followed too
# find_func_start:: when true and addr is not registered in @function, the
#                   walk starts from find_function_start(addr)
# the block is optional
def each_function_block(addr, incl_subfuncs = false, find_func_start = true)
  if addr.kind_of? DecodedFunction
    # Hash#index was removed in ruby 3.0, Hash#key is its replacement
    addr = @function.respond_to?(:key) ? @function.key(addr) : @function.index(addr)
  end
  addr = addr.address if addr.kind_of? DecodedInstruction
  addr = find_function_start(addr) if not @function[addr] and find_func_start
  todo = [addr]
  ret = {}
  while a = todo.pop
    next if not di = di_at(a)
    a = di.block.address
    next if ret[a]
    ret[a] = []
    yield a if block_given?
    di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f }
    di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs
  end
  ret
end
alias function_blocks each_function_block
418
+
419
# returns a graph of function calls
# for each func passed as arg (default: all of @function plus @entrypoints),
# update the 'ret' hash associating func => [list of direct subfuncs called]
# funcs are normalized, deduplicated, and ignored when not in @decoded or
# already present in ret
def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={})
  starts = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] }
  starts.each do |func|
    next if ret[func]
    ret[func] = []
    each_function_block(func) do |blk_addr|
      @decoded[blk_addr].block.each_to_otherfunc(self) do |callee|
        ret[func] |= [callee]
      end
    end
  end
  ret
end
435
+
436
# return the graph of function => subfunction list
# recurses from an entrypoint: function_graph is re-run on every callee found
# so far (plus the entry itself) until the graph stops growing
def function_graph_from(addr)
  addr = normalize(addr)
  addr = find_function_start(addr) || addr
  graph = {}
  previous = nil
  until graph.length == previous
    previous = graph.length
    function_graph(graph.values.flatten + [addr], graph)
  end
  graph
end
449
+
450
# return the graph of function => subfunction list
# for which a (sub-sub)function includes addr
# built by walking the full function_graph backwards, from the function
# containing addr up through its direct and indirect callers
def function_graph_to(addr)
  addr = normalize(addr)
  addr = find_function_start(addr) || addr
  full = function_graph
  callers = {}
  pending = [addr]
  visited = []
  while (target = pending.pop)
    next if visited.include?(target)
    visited << target
    full.each do |func, subfuncs|
      next unless subfuncs.include?(target)
      callers[func] ||= []
      callers[func] |= [target]
      pending << func
    end
  end
  callers
end
471
+
472
# returns info on sections, from @program if supported
# returns an array of [name, addr, length, info]
# without @program support the list is built from @sections, with the label
# at the section base as name and nil as info
def section_info
  return @program.section_info if @program.respond_to?(:section_info)

  @sections.map do |base, edata|
    [get_label_at(base), normalize(base), edata.length, nil]
  end
end
485
+
486
# transform an address into a file offset
# the address is normalized, then the conversion is delegated to @program
def addr_to_fileoff(addr)
  @program.addr_to_fileoff(normalize(addr))
end
491
+
492
# transform a file offset into an address
# the conversion is delegated to @program, foff is passed unchanged
def fileoff_to_addr(foff)
  @program.fileoff_to_addr(foff)
end
496
+
497
# remove the decodedinstructions from..to, replace them by the new Instructions in 'by'
# this updates the block list structure; old di will still be visible in @decoded,
# except those from the original blocks (those are deleted)
# if from..to spans multiple blocks
#  to.block is split after to
#  all paths leaving from are replaced by a single link to after 'to', be careful !
#   (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
#  all instructions are stuffed in the first block
#  paths are only walked using from/to_normal
# 'by' may be empty, and is modified in place (Instructions become DecodedInstructions)
# raises 'bad from' / 'bad to' when no instruction is decoded at from / to
# returns the block containing the new instrs (nil if empty)
def replace_instrs(from, to, by)
  first_di = di_at(from)
  raise 'bad from' unless first_di && first_di.block.list.index(first_di)
  last_di = di_at(to)
  raise 'bad to' unless last_di && last_di.block.list.index(last_di)

  # isolate the range: 'from' must start its block, 'to' must end its block
  split_block(first_di.block, first_di.address)
  if last_di != last_di.block.list.last
    after_idx = last_di.block.list.index(last_di) + 1
    split_block(last_di.block, last_di.block.list[after_idx].address)
  end
  head_blk = first_di.block
  tail_blk = last_di.block

  # generate DecodedInstr from Instrs
  # try to keep the bin_length of the original range
  wantlen = last_di.address + last_di.bin_length - head_blk.address
  wantlen -= by.grep(DecodedInstruction).inject(0) { |sum, di| sum + di.bin_length }
  tail_new = by.last
  tail_new = DecodedInstruction.new(tail_new) if tail_new.kind_of?(Instruction)
  if wantlen < 0 || (tail_new && tail_new.opcode.props[:setip])
    wantlen = by.grep(Instruction).length
  end
  by.map! do |item|
    if item.kind_of?(Instruction)
      item = DecodedInstruction.new(item)
      # the count is re-evaluated each time: already converted entries no longer match
      share = wantlen / by.grep(Instruction).length
      item.bin_length = share
      wantlen -= share
    end
    item
  end

  if !by.empty? && by.last.opcode.props[:saveip]
    by.last.next_addr = tail_blk.list.last.next_addr
  end

  # drop the old instructions, then fill the first block with the new ones
  head_blk.list.each { |di| @decoded.delete(di.address) }
  head_blk.list.clear
  tail_blk.list.each { |di| @decoded.delete(di.address) }
  tail_blk.list.clear
  by.each { |di| head_blk.add_di(di) }
  by.each_with_index do |di, i|
    odi = di_at(di.address)
    if odi
      # collision, hopefully with another deobfuscation run ?
      if by[i..-1].all? { |mine| mine.to_s == @decoded[mine.address].to_s }
        puts "replace_instrs: merge at #{di}" if $DEBUG
        by[i..-1] = by[i..-1].map { |mine| @decoded[mine.address] }
        by[i..-1].each { head_blk.list.pop }
        split_block(odi.block, odi.address)
        tail_blk.to_normal = [di.address]
        (odi.block.from_normal ||= []) << to
        odi.block.from_normal.uniq!
        break
      else
        puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE
        # find free space.. raise ?
        while @decoded[di.address].kind_of?(DecodedInstruction)
          di.address += 1	# XXX use floats ?
          di.bin_length -= 1
        end
      end
    end
    @decoded[di.address] = di
  end

  @addrs_done.delete_if do |ad|
    normalize(ad[0]) == tail_blk.address || ad[1] == tail_blk.address
  end
  if by.empty? && tail_blk.address != head_blk.address
    @addrs_done.delete_if do |ad|
      normalize(ad[0]) == head_blk.address || ad[1] == head_blk.address
    end
  end

  # update to_normal/from_normal
  head_blk.to_normal = tail_blk.to_normal
  head_blk.to_normal.to_a.each do |newto|
    # other paths may already point to newto, we must only update the relevant entry
    ndi = di_at(newto)
    idx = ndi.block.from_normal.to_a.index(to) if ndi
    next unless ndi && idx
    if by.empty?
      ndi.block.from_normal[idx, 1] = head_blk.from_normal.to_a
    else
      ndi.block.from_normal[idx] = head_blk.list.last.address
    end
  end

  # same for to_subfuncret/from_subfuncret
  head_blk.to_subfuncret = tail_blk.to_subfuncret
  head_blk.to_subfuncret.to_a.each do |newto|
    ndi = di_at(newto)
    idx = ndi.block.from_subfuncret.to_a.index(to) if ndi
    next unless ndi && idx
    if by.empty?
      ndi.block.from_subfuncret[idx, 1] = head_blk.from_subfuncret.to_a
    else
      ndi.block.from_subfuncret[idx] = head_blk.list.last.address
    end
  end

  if by.empty?
    # nothing left in the range: link the predecessors straight to the successors
    tail_blk.to_subfuncret = nil if tail_blk.to_subfuncret == []
    tolist = tail_blk.to_subfuncret || tail_blk.to_normal.to_a
    lfrom = get_label_at(head_blk.address)
    if lfrom && tolist.length == 1
      lto = auto_label_at(tolist.first)
      each_xref(head_blk.address, :x) do |x|
        di = @decoded[x.origin]
        next unless di
        @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto)
        di.comment.to_a.each { |c| c.gsub!(lfrom, lto) }
      end
    end
    head_blk.from_normal.to_a.each do |newfrom|
      ndi = di_at(newfrom)
      idx = ndi.block.to_normal.to_a.index(from) if ndi
      ndi.block.to_normal[idx..idx] = tolist if ndi && idx
    end
    head_blk.from_subfuncret.to_a.each do |newfrom|
      ndi = di_at(newfrom)
      idx = ndi.block.to_subfuncret.to_a.index(from) if ndi
      ndi.block.to_subfuncret[idx..idx] = tolist if ndi && idx
    end
  else
    # merge with adjacent blocks
    if head_blk.to_normal.to_a.length == 1 && di_at(head_blk.to_normal.first)
      merge_blocks(head_blk, head_blk.to_normal.first)
    end
    if head_blk.from_normal.to_a.length == 1 && di_at(head_blk.from_normal.first)
      merge_blocks(head_blk.from_normal.first, head_blk)
    end
  end

  by.empty? ? nil : head_blk
end
618
+
619
# undefine a sequence of decodedinstructions from an address
# stops at first non-linear branch, or when the chain loops back onto a block
# already collected (eg a block jumping to itself)
# removes @decoded, @comments, @xrefs, @addrs_done
# does not update @prog_binding (does not undefine labels)
# does nothing if no instruction is decoded at addr
def undefine_from(addr)
  return if not di_at(addr)
  @comment.delete addr if @function.delete addr
  split_block(addr)
  addrs = []
  seen_blocks = {}
  while di = di_at(addr)
    blk = di.block
    # @decoded is only purged after the walk: without this check, a cyclic
    # chain of single-predecessor blocks would be collected forever
    break if seen_blocks[blk.object_id]
    seen_blocks[blk.object_id] = true
    blk.list.each { |ddi| addrs << ddi.address }
    break if blk.to_subfuncret.to_a != [] or blk.to_normal.to_a.length != 1
    addr = blk.to_normal.first
    break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1
  end
  addrs.each { |a| @decoded.delete a }
  # drop the xrefs originating from the removed instructions
  @xrefs.delete_if { |a, x|
    if not x.kind_of? Array
      true if x and addrs.include? x.origin
    else
      x.delete_if { |xx| addrs.include? xx.origin }
      true if x.empty?
    end
  }
  @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? }
end
645
+
646
# merge two instruction blocks if they form a simple chain and are adjacent
# b1 and b2 may be InstructionBlocks or addresses (resolved through block_at)
# set allow_nonadjacent to skip the adjacency check
# on merge, b2's instructions and all of its outgoing links (normal,
# subfuncret, indirect) are moved to b1, and b2's list is emptied
# a block is never merged with itself
# returns true if merged, nil otherwise
def merge_blocks(b1, b2, allow_nonadjacent = false)
  if b1 and not b1.kind_of? InstructionBlock
    return if not b1 = block_at(b1)
  end
  if b2 and not b2.kind_of? InstructionBlock
    return if not b2 = block_at(b2)
  end
  return if not b1 or not b2
  # a self-looping block would append to the very list being iterated below
  return if b1.equal?(b2)
  return if not allow_nonadjacent and b1.list.last.next_addr != b2.address
  # b1 must flow only to b2, and b2 must be reached only from one place
  return if b1.to_normal.to_a != [b2.address] or b2.from_normal.to_a.length != 1	# that handles delay_slot
  return if b1.to_subfuncret.to_a != [] or b2.from_subfuncret.to_a != []
  return if b1.to_indirect.to_a != [] or b2.from_indirect.to_a != []

  b2.list.each { |di| b1.add_di di }
  b1.to_normal = b2.to_normal
  # b1 had no subfuncret/indirect destination (checked above): inherit b2's,
  # otherwise eg the return path of a call ending b2 would vanish from the graph
  b1.to_subfuncret = b2.to_subfuncret
  b2.each_to_indirect { |a| b1.add_to_indirect(a) }
  b2.list.clear
  @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address }
  true
end
666
+
667
# computes the binding of a code sequence
# just a forwarder to CPU#code_binding: this disassembler is passed first,
# followed by the arguments received
def code_binding(*a)
  @cpu.code_binding(self, *a)
end
672
+
673
+ # returns an array of instructions/label that, once parsed and assembled, should
674
+ # give something equivalent to the code accessible from the (list of) entrypoints given
675
+ # from the @decoded dasm graph
676
+ # assume all jump targets have a matching label in @prog_binding
677
+ # may add unconditional jumps in the listing to preserve the code flow
678
# Builds a flat listing (array of instructions and Label objects) of the
# code reachable from the given entrypoint(s) in the @decoded graph.
#
# entry:: one address or an Array of addresses (each goes through normalize)
# include_subfunc:: when true, targets yielded by each_to_otherfunc are
#                   queued too
#
# When a block falls through to a block that cannot be emitted right after
# it, a jump built by @cpu.instr_uncond_jump_to is appended instead.
def flatten_graph(entry, include_subfunc=true)
  listing = []
  entry = [entry] unless entry.kind_of? Array
  todo = entry.map { |a| normalize(a) }
  done = []
  label_of = @prog_binding.invert

  while addr = todo.pop
    next if done.include? addr
    next if not di_at(addr)
    done << addr
    blk = @decoded[addr].block

    if label_of[addr]
      listing << Label.new(label_of[addr])
    end
    listing.concat(blk.list.map { |di| di.instruction })

    # other-function targets go to the front of the queue (handled last),
    # same-function targets to the back (handled first)
    blk.each_to_otherfunc(self) { |to|
      to = normalize(to)
      todo.unshift(to) if include_subfunc
    }
    blk.each_to_samefunc(self) { |to|
      todo << normalize(to)
    }

    # does execution continue past the end of this block ?
    last_di = blk.list[-1-@cpu.delay_slot]
    falls_through = (!last_di || !last_di.opcode.props[:stopexec] || last_di.opcode.props[:saveip])
    next if not falls_through

    nxt = blk.list.last.next_addr
    next if not todo.include? nxt

    if done.include? nxt or not di_at(nxt)
      # the fallthrough target cannot directly follow: jump to it explicitly
      if not nxt_label = label_of[nxt]
        nxt_label = auto_label_at(nxt, 'loc')
        if done.include? nxt and idx = listing.index(@decoded[nxt].block.list.first.instruction)
          listing.insert(idx, Label.new(nxt_label))
        end
      end
      listing << @cpu.instr_uncond_jump_to(nxt_label)
    else
      todo << nxt	# ensure it's next in the listing
    end
  end

  listing
end
721
+
722
+ # returns a demangled C++ name
723
+ # from wgcc-2.2.2/undecorate.cpp
724
+ # TODO
725
# Partial demangler for names starting with '??' followed by a special
# member / operator code.
#
# Returns the operator description ("op_<operator>", or the text between
# the backquote and the quote for the descriptive entries); returns name
# itself when it does not start with '??' or the code is not in the table.
# TODO: the rest of the mangled name is not decoded
def demangle_cppname(name)
  return name if name[0] != ??
  rest = name[1..-1]
  return name if rest[0] != ??
  rest = rest[1..-1]

  # codes are one char long, or two chars when starting with '_'
  code = rest[0, 1]
  code = rest[0, 2] if code == '_'

  table = {
    '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=",
    'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&",
    'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",",
    'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=",
    'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=",
    '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'",
    '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'",
    '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'",
    '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'",
    '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'",
    '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]",
    '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"
  }

  desc = table[code]
  return name if not desc

  if desc[0] == ?`
    desc[1..-2]
  else
    "op_#{desc}"
  end
end
753
+
754
+ # scans all the sections raw for a given regexp
755
+ # return/yields all the addresses matching
756
+ # if yield returns nil/false, do not include the addr in the final result
757
+ # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM)
758
# Runs pat over every section of @sections and collects the addresses
# (section address + offset reported by the section's own pattern_scan).
#
# pat:: a Regexp, or a String which is matched literally
# chunksz:: forwarded to the section scan, defaults to 4MB
# margin:: forwarded to the section scan, defaults to 64KB
#
# With a block, each address is yielded and kept only if the block returns
# a true value. Returns the array of kept addresses.
def pattern_scan(pat, chunksz=nil, margin=nil)
  chunksz = 4*1024*1024 if not chunksz	# scan 4MB at a time
  margin = 65536 if not margin		# extra bytes scanned past each chunk, to match over chunk boundaries

  regex = pat
  regex = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String

  matches = []
  @sections.each { |base, edata|
    edata.pattern_scan(regex, chunksz, margin) { |offset|
      hit = base + offset
      keep = block_given? ? yield(hit) : true
      matches << hit if keep
      false
    }
  }
  matches
end
774
+
775
+ # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/
776
+ def strings_scan(minlen=6)
777
+ ret = []
778
+ nexto = 0
779
+ pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o|
780
+ if o - nexto > 0
781
+ next unless e = get_edata_at(o)
782
+ str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m]
783
+ ret << [o, str] if not block_given? or yield(o, str)
784
+ nexto = o + str.length
785
+ end
786
+ }
787
+ ret
788
+ end
789
+
790
+ # exports the addr => symbol map (see load_map)
791
# Builds one "<addr> <type> <label>" line per entry of @prog_binding.
# Integer addresses are written as hex, zero-padded to 8 digits; the type
# is 'c' when di_at finds an instruction at the address, 'd' otherwise (XXX).
# Returns the array of lines (see load_map).
def save_map
  @prog_binding.map { |label, addr|
    kind = (di_at(addr) ? 'c' : 'd')
    shown = addr
    shown = addr.to_s(16).rjust(8, '0') if addr.kind_of? ::Integer
    "#{shown} #{kind} #{label}"
  }
end
798
+
799
+ # loads a map file (addr => symbol)
800
+ # off is an optional offset to add to every address found (e.g. for rebased binaries)
801
+ # understands:
802
+ # standard map files (eg linux-kernel.map: <addr> <type> <name>, e.g. 'c01001ba t setup_idt')
803
+ # ida map files (<sectionidx>:<sectionoffset> <name>)
804
+ # arg is either the map itself or the filename of the map (if it contains no newline)
805
# Loads labels from a map (addr => symbol), calling set_label_at for each.
#
# str:: the map content, or the name of an existing file holding it (only
#       considered when str contains no newline)
# off:: offset added to every address found
#
# A str without newline that does not name a file is parsed as map content
# (this is what happens for an empty or one-line 'map' section of a
# savefile). IDA-style lines whose section index is unknown are skipped.
def load_map(str, off=0)
  str = File.read(str) if not str.index("\n") and File.file?(str)
  sks = @sections.keys.sort
  str.each_line { |l|
    case l.strip
    when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i	# kernel.map style
      set_label_at($1.to_i(16)+off, $3)
    when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i	# IDA style
      # we do not have section load order, let's just hope that the addresses are sorted (and sortable..)
      # could check the 1st part of the file, with section sizes, but it is not very convenient
      # the regexp is so that we skip the 1st part with section descriptions
      # in the file, section 1 is the 1st section ; we have an additional section (exe header) which fixes the 0-index
      next if not base = sks[$1.to_i(16)]
      set_label_at(base + $2.to_i(16) + off, $3)
    end
  }
end
821
+
822
+ # saves the dasm state in a file
823
# Saves the dasm state in a file.
# The state is first written by save_io to "<file>.tmp", which is then
# renamed to file, so an existing file is only replaced by a complete save.
# If anything raises, the temporary file is removed and the error propagates.
# Returns the result of File.rename.
def save_file(file)
  tmpfile = file + '.tmp'
  renamed = false
  File.open(tmpfile, 'wb') { |fd| save_io(fd) }
  ret = File.rename(tmpfile, file)
  renamed = true
  ret
ensure
  # do not leave a partial savefile behind
  File.unlink(tmpfile) if not renamed and tmpfile and File.exist?(tmpfile)
end
828
+
829
+ # saves the dasm state to an IO
830
# Writes the dasm state to an IO.
# The output starts with a 'Metasm.dasm' line, followed by chunks made of a
# "<type> <length>" header line and <length> characters of payload.
# Chunks written, in order: binarypath or cpu, one section per @sections
# entry, map, decoded, comment, blocks, funcs, xrefs, c.
def save_io(fd)
  emit = lambda { |tag, body| fd.puts "#{tag} #{body.length}", body }
  # comma-separated Expression rendering of an address list (nil => '')
  addr_list = lambda { |l| l.to_a.map { |t_| Expression[t_] }.join(',') }
  xref_line = lambda { |a, x|
    [Expression[a], x.type, x.len, (Expression[x.origin] if x.origin)].join(',')
  }

  fd.puts 'Metasm.dasm'

  if @program.filename
    emit['binarypath', @program.filename.to_s]
  else
    # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE
    emit['cpu', "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}"]
  end

  @sections.each { |a, e|
    # forget edata exports/relocs
    # dump at most 16Mo per section
    header = "#{Expression[a]} #{e.length}\n"
    emit['section', header + [e.data[0, 2**24].to_str].pack('m*')]
  }

  emit['map', save_map.join("\n")]

  decoded = @decoded.map { |a, d|
    next if not d.kind_of? DecodedInstruction
    "#{Expression[a]},#{d.bin_length} #{d.instruction}#{" ; #{d.comment.join(' ')}" if d.comment}"
  }
  emit['decoded', decoded.compact.sort.join("\n")]

  # one output line per comment line, each prefixed with its address
  comments = @comment.map { |a, c|
    c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" }
  }
  emit['comment', comments.join("\n")]

  bl = @decoded.values.map { |d|
    d.block if d.kind_of? DecodedInstruction and d.block_head?
  }.compact
  blocks = bl.map { |b|
    [Expression[b.address],
     b.list.map { |d| Expression[d.address] }.join(','),
     addr_list[b.to_normal],
     addr_list[b.to_subfuncret],
     addr_list[b.to_indirect],
     addr_list[b.from_normal],
     addr_list[b.from_subfuncret],
     addr_list[b.from_indirect],
    ].join(';')
  }
  emit['blocks', blocks.sort.join("\n")]

  funcs = @function.map { |a, f|
    next if not @decoded[a]
    [a, *f.return_address.to_a].map { |e| Expression[e] }.join(',')
  }
  # TODO binding ?
  emit['funcs', funcs.compact.sort.join("\n")]

  xrefs = @xrefs.map { |a, x|
    a = ':default' if a == :default
    a = ':unknown' if a == Expression::Unknown
    # XXX origin
    case x
    when nil
    when Xref
      xref_line[a, x]
    when Array
      x.map { |x_| xref_line[a, x_] }
    end
  }
  emit['xrefs', xrefs.compact.join("\n")]

  emit['c', @c_parser.to_s]

  #t = bl.map { |b| b.backtracked_for }
  #fd.puts "trace #{t.length}" , t
end
907
+
908
+ # loads a disassembler from a saved file
909
# Creates a new instance with new(nil, nil), makes it load the savefile
# content str (the optional block is passed along to the instance's load),
# and returns the instance.
def self.load(str, &b)
  dasm = new(nil, nil)
  dasm.load(str, &b)
  dasm
end
914
+
915
+ # loads the dasm state from a savefile content
916
+ # will yield unknown segments / binarypath notfound
917
# Loads the dasm state from a savefile content (see save_io).
#
# str:: the whole savefile content; must start with a 'Metasm.dasm' line
#
# The content is read as a sequence of chunks: a "<type> <length>" header
# line followed by <length> characters of payload.
# If a block is given, it is called with (type, data):
# * for a 'binarypath' chunk naming a file that does not exist; the block's
#   return value is then used as the path
# * for every chunk of unknown type
# Malformed lines inside a chunk are skipped (reported when $VERBOSE).
# Raises a RuntimeError if str does not start with the expected magic line.
def load(str)
  raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm'
  off = 12
  pp = Preprocessor.new
  app = AsmPreprocessor.new
  while off < str.length
    # chunk header line, then the payload
    i = str.index("\n", off) || str.length
    type, len = str[off..i].chomp.split
    off = i+1
    data = str[off, len.to_i]
    off += len.to_i
    case type
    when nil, ''
    when 'binarypath'
      data = yield(type, data) if not File.exist? data and block_given?
      reinitialize AutoExe.decode_file(data)
      @program.disassembler = self
      @program.init_disassembler
    when 'cpu'
      cpuname, size, endianness = data.split
      cpu = Metasm.const_get(cpuname)
      raise 'invalid cpu' if not cpu < CPU
      cpu = cpu.new
      cpu.size = size.to_i
      cpu.endianness = endianness.to_sym
      reinitialize Shellcode.new(cpu)
      @program.disassembler = self
      @program.init_disassembler
    when 'section'
      # 1st line: "<addr> <virtual length>", rest: base64 of the raw data
      info = data[0, data.index("\n") || data.length]
      data = data[info.length, data.length]
      pp.feed!(info)
      addr = Expression.parse(pp).reduce
      len = Expression.parse(pp).reduce
      edata = EncodedData.new(data.unpack('m*').first, :virtsize => len)
      add_section(addr, edata)
    when 'map'
      load_map data
    when 'decoded'
      data.each_line { |l|
        begin
          next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/
          a, len, instr, cmt = $1, $2, $3, $4
          a = Expression.parse(pp.feed!(a)).reduce
          instr = @cpu.parse_instruction(app.feed!(instr))
          di = DecodedInstruction.new(instr, a)
          di.bin_length = len.to_i
          di.add_comment cmt if cmt
          @decoded[a] = di
        rescue
          puts "load: bad di #{l.inspect}" if $VERBOSE
        end
      }
    when 'blocks'
      data.each_line { |l|
        # fields: address;instruction addrs;to_normal;to_subfuncret;to_indirect;from_normal;from_subfuncret;from_indirect
        bla = l.chomp.split(';').map { |sl| sl.split(',') }
        begin
          a = Expression.parse(pp.feed!(bla.shift[0])).reduce
          b = InstructionBlock.new(a, get_section_at(a).to_a[0])
          bla.shift.each { |e|
            a = Expression.parse(pp.feed!(e)).reduce
            b.add_di(@decoded[a])
          }
          bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s|
            b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty?
          }
        rescue
          puts "load: bad block #{l.inspect}" if $VERBOSE
        end
      }
    when 'funcs'
      data.each_line { |l|
        begin
          a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce }
          @function[a] = DecodedFunction.new
          @function[a].return_address = r if not r.empty?
          @function[a].finalized = true
          # TODO
        rescue
          puts "load: bad function #{l.inspect} #$!" if $VERBOSE
        end
      }
    when 'comment'
      data.each_line { |l|
        begin
          # drop the line terminator so the comment round-trips with save_io
          a, c = l.chomp.split(' ', 2)
          # an address without text would store a nil comment
          next if not c
          a = Expression.parse(pp.feed!(a)).reduce
          @comment[a] ||= []
          @comment[a] |= [c]
        rescue
          puts "load: bad comment #{l.inspect} #$!" if $VERBOSE
        end
      }
    when 'c'
      begin
        # TODO parse_invalid_c, split per function, whatever
        parse_c('')
        @c_parser.allow_bad_c = true
        parse_c(data, 'savefile#c')
      rescue
        puts "load: bad C: #$!", $!.backtrace if $VERBOSE
      end
      @c_parser.readtok until @c_parser.eos? if @c_parser
    when 'xrefs'
      data.each_line { |l|
        begin
          a, t, len, o = l.chomp.split(',')
          case a
          when ':default'; a = :default
          when ':unknown'; a = Expression::Unknown
          else a = Expression.parse(pp.feed!(a)).reduce
          end
          t = (t.empty? ? nil : t.to_sym)
          len = (len != '' ? len.to_i : nil)
          o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil)	# :default/:unknown ?
          add_xref(a, Xref.new(t, o, len))
        rescue
          puts "load: bad xref #{l.inspect} #$!" if $VERBOSE
        end
      }
    #when 'trace'
    else
      if block_given?
        yield(type, data)
      else
        puts "load: unsupported section #{type.inspect}" if $VERBOSE
      end
    end
  end
end
1047
+
1048
+ # change the base address of the loaded binary
1049
+ # better done early (before disassembling anything)
1050
+ # returns the delta
1051
# Moves the binary so that its lowest section address becomes newaddr.
# Computes the delta from the smallest key of @sections and hands it to
# rebase_delta, whose return value is returned.
def rebase(newaddr)
  lowest = @sections.keys.min
  rebase_delta(newaddr - lowest)
end
1054
+
1055
# Adds delta to the Integer addresses stored in the disassembler state:
# keys of @sections, @decoded, @xrefs, @function and @comment, the contents
# of @addrs_todo and @addrs_done, the values of @prog_binding and
# @old_prog_binding, block and instruction addresses and links, function
# return addresses, backtracked_for lists and xref origins.
# Non-Integer addresses are left alone.
# Resets @label_alias_cache. Returns delta.
def rebase_delta(delta)
  # Arrays and Hash keys are updated in place; Integers cannot be, so the
  # relocated value is returned and the caller has to store it
  relocate = lambda { |obj|
    case obj
    when Array
      obj.map! { |elem| relocate[elem] }
    when Hash
      moved = {}
      obj.each { |k, v| moved[relocate[k]] = v }
      obj.replace moved
    when Integer
      obj += delta
    when BacktraceTrace
      obj.origin = relocate[obj.origin]
      obj.address = relocate[obj.address]
    end
    obj
  }

  [@sections, @decoded, @xrefs, @function, @addrs_todo, @addrs_done, @comment].each { |container|
    relocate[container]
  }
  @prog_binding.keys.each { |name| @prog_binding[name] = relocate[@prog_binding[name]] }
  @old_prog_binding.keys.each { |name| @old_prog_binding[name] = relocate[@old_prog_binding[name]] }
  @label_alias_cache = nil

  @decoded.values.grep(DecodedInstruction).each { |di|
    # block-level data is handled once, through the head instruction
    if di.block_head?
      blk = di.block
      blk.address += delta
      relocate[blk.to_normal]
      relocate[blk.to_subfuncret]
      relocate[blk.to_indirect]
      relocate[blk.from_normal]
      relocate[blk.from_subfuncret]
      relocate[blk.from_indirect]
      relocate[blk.backtracked_for]
    end
    di.address = relocate[di.address]
    di.next_addr = relocate[di.next_addr]
  }

  @function.each_value { |func|
    func.return_address = relocate[func.return_address]
    relocate[func.backtracked_for]
  }

  @xrefs.values.flatten.compact.each { |xref| xref.origin = relocate[xref.origin] }

  delta
end
1106
+
1107
+ # change Expression display mode for current object o to display integers as char constants
1108
# Toggles the :char entry of render_info for every expression yielded by
# o.each_expr: cleared (nil) when currently set, set to @cpu.endianness
# otherwise. render_info is created as an empty Hash when missing.
# Does nothing (returns nil) unless o is a Renderable.
def toggle_expr_char(o)
  return unless o.kind_of? Renderable
  o.each_expr { |expr|
    expr.render_info ||= {}
    info = expr.render_info
    if info[:char]
      info[:char] = nil
    else
      info[:char] = @cpu.endianness
    end
  }
end
1115
+
1116
+ # patch Expressions in current object to include label names when available
1117
+ # XXX should we also create labels ?
1118
# For every expression yielded by o.each_expr, toggles each side (lexpr
# then rexpr) between label and value:
# * a side that is a key of @prog_binding is replaced by the bound value
# * otherwise an Integer side for which get_label_at returns a label is
#   replaced by that label; an :addr xref from o.address is also added
#   when o responds to :address
# Does nothing (returns nil) unless o is a Renderable.
# XXX should we also create labels ?
def toggle_expr_offset(o)
  return unless o.kind_of? Renderable
  o.each_expr { |e|
    [:lexpr, :rexpr].each { |side|
      setter = "#{side}="
      if n = @prog_binding[e.send(side)]
        e.send(setter, n)
      elsif e.send(side).kind_of?(::Integer) and (n = get_label_at(e.send(side)))
        add_xref(normalize(e.send(side)), Xref.new(:addr, o.address)) if o.respond_to? :address
        e.send(setter, n)
      end
    }
  }
end
1135
+
1136
+ # call this function on a function entrypoint if the function is in fact a __noreturn
1137
+ # will cut the to_subfuncret of callers
1138
# To be called on the entrypoint of a function that never returns.
# For every :x xref to o whose origin is a decoded instruction with the
# :saveip property, removes that instruction from the from_subfuncret list
# of each of its block's to_subfuncret targets (the list is set to nil when
# it becomes empty), then sets the block's to_subfuncret to nil.
def fix_noreturn(o)
  each_xref(o, :x) { |xref|
    call_di = di_at(normalize(xref.origin))
    next if not call_di
    next if not call_di.opcode.props[:saveip]

    # XXX should check if caller also becomes __noreturn
    call_di.block.each_to_subfuncret { |ret_addr|
      ret_di = di_at(ret_addr)
      next if not ret_di
      next if not ret_di.block.from_subfuncret
      ret_di.block.from_subfuncret.delete_if { |from| normalize(from) == call_di.address }
      if ret_di.block.from_subfuncret.empty?
        ret_di.block.from_subfuncret = nil
      end
    }
    call_di.block.to_subfuncret = nil
  }
end
1151
+
1152
+ # find the addresses of calls calling the address, handles thunks
1153
# Finds the addresses of the call instructions reaching funcaddr, looking
# through thunks.
# For each :x xref to funcaddr, walks backwards from the xref origin (across
# single from_subfuncret / single from_normal links) until an instruction
# with the :saveip property is found.
# Returns the array of distinct call addresses found.
def call_sites(funcaddr)
  find_call_site = proc { |a|
    cs = nil
    # addresses already walked through: a cyclic chain (eg a block jumping
    # to itself) would otherwise be followed forever
    seen = {}
    while di = di_at(a)
      if di.opcode.props[:saveip]
        cs = di.address
        break
      end
      break if seen[a]
      seen[a] = true
      if di.block.from_subfuncret.to_a.first
        # skip backwards over the calls whose return leads here
        seen_ret = {}
        while di.block.from_subfuncret.to_a.length == 1
          a = di.block.from_subfuncret[0]
          break if seen_ret[a] or not di_at(a)
          seen_ret[a] = true
          a = @decoded[a].block.list.first.address
          di = @decoded[a]
        end
      end
      # give up on ambiguous predecessors
      break if di.block.from_subfuncret.to_a.first
      break if di.block.from_normal.to_a.length != 1
      a = di.block.from_normal.first
    end
    cs
  }
  ret = []
  each_xref(normalize(funcaddr), :x) { |a|
    ret << find_call_site[a.origin]
  }
  ret.compact.uniq
end
1180
+
1181
+ # loads a disassembler plugin script
1182
+ # this is simply a ruby script instance_eval() in the disassembler
1183
+ # the filename argument is autocompleted with '.rb' suffix, and also
1184
+ # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd
1185
# Loads a disassembler plugin: the file content is instance_eval'ed in the
# disassembler.
# If plugin_filename does not exist, the first existing file among these is
# used instead: plugin_filename + '.rb', then (only if Metasmdir is defined)
# Metasmdir/samples/dasm-plugins/plugin_filename, with and without '.rb'.
# When none exists, the original name is read and File.read raises.
# Returns the value of the evaluated script.
def load_plugin(plugin_filename)
  unless File.exist?(plugin_filename)
    candidates = [plugin_filename + '.rb']
    if defined? Metasmdir
      # try autocomplete
      pf = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename)
      candidates << pf
      candidates << (pf + '.rb')
    end
    found = candidates.find { |f| File.exist? f }
    plugin_filename = found if found
  end

  instance_eval File.read(plugin_filename)
end
1202
+
1203
+ # same as load_plugin, but hides the @gui attribute while loading, preventing the plugin from popping up GUI elements
1204
+ # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionality
1205
+ # XXX this also prevents setting up kbd_callbacks etc..
1206
# Runs load_plugin(plugin_filename) with @gui set to nil, then puts back
# the value gui returned beforehand, even if the plugin raises.
# Returns the value of load_plugin.
def load_plugin_nogui(plugin_filename)
  begin
    saved_gui = gui
    @gui = nil
    load_plugin(plugin_filename)
  ensure
    @gui = saved_gui
  end
end
1213
+
1214
+ # compose two code/instruction's backtrace_binding
1215
+ # assumes bd1 is followed by bd2 in the code flow
1216
+ # eg inc edi + push edi =>
1217
+ # { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] }
1218
+ # XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to
1219
+ # revert the change made by bd2, which only works with simple ptr addition now
1220
+ # XXX unhandled situations may be resolved using :unknown, or by returning incorrect values
1221
# Composes two backtrace bindings, bd1 being executed before bd2.
#
# bd1, bd2:: binding hashes, or DecodedInstructions (their
#            backtrace_binding is then used, computed through
#            cpu.get_backtrace_binding and stored when missing)
#
# Returns a new hash: every value of bd2 is bound with bd1 and reduced,
# then the entries of bd1 are added when not already present. Indirection
# keys of bd1 are additionally re-expressed according to what bd2 does to
# the externals of their pointer (only simple additions are reverted).
# XXX unhandled situations may yield Expression::Unknown or incorrect values
def compose_bt_binding(bd1, bd2)
  to_binding = lambda { |b|
    if b.kind_of? DecodedInstruction
      b.backtrace_binding ||= cpu.get_backtrace_binding(b)
    else
      b
    end
  }
  bd1 = to_binding[bd1]
  bd2 = to_binding[bd2]

  reduce = lambda { |e| Expression[Expression[e].reduce] }

  bd = {}

  bd2.each { |k, v|
    bd[k] = reduce[v.bind(bd1)]
  }

  # for each pointer appearing in keys of bd1, we must infer from bd2 what final
  # pointers should appear in bd
  # eg 'mov [eax], 0  mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax }
  bd1.each { |k, v|
    if not k.kind_of? Indirection
      bd[k] ||= reduce[v]
      next
    end

    handled = false
    k.pointer.externals.each { |ext|
      # XXX this will break on nontrivial pointers or bd2
      bd2.each { |k2, v2|
        # we dont want to invert computation of flag_zero/carry etc (booh)
        next if k2.to_s =~ /flag/

        # discard indirection etc, result would be too complex / not useful
        next unless Expression[v2].expr_externals.include? ext

        handled = true

        # try to reverse the computation made upon 'ext'
        # only simple addition handled here
        ptr = reduce[k.pointer.bind(ext => Expression[[k2, :-, v2], :+, ext])]

        # if bd2 does not rewrite ext, duplicate the original pointer
        # (otherwise we cant check if add reversion was successful..)
        unless bd2[ext]
          bd[k] ||= reduce[v]

          # here we should not see 'ext' in ptr anymore
          ptr = Expression::Unknown if ptr.externals.include? ext
        end

        bd[Indirection[reduce[ptr], k.len]] ||= reduce[v]
      }
    }
    bd[k] ||= reduce[v] unless handled
  }

  bd
end
1279
+ end
1280
+ end