metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,1280 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # this file complements disassemble.rb, adding misc user-friendly methods
7
+
8
+ module Metasm
9
class InstructionBlock
  # records addr as a predecessor of this block
  # type selects the target list: :normal, :subfuncret or :indirect
  def add_from(addr, type=:normal)
    send("add_from_#{type}", addr)
  end

  # each add_from_* appends addr to the matching list unless it is already there
  def add_from_normal(addr)
    @from_normal = (@from_normal || []) | [addr]
  end

  def add_from_subfuncret(addr)
    @from_subfuncret = (@from_subfuncret || []) | [addr]
  end

  def add_from_indirect(addr)
    @from_indirect = (@from_indirect || []) | [addr]
  end

  # walks every predecessor address, yielding [address, type]
  # with type in [:normal, :subfuncret, :indirect]
  def each_from
    each_from_normal do |a|
      yield a, :normal
    end
    each_from_subfuncret do |a|
      yield a, :subfuncret
    end
    each_from_indirect do |a|
      yield a, :indirect
    end
  end

  def each_from_normal(&b)
    return unless from_normal
    @from_normal.each(&b)
  end

  def each_from_subfuncret(&b)
    return unless from_subfuncret
    @from_subfuncret.each(&b)
  end

  def each_from_indirect(&b)
    return unless from_indirect
    @from_indirect.each(&b)
  end

  # records addr as a successor of this block
  # type selects the target list: :normal, :subfuncret or :indirect
  def add_to(addr, type=:normal)
    send("add_to_#{type}", addr)
  end

  def add_to_normal(addr)
    @to_normal = (@to_normal || []) | [addr]
  end

  def add_to_subfuncret(addr)
    @to_subfuncret = (@to_subfuncret || []) | [addr]
  end

  def add_to_indirect(addr)
    @to_indirect = (@to_indirect || []) | [addr]
  end

  # walks every successor address, yielding [address, type]
  def each_to
    each_to_normal do |a|
      yield a, :normal
    end
    each_to_subfuncret do |a|
      yield a, :subfuncret
    end
    each_to_indirect do |a|
      yield a, :indirect
    end
  end

  def each_to_normal(&b)
    return unless to_normal
    @to_normal.each(&b)
  end

  def each_to_subfuncret(&b)
    return unless to_subfuncret
    @to_subfuncret.each(&b)
  end

  def each_to_indirect(&b)
    return unless to_indirect
    @to_indirect.each(&b)
  end

  # yields the predecessors belonging to the same function
  # yields nothing when this block is registered in dasm.function
  # the subfuncret list is walked before the normal list
  def each_from_samefunc(dasm, &b)
    return if dasm.function[address]
    if from_subfuncret
      @from_subfuncret.each(&b)
    end
    if from_normal
      @from_normal.each(&b)
    end
  end

  # yields the predecessors that are not in the same subfunction as this block:
  # normal/subfuncret ones only when this block is registered in dasm.function,
  # indirect ones always
  def each_from_otherfunc(dasm, &b)
    if from_normal && dasm.function[address]
      @from_normal.each(&b)
    end
    if from_subfuncret && dasm.function[address]
      @from_subfuncret.each(&b)
    end
    if from_indirect
      @from_indirect.each(&b)
    end
  end

  # yields the normalized normal/subfuncret successors that are not registered in dasm.function
  def each_to_samefunc(dasm)
    each_to do |to, type|
      next unless type == :normal || type == :subfuncret
      to = dasm.normalize(to)
      yield to unless dasm.function[to]
    end
  end

  # yields the normalized successors that are indirect, registered in dasm.function,
  # or absent from dasm.decoded
  def each_to_otherfunc(dasm)
    each_to do |to, type|
      to = dasm.normalize(to)
      if type == :indirect || dasm.function[to] || !dasm.decoded[to]
        yield to
      end
    end
  end
end
103
+
104
class DecodedInstruction
  # checks if this instruction is the first of its IBlock
  # returns false for an instruction that is not attached to any block
  # instead of raising a NoMethodError on nil
  def block_head?
    return false if not @block
    self == @block.list.first
  end
end
110
+
111
class CPU
  # compat alias, for scripts using older version of metasm
  def get_backtrace_binding(di) backtrace_binding(di) end

  # return something like backtrace_binding in the forward direction
  # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
  #
  # di.backtrace_binding is used as a cache (filled from get_backtrace_binding if unset);
  # the returned hash is always a distinct object, so the pc_reg / :incomplete_binding
  # entries added here never leak into that cache
  def get_fwdemu_binding(di, pc_reg=nil)
    fdi = (di.backtrace_binding ||= get_backtrace_binding(di)).dup
    # find self-updated regs & revert them in simultaneous assignments
    # XXX handles only a <- a+i for now, this covers all useful cases (except imul eax, eax, 42 jz foobar)
    fdi.keys.grep(::Symbol).each { |s|
      val = Expression[fdi[s]]
      next if val.lexpr != s or (val.op != :+ and val.op != :-)	#or not val.rexpr.kind_of? ::Integer
      fwd = { s => val }
      inv = { s => val.dup }
      # inv undoes the update of s: same operands, opposite operator
      inv[s].op = ((inv[s].op == :+) ? :- : :+)
      nxt = {}
      fdi.each { |k, v|
        if k == s
          nxt[k] = v
        else
          k = k.bind(fwd).reduce_rec if k.kind_of? Indirection
          nxt[k] = Expression[Expression[v].bind(inv).reduce_rec]
        end
      }
      fdi = nxt
    }
    if pc_reg
      if di.opcode.props[:setip]
        xr = get_xrefs_x(nil, di)
        if xr and xr.length == 1
          fdi[pc_reg] = xr[0]
        else
          # zero or several possible targets: flag the binding instead of guessing
          fdi[:incomplete_binding] = Expression[1]
        end
      else
        fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
      end
    end
    fdi
  end
end
153
+
154
+ class Disassembler
155
# reader for @@backtrace_maxblocks, the default used by newly created Disassemblers
def self.backtrace_maxblocks
  @@backtrace_maxblocks
end

# writer for @@backtrace_maxblocks
def self.backtrace_maxblocks=(b)
  @@backtrace_maxblocks = b
end
158
+
159
# returns the first element of #get_section_at(addr): the section's edata
# (with its #ptr pointing to addr), or nil when no section is found
def get_edata_at(addr)
  section = get_section_at(addr)
  section[0] if section
end
167
+
168
# returns the DecodedInstruction stored in @decoded for addr
# (tried as-is first, then normalized), nil if there is none
def di_at(addr)
  return nil unless addr
  found = @decoded[addr] || @decoded[normalize(addr)]
  found.kind_of?(DecodedInstruction) ? found : nil
end
173
+
174
# returns the InstructionBlock of the instruction decoded at addr, nil if none
def block_at(addr)
  di = di_at(addr)
  return nil unless di
  di.block
end
179
+
180
# returns the DecodedFunction stored in @function for addr
# (tried as-is first, then normalized), nil if there is none
def function_at(addr)
  return nil unless addr
  found = @function[addr] || @function[normalize(addr)]
  found.kind_of?(DecodedFunction) ? found : nil
end
185
+
186
# returns the DecodedInstruction covering addr
# looks back up to 15 bytes; when several instructions overlap addr,
# the one starting nearest to addr wins
def di_including(addr)
  return if not addr
  addr = normalize(addr)
  back = (0...16).find do |o|
    @decoded[addr-o].kind_of?(DecodedInstruction) && @decoded[addr-o].bin_length > o
  end
  return nil unless back
  @decoded[addr-back]
end
195
+
196
# returns the InstructionBlock containing the byte at addr
# on overlapping instructions, uses the one picked by di_including()
def block_including(addr)
  di = di_including(addr)
  return nil unless di
  di.block
end
202
+
203
# returns the DecodedFunction including this byte
# when a block is shared by multiple funcs, the one found by find_function_start() is used
def function_including(addr)
  di = di_including(addr)
  return unless di
  function_at(find_function_start(di.address))
end
209
+
210
# yields every InstructionBlock (the block is optional)
# returns the list of IBlocks
# blocks are found through the block-head instructions of @decoded
def each_instructionblock
  blocks = []
  @decoded.each do |_addr, di|
    next unless di.kind_of?(DecodedInstruction) && di.block_head?
    blocks << di.block
    yield di.block if block_given?
  end
  blocks
end
alias instructionblocks each_instructionblock
222
+
223
# return a backtrace_binding reversed (akin to code emulation) (but not really)
# plain forwarder to the cpu's get_fwdemu_binding
def get_fwdemu_binding(di, pc=nil)
  cpu = @cpu
  cpu.get_fwdemu_binding(di, pc)
end
227
+
228
# reads len raw bytes from the mmaped address space
# returns nil when addr is not in a section
def read_raw_data(addr, len)
  section = get_section_at(addr)
  return unless section
  section[0].read(len)
end
233
+ end
234
+
235
# read an int of arbitrary type (:u8, :i32, ...)
# an Integer type is taken as a byte count and decoded unsigned (eg 4 => :u32)
# returns nil when addr is not in a section
def decode_int(addr, type)
  type = :"u#{type*8}" if type.kind_of?(Integer)
  section = get_section_at(addr)
  return unless section
  section[0].decode_imm(type, @cpu.endianness)
end
241
+ end
242
+
243
# read a byte at address addr (decode_int with type :u8)
def decode_byte(addr)
  decode_int addr, :u8
end
247
+
248
# read a dword at address addr
# the dword is cpu-sized (eg 32 or 64bits): @cpu.size/8 bytes
def decode_dword(addr)
  nbytes = @cpu.size/8
  decode_int(addr, nbytes)
end
253
+
254
# read a zero-terminated string from addr, without its terminator
# returns nil if addr is not in a section or no terminal 0 is found within maxsz bytes
def decode_strz(addr, maxsz=4096)
  section = get_section_at(addr)
  return unless section
  raw = section[0].read(maxsz).to_s
  len = raw.index(?\0)
  return unless len
  raw[0, len]
end
263
+
264
# read a zero-terminated wide string from addr (16bit little-endian units)
# returns the raw bytes before the terminator
# returns nil if addr is not in a section or no terminator is found within maxsz bytes
def decode_wstrz(addr, maxsz=4096)
  section = get_section_at(addr)
  return unless section
  raw = section[0].read(maxsz).to_s
  nchars = raw.unpack('v*').index(0)
  return unless nchars
  raw[0, 2*nchars]
end
273
+
274
# disassembles one instruction at address
# returns nil if addr is not in a section (otherwise whatever the cpu decoder returns)
# does not update any internal state of the disassembler, nor reuse the @decoded cache
def disassemble_instruction(addr)
  section = get_section_at(addr)
  return unless section
  @cpu.decode_instruction(section[0], normalize(addr))
end
282
+
283
# disassemble addr as if the code flow came from from_addr
# from_addr may be an address or a DecodedInstruction
# if from_addr is already decoded, addr is added to its block's to_normal
def disassemble_from(addr, from_addr)
  if from_addr.kind_of?(DecodedInstruction)
    from_addr = from_addr.address
  end
  from_addr = normalize(from_addr)
  origin_block = block_at(from_addr)
  origin_block.add_to_normal(addr) if origin_block
  @addrs_todo << [addr, from_addr]
  disassemble
end
293
+
294
# returns the label associated to an addr, or nil if none exist
# the section lookup is done without memcheck (2nd arg of get_section_at is false)
def get_label_at(addr)
  edata, _base = get_section_at(addr, false)
  return unless edata
  edata.inv_export[edata.ptr]
end
299
+
300
# sets the label for the specified address
# returns the label now in place, or nil if get_section_at finds nothing for the address
# memcheck is passed to get_section_at to validate that the address is mapped
# an existing label with another name is renamed through rename_label
def set_label_at(addr, name, memcheck=true)
  addr = Expression[addr].reduce
  edata, base = get_section_at(addr, memcheck)
  return nil unless edata

  label = edata.inv_export[edata.ptr]
  if !label
    # no label yet: create it and register it in both bindings
    label = @program.new_label(name)
    edata.add_export label, edata.ptr
    @label_alias_cache = nil
    @old_prog_binding[label] = @prog_binding[label] = base + edata.ptr
  elsif label != name
    label = rename_label(label, @program.new_label(name))
  end
  label
end
317
+
318
# remove a label at address addr
# name defaults to the label currently found at addr
# expressions of the Renderables referencing addr get name replaced by addr
def del_label_at(addr, name=get_label_at(addr))
  edata, _base = get_section_at(addr)
  if edata && edata.inv_export[edata.ptr]
    edata.del_export name, edata.ptr
    @label_alias_cache = nil
  end

  each_xref(addr) do |xr|
    next unless xr.origin
    referrer = @decoded[xr.origin]
    next unless referrer
    next unless referrer.kind_of?(Renderable)
    referrer.each_expr do |expr|
      expr.lexpr = addr if expr.lexpr == name
      expr.rexpr = addr if expr.rexpr == name
    end
  end

  @old_prog_binding.delete name
  @prog_binding.delete name
end
335
+
336
+ # changes a label to another: updates the referring instructions and their comments, the section export, @prog_binding/@old_prog_binding, @addrs_todo, @inv_section_reloc and the C parser toplevel symbol
337
+ # returns the new label; raises a RuntimeError if old is not in @prog_binding
338
+ # the new label must be program-uniq (see @program.new_label)
339
+ def rename_label(old, new)
340
+ each_xref(normalize(old)) { |x| # patch every decoded instruction that references old
341
+ next if not di = @decoded[x.origin]
342
+ @cpu.replace_instr_arg_immediate(di.instruction, old, new)
343
+ di.comment.to_a.each { |c| c.gsub!(old, new) } # comments are edited in place
344
+ }
345
+ e, l = get_section_at(old, false)
346
+ if e
347
+ e.add_export new, e.export.delete(old), true # move the export entry from old to new
348
+ end
349
+ raise "cant rename nonexisting label #{old}" if not @prog_binding[old] # NOTE(review): checked only after the xrefs and the export were already updated
350
+ @label_alias_cache = nil
351
+ @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old)
352
+ @addrs_todo.each { |at| # pending disassembly targets may still name old
353
+ case at[0]
354
+ when old; at[0] = new
355
+ when Expression; at[0] = at[0].bind(old => new)
356
+ end
357
+ }
358
+
359
+ if @inv_section_reloc[old]
360
+ @inv_section_reloc[old].each { |b, e_, o, r|
361
+ (0..16).each { |off| # look for a decoded instruction covering the relocated bytes at b+o
362
+ if di = @decoded[Expression[b]+o-off] and di.bin_length > off
363
+ @cpu.replace_instr_arg_immediate(di.instruction, old, new)
364
+ end
365
+ }
366
+ r.target = r.target.bind(old => new)
367
+ }
368
+ @inv_section_reloc[new] = @inv_section_reloc.delete(old)
369
+ end
370
+
371
+ if c_parser and @c_parser.toplevel.symbol[old] # keep the C toplevel symbol table in sync
372
+ @c_parser.toplevel.symbol[new] = @c_parser.toplevel.symbol.delete(old)
373
+ @c_parser.toplevel.symbol[new].name = new
374
+ end
375
+
376
+ new
377
+ end
378
+
379
# finds the start of a function from the address of an instruction
# addr may be an address or a DecodedInstruction
# walks the same-function predecessors backwards; returns the address of the first
# block met that is either registered in @function or has no same-function predecessor
# returns nil if nothing decoded is reachable
def find_function_start(addr)
  addr = addr.address if addr.kind_of?(DecodedInstruction)
  pending = [addr]
  seen = []
  while cur = pending.pop
    cur = normalize(cur)
    di = @decoded[cur]
    next if seen.include?(cur) || !di.kind_of?(DecodedInstruction)
    seen << cur
    block_start = di.block.address
    return block_start if @function[block_start]
    preds = []
    di.block.each_from_samefunc(self) { |f| preds << f }
    return block_start if preds.empty?
    pending.concat preds
  end
  nil
end
397
+
398
# iterates over the blocks of a function, yields each func block address (the block is optional)
# addr may be an address, a DecodedInstruction or a DecodedFunction registered in @function
# unless find_func_start is false, an addr that is not a key of @function is first
# resolved through find_function_start
# with incl_subfuncs, the each_to_otherfunc successors are followed too
# returns the graph of blocks (block address => [list of successor addresses followed])
def each_function_block(addr, incl_subfuncs = false, find_func_start = true)
  if addr.kind_of? DecodedFunction
    # Hash#index is gone since ruby 3.0; Hash#key does not exist on very old interpreters
    addr = @function.respond_to?(:key) ? @function.key(addr) : @function.index(addr)
  end
  addr = addr.address if addr.kind_of? DecodedInstruction
  addr = find_function_start(addr) if not @function[addr] and find_func_start
  todo = [addr]
  ret = {}
  while a = todo.pop
    next if not di = di_at(a)
    a = di.block.address
    next if ret[a]	# block already visited
    ret[a] = []
    yield a if block_given?
    di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f }
    di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs
  end
  ret
end
alias function_blocks each_function_block
418
+
419
# returns a graph of function calls
# for each func passed as arg (default: every key of @function plus the entrypoints),
# update the 'ret' hash, associating func => [list of direct subfuncs called]
# funcs are normalized, and ignored when absent from @decoded or already in ret
def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={})
  funcs = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] }
  funcs.each do |func|
    next if ret[func]
    ret[func] = []
    each_function_block(func) do |blockaddr|
      @decoded[blockaddr].block.each_to_otherfunc(self) do |subfunc|
        ret[func] |= [subfunc]
      end
    end
  end
  ret
end
435
+
436
# return the graph of function => subfunction list
# recurses from an entrypoint: function_graph is called again on every
# subfunction found so far, until the graph stops growing
def function_graph_from(addr)
  addr = normalize(addr)
  addr = find_function_start(addr) || addr
  graph = {}
  loop do
    size_before = graph.length
    function_graph(graph.values.flatten + [addr], graph)
    break if graph.length == size_before
  end
  graph
end
449
+
450
# return the graph of function => subfunction list
# restricted to the functions from which the function including addr can be reached
# (walks the full function_graph backwards, from callee to callers)
def function_graph_to(addr)
  addr = normalize(addr)
  addr = find_function_start(addr) || addr
  full = function_graph
  callers_graph = {}
  pending = [addr]
  visited = []
  while callee = pending.pop
    next if visited.include?(callee)
    visited << callee
    full.each do |func, subfuncs|
      next unless subfuncs.include?(callee)
      callers_graph[func] = (callers_graph[func] || []) | [callee]
      pending << func
    end
  end
  callers_graph
end
471
+
472
# returns info on sections, from @program if supported
# returns an array of [name, addr, length, info]
# without @program support, builds the list from @sections with a nil info
def section_info
  return @program.section_info if @program.respond_to?(:section_info)

  list = []
  @sections.each do |base, edata|
    entry = [get_label_at(base), normalize(base), edata.length, nil]
    list << entry
  end
  list
end
485
+
486
# transform an address into a file offset
# the address is normalized, the conversion itself is done by @program
def addr_to_fileoff(addr)
  @program.addr_to_fileoff(normalize(addr))
end
491
+
492
# transform a file offset into an address
# plain forwarder to @program
def fileoff_to_addr(foff)
  program = @program
  program.fileoff_to_addr(foff)
end
496
+
497
+ # remove the decodedinstructions from..to, replace them by the new Instructions in 'by' (an Array of Instruction and/or DecodedInstruction, modified in place)
498
+ # this updates the block list structure, old di will still be visible in @decoded, except from original block (those are deleted)
499
+ # if from..to spans multiple blocks
500
+ # to.block is split after to
501
+ # all paths from from are replaced by a single link to after 'to', be careful !
502
+ # (eg a->b->... & a->c ; from in a, to in c => a->b is lost)
503
+ # all instructions are stuffed in the first block
504
+ # paths are only walked using from/to_normal
505
+ # 'by' may be empty
506
+ # returns the block containing the new instrs (nil if empty); raises 'bad from' / 'bad to' if from or to is not a decoded instruction listed in its block
507
+ def replace_instrs(from, to, by)
508
+ raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi)
509
+ raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi)
510
+
511
+ # split the blocks at from, and right after to unless to is already last in its block (see split_block)
512
+ split_block(fdi.block, fdi.address)
513
+ split_block(tdi.block, tdi.block.list[tdi.block.list.index(tdi)+1].address) if tdi != tdi.block.list.last
514
+ fb = fdi.block # fb: block holding from, it receives the new instructions
515
+ tb = tdi.block # tb: block holding to, its outgoing links are inherited by fb
516
+
517
+ # generate DecodedInstr from Instrs
518
+ # try to keep the bin_length of original block
519
+ wantlen = tdi.address + tdi.bin_length - fb.address # byte length from the start of fb to the end of to
520
+ wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length } # minus what the DecodedInstructions of 'by' already take
521
+ ldi = by.last
522
+ ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction # temporary wrapper, only used to read the opcode props on the next line
523
+ wantlen = by.grep(Instruction).length if wantlen < 0 or (ldi and ldi.opcode.props[:setip]) # fallback length: as many bytes as there are Instructions to convert
524
+ by.map! { |di|
525
+ if di.kind_of? Instruction
526
+ di = DecodedInstruction.new(di)
527
+ wantlen -= di.bin_length = wantlen / by.grep(Instruction).length # share what is left among the Instructions still in 'by' (grep is re-evaluated on each pass)
528
+ end
529
+ di
530
+ }
531
+
532
+ #puts " ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip]
533
+ by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip]
534
+ fb.list.each { |di| @decoded.delete di.address } # drop the replaced instructions from @decoded
535
+ fb.list.clear
536
+ tb.list.each { |di| @decoded.delete di.address }
537
+ tb.list.clear
538
+ by.each { |di| fb.add_di di }
539
+ by.each_with_index { |di, i|
540
+ if odi = di_at(di.address)
541
+ # collision, hopefully with another deobfuscation run ?
542
+ if by[i..-1].all? { |mydi| mydi.to_s == @decoded[mydi.address].to_s } # the rest of 'by' renders exactly like what is already decoded there
543
+ puts "replace_instrs: merge at #{di}" if $DEBUG
544
+ by[i..-1] = by[i..-1].map { |xdi| @decoded[xdi.address] }
545
+ by[i..-1].each { fb.list.pop } # take the duplicates back out of fb
546
+ split_block(odi.block, odi.address)
547
+ tb.to_normal = [di.address]
548
+ (odi.block.from_normal ||= []) << to
549
+ odi.block.from_normal.uniq!
550
+ break
551
+ else
552
+ #raise "replace_instrs: collision #{di} vs #{odi}"
553
+ puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE
554
+ while @decoded[di.address].kind_of? DecodedInstruction # find free space.. raise ?
555
+ di.address += 1 # XXX use floats ?
556
+ di.bin_length -= 1
557
+ end
558
+ end
559
+ end
560
+ @decoded[di.address] = di
561
+ }
562
+ @addrs_done.delete_if { |ad| normalize(ad[0]) == tb.address or ad[1] == tb.address }
563
+ @addrs_done.delete_if { |ad| normalize(ad[0]) == fb.address or ad[1] == fb.address } if by.empty? and tb.address != fb.address
564
+
565
+ # update to_normal/from_normal
566
+ fb.to_normal = tb.to_normal
567
+ fb.to_normal.to_a.each { |newto|
568
+ # other paths may already point to newto, we must only update the relevant entry
569
+ if ndi = di_at(newto) and idx = ndi.block.from_normal.to_a.index(to)
570
+ if by.empty?
571
+ ndi.block.from_normal[idx,1] = fb.from_normal.to_a # nothing left in fb: link its predecessors straight to newto
572
+ else
573
+ ndi.block.from_normal[idx] = fb.list.last.address
574
+ end
575
+ end
576
+ }
577
+
578
+ fb.to_subfuncret = tb.to_subfuncret # same treatment for the subfuncret links
579
+ fb.to_subfuncret.to_a.each { |newto|
580
+ if ndi = di_at(newto) and idx = ndi.block.from_subfuncret.to_a.index(to)
581
+ if by.empty?
582
+ ndi.block.from_subfuncret[idx,1] = fb.from_subfuncret.to_a
583
+ else
584
+ ndi.block.from_subfuncret[idx] = fb.list.last.address
585
+ end
586
+ end
587
+ }
588
+
589
+ if by.empty?
590
+ tb.to_subfuncret = nil if tb.to_subfuncret == []
591
+ tolist = tb.to_subfuncret || tb.to_normal.to_a
592
+ if lfrom = get_label_at(fb.address) and tolist.length == 1 # retarget code references from fb's label to the single successor
593
+ lto = auto_label_at(tolist.first)
594
+ each_xref(fb.address, :x) { |x|
595
+ next if not di = @decoded[x.origin]
596
+ @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto)
597
+ di.comment.to_a.each { |c| c.gsub!(lfrom, lto) }
598
+ }
599
+ end
600
+ fb.from_normal.to_a.each { |newfrom| # predecessors that pointed to from now point to the successors
601
+ if ndi = di_at(newfrom) and idx = ndi.block.to_normal.to_a.index(from)
602
+ ndi.block.to_normal[idx..idx] = tolist
603
+ end
604
+ }
605
+ fb.from_subfuncret.to_a.each { |newfrom|
606
+ if ndi = di_at(newfrom) and idx = ndi.block.to_subfuncret.to_a.index(from)
607
+ ndi.block.to_subfuncret[idx..idx] = tolist
608
+ end
609
+ }
610
+ else
611
+ # merge with adjacent blocks
612
+ merge_blocks(fb, fb.to_normal.first) if fb.to_normal.to_a.length == 1 and di_at(fb.to_normal.first)
613
+ merge_blocks(fb.from_normal.first, fb) if fb.from_normal.to_a.length == 1 and di_at(fb.from_normal.first)
614
+ end
615
+
616
+ fb if not by.empty?
617
+ end
618
+
619
# undefine a sequence of decodedinstructions from an address
# follows the blocks chained by a single to_normal/from_normal link, and stops at the
# first non-linear branch or as soon as the chain comes back to a block already walked
# removes the entries from @decoded, @xrefs and @addrs_done
# (and the @comment entry of addr when addr was registered in @function)
# does not update @prog_binding (does not undefine labels)
def undefine_from(addr)
  return if not di_at(addr)
  @comment.delete addr if @function.delete addr
  split_block(addr)
  addrs = []
  walked = []
  while di = di_at(addr)
    blk = di.block
    # @decoded is only cleaned after the walk, so a looping chain (eg a block
    # jumping to itself) has to be detected here or the walk never ends
    break if walked.any? { |seen| seen.equal?(blk) }
    walked << blk
    blk.list.each { |ddi| addrs << ddi.address }
    break if blk.to_subfuncret.to_a != [] or blk.to_normal.to_a.length != 1
    addr = blk.to_normal.first
    # the next block is reachable from elsewhere: leave it alone
    break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1
  end
  addrs.each { |a| @decoded.delete a }
  @xrefs.delete_if { |a, x|
    if not x.kind_of? Array
      true if x and addrs.include? x.origin
    else
      x.delete_if { |xx| addrs.include? xx.origin }
      true if x.empty?
    end
  }
  @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? }
end
645
+
646
# merge two instruction blocks if they form a simple chain and are adjacent
# b1 and b2 may be InstructionBlocks or addresses (resolved with block_at)
# allow_nonadjacent skips the check that b2 starts right where b1 ends
# returns true if merged, nil otherwise
def merge_blocks(b1, b2, allow_nonadjacent = false)
  if b1 && !b1.kind_of?(InstructionBlock)
    b1 = block_at(b1)
    return unless b1
  end
  if b2 && !b2.kind_of?(InstructionBlock)
    b2 = block_at(b2)
    return unless b2
  end
  return unless b1 && b2
  return unless allow_nonadjacent || b1.list.last.next_addr == b2.address
  # b1 must lead only to b2, and b2 must have a single normal predecessor
  # (that handles delay_slot)
  return unless b1.to_normal.to_a == [b2.address] && b2.from_normal.to_a.length == 1
  return unless b1.to_subfuncret.to_a == [] && b2.from_subfuncret.to_a == []
  return unless b1.to_indirect.to_a == [] && b2.from_indirect.to_a == []

  b2.list.each { |di| b1.add_di di }
  b1.to_normal = b2.to_normal
  b2.list.clear
  @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address }
  true
end
666
+
667
# computes the binding of a code sequence
# just a forwarder to CPU#code_binding, with this disassembler as first argument
def code_binding(*a)
  @cpu.code_binding(self, *a)
end
672
+
673
+ # returns an array of instructions/labels that, once parsed and assembled, should
674
+ # give something equivalent to the code accessible from the (list of) entrypoints given
675
+ # from the @decoded dasm graph; with include_subfunc, the each_to_otherfunc targets of each block are walked too
676
+ # assume all jump targets have a matching label in @prog_binding
677
+ # may add unconditional jumps in the listing to preserve the code flow
678
+ def flatten_graph(entry, include_subfunc=true)
679
+ ret = []
680
+ entry = [entry] if not entry.kind_of? Array
681
+ todo = entry.map { |a| normalize(a) }
682
+ done = []
683
+ inv_binding = @prog_binding.invert # address => label
684
+ while addr = todo.pop
685
+ next if done.include? addr or not di_at(addr)
686
+ done << addr
687
+ b = @decoded[addr].block
688
+
689
+ ret << Label.new(inv_binding[addr]) if inv_binding[addr]
690
+ ret.concat b.list.map { |di| di.instruction }
691
+
692
+ b.each_to_otherfunc(self) { |to|
693
+ to = normalize to
694
+ todo.unshift to if include_subfunc # front of the list: popped last
695
+ }
696
+ b.each_to_samefunc(self) { |to|
697
+ to = normalize to
698
+ todo << to # end of the list: popped first
699
+ }
700
+
701
+ if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip] # execution may continue past the end of the block
702
+ to = b.list.last.next_addr
703
+ if todo.include? to
704
+ if done.include? to or not di_at(to) # cannot be emitted right after this block: jump to it explicitly
705
+ if not to_l = inv_binding[to]
706
+ to_l = auto_label_at(to, 'loc')
707
+ if done.include? to and idx = ret.index(@decoded[to].block.list.first.instruction)
708
+ ret.insert(idx, Label.new(to_l)) # the target is already in the listing: put the new label in front of it
709
+ end
710
+ end
711
+ ret << @cpu.instr_uncond_jump_to(to_l)
712
+ else
713
+ todo << to # ensure it's next in the listing
714
+ end
715
+ end
716
+ end
717
+ end
718
+
719
+ ret
720
+ end
721
+
722
# Returns a demangled C++ name.
# From wgcc-2.2.2/undecorate.cpp.
# Only names starting with '??' followed by a known operator code are handled:
# the result is the special name without its `' delimiters, or "op_<operator>".
# Any other name is returned unchanged.
# TODO
def demangle_cppname(name)
  return name unless name[0] == ?? and name[1] == ??

  mangled = name[2..-1]
  # operator codes are one char long, or two chars when introduced by '_'
  code = mangled[0, 1]
  code = mangled[0, 2] if code == '_'

  decoded = {
    '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=",
    'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&",
    'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",",
    'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=",
    'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=",
    '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'",
    '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'",
    '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'",
    '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'",
    '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'",
    '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]",
    '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[code]
  return name unless decoded

  # TODO
  decoded[0] == ?` ? decoded[1..-2] : "op_#{decoded}"
end
753
+
754
# Scans the raw content of all the sections for a regexp (a String is matched literally).
# Returns the list of matching addresses; when a block is given, each address is
# yielded and kept in the result only if the block returns a true value.
# The per-section work is delegated to the section's own pattern_scan, which receives
# chunksz (default 4MB) and margin (default 64KB, extra bytes scanned at each chunk
# so that /pat/ is found across chunk boundaries).
# Sections are scanned chunk by chunk, so this should work (slowly) on 4GB sections (eg debugger VM).
def pattern_scan(pat, chunksz=nil, margin=nil)
  chunksz = 4*1024*1024 unless chunksz
  margin = 65536 unless margin

  pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String

  matches = []
  @sections.each { |base, edata|
    edata.pattern_scan(pat, chunksz, margin) { |offset|
      addr = base + offset
      if block_given?
        matches << addr if yield(addr)
      else
        matches << addr
      end
      # the value handed back to the section scanner is always false
      false
    }
  }
  matches
end
774
+
775
# Returns the list of [addr, string] for the printable strings (bytes 0x20..0x7e,
# at least minlen long) found with pattern_scan.
# When a block is given, it is called with (addr, string) and the pair is kept only
# if the block returns a true value.
# At most 1024 bytes are read back for each string.
def strings_scan(minlen=6)
  ret = []
  re = /[\x20-\x7e]{#{minlen},}/m
  # end address of the last string seen; nil until a first string is found, so that
  # a string located at address 0 is not discarded
  nexto = nil
  pattern_scan(re, nil, 1024) { |o|
    # ignore matches that do not start past the previously reported string
    next if nexto and o <= nexto
    next unless e = get_edata_at(o)
    chunk = e.data[e.ptr, 1024]
    # the data may not be readable / may not match anymore: skip instead of crashing
    next unless str = (chunk && chunk[re])
    ret << [o, str] if not block_given? or yield(o, str)
    nexto = o + str.length
  }
  ret
end
789
+
790
# Exports the addr => symbol map (see load_map).
# Returns one "<addr> <type> <label>" string per @prog_binding entry; the type is
# 'c' when an instruction is decoded at the address, 'd' otherwise (XXX), and
# Integer addresses are written as 8-digit zero-padded hex.
def save_map
  lines = []
  @prog_binding.each { |label, addr|
    kind = (di_at(addr) ? 'c' : 'd')	# XXX
    shown = addr.kind_of?(::Integer) ? addr.to_s(16).rjust(8, '0') : addr
    lines << "#{shown} #{kind} #{label}"
  }
  lines
end
798
+
799
# Loads a map file (addr => symbol), defining a label for each entry with set_label_at.
# off is an optional offset added to every address found (for eg rebased binaries).
# Understands:
#  standard map files (eg linux-kernel.map: <addr> <type> <name>, e.g. 'c01001ba t setup_idt')
#  ida map files (<sectionidx>:<sectionoffset> <name>)
# str is either the map itself or the filename of the map: a string without newline
# that names an existing file is read from disk, any other string is parsed as the map.
# Lines that match neither format, or that reference an unknown section index, are ignored.
def load_map(str, off=0)
  if not str.index("\n")
    if File.file?(str)
      str = File.read(str)
    else
      # not a file: handle it as a one-line map instead of failing on a nil content
      puts "load_map: #{str.inspect} is not a file, parsing it as a map" if $VERBOSE
    end
  end
  sks = @sections.keys.sort
  str.each_line { |l|
    case l.strip
    when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i	# kernel.map style
      set_label_at($1.to_i(16)+off, $3)
    when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i	# IDA style
      # we do not have section load order, let's just hope that the addresses are sorted (and sortable..)
      #  could check the 1st part of the file, with section sizes, but it is not very convenient
      # the regexp is so that we skip the 1st part with section descriptions
      # in the file, section 1 is the 1st section ; we have an additional section (exe header) which fixes the 0-index
      secidx, secoff, name = $1.to_i(16), $2.to_i(16), $3
      if base = sks[secidx]
        set_label_at(base + secoff + off, name)
      elsif $VERBOSE
        puts "load_map: unknown section #{secidx} in #{l.inspect}"
      end
    end
  }
end
821
+
822
# Saves the dasm state in a file.
# The state is first written by save_io to "<file>.tmp", which is then renamed to
# file, so that an existing savefile is only replaced by a completely written one.
# If anything fails, the temporary file is removed and the exception propagates.
def save_file(file)
  tmpfile = file + '.tmp'
  File.open(tmpfile, 'wb') { |fd| save_io(fd) }
  File.rename tmpfile, file
ensure
  # after a successful rename the tmpfile does not exist anymore ; on failure do not leave it behind
  if tmpfile and File.exist?(tmpfile)
    begin
      File.unlink(tmpfile)
    rescue SystemCallError
      # best effort cleanup: do not mask the original error
    end
  end
end
828
+
829
# Saves the dasm state to an IO.
# The output starts with a 'Metasm.dasm' line, followed by a list of records ; each
# record is written with fd.puts as a "<type> <payload length>" line followed by the payload.
# Record types written, in order: binarypath or cpu, section (one per section), map,
# decoded, comment, blocks, funcs, xrefs, c.
def save_io(fd)
  emit = lambda { |type, payload| fd.puts "#{type} #{payload.length}", payload }
  # comma-separated Expression list from a possibly nil address list
  addr_list = lambda { |addrs| addrs.to_a.map { |t_| Expression[t_] }.join(',') }
  xref_line = lambda { |addr, xr|
    [Expression[addr], xr.type, xr.len, (Expression[xr.origin] if xr.origin)].join(',')
  }

  fd.puts 'Metasm.dasm'

  if @program.filename
    emit['binarypath', @program.filename.to_s]
  else
    # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE
    emit['cpu', "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}"]
  end

  @sections.each { |addr, edata|
    # forget edata exports/relocs
    # dump at most 16Mo per section
    header = "#{Expression[addr]} #{edata.length}\n"
    emit['section', header + [edata.data[0, 2**24].to_str].pack('m*')]
  }

  emit['map', save_map.join("\n")]

  decoded_lines = []
  @decoded.each { |addr, di|
    next if not di.kind_of? DecodedInstruction
    line = "#{Expression[addr]},#{di.bin_length} #{di.instruction}"
    line << " ; #{di.comment.join(' ')}" if di.comment
    decoded_lines << line
  }
  emit['decoded', decoded_lines.sort.join("\n")]

  # one "<addr> <text>" line per comment line
  comment_lines = @comment.map { |addr, cmts|
    cmts.map { |c| c.chomp }.join("\n").split("\n").map { |c| "#{Expression[addr]} #{c.chomp}" }
  }
  emit['comment', comment_lines.join("\n")]

  heads = @decoded.values.select { |di| di.kind_of? DecodedInstruction and di.block_head? }
  bl = heads.map { |di| di.block }.compact
  block_lines = bl.map { |b|
    [Expression[b.address],
     b.list.map { |di| Expression[di.address] }.join(','),
     addr_list[b.to_normal],
     addr_list[b.to_subfuncret],
     addr_list[b.to_indirect],
     addr_list[b.from_normal],
     addr_list[b.from_subfuncret],
     addr_list[b.from_indirect],
    ].join(';')
  }
  emit['blocks', block_lines.sort.join("\n")]

  func_lines = []
  @function.each { |addr, f|
    next if not @decoded[addr]
    func_lines << [addr, *f.return_address.to_a].map { |e| Expression[e] }.join(',')
  }
  # TODO binding ?
  emit['funcs', func_lines.sort.join("\n")]

  xref_lines = @xrefs.map { |addr, x|
    addr = ':default' if addr == :default
    addr = ':unknown' if addr == Expression::Unknown
    # XXX origin
    case x
    when nil
    when Xref
      xref_line[addr, x]
    when Array
      x.map { |x_| xref_line[addr, x_] }
    end
  }
  emit['xrefs', xref_lines.compact.join("\n")]

  emit['c', @c_parser.to_s]

  #t = bl.map { |b| b.backtracked_for }
  #fd.puts "trace #{t.length}" , t
end
907
+
908
# Loads a disassembler from the content of a saved file.
# Creates a new instance with new(nil, nil), feeds it str through #load
# (the optional block is forwarded) and returns the instance.
def self.load(str, &b)
  dasm = new(nil, nil)
  dasm.load(str, &b)
  dasm
end
914
+
915
# Loads the dasm state from a savefile content (see save_io).
# str must start with the 'Metasm.dasm' line, and is then read as a list of records
# made of a "<type> <length>" line followed by <length> characters of payload.
# The block, if any, is called with (type, data):
#  - for a 'binarypath' record whose file does not exist ; its result replaces the path
#  - for every record of unknown type
# Malformed entries inside a record are skipped (reported only if $VERBOSE).
# Raises a RuntimeError if str is not a metasm save file, or names an invalid cpu.
def load(str)
  raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm'
  off = 12
  pp = Preprocessor.new
  app = AsmPreprocessor.new
  while off < str.length
    # record header
    i = str.index("\n", off) || str.length
    type, len = str[off..i].chomp.split
    off = i+1
    # record payload
    data = str[off, len.to_i]
    off += len.to_i
    case type
    when nil, ''
      # empty line between two records
    when 'binarypath'
      data = yield(type, data) if not File.exist? data and block_given?
      reinitialize AutoExe.decode_file(data)
      @program.disassembler = self
      @program.init_disassembler
    when 'cpu'
      cpuname, size, endianness = data.split
      cpu = Metasm.const_get(cpuname)
      raise 'invalid cpu' if not cpu < CPU
      cpu = cpu.new
      cpu.size = size.to_i
      cpu.endianness = endianness.to_sym
      reinitialize Shellcode.new(cpu)
      @program.disassembler = self
      @program.init_disassembler
    when 'section'
      # 1st line: "<addr> <virtual length>", then the base64 section content
      info = data[0, data.index("\n") || data.length]
      data = data[info.length, data.length]
      pp.feed!(info)
      addr = Expression.parse(pp).reduce
      len = Expression.parse(pp).reduce
      edata = EncodedData.new(data.unpack('m*').first, :virtsize => len)
      add_section(addr, edata)
    when 'map'
      load_map data
    when 'decoded'
      # "<addr>,<bin_length> <instruction>[ ; <comment>]"
      data.each_line { |l|
        begin
          next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/
          a, len, instr, cmt = $1, $2, $3, $4
          a = Expression.parse(pp.feed!(a)).reduce
          instr = @cpu.parse_instruction(app.feed!(instr))
          di = DecodedInstruction.new(instr, a)
          di.bin_length = len.to_i
          di.add_comment cmt if cmt
          @decoded[a] = di
        rescue
          puts "load: bad di #{l.inspect}" if $VERBOSE
        end
      }
    when 'blocks'
      # ';'-separated fields, each a ','-separated address list:
      # block address, instruction addresses, then the to_*/from_* lists
      data.each_line { |l|
        bla = l.chomp.split(';').map { |sl| sl.split(',') }
        begin
          a = Expression.parse(pp.feed!(bla.shift[0])).reduce
          b = InstructionBlock.new(a, get_section_at(a).to_a[0])
          bla.shift.each { |e|
            a = Expression.parse(pp.feed!(e)).reduce
            b.add_di(@decoded[a])
          }
          bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s|
            b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty?
          }
        rescue
          puts "load: bad block #{l.inspect}" if $VERBOSE
        end
      }
    when 'funcs'
      # "<function addr>[,<return address>...]"
      data.each_line { |l|
        begin
          a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce }
          @function[a] = DecodedFunction.new
          @function[a].return_address = r if not r.empty?
          @function[a].finalized = true
          # TODO
        rescue
          puts "load: bad function #{l.inspect} #$!" if $VERBOSE
        end
      }
    when 'comment'
      # "<addr> <comment text>"
      data.each_line { |l|
        begin
          # chomp: each_line keeps the line terminator, which must not end up in
          # the comment text (it would also defeat the duplicate check below)
          a, c = l.chomp.split(' ', 2)
          a = Expression.parse(pp.feed!(a)).reduce
          @comment[a] ||= []
          @comment[a] |= [c]
        rescue
          puts "load: bad comment #{l.inspect} #$!" if $VERBOSE
        end
      }
    when 'c'
      begin
        # TODO parse_invalid_c, split per function, whatever
        parse_c('')
        @c_parser.allow_bad_c = true
        parse_c(data, 'savefile#c')
      rescue
        puts "load: bad C: #$!", $!.backtrace if $VERBOSE
      end
      @c_parser.readtok until @c_parser.eos? if @c_parser
    when 'xrefs'
      # "<addr>,<type>,<len>,<origin>"
      data.each_line { |l|
        begin
          a, t, len, o = l.chomp.split(',')
          case a
          when ':default'; a = :default
          when ':unknown'; a = Expression::Unknown
          else a = Expression.parse(pp.feed!(a)).reduce
          end
          t = (t.empty? ? nil : t.to_sym)
          len = (len != '' ? len.to_i : nil)
          o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil)	# :default/:unknown ?
          add_xref(a, Xref.new(t, o, len))
        rescue
          puts "load: bad xref #{l.inspect} #$!" if $VERBOSE
        end
      }
    #when 'trace'
    else
      if block_given?
        yield(type, data)
      else
        puts "load: unsupported section #{type.inspect}" if $VERBOSE
      end
    end
  end
end
1047
+
1048
# Changes the base address of the loaded binary: the lowest section address
# is moved to newaddr, through rebase_delta.
# Better done early (before disassembling anything).
# Returns the delta.
def rebase(newaddr)
  lowest = @sections.keys.min
  rebase_delta(newaddr - lowest)
end
1054
+
1055
# Adds delta to every address stored in the disassembler state: section, decoded,
# xref, function and comment keys, todo/done lists, label bindings, block links,
# instruction addresses, function return addresses and xref origins.
# Only Integer addresses are changed, other values are left as is.
# Also resets @label_alias_cache. Returns delta.
def rebase_delta(delta)
  # returns the shifted version of obj ; Arrays and Hash keys are updated in place
  shift = lambda { |obj|
    case obj
    when Array
      obj.map! { |e| shift[e] }
      obj
    when Hash
      rekeyed = {}
      obj.each { |k, v| rekeyed[shift[k]] = v }
      obj.replace rekeyed
      obj
    when Integer
      obj + delta
    when BacktraceTrace
      obj.origin = shift[obj.origin]
      obj.address = shift[obj.address]
      obj
    else
      obj
    end
  }

  [@sections, @decoded, @xrefs, @function, @addrs_todo, @addrs_done, @comment].each { |container|
    shift[container]
  }
  @prog_binding.each_key { |name| @prog_binding[name] = shift[@prog_binding[name]] }
  @old_prog_binding.each_key { |name| @old_prog_binding[name] = shift[@old_prog_binding[name]] }
  @label_alias_cache = nil

  block_links = [:to_normal, :to_subfuncret, :to_indirect,
                 :from_normal, :from_subfuncret, :from_indirect, :backtracked_for]
  @decoded.values.grep(DecodedInstruction).each { |di|
    # each block is handled once, through its head instruction
    if di.block_head?
      blk = di.block
      blk.address += delta
      block_links.each { |link| shift[blk.send(link)] }
    end
    di.address = shift[di.address]
    di.next_addr = shift[di.next_addr]
  }
  @function.each_value { |func|
    func.return_address = shift[func.return_address]
    shift[func.backtracked_for]
  }
  @xrefs.values.flatten.compact.each { |xr| xr.origin = shift[xr.origin] }
  delta
end
1106
+
1107
# Changes the Expression display mode of object o to show integers as char constants.
# For every expression of o, render_info[:char] is set to the cpu endianness, or
# reset to nil if it was already set.
# Does nothing (returns nil) when o is not a Renderable.
def toggle_expr_char(o)
  return unless o.kind_of? Renderable
  o.each_expr { |expr|
    expr.render_info ||= {}
    info = expr.render_info
    info[:char] = (info[:char] ? nil : @cpu.endianness)
  }
end
1115
+
1116
# Patches the Expressions of object o to include label names when available.
# Each side (lexpr, rexpr) of every expression is replaced by:
#  - its @prog_binding value, when the side is a key of @prog_binding
#  - else the label found at this address by get_label_at, when the side is an Integer ;
#    an :addr xref from o.address is then recorded if o has an address
# Does nothing when o is not a Renderable.
# XXX should we also create labels ?
def toggle_expr_offset(o)
  return unless o.kind_of? Renderable
  o.each_expr { |expr|
    [:lexpr, :rexpr].each { |side|
      operand = expr.send(side)
      replacement = @prog_binding[operand]
      if not replacement and operand.kind_of? ::Integer and replacement = get_label_at(operand)
        add_xref(normalize(operand), Xref.new(:addr, o.address)) if o.respond_to? :address
      end
      expr.send("#{side}=", replacement) if replacement
    }
  }
end
1135
+
1136
# Call this on a function entrypoint if the function is in fact a __noreturn.
# For every :x xref to o whose origin is a :saveip instruction, cuts the block's
# to_subfuncret links, and removes the matching from_subfuncret entries of the
# blocks they pointed to (set to nil when they become empty).
def fix_noreturn(o)
  each_xref(o, :x) { |xr|
    call_addr = normalize(xr.origin)
    call_di = di_at(call_addr)
    next unless call_di and call_di.opcode.props[:saveip]
    # XXX should check if caller also becomes __noreturn
    call_di.block.each_to_subfuncret { |ret_addr|
      ret_di = di_at(ret_addr)
      next unless ret_di and ret_di.block.from_subfuncret
      ret_di.block.from_subfuncret.delete_if { |from| normalize(from) == call_di.address }
      ret_di.block.from_subfuncret = nil if ret_di.block.from_subfuncret.empty?
    }
    call_di.block.to_subfuncret = nil
  }
end
1151
+
1152
# Finds the addresses of the calls calling the address funcaddr, handles thunks.
# For every :x xref to funcaddr, walks the code backwards from the xref origin until
# a :saveip instruction is found ; the walk only follows unambiguous links (a single
# from_subfuncret / from_normal predecessor) and gives up otherwise.
# Returns the list of call addresses found, without duplicates.
def call_sites(funcaddr)
  find_call_site = proc { |a|
    cs = nil
    # instructions already checked: the walk is deterministic, so coming back to one
    # of them means we are looping in a cycle of blocks that holds no call
    visited = {}
    until not di = di_at(a)
      if di.opcode.props[:saveip]
        cs = di.address
        break
      end
      break if visited[di.address]
      visited[di.address] = true
      if di.block.from_subfuncret.to_a.first
        # go up through the sequence of subfunction returns
        while di.block.from_subfuncret.to_a.length == 1
          a = di.block.from_subfuncret[0]
          break if not di_at(a)
          a = @decoded[a].block.list.first.address
          di = @decoded[a]
        end
      end
      break if di.block.from_subfuncret.to_a.first
      break if di.block.from_normal.to_a.length != 1
      a = di.block.from_normal.first
    end
    cs
  }
  ret = []
  each_xref(normalize(funcaddr), :x) { |a|
    ret << find_call_site[a.origin]
  }
  ret.compact.uniq
end
1180
+
1181
# Loads a disassembler plugin script.
# This is simply a ruby script instance_eval()ed in the disassembler.
# When plugin_filename does not exist, the following names are tried in order:
# the filename with a '.rb' suffix, then (if Metasmdir is defined) the filename
# in the Metasmdir/samples/dasm-plugins subdirectory, without and with the '.rb' suffix.
# Returns the value of the evaluated script.
def load_plugin(plugin_filename)
  unless File.exist?(plugin_filename)
    candidates = [plugin_filename + '.rb']
    if defined? Metasmdir
      # try autocomplete
      in_samples = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename)
      candidates << in_samples << in_samples + '.rb'
    end
    resolved = candidates.find { |path| File.exist? path }
    plugin_filename = resolved if resolved
  end

  instance_eval File.read(plugin_filename)
end
1202
+
1203
# Same as load_plugin, but hides the @gui attribute while loading, preventing the
# plugin from doing popup stuff.
# This is useful when you want to load a plugin from another plugin to enhance the
# plugin's functionality.
# The previous gui is restored afterwards, even if the plugin raises.
# XXX this also prevents setting up kbd_callbacks etc..
def load_plugin_nogui(plugin_filename)
  saved_gui = gui
  @gui = nil
  load_plugin(plugin_filename)
ensure
  @gui = saved_gui
end
1213
+
1214
# Composes two code/instruction's backtrace_binding.
# bd1 and bd2 are bindings (hashes), or DecodedInstructions whose backtrace_binding
# is used (and computed through the cpu if not yet available).
# Assumes bd1 is followed by bd2 in the code flow.
# eg inc edi + push edi =>
#  { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] }
# XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to
# revert the change made by bd2, which only works with simple ptr addition now
# XXX unhandled situations may be resolved using :unknown, or by returning incorrect values
def compose_bt_binding(bd1, bd2)
  binding_of = lambda { |obj|
    if obj.kind_of? DecodedInstruction
      obj.backtrace_binding ||= cpu.get_backtrace_binding(obj)
    else
      obj
    end
  }
  bd1 = binding_of[bd1]
  bd2 = binding_of[bd2]

  simplify = lambda { |e| Expression[Expression[e].reduce] }

  composed = {}

  # effects of bd2, expressed from the state found before bd1
  bd2.each { |key, val|
    composed[key] = simplify[val.bind(bd1)]
  }

  # for each pointer appearing in keys of bd1, we must infer from bd2 what final
  # pointers should appear in the result
  # eg 'mov [eax], 0  mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax }
  bd1.each { |key, val|
    if not key.kind_of? Indirection
      composed[key] ||= simplify[val]
      next
    end

    handled = false
    key.pointer.externals.each { |ext|
      # XXX this will break on nontrivial pointers or bd2
      bd2.each { |key2, val2|
        # we dont want to invert computation of flag_zero/carry etc (booh)
        next if key2.to_s =~ /flag/

        # discard indirection etc, result would be too complex / not useful
        next if not Expression[val2].expr_externals.include? ext

        handled = true

        # try to reverse the computation made upon 'ext'
        # only simple addition handled here
        ptr = simplify[key.pointer.bind(ext => Expression[[key2, :-, val2], :+, ext])]

        # if bd2 does not rewrite ext, duplicate the original pointer
        if not bd2[ext]
          composed[key] ||= simplify[val]

          # here we should not see 'ext' in ptr anymore
          ptr = Expression::Unknown if ptr.externals.include? ext
        else
          # cant check if add reversion was successful..
        end

        composed[Indirection[simplify[ptr], key.len]] ||= simplify[val]
      }
    }
    composed[key] ||= simplify[val] if not handled
  }

  composed
end
1279
+ end
1280
+ end