metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,333 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+
9
+ module Metasm
10
+ class ExeFormat
11
# Encodes an Array of source elements (Label / Data / Align / Padding /
# Offset / Instruction) into a single EncodedData.
#
# Elements whose final size is not yet known (alignment, padding, offset
# checks, instructions with several candidate encodings) are kept aside and
# settled by +assemble_resolve+.
#
# seq:: enumerable of source elements
# cpu:: object providing +endianness+ and +encode_instruction+
#
# Returns the EncodedData, after applying fixup with its own binding.
# Raises EncodeError when an instruction yields an empty candidate list.
def assemble_sequence(seq, cpu)
  # list of EncodedData interleaved with unresolved elements (directives or
  # arrays of candidate encodings); the last entry is always an EncodedData
  chunks = [EncodedData.new]

  seq.each do |item|
    case item
    when Label
      current = chunks.last
      current.add_export(item.name, current.virtsize)
    when Data
      chunks.last << item.encode(cpu.endianness)
    when Align, Padding
      # make sure the fill pattern is an EncodedData before it is used later
      fill = item.fillwith
      item.fillwith = fill.encode(cpu.endianness) if fill && !fill.kind_of?(EncodedData)
      chunks << item << EncodedData.new
    when Offset
      chunks << item << EncodedData.new
    when Instruction
      encoded = cpu.encode_instruction(self, item)
      if encoded.kind_of?(Array)
        case encoded.length
        when 0
          raise EncodeError, "failed to encode #{item}"
        when 1
          chunks.last << encoded.first
        else
          # several candidates: decided later by assemble_resolve
          chunks << encoded << EncodedData.new
        end
      else
        chunks.last << encoded
      end
    end
  end

  result = if chunks.length > 1
    assemble_resolve(chunks)
  else
    chunks.shift
  end
  result.fixup result.binding
  result
end
44
+
45
# Chooses among multiple possible sub-EncodedData and concatenates the
# whole sequence into one EncodedData.
#
# ary:: Array whose entries are EncodedData, Arrays of candidate EncodedData
#       (ambiguous encodings), or Align / Padding / Offset directives.
#       The array is modified in place (candidate Arrays are replaced by the
#       chosen EncodedData).
#
# Assumes all ambiguous edata have the equivalent relocations in the same
# order.
#
# Works in three passes:
# 1. compute, for every exported label, its smallest and largest possible
#    offset from the start of the sequence;
# 2. for every candidate Array, discard the candidates whose relocation
#    types cannot hold the worst-case target value, then keep the shortest;
# 3. concatenate everything, expanding Align/Padding and checking Offset.
#
# Returns the assembled EncodedData.
# Raises EncodeError when no candidate fits or an .offset/.pad cannot be
# honoured; raises through the offending directive (+raise elem, msg+) for
# malformed .pad/.offset layouts; raises a RuntimeError on unexpected
# element types.
def assemble_resolve(ary)
  startlabel = new_label('section_start')

  # create two bindings where all elements are the shortest/longest possible
  minbinding = {}
  minoff = 0
  maxbinding = {}
  maxoff = 0

  ary.each { |elem|
    case elem
    when Array
      # candidates without relocations: only the shortest one matters here
      # (this rebinds the block-local elem only, ary itself is untouched)
      if elem.all? { |ed| ed.kind_of? EncodedData and ed.reloc.empty? }
        elem = [elem.sort_by { |ed| ed.length }.first]
      end
      elem.each { |e|
        e.export.each { |label, off|
          minbinding[label] = Expression[startlabel, :+, minoff + off]
          maxbinding[label] = Expression[startlabel, :+, maxoff + off]
        }
      }
      minoff += elem.map { |e| e.virtsize }.min
      maxoff += elem.map { |e| e.virtsize }.max

    when EncodedData
      elem.export.each { |label, off|
        minbinding[label] = Expression[startlabel, :+, minoff + off]
        maxbinding[label] = Expression[startlabel, :+, maxoff + off]
      }
      minoff += elem.virtsize
      maxoff += elem.virtsize

    when Align
      # an alignment adds between 0 and val-1 bytes
      minoff += 0
      maxoff += elem.val - 1

    when Padding
      # find the surrounding Offsets and compute the largest/shortest edata
      # sizes to determine min/max length for the padding
      prevoff = ary[0..ary.index(elem)].grep(Offset).last
      nextoff = ary[ary.index(elem)..-1].grep(Offset).first
      raise elem, 'need .offset after .pad' if not nextoff

      # find all elements between the surrounding Offsets
      previdx = prevoff ? ary.index(prevoff) + 1 : 0
      surround = ary[previdx..ary.index(nextoff)-1]
      surround.delete elem
      if surround.find { |nelem| nelem.kind_of? Padding }
        raise elem, 'need .offset beetween two .pad'
      end
      if surround.find { |nelem| nelem.kind_of? Align and ary.index(nelem) > ary.index(elem) }
        raise elem, 'cannot .align after a .pad' # XXX really ?
      end

      # lenmin/lenmax are the extreme lengths of the Padding
      nxt = Expression[nextoff.val]
      ext = nxt.externals
      raise elem, "bad offset #{nxt}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first])
      nxt = Expression[nxt, :-, startlabel] if not nxt.bind(minbinding).reduce.kind_of? ::Integer
      prv = Expression[prevoff ? prevoff.val : 0]
      ext = prv.externals
      raise elem, "bad offset #{prv}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first])
      prv = Expression[prv, :-, startlabel] if not prv.bind(minbinding).reduce.kind_of? ::Integer

      lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce
      lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce
      raise elem, "bad labels: #{lenmin}" if not lenmin.kind_of? ::Integer or not lenmax.kind_of? ::Integer
      # subtract the room taken by the other elements sharing the interval
      surround.each { |nelem|
        case nelem
        when Array
          lenmin -= nelem.map { |e| e.virtsize }.max
          lenmax -= nelem.map { |e| e.virtsize }.min
        when EncodedData
          lenmin -= nelem.virtsize
          lenmax -= nelem.virtsize
        when Align
          lenmin -= nelem.val - 1
          lenmax -= 0
        end
      }
      raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0
      minoff += [lenmin, 0].max
      maxoff += lenmax

    when Offset
      # nothing to do for now
    else
      raise "Internal error: bad object #{elem.inspect} in assemble_resolve"
    end
  }

  # checks an expression linearity: walks down the expression, stepping
  # over numeric factors/terms; true when nothing but a non-Expression
  # leaf remains
  check_linear = lambda { |expr|
    expr = expr.reduce if expr.kind_of? Expression
    while expr.kind_of? Expression
      case expr.op
      when :*
        if    expr.lexpr.kind_of? Numeric; expr = expr.rexpr
        elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr
        else  break
        end
      when :/, :>>, :<<
        if    expr.rexpr.kind_of? Numeric; expr = expr.lexpr
        else  break
        end
      when :+, :-
        if    not expr.lexpr;              expr = expr.rexpr
        elsif expr.lexpr.kind_of? Numeric; expr = expr.rexpr
        elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr
        else
          break if not check_linear[expr.rexpr]
          expr = expr.lexpr
        end
      else break
      end
    end

    not expr.kind_of? Expression
  }

  # now we can resolve all relocations
  # for linear expressions of internal variables (ie differences of labels from the ary):
  #  - calc target numeric bounds, and reject relocs not accepting worst case value
  #  - else reject all but largest place available
  # then chose the shortest overall EData left
  ary.map! { |elem|
    case elem
    when Array
      # for each external, compute numeric target values using
      # minbinding[external] and maxbinding[external]
      # this gives us all extreme values for linear expressions
      target_bounds = {}
      rec_checkminmax = lambda { |idx, target, binding, extlist|
        if extlist.empty?
          (target_bounds[idx] ||= []) << target.bind(binding).reduce
        else
          rec_checkminmax[idx, target, binding.merge(extlist.last => minbinding[extlist.last]), extlist[0...-1]]
          rec_checkminmax[idx, target, binding.merge(extlist.last => maxbinding[extlist.last]), extlist[0...-1]]
        end
      }
      # biggest size available for this relocation (for non-linear/external)
      wantsize = {}

      elem.each { |e|
        e.reloc.sort.each_with_index { |r_, i|
          r = r_[1]
          # has external ref
          if not r.target.bind(minbinding).reduce.kind_of?(Numeric) or not check_linear[r.target]
            # find the biggest relocation type for the current target
            wantsize[i] = elem.map { |edata|
              edata.reloc.sort[i][1].type
            }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length
          else
            rec_checkminmax[i, r.target, {}, r.target.externals]
          end
        }
      }

      # reject candidates with reloc type too small
      acceptable = elem.find_all { |edata|
        r = edata.reloc.sort
        (0...r.length).all? { |i|
          if wantsize[i]
            r[i][1].type == wantsize[i]
          else
            target_bounds[i].all? { |b| Expression.in_range?(b, r[i][1].type) }
          end
        }
      }

      raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty?

      # keep the shortest
      acceptable.sort_by { |edata| edata.virtsize }.first
    else
      elem
    end
  }

  # assemble all parts, resolve padding sizes, check offset directives
  edata = EncodedData.new

  # fills edata with repetitions of data until targetsize
  # a nil fill pattern, or one with a virtual size of 0, cannot produce any
  # byte: in both cases edata is simply extended to targetsize (repeating a
  # zero-sized pattern would otherwise never reach targetsize and loop forever)
  fillwith = lambda { |targetsize, data|
    if data and data.virtsize > 0
      if data.reloc.empty? and not data.data.empty? # avoid useless iterations
        nr = (targetsize-edata.virtsize) / data.length - 1
        if nr > 0
          dat = data.data.ljust(data.virtsize, 0.chr)
          edata << (dat * nr)
        end
      end
      while edata.virtsize + data.virtsize <= targetsize
        edata << data
      end
      # partial last repetition
      if edata.virtsize < targetsize
        edata << data[0, targetsize - edata.virtsize]
      end
    else
      edata.virtsize = targetsize
    end
  }

  ary.each { |elem|
    case elem
    when EncodedData
      edata << elem
    when Align
      fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith]
    when Offset
      raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{edata.virtsize}" if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce
    when Padding
      # pad up to the next .offset, minus the size of what sits in between
      nextoff = ary[ary.index(elem)..-1].grep(Offset).first
      targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce
      ary[ary.index(elem)+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize }
      raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length
      fillwith[targetsize, elem.fillwith]
    else raise "Internal error: #{elem.inspect}"
    end
  }

  edata
end
268
+ end
269
+
270
class Expression
  # Encodes this expression as an EncodedData of the given integer +type+.
  #
  # When the expression reduces to an Integer, its bytes are emitted
  # directly; otherwise a zero-filled placeholder of the type's size is
  # emitted, carrying a Relocation at offset 0 for this expression.
  def encode(type, endianness, backtrace=nil)
    val = reduce
    if val.kind_of?(Integer)
      EncodedData.new Expression.encode_imm(val, type, endianness, backtrace)
    else
      placeholder = [0].pack('C') * (INT_SIZE[type]/8)
      fixup = Relocation.new(self, type, endianness, backtrace)
      EncodedData.new(placeholder, :reloc => {0 => fixup})
    end
  end

  class << self
    # Returns the String of raw bytes holding the integer +val+.
    #
    # type::       an INT_SIZE key, or an Integer byte count (mapped to the
    #              INT_SIZE key starting with 'a' that has this size)
    # endianness:: :big or :little, or any object responding to +endianness+
    # backtrace::  optional, only used to build the overflow message
    #
    # Raises a RuntimeError on an unsupported endianness and an EncodeError
    # when +val+ is out of range for +type+.
    def encode_imm(val, type, endianness, backtrace=nil)
      if type.kind_of? ::Integer
        type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a && INT_SIZE[k] == 8*type }
      end
      endianness = endianness.endianness unless endianness.kind_of? ::Symbol
      raise "unsupported endianness #{endianness.inspect}" unless [:big, :little].include? endianness
      unless in_range?(val, type)
        raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{(Backtrace::backtrace_str(backtrace) if backtrace)}")
      end

      # bytes are extracted least significant first
      bytes = Array.new(INT_SIZE[type]/8) { |i| (val >> (8*i)) & 0xff }
      bytes.reverse! if endianness != :little
      bytes.pack('C*')
    end
    alias encode_immediate encode_imm
  end
end
290
+
291
class Data
  # Encodes this data declaration into an EncodedData.
  #
  # One occurrence is built according to the kind of @data:
  # :uninitialized:: no raw bytes, only a virtual size matching the type
  # String::         every byte is encoded as an integer of the type
  #                  (db 'foo' => 'foo', dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o")
  # Expression::     encoded with the type
  # Array::          concatenation of the encoding of every element
  #
  # The occurrence is then repeated @count times.
  def encode(endianness)
    unit =
      case @data
      when :uninitialized
        EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]]/8)
      when String
        # XXX could be optimised, but should not be significant
        chars = EncodedData.new
        @data.each_byte { |byte|
          chars << Expression.encode_imm(byte, INT_TYPE[@type], endianness, @backtrace)
        }
        chars
      when Expression
        @data.encode INT_TYPE[@type], endianness, @backtrace
      when Array
        joined = EncodedData.new
        @data.each { |sub| joined << sub.encode(endianness) }
        joined
      end

    # n times
    repeated = EncodedData.new
    (0...@count).each { repeated << unit }
    repeated
  end
end
310
+
311
class CPU
  # Encodes the instruction +i+ for +program+.
  #
  # Candidate opcodes are those registered under the instruction name whose
  # argument count matches and whose every argument passes
  # +#parse_arg_valid?+; each one is encoded with the arch-specific
  # +encode_instr_op+. A candidate raising EncodeError is skipped, and the
  # message of the last such failure is kept for the final error.
  #
  # Returns a flat Array of the successful encodings, each relocation being
  # tagged with the instruction backtrace.
  # Raises EncodeError when no candidate could be encoded.
  def encode_instruction(program, i)
    errmsg = ''

    candidates = opcode_list_byname[i.opname].to_a.select do |op|
      op.args.length == i.args.length &&
        op.args.zip(i.args).all? { |formal, actual| parse_arg_valid?(op, formal, actual) }
    end

    attempts = candidates.map do |op|
      begin
        encode_instr_op(program, i, op)
      rescue EncodeError => err
        errmsg = " (#{err.message})"
        nil
      end
    end

    oplist = attempts.compact.flatten
    raise EncodeError, "no matching opcode found for #{i}#{errmsg}" if oplist.empty?

    oplist.each do |ed|
      ed.reloc.each_value { |rel| rel.backtrace = i.backtrace }
    end
    oplist
  end
end
333
+ end
@@ -0,0 +1,194 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/exe_format/main'
7
+ require 'metasm/encode'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
# a.out executable file format
class AOut < ExeFormat
  # numeric value => name tables used by the header / symbol fields
  MAGIC = {
    0407 => 'OMAGIC', 0410 => 'NMAGIC', 0413 => 'ZMAGIC',
    0314 => 'QMAGIC', 0421 => 'CMAGIC'
  }
  MACHINE_TYPE = {
    0 => 'OLDSUN2', 1 => '68010', 2 => '68020',
    3 => 'SPARC', 100 => 'PC386', 134 => 'I386', 135 => 'M68K',
    136 => 'M68K4K', 137 => 'NS32532', 138 => 'SPARC',
    139 => 'PMAX', 140 => 'VAX', 141 => 'ALPHA', 142 => 'MIPS',
    143 => 'ARM6', 151 => 'MIPS1', 152 => 'MIPS2', 300 => 'HP300',
    0x20B => 'HPUX800', 0x20C => 'HPUX'
  }
  FLAGS = { 0x10 => 'PIC', 0x20 => 'DYNAMIC' }
  SYMBOL_TYPE = {
    0 => 'UNDF', 1 => 'ABS', 2 => 'TEXT',
    3 => 'DATA', 4 => 'BSS', 5 => 'INDR', 6 => 'SIZE',
    9 => 'COMM', 10 => 'SETA', 11 => 'SETT', 12 => 'SETD',
    13 => 'SETB', 14 => 'SETV', 15 => 'FN'
  }

  attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel

  # file header: one bitfield word (magic, machine type, flags) followed by
  # seven size/address words
  class Header < SerialStruct
    bitfield :word, 0 => :magic, 16 => :machtype, 24 => :flags
    fld_enum(:magic, MAGIC)
    fld_enum(:machtype, MACHINE_TYPE)
    fld_bits(:flags, FLAGS)
    words :text, :data, :bss, :syms, :entry, :trsz, :drsz

    # decodes the header, rejecting any magic other than O/N/Z/QMAGIC
    def decode(aout)
      super(aout)

      return if ['OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC'].include?(@magic)
      raise InvalidExeFormat, "Bad A.OUT signature #@magic"
    end

    # fills unset fields: QMAGIC / PC386 / no flags, and the text and data
    # sizes from the sections of +aout+ (text counts 32 more bytes for QMAGIC)
    def set_default_values(aout)
      @magic ||= 'QMAGIC'
      @machtype ||= 'PC386'
      @flags ||= []
      if aout.text
        @text ||= aout.text.length + (@magic == 'QMAGIC' ? 32 : 0)
      end
      if aout.data
        @data ||= aout.data.length
      end

      super(aout)
    end
  end

  # relocation entry: an address word and a bitfield word
  class Relocation < SerialStruct
    word :address
    bitfield :word, 0 => :symbolnum, 24 => :pcrel, 25 => :length,
      27 => :extern, 28 => :baserel, 29 => :jmptable, 30 => :relative, 31 => :rtcopy
    fld_enum :length, 0 => 1, 1 => 2, 2 => 4, 3 => 8
    fld_default :length, 4
  end

  # symbol table entry; +name+ is resolved through the string table
  class Symbol < SerialStruct
    word :name_p
    bitfield :byte, 0 => :extern, 1 => :type, 5 => :stab
    byte :other
    half :desc
    word :value
    attr_accessor :name

    # decodes the entry; when +strings+ is given, reads the name starting at
    # name_p up to the next NUL byte
    def decode(aout, strings=nil)
      super(aout)
      return unless strings

      terminator = strings.index(?\0, @name_p)
      @name = strings[@name_p...terminator]
    end

    # appends the name (NUL-terminated) to +strings+ and points name_p to it,
    # unless name_p already designates this name
    def set_default_values(aout, strings=nil)
      named = strings && name && @name != ''
      if named && (!@name_p || strings[@name_p, @name.length] != @name)
        @name_p = strings.length
        strings << @name << 0
      end
      super(aout, strings)
    end
  end

  # integer readers / writers using the file endianness
  def decode_byte(edata = @encoded)
    edata.decode_imm(:u8, @endianness)
  end

  def decode_half(edata = @encoded)
    edata.decode_imm(:u16, @endianness)
  end

  def decode_word(edata = @encoded)
    edata.decode_imm(:u32, @endianness)
  end

  def encode_byte(w)
    Expression[w].encode(:u8, @endianness)
  end

  def encode_half(w)
    Expression[w].encode(:u16, @endianness)
  end

  def encode_word(w)
    Expression[w].encode(:u32, @endianness)
  end

  # endianness comes from +cpu+ when given, little endian otherwise
  def initialize(cpu = nil)
    @endianness = cpu ? cpu.endianness : :little
    @header = Header.new
    @text = EncodedData.new
    @data = EncodedData.new
    super(cpu)
  end

  # decodes the header found at the very beginning of @encoded
  def decode_header
    @encoded.ptr = 0
    @header.decode(self)
  end

  # decodes the header, then reads the text and data sections
  def decode
    decode_header

    text_len = @header.text
    case @header.magic
    when 'ZMAGIC'
      @encoded.ptr = 1024
    when 'QMAGIC'
      text_len -= 32 # header is included in .text
    end
    @text = EncodedData.new << @encoded.read(text_len)

    @data = EncodedData.new << @encoded.read(@header.data)

    # NOTE(review): the remaining areas are read but only held in locals,
    # the textrel/datarel/symbols accessors are not populated here
    raw_textrel = @encoded.read @header.trsz
    raw_datarel = @encoded.read @header.drsz
    raw_syms = @encoded.read @header.syms
    raw_strings = @encoded.read
    # TODO
  end

  # builds the binary file and returns its raw data
  # only QMAGIC (or an unset magic) is supported, EncodeError otherwise
  def encode
    # non mmapable on linux anyway
    # could support OMAGIC..
    raise EncodeError, 'cannot encode non-QMAGIC a.out' if @header.magic && @header.magic != 'QMAGIC'

    page = 4096
    hdrlen = 32

    # data must be 4096-aligned
    # 32 bytes of header included in .text
    @text.virtsize = (@text.virtsize + hdrlen + page - 1) / page * page - hdrlen
    unless (@data.rawsize % page).zero?
      # writing the last byte of the page extends the raw data up to it
      @data[(@data.rawsize + page - 1) / page * page - 1] = 0
    end

    @header.text = @text.length + hdrlen
    @header.data = @data.rawsize
    @header.bss = @data.virtsize - @data.rawsize

    @encoded = EncodedData.new
    @encoded << @header.encode(self)
    # text labels are based at 4096+32, data labels right after the text
    labels = @text.binding(page + hdrlen).merge(@data.binding(page + @header.text))
    @encoded << @text << @data
    @encoded.fixup! labels
    @encoded.data
  end

  # prepares the per-section source lists; parsing starts in .text
  def parse_init
    @textsrc ||= []
    @datasrc ||= []
    @cursource ||= @textsrc
    super()
  end

  # handles the .text, .data and .entrypoint directives, anything else is
  # delegated to the parent class
  def parse_parser_instruction(instr)
    case instr.raw.downcase
    when '.text'
      @cursource = @textsrc
    when '.data'
      @cursource = @datasrc
    when '.entrypoint'
      # ".entrypoint <somelabel/expression>" or ".entrypoint" (here)
      @lexer.skip_space
      tok = @lexer.nexttok
      if tok && tok.type == :string
        entrypoint = Expression.parse(@lexer)
        raise instr unless entrypoint
      else
        entrypoint = new_label('entrypoint')
        @cursource << Label.new(entrypoint, instr.backtrace.dup)
      end
      @header.entry = entrypoint
    else
      super(instr)
    end
  end

  # parses the arguments if any, then assembles the pending text and data
  # sources into their sections; returns self
  def assemble(*a)
    parse(*a) unless a.empty?
    @text << assemble_sequence(@textsrc, @cpu)
    @textsrc.clear
    @data << assemble_sequence(@datasrc, @cpu)
    @datasrc.clear
    self
  end

  # yields each section with its base address: text first, then data
  # immediately after the text
  # NOTE(review): an unset magic yields base 0 here while #encode treats an
  # unset magic as QMAGIC - confirm this is intended
  def each_section
    text_base = @header.magic == 'QMAGIC' ? 4096 + 32 : 0
    yield @text, text_base
    yield @data, text_base + @text.virtsize
  end
end
194
+ end