metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192):
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,333 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+
9
+ module Metasm
10
+ class ExeFormat
11
# Encodes +seq+, an Array of source elements (Label, Data, Align, Padding,
# Offset, Instruction), into one EncodedData using +cpu+.
# Directives and instructions with several candidate encodings are kept
# aside and settled by +assemble_resolve+; when nothing is left pending the
# single accumulated EncodedData is used directly.
# The result has its own label binding applied through +fixup+.
# Raises EncodeError when the cpu returns no encoding for an instruction.
def assemble_sequence(seq, cpu)
  # alternates EncodedData with pending items (directives, or arrays of
  # candidate encodings); the final entry is always an EncodedData
  chunks = [EncodedData.new]

  seq.each do |item|
    case item
    when Label
      chunks.last.add_export(item.name, chunks.last.virtsize)
    when Data
      chunks.last << item.encode(cpu.endianness)
    when Align, Padding
      # make sure the filler is an EncodedData before storing the directive
      if item.fillwith and not item.fillwith.kind_of?(EncodedData)
        item.fillwith = item.fillwith.encode(cpu.endianness)
      end
      chunks << item << EncodedData.new
    when Offset
      chunks << item << EncodedData.new
    when Instruction
      encoded = cpu.encode_instruction(self, item)
      if not encoded.kind_of?(Array)
        chunks.last << encoded
      elsif encoded.length == 0
        raise EncodeError, "failed to encode #{item}"
      elsif encoded.length == 1
        chunks.last << encoded.first
      else
        # several candidates: decided later by assemble_resolve
        chunks << encoded << EncodedData.new
      end
    end
  end

  result = if chunks.length > 1
    assemble_resolve(chunks)
  else
    chunks.first
  end
  result.fixup result.binding
  result
end
44
+
45
+ # chooses among the multiple candidate sub-EncodedData left in +ary+ and returns a single merged EncodedData
46
+ # assumes all ambiguous edata have equivalent relocations in the same order
47
+ def assemble_resolve(ary)
48
+ startlabel = new_label('section_start') # symbolic base: every label offset below is expressed relative to it
49
+
50
+ # create two bindings where all elements are the shortest/longest possible
51
+ minbinding = {}
52
+ minoff = 0
53
+ maxbinding = {}
54
+ maxoff = 0
55
+
56
+ ary.each { |elem|
57
+ case elem
58
+ when Array
59
+ if elem.all? { |ed| ed.kind_of? EncodedData and ed.reloc.empty? } # no candidate carries a relocation
60
+ elem = [elem.sort_by { |ed| ed.length }.first] # only the shortest one is considered for the bounds (rebinds the block-local only)
61
+ end
62
+ elem.each { |e|
63
+ e.export.each { |label, off|
64
+ minbinding[label] = Expression[startlabel, :+, minoff + off]
65
+ maxbinding[label] = Expression[startlabel, :+, maxoff + off]
66
+ }
67
+ }
68
+ minoff += elem.map { |e| e.virtsize }.min
69
+ maxoff += elem.map { |e| e.virtsize }.max
70
+
71
+ when EncodedData
72
+ elem.export.each { |label, off|
73
+ minbinding[label] = Expression[startlabel, :+, minoff + off]
74
+ maxbinding[label] = Expression[startlabel, :+, maxoff + off]
75
+ }
76
+ minoff += elem.virtsize
77
+ maxoff += elem.virtsize
78
+
79
+ when Align
80
+ minoff += 0 # an alignment is counted as adding between 0 ...
81
+ maxoff += elem.val - 1 # ... and val-1 bytes
82
+
83
+ when Padding
84
+ # find the surrounding Offsets and compute the largest/shortest edata sizes to determine min/max length for the padding
85
+ prevoff = ary[0..ary.index(elem)].grep(Offset).last
86
+ nextoff = ary[ary.index(elem)..-1].grep(Offset).first
87
+ raise elem, 'need .offset after .pad' if not nextoff
88
+
89
+ # find all elements between the surrounding Offsets
90
+ previdx = prevoff ? ary.index(prevoff) + 1 : 0
91
+ surround = ary[previdx..ary.index(nextoff)-1]
92
+ surround.delete elem
93
+ if surround.find { |nelem| nelem.kind_of? Padding }
94
+ raise elem, 'need .offset beetween two .pad'
95
+ end
96
+ if surround.find { |nelem| nelem.kind_of? Align and ary.index(nelem) > ary.index(elem) }
97
+ raise elem, 'cannot .align after a .pad' # XXX really ?
98
+ end
99
+
100
+ # lenmin/lenmax are the extreme lengths of the Padding
101
+ nxt = Expression[nextoff.val]
102
+ ext = nxt.externals
103
+ raise elem, "bad offset #{nxt}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first])
104
+ nxt = Expression[nxt, :-, startlabel] if not nxt.bind(minbinding).reduce.kind_of? ::Integer # subtract the base when the bound offset is not a plain Integer
105
+ prv = Expression[prevoff ? prevoff.val : 0]
106
+ ext = prv.externals
107
+ raise elem, "bad offset #{prv}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first])
108
+ prv = Expression[prv, :-, startlabel] if not prv.bind(minbinding).reduce.kind_of? ::Integer
109
+
110
+ lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce # smallest gap: next offset at its minimum, previous at its maximum
111
+ lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce # largest gap: the opposite combination
112
+ raise elem, "bad labels: #{lenmin}" if not lenmin.kind_of? ::Integer or not lenmax.kind_of? ::Integer
113
+ surround.each { |nelem| # remove the room taken by the other elements sharing the gap
114
+ case nelem
115
+ when Array
116
+ lenmin -= nelem.map { |e| e.virtsize }.max
117
+ lenmax -= nelem.map { |e| e.virtsize }.min
118
+ when EncodedData
119
+ lenmin -= nelem.virtsize
120
+ lenmax -= nelem.virtsize
121
+ when Align
122
+ lenmin -= nelem.val - 1
123
+ lenmax -= 0
124
+ end
125
+ }
126
+ raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0
127
+ minoff += [lenmin, 0].max # a negative lower bound is clamped to 0
128
+ maxoff += lenmax
129
+
130
+ when Offset
131
+ # nothing to do for now
132
+ else
133
+ raise "Internal error: bad object #{elem.inspect} in encode_resolve"
134
+ end
135
+ }
136
+
137
+ # checks an expression's linearity: true when walking down the expression ends on a non-Expression leaf
138
+ check_linear = lambda { |expr|
139
+ expr = expr.reduce if expr.kind_of? Expression
140
+ while expr.kind_of? Expression
141
+ case expr.op
142
+ when :*
143
+ if expr.lexpr.kind_of? Numeric; expr = expr.rexpr
144
+ elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr
145
+ else break
146
+ end
147
+ when :/, :>>, :<<
148
+ if expr.rexpr.kind_of? Numeric; expr = expr.lexpr
149
+ else break
150
+ end
151
+ when :+, :-
152
+ if not expr.lexpr; expr = expr.rexpr
153
+ elsif expr.lexpr.kind_of? Numeric; expr = expr.rexpr
154
+ elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr
155
+ else
156
+ break if not check_linear[expr.rexpr] # both sides are non-numeric: check the right side recursively, then go on with the left
157
+ expr = expr.lexpr
158
+ end
159
+ else break
160
+ end
161
+ end
162
+
163
+ not expr.kind_of? Expression
164
+ }
165
+
166
+ # now we can resolve all relocations
167
+ # for linear expressions of internal variables (ie differences of labels from the ary):
168
+ # - calc target numeric bounds, and reject relocs not accepting worst case value
169
+ # - else reject all but largest place available
170
+ # then choose the shortest overall EData left
171
+ ary.map! { |elem|
172
+ case elem
173
+ when Array
174
+ # for each external, compute numeric target values using minbinding[external] and maxbinding[external]
175
+ # this gives us all extreme values for linear expressions
176
+ target_bounds = {}
177
+ rec_checkminmax = lambda { |idx, target, binding, extlist|
178
+ if extlist.empty?
179
+ (target_bounds[idx] ||= []) << target.bind(binding).reduce
180
+ else
181
+ rec_checkminmax[idx, target, binding.merge(extlist.last => minbinding[extlist.last]), extlist[0...-1]]
182
+ rec_checkminmax[idx, target, binding.merge(extlist.last => maxbinding[extlist.last]), extlist[0...-1]]
183
+ end
184
+ }
185
+ # biggest size available for this relocation (for non-linear/external)
186
+ wantsize = {}
187
+
188
+ elem.each { |e|
189
+ e.reloc.sort.each_with_index { |r_, i|
190
+ r = r_[1]
191
+ # has external ref
192
+ if not r.target.bind(minbinding).reduce.kind_of?(Numeric) or not check_linear[r.target]
193
+ # find the biggest relocation type for the current target
194
+ wantsize[i] = elem.map { |edata|
195
+ edata.reloc.sort[i][1].type
196
+ }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length
197
+ else
198
+ rec_checkminmax[i, r.target, {}, r.target.externals]
199
+ end
200
+ }
201
+ }
202
+
203
+ # reject candidates with reloc type too small
204
+ acceptable = elem.find_all { |edata|
205
+ r = edata.reloc.sort
206
+ (0...r.length).all? { |i|
207
+ if wantsize[i]
208
+ r[i][1].type == wantsize[i]
209
+ else
210
+ target_bounds[i].all? { |b| Expression.in_range?(b, r[i][1].type) }
211
+ end
212
+ }
213
+ }
214
+
215
+ raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty?
216
+
217
+ # keep the shortest
218
+ acceptable.sort_by { |edata| edata.virtsize }.first
219
+ else
220
+ elem
221
+ end
222
+ }
223
+
224
+ # assemble all parts, resolve padding sizes, check offset directives
225
+ edata = EncodedData.new
226
+
227
+ # fills edata with repetitions of data until targetsize; without data, only the virtual size is set
228
+ fillwith = lambda { |targetsize, data|
229
+ if data
230
+ if data.reloc.empty? and not data.data.empty? # avoid useless iterations
231
+ nr = (targetsize-edata.virtsize) / data.length - 1
232
+ if nr > 0
233
+ dat = data.data.ljust(data.virtsize, 0.chr)
234
+ edata << (dat * nr)
235
+ end
236
+ end
237
+ while edata.virtsize + data.virtsize <= targetsize
238
+ edata << data
239
+ end
240
+ if edata.virtsize < targetsize
241
+ edata << data[0, targetsize - edata.virtsize] # the remaining room takes a truncated copy of the filler
242
+ end
243
+ else
244
+ edata.virtsize = targetsize
245
+ end
246
+ }
247
+
248
+ ary.each { |elem|
249
+ case elem
250
+ when EncodedData
251
+ edata << elem
252
+ when Align
253
+ fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith]
254
+ when Offset
255
+ raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{edata.virtsize}" if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce
256
+ when Padding
257
+ nextoff = ary[ary.index(elem)..-1].grep(Offset).first
258
+ targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce
259
+ ary[ary.index(elem)+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize } # leave room for what sits between the pad and the next offset
260
+ raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length
261
+ fillwith[targetsize, elem.fillwith]
262
+ else raise "Internal error: #{elem.inspect}"
263
+ end
264
+ }
265
+
266
+ edata
267
+ end
268
+ end
269
+
270
class Expression
  # Encodes this expression as an immediate of the given +type+.
  # When the expression reduces to an Integer, the bytes come from
  # Expression.encode_imm. Otherwise the result is a zero-filled placeholder
  # of INT_SIZE[type]/8 bytes carrying a Relocation for this expression at
  # offset 0.
  def encode(type, endianness, backtrace=nil)
    reduced = reduce
    if reduced.kind_of?(Integer)
      EncodedData.new Expression.encode_imm(reduced, type, endianness, backtrace)
    else
      placeholder = [0].pack('C') * (INT_SIZE[type]/8)
      reloc = Relocation.new(self, type, endianness, backtrace)
      EncodedData.new(placeholder, :reloc => {0 => reloc})
    end
  end

  class << self
    # Returns the String holding +val+ encoded on INT_SIZE[type]/8 bytes.
    # +type+ may be an Integer byte count, in which case the INT_SIZE key
    # whose name starts with 'a' and has that width is used.
    # +endianness+ is :big or :little, or any object answering #endianness.
    # Raises RuntimeError on any other endianness and EncodeError when
    # +val+ is not in range for +type+.
    def encode_imm(val, type, endianness, backtrace=nil)
      if type.kind_of? ::Integer
        wanted_bits = 8*type
        type = INT_SIZE.keys.find { |key| key.to_s[0] == ?a and INT_SIZE[key] == wanted_bits }
      end
      endianness = endianness.endianness unless endianness.kind_of? ::Symbol
      unless [:big, :little].include? endianness
        raise "unsupported endianness #{endianness.inspect}"
      end
      if not in_range?(val, type)
        raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{(Backtrace::backtrace_str(backtrace) if backtrace)}")
      end

      # bytes are produced least significant first, then flipped for big endian
      nbytes = INT_SIZE[type]/8
      packed = Array.new(nbytes) { |i| (val >> (8*i)) & 0xff }.pack('C*')
      endianness == :little ? packed : packed.reverse
    end
    alias encode_immediate encode_imm
  end
end
290
+
291
class Data
  # Returns an EncodedData holding @count repetitions of the encoded @data.
  # One repetition is built according to the kind of @data:
  # * :uninitialized - no raw bytes, only a virtual size of one @type integer
  # * String         - every byte encoded as one @type integer
  #                    (db 'foo' => 'foo', dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o")
  # * Expression     - encoded as one @type integer
  # * Array          - concatenation of each member's own encoding
  def encode(endianness)
    unit = case @data
    when :uninitialized
      EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]]/8)
    when String
      # XXX could be optimised, but should not be significant
      chars = EncodedData.new
      @data.unpack('C*').each { |chr|
        chars << Expression.encode_imm(chr, INT_TYPE[@type], endianness, @backtrace)
      }
      chars
    when Expression
      @data.encode INT_TYPE[@type], endianness, @backtrace
    when Array
      members = EncodedData.new
      @data.each { |d| members << d.encode(endianness) }
      members
    end

    # n times
    repeated = EncodedData.new
    (0...@count).each { repeated << unit }
    repeated
  end
end
310
+
311
class CPU
  # Encodes instruction +i+ and returns an Array of candidate EncodedData.
  # Candidate opcodes are those listed under the instruction name whose
  # argument count matches and whose every argument passes
  # +#parse_arg_valid?+; each is encoded with the arch-specific
  # +encode_instr_op+. Opcodes failing with EncodeError are skipped.
  # Raises EncodeError when no encoding is left; the message carries the
  # text of the last encoding failure, if any.
  def encode_instruction(program, i)
    errmsg = ''

    candidates = opcode_list_byname[i.opname].to_a.find_all do |op|
      next false unless op.args.length == i.args.length
      op.args.zip(i.args).all? { |field, arg| parse_arg_valid?(op, field, arg) }
    end

    encodings = []
    candidates.each do |op|
      begin
        encodings << encode_instr_op(program, i, op)
      rescue EncodeError => err
        errmsg = " (#{err.message})"
      end
    end
    encodings = encodings.compact.flatten

    raise EncodeError, "no matching opcode found for #{i}#{errmsg}" if encodings.empty?

    # every relocation gets the backtrace of the source instruction
    encodings.each { |ed| ed.reloc.each_value { |rel| rel.backtrace = i.backtrace } }
    encodings
  end
end
333
+ end
@@ -0,0 +1,194 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/exe_format/main'
7
+ require 'metasm/encode'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class AOut < ExeFormat
12
+ MAGIC = { 0407 => 'OMAGIC', 0410 => 'NMAGIC', 0413 => 'ZMAGIC', # keys are octal literals; Header maps its :magic field through this table
13
+ 0314 => 'QMAGIC', 0421 => 'CMAGIC'
14
+ }
15
+ MACHINE_TYPE = { 0 => 'OLDSUN2', 1 => '68010', 2 => '68020', # Header maps its :machtype field through this table
16
+ 3 => 'SPARC', 100 => 'PC386', 134 => 'I386', 135 => 'M68K', # NOTE(review): 'SPARC' is listed for both 3 and 138 - confirm intended
17
+ 136 => 'M68K4K', 137 => 'NS32532', 138 => 'SPARC',
18
+ 139 => 'PMAX', 140 => 'VAX', 141 => 'ALPHA', 142 => 'MIPS',
19
+ 143 => 'ARM6', 151 => 'MIPS1', 152 => 'MIPS2', 300 => 'HP300',
20
+ 0x20B => 'HPUX800', 0x20C => 'HPUX'
21
+ }
22
+ FLAGS = { 0x10 => 'PIC', 0x20 => 'DYNAMIC' } # bit masks used for the Header :flags field
23
+ SYMBOL_TYPE = { 0 => 'UNDF', 1 => 'ABS', 2 => 'TEXT', # NOTE(review): not referenced anywhere in the visible code - presumably meant for Symbol#type
24
+ 3 => 'DATA', 4 => 'BSS', 5 => 'INDR', 6 => 'SIZE',
25
+ 9 => 'COMM', 10=> 'SETA', 11=> 'SETT', 12=> 'SETD',
26
+ 13=> 'SETB', 14=> 'SETV', 15=> 'FN'
27
+ }
28
+
29
+ attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel # :symbols, :textrel and :datarel are never assigned in the visible code
30
+
31
# The a.out header: one bitfield word (magic, machine type, flags)
# followed by seven words (text, data, bss, syms, entry, trsz, drsz).
class Header < SerialStruct
  bitfield(:word, 0 => :magic, 16 => :machtype, 24 => :flags)
  fld_enum :magic, MAGIC
  fld_enum :machtype, MACHINE_TYPE
  fld_bits :flags, FLAGS
  words(:text, :data, :bss, :syms, :entry, :trsz, :drsz)

  # Decodes the header from +aout+.
  # Raises InvalidExeFormat unless the magic is one of OMAGIC, NMAGIC,
  # ZMAGIC or QMAGIC.
  def decode(aout)
    super(aout)

    return if ['OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC'].include?(@magic)
    raise InvalidExeFormat, "Bad A.OUT signature #@magic"
  end

  # Fills in the fields left unset: QMAGIC / PC386 / no flags, and the
  # text and data sizes taken from +aout+ (the text size counts 32 extra
  # bytes for QMAGIC).
  def set_default_values(aout)
    @magic ||= 'QMAGIC'
    @machtype ||= 'PC386'
    @flags ||= []
    if aout.text
      @text ||= aout.text.length + (@magic == 'QMAGIC' ? 32 : 0)
    end
    if aout.data
      @data ||= aout.data.length
    end

    super(aout)
  end
end
57
+
58
+ class Relocation < SerialStruct # one relocation entry: an address word followed by a packed bitfield word
59
+ word :address
60
+ bitfield :word, 0 => :symbolnum, 24 => :pcrel, 25 => :length, # keys are the starting bit of each subfield
61
+ 27 => :extern, 28 => :baserel, 29 => :jmptable, 30 => :relative, 31 => :rtcopy
62
+ fld_enum :length, 0 => 1, 1 => 2, 2 => 4, 3 => 8 # stored value => decoded value; presumably a size in bytes - TODO confirm
63
+ fld_default :length, 4
64
+ end
65
+
66
# One symbol table entry. +name_p+ is the offset of the symbol name inside
# the string table; +name+ holds the resolved name once decoded.
class Symbol < SerialStruct
  word :name_p
  bitfield :byte, 0 => :extern, 1 => :type, 5 => :stab
  byte :other
  half :desc
  word :value
  attr_accessor :name

  # Decodes the entry from +aout+; when the +strings+ table is given, also
  # reads the NUL-terminated name found at offset +name_p+.
  def decode(aout, strings=nil)
    super(aout)
    return unless strings
    # a table lacking the final NUL yields the rest of the table instead of
    # building a range with a nil end (an error on older rubies)
    name_end = strings.index(?\0, @name_p) || strings.length
    @name = strings[@name_p...name_end]
  end

  # Makes sure a non-empty +name+ is present in the +strings+ table and
  # that +name_p+ points to it, appending "name\0" to the table if needed.
  def set_default_values(aout, strings=nil)
    if strings and name and @name != ''
      # the terminator is part of the comparison: otherwise a name that is
      # only a prefix of the entry at name_p ('foo' vs 'foobar') would be
      # accepted and later decoded as the longer string
      if not @name_p or strings[@name_p, @name.length + 1] != @name + 0.chr
        @name_p = strings.length
        strings << @name << 0
      end
    end
    super(aout, strings)
  end
end
89
+
90
# Fixed-width integer helpers, all using the file's @endianness.
# The decode_* methods read an unsigned 8/16/32-bit immediate from +edata+
# (@encoded by default); the encode_* methods encode +w+ with the same types.
def decode_byte(edata = @encoded)
  edata.decode_imm(:u8, @endianness)
end

def decode_half(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end

def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end

def encode_byte(w)
  Expression[w].encode(:u8, @endianness)
end

def encode_half(w)
  Expression[w].encode(:u16, @endianness)
end

def encode_word(w)
  Expression[w].encode(:u32, @endianness)
end
96
+
97
# Creates an empty a.out: blank header, empty text and data sections.
# The endianness is taken from +cpu+ when given, :little otherwise.
def initialize(cpu = nil)
  @endianness = :little
  @endianness = cpu.endianness if cpu
  @header = Header.new
  @text = EncodedData.new
  @data = EncodedData.new
  super(cpu)
end
104
+
105
# Decodes the header found at the very start of @encoded.
def decode_header
  @encoded.ptr = 0
  @header.decode(self)
end

# Decodes the header, then loads the text and data sections.
# For ZMAGIC the text is read from offset 1024; for QMAGIC the header's
# text size is reduced by 32 since the header is included in .text.
# The relocation, symbol and string areas are read but not parsed yet.
def decode
  decode_header

  text_len = @header.text
  magic = @header.magic
  if magic == 'ZMAGIC'
    @encoded.ptr = 1024
  elsif magic == 'QMAGIC'
    text_len -= 32	# header is included in .text
  end
  @text = EncodedData.new << @encoded.read(text_len)

  @data = EncodedData.new << @encoded.read(@header.data)

  # TODO: these areas are only skipped over for now
  _textrel = @encoded.read(@header.trsz)
  _datarel = @encoded.read(@header.drsz)
  _syms = @encoded.read(@header.syms)
  _strings = @encoded.read
end
128
+
129
# Builds the raw file into @encoded (header, text, data) and returns its data.
# Only QMAGIC is supported (other formats are not mmapable on linux anyway;
# OMAGIC could be supported): raises EncodeError for any other magic
# already set in the header.
def encode
  magic = @header.magic
  raise EncodeError, 'cannot encode non-QMAGIC a.out' if magic and magic != 'QMAGIC'

  page = 4096
  hdrlen = 32

  # data must be 4096-aligned
  # 32 bytes of header included in .text
  @text.virtsize = (@text.virtsize + hdrlen + page - 1) / page * page - hdrlen
  unless @data.rawsize % page == 0
    # write the last byte of the page so the raw data covers whole pages
    @data[(@data.rawsize + page - 1) / page * page - 1] = 0
  end

  @header.text = @text.length+hdrlen
  @header.data = @data.rawsize
  @header.bss = @data.virtsize - @data.rawsize

  @encoded = EncodedData.new
  @encoded << @header.encode(self)
  # text labels are based at 4096+32, data labels right after the text
  labels = @text.binding(page+hdrlen).merge @data.binding(page + @header.text)
  @encoded << @text << @data
  @encoded.fixup! labels
  @encoded.data
end
152
+
153
# Prepares the source lists for .text and .data; .text is the default target.
def parse_init
  @textsrc = [] unless @textsrc
  @datasrc = [] unless @datasrc
  @cursource = @textsrc unless @cursource
  super()
end

# Handles the format-specific parser directives:
# * .text / .data - select the list receiving the following source
# * .entrypoint   - ".entrypoint <somelabel/expression>" sets the header
#                   entry to that expression; a bare ".entrypoint" defines a
#                   new label at the current position and uses it
# Anything else is passed to the superclass.
def parse_parser_instruction(instr)
  directive = instr.raw.downcase
  if directive == '.text'
    @cursource = @textsrc
  elsif directive == '.data'
    @cursource = @datasrc
  elsif directive == '.entrypoint'
    @lexer.skip_space
    tok = @lexer.nexttok
    if tok and tok.type == :string
      entrypoint = Expression.parse(@lexer)
      raise instr if not entrypoint
    else
      entrypoint = new_label('entrypoint')
      @cursource << Label.new(entrypoint, instr.backtrace.dup)
    end
    @header.entry = entrypoint
  else
    super(instr)
  end
end
177
+
178
# Parses the arguments, if any, then assembles the pending .text and .data
# sources with @cpu, appending the results to @text and @data.
# The source lists are emptied once assembled. Returns self.
def assemble(*a)
  parse(*a) unless a.empty?

  text_chunk = assemble_sequence(@textsrc, @cpu)
  @text << text_chunk
  @textsrc.clear

  data_chunk = assemble_sequence(@datasrc, @cpu)
  @data << data_chunk
  @datasrc.clear

  self
end
186
+
187
# Yields each section with its base address: @text first, then @data
# placed right after the text's virtual size.
# The text base is 4096+32 for QMAGIC files and 0 otherwise.
def each_section
  text_base = (@header.magic == 'QMAGIC') ? 4096+32 : 0
  yield @text, text_base
  yield @data, text_base + @text.virtsize
end
193
+ end
194
+ end