metasm 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
@@ -0,0 +1,333 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/main'
|
8
|
+
|
9
|
+
module Metasm
|
10
|
+
class ExeFormat
  # Encodes an Array of source elements (Label, Data, Align, Padding, Offset,
  # Instruction) into one EncodedData.
  # Instructions for which the cpu returns several candidate encodings are
  # settled by +assemble_resolve+.
  def assemble_sequence(seq, cpu)
    # list of EncodedData, directives and sub-arrays of candidate EncodedData
    # invariant: the last entry is always an EncodedData
    chunks = [EncodedData.new]

    seq.each do |item|
      case item
      when Label
        chunks.last.add_export(item.name, chunks.last.virtsize)
      when Data
        chunks.last << item.encode(cpu.endianness)
      when Align, Padding
        # make sure the filler is an EncodedData before it is used for padding
        filler = item.fillwith
        item.fillwith = filler.encode(cpu.endianness) if filler && !filler.kind_of?(EncodedData)
        chunks << item << EncodedData.new
      when Offset
        chunks << item << EncodedData.new
      when Instruction
        encoded = cpu.encode_instruction(self, item)
        case encoded
        when Array
          if encoded.empty?
            raise EncodeError, "failed to encode #{item}"
          elsif encoded.length == 1
            chunks.last << encoded.first
          else
            # several candidates: to solve later
            chunks << encoded << EncodedData.new
          end
        else
          chunks.last << encoded
        end
      end
    end

    edata = chunks.length > 1 ? assemble_resolve(chunks) : chunks.first
    edata.fixup edata.binding
    edata
  end

  # Chooses among multiple possible sub-EncodedData and concatenates everything
  # into a single EncodedData.
  # Assumes all ambiguous edata have equivalent relocations in the same order.
  def assemble_resolve(ary)
    startlabel = new_label('section_start')

    # pass 1: create two bindings where all elements are the shortest/longest possible
    minbinding = {}
    maxbinding = {}
    minoff = 0
    maxoff = 0

    # binds every export of one edata at the current minimal/maximal offsets
    bind_exports = lambda { |chunk|
      chunk.export.each { |label, off|
        minbinding[label] = Expression[startlabel, :+, minoff + off]
        maxbinding[label] = Expression[startlabel, :+, maxoff + off]
      }
    }

    ary.each do |elem|
      case elem
      when Array
        candidates = elem
        # without any relocation, only the shortest candidate matters here
        if candidates.all? { |ed| ed.kind_of?(EncodedData) && ed.reloc.empty? }
          candidates = [candidates.sort_by { |ed| ed.length }.first]
        end
        candidates.each { |ed| bind_exports[ed] }
        sizes = candidates.map { |ed| ed.virtsize }
        minoff += sizes.min
        maxoff += sizes.max

      when EncodedData
        bind_exports[elem]
        size = elem.virtsize
        minoff += size
        maxoff += size

      when Align
        # an alignment inserts between 0 and val-1 bytes
        maxoff += elem.val - 1

      when Padding
        # find the surrounding Offsets and compute the largest/shortest edata
        # sizes to determine the min/max length of the padding
        padidx = ary.index(elem)
        prevoff = ary[0..padidx].grep(Offset).last
        nextoff = ary[padidx..-1].grep(Offset).first
        raise elem, 'need .offset after .pad' unless nextoff

        # all elements between the surrounding Offsets, except the padding itself
        previdx = prevoff ? ary.index(prevoff) + 1 : 0
        surround = ary[previdx..ary.index(nextoff)-1]
        surround.delete elem
        if surround.any? { |nelem| nelem.kind_of? Padding }
          raise elem, 'need .offset beetween two .pad'
        end
        if surround.any? { |nelem| nelem.kind_of?(Align) && ary.index(nelem) > padidx }
          raise elem, 'cannot .align after a .pad' # XXX really ?
        end

        # validates an offset value (at most one external, which must be a
        # known label) and subtracts startlabel when binding it does not
        # reduce to an Integer
        relative = lambda { |val|
          expr = Expression[val]
          ext = expr.externals
          raise elem, "bad offset #{expr}" if ext.length > 1 || (ext.length == 1 && !minbinding[ext.first])
          expr = Expression[expr, :-, startlabel] unless expr.bind(minbinding).reduce.kind_of?(::Integer)
          expr
        }
        nxt = relative[nextoff.val]
        prv = relative[prevoff ? prevoff.val : 0]

        # lenmin/lenmax are the extreme lengths of the Padding
        lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce
        lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce
        unless lenmin.kind_of?(::Integer) && lenmax.kind_of?(::Integer)
          raise elem, "bad labels: #{lenmin}"
        end
        surround.each do |nelem|
          case nelem
          when Array
            sizes = nelem.map { |ed| ed.virtsize }
            lenmin -= sizes.max
            lenmax -= sizes.min
          when EncodedData
            lenmin -= nelem.virtsize
            lenmax -= nelem.virtsize
          when Align
            lenmin -= nelem.val - 1
          end
        end
        if lenmax < 0
          raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes"
        end
        minoff += [lenmin, 0].max
        maxoff += lenmax

      when Offset
        # nothing to do for now
      else
        raise "Internal error: bad object #{elem.inspect} in encode_resolve"
      end
    end

    # checks an expression linearity: walks down the expression tree, giving up
    # on any operation that is not a product/division/shift by a Numeric or a
    # sum/difference of linear terms
    linear = lambda { |node|
      node = node.reduce if node.kind_of? Expression
      while node.kind_of? Expression
        lhs = node.lexpr
        rhs = node.rexpr
        case node.op
        when :*
          if lhs.kind_of? Numeric
            node = rhs
          elsif rhs.kind_of? Numeric
            node = lhs
          else
            break
          end
        when :/, :>>, :<<
          break unless rhs.kind_of? Numeric
          node = lhs
        when :+, :-
          if !lhs || lhs.kind_of?(Numeric)
            node = rhs
          elsif rhs.kind_of? Numeric
            node = lhs
          else
            break unless linear[rhs]
            node = lhs
          end
        else
          break
        end
      end

      !node.kind_of?(Expression)
    }

    # pass 2: now we can resolve all relocations
    # for linear expressions of internal variables (ie differences of labels from the ary):
    #  - calc target numeric bounds, and reject relocs not accepting worst case value
    #  - else reject all but largest place available
    # then choose the shortest overall EData left
    ary.map! { |elem|
      case elem
      when Array
        # for each external, compute numeric target values using
        # minbinding[external] and maxbinding[external]
        # this gives us all extreme values for linear expressions
        target_bounds = {}
        collect_bounds = lambda { |idx, target, bound, pending|
          if pending.empty?
            (target_bounds[idx] ||= []) << target.bind(bound).reduce
          else
            ext = pending.last
            rest = pending[0...-1]
            collect_bounds[idx, target, bound.merge(ext => minbinding[ext]), rest]
            collect_bounds[idx, target, bound.merge(ext => maxbinding[ext]), rest]
          end
        }
        # biggest size available for this relocation (for non-linear/external)
        wantsize = {}

        elem.each do |cand|
          cand.reloc.sort.each_with_index do |(_off, rel), i|
            if rel.target.bind(minbinding).reduce.kind_of?(Numeric) && linear[rel.target]
              collect_bounds[i, rel.target, {}, rel.target.externals]
            else
              # has external ref: find the biggest relocation type for the current target
              wantsize[i] = elem.map { |other|
                other.reloc.sort[i][1].type
              }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length
            end
          end
        end

        # reject candidates with reloc type too small
        acceptable = elem.find_all { |cand|
          relocs = cand.reloc.sort
          (0...relocs.length).all? { |i|
            type = relocs[i][1].type
            if wantsize[i]
              type == wantsize[i]
            else
              target_bounds[i].all? { |b| Expression.in_range?(b, type) }
            end
          }
        }

        if acceptable.empty?
          raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}"
        end

        # keep the shortest
        acceptable.sort_by { |cand| cand.virtsize }.first
      else
        elem
      end
    }

    # pass 3: assemble all parts, resolve padding sizes, check offset directives
    out = EncodedData.new

    # fills out with repetitions of filler until targetsize
    # without filler, only the virtual size is set
    pad_to = lambda { |targetsize, filler|
      if filler
        if filler.reloc.empty? && !filler.data.empty? # avoid useless iterations
          nr = (targetsize - out.virtsize) / filler.length - 1
          out << (filler.data.ljust(filler.virtsize, 0.chr) * nr) if nr > 0
        end
        out << filler while out.virtsize + filler.virtsize <= targetsize
        out << filler[0, targetsize - out.virtsize] if out.virtsize < targetsize
      else
        out.virtsize = targetsize
      end
    }

    ary.each do |elem|
      case elem
      when EncodedData
        out << elem
      when Align
        pad_to[EncodedData.align_size(out.virtsize, elem.val), elem.fillwith]
      when Offset
        if out.virtsize != Expression[elem.val].bind(out.binding(0)).reduce
          raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{out.virtsize}"
        end
      when Padding
        padidx = ary.index(elem)
        nextoff = ary[padidx..-1].grep(Offset).first
        # the padding must stop early enough to leave room for what follows it
        targetsize = Expression[nextoff.val].bind(out.binding(0)).reduce
        ary[padidx+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize }
        if targetsize < out.length
          raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-out.length} bytes long"
        end
        pad_to[targetsize, elem.fillwith]
      else
        raise "Internal error: #{elem.inspect}"
      end
    end

    out
  end
end
|
269
|
+
|
270
|
+
class Expression
|
271
|
+
def encode(type, endianness, backtrace=nil)
|
272
|
+
case val = reduce
|
273
|
+
when Integer; EncodedData.new Expression.encode_imm(val, type, endianness, backtrace)
|
274
|
+
else EncodedData.new([0].pack('C')*(INT_SIZE[type]/8), :reloc => {0 => Relocation.new(self, type, endianness, backtrace)})
|
275
|
+
end
|
276
|
+
end
|
277
|
+
|
278
|
+
class << self
|
279
|
+
def encode_imm(val, type, endianness, backtrace=nil)
|
280
|
+
type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer
|
281
|
+
endianness = endianness.endianness if not endianness.kind_of? ::Symbol
|
282
|
+
raise "unsupported endianness #{endianness.inspect}" unless [:big, :little].include? endianness
|
283
|
+
raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{(Backtrace::backtrace_str(backtrace) if backtrace)}") if not in_range?(val, type)
|
284
|
+
s = (0...INT_SIZE[type]/8).map { |i| (val >> (8*i)) & 0xff }.pack('C*')
|
285
|
+
endianness != :little ? s.reverse : s
|
286
|
+
end
|
287
|
+
alias encode_immediate encode_imm
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
class Data
  # Encodes the data definition into an EncodedData: one unit is built from
  # @data, then appended @count times to the result.
  def encode(endianness)
    unit =
      case @data
      when :uninitialized
        # no raw bytes, only virtual size
        EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]]/8)
      when String
        # db 'foo' => 'foo'		# XXX could be optimised, but should not be significant
        # dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o"
        acc = EncodedData.new
        @data.unpack('C*').each { |chr|
          acc = acc << Expression.encode_imm(chr, INT_TYPE[@type], endianness, @backtrace)
        }
        acc
      when Expression
        @data.encode INT_TYPE[@type], endianness, @backtrace
      when Array
        acc = EncodedData.new
        @data.each { |d| acc = acc << d.encode(endianness) }
        acc
      end

    # n times
    repeated = EncodedData.new
    (0...@count).each { repeated = repeated << unit }
    repeated
  end
end
|
310
|
+
|
311
|
+
class CPU
  # Returns an array of EncodedData, one or more per opcode able to encode
  # the instruction +i+.
  # Uses +#parse_arg_valid?+ to find the opcodes whose signature matches the
  # instruction, and +encode_instr_op+ (arch-specific) to encode each of them.
  # Raises EncodeError when no opcode could encode the instruction.
  def encode_instruction(program, i)
    errmsg = ''

    # opcodes with the same name, argument count and compatible argument types
    candidates = opcode_list_byname[i.opname].to_a.find_all { |o|
      o.args.length == i.args.length and
        o.args.zip(i.args).all? { |f, a| parse_arg_valid?(o, f, a) }
    }

    encodings = []
    candidates.each do |op|
      begin
        result = encode_instr_op(program, i, op)
        encodings << result unless result.nil?
      rescue EncodeError => err
        # remember the last failure, to report it if nothing works
        errmsg = " (#{err.message})"
      end
    end
    oplist = encodings.flatten

    raise EncodeError, "no matching opcode found for #{i}#{errmsg}" if oplist.empty?

    # relocations point back to the source instruction
    oplist.each do |ed|
      ed.reloc.each_value { |rel| rel.backtrace = i.backtrace }
    end
    oplist
  end
end
|
333
|
+
end
|
@@ -0,0 +1,194 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
require 'metasm/exe_format/main'
|
7
|
+
require 'metasm/encode'
|
8
|
+
require 'metasm/decode'
|
9
|
+
|
10
|
+
module Metasm
|
11
|
+
class AOut < ExeFormat
  MAGIC = {
    0407 => 'OMAGIC', 0410 => 'NMAGIC', 0413 => 'ZMAGIC',
    0314 => 'QMAGIC', 0421 => 'CMAGIC'
  }
  MACHINE_TYPE = {
    0 => 'OLDSUN2', 1 => '68010', 2 => '68020',
    3 => 'SPARC', 100 => 'PC386', 134 => 'I386', 135 => 'M68K',
    136 => 'M68K4K', 137 => 'NS32532', 138 => 'SPARC',
    139 => 'PMAX', 140 => 'VAX', 141 => 'ALPHA', 142 => 'MIPS',
    143 => 'ARM6', 151 => 'MIPS1', 152 => 'MIPS2', 300 => 'HP300',
    0x20B => 'HPUX800', 0x20C => 'HPUX'
  }
  FLAGS = { 0x10 => 'PIC', 0x20 => 'DYNAMIC' }
  SYMBOL_TYPE = {
    0 => 'UNDF', 1 => 'ABS', 2 => 'TEXT',
    3 => 'DATA', 4 => 'BSS', 5 => 'INDR', 6 => 'SIZE',
    9 => 'COMM', 10 => 'SETA', 11 => 'SETT', 12 => 'SETD',
    13 => 'SETB', 14 => 'SETV', 15 => 'FN'
  }

  attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel

  # the a.out file header
  class Header < SerialStruct
    bitfield :word, 0 => :magic, 16 => :machtype, 24 => :flags
    fld_enum(:magic, MAGIC)
    fld_enum(:machtype, MACHINE_TYPE)
    fld_bits(:flags, FLAGS)
    words :text, :data, :bss, :syms, :entry, :trsz, :drsz

    # decodes the header, and rejects any magic other than O/N/Z/QMAGIC
    def decode(aout)
      super(aout)

      unless ['OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC'].include?(@magic)
        raise InvalidExeFormat, "Bad A.OUT signature #@magic"
      end
    end

    # fills the unset fields: QMAGIC / PC386 / no flags, section sizes taken from aout
    def set_default_values(aout)
      @magic ||= 'QMAGIC'
      @machtype ||= 'PC386'
      @flags ||= []
      if aout.text
        # with QMAGIC, the 32 bytes of header are counted in .text
        @text ||= aout.text.length + (@magic == 'QMAGIC' ? 32 : 0)
      end
      if aout.data
        @data ||= aout.data.length
      end

      super(aout)
    end
  end

  # a relocation entry
  class Relocation < SerialStruct
    word :address
    bitfield :word, 0 => :symbolnum, 24 => :pcrel, 25 => :length,
      27 => :extern, 28 => :baserel, 29 => :jmptable, 30 => :relative, 31 => :rtcopy
    fld_enum(:length, 0 => 1, 1 => 2, 2 => 4, 3 => 8)
    fld_default(:length, 4)
  end

  # a symbol table entry
  class Symbol < SerialStruct
    word :name_p
    bitfield :byte, 0 => :extern, 1 => :type, 5 => :stab
    byte :other
    half :desc
    word :value
    attr_accessor :name

    # decodes the entry; when +strings+ is given, reads the name found there
    # at index @name_p, up to the next NUL
    def decode(aout, strings=nil)
      super(aout)
      return unless strings

      terminator = strings.index(?\0, @name_p)
      @name = strings[@name_p...terminator]
    end

    # appends the name to +strings+ (NUL-terminated) and updates @name_p,
    # unless @name_p already points to that name
    def set_default_values(aout, strings=nil)
      if strings && name && @name != '' &&
          (!@name_p || strings[@name_p, @name.length] != @name)
        @name_p = strings.length
        strings << @name << 0
      end
      super(aout, strings)
    end
  end

  # integer decoding helpers, using the file endianness
  def decode_byte(edata = @encoded)
    edata.decode_imm(:u8, @endianness)
  end

  def decode_half(edata = @encoded)
    edata.decode_imm(:u16, @endianness)
  end

  def decode_word(edata = @encoded)
    edata.decode_imm(:u32, @endianness)
  end

  # integer encoding helpers, using the file endianness
  def encode_byte(w)
    Expression[w].encode(:u8, @endianness)
  end

  def encode_half(w)
    Expression[w].encode(:u16, @endianness)
  end

  def encode_word(w)
    Expression[w].encode(:u32, @endianness)
  end

  # the endianness is the one of +cpu+, little-endian without cpu
  def initialize(cpu = nil)
    if cpu
      @endianness = cpu.endianness
    else
      @endianness = :little
    end
    @header = Header.new
    @text = EncodedData.new
    @data = EncodedData.new
    super(cpu)
  end

  # decodes the header found at the beginning of @encoded
  def decode_header
    @encoded.ptr = 0
    @header.decode(self)
  end

  # decodes the header, then reads .text and .data
  # the relocations, symbols and strings are read but not interpreted
  def decode
    decode_header

    tlen = @header.text
    case @header.magic
    when 'ZMAGIC' then @encoded.ptr = 1024
    when 'QMAGIC' then tlen -= 32 # header is included in .text
    end
    @text = EncodedData.new << @encoded.read(tlen)

    @data = EncodedData.new << @encoded.read(@header.data)

    textrel = @encoded.read(@header.trsz)
    datarel = @encoded.read(@header.drsz)
    syms    = @encoded.read(@header.syms)
    strings = @encoded.read
    # TODO
  end

  # encodes the header, .text and .data into @encoded, and returns the raw data
  # only QMAGIC is supported
  def encode
    # non mmapable on linux anyway
    # could support OMAGIC..
    magic = @header.magic
    raise EncodeError, 'cannot encode non-QMAGIC a.out' if magic && magic != 'QMAGIC'

    # rounds up to a multiple of 4096
    page_round = lambda { |n| (n + 4096 - 1) / 4096 * 4096 }

    # data must be 4096-aligned
    # 32 bytes of header included in .text
    @text.virtsize = page_round[@text.virtsize + 32] - 32
    unless @data.rawsize % 4096 == 0
      @data[page_round[@data.rawsize] - 1] = 0
    end

    @header.text = @text.length+32
    @header.data = @data.rawsize
    @header.bss  = @data.virtsize - @data.rawsize

    @encoded = EncodedData.new
    @encoded << @header.encode(self)
    labels = @text.binding(4096+32).merge(@data.binding(4096 + @header.text))
    @encoded << @text << @data
    @encoded.fixup! labels
    @encoded.data
  end

  # sets up the source lists filled by the parser, .text being the default
  def parse_init
    @textsrc ||= []
    @datasrc ||= []
    @cursource ||= @textsrc
    super()
  end

  # handles the .text, .data and .entrypoint directives
  def parse_parser_instruction(instr)
    case instr.raw.downcase
    when '.text'
      @cursource = @textsrc
    when '.data'
      @cursource = @datasrc
    when '.entrypoint'
      # ".entrypoint <somelabel/expression>" or ".entrypoint" (here)
      @lexer.skip_space
      tok = @lexer.nexttok
      if tok && tok.type == :string
        entrypoint = Expression.parse(@lexer)
        raise instr unless entrypoint
      else
        entrypoint = new_label('entrypoint')
        @cursource << Label.new(entrypoint, instr.backtrace.dup)
      end
      @header.entry = entrypoint
    else
      super(instr)
    end
  end

  # parses the arguments if any, then assembles the pending .text and .data
  # sources into @text and @data
  def assemble(*a)
    parse(*a) unless a.empty?
    @text << assemble_sequence(@textsrc, @cpu)
    @textsrc.clear
    @data << assemble_sequence(@datasrc, @cpu)
    @datasrc.clear
    self
  end

  # yields .text then .data, each with its base address
  def each_section
    tva = @header.magic == 'QMAGIC' ? 4096+32 : 0
    yield @text, tva
    yield @data, tva + @text.virtsize
  end
end
|
194
|
+
end
|