metasm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
require 'metasm/main'
|
|
8
|
+
|
|
9
|
+
module Metasm
|
|
10
|
+
class ExeFormat
|
|
11
|
+
# Encodes a source sequence into a single EncodedData.
#
# +seq+ is walked in order; Label, Data, Align, Padding, Offset and
# Instruction items are handled, anything else is silently skipped.
# Instructions are encoded through +cpu.encode_instruction+; when the cpu
# returns several candidate encodings the choice is deferred to
# +assemble_resolve+.
#
# Returns the resulting EncodedData after fixing it up against its own binding.
# Raises EncodeError when an instruction yields an empty candidate list.
def assemble_sequence(seq, cpu)
  # Alternating list of EncodedData and things that cannot be sized yet
  # (directives, arrays of candidate encodings).
  # Invariant: the last entry is always an EncodedData we can append to.
  chunks = [EncodedData.new]

  seq.each do |item|
    case item
    when Label
      tail = chunks.last
      tail.add_export(item.name, tail.virtsize)
    when Data
      chunks.last << item.encode(cpu.endianness)
    when Align, Padding
      # the filler may still be a source-level object: encode it once, in place
      filler = item.fillwith
      item.fillwith = filler.encode(cpu.endianness) if filler && !filler.kind_of?(EncodedData)
      chunks << item << EncodedData.new
    when Offset
      chunks << item << EncodedData.new
    when Instruction
      encoded = cpu.encode_instruction(self, item)
      if !encoded.kind_of?(Array)
        chunks.last << encoded
      elsif encoded.length == 0
        raise EncodeError, "failed to encode #{item}"
      elsif encoded.length == 1
        chunks.last << encoded.first
      else
        # several candidates: keep them all, assemble_resolve will pick one
        chunks << encoded << EncodedData.new
      end
    end
  end

  edata = chunks.length > 1 ? assemble_resolve(chunks) : chunks.shift
  edata.fixup edata.binding
  edata
end
|
|
44
|
+
|
|
45
|
+
# Chooses one encoding for every ambiguous entry of +ary+ and concatenates
# the whole list into a single EncodedData, which is returned.
#
# +ary+ holds EncodedData, Arrays of candidate EncodedData, and
# Align / Padding / Offset directives (anything else is an internal error).
# All candidates of one Array are assumed to carry equivalent relocations
# in the same order.
#
# The work is done in four steps:
#  1. compute, for every exported label, its smallest and largest possible
#     offset from the section start (minbinding / maxbinding)
#  2. define a linearity check for relocation targets
#  3. for each candidate Array keep the shortest candidate whose relocation
#     sizes can hold the worst-case target value
#  4. append everything, materialising .align / .pad and checking .offset
#
# Raises EncodeError when no candidate fits or an .offset cannot be honoured.
# Padding problems found in step 1 are raised through the directive itself
# (<tt>raise elem, msg</tt>), so the exception is whatever elem#exception builds.
def assemble_resolve(ary)
  startlabel = new_label('section_start')

  # --- step 1: shortest / longest possible layout -------------------------
  minbinding = {}
  maxbinding = {}
  minoff = 0
  maxoff = 0

  # registers the exports of +ed+ at the current optimistic/pessimistic offsets
  record_exports = lambda { |ed|
    ed.export.each { |label, off|
      minbinding[label] = Expression[startlabel, :+, minoff + off]
      maxbinding[label] = Expression[startlabel, :+, maxoff + off]
    }
  }

  ary.each { |elem|
    case elem
    when Array
      choices = elem
      # without relocations nothing can invalidate a candidate: only the
      # shortest one matters for the bounds
      if choices.all? { |ed| ed.kind_of?(EncodedData) && ed.reloc.empty? }
        choices = [choices.sort_by { |ed| ed.length }.first]
      end
      choices.each { |ed| record_exports[ed] }
      sizes = choices.map { |ed| ed.virtsize }
      minoff += sizes.min
      maxoff += sizes.max

    when EncodedData
      record_exports[elem]
      size = elem.virtsize
      minoff += size
      maxoff += size

    when Align
      # best case: already aligned (adds 0); worst case: val - 1 filler bytes
      maxoff += elem.val - 1

    when Padding
      # locate the Offsets surrounding this pad; the pad length is whatever
      # is left between them once the other elements are accounted for
      padidx = ary.index(elem)
      prevoff = ary[0..padidx].grep(Offset).last
      nextoff = ary[padidx..-1].grep(Offset).first
      raise elem, 'need .offset after .pad' if not nextoff

      # every element sitting between the two Offsets, the pad excluded
      previdx = prevoff ? ary.index(prevoff) + 1 : 0
      surround = ary[previdx..(ary.index(nextoff) - 1)]
      surround.delete elem
      if surround.any? { |nelem| nelem.kind_of?(Padding) }
        raise elem, 'need .offset beetween two .pad'
      end
      if surround.any? { |nelem| nelem.kind_of?(Align) && ary.index(nelem) > padidx }
        raise elem, 'cannot .align after a .pad'	# XXX really ?
      end

      # turns an .offset value into an expression relative to the section
      # start; at most one external is tolerated and it must be a known label
      section_relative = lambda { |value|
        expr = Expression[value]
        ext = expr.externals
        if ext.length > 1 || (ext.length == 1 && !minbinding[ext.first])
          raise elem, "bad offset #{expr}"
        end
        expr = Expression[expr, :-, startlabel] unless expr.bind(minbinding).reduce.kind_of?(::Integer)
        expr
      }
      nxt = section_relative[nextoff.val]
      prv = section_relative[prevoff ? prevoff.val : 0]

      # lenmin / lenmax: extreme lengths the pad may end up with
      lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce
      lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce
      unless lenmin.kind_of?(::Integer) && lenmax.kind_of?(::Integer)
        raise elem, "bad labels: #{lenmin}"
      end
      surround.each { |nelem|
        case nelem
        when Array
          nsizes = nelem.map { |ed| ed.virtsize }
          lenmin -= nsizes.max
          lenmax -= nsizes.min
        when EncodedData
          lenmin -= nelem.virtsize
          lenmax -= nelem.virtsize
        when Align
          lenmin -= nelem.val - 1
        end
      }
      raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0
      minoff += [lenmin, 0].max
      maxoff += lenmax

    when Offset
      # nothing to do for now

    else
      raise "Internal error: bad object #{elem.inspect} in encode_resolve"
    end
  }

  # --- step 2: linearity check --------------------------------------------
  # true when +expr+ reduces to something built only from + - and from
  # * / >> << by a Numeric, i.e. its extremes are reached at the extremes of
  # its variables
  check_linear = lambda { |expr|
    expr = expr.reduce if expr.kind_of? Expression
    while expr.kind_of? Expression
      lhs = expr.lexpr
      rhs = expr.rexpr
      case expr.op
      when :*
        if lhs.kind_of?(Numeric)
          expr = rhs
        elsif rhs.kind_of?(Numeric)
          expr = lhs
        else
          break
        end
      when :/, :>>, :<<
        break unless rhs.kind_of?(Numeric)
        expr = lhs
      when :+, :-
        if !lhs || lhs.kind_of?(Numeric)
          # unary form or constant on the left: only the right side matters
          expr = rhs
        elsif rhs.kind_of?(Numeric)
          expr = lhs
        else
          break if not check_linear[rhs]
          expr = lhs
        end
      else
        break
      end
    end

    !expr.kind_of?(Expression)
  }

  # --- step 3: pick one candidate per ambiguous entry ---------------------
  # linear expressions of internal labels: compute the numeric bounds of the
  #   target and reject candidates whose relocation cannot hold them
  # anything else (external / non-linear): only the widest relocation is kept
  # among the survivors the shortest encoding wins
  ary.map! { |elem|
    case elem
    when Array
      # reloc index => every value the target takes when each external is
      # bound to its min and to its max offset
      target_bounds = {}
      rec_checkminmax = lambda { |idx, target, binding, extlist|
        if extlist.empty?
          (target_bounds[idx] ||= []) << target.bind(binding).reduce
        else
          ext = extlist.last
          remaining = extlist[0...-1]
          [minbinding, maxbinding].each { |bounds|
            rec_checkminmax[idx, target, binding.merge(ext => bounds[ext]), remaining]
          }
        end
      }
      # reloc index => widest relocation type offered (non-linear / external targets)
      wantsize = {}

      elem.each { |cand|
        cand.reloc.sort.each_with_index { |(_off, rel), i|
          resolvable = rel.target.bind(minbinding).reduce.kind_of?(Numeric) && check_linear[rel.target]
          if resolvable
            rec_checkminmax[i, rel.target, {}, rel.target.externals]
          else
            # find the biggest relocation type available for this target
            types = elem.map { |edata| edata.reloc.sort[i][1].type }
            wantsize[i] = types.sort_by { |type| Expression::INT_SIZE[type] }.last	# XXX do not use rel.length
          end
        }
      }

      # reject candidates whose relocation type is too small
      acceptable = elem.select { |edata|
        relocs = edata.reloc.sort
        (0...relocs.length).all? { |i|
          rtype = relocs[i][1].type
          if wantsize[i]
            rtype == wantsize[i]
          else
            target_bounds[i].all? { |bound| Expression.in_range?(bound, rtype) }
          end
        }
      }

      raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty?

      # keep the shortest
      acceptable.sort_by { |edata| edata.virtsize }.first
    else
      elem
    end
  }

  # --- step 4: concatenate, resolve paddings, check offsets ---------------
  edata = EncodedData.new

  # grows edata up to +targetsize+ using repetitions of +data+
  # (or by just extending its virtual size when there is no filler)
  fillwith = lambda { |targetsize, data|
    if not data
      edata.virtsize = targetsize
    else
      if data.reloc.empty? and not data.data.empty?
        # bulk append to avoid useless iterations
        nr = (targetsize - edata.virtsize) / data.length - 1
        if nr > 0
          dat = data.data.ljust(data.virtsize, 0.chr)
          edata << (dat * nr)
        end
      end
      edata << data while edata.virtsize + data.virtsize <= targetsize
      # last, partial repetition
      edata << data[0, targetsize - edata.virtsize] if edata.virtsize < targetsize
    end
  }

  ary.each { |elem|
    case elem
    when EncodedData
      edata << elem
    when Align
      fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith]
    when Offset
      if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce
        raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{edata.virtsize}"
      end
    when Padding
      padidx = ary.index(elem)
      nextoff = ary[padidx..-1].grep(Offset).first
      targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce
      # leave room for whatever sits between the pad and the next .offset
      ary[(padidx + 1)..(ary.index(nextoff) - 1)].each { |nelem| targetsize -= nelem.virtsize }
      raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length
      fillwith[targetsize, elem.fillwith]
    else
      raise "Internal error: #{elem.inspect}"
    end
  }

  edata
end
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
class Expression
  # Encodes this expression as an integer of the given +type+.
  #
  # The expression is reduced first. An Integer result is encoded right away
  # through Expression.encode_imm. Anything else produces a zero-filled
  # placeholder of INT_SIZE[type]/8 bytes carrying a Relocation (at offset 0)
  # that references this expression.
  #
  # Returns an EncodedData in both cases.
  def encode(type, endianness, backtrace=nil)
    value = reduce
    if value.kind_of?(Integer)
      EncodedData.new(Expression.encode_imm(value, type, endianness, backtrace))
    else
      placeholder = [0].pack('C') * (INT_SIZE[type] / 8)
      pending = Relocation.new(self, type, endianness, backtrace)
      EncodedData.new(placeholder, :reloc => { 0 => pending })
    end
  end

  class << self
    # Serialises the integer +val+ and returns the resulting byte String.
    #
    # type::       an INT_SIZE key, or an Integer byte count (then mapped to
    #              the first INT_SIZE key whose name starts with 'a' and
    #              whose size is 8*type bits)
    # endianness:: :big / :little, or any object responding to #endianness
    # backtrace::  optional, only used to decorate the overflow message
    #
    # Raises RuntimeError on an unsupported endianness and EncodeError when
    # in_range?(val, type) is false.
    def encode_imm(val, type, endianness, backtrace=nil)
      if type.kind_of? ::Integer
        bits = 8 * type
        type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == bits }
      end
      endianness = endianness.endianness unless endianness.kind_of? ::Symbol
      unless [:big, :little].include? endianness
        raise "unsupported endianness #{endianness.inspect}"
      end
      unless in_range?(val, type)
        where = (Backtrace::backtrace_str(backtrace) if backtrace)
        raise(EncodeError, "immediate overflow #{type.inspect} #{Expression[val]} #{where}")
      end

      # bytes are produced least significant first, then flipped unless little-endian
      nbytes = INT_SIZE[type] / 8
      bytes = (0...nbytes).map { |i| (val >> (8 * i)) & 0xff }
      packed = bytes.pack('C*')
      endianness == :little ? packed : packed.reverse
    end
    alias encode_immediate encode_imm
  end
end
|
|
290
|
+
|
|
291
|
+
class Data
  # Encodes this data directive and returns an EncodedData.
  #
  # One unit is built from @data:
  #  :uninitialized:: no raw bytes, only a virtual size of one @type integer
  #  String::         every byte becomes one integer of @type
  #                   (db 'foo' => 'foo', dw 'foo' => "f\0o\0o\0" / "\0f\0o\0o")
  #  Expression::     encoded as one integer of @type
  #  Array::          concatenation of each element's own #encode
  # The unit is then appended @count times.
  def encode(endianness)
    unit =
      case @data
      when :uninitialized
        EncodedData.new('', :virtsize => Expression::INT_SIZE[INT_TYPE[@type]] / 8)
      when String
        # XXX could be optimised, but should not be significant
        @data.unpack('C*').inject(EncodedData.new) do |acc, chr|
          acc << Expression.encode_imm(chr, INT_TYPE[@type], endianness, @backtrace)
        end
      when Expression
        @data.encode(INT_TYPE[@type], endianness, @backtrace)
      when Array
        @data.inject(EncodedData.new) do |acc, sub|
          acc << sub.encode(endianness)
        end
      end

    # repeat the unit @count times
    (0...@count).inject(EncodedData.new) do |acc, _n|
      acc << unit
    end
  end
end
|
|
310
|
+
|
|
311
|
+
class CPU
  # Encodes the instruction +i+ and returns a (flat) Array of EncodedData,
  # one per opcode that could encode it.
  #
  # Opcodes are looked up by name in +opcode_list_byname+ and kept when they
  # have the same argument count as +i+ and every argument is accepted by
  # +parse_arg_valid?+. Each survivor goes through the arch-specific
  # +encode_instr_op+; an opcode raising EncodeError is dropped (the message
  # of the last such failure is remembered for the final error).
  #
  # Every relocation of the results gets +i.backtrace+ as backtrace.
  # Raises EncodeError when nothing could be encoded.
  def encode_instruction(program, i)
    errmsg = ''

    matching = opcode_list_byname[i.opname].to_a.select do |op|
      op.args.length == i.args.length &&
        op.args.zip(i.args).all? { |spec, arg| parse_arg_valid?(op, spec, arg) }
    end

    attempts = matching.map do |op|
      begin
        encode_instr_op(program, i, op)
      rescue EncodeError => err
        errmsg = " (#{err.message})"
        nil
      end
    end

    oplist = attempts.compact.flatten
    if oplist.empty?
      raise EncodeError, "no matching opcode found for #{i}#{errmsg}"
    end

    oplist.each do |ed|
      ed.reloc.each_value { |rel| rel.backtrace = i.backtrace }
    end
    oplist
  end
end
|
|
333
|
+
end
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
require 'metasm/exe_format/main'
|
|
7
|
+
require 'metasm/encode'
|
|
8
|
+
require 'metasm/decode'
|
|
9
|
+
|
|
10
|
+
module Metasm
|
|
11
|
+
class AOut < ExeFormat
|
|
12
|
+
# header magic number => name (keys are octal literals)
MAGIC = {
  0407 => 'OMAGIC',
  0410 => 'NMAGIC',
  0413 => 'ZMAGIC',
  0314 => 'QMAGIC',
  0421 => 'CMAGIC'
}

# header machine type => name
MACHINE_TYPE = {
  0     => 'OLDSUN2',
  1     => '68010',
  2     => '68020',
  3     => 'SPARC',
  100   => 'PC386',
  134   => 'I386',
  135   => 'M68K',
  136   => 'M68K4K',
  137   => 'NS32532',
  138   => 'SPARC',
  139   => 'PMAX',
  140   => 'VAX',
  141   => 'ALPHA',
  142   => 'MIPS',
  143   => 'ARM6',
  151   => 'MIPS1',
  152   => 'MIPS2',
  300   => 'HP300',
  0x20B => 'HPUX800',
  0x20C => 'HPUX'
}

# header flag bit => name
FLAGS = {
  0x10 => 'PIC',
  0x20 => 'DYNAMIC'
}

# symbol type value => name
# NOTE(review): not referenced anywhere else in the visible part of this
# file - confirm whether the symbol :type field was meant to use it
SYMBOL_TYPE = {
  0  => 'UNDF',
  1  => 'ABS',
  2  => 'TEXT',
  3  => 'DATA',
  4  => 'BSS',
  5  => 'INDR',
  6  => 'SIZE',
  9  => 'COMM',
  10 => 'SETA',
  11 => 'SETT',
  12 => 'SETD',
  13 => 'SETB',
  14 => 'SETV',
  15 => 'FN'
}

attr_accessor :endianness, :header, :text, :data, :symbols, :textrel, :datarel
|
|
30
|
+
|
|
31
|
+
# The a.out file header: one bitfield word (magic / machine type / flags)
# followed by seven size/address words.
class Header < SerialStruct
  bitfield(:word, 0 => :magic, 16 => :machtype, 24 => :flags)
  fld_enum :magic, MAGIC
  fld_enum :machtype, MACHINE_TYPE
  fld_bits :flags, FLAGS
  words :text, :data, :bss, :syms, :entry, :trsz, :drsz

  # Decodes the header from +aout+, then rejects any magic other than
  # OMAGIC / NMAGIC / ZMAGIC / QMAGIC with InvalidExeFormat.
  def decode(aout)
    super(aout)

    unless ['OMAGIC', 'NMAGIC', 'ZMAGIC', 'QMAGIC'].include?(@magic)
      raise InvalidExeFormat, "Bad A.OUT signature #@magic"
    end
  end

  # Fills the fields still unset before encoding: QMAGIC / PC386 / no flags,
  # and the text and data sizes taken from +aout+ when it has those sections.
  def set_default_values(aout)
    @magic ||= 'QMAGIC'
    @machtype ||= 'PC386'
    @flags ||= []
    if aout.text
      # a QMAGIC text size also counts the 32 header bytes
      @text ||= aout.text.length + (@magic == 'QMAGIC' ? 32 : 0)
    end
    if aout.data
      @data ||= aout.data.length
    end

    super(aout)
  end
end
|
|
57
|
+
|
|
58
|
+
# One relocation entry: an address word followed by a bitfield word.
class Relocation < SerialStruct
  word :address
  bitfield(:word,
           0  => :symbolnum,
           24 => :pcrel,
           25 => :length,
           27 => :extern,
           28 => :baserel,
           29 => :jmptable,
           30 => :relative,
           31 => :rtcopy)
  # the 2-bit length field maps to 1, 2, 4 or 8; the default is 4
  fld_enum(:length, 0 => 1, 1 => 2, 2 => 4, 3 => 8)
  fld_default(:length, 4)
end
|
|
65
|
+
|
|
66
|
+
# One symbol table entry. +name+ is not serialised directly: it is looked up
# in / appended to a NUL-separated string table through the name_p field.
class Symbol < SerialStruct
  word :name_p
  bitfield(:byte, 0 => :extern, 1 => :type, 5 => :stab)
  byte :other
  half :desc
  word :value
  attr_accessor :name

  # Decodes the entry; when a +strings+ table is given, also reads the name
  # found there from name_p up to the next NUL.
  def decode(aout, strings=nil)
    super(aout)
    return unless strings

    name_end = strings.index(?\0, @name_p)
    @name = strings[@name_p...name_end]
  end

  # Makes sure name_p designates +name+ inside +strings+ (appending the name,
  # NUL-terminated, when it does not), then applies the inherited defaults.
  def set_default_values(aout, strings=nil)
    has_name = strings && name && @name != ''
    if has_name && (!@name_p || strings[@name_p, @name.length] != @name)
      @name_p = strings.length
      strings << @name << 0
    end
    super(aout, strings)
  end
end
|
|
89
|
+
|
|
90
|
+
# Fixed-width unsigned integer codecs, all using this file's endianness.
# The decoders read from +edata+ (the whole encoded file by default);
# the encoders wrap +w+ in an Expression and encode it.

def decode_byte(edata = @encoded)
  edata.decode_imm(:u8, @endianness)
end

def decode_half(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end

def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end

def encode_byte(w)
  Expression[w].encode(:u8, @endianness)
end

def encode_half(w)
  Expression[w].encode(:u16, @endianness)
end

def encode_word(w)
  Expression[w].encode(:u32, @endianness)
end
|
|
96
|
+
|
|
97
|
+
# Creates an empty a.out: fresh header, empty text and data sections.
# The endianness is taken from +cpu+ when given, little-endian otherwise.
def initialize(cpu = nil)
  if cpu
    @endianness = cpu.endianness
  else
    @endianness = :little
  end
  @header = Header.new
  @text = EncodedData.new
  @data = EncodedData.new
  super(cpu)
end
|
|
104
|
+
|
|
105
|
+
# Decodes the file header, reading from the very start of the encoded data.
def decode_header
  @encoded.ptr = 0
  @header.decode self
end
|
|
109
|
+
|
|
110
|
+
# Decodes the header, then loads the text and data sections.
#
# ZMAGIC: the text is read from file offset 1024.
# QMAGIC: the header size (32) is subtracted from the text length, as the
#         header is counted in .text.
#
# The relocation, symbol and string areas are read afterwards but not
# interpreted yet (TODO); the last read is the method's return value.
def decode
  decode_header

  tlen = @header.text
  magic = @header.magic
  if magic == 'ZMAGIC'
    @encoded.ptr = 1024
  elsif magic == 'QMAGIC'
    tlen -= 32	# header is included in .text
  end

  @text = EncodedData.new << @encoded.read(tlen)
  @data = EncodedData.new << @encoded.read(@header.data)

  # TODO decode these; for now they are only read
  # (presumably to move past them - confirm EncodedData#read semantics)
  _textrel = @encoded.read(@header.trsz)
  _datarel = @encoded.read(@header.drsz)
  _syms = @encoded.read(@header.syms)
  @encoded.read	# strings
end
|
|
128
|
+
|
|
129
|
+
# Serialises the file (header + text + data) and returns the raw data of the
# resulting EncodedData, which is also stored in @encoded.
#
# Only QMAGIC (or a header with no magic set yet) is supported, otherwise
# EncodeError is raised. The header's text, data and bss sizes are
# recomputed here. Labels are bound with text at 4096+32 and data at
# 4096 + header.text.
def encode
  # non mmapable on linux anyway
  # could support OMAGIC..
  magic = @header.magic
  raise EncodeError, 'cannot encode non-QMAGIC a.out' if magic && magic != 'QMAGIC'

  page = 4096
  hdrlen = 32

  # data must be page-aligned, and the header bytes are counted in .text:
  # extend text so that header + text ends on a page boundary
  @text.virtsize = (@text.virtsize + hdrlen + page - 1) / page * page - hdrlen
  raw = @data.rawsize
  if raw % page != 0
    # writing the last byte of the page extends the raw data up to it
    @data[(raw + page - 1) / page * page - 1] = 0
  end

  @header.text = @text.length + hdrlen
  @header.data = @data.rawsize
  @header.bss = @data.virtsize - @data.rawsize

  @encoded = EncodedData.new
  @encoded << @header.encode(self)
  labels = @text.binding(page + hdrlen).merge(@data.binding(page + @header.text))
  @encoded << @text << @data
  @encoded.fixup!(labels)
  @encoded.data
end
|
|
152
|
+
|
|
153
|
+
# Prepares the parser state: one source list per section (created on first
# use) and the current one, which starts as the text list.
def parse_init
  @textsrc = [] unless @textsrc
  @datasrc = [] unless @datasrc
  @cursource = @textsrc unless @cursource
  super()
end
|
|
159
|
+
|
|
160
|
+
# Handles the a.out specific assembler directives (matched case-insensitively):
#  .text / .data:: select the source list receiving what follows
#  .entrypoint::   sets the header entry, either to the expression that
#                  follows or, when none is given, to a fresh label placed
#                  at the current position
# Any other directive is passed to the parent class.
def parse_parser_instruction(instr)
  case instr.raw.downcase
  when '.text'
    @cursource = @textsrc
  when '.data'
    @cursource = @datasrc
  when '.entrypoint'
    # ".entrypoint <somelabel/expression>" or ".entrypoint" (here)
    @lexer.skip_space
    tok = @lexer.nexttok
    if tok && tok.type == :string
      entrypoint = Expression.parse(@lexer)
      raise instr if not entrypoint
    else
      entrypoint = new_label('entrypoint')
      @cursource << Label.new(entrypoint, instr.backtrace.dup)
    end
    @header.entry = entrypoint
  else
    super(instr)
  end
end
|
|
177
|
+
|
|
178
|
+
# Parses the given arguments (when there are any), then assembles the
# pending text and data sources, appending the results to the matching
# sections and emptying both source lists. Returns self.
def assemble(*a)
  parse(*a) unless a.empty?

  @text << assemble_sequence(@textsrc, @cpu)
  @textsrc.clear

  @data << assemble_sequence(@datasrc, @cpu)
  @datasrc.clear

  self
end
|
|
186
|
+
|
|
187
|
+
# Yields each section with its base address: text first, then data placed
# right after the text's virtual size.
# The text base is 4096+32 for QMAGIC files and 0 otherwise.
def each_section
  tva = (@header.magic == 'QMAGIC') ? 4096 + 32 : 0
  yield @text, tva
  yield @data, tva + @text.virtsize
end
|
|
193
|
+
end
|
|
194
|
+
end
|