metasm 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
@@ -0,0 +1,979 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/decode'
|
8
|
+
require 'metasm/exe_format/elf' unless defined? Metasm::ELF
|
9
|
+
|
10
|
+
module Metasm
|
11
|
+
class ELF
|
12
|
+
class Header
  # Hook registered on the :type field of the header decode sequence.
  # It validates the identification fields read so far and configures the
  # word size and byte order on +elf+, which the decode_word/decode_xword
  # family relies on.
  # Raises InvalidExeFormat on a bad signature, class, byte order or version.
  decode_hook(:type) { |elf, hdr|
    if hdr.magic != "\x7fELF"
      raise InvalidExeFormat, "E: ELF: invalid ELF signature #{hdr.magic.inspect}"
    end

    elf.bitsize =
      case hdr.e_class
      when '32' then 32
      when '64', '64_icc' then 64
      else raise InvalidExeFormat, "E: ELF: unsupported class #{hdr.e_class}"
      end

    elf.endianness =
      case hdr.data
      when 'LSB' then :little
      when 'MSB' then :big
      else raise InvalidExeFormat, "E: ELF: unsupported endianness #{hdr.data}"
      end

    raise InvalidExeFormat, "E: ELF: unsupported ELF version #{hdr.i_version}" if hdr.i_version != 'CURRENT'
  }
end
|
35
|
+
|
36
|
+
class Symbol
  # Decodes the symbol through the inherited decoder. When +strtab+ is given,
  # @name is then looked up in it at offset @name_p using elf.readstr.
  # Returns the resolved name, or nil when no string table was supplied.
  def decode(elf, strtab=nil)
    super(elf)
    return unless strtab
    @name = elf.readstr(strtab, @name_p)
  end
end
|
42
|
+
|
43
|
+
# basic immediates decoding functions
|
44
|
+
# Immediate readers: each one decodes a single value from +edata+ (the ELF's
# own @encoded buffer by default) using the ELF's @endianness.
def decode_byte(edata = @encoded)
  edata.decode_imm(:u8, @endianness)
end

def decode_half(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end

def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end

def decode_sword(edata = @encoded)
  edata.decode_imm(:i32, @endianness)
end

# unsigned value whose width follows @bitsize (32 bits when @bitsize is 32, 64 bits otherwise)
def decode_xword(edata = @encoded)
  width = if @bitsize == 32 then :u32 else :u64 end
  edata.decode_imm(width, @endianness)
end

# signed counterpart of decode_xword
def decode_sxword(edata = @encoded)
  width = if @bitsize == 32 then :i32 else :i64 end
  edata.decode_imm(width, @endianness)
end

# addresses and file offsets are decoded exactly like an xword
alias decode_addr decode_xword
alias decode_off decode_xword
|
52
|
+
|
53
|
+
# Returns the NUL-terminated string stored in +str+ at offset +off+, without
# its terminator. Returns nil when +off+ is not strictly positive, when no
# terminator follows, or when looking it up raises (e.g. a LoadedELF handing
# us an arbitrary pointer).
def readstr(str, off)
  return unless off > 0

  terminator =
    begin
      str.index(?\0, off)
    rescue StandardError
      false
    end

  str[off...terminator] if terminator
end
|
58
|
+
|
59
|
+
# transforms a virtual address to a file offset, from mmaped segments addresses
|
60
|
+
# Maps a virtual address to a file offset using the 'LOAD' segment whose
# [vaddr, vaddr+memsz) range contains it.
# Returns nil when +addr+ is nil or no such segment exists.
def addr_to_off(addr)
  return unless addr

  seg = @segments.find do |cand|
    cand.type == 'LOAD' && cand.vaddr <= addr && cand.vaddr + cand.memsz > addr
  end
  return unless seg

  addr - seg.vaddr + seg.offset
end
|
64
|
+
|
65
|
+
# memory address -> file offset
|
66
|
+
# handles relocated LoadedELF
|
67
|
+
# Memory address -> file offset, for a possibly relocated image.
# When module_address is 0, @load_address (initialised to 0 if unset) is
# subtracted from +addr+ before the lookup; otherwise +addr+ is used as is.
def addr_to_fileoff(addr)
  base =
    if module_address == 0
      @load_address ||= 0
    else
      0
    end
  addr_to_off(addr - base)
end
|
72
|
+
|
73
|
+
# file offset -> memory address
|
74
|
+
# handles relocated LoadedELF
|
75
|
+
# File offset -> memory address, for a possibly relocated image.
# Uses the 'LOAD' segment whose [offset, offset+filesz) range contains +foff+
# and returns nil when there is none. @load_address (initialised to 0 if
# unset) is added only when module_address is 0.
def fileoff_to_addr(foff)
  seg = @segments.find do |cand|
    cand.type == 'LOAD' && cand.offset <= foff && cand.offset + cand.filesz > foff
  end
  return unless seg

  base = module_address == 0 ? (@load_address ||= 0) : 0
  seg.vaddr + base + foff - seg.offset
end
|
82
|
+
|
83
|
+
# return the address of a label
|
84
|
+
# Resolves +name+ to an address.
# An Integer is returned unchanged. Otherwise the label is searched first in
# the exports of each segment's own encoded data, then in the exports of
# @encoded (a file offset mapped back through the segment containing it).
# Returns nil when the label cannot be resolved.
def label_addr(name)
  return name if name.kind_of? Integer

  if owner = @segments.find { |cand| cand.encoded and cand.encoded.export[name] }
    return owner.vaddr + owner.encoded.export[name]
  end

  off = @encoded.export[name]
  return unless off

  seg = @segments.find { |cand| cand.offset <= off and cand.offset + cand.filesz > off }
  seg.vaddr + off - seg.offset if seg
end
|
93
|
+
|
94
|
+
# make an export of +self.encoded+, returns the label name if successful
|
95
|
+
# Exports a new label for virtual address +addr+ in @encoded.
# The label is created with new_label(name) and bound to the file offset
# given by addr_to_off. Returns the label, or nil (after a warning when
# $VERBOSE is set) if the address is not backed by any segment.
def add_label(name, addr)
  off = addr_to_off(addr)
  unless off
    puts "W: Elf: #{name} points to unmmaped space #{'0x%08X' % addr}" if $VERBOSE
    return
  end

  label = new_label(name)
  @encoded.add_export label, off
  label
end
|
104
|
+
|
105
|
+
# decodes the elf header, section & program header
|
106
|
+
# Decodes the ELF header found at +off+, then the program and section header
# tables unless disabled through +decode_phdr+ / +decode_shdr+ or absent
# (phoff / shoff equal to 0). Table offsets are taken relative to +off+.
# Raises InvalidExeFormat when the header advertises an unexpected size.
def decode_header(off = 0, decode_phdr=true, decode_shdr=true)
  @encoded.ptr = off
  @header.decode self
  if Header.size(self) != @header.ehsize
    raise InvalidExeFormat, "Invalid elf header size: #{@header.ehsize}"
  end

  decode_program_header(@header.phoff + off) if decode_phdr and @header.phoff != 0
  decode_section_header(@header.shoff + off) if decode_shdr and @header.shoff != 0
end
|
117
|
+
|
118
|
+
# decodes the section header
|
119
|
+
# section names are read from shstrndx if possible
|
120
|
+
# Decodes the section header table located at +off+ into @sections and
# exports a 'section_header' label there.
# Section names are then read from the section designated by shstrndx when
# it is usable, and a "section_<name>" label is added for each named section
# with a positive address.
# Raises InvalidExeFormat when shentsize does not match Section.size.
def decode_section_header(off = @header.shoff)
  if Section.size(self) != @header.shentsize
    raise InvalidExeFormat, "Invalid elf section header size: #{@header.shentsize}"
  end

  @encoded.add_export new_label('section_header'), off
  @encoded.ptr = off
  @sections = []
  @header.shnum.times { @sections << Section.decode(self) }

  # read sections name
  return if @header.shstrndx == 0
  return unless names = @sections[@header.shstrndx]
  # LoadedElf may not have shstr mmaped
  return unless names.encoded = @encoded[names.offset, names.size]

  @sections[1..-1].each { |sec|
    sec.name = readstr(names.encoded.data, sec.name_p)
    add_label("section_#{sec.name}", sec.addr) if sec.name and sec.addr > 0
  }
end
|
136
|
+
|
137
|
+
# decodes the program header table
|
138
|
+
# marks the elf entrypoint as an export of +self.encoded+
|
139
|
+
# Decodes the program header table located at +off+ into @segments and
# exports a 'program_header' label there. A non-zero entry point is then
# exported as 'entrypoint'.
# Raises InvalidExeFormat when phentsize does not match Segment.size.
def decode_program_header(off = @header.phoff)
  if Segment.size(self) != @header.phentsize
    raise InvalidExeFormat, "Invalid elf program header size: #{@header.phentsize}"
  end

  @encoded.add_export new_label('program_header'), off
  @encoded.ptr = off
  @segments = []
  @header.phnum.times { @segments << Segment.decode(self) }

  add_label('entrypoint', @header.entry) if @header.entry != 0
end
|
150
|
+
|
151
|
+
# read the dynamic symbols hash table, and checks that every global and named symbol is accessible through it
|
152
|
+
# outputs a warning if it's not and $VERBOSE is set
|
153
|
+
# Reads the dynamic symbol hash table at +off+ and checks that every named,
# defined GLOBAL symbol of @symbols can be reached through it.
# A warning is printed for each unreachable symbol when $VERBOSE is set.
# Returns nil when +off+ is nil/false, @symbols otherwise.
#
# This is a diagnostic pass, so a malformed table must not abort decoding:
# an empty bucket array and chain links pointing outside the chain table are
# both treated as "symbol not found" instead of raising.
def check_symbols_hash(off = @tag['HASH'])
  return if not @encoded.ptr = off

  hash_bucket_len = decode_word
  sym_count = decode_word

  hash_bucket = [] ; hash_bucket_len.times { hash_bucket << decode_word }
  hash_table = [] ; sym_count.times { hash_table << decode_word }

  @symbols.each { |s|
    next if not s.name or s.bind != 'GLOBAL' or s.shndx == 'UNDEF'

    found = false
    # with no bucket at all nothing can be looked up (and the modulo below would divide by zero)
    idx = hash_bucket_len == 0 ? 0 : hash_bucket[ELF.hash_symbol_name(s.name) % hash_bucket_len]
    sym_count.times {	# to avoid DoS by loop
      # 0 terminates a chain; nil means the previous link pointed outside of hash_table
      break if idx.nil? or idx == 0
      if ss = @symbols[idx] and ss.name == s.name
        found = true
        break
      end
      idx = hash_table[idx]
    }
    if not found
      puts "W: Elf: Symbol #{s.name.inspect} not found in hash table" if $VERBOSE
    end
  }
end
|
181
|
+
|
182
|
+
# checks every symbol's accessibility through the gnu_hash table
|
183
|
+
# Walks the GNU hash section at +off+.
# Layout read here: bucket count, index of the first hashed symbol (symndx),
# bloom filter word count, bloom shift, the bloom words (xword sized), the
# buckets, then the chain words. For every non-empty bucket the chain is
# consumed up to and including the first word with its low bit set.
# With +just_get_count+ the method returns symndx plus the number of chain
# words read (used as the dynamic symbol count); otherwise it returns nil.
# The actual reachability check is still TODO.
def check_symbols_gnu_hash(off = @tag['GNU_HASH'], just_get_count=false)
  @encoded.ptr = off
  return unless off

  bucket_count = decode_word
  symndx = decode_word          # index of first sorted symbol in symtab
  maskwords = decode_word       # number of bloom filter words
  decode_word                   # shift2, used by the bloom filter (value unused here)

  # the bloom filter is not checked, but its words must still be consumed
  maskwords.times { decode_xword }

  buckets = []
  bucket_count.times { buckets << decode_word }

  # we do not know beforehand how many symbols are hashed: read one chain
  # per non-empty bucket, the low bit of a word flags the end of its chain
  chain = []
  buckets.each { |first_sym|
    next if first_sym == 0	# no hash chain for this bucket
    loop do
      word = decode_word
      chain << word
      break if word & 1 == 1
    end
  }

  return chain.length + symndx if just_get_count

  # TODO
  nil
end
|
231
|
+
|
232
|
+
# read dynamic tags array
|
233
|
+
# Reads the dynamic tag array into @tag.
# Without +off+, the array is located through the 'DYNAMIC' segment (via
# addr_to_off, so it also works with LoadedELF) or, failing that, through a
# 'DYNAMIC' section (assumed to be an ET_REL decoded from file).
# Returns without touching @tag when no location is found.
# Reading stops at the 'NULL' tag. 'NEEDED' and unknown (Integer) tags are
# accumulated in arrays, 'POSFLAG_1' is ignored, and for any other tag only
# the first occurrence is kept.
def decode_tags(off = nil)
  unless off
    if dyn = @segments.find { |seg| seg.type == 'DYNAMIC' }
      # this way it also works with LoadedELF
      off = addr_to_off(dyn.vaddr)
    elsif dyn = @sections.find { |sec| sec.type == 'DYNAMIC' }
      # if no DYNAMIC segment, assume we decode an ET_REL from file
      off = dyn.offset
    end
  end
  @encoded.ptr = off
  return unless off

  @tag = {}
  loop do
    raw = decode_sxword
    val = decode_xword

    tag =
      if raw >= DYNAMIC_TAG_LOPROC and raw < DYNAMIC_TAG_HIPROC
        # processor-specific range: names come from the per-machine table
        named = int_to_hash(raw - DYNAMIC_TAG_LOPROC, DYNAMIC_TAG_PROC[@header.machine] || {})
        named.kind_of?(Integer) ? named + DYNAMIC_TAG_LOPROC : named
      else
        int_to_hash(raw, DYNAMIC_TAG)
      end

    case tag
    when 'NULL'
      @tag[tag] = val
      break
    when Integer
      puts "W: Elf: unknown dynamic tag 0x#{tag.to_s 16}" if $VERBOSE
      (@tag[tag] ||= []) << val
    when 'NEEDED'	# here, list of tags for which multiple occurences are allowed
      (@tag[tag] ||= []) << val
    when 'POSFLAG_1'
      puts "W: Elf: ignoring dynamic tag modifier #{tag} #{int_to_hash(val, DYNAMIC_POSFLAG_1)}" if $VERBOSE
    else
      if @tag[tag]
        puts "W: Elf: ignoring re-occurence of dynamic tag #{tag} (value #{'0x%08X' % val})" if $VERBOSE
      else
        @tag[tag] = val
      end
    end
  end
end
|
277
|
+
|
278
|
+
# interprets tags (convert flags, arrays etc), mark them as self.encoded.export
|
279
|
+
# Post-processes the raw values stored in @tag:
# * 'STRTAB' becomes a label and is used to resolve the string tags
#   ('NEEDED', 'SONAME', 'RPATH', 'RUNPATH')
# * address tags become labels when add_label can map them
# * the *_ARRAY tags become arrays of labels/addresses, consuming the
#   matching '<tag>SZ' entry
# * 'PLTREL' and the flag tags are converted to their symbolic form
# Integer (unknown) tags are left untouched.
def decode_segments_tags_interpret
  strtab = nil
  if @tag['STRTAB']
    if sz = @tag['STRSZ']
      if l = add_label('dynamic_strtab', @tag['STRTAB'])
        @tag['STRTAB'] = l
        strtab = @encoded[l, sz].data
      end
    else
      puts "W: Elf: no string table size tag" if $VERBOSE
    end
  end

  @tag.keys.each { |k|
    case k
    when Integer
      # unknown tag, nothing to interpret
    when 'NEEDED', 'SONAME', 'RPATH', 'RUNPATH'
      unless strtab
        puts "W: Elf: no string table, needed for tag #{k}" if $VERBOSE
        next
      end
      if k == 'NEEDED'
        # array of strings
        @tag[k].map! { |v| readstr(strtab, v) }
      else
        # string
        @tag[k] = readstr(strtab, @tag[k])
      end
    when 'INIT', 'FINI', 'PLTGOT', 'HASH', 'GNU_HASH', 'SYMTAB', 'RELA', 'REL', 'JMPREL'
      @tag[k] = add_label('dynamic_' + k.downcase, @tag[k]) || @tag[k]
    when 'INIT_ARRAY', 'FINI_ARRAY', 'PREINIT_ARRAY'
      label = add_label('dynamic_' + k.downcase, @tag[k])
      next unless label

      sz = @tag.delete(k+'SZ')
      unless sz
        puts "W: Elf: tag #{k} has no corresponding size tag" if $VERBOSE
        next
      end

      tab = @encoded[label, sz]
      tab.ptr = 0
      @tag[k] = []
      while tab.ptr < tab.length
        a = decode_addr(tab)
        # entries are numbered in table order
        @tag[k] << (add_label("dynamic_#{k.downcase}_#{@tag[k].length}", a) || a)
      end
    when 'PLTREL'
      @tag[k] = int_to_hash(@tag[k], DYNAMIC_TAG)
    when 'FLAGS'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FLAGS)
    when 'FLAGS_1'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FLAGS_1)
    when 'FEATURES_1'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FEATURES_1)
    end
  }
end
|
331
|
+
|
332
|
+
# marks a symbol as @encoded.export (from s.value, using segments or sections)
|
333
|
+
# Marks symbol +s+ as an export of @encoded.
# Only named, defined symbols of type NOTYPE/OBJECT/FUNC are considered.
# The file offset comes from the symbol's section for 'REL' files, and from
# the LOAD segments otherwise (a symbol sitting exactly at the end of a
# segment is accepted too).
# When the name is already exported at another offset, underscores are
# appended until a free (or matching) name is found.
# Returns nil when the symbol is skipped.
def decode_symbol_export(s)
  return unless s.name and s.shndx != 'UNDEF' and %w[NOTYPE OBJECT FUNC].include?(s.type)

  if @header.type == 'REL'
    # shndx may be a symbolic value (e.g. 'ABS') instead of a section index:
    # such symbols have no section to take an offset from
    sec = @sections[s.shndx] if s.shndx.kind_of?(Integer)
    if not sec
      puts "W: Elf: symbol has no backing section (#{s.inspect})" if $VERBOSE and s.shndx != 'ABS'
      return
    end
    o = sec.offset + s.value
  elsif not o = addr_to_off(s.value)
    # allow to point to end of segment
    if not seg = @segments.find { |seg_| seg_.type == 'LOAD' and seg_.vaddr + seg_.memsz == s.value }	# check end
      puts "W: Elf: symbol points to unmmaped space (#{s.inspect})" if $VERBOSE and s.shndx != 'ABS'
      return
    end
    # LoadedELF would have returned an addr_to_off = addr
    o = s.value - seg.vaddr + seg.offset
  end

  name = s.name
  while @encoded.export[name] and @encoded.export[name] != o
    puts "W: Elf: symbol #{name} already seen at #{'%X' % @encoded.export[name]} - now at #{'%X' % o}) (may be a different version definition)" if $VERBOSE
    name += '_'	# do not modify inplace
  end
  @encoded.add_export name, o
end
|
355
|
+
|
356
|
+
# read symbol table, and mark all symbols found as exports of self.encoded
|
357
|
+
# tables locations are found in self.tags
|
358
|
+
# XXX symbol count is found from the hash table, this may not work with GNU_HASH only binaries
|
359
|
+
# Reads the dynamic symbol table described by the tags and exports every
# symbol found through decode_symbol_export. @symbols is emptied first.
# Does nothing unless STRTAB, STRSZ, SYMTAB and one of HASH / GNU_HASH are
# present. The symbol count comes from the second word of the HASH table
# when there is one, from check_symbols_gnu_hash otherwise.
# The tags are assumed to be already interpreted (usable directly as
# positions in @encoded).
# Raises when SYMENT does not match Symbol.size.
def decode_segments_symbols
  return unless %w[STRTAB STRSZ SYMTAB].all? { |k| @tag[k] } and (@tag['HASH'] or @tag['GNU_HASH'])

  if @tag['SYMENT'] != Symbol.size(self)
    raise "E: ELF: unsupported symbol entry size: #{@tag['SYMENT']}"
  end

  # find number of symbols
  sym_count =
    if @tag['HASH']
      @encoded.ptr = @tag['HASH']	# assume tag already interpreted (would need addr_to_off otherwise)
      decode_word	# bucket count, not needed
      decode_word
    else
      check_symbols_gnu_hash(@tag['GNU_HASH'], true)
    end

  strtab = @encoded[@tag['STRTAB'], @tag['STRSZ']].data.to_str

  @encoded.ptr = @tag['SYMTAB']
  @symbols.clear
  sym_count.times {
    sym = Symbol.decode(self, strtab)
    @symbols << sym
    decode_symbol_export(sym)
  }

  if $VERBOSE
    check_symbols_hash
    check_symbols_gnu_hash
  end
end
|
386
|
+
|
387
|
+
# decode SYMTAB sections
|
388
|
+
# Decodes every 'SYMTAB' section (names are resolved through the string
# table section designated by its link field), appends the symbols to
# @symbols and exports them through decode_symbol_export.
# For 'DYN' and 'EXEC' files, undefined symbols are skipped, as are symbols
# whose name is already present in @symbols (they may already have been read
# from the DYNAMIC segment).
# Raises when a symbol table claims to be larger than the file.
def decode_sections_symbols
  @symbols ||= []
  # names already in @symbols, built on first use; replaces a linear scan of
  # @symbols for each decoded symbol
  known = nil
  @sections.to_a.each { |sec|
    next if sec.type != 'SYMTAB'
    next if not strtab = @sections[sec.link]
    strtab = @encoded[strtab.offset, strtab.size].data
    @encoded.ptr = sec.offset
    syms = []
    raise 'Invalid symbol table' if sec.size > @encoded.length
    (sec.size / Symbol.size(self)).times { syms << Symbol.decode(self, strtab) }

    alreadysegs = (@header.type == 'DYN' or @header.type == 'EXEC')
    if alreadysegs and not known
      known = {}
      @symbols.each { |ss| known[ss.name] = true }
    end

    syms.each { |s|
      if alreadysegs
        # if we already decoded the symbols from the DYNAMIC segment,
        # ignore dups and imports from this section
        next if s.shndx == 'UNDEF'
        next if known[s.name]
        known[s.name] = true
      end
      @symbols << s
      decode_symbol_export(s)
    }
  }
end
|
411
|
+
|
412
|
+
# decode REL/RELA sections
|
413
|
+
# Decodes every 'REL' / 'RELA' section into @relocations.
# When an arch_decode_segments_reloc_<machine> method exists, each newly
# decoded relocation is also turned into an @encoded.reloc entry, located at
# the relocation offset inside the section designated by the info field.
def decode_sections_relocs
  @relocations ||= []
  @sections.to_a.each { |sec|
    relcls =
      case sec.type
      when 'REL' then Relocation
      when 'RELA' then RelocationAddend
      end
    next unless relcls

    first_new = @relocations.length
    @encoded.ptr = sec.offset
    @relocations << relcls.decode(self) while @encoded.ptr < sec.offset + sec.size

    # create edata relocs
    target_sec = @sections[sec.info]
    relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}"
    next if not respond_to? relocproc

    new_label('pcrel')
    @relocations[first_new..-1].each { |r|
      o = @encoded.ptr = target_sec.offset + r.offset
      # work on a copy whose offset is a fresh label, the decoded relocation is left untouched
      copy = r.dup
      copy.offset = Expression[new_label('pcrel')]
      rel = send(relocproc, copy)
      @encoded.reloc[o] = rel if rel
    }
  }
end
|
443
|
+
|
444
|
+
# decode relocation tables (REL, RELA, JMPREL) from @tags
|
445
|
+
# Rebuilds @relocations from the tables designated by the 'REL', 'RELA' and
# 'JMPREL' tags (in that order), each one being read up to its size tag.
# 'JMPREL' entries are decoded as REL or RELA according to the 'PLTREL' tag.
# Raises on an unexpected entry size or PLT relocation type.
def decode_segments_relocs
  @relocations.clear

  rel = @tag['REL']
  @encoded.ptr = rel
  if rel
    if @tag['RELENT'] != Relocation.size(self)
      raise "E: ELF: unsupported rel entry size #{@tag['RELENT']}"
    end
    limit = @encoded.ptr + @tag['RELSZ']
    @relocations << Relocation.decode(self) while @encoded.ptr < limit
  end

  rela = @tag['RELA']
  @encoded.ptr = rela
  if rela
    if @tag['RELAENT'] != RelocationAddend.size(self)
      raise "E: ELF: unsupported rela entry size #{@tag['RELAENT'].inspect}"
    end
    limit = @encoded.ptr + @tag['RELASZ']
    @relocations << RelocationAddend.decode(self) while @encoded.ptr < limit
  end

  jmprel = @tag['JMPREL']
  @encoded.ptr = jmprel
  if jmprel
    reltype = @tag['PLTREL']
    relcls =
      case reltype
      when 'REL' then Relocation
      when 'RELA' then RelocationAddend
      else raise "E: ELF: unsupported plt relocation type #{reltype}"
      end
    limit = @encoded.ptr + @tag['PLTRELSZ']
    @relocations << relcls.decode(self) while @encoded.ptr < limit
  end
end
|
475
|
+
|
476
|
+
# use relocations as self.encoded.reloc
|
477
|
+
# Converts @relocations into @encoded.reloc entries, using the
# arch_decode_segments_reloc_<machine> method matching the header's machine
# (a warning is printed under $VERBOSE and nothing is done if it is missing).
# Relocations at offset 0, outside of mapped space, or on an already
# relocated location are skipped.
# For MIPS with PLTGOT, GOTSYM and LOCAL_GOTNO tags, one relocation per named
# symbol from index GOTSYM on is additionally created in the GOT.
def decode_segments_relocs_interpret
  relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}"
  unless respond_to? relocproc
    puts "W: Elf: relocs for arch #{@header.machine} unsupported" if $VERBOSE
    return
  end

  @relocations.each { |r|
    next if r.offset == 0

    o = addr_to_off(r.offset)
    if not o
      puts "W: Elf: relocation in unmmaped space (#{r.inspect})" if $VERBOSE
      next
    end
    if @encoded.reloc[o]
      puts "W: Elf: not rerelocating address #{'%08X' % r.offset}" if $VERBOSE
      next
    end

    # the arch handler reads implicit addends at the current position
    @encoded.ptr = o
    rel = send(relocproc, r)
    @encoded.reloc[o] = rel if rel
  }

  return unless @header.machine == 'MIPS' and @tag['PLTGOT'] and @tag['GOTSYM'] and @tag['LOCAL_GOTNO']

  puts "emulating mips PLT-like relocs" if $VERBOSE
  wsz = @bitsize/8
  # address such that dyntab + wsz*i is the GOT slot of symbol number i
  dyntab = label_addr(@tag['PLTGOT']) - (@tag['GOTSYM'] - @tag['LOCAL_GOTNO']) * wsz
  dt_o = addr_to_off(dyntab)
  @symbols.each_with_index { |sym, i|
    next if i < @tag['GOTSYM'] or not sym.name
    @encoded.reloc[dt_o + wsz*i] = Metasm::Relocation.new(Expression[sym.name], "u#{@bitsize}".to_sym, @endianness)
  }
end
|
511
|
+
|
512
|
+
# returns the Metasm::Relocation that should be applied for reloc
|
513
|
+
# self.encoded.ptr must point to the location that will be relocated (for implicit addends)
|
514
|
+
# Returns the Metasm::Relocation to apply for the 386 relocation +reloc+, or
# nil when the relocation type produces none (NONE, COPY, unhandled types).
# @encoded.ptr must point to the relocated location: it is used to label the
# spot and to read the implicit addend when +reloc+ carries none.
def arch_decode_segments_reloc_386(reloc)
  sym = reloc.symbol
  if sym and (symname = sym.name) and sym.shndx == 'UNDEF' and @sections
    here = @encoded.ptr
    sec = @sections.find { |cand| cand.name and cand.offset <= here and cand.offset + cand.size > here }
    # label the use site of an imported symbol as "<section>_<symbol>"
    @encoded.add_export(new_label("#{sec.name}_#{symname}"), here, true) if sec
  end

  # decode addend if needed
  addend = nil
  unless %w[NONE COPY GLOB_DAT JMP_SLOT].include?(reloc.type)
    addend = reloc.addend || decode_sword
  end

  target = nil
  case reloc.type
  when 'NONE'
    # nothing to relocate
  when 'RELATIVE'
    # compiled to be loaded at seg.vaddr
    target = addend
    if off = addr_to_off(target)
      label = @encoded.inv_export[off]
      unless label
        label = new_label("xref_#{Expression[target]}")
        @encoded.add_export label, off
      end
      target = label
    else
      puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}" if $VERBOSE
    end
  when 'GLOB_DAT', 'JMP_SLOT', '32', 'PC32', 'TLS_TPOFF', 'TLS_TPOFF32'
    # XXX use versionned version
    # lazy jmp_slot ?
    target = 0
    target = reloc.symbol.name if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
    target = Expression[target, :-, reloc.offset] if reloc.type == 'PC32'
    target = Expression[target, :+, addend] if addend and addend != 0
    target = Expression[target, :+, 'tlsoffset'] if reloc.type == 'TLS_TPOFF'
    target = Expression[:-, [target, :+, 'tlsoffset']] if reloc.type == 'TLS_TPOFF32'
  when 'COPY'
    # mark the address pointed as a copy of the relocation target
    name = reloc.symbol && reloc.symbol.name
    unless name
      puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE
      name = ''
    end
    @encoded.add_export new_label("copy_of_#{name}"), @encoded.ptr
    target = nil
  else
    puts "W: Elf: unhandled 386 reloc #{reloc.inspect}" if $VERBOSE
    target = nil
  end

  Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
end
|
566
|
+
|
567
|
+
# returns the Metasm::Relocation that should be applied for reloc
|
568
|
+
# self.encoded.ptr must point to the location that will be relocated (for implicit addends)
|
569
|
+
# Returns the Metasm::Relocation to apply for the MIPS relocation +reloc+, or
# nil when the relocation type produces none (NONE, unhandled types).
# @encoded.ptr must point to the relocated location: it is used to label the
# spot and to read the implicit addend when +reloc+ carries none.
def arch_decode_segments_reloc_mips(reloc)
  sym = reloc.symbol
  if sym and (symname = sym.name) and sym.shndx == 'UNDEF' and @sections
    here = @encoded.ptr
    sec = @sections.find { |cand| cand.name and cand.offset <= here and cand.offset + cand.size > here }
    # label the use site of an imported symbol as "<section>_<symbol>"
    @encoded.add_export(new_label("#{sec.name}_#{symname}"), here, true) if sec
  end

  # decode addend if needed
  addend = nil
  addend = reloc.addend || decode_sword unless reloc.type == 'NONE'

  target = nil
  case reloc.type
  when 'NONE'
    # nothing to relocate
  when '32', 'REL32'
    target = 0
    target = reloc.symbol.name if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
    target = Expression[target, :-, reloc.offset] if reloc.type == 'REL32'
    target = Expression[target, :+, addend] if addend and addend != 0
  else
    puts "W: Elf: unhandled MIPS reloc #{reloc.inspect}" if $VERBOSE
    target = nil
  end

  Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
end
|
595
|
+
|
596
|
+
# returns the Metasm::Relocation that should be applied for reloc (x86_64 flavour)
# self.encoded.ptr must point to the location that will be relocated (for implicit addends)
# returns nil for 'NONE', 'COPY' and for the relocation types that are not handled
def arch_decode_segments_reloc_x86_64(reloc)
  # name the relocated slot after the section holding it and the undefined symbol it refers to
  if reloc.symbol and symname = reloc.symbol.name and reloc.symbol.shndx == 'UNDEF' and @sections
    holder = @sections.find do |sec|
      sec.name and sec.offset <= @encoded.ptr and sec.offset + sec.size > @encoded.ptr
    end
    @encoded.add_export(new_label("#{holder.name}_#{symname}"), @encoded.ptr, true) if holder
  end

  # decode addend if needed: 32bit relocs read a sword, all the others a sxword
  addend =
    case reloc.type
    when 'NONE' then nil
    when '32', 'PC32' then reloc.addend || decode_sword
    else reloc.addend || decode_sxword
    end

  sz = :u64
  target = nil
  case reloc.type
  when 'NONE'
    # nothing to relocate
  when 'RELATIVE'
    # base = @segments.find_all { |s| s.type == 'LOAD' }.map { |s| s.vaddr }.min & 0xffff_f000
    # compiled to be loaded at seg.vaddr
    target = addend
    if off = addr_to_off(target)
      label = @encoded.inv_export[off]
      if not label
        label = new_label("xref_#{Expression[target]}")
        @encoded.add_export label, off
      end
      target = label
    else
      puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}" if $VERBOSE
    end
  when 'GLOB_DAT', 'JMP_SLOT', '64', 'PC64', '32', 'PC32'
    # XXX use versioned version
    # lazy jmp_slot ?
    sym = reloc.symbol
    target = (sym.kind_of?(Symbol) and sym.name) ? sym.name : 0
    target = Expression[target, :-, reloc.offset] if %w[PC64 PC32].include?(reloc.type)
    target = Expression[target, :+, addend] if addend and addend != 0
    sz = :u32 if %w[32 PC32].include?(reloc.type)
  when 'COPY'
    # mark the address pointed as a copy of the relocation target
    name = reloc.symbol && reloc.symbol.name
    if not name
      puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE
      name = ''
    end
    @encoded.add_export new_label("copy_of_#{name}"), @encoded.ptr
  else
    puts "W: Elf: unhandled X86_64 reloc #{reloc.inspect}" if $VERBOSE
  end

  Metasm::Relocation.new(Expression[target], sz, @endianness) if target
end
|
651
|
+
|
652
|
+
class DwarfDebug
  # decode a DWARF2 'compilation unit'
  # elf provides the primitive decoders (decode_leb etc), info / abbrev / str are the
  # contents of the debug sections (str is only used for STRP attributes)
  # the caller's info.ptr is advanced past the unit; @tree receives the decoded root node
  # unless the unit or its abbreviation table is empty
  def decode(elf, info, abbrev, str)
    super(elf, info)
    len = @cu_len-7 # @cu_len is size from end of @cu_len field, so we subtract ptsz/tag/abroff
    info.ptr += len # advance for caller
    info = info[info.ptr-len, len] # we'll work on our segment
    abbrev.ptr = @abbrev_off

    return if abbrev.ptr >= abbrev.length or info.ptr >= info.length

    # abbreviation index => offset of its declaration in abbrev
    idx_abbroff = {}

    # returns a list of siblings at current abbrev.ptr
    # (when called without a parent, returns the first decoded node alone)
    decode_tree = lambda { |parent|
      siblings = []
      loop {
        info_idx = elf.decode_leb(info)
        break siblings if info_idx == 0
        # jump to the declaration if already seen, otherwise remember where it is
        abbrev.ptr = idx_abbroff[info_idx] if idx_abbroff[info_idx]
        idx_abbroff[info_idx] ||= abbrev.ptr
        n = DwarfDebug::Node.decode(elf, info, abbrev, str, idx_abbroff)
        idx_abbroff[info_idx+1] ||= abbrev.ptr
        siblings << n
        n.children = decode_tree[n] if n.has_child == 1
        n.parent = parent
        break n if not parent
      }
    }
    @tree = decode_tree[nil]
  end

  class Node
    # decodes one node: the declaration is read from abbrev, the attribute values from info
    # str is the string table used by STRP attributes, it may be nil (no .debug_str section)
    # idx_abbroff is accepted for the caller's convenience and not used here
    def decode(elf, info, abbrev, str, idx_abbroff)
      super(elf, abbrev)
      return if @index == 0

      # DW_FORM_sdata is a signed LEB128: same byte layout as decode_leb,
      # then sign-extend from bit 6 of the last byte
      decode_sleb = lambda {
        v = s = 0
        loop {
          b = info.read(1).unpack('C').first.to_i
          v |= (b & 0x7f) << s
          s += 7
          next if (b & 0x80) != 0
          v -= (1 << s) if (b & 0x40) != 0
          break v
        }
      }

      @attributes = []
      loop {
        a = Attribute.decode(elf, abbrev)
        break if a.attr == 0 and a.form == 0
        if a.form == 'INDIRECT' # actual form tag is stored in info
          a.form = elf.decode_leb(info)
          a.form = DWARF_FORM[a.form] || a.form # XXX INDIRECT again ?
        end
        a.data = case a.form
          when 'ADDR'; elf.decode_xword(info) # should use dbg.ptr_sz
          when 'DATA1', 'REF1', 'BLOCK1', 'FLAG'; elf.decode_byte(info)
          when 'DATA2', 'REF2', 'BLOCK2'; elf.decode_half(info)
          when 'DATA4', 'REF4', 'BLOCK4'; elf.decode_word(info)
          # NOTE(review): low word is read first, looks little-endian only - confirm for big-endian files
          when 'DATA8', 'REF8', 'BLOCK8'; elf.decode_word(info) | (elf.decode_word(info) << 32)
          when 'SDATA'; decode_sleb[]
          when 'UDATA', 'REF_UDATA', 'BLOCK'; elf.decode_leb(info)
          when 'STRING'; elf.decode_strz(info)
          when 'STRP'
            # always consume the offset so that info stays in sync, even without a string table
            stroff = elf.decode_word(info)
            if str
              str.ptr = stroff
              elf.decode_strz(str)
            end
          end
        case a.form
        when /^REF/
        when /^BLOCK/; a.data = info.read(a.data) # data was the block length, replace with the content
        end
        @attributes << a
      }
    end
  end
end
|
715
|
+
|
716
|
+
# decode an ULEB128 (dwarf2): little-endian groups of 7 bits,
# bytes are read from ed for as long as their high bit is set
def decode_leb(ed = @encoded)
  value = 0
  shift = 0
  loop do
    byte = ed.read(1).unpack('C').first.to_i
    value |= (byte & 0x7f) << shift
    return value if (byte & 0x80) == 0
    shift += 7
  end
end
|
726
|
+
|
727
|
+
# decodes the debugging information if available, one DwarfDebug per compilation unit in @debug
# only a subset of DWARF2/3 is handled right now
# most info taken from http://ratonland.org/?entry=39 & libdwarf/dwarf.h
def decode_debug
  return if not @sections

  # assert presence of DWARF sections
  by_name = lambda { |wanted| @sections.find { |sec| sec.name == wanted } }
  info_sec = by_name['.debug_info']
  abbrev_sec = by_name['.debug_abbrev']
  str_sec = by_name['.debug_str']
  return if not info_sec or not abbrev_sec

  # section -> content (the string table is optional)
  info = @encoded[info_sec.offset, info_sec.size]
  abbrev = @encoded[abbrev_sec.offset, abbrev_sec.size]
  str = @encoded[str_sec.offset, str_sec.size] if str_sec

  @debug = []
  @debug << DwarfDebug.decode(self, info, abbrev, str) while info.ptr < info.length
end
|
750
|
+
|
751
|
+
# decodes the ELF dynamic tags, interpret them, and decodes symbols and relocs
# does nothing when there is no DYNAMIC segment
def decode_segments_dynamic
  dynamic = @segments.find { |seg| seg.type == 'DYNAMIC' }
  return if not dynamic

  @encoded.ptr = add_label('dynamic_tags', dynamic.vaddr)
  decode_tags
  decode_segments_tags_interpret
  decode_segments_symbols
  decode_segments_relocs
  decode_segments_relocs_interpret
end
|
761
|
+
|
762
|
+
# decodes the dynamic segment, fills segments.encoded
# only the LOAD and INTERP segments receive an encoded content
def decode_segments
  decode_segments_dynamic
  decode_sections_symbols
  #decode_debug # too many info, decode on demand
  @segments.each { |seg|
    next unless %w[LOAD INTERP].include?(seg.type)

    filesz = seg.filesz
    pagepad = (-(seg.offset + filesz)) % 4096
    seg.encoded = @encoded[seg.offset, filesz] || EncodedData.new
    if seg.type == 'LOAD' and filesz > 0 and not seg.flags.include?('W')
      # align loaded data to the next page boundary for readonly mmap
      # but discard the labels/relocs etc
      begin
        seg.encoded << @encoded[seg.offset + filesz, pagepad].data
      rescue
        # best effort: the file may end before the page boundary
        nil
      end
      seg.encoded.virtsize = filesz + pagepad
    end
    # the in-memory size may exceed what is backed by the file
    seg.encoded.virtsize = seg.memsz if seg.memsz > seg.encoded.virtsize
  }
end
|
783
|
+
|
784
|
+
# decodes sections, interprets symbols/relocs, fills sections.encoded
# only the PROGBITS and NOBITS sections flagged ALLOC receive an encoded content:
# NOBITS get an empty content with a virtual size, PROGBITS get their bytes from the file
# TODO handle the other section types
def decode_sections
  decode_sections_symbols
  decode_sections_relocs
  @sections.find_all { |s| s.type == 'PROGBITS' or s.type == 'NOBITS' }.each { |s|
    next if not s.flags.include? 'ALLOC'
    if s.type == 'NOBITS'
      s.encoded = EncodedData.new '', :virtsize => s.size
    else
      s.encoded = @encoded[s.offset, s.size] || EncodedData.new
      s.encoded.virtsize = s.size
    end
  }
end
|
805
|
+
|
806
|
+
# decodes the exports: delegates to decode_segments_dynamic and returns its result
def decode_exports
  decode_segments_dynamic()
end
|
809
|
+
|
810
|
+
# decodes the elf header, and depending on the elf type, decode segments or sections
# DYN and EXEC files are decoded through their segments, REL files through their sections
# nothing more is decoded for the other types (eg CORE)
def decode
  decode_header
  kind = @header.type
  if kind == 'DYN' or kind == 'EXEC'
    decode_segments
  elsif kind == 'REL'
    decode_sections
  end
end
|
819
|
+
|
820
|
+
# yields the content and the base address of every LOAD segment
# for REL files, also yields every section having a content, along with
# a fresh label exported at its offset 0
def each_section
  @segments.each { |seg|
    next if seg.type != 'LOAD'
    yield seg.encoded, seg.vaddr
  }
  return if @header.type != 'REL'
  @sections.each { |sec|
    edata = sec.encoded
    next if not edata
    label = new_label(sec.name)
    edata.add_export label, 0
    yield edata, label
  }
end
|
830
|
+
|
831
|
+
# returns a metasm CPU object corresponding to +header.machine+
# raises a RuntimeError for the machines that are not supported
def cpu_from_headers
  machine = @header.machine
  case machine
  when 'X86_64' then X86_64.new
  when '386' then Ia32.new
  when 'MIPS' then MIPS.new(@endianness)
  when 'PPC' then PPC.new
  when 'ARM' then ARM.new
  else
    raise "unsupported cpu #{machine}"
  end
end
|
842
|
+
|
843
|
+
# returns an array including the ELF entrypoint (if not null) and the FUNC symbols addresses
# the undefined symbols are ignored, @symbols may be unset
# TODO include init/init_array
def get_default_entrypoints
  entrypoints = @header.entry != 0 ? [@header.entry] : []
  return entrypoints if not @symbols

  @symbols.each { |sym|
    next if sym.shndx == 'UNDEF' or sym.type != 'FUNC'
    entrypoints << sym.value
  }
  entrypoints
end
|
853
|
+
|
854
|
+
# returns the comment line describing the ELF segment starting at addr (address + sorted flags)
# falls back to the parent implementation when no segment starts there
def dump_section_header(addr, edata)
  seg = @segments.find { |s_| s_.vaddr == addr }
  return super(addr, edata) if not seg

  "\n// ELF segment at #{Expression[addr]}, flags = #{seg.flags.sort.join(', ')}"
end
|
860
|
+
|
861
|
+
# returns a disassembler with a special decodedfunction for dlsym, __libc_start_main, and a default function (i386 only)
# on ia32/x64, the usual noreturn libc functions are declared too
# on mips, $t9 is bound to the function address at the entrypoint and at every defined FUNC symbol
def init_disassembler
  d = super()
  d.backtrace_maxblocks_data = 4
  if d.get_section_at(0)
    # fixes call [constructor] => 0
    d.decoded[0] = true
    d.function[0] = @cpu.disassembler_default_func
  end
  case @cpu.shortname
  when 'ia32', 'x64'
    # parse the prototypes with a temporary C parser, the original one is restored afterwards
    old_cp = d.c_parser
    d.c_parser = nil
    d.parse_c <<EOC
void *dlsym(int, char *); // has special callback
// gcc's entrypoint, need pointers to reach main exe code (last callback)
void __libc_start_main(void(*)(), int, int, void(*)(), void(*)()) __attribute__((noreturn));
// standard noreturn, optimized by gcc
void __attribute__((noreturn)) exit(int);
void _exit __attribute__((noreturn))(int);
void abort(void) __attribute__((noreturn));
void __stack_chk_fail __attribute__((noreturn))(void);
EOC
    d.function[Expression['dlsym']] = dls = @cpu.decode_c_function_prototype(d.c_parser, 'dlsym')
    %w[__libc_start_main exit _exit abort __stack_chk_fail].each { |fname|
      d.function[Expression[fname]] = @cpu.decode_c_function_prototype(d.c_parser, fname)
    }
    d.c_parser = old_cp
    # dlsym: when the 2nd argument backtraces to a single string, bind the return register to that name
    dls.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
      sz = @cpu.size/8
      raise 'dlsym call error' if not dasm.decoded[calladdr]
      if @cpu.shortname == 'x64'
        arg2 = :rsi
      else
        arg2 = Indirection.new(Expression[:esp, :+, 2*sz], sz, calladdr)
      end
      fnaddr = dasm.backtrace(arg2, calladdr, :include_start => true, :maxdepth => maxdepth)
      if fnaddr.kind_of? ::Array and fnaddr.length == 1 and s = dasm.get_section_at(fnaddr.first) and fn = s[0].read(64) and i = fn.index(?\0) and i > sz # try to avoid ordinals
        bind = bind.merge @cpu.register_symbols[0] => Expression[fn[0, i]]
      end
      bind
    }
    df = d.function[:default] = @cpu.disassembler_default_func
    df.backtrace_binding[@cpu.register_symbols[4]] = Expression[@cpu.register_symbols[4], :+, @cpu.size/8]
    df.btbind_callback = nil
  when 'mips'
    (d.address_binding[@header.entry] ||= {})[:$t9] ||= Expression[@header.entry]
    # @symbols may be unset (see get_default_entrypoints), do not fail on such files
    @symbols.each { |s|
      next if s.shndx == 'UNDEF' or s.type != 'FUNC'
      (d.address_binding[s.value] ||= {})[:$t9] ||= Expression[s.value]
    } if @symbols
    d.function[:default] = @cpu.disassembler_default_func
  end
  d
end
|
918
|
+
|
919
|
+
# returns an array of [name, addr, length, info]
# built from the sections (the first one is skipped) when they are available,
# from the segments (with a nil name) otherwise
def section_info
  if not @sections
    return @segments.map { |seg| [nil, seg.vaddr, seg.memsz, seg.flags.join(',')] }
  end

  @sections[1..-1].map { |sec| [sec.name, sec.addr, sec.size, sec.flags.join(',')] }
end
|
931
|
+
|
932
|
+
# returns the SONAME entry of @tag, or @tag itself when it is unset
def module_name
  @tag && @tag['SONAME']
end
|
935
|
+
|
936
|
+
# returns the lowest vaddr among the LOAD segments, 0 if there is none
def module_address
  load_addrs = @segments.map { |seg| seg.type == 'LOAD' ? seg.vaddr : nil }.compact
  load_addrs.min || 0
end
|
939
|
+
|
940
|
+
# returns the distance between module_address and the highest end address
# (vaddr + memsz) of the LOAD segments, 0 if there is no LOAD segment
def module_size
  load_ends = @segments.to_a.reverse.map { |seg| seg.vaddr + seg.memsz if seg.type == 'LOAD' }.compact
  top = load_ends.max
  return 0 if not top

  top - module_address
end
|
944
|
+
|
945
|
+
# returns an array of [name, offset, size] for the named, defined symbols
# offsets are relative to module_address, LOCAL and WEAK symbol names get a 'local_' / 'weak_' prefix
# the list starts with ['entrypoint', offset] when the entrypoint is not null or the file is an EXEC
def module_symbols
  syms = []
  m_addr = module_address
  syms << ['entrypoint', @header.entry-m_addr] if @header.entry != 0 or @header.type == 'EXEC'
  # @symbols may be unset (see get_default_entrypoints), do not fail on such files
  @symbols.each { |s|
    next if not s.name or s.shndx == 'UNDEF'
    pfx = %w[LOCAL WEAK].include?(s.bind) ? s.bind.downcase + '_' : ''
    syms << [pfx+s.name, s.value-m_addr, s.size]
  } if @symbols
  syms
end
|
956
|
+
end
|
957
|
+
|
958
|
+
class LoadedELF
  # decodes the dynamic segment, fills segments.encoded
  # when no load address is set and a LOAD segment lies beyond the end of the raw data,
  # the load address is taken from the lowest LOAD vaddr
  def decode_segments
    loads = @segments.find_all { |seg| seg.type == 'LOAD' }
    if @load_address == 0 and loads.find { |seg| seg.vaddr > @encoded.length }
      @load_address = loads.map { |seg| seg.vaddr }.min
    end
    decode_segments_dynamic
    @segments.each { |seg|
      next if seg.type != 'LOAD'
      seg.encoded = @encoded[addr_to_off(seg.vaddr), seg.memsz]
    }
  end

  # do not try to decode the section header by default
  # only the ELF header found at off and the program header are decoded
  def decode_header(off = 0)
    @encoded.ptr = off
    @header.decode(self)
    decode_program_header(@header.phoff+off)
  end
end
|
979
|
+
end
|