metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,979 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/decode'
8
+ require 'metasm/exe_format/elf' unless defined? Metasm::ELF
9
+
10
+ module Metasm
11
+ class ELF
12
class Header
  # Decode hook registered on the :type field. It validates the identification
  # fields already read into +hdr+ and configures +elf+ from them: bitsize and
  # endianness are needed by decode_word & co for everything decoded later.
  # Raises InvalidExeFormat on a bad signature, class, data encoding or version.
  # NOTE(review): presumably the hook runs before :type itself is decoded -
  # confirm against decode_hook's definition.
  decode_hook(:type) do |elf, hdr|
    if hdr.magic != "\x7fELF"
      raise InvalidExeFormat, "E: ELF: invalid ELF signature #{hdr.magic.inspect}"
    end

    elf.bitsize =
      case hdr.e_class
      when '32' then 32
      when '64', '64_icc' then 64
      else raise InvalidExeFormat, "E: ELF: unsupported class #{hdr.e_class}"
      end

    elf.endianness =
      case hdr.data
      when 'LSB' then :little
      when 'MSB' then :big
      else raise InvalidExeFormat, "E: ELF: unsupported endianness #{hdr.data}"
      end

    unless hdr.i_version == 'CURRENT'
      raise InvalidExeFormat, "E: ELF: unsupported ELF version #{hdr.i_version}"
    end
  end
end
35
+
36
class Symbol
  # Decodes the symbol fields from +elf+, then resolves the symbol name.
  # strtab - string table to read the name from (at @name_p); when omitted
  #          the name is left untouched.
  # Returns the resolved name, or nil when no string table was given.
  def decode(elf, strtab = nil)
    super(elf)
    return unless strtab

    @name = elf.readstr(strtab, @name_p)
  end
end
42
+
43
+ # basic immediates decoding functions
44
# Each helper reads one immediate from +edata+ (defaults to @encoded) through
# edata.decode_imm, using the endianness stored in @endianness.

# unsigned 8-bit value
def decode_byte(edata = @encoded)
  edata.decode_imm(:u8, @endianness)
end

# unsigned 16-bit value
def decode_half(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end

# unsigned 32-bit value
def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end

# signed 32-bit value
def decode_sword(edata = @encoded)
  edata.decode_imm(:i32, @endianness)
end

# unsigned value whose width follows @bitsize (32 bits when @bitsize == 32, 64 bits otherwise)
def decode_xword(edata = @encoded)
  type = @bitsize == 32 ? :u32 : :u64
  edata.decode_imm(type, @endianness)
end

# signed value whose width follows @bitsize (32 bits when @bitsize == 32, 64 bits otherwise)
def decode_sxword(edata = @encoded)
  type = @bitsize == 32 ? :i32 : :i64
  edata.decode_imm(type, @endianness)
end

# addresses and offsets are decoded exactly like xwords
alias decode_addr decode_xword
alias decode_off decode_xword
52
+
53
# Reads the NUL-terminated string starting at offset +off+ of +str+.
# Returns the string without its terminator, or nil when +off+ is not
# strictly positive, when no terminator follows, or when searching +str+
# raises (a LoadedElf may hand us an arbitrary pointer).
def readstr(str, off)
  return unless off > 0

  terminator =
    begin
      str.index(?\0, off)
    rescue
      false
    end
  str[off...terminator] if terminator
end
58
+
59
+ # transforms a virtual address to a file offset, from mmaped segments addresses
60
# addr - virtual address to translate (may be nil)
# Looks for the 'LOAD' segment whose [vaddr, vaddr+memsz) range holds +addr+.
# Returns the matching file offset, or nil if +addr+ is nil or unmapped.
def addr_to_off(addr)
  return unless addr

  seg = @segments.find { |cand|
    cand.type == 'LOAD' and cand.vaddr <= addr and cand.vaddr + cand.memsz > addr
  }
  return unless seg

  addr - seg.vaddr + seg.offset
end
64
+
65
+ # memory address -> file offset
66
+ # handles relocated LoadedELF
67
# addr - memory address, possibly relocated
# The address is rebased by @load_address (initialised to 0 if unset) only
# when module_address is 0; otherwise it is used as is. The result is then
# translated by addr_to_off.
# Returns the file offset, or nil if the address is unmapped.
def addr_to_fileoff(addr)
  base =
    if module_address == 0
      @load_address ||= 0
    else
      0
    end
  addr_to_off(addr - base)
end
72
+
73
+ # file offset -> memory address
74
+ # handles relocated LoadedELF
75
# foff - offset in the file
# Looks for the 'LOAD' segment whose [offset, offset+filesz) range holds
# +foff+. @load_address (initialised to 0 if unset) is added only when
# module_address is 0.
# Returns the memory address, or nil if no segment covers +foff+.
def fileoff_to_addr(foff)
  seg = @segments.find { |cand|
    cand.type == 'LOAD' and cand.offset <= foff and cand.offset + cand.filesz > foff
  }
  return unless seg

  base = module_address == 0 ? (@load_address ||= 0) : 0
  seg.vaddr + base + foff - seg.offset
end
82
+
83
+ # return the address of a label
84
# name - a label name, or an Integer which is returned unchanged
# The label is searched first in the exports of each segment's own encoded
# data, then in the exports of @encoded (file offsets), which are mapped back
# through the segment covering that offset.
# Returns the virtual address, or nil when the label cannot be resolved.
def label_addr(name)
  return name if name.kind_of? Integer

  owner = @segments.find { |seg| seg.encoded and seg.encoded.export[name] }
  return owner.vaddr + owner.encoded.export[name] if owner

  file_off = @encoded.export[name]
  return unless file_off

  holder = @segments.find { |seg| seg.offset <= file_off and seg.offset + seg.filesz > file_off }
  holder.vaddr + file_off - holder.offset if holder
end
93
+
94
+ # make an export of +self.encoded+, returns the label name if successful
95
# name - base name for the label (made unique through new_label)
# addr - virtual address the label should point to
# Returns the label actually exported, or nil when +addr+ is not mapped in
# the file (a warning is printed if $VERBOSE is set).
def add_label(name, addr)
  file_off = addr_to_off(addr)
  unless file_off
    puts "W: Elf: #{name} points to unmmaped space #{'0x%08X' % addr}" if $VERBOSE
    return nil
  end

  label = new_label(name)
  @encoded.add_export label, file_off
  label
end
104
+
105
+ # decodes the elf header, section & program header
106
# off         - file offset of the ELF header
# decode_phdr - when true, also decode the program header table if phoff != 0
# decode_shdr - when true, also decode the section header table if shoff != 0
# Raises InvalidExeFormat when the header's ehsize differs from Header.size.
def decode_header(off = 0, decode_phdr=true, decode_shdr=true)
  @encoded.ptr = off
  @header.decode self
  unless Header.size(self) == @header.ehsize
    raise InvalidExeFormat, "Invalid elf header size: #{@header.ehsize}"
  end

  # phoff/shoff are shifted by +off+ to obtain offsets in @encoded
  decode_program_header(@header.phoff + off) if decode_phdr and @header.phoff != 0
  decode_section_header(@header.shoff + off) if decode_shdr and @header.shoff != 0
end
117
+
118
+ # decodes the section header
119
+ # section names are read from shstrndx if possible
120
# off - file offset of the section header table
# Fills @sections with shnum decoded entries and exports a 'section_header'
# label at +off+. When the section designated by shstrndx can be read, every
# section but the first gets its name, and named sections with a positive
# address get a "section_<name>" label.
# Raises InvalidExeFormat when shentsize differs from Section.size.
def decode_section_header(off = @header.shoff)
  if Section.size(self) != @header.shentsize
    raise InvalidExeFormat, "Invalid elf section header size: #{@header.shentsize}"
  end

  @encoded.add_export new_label('section_header'), off
  @encoded.ptr = off
  @sections = []
  @header.shnum.times do
    @sections << Section.decode(self)
  end

  # read sections name
  strndx = @header.shstrndx
  return if strndx == 0

  names = @sections[strndx]
  return unless names

  # LoadedElf may not have shstr mmaped
  chunk = @encoded[names.offset, names.size]
  names.encoded = chunk
  return unless chunk

  @sections[1..-1].each do |sec|
    sec.name = readstr(names.encoded.data, sec.name_p)
    add_label("section_#{sec.name}", sec.addr) if sec.name and sec.addr > 0
  end
end
136
+
137
+ # decodes the program header table
138
+ # marks the elf entrypoint as an export of +self.encoded+
139
# off - file offset of the program header table
# Fills @segments with phnum decoded entries and exports a 'program_header'
# label at +off+; a non-zero entry address is labelled 'entrypoint'.
# Raises InvalidExeFormat when phentsize differs from Segment.size.
def decode_program_header(off = @header.phoff)
  unless Segment.size(self) == @header.phentsize
    raise InvalidExeFormat, "Invalid elf program header size: #{@header.phentsize}"
  end

  @encoded.add_export new_label('program_header'), off
  @encoded.ptr = off
  @segments = []
  @header.phnum.times do
    @segments << Segment.decode(self)
  end

  entry = @header.entry
  add_label('entrypoint', entry) unless entry == 0
end
150
+
151
+ # read the dynamic symbols hash table, and checks that every global and named symbol is accessible through it
152
+ # outputs a warning if it's not and $VERBOSE is set
153
# off - offset of the hash table in @encoded; nothing is done when nil
# The table is read as: bucket count, chain count, the buckets, the chains.
# Every named GLOBAL symbol that is not UNDEF is then looked up through its
# bucket chain, and a warning is printed (if $VERBOSE) when it is unreachable.
#
# The table comes from the file and is not trusted: an empty bucket array or
# a chain index pointing outside the chain table ends the lookup (the symbol
# is reported as not found) instead of raising.
def check_symbols_hash(off = @tag['HASH'])
  return if not @encoded.ptr = off

  hash_bucket_len = decode_word
  sym_count = decode_word

  hash_bucket = [] ; hash_bucket_len.times { hash_bucket << decode_word }
  hash_table = [] ; sym_count.times { hash_table << decode_word }

  @symbols.each { |s|
    next if not s.name or s.bind != 'GLOBAL' or s.shndx == 'UNDEF'

    found = false
    h = ELF.hash_symbol_name(s.name)
    # no bucket at all: nothing can be found, and h % 0 would raise
    idx = (hash_bucket_len == 0 ? 0 : hash_bucket[h % hash_bucket_len])
    sym_count.times {	# to avoid DoS by loop
      # 0 ends a chain; nil means the previous index was out of the table
      break if idx.nil? or idx == 0
      if ss = @symbols[idx] and ss.name == s.name
        found = true
        break
      end
      idx = hash_table[idx]
    }
    if not found
      puts "W: Elf: Symbol #{s.name.inspect} not found in hash table" if $VERBOSE
    end
  }
end
181
+
182
+ # checks every symbol's accessibility through the gnu_hash table
183
# off            - offset of the gnu_hash table in @encoded; nothing is done when nil
# just_get_count - when true, return the symbol count derived from the table
# Returns symndx + number of chain words read when +just_get_count+ is set,
# nil otherwise (the accessibility check itself is not implemented yet).
def check_symbols_gnu_hash(off = @tag['GNU_HASH'], just_get_count=false)
  @encoded.ptr = off
  return unless off

  # when present: the symndx first symbols are not sorted (SECTION/LOCAL/FILE/etc) symtable[symndx] is sorted (1st sorted symbol)
  # the sorted symbols are sorted by [gnu_hash_symbol_name(symbol.name) % hash_bucket_len]
  bucket_count = decode_word
  symndx = decode_word      # index of first sorted symbol in symtab
  maskwords = decode_word   # number of words in the second part of the ghash section (32 or 64 bits)
  decode_word               # shift2, used in the bloom filter

  # "bloomfilter[N] has bit B cleared if there is no M (M > symndx) which satisfies (C = @header.class)
  # ((gnu_hash(sym[M].name) / C) % maskwords) == N &&
  # ((gnu_hash(sym[M].name) % C) == B ||
  # ((gnu_hash(sym[M].name) >> shift2) % C) == B"
  # bloomfilter may be [~0]
  # the filter words are not used here, they are only read to step over them
  maskwords.times { decode_xword }

  # bucket[N] contains the lowest M for which
  # gnu_hash(sym[M]) % nbuckets == N
  # or 0 if none
  buckets = []
  bucket_count.times { buckets << decode_word }

  # the number of hashed symbols is not known beforehand: read one chain per
  # non-empty bucket and count the words. A chain word holds
  # (gnu_hash(sym[N].name) & ~1) | (1 if sym[N] is the last symbol of its chain)
  chain_words = 0
  buckets.each do |first_sym|
    next if first_sym == 0	# no hash chain for this mod
    loop do
      word = decode_word
      chain_words += 1
      break if word.odd?
    end
  end

  # TODO actual check of the symbols
  chain_words + symndx if just_get_count
end
231
+
232
+ # read dynamic tags array
233
# off - offset of the dynamic tag array; when nil it is taken from the
#       'DYNAMIC' segment, or failing that from the 'DYNAMIC' section
# Fills @tag with name => value pairs until a 'NULL' tag is read. Unknown
# (Integer) tags and 'NEEDED' accumulate their values in an array, 'POSFLAG_1'
# is ignored, and only the first occurrence of any other tag is kept.
# Does nothing when no offset could be determined.
def decode_tags(off = nil)
  off ||=
    if dynseg = @segments.find { |seg| seg.type == 'DYNAMIC' }
      # this way it also works with LoadedELF
      addr_to_off(dynseg.vaddr)
    elsif dynsec = @sections.find { |sec| sec.type == 'DYNAMIC' }
      # if no DYNAMIC segment, assume we decode an ET_REL from file
      dynsec.offset
    end
  @encoded.ptr = off
  return unless off

  @tag = {}
  loop do
    raw = decode_sxword
    val = decode_xword
    tag =
      if raw >= DYNAMIC_TAG_LOPROC and raw < DYNAMIC_TAG_HIPROC
        # processor-specific range: names are looked up relative to LOPROC
        named = int_to_hash(raw - DYNAMIC_TAG_LOPROC, DYNAMIC_TAG_PROC[@header.machine] || {})
        named.kind_of?(Integer) ? named + DYNAMIC_TAG_LOPROC : named
      else
        int_to_hash(raw, DYNAMIC_TAG)
      end

    case tag
    when 'NULL'
      @tag[tag] = val
      break
    when Integer, 'NEEDED'	# here, list of tags for which multiple occurences are allowed
      puts "W: Elf: unknown dynamic tag 0x#{tag.to_s 16}" if $VERBOSE and tag.kind_of?(Integer)
      (@tag[tag] ||= []) << val
    when 'POSFLAG_1'
      puts "W: Elf: ignoring dynamic tag modifier #{tag} #{int_to_hash(val, DYNAMIC_POSFLAG_1)}" if $VERBOSE
    else
      if @tag[tag]
        puts "W: Elf: ignoring re-occurence of dynamic tag #{tag} (value #{'0x%08X' % val})" if $VERBOSE
      else
        @tag[tag] = val
      end
    end
  end
end
277
+
278
+ # interprets tags (convert flags, arrays etc), mark them as self.encoded.export
279
# Rewrites the raw values stored in @tag:
# * string offsets (NEEDED, SONAME, RPATH, RUNPATH) become strings read from
#   the dynamic string table,
# * addresses become labels exported in @encoded when they are mapped,
# * *_ARRAY tags become arrays of labels/addresses (their size tag is removed),
# * PLTREL / FLAGS / FLAGS_1 / FEATURES_1 become symbolic values.
def decode_segments_tags_interpret
  strtab = nil
  if @tag['STRTAB']
    if strsz = @tag['STRSZ']
      if label = add_label('dynamic_strtab', @tag['STRTAB'])
        @tag['STRTAB'] = label
        strtab = @encoded[label, strsz].data
      end
    elsif $VERBOSE
      puts "W: Elf: no string table size tag"
    end
  end

  @tag.keys.each do |k|
    case k
    when Integer
      # unknown tag, values are left raw
    when 'NEEDED', 'SONAME', 'RPATH', 'RUNPATH'
      unless strtab
        puts "W: Elf: no string table, needed for tag #{k}" if $VERBOSE
        next
      end
      if k == 'NEEDED'
        # array of strings
        @tag[k].map! { |v| readstr(strtab, v) }
      else
        # string
        @tag[k] = readstr(strtab, @tag[k])
      end
    when 'INIT', 'FINI', 'PLTGOT', 'HASH', 'GNU_HASH', 'SYMTAB', 'RELA', 'REL', 'JMPREL'
      # keep the raw address when it is not mapped
      @tag[k] = add_label("dynamic_#{k.downcase}", @tag[k]) || @tag[k]
    when 'INIT_ARRAY', 'FINI_ARRAY', 'PREINIT_ARRAY'
      label = add_label("dynamic_#{k.downcase}", @tag[k])
      next unless label

      bytes = @tag.delete(k + 'SZ')
      unless bytes
        puts "W: Elf: tag #{k} has no corresponding size tag" if $VERBOSE
        next
      end

      tab = @encoded[label, bytes]
      tab.ptr = 0
      entries = @tag[k] = []
      while tab.ptr < tab.length
        a = decode_addr(tab)
        entries << (add_label("dynamic_#{k.downcase}_#{entries.length}", a) || a)
      end
    when 'PLTREL'
      @tag[k] = int_to_hash(@tag[k], DYNAMIC_TAG)
    when 'FLAGS'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FLAGS)
    when 'FLAGS_1'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FLAGS_1)
    when 'FEATURES_1'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FEATURES_1)
    end
  end
end
331
+
332
+ # marks a symbol as @encoded.export (from s.value, using segments or sections)
333
# s - decoded symbol
# Only named NOTYPE/OBJECT/FUNC symbols that are not UNDEF are exported.
# The file offset comes from the symbol's section for 'REL' files, and from
# the segments otherwise (a symbol may point to the very end of a LOAD segment).
# When the name is already exported at another offset, underscores are
# appended until the name is free or matches this offset.
#
# In a 'REL' file a symbol whose shndx is not a valid index in @sections
# cannot be located in the file; it is skipped (with a warning if $VERBOSE,
# except for 'ABS') instead of raising.
def decode_symbol_export(s)
  return unless s.name and s.shndx != 'UNDEF' and %w[NOTYPE OBJECT FUNC].include?(s.type)

  if @header.type == 'REL'
    # NOTE(review): special indexes are compared to strings ('UNDEF', 'ABS')
    # in this method, so non-Integer values are presumably reserved indexes
    sec = @sections[s.shndx] if s.shndx.kind_of?(Integer)
    if not sec
      puts "W: Elf: symbol is not defined in a section (#{s.inspect})" if $VERBOSE and s.shndx != 'ABS'
      return
    end
    o = sec.offset + s.value
  elsif not o = addr_to_off(s.value)
    # allow to point to end of segment
    if not seg = @segments.find { |seg_| seg_.type == 'LOAD' and seg_.vaddr + seg_.memsz == s.value }	# check end
      puts "W: Elf: symbol points to unmmaped space (#{s.inspect})" if $VERBOSE and s.shndx != 'ABS'
      return
    end
    # LoadedELF would have returned an addr_to_off = addr
    o = s.value - seg.vaddr + seg.offset
  end

  name = s.name
  while @encoded.export[name] and @encoded.export[name] != o
    puts "W: Elf: symbol #{name} already seen at #{'%X' % @encoded.export[name]} - now at #{'%X' % o}) (may be a different version definition)" if $VERBOSE
    name += '_'	# do not modify inplace
  end
  @encoded.add_export name, o
end
355
+
356
+ # read symbol table, and mark all symbols found as exports of self.encoded
357
+ # tables locations are found in self.tags
358
+ # XXX symbol count is found from the hash table, this may not work with GNU_HASH only binaries
359
# Requires the STRTAB, STRSZ and SYMTAB tags plus HASH or GNU_HASH (used to
# find the number of symbols); returns without doing anything otherwise.
# @symbols is emptied, then refilled with the decoded symbols, each of which
# is passed to decode_symbol_export. With $VERBOSE set, the hash tables are
# checked afterwards.
# Raises RuntimeError when the SYMENT tag differs from Symbol.size.
def decode_segments_symbols
  return unless %w[STRTAB STRSZ SYMTAB].all? { |t| @tag[t] } and (@tag['HASH'] or @tag['GNU_HASH'])

  unless @tag['SYMENT'] == Symbol.size(self)
    raise "E: ELF: unsupported symbol entry size: #{@tag['SYMENT']}"
  end

  # find number of symbols
  sym_count =
    if hash_off = @tag['HASH']
      @encoded.ptr = hash_off	# assume tag already interpreted (would need addr_to_off otherwise)
      decode_word	# first word of the table, not needed here
      decode_word	# second word: used as the symbol count
    else
      check_symbols_gnu_hash(@tag['GNU_HASH'], true)
    end

  strtab = @encoded[@tag['STRTAB'], @tag['STRSZ']].data.to_str

  @encoded.ptr = @tag['SYMTAB']
  @symbols.clear
  sym_count.times do
    sym = Symbol.decode(self, strtab)
    @symbols << sym
    decode_symbol_export(sym)
  end

  if $VERBOSE
    check_symbols_hash
    check_symbols_gnu_hash
  end
end
386
+
387
+ # decode SYMTAB sections
388
# Decodes every 'SYMTAB' section whose linked string table section exists,
# appends the symbols to @symbols and exports them (decode_symbol_export).
# For 'DYN' and 'EXEC' files, UNDEF symbols and symbols whose name is already
# present in @symbols are skipped.
# Raises RuntimeError ('Invalid symbol table') when a symbol section claims to
# be larger than the whole file.
def decode_sections_symbols
  @symbols ||= []
  # names already present in @symbols, built on first use: looking a name up
  # here replaces a scan of the whole symbol list for every decoded symbol
  known_names = nil
  @sections.to_a.each { |sec|
    next if sec.type != 'SYMTAB'
    next if not strtab = @sections[sec.link]
    strtab = @encoded[strtab.offset, strtab.size].data
    @encoded.ptr = sec.offset
    syms = []
    raise 'Invalid symbol table' if sec.size > @encoded.length
    (sec.size / Symbol.size(self)).times { syms << Symbol.decode(self, strtab) }
    alreadysegs = true if @header.type == 'DYN' or @header.type == 'EXEC'
    if alreadysegs and not known_names
      known_names = {}
      @symbols.each { |ss| known_names[ss.name] = true }
    end
    syms.each { |s|
      if alreadysegs
        # if we already decoded the symbols from the DYNAMIC segment,
        # ignore dups and imports from this section
        next if s.shndx == 'UNDEF'
        next if known_names[s.name]
        known_names[s.name] = true
      end
      @symbols << s
      decode_symbol_export(s)
    }
  }
end
411
+
412
+ # decode REL/RELA sections
413
# Decodes every 'REL' / 'RELA' section into @relocations. When a handler
# exists for the file's machine (arch_decode_segments_reloc_<machine>), each
# new relocation is also turned into an entry of @encoded.reloc, located in
# the section designated by the relocation section's info field.
def decode_sections_relocs
  @relocations ||= []
  @sections.to_a.each do |sec|
    relcls =
      case sec.type
      when 'REL' then Relocation
      when 'RELA' then RelocationAddend
      else next
      end

    first_new = @relocations.length
    @encoded.ptr = sec.offset
    @relocations << relcls.decode(self) while @encoded.ptr < sec.offset + sec.size

    # create edata relocs
    tsec = @sections[sec.info]
    relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}"
    next unless respond_to? relocproc

    new_label('pcrel')
    @relocations[first_new..-1].each do |entry|
      # the handler expects @encoded.ptr on the relocated location
      here = @encoded.ptr = tsec.offset + entry.offset
      # work on a copy, the offset of the stored relocation stays numeric
      rel = entry.dup
      rel.offset = Expression[new_label('pcrel')]
      resolved = send(relocproc, rel)
      @encoded.reloc[here] = resolved if resolved
    end
  end
end
443
+
444
+ # decode relocation tables (REL, RELA, JMPREL) from @tags
445
# Empties @relocations, then refills it from the tables designated by the
# REL, RELA and JMPREL tags (in that order), each being read for the number
# of bytes given by its size tag (RELSZ, RELASZ, PLTRELSZ).
# Raises RuntimeError on an unexpected entry size or PLTREL value.
def decode_segments_relocs
  @relocations.clear

  # decodes +klass+ entries from the current position, for +bytes+ bytes
  read_table = lambda { |klass, bytes|
    table_end = @encoded.ptr + bytes
    @relocations << klass.decode(self) while @encoded.ptr < table_end
  }

  rel = @tag['REL']
  @encoded.ptr = rel
  if rel
    raise "E: ELF: unsupported rel entry size #{@tag['RELENT']}" if @tag['RELENT'] != Relocation.size(self)
    read_table[Relocation, @tag['RELSZ']]
  end

  rela = @tag['RELA']
  @encoded.ptr = rela
  if rela
    raise "E: ELF: unsupported rela entry size #{@tag['RELAENT'].inspect}" if @tag['RELAENT'] != RelocationAddend.size(self)
    read_table[RelocationAddend, @tag['RELASZ']]
  end

  jmprel = @tag['JMPREL']
  @encoded.ptr = jmprel
  if jmprel
    relcls =
      case reltype = @tag['PLTREL']
      when 'REL' then Relocation
      when 'RELA' then RelocationAddend
      else raise "E: ELF: unsupported plt relocation type #{reltype}"
      end
    read_table[relcls, @tag['PLTRELSZ']]
  end
end
475
+
476
+ # use relocations as self.encoded.reloc
477
# Converts @relocations to entries of @encoded.reloc through the handler of
# the file's machine (arch_decode_segments_reloc_<machine>); returns early
# when there is no such handler. Relocations with a null offset, in unmapped
# space, or on an already relocated offset are skipped.
# For MIPS files with the PLTGOT, GOTSYM and LOCAL_GOTNO tags, relocations
# are additionally created for the named symbols from index GOTSYM on.
def decode_segments_relocs_interpret
  relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}"
  unless respond_to? relocproc
    puts "W: Elf: relocs for arch #{@header.machine} unsupported" if $VERBOSE
    return
  end

  @relocations.each do |r|
    next if r.offset == 0

    o = addr_to_off(r.offset)
    if not o
      puts "W: Elf: relocation in unmmaped space (#{r.inspect})" if $VERBOSE
    elsif @encoded.reloc[o]
      puts "W: Elf: not rerelocating address #{'%08X' % r.offset}" if $VERBOSE
    else
      # the handler expects @encoded.ptr on the relocated location
      @encoded.ptr = o
      rel = send(relocproc, r)
      @encoded.reloc[o] = rel if rel
    end
  end

  return unless @header.machine == 'MIPS' and @tag['PLTGOT'] and @tag['GOTSYM'] and @tag['LOCAL_GOTNO']

  puts "emulating mips PLT-like relocs" if $VERBOSE
  wsz = @bitsize/8
  gotsym = @tag['GOTSYM']
  # address such that symbol number i has its slot at dyntab + i*wsz
  dyntab = label_addr(@tag['PLTGOT']) - (gotsym - @tag['LOCAL_GOTNO']) * wsz
  dt_o = addr_to_off(dyntab)
  @symbols.each_with_index { |sym, i|
    next if i < gotsym or not sym.name
    @encoded.reloc[dt_o + wsz*i] = Metasm::Relocation.new(Expression[sym.name], "u#{@bitsize}".to_sym, @endianness)
  }
end
511
+
512
+ # returns the Metasm::Relocation that should be applied for reloc
513
+ # self.encoded.ptr must point to the location that will be relocated (for implicit addends)
514
# reloc - ELF relocation entry; @encoded.ptr must be on the relocated location
# Returns a 32-bit Metasm::Relocation, or nil when nothing has to be applied
# (NONE, COPY, unhandled types).
# Side effects: may export labels in @encoded (import site, xref target,
# copy_of_<name>), and reads the implicit addend at @encoded.ptr when the
# entry carries none.
def arch_decode_segments_reloc_386(reloc)
  # label the location referencing an imported symbol as "<section>_<symbol>"
  sym_name = reloc.symbol && reloc.symbol.name
  if sym_name and reloc.symbol.shndx == 'UNDEF' and @sections
    holder = @sections.find { |sec| sec.name and sec.offset <= @encoded.ptr and sec.offset + sec.size > @encoded.ptr }
    @encoded.add_export(new_label("#{holder.name}_#{sym_name}"), @encoded.ptr, true) if holder
  end

  type = reloc.type

  # decode addend if needed
  addend = nil
  unless %w[NONE COPY GLOB_DAT JMP_SLOT].include?(type)
    addend = reloc.addend || decode_sword
  end

  target = nil
  case type
  when 'NONE'
    # nothing to do
  when 'RELATIVE'
    # base = @segments.find_all { |s| s.type == 'LOAD' }.map { |s| s.vaddr }.min & 0xffff_f000
    # compiled to be loaded at seg.vaddr
    file_off = addr_to_off(addend)
    if file_off
      target = @encoded.inv_export[file_off]
      unless target
        target = new_label("xref_#{Expression[addend]}")
        @encoded.add_export target, file_off
      end
    else
      puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}" if $VERBOSE
      # the raw addend is kept as target
      target = addend
    end
  when 'GLOB_DAT', 'JMP_SLOT', '32', 'PC32', 'TLS_TPOFF', 'TLS_TPOFF32'
    # XXX use versionned version
    # lazy jmp_slot ?
    target =
      if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
        reloc.symbol.name
      else
        0
      end
    target = Expression[target, :-, reloc.offset] if type == 'PC32'
    target = Expression[target, :+, addend] if addend and addend != 0
    case type
    when 'TLS_TPOFF' then target = Expression[target, :+, 'tlsoffset']
    when 'TLS_TPOFF32' then target = Expression[:-, [target, :+, 'tlsoffset']]
    end
  when 'COPY'
    # mark the address pointed as a copy of the relocation target
    copied = reloc.symbol && reloc.symbol.name
    unless copied
      puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE
      copied = ''
    end
    @encoded.add_export new_label("copy_of_#{copied}"), @encoded.ptr
  else
    puts "W: Elf: unhandled 386 reloc #{reloc.inspect}" if $VERBOSE
  end

  Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
end
566
+
567
+ # returns the Metasm::Relocation that should be applied for reloc
568
+ # self.encoded.ptr must point to the location that will be relocated (for implicit addends)
569
# reloc - ELF relocation entry; @encoded.ptr must be on the relocated location
# Only the '32' and 'REL32' types are handled.
# Returns a 32-bit Metasm::Relocation, or nil for NONE and unhandled types.
# Side effects: may export an import-site label in @encoded, and reads the
# implicit addend at @encoded.ptr when the entry carries none.
def arch_decode_segments_reloc_mips(reloc)
  # label the location referencing an imported symbol as "<section>_<symbol>"
  sym_name = reloc.symbol && reloc.symbol.name
  if sym_name and reloc.symbol.shndx == 'UNDEF' and @sections
    holder = @sections.find { |sec| sec.name and sec.offset <= @encoded.ptr and sec.offset + sec.size > @encoded.ptr }
    @encoded.add_export(new_label("#{holder.name}_#{sym_name}"), @encoded.ptr, true) if holder
  end

  type = reloc.type

  # decode addend if needed
  addend = (reloc.addend || decode_sword) unless type == 'NONE'

  target = nil
  case type
  when 'NONE'
    # nothing to do
  when '32', 'REL32'
    target =
      if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
        reloc.symbol.name
      else
        0
      end
    target = Expression[target, :-, reloc.offset] if type == 'REL32'
    target = Expression[target, :+, addend] if addend and addend != 0
  else
    puts "W: Elf: unhandled MIPS reloc #{reloc.inspect}" if $VERBOSE
  end

  Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
end
595
+
596
+ # returns the Metasm::Relocation that should be applied for reloc
597
+ # self.encoded.ptr must point to the location that will be relocated (for implicit addends)
598
# reloc - ELF relocation entry; @encoded.ptr must be on the relocated location
# Returns a Metasm::Relocation (32 bits wide for the '32' and 'PC32' types,
# 64 bits otherwise), or nil when nothing has to be applied (NONE, COPY,
# unhandled types).
# Side effects: may export labels in @encoded (import site, xref target,
# copy_of_<name>), and reads the implicit addend at @encoded.ptr when the
# entry carries none.
def arch_decode_segments_reloc_x86_64(reloc)
  # label the location referencing an imported symbol as "<section>_<symbol>"
  sym_name = reloc.symbol && reloc.symbol.name
  if sym_name and reloc.symbol.shndx == 'UNDEF' and @sections
    holder = @sections.find { |sec| sec.name and sec.offset <= @encoded.ptr and sec.offset + sec.size > @encoded.ptr }
    @encoded.add_export(new_label("#{holder.name}_#{sym_name}"), @encoded.ptr, true) if holder
  end

  type = reloc.type

  # decode addend if needed: the implicit addend has the width of the relocated field
  addend =
    case type
    when 'NONE' then nil
    when '32', 'PC32' then reloc.addend || decode_sword
    else reloc.addend || decode_sxword
    end

  sz = :u64
  target = nil
  case type
  when 'NONE'
    # nothing to do
  when 'RELATIVE'
    # base = @segments.find_all { |s| s.type == 'LOAD' }.map { |s| s.vaddr }.min & 0xffff_f000
    # compiled to be loaded at seg.vaddr
    file_off = addr_to_off(addend)
    if file_off
      target = @encoded.inv_export[file_off]
      unless target
        target = new_label("xref_#{Expression[addend]}")
        @encoded.add_export target, file_off
      end
    else
      puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}" if $VERBOSE
      # the raw addend is kept as target
      target = addend
    end
  when 'GLOB_DAT', 'JMP_SLOT', '64', 'PC64', '32', 'PC32'
    # XXX use versionned version
    # lazy jmp_slot ?
    target =
      if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
        reloc.symbol.name
      else
        0
      end
    target = Expression[target, :-, reloc.offset] if type == 'PC64' or type == 'PC32'
    target = Expression[target, :+, addend] if addend and addend != 0
    sz = :u32 if type == '32' or type == 'PC32'
  when 'COPY'
    # mark the address pointed as a copy of the relocation target
    copied = reloc.symbol && reloc.symbol.name
    unless copied
      puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE
      copied = ''
    end
    @encoded.add_export new_label("copy_of_#{copied}"), @encoded.ptr
  else
    puts "W: Elf: unhandled X86_64 reloc #{reloc.inspect}" if $VERBOSE
  end

  Metasm::Relocation.new(Expression[target], sz, @endianness) if target
end
651
+
652
class DwarfDebug
  # decode a DWARF2 'compilation unit'
  # +elf+ provides the decoding primitives (decode_leb, decode_word...)
  # +info+ and +abbrev+ are the data to decode from (decode_debug passes the .debug_info and .debug_abbrev contents)
  # +str+ is the string table used by STRP attributes (decode_debug passes .debug_str, or nil when absent)
  # info.ptr is advanced past the unit for the caller, the decoded tree is stored in @tree
  def decode(elf, info, abbrev, str)
    super(elf, info)
    len = @cu_len-7	# @cu_len is size from end of @cu_len field, so we subtract ptsz/tag/abroff
    info.ptr += len	# advance for caller
    info = info[info.ptr-len, len]	# we'll work on our segment
    abbrev.ptr = @abbrev_off

    return if abbrev.ptr >= abbrev.length or info.ptr >= info.length

    # abbrev index => offset of its declaration in abbrev, filled as declarations are met
    idx_abbroff = {}

    # returns a list of siblings at current abbrev.ptr
    # when called without a parent (toplevel), returns the first decoded node instead
    decode_tree = lambda { |parent|
      siblings = []
      loop {
        info_idx = elf.decode_leb(info)
        # a null index ends the list of siblings
        break siblings if info_idx == 0
        abbrev.ptr = idx_abbroff[info_idx] if idx_abbroff[info_idx]
        idx_abbroff[info_idx] ||= abbrev.ptr
        n = DwarfDebug::Node.decode(elf, info, abbrev, str, idx_abbroff)
        # the next declaration follows the one just read
        idx_abbroff[info_idx+1] ||= abbrev.ptr
        siblings << n
        n.children = decode_tree[n] if n.has_child == 1
        n.parent = parent
        break n if not parent
      }
    }
    @tree = decode_tree[nil]
  end

  class Node
    # decode one node: its declaration is read from +abbrev+, the attribute values from +info+
    # STRP attributes are resolved through +str+; when +str+ is nil (no string table available)
    # the offset is still consumed from +info+ and the attribute data is left nil
    # the decoded attributes are stored in @attributes
    def decode(elf, info, abbrev, str, idx_abbroff)
      super(elf, abbrev)
      return if @index == 0
      @attributes = []
      loop {
        a = Attribute.decode(elf, abbrev)
        # a null attr/form pair ends the attribute list
        break if a.attr == 0 and a.form == 0
        if a.form == 'INDIRECT'	# actual form tag is stored in info
          a.form = elf.decode_leb(info)
          a.form = DWARF_FORM[a.form] || a.form	# XXX INDIRECT again ?
        end
        a.data = case a.form
        when 'ADDR'; elf.decode_xword(info)	# should use dbg.ptr_sz
        when 'DATA1', 'REF1', 'BLOCK1', 'FLAG'; elf.decode_byte(info)
        when 'DATA2', 'REF2', 'BLOCK2'; elf.decode_half(info)
        when 'DATA4', 'REF4', 'BLOCK4'; elf.decode_word(info)
        when 'DATA8', 'REF8', 'BLOCK8'; elf.decode_word(info) | (elf.decode_word(info) << 32)
        when 'SDATA', 'UDATA', 'REF_UDATA', 'BLOCK'; elf.decode_leb(info)
        when 'STRING'; elf.decode_strz(info)
        when 'STRP'
          # always read the offset to keep info in sync, even if it cannot be resolved
          stroff = elf.decode_word(info)
          if str
            str.ptr = stroff
            elf.decode_strz(str)
          end
        end
        case a.form
        when /^REF/
        when /^BLOCK/; a.data = info.read(a.data)	# the value decoded above is the block length
        end
        @attributes << a
      }
    end
  end
end
715
+
716
# decode an ULEB128 (dwarf2) from +ed+ (defaults to @encoded)
# each byte holds 7 bits of the value, least significant group first;
# bytes are read while the high bit is set
def decode_leb(ed = @encoded)
  value = 0
  shift = 0
  loop do
    byte = ed.read(1).unpack('C').first.to_i
    value |= (byte & 0x7f) << shift
    # a clear high bit marks the last byte
    return value if (byte & 0x80).zero?
    shift += 7
  end
end
726
+
727
# decodes the debugging information if available, and stores it in @debug
# (one DwarfDebug per compilation unit found in .debug_info)
# does nothing without sections, or if .debug_info or .debug_abbrev is missing; .debug_str is optional
# only a subset of DWARF2/3 is handled right now
# most info taken from http://ratonland.org/?entry=39 & libdwarf/dwarf.h
def decode_debug
  return unless @sections

  # assert presence of DWARF sections
  lookup = lambda { |wanted| @sections.find { |sec| sec.name == wanted } }
  info_sec = lookup['.debug_info']
  abbrev_sec = lookup['.debug_abbrev']
  str_sec = lookup['.debug_str']
  return unless info_sec and abbrev_sec

  # section -> content
  info = @encoded[info_sec.offset, info_sec.size]
  abbrev = @encoded[abbrev_sec.offset, abbrev_sec.size]
  str = str_sec && @encoded[str_sec.offset, str_sec.size]

  @debug = []

  # each decode advances info.ptr past the unit it handled
  @debug << DwarfDebug.decode(self, info, abbrev, str) until info.ptr >= info.length
end
750
+
751
# decodes the ELF dynamic tags, interpret them, and decodes symbols and relocs
# does nothing if there is no DYNAMIC segment
def decode_segments_dynamic
  dynamic = @segments.find { |seg| seg.type == 'DYNAMIC' }
  return unless dynamic

  # label the tag table and start decoding from there
  @encoded.ptr = add_label('dynamic_tags', dynamic.vaddr)
  decode_tags
  decode_segments_tags_interpret
  decode_segments_symbols
  decode_segments_relocs
  decode_segments_relocs_interpret
end
761
+
762
# decodes the dynamic segment and the section symbols, then fills segments.encoded
# for the LOAD and INTERP segments
# decode_debug is not called from here: too many info, decode on demand
def decode_segments
  decode_segments_dynamic
  decode_sections_symbols
  @segments.each do |seg|
    next unless %w[LOAD INTERP].include?(seg.type)

    filesz = seg.filesz
    # distance from the end of the file data to the next 4096 boundary
    pagepad = (-(seg.offset + filesz)) % 4096
    seg.encoded = @encoded[seg.offset, filesz] || EncodedData.new
    if seg.type == 'LOAD' and filesz > 0 and not seg.flags.include?('W')
      # align loaded data to the next page boundary for readonly mmap
      # but discard the labels/relocs etc
      begin
        seg.encoded << @encoded[seg.offset + filesz, pagepad].data
      rescue
        # best effort: the padding may not be available
        nil
      end
      seg.encoded.virtsize = filesz + pagepad
    end
    # the in-memory size may exceed what is stored in the file
    seg.encoded.virtsize = seg.memsz if seg.memsz > seg.encoded.virtsize
  end
end
783
+
784
# decodes sections, interprets symbols/relocs, fills sections.encoded
# only PROGBITS and NOBITS sections flagged ALLOC receive an +encoded+:
#  - NOBITS: no data, virtsize set to the section size
#  - PROGBITS: the section data from @encoded (empty if unavailable), virtsize set to the section size
def decode_sections
  decode_sections_symbols
  decode_sections_relocs
  @sections.find_all { |s| s.type == 'PROGBITS' or s.type == 'NOBITS' }.each { |s|
    next if not s.flags.include? 'ALLOC'
    if s.type == 'NOBITS'
      s.encoded = EncodedData.new '', :virtsize => s.size
    else
      s.encoded = @encoded[s.offset, s.size] || EncodedData.new
      s.encoded.virtsize = s.size
    end
  }
end
805
+
806
+ def decode_exports
807
+ decode_segments_dynamic
808
+ end
809
+
810
# decodes the elf header, and depending on the elf type, decode segments or sections
#  DYN/EXEC => decode_segments, REL => decode_sections
# nothing more than the header is decoded for CORE and any other type
def decode
  decode_header
  kind = @header.type
  if kind == 'DYN' or kind == 'EXEC'
    decode_segments
  elsif kind == 'REL'
    decode_sections
  end
end
819
+
820
# yields [encoded data, base] for each piece of the file:
#  - every LOAD segment, with its vaddr as base
#  - for REL files only, every section having an +encoded+, with a new label
#    (named after the section, exported at its offset 0) as base
def each_section
  @segments.each do |seg|
    next unless seg.type == 'LOAD'
    yield seg.encoded, seg.vaddr
  end
  return unless @header.type == 'REL'

  @sections.each do |sec|
    edata = sec.encoded
    next unless edata
    label = new_label(sec.name)
    edata.add_export(label, 0)
    yield edata, label
  end
end
830
+
831
# returns a metasm CPU object corresponding to +header.machine+
# the MIPS cpu is created with the file endianness
# raises for any other machine type
def cpu_from_headers
  machine = @header.machine
  case machine
  when 'X86_64' then X86_64.new
  when '386' then Ia32.new
  when 'MIPS' then MIPS.new(@endianness)
  when 'PPC' then PPC.new
  when 'ARM' then ARM.new
  else raise "unsupported cpu #{machine}"
  end
end
842
+
843
# returns an array including the ELF entrypoint (if not null) and the FUNC symbols addresses
# symbols whose shndx is UNDEF are ignored; @symbols may be unset
# TODO include init/init_array
def get_default_entrypoints
  entrypoints = []
  entry = @header.entry
  entrypoints << entry unless entry == 0
  (@symbols || []).each do |sym|
    next if sym.shndx == 'UNDEF'
    entrypoints << sym.value if sym.type == 'FUNC'
  end
  entrypoints
end
853
+
854
# returns the comment line describing what starts at +addr+:
# if a segment has this vaddr, shows the address and the sorted segment flags,
# otherwise defers to the parent implementation
def dump_section_header(addr, edata)
  seg = @segments.find { |cand| cand.vaddr == addr }
  return super(addr, edata) unless seg

  "\n// ELF segment at #{Expression[addr]}, flags = #{seg.flags.sort.join(', ')}"
end
860
+
861
# returns the disassembler from super(), tuned for ELF files:
#  - backtrace_maxblocks_data is set to 4
#  - if the disassembler has a section at address 0, this address is marked as decoded and
#    bound to the cpu default function
#  - ia32/x64: C prototypes for dlsym, __libc_start_main, exit, _exit, abort and __stack_chk_fail
#    are registered, dlsym gets a special backtrace callback, and a default function is installed
#  - mips: $t9 is bound to the address itself at the entrypoint and at each defined FUNC symbol,
#    and a default function is installed
def init_disassembler
  d = super()
  d.backtrace_maxblocks_data = 4
  if d.get_section_at(0)
    # fixes call [constructor] => 0
    d.decoded[0] = true
    d.function[0] = @cpu.disassembler_default_func
  end
  case @cpu.shortname
  when 'ia32', 'x64'
    # parse the prototypes with a fresh C parser, the previous one is restored afterwards
    saved_parser = d.c_parser
    d.c_parser = nil
    d.parse_c <<EOC
void *dlsym(int, char *); // has special callback
// gcc's entrypoint, need pointers to reach main exe code (last callback)
void __libc_start_main(void(*)(), int, int, void(*)(), void(*)()) __attribute__((noreturn));
// standard noreturn, optimized by gcc
void __attribute__((noreturn)) exit(int);
void _exit __attribute__((noreturn))(int);
void abort(void) __attribute__((noreturn));
void __stack_chk_fail __attribute__((noreturn))(void);
EOC
    dls = d.function[Expression['dlsym']] = @cpu.decode_c_function_prototype(d.c_parser, 'dlsym')
    %w[__libc_start_main exit _exit abort __stack_chk_fail].each { |fname|
      d.function[Expression[fname]] = @cpu.decode_c_function_prototype(d.c_parser, fname)
    }
    d.c_parser = saved_parser
    # when the 2nd argument of dlsym backtraces to a single address holding a string,
    # bind the first cpu register symbol to this name after the call
    dls.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
      sz = @cpu.size/8
      raise 'dlsym call error' if not dasm.decoded[calladdr]
      arg2 = if @cpu.shortname == 'x64'
               :rsi
             else
               Indirection.new(Expression[:esp, :+, 2*sz], sz, calladdr)
             end
      fnaddr = dasm.backtrace(arg2, calladdr, :include_start => true, :maxdepth => maxdepth)
      if fnaddr.kind_of?(::Array) and fnaddr.length == 1
        sect = dasm.get_section_at(fnaddr.first)
        raw = sect && sect[0].read(64)
        nul = raw && raw.index(?\0)
        # try to avoid ordinals
        if nul and nul > sz
          bind = bind.merge(@cpu.register_symbols[0] => Expression[raw[0, nul]])
        end
      end
      bind
    }
    df = d.function[:default] = @cpu.disassembler_default_func
    # the default function adds size/8 to register_symbols[4] (presumably the stack pointer, popping the return address)
    df.backtrace_binding[@cpu.register_symbols[4]] = Expression[@cpu.register_symbols[4], :+, @cpu.size/8]
    df.btbind_callback = nil
  when 'mips'
    # keeps an already existing $t9 binding for the address
    bind_t9 = lambda { |addr| (d.address_binding[addr] ||= {})[:$t9] ||= Expression[addr] }
    bind_t9[@header.entry]
    @symbols.each { |sym|
      bind_t9[sym.value] if sym.shndx != 'UNDEF' and sym.type == 'FUNC'
    }
    d.function[:default] = @cpu.disassembler_default_func
  end
  d
end
918
+
919
# returns an array of [name, addr, length, info]
# built from the sections when there are any: the first entry is skipped (presumably the
# reserved null section of index 0), info is the comma-joined section flags
# otherwise (sections unset or empty) built from the segments: name is nil, length is memsz
def section_info
  if @sections and not @sections.empty?
    @sections[1..-1].map { |s|
      [s.name, s.addr, s.size, s.flags.join(',')]
    }
  else
    # an empty section list must not be indexed with 1..-1: this yields nil, not []
    @segments.map { |s|
      [nil, s.vaddr, s.memsz, s.flags.join(',')]
    }
  end
end
931
+
932
# returns the value of the SONAME entry of @tag, if any
def module_name
  @tag && @tag['SONAME']
end
935
+
936
# returns the lowest vaddr among the LOAD segments, or 0 if there is none
def module_address
  bases = []
  @segments.each { |seg| bases << seg.vaddr if seg.type == 'LOAD' }
  bases.compact.min || 0
end
939
+
940
# returns the distance from module_address to the highest end (vaddr + memsz) of the LOAD segments
# returns 0 if there is no LOAD segment
def module_size
  ends = []
  @segments.to_a.reverse_each { |seg| ends << seg.vaddr + seg.memsz if seg.type == 'LOAD' }
  top = ends.max
  return 0 unless top

  top - module_address
end
944
+
945
# returns the list of symbols of the module, with addresses relative to module_address
# the first entry is ['entrypoint', offset] if the header entry is not null or the file is an EXEC
# then [name, offset, size] for each named symbol whose shndx is not UNDEF,
# the name being prefixed with local_ / weak_ according to the symbol bind
def module_symbols
  base = module_address
  list = []
  if @header.entry != 0 or @header.type == 'EXEC'
    list << ['entrypoint', @header.entry - base]
  end
  @symbols.each do |sym|
    next unless sym.name and sym.shndx != 'UNDEF'
    prefix = case sym.bind
             when 'LOCAL', 'WEAK' then "#{sym.bind.downcase}_"
             else ''
             end
    list << [prefix + sym.name, sym.value - base, sym.size]
  end
  list
end
956
+ end
957
+
958
class LoadedELF
  # decodes the dynamic segment, fills segments.encoded
  # if @load_address is 0 and some LOAD segment has a vaddr larger than the encoded data,
  # @load_address is first set to the lowest LOAD vaddr
  # each LOAD segment then receives the memsz bytes of @encoded found at addr_to_off(vaddr)
  def decode_segments
    loads = @segments.find_all { |seg| seg.type == 'LOAD' }
    if @load_address == 0 and loads.any? { |seg| seg.vaddr > @encoded.length }
      @load_address = loads.map { |seg| seg.vaddr }.min
    end
    decode_segments_dynamic
    @segments.each do |seg|
      next unless seg.type == 'LOAD'
      seg.encoded = @encoded[addr_to_off(seg.vaddr), seg.memsz]
    end
  end

  # decodes the elf header found at +off+, then the program header at header.phoff + off
  # do not try to decode the section header by default
  def decode_header(off = 0)
    @encoded.ptr = off
    @header.decode(self)
    decode_program_header(@header.phoff + off)
  end
end
979
+ end