metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,979 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/decode'
8
+ require 'metasm/exe_format/elf' unless defined? Metasm::ELF
9
+
10
+ module Metasm
11
+ class ELF
12
class Header
  # hook the decode sequence (registered through decode_hook(:type)):
  # checks the identification fields already available in +hdr+ and stores
  # the word size and byte order on +elf+, as decode_word/decode_xword etc
  # depend on them.
  # raises InvalidExeFormat on a bad signature, class, endianness or version
  decode_hook(:type) { |elf, hdr|
    if hdr.magic != "\x7fELF"
      raise InvalidExeFormat, "E: ELF: invalid ELF signature #{hdr.magic.inspect}"
    end

    elf.bitsize =
      case hdr.e_class
      when '32' then 32
      when '64', '64_icc' then 64
      else raise InvalidExeFormat, "E: ELF: unsupported class #{hdr.e_class}"
      end

    elf.endianness =
      case hdr.data
      when 'LSB' then :little
      when 'MSB' then :big
      else raise InvalidExeFormat, "E: ELF: unsupported endianness #{hdr.data}"
      end

    unless hdr.i_version == 'CURRENT'
      raise InvalidExeFormat, "E: ELF: unsupported ELF version #{hdr.i_version}"
    end
  }
end
35
+
36
class Symbol
  # decodes the symbol through the inherited decode, then, when a string
  # table +strtab+ is given, resolves @name from it using @name_p
  # returns the resolved name, or nil when no string table was given
  def decode(elf, strtab=nil)
    super(elf)
    return unless strtab
    @name = elf.readstr(strtab, @name_p)
  end
end
42
+
43
# basic immediates decoding functions
# each one forwards to edata.decode_imm with a fixed integer type and the
# file endianness; edata defaults to the whole file (@encoded)

# unsigned 8 bits
def decode_byte(edata = @encoded)
  edata.decode_imm(:u8, @endianness)
end

# unsigned 16 bits
def decode_half(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end

# unsigned 32 bits
def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end

# signed 32 bits
def decode_sword(edata = @encoded)
  edata.decode_imm(:i32, @endianness)
end

# unsigned, 32 bits when @bitsize is 32, 64 bits otherwise
def decode_xword(edata = @encoded)
  type = (@bitsize == 32 ? :u32 : :u64)
  edata.decode_imm(type, @endianness)
end

# signed, 32 bits when @bitsize is 32, 64 bits otherwise
def decode_sxword(edata = @encoded)
  type = (@bitsize == 32 ? :i32 : :i64)
  edata.decode_imm(type, @endianness)
end

# addresses and offsets are decoded like xwords
alias decode_addr decode_xword
alias decode_off decode_xword
52
+
53
# returns the NUL-terminated string found in +str+ at offset +off+
# (terminator excluded)
# returns nil when off is not strictly positive, when no terminator is
# found after off, or when searching +str+ raises (LoadedElf with
# arbitrary pointer...)
def readstr(str, off)
  return unless off > 0
  nul =
    begin
      str.index(?\0, off)
    rescue StandardError
      false
    end
  str[off...nul] if nul
end
58
+
59
# transforms a virtual address to a file offset, from mmaped segments addresses
# only 'LOAD' segments whose [vaddr, vaddr+memsz) range holds addr are used
# returns nil if addr is nil or belongs to no such segment
def addr_to_off(addr)
  return unless addr
  seg = @segments.find { |cur|
    cur.type == 'LOAD' and cur.vaddr <= addr and cur.vaddr + cur.memsz > addr
  }
  return unless seg
  addr - seg.vaddr + seg.offset
end
64
+
65
# memory address -> file offset
# handles relocated LoadedELF
# when module_address is 0, @load_address (initialized to 0 if unset) is
# substracted from addr before the addr_to_off lookup; otherwise addr is
# used unchanged
def addr_to_fileoff(addr)
  base = (module_address == 0 ? (@load_address ||= 0) : 0)
  addr_to_off(addr - base)
end
72
+
73
# file offset -> memory address
# handles relocated LoadedELF
# looks for the 'LOAD' segment whose [offset, offset+filesz) range holds
# foff; returns nil when there is none
# @load_address (initialized to 0 if unset) is added only when
# module_address is 0
def fileoff_to_addr(foff)
  seg = @segments.find { |cur|
    cur.type == 'LOAD' and cur.offset <= foff and cur.offset + cur.filesz > foff
  }
  return unless seg
  base = (module_address == 0 ? (@load_address ||= 0) : 0)
  seg.vaddr + base + foff - seg.offset
end
82
+
83
# return the address of a label
# an Integer is returned as is
# the label is first searched in the exports of each segment's own encoded
# data, then in the exports of the whole file (@encoded), in which case the
# file offset is converted using the segment containing it
# returns nil if the label cannot be resolved
def label_addr(name)
  return name if name.kind_of? Integer

  seg = @segments.find { |cur| cur.encoded and cur.encoded.export[name] }
  return seg.vaddr + seg.encoded.export[name] if seg

  foff = @encoded.export[name]
  return unless foff
  seg = @segments.find { |cur| cur.offset <= foff and cur.offset + cur.filesz > foff }
  seg.vaddr + foff - seg.offset if seg
end
93
+
94
# make an export of +self.encoded+, returns the label name if successful
# addr is converted with addr_to_off; when it is not mapped nothing is
# exported, nil is returned and a warning is printed if $VERBOSE is set
def add_label(name, addr)
  foff = addr_to_off(addr)
  unless foff
    puts "W: Elf: #{name} points to unmmaped space #{'0x%08X' % addr}" if $VERBOSE
    return
  end

  label = new_label(name)
  @encoded.add_export label, foff
  label
end
104
+
105
# decodes the elf header, section & program header
# off is the file offset of the elf header, it is also added to the
# phoff/shoff values found in the header
# the program (resp. section) header is decoded only if decode_phdr (resp.
# decode_shdr) is set and the corresponding header offset is not 0
# raises InvalidExeFormat if the ehsize field differs from Header.size
def decode_header(off = 0, decode_phdr=true, decode_shdr=true)
  @encoded.ptr = off
  @header.decode self
  if Header.size(self) != @header.ehsize
    raise InvalidExeFormat, "Invalid elf header size: #{@header.ehsize}"
  end

  decode_program_header(@header.phoff+off) if decode_phdr and @header.phoff != 0
  decode_section_header(@header.shoff+off) if decode_shdr and @header.shoff != 0
end
117
+
118
# decodes the section header
# section names are read from shstrndx if possible
# the table location is exported as 'section_header', and every named
# section with a non-zero address gets a 'section_<name>' label
# raises InvalidExeFormat if the shentsize field differs from Section.size
def decode_section_header(off = @header.shoff)
  if Section.size(self) != @header.shentsize
    raise InvalidExeFormat, "Invalid elf section header size: #{@header.shentsize}"
  end

  @encoded.add_export new_label('section_header'), off
  @encoded.ptr = off
  @sections = []
  @header.shnum.times { @sections << Section.decode(self) }

  # read sections name
  return if @header.shstrndx == 0
  shstr = @sections[@header.shstrndx]
  return unless shstr
  # LoadedElf may not have shstr mmaped
  return unless shstr.encoded = @encoded[shstr.offset, shstr.size]

  # the first section entry is skipped
  @sections[1..-1].each { |sec|
    sec.name = readstr(shstr.encoded.data, sec.name_p)
    add_label("section_#{sec.name}", sec.addr) if sec.name and sec.addr > 0
  }
end
136
+
137
# decodes the program header table
# marks the elf entrypoint as an export of +self.encoded+
# the table location itself is exported as 'program_header'
# raises InvalidExeFormat if the phentsize field differs from Segment.size
def decode_program_header(off = @header.phoff)
  if Segment.size(self) != @header.phentsize
    raise InvalidExeFormat, "Invalid elf program header size: #{@header.phentsize}"
  end

  @encoded.add_export new_label('program_header'), off
  @encoded.ptr = off
  @segments = []
  @header.phnum.times { @segments << Segment.decode(self) }

  add_label('entrypoint', @header.entry) if @header.entry != 0
end
150
+
151
# read the dynamic symbols hash table, and checks that every global and named symbol is accessible through it
# outputs a warning if it's not and $VERBOSE is set
# off is the position of the table in self.encoded (defaults to the HASH
# tag), nothing is done if it is nil
# a table with no bucket, or whose bucket/chain entries point outside of
# the chain table, is tolerated: the check stops instead of raising
def check_symbols_hash(off = @tag['HASH'])
  return if not @encoded.ptr = off

  hash_bucket_len = decode_word
  sym_count = decode_word

  hash_bucket = Array.new(hash_bucket_len) { decode_word }
  hash_table = Array.new(sym_count) { decode_word }

  if hash_bucket_len == 0
    # nothing can be looked up, and h % 0 would raise ZeroDivisionError
    puts "W: Elf: hash table has no bucket" if $VERBOSE
    return
  end

  @symbols.each { |s|
    next if not s.name or s.bind != 'GLOBAL' or s.shndx == 'UNDEF'

    found = false
    h = ELF.hash_symbol_name(s.name)
    idx = hash_bucket[h % hash_bucket_len]
    sym_count.times {	# to avoid DoS by loop
      # idx is nil when the previous link pointed out of the chain table
      break if not idx or idx == 0
      if ss = @symbols[idx] and ss.name == s.name
        found = true
        break
      end
      idx = hash_table[idx]
    }
    if not found
      puts "W: Elf: Symbol #{s.name.inspect} not found in hash table" if $VERBOSE
    end
  }
end
181
+
182
# checks every symbol's accessibility through the gnu_hash table
# (the check itself is still TODO: currently the table is only read)
# off is the position of the table in self.encoded (defaults to the
# GNU_HASH tag), nothing is done if it is nil
# with just_get_count set, returns symndx + the number of chain entries read
def check_symbols_gnu_hash(off = @tag['GNU_HASH'], just_get_count=false)
  return if not @encoded.ptr = off

  # when present: the symndx first symbols are not sorted (SECTION/LOCAL/FILE/etc) symtable[symndx] is sorted (1st sorted symbol)
  # the sorted symbols are sorted by [gnu_hash_symbol_name(symbol.name) % hash_bucket_len]
  hash_bucket_len = decode_word
  symndx = decode_word		# index of first sorted symbol in symtab
  maskwords = decode_word	# number of words in the second part of the ghash section (32 or 64 bits)
  decode_word			# shift2, used in the bloom filter

  # the bloom filter words are read and discarded
  # "bloomfilter[N] has bit B cleared if there is no M (M > symndx) which satisfies (C = @header.class)
  #  ((gnu_hash(sym[M].name) / C) % maskwords) == N &&
  #  ((gnu_hash(sym[M].name) % C) == B ||
  #  ((gnu_hash(sym[M].name) >> shift2) % C) == B"
  # bloomfilter may be [~0]
  maskwords.times { decode_xword }

  # bucket[N] contains the lowest M for which
  # gnu_hash(sym[M]) % nbuckets == N
  # or 0 if none
  hash_bucket = Array.new(hash_bucket_len) { decode_word }

  # part4[N] contains
  # (gnu_hash(sym[N].name) & ~1) | (N == dynsymcount-1 || (gnu_hash(sym[N].name) % nbucket) != (gnu_hash(sym[N+1].name) % nbucket))
  # that's the hash, with its lower bit replaced by the bool [1 if i am the last sym having my hash as hash]
  part4 = []
  hash_bucket.each { |first_sym|
    # for each bucket, walk all the chain
    # we do not walk the chains in hash_bucket order here, this
    # is just to read all the part4 as we don't know
    # beforehand the number of hashed symbols
    next if first_sym == 0	# no hash chain for this mod
    loop do
      entry = decode_word
      part4 << entry
      break if entry & 1 == 1	# low bit set: end of this chain
    end
  }
  hsymcount = part4.length

  return hsymcount+symndx if just_get_count

  # TODO
end
231
+
232
# read dynamic tags array
# when off is not given, the array is located from the DYNAMIC segment, or
# failing that from the DYNAMIC section; nothing is done if it is not found
# fills @tag: tag name (or raw Integer when unknown) => value
# NEEDED and unknown tags map to the array of all their values, other tags
# keep their first value only
def decode_tags(off = nil)
  if not off
    if dynseg = @segments.find { |seg| seg.type == 'DYNAMIC' }
      # this way it also works with LoadedELF
      off = addr_to_off(dynseg.vaddr)
    elsif dynsec = @sections.find { |sec| sec.type == 'DYNAMIC' }
      # if no DYNAMIC segment, assume we decode an ET_REL from file
      off = dynsec.offset
    end
  end
  return if not @encoded.ptr = off

  @tag = {}
  loop do
    tag = decode_sxword
    val = decode_xword

    # translate the numeric tag, using the per-machine table for the
    # processor-specific range
    if tag >= DYNAMIC_TAG_LOPROC and tag < DYNAMIC_TAG_HIPROC
      tag = int_to_hash(tag-DYNAMIC_TAG_LOPROC, DYNAMIC_TAG_PROC[@header.machine] || {})
      tag += DYNAMIC_TAG_LOPROC if tag.kind_of? Integer
    else
      tag = int_to_hash(tag, DYNAMIC_TAG)
    end

    case tag
    when 'NULL'
      # end of the array
      @tag[tag] = val
      break
    when Integer, 'NEEDED'	# here, list of tags for which multiple occurrences are allowed
      puts "W: Elf: unknown dynamic tag 0x#{tag.to_s 16}" if tag.kind_of? Integer and $VERBOSE
      (@tag[tag] ||= []) << val
    when 'POSFLAG_1'
      puts "W: Elf: ignoring dynamic tag modifier #{tag} #{int_to_hash(val, DYNAMIC_POSFLAG_1)}" if $VERBOSE
    else
      if not @tag[tag]
        @tag[tag] = val
      elsif $VERBOSE
        puts "W: Elf: ignoring re-occurence of dynamic tag #{tag} (value #{'0x%08X' % val})"
      end
    end
  end
end
277
+
278
# interprets tags (convert flags, arrays etc), mark them as self.encoded.export
# string-valued tags are resolved through the dynamic string table, address
# tags are replaced by a label when the address is mapped, *_ARRAY tags
# become the array of their entries (their size tag is consumed)
def decode_segments_tags_interpret
  strtab = nil
  if @tag['STRTAB']
    sz = @tag['STRSZ']
    if not sz
      puts "W: Elf: no string table size tag" if $VERBOSE
    elsif l = add_label('dynamic_strtab', @tag['STRTAB'])
      @tag['STRTAB'] = l
      strtab = @encoded[l, sz].data
    end
  end

  @tag.keys.each { |k|
    case k
    when Integer
      # unknown tag, left untouched
    when 'NEEDED', 'SONAME', 'RPATH', 'RUNPATH'
      if not strtab
        puts "W: Elf: no string table, needed for tag #{k}" if $VERBOSE
        next
      end
      if k == 'NEEDED'
        # array of strings
        @tag[k].map! { |v| readstr(strtab, v) }
      else
        # string
        @tag[k] = readstr(strtab, @tag[k])
      end
    when 'INIT', 'FINI', 'PLTGOT', 'HASH', 'GNU_HASH', 'SYMTAB', 'RELA', 'REL', 'JMPREL'
      # keep the raw value when it cannot be labelled
      @tag[k] = add_label('dynamic_' + k.downcase, @tag[k]) || @tag[k]
    when 'INIT_ARRAY', 'FINI_ARRAY', 'PREINIT_ARRAY'
      l = add_label('dynamic_' + k.downcase, @tag[k])
      next if not l
      sz = @tag.delete(k+'SZ')
      if not sz
        puts "W: Elf: tag #{k} has no corresponding size tag" if $VERBOSE
        next
      end

      tab = @encoded[l, sz]
      tab.ptr = 0
      entries = @tag[k] = []
      while tab.ptr < tab.length
        a = decode_addr(tab)
        entries << (add_label("dynamic_#{k.downcase}_#{entries.length}", a) || a)
      end
    when 'PLTREL'
      @tag[k] = int_to_hash(@tag[k], DYNAMIC_TAG)
    when 'FLAGS'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FLAGS)
    when 'FLAGS_1'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FLAGS_1)
    when 'FEATURES_1'
      @tag[k] = bits_to_hash(@tag[k], DYNAMIC_FEATURES_1)
    end
  }
end
331
+
332
# marks a symbol as @encoded.export (from s.value, using segments or sections)
# only named, defined (shndx != 'UNDEF') symbols of type NOTYPE, OBJECT or
# FUNC are exported
# for a relocatable file (header type 'REL') s.value is relative to the
# symbol's section; otherwise it is an address converted with addr_to_off
# if the name is already exported at another offset, '_' is appended until
# the name is free
def decode_symbol_export(s)
  return unless s.name and s.shndx != 'UNDEF' and %w[NOTYPE OBJECT FUNC].include?(s.type)

  if @header.type == 'REL'
    # shndx may be a special index name (eg 'ABS') or point past the section
    # table: such a symbol has no section to be relative to, skip it
    # instead of raising while indexing @sections
    sec = @sections[s.shndx] if @sections and s.shndx.kind_of?(Integer)
    return if not sec
    o = sec.offset + s.value
  elsif not o = addr_to_off(s.value)
    # allow to point to end of segment
    seg = @segments.find { |seg_| seg_.type == 'LOAD' and seg_.vaddr + seg_.memsz == s.value }	# check end
    if not seg
      puts "W: Elf: symbol points to unmmaped space (#{s.inspect})" if $VERBOSE and s.shndx != 'ABS'
      return
    end
    # LoadedELF would have returned an addr_to_off = addr
    o = s.value - seg.vaddr + seg.offset
  end

  name = s.name
  while @encoded.export[name] and @encoded.export[name] != o
    puts "W: Elf: symbol #{name} already seen at #{'%X' % @encoded.export[name]} - now at #{'%X' % o}) (may be a different version definition)" if $VERBOSE
    name += '_'	# do not modify inplace
  end
  @encoded.add_export name, o
end
355
+
356
# read symbol table, and mark all symbols found as exports of self.encoded
# tables locations are found in self.tags
# XXX symbol count is found from the hash table, this may not work with GNU_HASH only binaries
# does nothing unless the STRTAB, STRSZ, SYMTAB and HASH or GNU_HASH tags
# are all present; @symbols is replaced by the decoded symbols
# raises if the SYMENT tag does not match Symbol.size
def decode_segments_symbols
  return unless @tag['STRTAB'] and @tag['STRSZ'] and @tag['SYMTAB'] and (@tag['HASH'] or @tag['GNU_HASH'])

  raise "E: ELF: unsupported symbol entry size: #{@tag['SYMENT']}" if @tag['SYMENT'] != Symbol.size(self)

  # find number of symbols
  sym_count =
    if @tag['HASH']
      @encoded.ptr = @tag['HASH']	# assume tag already interpreted (would need addr_to_off otherwise)
      decode_word			# skip the first word of the table
      decode_word
    else
      check_symbols_gnu_hash(@tag['GNU_HASH'], true)
    end

  strtab = @encoded[@tag['STRTAB'], @tag['STRSZ']].data.to_str

  @encoded.ptr = @tag['SYMTAB']
  @symbols.clear
  sym_count.times {
    sym = Symbol.decode(self, strtab)
    @symbols << sym
    decode_symbol_export(sym)
  }

  if $VERBOSE
    check_symbols_hash
    check_symbols_gnu_hash
  end
end
386
+
387
# decode SYMTAB sections
# every symbol found is appended to @symbols and passed to
# decode_symbol_export
# for 'DYN' and 'EXEC' files, imports (shndx 'UNDEF') and symbols whose name
# is already in @symbols are ignored
# raises if a symbol table is larger than the file
def decode_sections_symbols
  @symbols ||= []
  alreadysegs = (@header.type == 'DYN' or @header.type == 'EXEC')

  # names already present in @symbols, kept in a hash so that the duplicate
  # check does not rescan the whole list for every symbol
  seen = {}
  @symbols.each { |ss| seen[ss.name] = true } if alreadysegs

  @sections.to_a.each { |sec|
    next if sec.type != 'SYMTAB'
    next if not strtab = @sections[sec.link]
    strtab = @encoded[strtab.offset, strtab.size].data
    @encoded.ptr = sec.offset
    raise 'Invalid symbol table' if sec.size > @encoded.length
    syms = Array.new(sec.size / Symbol.size(self)) { Symbol.decode(self, strtab) }
    syms.each { |s|
      if alreadysegs
        # if we already decoded the symbols from the DYNAMIC segment,
        # ignore dups and imports from this section
        next if s.shndx == 'UNDEF'
        next if seen[s.name]
        seen[s.name] = true
      end
      @symbols << s
      decode_symbol_export(s)
    }
  }
end
411
+
412
# decode REL/RELA sections
# the decoded entries are appended to @relocations
# when the arch has a reloc handler (arch_decode_segments_reloc_<machine>),
# each entry is also converted to a relocation of self.encoded, at the
# entry's offset inside the section designated by the reloc section's info
def decode_sections_relocs
  @relocations ||= []
  @sections.to_a.each { |sec|
    relcls =
      case sec.type
      when 'REL' then Relocation
      when 'RELA' then RelocationAddend
      end
    next if not relcls

    startidx = @relocations.length
    @encoded.ptr = sec.offset
    @relocations << relcls.decode(self) while @encoded.ptr < sec.offset + sec.size

    # create edata relocs
    tsec = @sections[sec.info]
    relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}"
    next if not respond_to? relocproc
    new_label('pcrel')
    @relocations[startidx..-1].each { |r|
      o = @encoded.ptr = tsec.offset + r.offset
      # work on a copy whose offset is a fresh label, the entry stored in
      # @relocations is left untouched
      r = r.dup
      l = new_label('pcrel')
      r.offset = Expression[l]
      rel = send(relocproc, r)
      @encoded.reloc[o] = rel if rel
    }
  }
end
443
+
444
# decode relocation tables (REL, RELA, JMPREL) from @tags
# @relocations is emptied, then filled with the entries of each table
# present, in that order
# raises if an entry size tag does not match the decoder's entry size, or
# if the PLTREL tag is neither 'REL' nor 'RELA'
def decode_segments_relocs
  @relocations.clear

  # decodes entries of class relcls from the current position of
  # self.encoded, over size bytes
  read_table = lambda { |relcls, size|
    p_end = @encoded.ptr + size
    @relocations << relcls.decode(self) while @encoded.ptr < p_end
  }

  if @encoded.ptr = @tag['REL']
    raise "E: ELF: unsupported rel entry size #{@tag['RELENT']}" if @tag['RELENT'] != Relocation.size(self)
    read_table[Relocation, @tag['RELSZ']]
  end

  if @encoded.ptr = @tag['RELA']
    raise "E: ELF: unsupported rela entry size #{@tag['RELAENT'].inspect}" if @tag['RELAENT'] != RelocationAddend.size(self)
    read_table[RelocationAddend, @tag['RELASZ']]
  end

  if @encoded.ptr = @tag['JMPREL']
    relcls =
      case reltype = @tag['PLTREL']
      when 'REL' then Relocation
      when 'RELA' then RelocationAddend
      else raise "E: ELF: unsupported plt relocation type #{reltype}"
      end
    read_table[relcls, @tag['PLTRELSZ']]
  end
end
475
+
476
# use relocations as self.encoded.reloc
# every entry of @relocations is handed to the arch handler
# (arch_decode_segments_reloc_<machine>), whose result is stored in
# self.encoded.reloc at the file offset of the relocated address
# entries with a null offset, in unmapped space, or targeting an already
# relocated offset are skipped
# for MIPS, relocs are also synthesized for the symbols from index GOTSYM on
def decode_segments_relocs_interpret
  relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}"
  unless respond_to? relocproc
    puts "W: Elf: relocs for arch #{@header.machine} unsupported" if $VERBOSE
    return
  end

  @relocations.each { |r|
    next if r.offset == 0
    o = addr_to_off(r.offset)
    if not o
      puts "W: Elf: relocation in unmmaped space (#{r.inspect})" if $VERBOSE
      next
    end
    if @encoded.reloc[o]
      puts "W: Elf: not rerelocating address #{'%08X' % r.offset}" if $VERBOSE
      next
    end
    # the handler reads implicit addends at the current position
    @encoded.ptr = o
    rel = send(relocproc, r)
    @encoded.reloc[o] = rel if rel
  }

  return unless @header.machine == 'MIPS' and @tag['PLTGOT'] and @tag['GOTSYM'] and @tag['LOCAL_GOTNO']

  puts "emulating mips PLT-like relocs" if $VERBOSE
  wsz = @bitsize/8
  # address such that the slot of symbol number i is at dyntab + wsz*i
  dyntab = label_addr(@tag['PLTGOT']) - (@tag['GOTSYM'] - @tag['LOCAL_GOTNO']) * wsz
  dt_o = addr_to_off(dyntab)
  @symbols.each_with_index { |sym, i|
    next if i < @tag['GOTSYM'] or not sym.name
    @encoded.reloc[dt_o + wsz*i] = Metasm::Relocation.new(Expression[sym.name], "u#@bitsize".to_sym, @endianness)
  }
end
511
+
512
# returns the Metasm::Relocation that should be applied for reloc
# self.encoded.ptr must point to the location that will be relocated (for implicit addends)
# returns nil for the types that need no relocation of the data (NONE,
# COPY) and for unhandled types
def arch_decode_segments_reloc_386(reloc)
  # label the relocated location after the imported symbol it refers to
  if reloc.symbol and n = reloc.symbol.name and reloc.symbol.shndx == 'UNDEF' and @sections
    s = @sections.find { |s_| s_.name and s_.offset <= @encoded.ptr and s_.offset + s_.size > @encoded.ptr }
    @encoded.add_export(new_label("#{s.name}_#{n}"), @encoded.ptr, true) if s
  end

  # decode addend if needed
  addend = nil
  unless %w[NONE COPY GLOB_DAT JMP_SLOT].include?(reloc.type)
    addend = reloc.addend || decode_sword
  end

  target = nil
  case reloc.type
  when 'NONE'
  when 'RELATIVE'
    # base = @segments.find_all { |s| s.type == 'LOAD' }.map { |s| s.vaddr }.min & 0xffff_f000
    # compiled to be loaded at seg.vaddr
    target = addend
    o = addr_to_off(target)
    if o
      label = @encoded.inv_export[o]
      if not label
        label = new_label("xref_#{Expression[target]}")
        @encoded.add_export label, o
      end
      target = label
    elsif $VERBOSE
      puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}"
    end
  when 'GLOB_DAT', 'JMP_SLOT', '32', 'PC32', 'TLS_TPOFF', 'TLS_TPOFF32'
    # XXX use versionned version
    # lazy jmp_slot ?
    target = 0
    target = reloc.symbol.name if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
    target = Expression[target, :-, reloc.offset] if reloc.type == 'PC32'
    target = Expression[target, :+, addend] if addend and addend != 0
    target = Expression[target, :+, 'tlsoffset'] if reloc.type == 'TLS_TPOFF'
    target = Expression[:-, [target, :+, 'tlsoffset']] if reloc.type == 'TLS_TPOFF32'
  when 'COPY'
    # mark the address pointed as a copy of the relocation target
    if not reloc.symbol or not name = reloc.symbol.name
      puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE
      name = ''
    end
    name = new_label("copy_of_#{name}")
    @encoded.add_export name, @encoded.ptr
  else
    puts "W: Elf: unhandled 386 reloc #{reloc.inspect}" if $VERBOSE
  end

  Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
end
566
+
567
# returns the Metasm::Relocation that should be applied for reloc
# self.encoded.ptr must point to the location that will be relocated (for implicit addends)
# only the '32' and 'REL32' types are handled, nil is returned for the others
def arch_decode_segments_reloc_mips(reloc)
  # label the relocated location after the imported symbol it refers to
  if reloc.symbol and n = reloc.symbol.name and reloc.symbol.shndx == 'UNDEF' and @sections
    s = @sections.find { |s_| s_.name and s_.offset <= @encoded.ptr and s_.offset + s_.size > @encoded.ptr }
    @encoded.add_export(new_label("#{s.name}_#{n}"), @encoded.ptr, true) if s
  end

  # decode addend if needed
  addend = nil
  addend = reloc.addend || decode_sword unless 'NONE' == reloc.type

  target = nil
  case reloc.type
  when 'NONE'
  when '32', 'REL32'
    target = 0
    target = reloc.symbol.name if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
    target = Expression[target, :-, reloc.offset] if reloc.type == 'REL32'
    target = Expression[target, :+, addend] if addend and addend != 0
  else
    puts "W: Elf: unhandled MIPS reloc #{reloc.inspect}" if $VERBOSE
  end

  Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
end
595
+
596
# returns the Metasm::Relocation that should be applied for reloc
# self.encoded.ptr must point to the location that will be relocated (for implicit addends)
# the relocation is 32 bits wide for the '32' and 'PC32' types, 64 bits
# otherwise; returns nil for the types that need no relocation of the data
# (NONE, COPY) and for unhandled types
def arch_decode_segments_reloc_x86_64(reloc)
  # label the relocated location after the imported symbol it refers to
  if reloc.symbol and n = reloc.symbol.name and reloc.symbol.shndx == 'UNDEF' and @sections
    s = @sections.find { |s_| s_.name and s_.offset <= @encoded.ptr and s_.offset + s_.size > @encoded.ptr }
    @encoded.add_export(new_label("#{s.name}_#{n}"), @encoded.ptr, true) if s
  end

  # decode addend if needed
  addend =
    case reloc.type
    when 'NONE' then nil	# no addend
    when '32', 'PC32' then reloc.addend || decode_sword
    else reloc.addend || decode_sxword
    end

  sz = :u64
  target = nil
  case reloc.type
  when 'NONE'
  when 'RELATIVE'
    # base = @segments.find_all { |s| s.type == 'LOAD' }.map { |s| s.vaddr }.min & 0xffff_f000
    # compiled to be loaded at seg.vaddr
    target = addend
    o = addr_to_off(target)
    if o
      label = @encoded.inv_export[o]
      if not label
        label = new_label("xref_#{Expression[target]}")
        @encoded.add_export label, o
      end
      target = label
    elsif $VERBOSE
      puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}"
    end
  when 'GLOB_DAT', 'JMP_SLOT', '64', 'PC64', '32', 'PC32'
    # XXX use versionned version
    # lazy jmp_slot ?
    target = 0
    target = reloc.symbol.name if reloc.symbol.kind_of?(Symbol) and reloc.symbol.name
    target = Expression[target, :-, reloc.offset] if reloc.type == 'PC64' or reloc.type == 'PC32'
    target = Expression[target, :+, addend] if addend and addend != 0
    sz = :u32 if reloc.type == '32' or reloc.type == 'PC32'
  when 'COPY'
    # mark the address pointed as a copy of the relocation target
    if not reloc.symbol or not name = reloc.symbol.name
      puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE
      name = ''
    end
    name = new_label("copy_of_#{name}")
    @encoded.add_export name, @encoded.ptr
  else
    puts "W: Elf: unhandled X86_64 reloc #{reloc.inspect}" if $VERBOSE
  end

  Metasm::Relocation.new(Expression[target], sz, @endianness) if target
end
651
+
652
class DwarfDebug
  # Decodes one DWARF2 'compilation unit' from +info+ and stores the decoded
  # node tree in @tree.
  # +info+ is advanced past the unit for the caller, +abbrev+ is the
  # abbreviation data, +str+ is the string data (may be nil).
  def decode(elf, info, abbrev, str)
    super(elf, info)
    # @cu_len is measured from the end of the @cu_len field itself, so the
    # ptsz/tag/abroff fields are subtracted to get the remaining length
    body_len = @cu_len - 7
    info.ptr += body_len                         # advance for caller
    info = info[info.ptr - body_len, body_len]   # work on our own segment
    abbrev.ptr = @abbrev_off

    return if abbrev.ptr >= abbrev.length or info.ptr >= info.length

    # abbreviation index => offset of its declaration inside abbrev
    abbrev_offsets = {}

    # returns the list of siblings found at the current position
    # (at the root, with a nil parent, the first node itself is returned)
    decode_tree = lambda { |parent|
      nodes = []
      loop {
        code = elf.decode_leb(info)
        break nodes if code == 0
        known_off = abbrev_offsets[code]
        abbrev.ptr = known_off if known_off
        abbrev_offsets[code] ||= abbrev.ptr
        node = DwarfDebug::Node.decode(elf, info, abbrev, str, abbrev_offsets)
        # after decoding, abbrev.ptr is recorded as the offset of the next index
        abbrev_offsets[code + 1] ||= abbrev.ptr
        nodes << node
        node.children = decode_tree[node] if node.has_child == 1
        node.parent = parent
        break node if not parent
      }
    }
    @tree = decode_tree[nil]
  end

  class Node
    # Decodes one node: the attribute declarations are read from +abbrev+,
    # the attribute values from +info+ (and from +str+ for STRP forms).
    def decode(elf, info, abbrev, str, idx_abbroff)
      super(elf, abbrev)
      return if @index == 0
      @attributes = []
      loop {
        att = Attribute.decode(elf, abbrev)
        # an (attr, form) pair of zeroes ends the attribute list
        break if att.attr == 0 and att.form == 0
        if att.form == 'INDIRECT' # actual form tag is stored in info
          att.form = elf.decode_leb(info)
          att.form = DWARF_FORM[att.form] || att.form # XXX INDIRECT again ?
        end
        form = att.form
        att.data =
          case form
          when 'ADDR' then elf.decode_xword(info) # should use dbg.ptr_sz
          when 'DATA1', 'REF1', 'BLOCK1', 'FLAG' then elf.decode_byte(info)
          when 'DATA2', 'REF2', 'BLOCK2' then elf.decode_half(info)
          when 'DATA4', 'REF4', 'BLOCK4' then elf.decode_word(info)
          when 'DATA8', 'REF8', 'BLOCK8'
            low = elf.decode_word(info)
            high = elf.decode_word(info)
            low | (high << 32)
          when 'SDATA', 'UDATA', 'REF_UDATA', 'BLOCK' then elf.decode_leb(info)
          when 'STRING' then elf.decode_strz(info)
          when 'STRP'
            str.ptr = elf.decode_word(info)
            elf.decode_strz(str)
          end
        # for BLOCK* forms the value decoded above is the byte count to read
        att.data = info.read(att.data) if /^BLOCK/ === form
        @attributes << att
      }
    end
  end
end
715
+
716
+ # decode a ULEB128 (dwarf2): read bytes while the high bit is set, little-endian
717
def decode_leb(ed = @encoded)
  # Accumulates 7 payload bits per byte, least significant group first,
  # and stops at the first byte whose high bit is clear.
  value = 0
  shift = 0
  loop do
    byte = ed.read(1).unpack('C').first.to_i
    value |= (byte & 0x7f) << shift
    shift += 7
    return value if (byte & 0x80) == 0
  end
end
726
+
727
+ # decodes the debugging information if available
728
+ # only a subset of DWARF2/3 is handled right now
729
+ # most info taken from http://ratonland.org/?entry=39 & libdwarf/dwarf.h
730
def decode_debug
  # Fills @debug with one DwarfDebug per compilation unit found in
  # .debug_info; does nothing when the needed sections are absent.
  return if not @sections

  # assert presence of DWARF sections
  by_name = lambda { |wanted| @sections.find { |sec| sec.name == wanted } }
  info_sec = by_name['.debug_info']
  abbrev_sec = by_name['.debug_abbrev']
  str_sec = by_name['.debug_str']
  return if not info_sec or not abbrev_sec

  # section -> content (.debug_str is optional)
  info = @encoded[info_sec.offset, info_sec.size]
  abbrev = @encoded[abbrev_sec.offset, abbrev_sec.size]
  str = str_sec ? @encoded[str_sec.offset, str_sec.size] : nil

  @debug = []

  # each decode advances info.ptr past the unit it consumed
  @debug << DwarfDebug.decode(self, info, abbrev, str) while info.ptr < info.length
end
750
+
751
+ # decodes the ELF dynamic tags, interpret them, and decodes symbols and relocs
752
def decode_segments_dynamic
  # Nothing to do without a DYNAMIC segment.
  dyn_seg = @segments.find { |seg| seg.type == 'DYNAMIC' }
  return if not dyn_seg

  # position the decoder on the dynamic tags, then run the passes in order
  @encoded.ptr = add_label('dynamic_tags', dyn_seg.vaddr)
  decode_tags
  decode_segments_tags_interpret
  decode_segments_symbols
  decode_segments_relocs
  decode_segments_relocs_interpret
end
761
+
762
+ # decodes the dynamic segment, fills segments.encoded
763
def decode_segments
  decode_segments_dynamic
  decode_sections_symbols
  # decode_debug is not run here: too much info, decode on demand
  @segments.each do |seg|
    next unless seg.type == 'LOAD' || seg.type == 'INTERP'

    file_len = seg.filesz
    # bytes between the end of the file data and the next 4096 boundary
    pad = (-(seg.offset + file_len)) % 4096
    seg.encoded = @encoded[seg.offset, file_len] || EncodedData.new
    if seg.type == 'LOAD' && file_len > 0 && !seg.flags.include?('W')
      # align loaded data to the next page boundary for readonly mmap
      # but discard the labels/relocs etc (only the raw data is appended);
      # a failure to fetch the padding is ignored
      begin
        seg.encoded << @encoded[seg.offset + file_len, pad].data
      rescue StandardError
        nil
      end
      seg.encoded.virtsize = file_len + pad
    end
    # the in-memory size wins when it is larger than what was loaded
    seg.encoded.virtsize = seg.memsz if seg.memsz > seg.encoded.virtsize
  end
end
783
+
784
+ # decodes sections, interprets symbols/relocs, fills sections.encoded
785
def decode_sections
  # Decodes the section symbols and relocations, then attaches an
  # EncodedData to every allocated PROGBITS/NOBITS section.
  # (the former no-op pass over @sections with an empty case/TODO
  # placeholder did nothing and has been dropped)
  decode_sections_symbols
  decode_sections_relocs
  @sections.find_all { |s| s.type == 'PROGBITS' or s.type == 'NOBITS' }.each { |s|
    # sections without the ALLOC flag get no content
    next if not s.flags.include? 'ALLOC'
    if s.type == 'NOBITS'
      # no file content: only a virtual size
      s.encoded = EncodedData.new '', :virtsize => s.size
    else
      s.encoded = @encoded[s.offset, s.size] || EncodedData.new
      s.encoded.virtsize = s.size
    end
  }
end
805
+
806
+ def decode_exports # takes no argument, returns whatever decode_segments_dynamic returns
807
+ decode_segments_dynamic # the whole work is delegated to the dynamic segment decoding
808
+ end
809
+
810
+ # decodes the elf header, and depending on the elf type, decode segments or sections
811
def decode
  # The header is always decoded first; its type selects the decoding path.
  decode_header
  kind = @header.type
  if %w[DYN EXEC].include?(kind)
    decode_segments
  elsif kind == 'REL'
    decode_sections
  end
  # 'CORE' and any other type: nothing more is decoded
end
819
+
820
def each_section
  # Yields (encoded, address) for every LOAD segment, then, for 'REL'
  # files only, (encoded, label) for every section that has content.
  @segments.each do |seg|
    next unless seg.type == 'LOAD'
    yield seg.encoded, seg.vaddr
  end
  return unless @header.type == 'REL'
  @sections.each do |sec|
    next if not sec.encoded
    # sections are addressed through a fresh label exported at their start
    label = new_label(sec.name)
    sec.encoded.add_export label, 0
    yield sec.encoded, label
  end
end
830
+
831
+ # returns a metasm CPU object corresponding to +header.machine+
832
def cpu_from_headers
  # machine name => constructor; lambdas keep the CPU classes from being
  # referenced until the matching one is actually needed
  machine = @header.machine
  builder = {
    'X86_64' => lambda { X86_64.new },
    '386'    => lambda { Ia32.new },
    'MIPS'   => lambda { MIPS.new @endianness },
    'PPC'    => lambda { PPC.new },
    'ARM'    => lambda { ARM.new }
  }[machine]
  raise "unsupported cpu #{machine}" if not builder
  builder.call
end
842
+
843
+ # returns an array including the ELF entrypoint (if not null) and the FUNC symbols addresses
844
+ # TODO include init/init_array
845
def get_default_entrypoints
  # the header entrypoint comes first, when it is not null
  entries = []
  entries << @header.entry unless @header.entry == 0
  # then the address of every defined FUNC symbol, if symbols were decoded
  if @symbols
    @symbols.each do |sym|
      next if sym.shndx == 'UNDEF'
      next unless sym.type == 'FUNC'
      entries << sym.value
    end
  end
  entries
end
853
+
854
def dump_section_header(addr, edata)
  # A segment starting exactly at +addr+ gets an ELF-specific header line;
  # anything else is left to the parent implementation.
  seg = @segments.find { |cand| cand.vaddr == addr }
  return super(addr, edata) if not seg
  "\n// ELF segment at #{Expression[addr]}, flags = #{seg.flags.sort.join(', ')}"
end
860
+
861
+ # returns a disassembler with a special decodedfunction for dlsym, __libc_start_main, and a default function (i386 only)
862
+ def init_disassembler # configures the disassembler obtained from super() and returns it
863
+ d = super()
864
+ d.backtrace_maxblocks_data = 4
865
+ if d.get_section_at(0)
866
+ # fixes call [constructor] => 0: when address 0 is mapped, flag it as decoded and bind it to the cpu default function
867
+ d.decoded[0] = true
868
+ d.function[0] = @cpu.disassembler_default_func
869
+ end
870
+ case @cpu.shortname # cpu-specific setup; other cpus get no extra setup
871
+ when 'ia32', 'x64'
872
+ old_cp = d.c_parser # saved here, restored once the prototypes below are decoded
873
+ d.c_parser = nil # NOTE(review): presumably makes parse_c start from a fresh parser - confirm
874
+ d.parse_c <<EOC
875
+ void *dlsym(int, char *); // has special callback
876
+ // gcc's entrypoint, need pointers to reach main exe code (last callback)
877
+ void __libc_start_main(void(*)(), int, int, void(*)(), void(*)()) __attribute__((noreturn));
878
+ // standard noreturn, optimized by gcc
879
+ void __attribute__((noreturn)) exit(int);
880
+ void _exit __attribute__((noreturn))(int);
881
+ void abort(void) __attribute__((noreturn));
882
+ void __stack_chk_fail __attribute__((noreturn))(void);
883
+ EOC
884
+ d.function[Expression['dlsym']] = dls = @cpu.decode_c_function_prototype(d.c_parser, 'dlsym') # dls is kept to attach the callback below
885
+ d.function[Expression['__libc_start_main']] = @cpu.decode_c_function_prototype(d.c_parser, '__libc_start_main')
886
+ d.function[Expression['exit']] = @cpu.decode_c_function_prototype(d.c_parser, 'exit')
887
+ d.function[Expression['_exit']] = @cpu.decode_c_function_prototype(d.c_parser, '_exit')
888
+ d.function[Expression['abort']] = @cpu.decode_c_function_prototype(d.c_parser, 'abort')
889
+ d.function[Expression['__stack_chk_fail']] = @cpu.decode_c_function_prototype(d.c_parser, '__stack_chk_fail')
890
+ d.c_parser = old_cp
891
+ dls.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth| # returns bind, possibly extended with the resolved name
892
+ sz = @cpu.size/8 # NOTE(review): byte size, assuming @cpu.size is expressed in bits - confirm
893
+ raise 'dlsym call error' if not dasm.decoded[calladdr]
894
+ if @cpu.shortname == 'x64'
895
+ arg2 = :rsi # x64: the second dlsym argument is backtraced from rsi
896
+ else
897
+ arg2 = Indirection.new(Expression[:esp, :+, 2*sz], sz, calladdr) # otherwise: read from the stack at esp + 2*sz
898
+ end
899
+ fnaddr = dasm.backtrace(arg2, calladdr, :include_start => true, :maxdepth => maxdepth)
900
+ if fnaddr.kind_of? ::Array and fnaddr.length == 1 and s = dasm.get_section_at(fnaddr.first) and fn = s[0].read(64) and i = fn.index(?\0) and i > sz # try to avoid ordinals
901
+ bind = bind.merge @cpu.register_symbols[0] => Expression[fn[0, i]] # bind the first register symbol to the NUL-terminated name read at that address
902
+ end
903
+ bind
904
+ }
905
+ df = d.function[:default] = @cpu.disassembler_default_func
906
+ df.backtrace_binding[@cpu.register_symbols[4]] = Expression[@cpu.register_symbols[4], :+, @cpu.size/8] # default function: register symbol #4 is bound to itself + @cpu.size/8
907
+ df.btbind_callback = nil
908
+ when 'mips'
909
+ (d.address_binding[@header.entry] ||= {})[:$t9] ||= Expression[@header.entry] # at the entrypoint, $t9 is bound to the entrypoint address unless already bound
910
+ @symbols.each { |s|
911
+ next if s.shndx == 'UNDEF' or s.type != 'FUNC' # only defined FUNC symbols
912
+ (d.address_binding[s.value] ||= {})[:$t9] ||= Expression[s.value] # same $t9 binding at each function address
913
+ }
914
+ d.function[:default] = @cpu.disassembler_default_func
915
+ end
916
+ d
917
+ end
918
+
919
+ # returns an array of [name, addr, length, info]
920
def section_info
  # Returns an array of [name, addr, length, info], built from the
  # sections when there are some and from the segments otherwise.
  # An empty @sections array is truthy and [][1..-1] is nil, which used to
  # raise NoMethodError; it now takes the segment fallback like nil does.
  if @sections and not @sections.empty?
    # the first entry is skipped (index 0 is the reserved null section
    # header in ELF)
    @sections[1..-1].map { |s|
      [s.name, s.addr, s.size, s.flags.join(',')]
    }
  else
    # segments have no name
    @segments.map { |s|
      [nil, s.vaddr, s.memsz, s.flags.join(',')]
    }
  end
end
931
+
932
def module_name
  # the SONAME tag value, or @tag itself (nil/false) when no tags are available
  @tag ? @tag['SONAME'] : @tag
end
935
+
936
def module_address
  # lowest vaddr among the LOAD segments, 0 when there is none
  load_addrs = @segments.find_all { |seg| seg.type == 'LOAD' }.map { |seg| seg.vaddr }.compact
  load_addrs.min || 0
end
939
+
940
def module_size
  # distance between module_address and the highest end address
  # (vaddr + memsz) of the LOAD segments; 0 when there is no LOAD segment
  load_ends = @segments.to_a.map { |seg| seg.vaddr + seg.memsz if seg.type == 'LOAD' }.compact
  highest = load_ends.max
  return 0 if not highest
  highest - module_address
end
944
+
945
def module_symbols
  # Returns [name, offset from module_address(, size)] entries: the
  # entrypoint first, then every named, defined symbol.
  base = module_address
  list = []
  if @header.entry != 0 or @header.type == 'EXEC'
    list << ['entrypoint', @header.entry - base]
  end
  @symbols.each do |sym|
    next if not sym.name or sym.shndx == 'UNDEF'
    # LOCAL and WEAK symbols are prefixed with their lowercased binding
    prefix = case sym.bind
             when 'LOCAL', 'WEAK' then sym.bind.downcase + '_'
             else ''
             end
    list << [prefix + sym.name, sym.value - base, sym.size]
  end
  list
end
956
+ end
957
+
958
class LoadedELF
  # decodes the dynamic segment, fills segments.encoded
  def decode_segments
    # when no load address is set and some LOAD segment lies beyond the
    # end of @encoded, use the lowest LOAD vaddr as load address
    if @load_address == 0
      load_addrs = @segments.find_all { |seg| seg.type == 'LOAD' }.map { |seg| seg.vaddr }
      @load_address = load_addrs.min if load_addrs.any? { |va| va > @encoded.length }
    end
    decode_segments_dynamic
    @segments.each do |seg|
      next unless seg.type == 'LOAD'
      # the content is taken at the segment address, for its full memory size
      seg.encoded = @encoded[addr_to_off(seg.vaddr), seg.memsz]
    end
  end

  # do not try to decode the section header by default:
  # only the header at +off+ and the program header are decoded
  def decode_header(off = 0)
    @encoded.ptr = off
    @header.decode(self)
    decode_program_header(@header.phoff + off)
  end
end
979
+ end