metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,82 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/exe_format/main'
7
+
8
+ module Metasm
9
+ # special class that decodes a PE, ELF, MachO or UnivBinary file from its signature
10
+ # XXX UnivBinary is not a real ExeFormat, just a container..
11
+ class AutoExe < ExeFormat
12
+ class UnknownSignature < InvalidExeFormat ; end
13
+
14
+ # actually calls autoexe_load for the detected filetype from #execlass_from_signature
15
# Loads +str+ with the executable class detected from its leading bytes.
# +str+ may be a raw string or an EncodedData; in the latter case only its
# #data is used for signature detection, while the original object is what
# gets forwarded (with any extra arguments and block) to #autoexe_load.
def self.load(str, *a, &b)
  raw = str.kind_of?(EncodedData) ? str.data : str
  execlass_from_signature(raw).autoexe_load(str, *a, &b)
end
20
+
21
+ # match the actual exe class from the raw file inspection using the registered signature list
22
+ # calls #unknown_signature if nothing matches
23
# Finds the executable class matching +raw+ using the registered signature list.
#
# Each registered signature is either a String (matched against the first
# bytes of +raw+) or a Proc (called with +raw+, truthy result means match).
# The value associated with the first matching signature is resolved this way:
# a String is looked up as a constant in Metasm, a Proc is called, anything
# else is returned as is.
# When nothing matches, the result of #unknown_signature(raw) is resolved the
# same way (the default implementation raises).
def self.execlass_from_signature(raw)
  m = @signatures.find { |sig, _exe|
    case sig
    when String
      # Compare byte values rather than String objects: on Ruby >= 1.9 a
      # literal such as "\xca\xfe\xba\xbe" (source encoding) never == the same
      # bytes read from a binary file (ASCII-8BIT), so the signature would
      # silently never match.
      raw[0, sig.length].to_s.unpack('C*') == sig.unpack('C*')
    when Proc; sig[raw]
    end
  }
  e = m ? m[1] : unknown_signature(raw)
  case e
  when String; Metasm.const_get(e)
  when Proc; e.call
  else e
  end
end
37
+
38
+ # register a new binary file signature
39
# Appends a [signature, target] pair to this class' signature list, creating
# the list on first use. The target is +exe+ when given, otherwise the block.
# Returns the signature list.
def self.register_signature(sig, exe=nil, &b)
  @signatures ||= []
  target = exe || b
  @signatures << [sig, target]
end
42
+
43
+ def self.init_signatures(sig=[])
44
+ @signatures = sig
45
+ end
46
+
47
+ # this function is called when no signature matches
48
# Fallback used when no registered signature matches +raw+.
# Raises UnknownSignature; the message shows the first 4 bytes in hexadecimal.
def self.unknown_signature(raw)
  magic_hex = raw[0, 4].unpack('H*').first
  raise UnknownSignature, "unrecognized executable file format #{magic_hex.inspect}"
end
51
+
52
+ # raw signature copies (avoid triggering exefmt autorequire)
53
+ init_signatures
54
+ register_signature("\x7fELF") { ELF }
55
+ register_signature(lambda { |raw| raw[0, 2] == "MZ" and off = raw[0x3c, 4].to_s.unpack('V')[0] and off < raw.length and raw[off, 4] == "PE\0\0" }) { PE }
56
+ %w[feedface cefaedfe feedfacf cffaedfe].each { |sig| register_signature([sig].pack('H*')) { MachO } }
57
+ register_signature("\xca\xfe\xba\xbe") { UniversalBinary }
58
+ register_signature("dex\n") { DEX }
59
+ register_signature("dey\n") { DEY }
60
+ register_signature("\xfa\x70\x0e\x1f") { FatELF }
61
+ register_signature('Metasm.dasm') { Disassembler }
62
+
63
+ # replacement for AutoExe where #load defaults to a Shellcode of the specified CPU
64
# Builds an anonymous subclass of the receiver whose #unknown_signature
# returns a Shellcode instead of raising.
# The Shellcode cpu is +cpu+ when given, otherwise the result of calling the
# block with the raw data.
# The subclass is initialised with the receiver's signature list (the very
# same Array object, not a copy).
def self.orshellcode(cpu=nil, &b)
  klass = ::Class.new(self)
  # define the override on the subclass' singleton so it acts as a class method
  singleton = class << klass ; self ; end
  singleton.send(:define_method, :unknown_signature) { |raw|
    Shellcode.withcpu(cpu || b[raw])
  }
  klass.init_signatures(@signatures)
  klass
end
74
+ end
75
+
76
+ # special class that decodes a LoadedPE or LoadedELF from its signature (used to read memory-mapped binaries)
77
+ class LoadedAutoExe < AutoExe
78
+ init_signatures
79
+ register_signature("\x7fELF") { LoadedELF }
80
+ register_signature(lambda { |raw| raw[0, 2] == "MZ" and off = raw[0x3c, 4].to_s.unpack('V')[0] and off < raw.length and raw[off, 4] == "PE\0\0" }) { LoadedPE }
81
+ end
82
+ end
@@ -0,0 +1,189 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/exe_format/main'
7
+ require 'metasm/encode'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ # BFLT is the binary flat format used by the uClinux
12
+ class Bflt < ExeFormat
13
+ MAGIC = 'bFLT'
14
+ FLAGS = { 1 => 'RAM', 2 => 'GOTPIC', 4 => 'GZIP' }
15
+
16
+ attr_accessor :header, :text, :data, :reloc, :got
17
+
18
+ class Header < SerialStruct
19
+ mem :magic, 4
20
+ words :rev, :entry, :data_start, :data_end, :bss_end, :stack_size,
21
+ :reloc_start, :reloc_count, :flags
22
+ mem :pad, 6*4
23
+ fld_bits(:flags, FLAGS)
24
+
25
# Decodes the header fields through the superclass, then validates the magic.
# Raises InvalidExeFormat when the decoded magic does not match MAGIC.
def decode(exe)
  super(exe)

  raise InvalidExeFormat, "Bad bFLT signature #@magic" unless MAGIC === @magic
end
33
+
34
# Fills in every header field that is still unset, then defers to the
# superclass. Fields already holding a value are left untouched.
# The section boundaries are derived from exe.text / exe.data when those are
# present; the relocation table is placed at the end of the data.
def set_default_values(exe)
  @magic ||= MAGIC
  @rev ||= 4
  @entry ||= 0x40
  if exe.text
    @data_start ||= @entry + exe.text.length
  end
  if exe.data
    # data_end uses the length of the data's #data, bss_end the length of the
    # data object itself
    @data_end ||= @data_start + exe.data.data.length
    @bss_end ||= @data_start + exe.data.length
  end
  @stack_size ||= 0x1000
  @reloc_start ||= @data_end
  @reloc_count ||= exe.reloc.length
  @flags ||= []

  super(exe)
end
48
+ end
49
+
50
+ def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end
51
+ def encode_word(w) Expression[w].encode(:u32, @endianness) end
52
+
53
+ def initialize(cpu = nil)
54
+ @endianness = cpu ? cpu.endianness : :little
55
+ @header = Header.new
56
+ @text = EncodedData.new
57
+ @data = EncodedData.new
58
+ super(cpu)
59
+ end
60
+
61
+ def decode_header
62
+ @encoded.ptr = 0
63
+ @header.decode(self)
64
+ end
65
+
66
# Decodes a whole bFLT image from @encoded: header, text and data sections,
# then the relocation table, and finally interprets the relocations.
# Raises a RuntimeError when the header has the GZIP flag (not supported).
def decode
  decode_header

  # text spans entry...data_start, data spans data_start...data_end
  @encoded.ptr = @header.entry
  @text = EncodedData.new << @encoded.read(@header.data_start - @header.entry)
  @data = EncodedData.new << @encoded.read(@header.data_end - @header.data_start)
  # extend the data's virtual size by the bss span (data_end...bss_end)
  @data.virtsize += (@header.bss_end - @header.data_end)

  if @header.flags.include? 'GZIP'
    # TODO gzip
    raise 'bFLT decoder: gzip format not supported'
  end

  @reloc = []
  @encoded.ptr = @header.reloc_start
  @header.reloc_count.times { @reloc << decode_word }
  # the header field holding the format revision is named :rev
  # (there is no :version field, so the former test raised NoMethodError)
  if @header.rev == 2
    # revision 2: only the low 30 bits of each entry are kept
    @reloc.map! { |r| r & 0x3fff_ffff }
  end

  decode_interpret_relocs
end
88
+
89
# Turns each raw entry of @reloc into a Relocation on the section containing it.
#
# An entry is a file address where a 32-bit pointer is stored. The pointer is
# read, a label is created at the place it points to (in text, or in
# data/bss), and a Relocation targeting that label is recorded at the
# pointer's offset within its own section.
# Entries lying outside text/data, or pointing outside text/data/bss, are
# skipped (reported on stdout when $VERBOSE).
def decode_interpret_relocs
  @reloc.each { |r|
    # where the reloc is
    if r >= @header.entry and r < @header.data_start
      section = @text
      base = @header.entry
    elsif r >= @header.data_start and r < @header.data_end
      section = @data
      base = @header.data_start
    else
      puts "out of bounds reloc at #{Expression[r]}" if $VERBOSE
      next
    end

    # what it points to
    section.ptr = r-base
    target = decode_word(section)
    if target >= @header.entry and target < @header.data_start
      target = label_at(@text, target - @header.entry, "xref_#{Expression[target]}")
    elsif target >= @header.data_start and target < @header.bss_end
      target = label_at(@data, target - @header.data_start, "xref_#{Expression[target]}")
    else
      puts "out of bounds reloc target at #{Expression[r]}" if $VERBOSE
      next
    end

    # r-base is an offset inside +section+: the relocation must be stored
    # there, not unconditionally in @text (data relocs used to be misplaced)
    section.reloc[r-base] = Relocation.new(Expression[target], :u32, @endianness)
  }
end
118
+
119
# Serializes the program into @encoded and returns the resulting raw string.
# Layout: header, text, data, then one word per relocation table entry.
# The GZIP flag is cleared beforehand, since compression is not implemented.
def encode
  create_relocation_table

  # TODO got, gzip
  if @header.flags.include? 'GZIP'
    puts "W: bFLT: clearing gzip flag" if $VERBOSE
    @header.flags.delete 'GZIP'
  end

  @encoded = EncodedData.new
  @encoded << @header.encode(self)

  # resolve the labels of both sections against their file addresses
  binding = @text.binding(@header.entry).merge(@data.binding(@header.data_start))
  @encoded << @text << @data.data
  @encoded.fixup! binding
  @encoded.reloc.clear

  # create_relocation_table fills @reloc (@relocs was never assigned, so
  # iterating it raised NoMethodError on nil)
  @reloc.each { |r| @encoded << encode_word(r) }

  @encoded.data
end
140
+
141
# Rebuilds @reloc, the list of file addresses holding a relocatable pointer.
#
# A section relocation is kept when it has the file's endianness, a 32-bit
# type, and a target which reduces to an Integer once both sections are bound
# to the 'mapaddr' label and that label is subtracted. Anything else is
# ignored (reported on stdout when $VERBOSE).
def create_relocation_table
  @reloc = []
  mapaddr = new_label('mapaddr')
  binding = @text.binding(mapaddr).merge(@data.binding(mapaddr))
  supported_types = [:u32, :a32, :i32]
  [@text, @data].each { |section|
    # file address of the section start, with fallbacks for an unset header
    base = @header.entry || 0x40
    if section == @data
      base = @header.data_start || base + @text.length
    end
    section.reloc.each { |off, rel|
      relocatable = rel.endianness == @endianness &&
        supported_types.include?(rel.type) &&
        Expression[rel.target.bind(binding), :-, mapaddr].reduce.kind_of?(::Integer)
      unless relocatable
        puts "bFLT: ignoring unsupported reloc #{rel.inspect} at #{Expression[off]}" if $VERBOSE
        next
      end
      @reloc << (base + off)
    }
  }
end
158
+
159
+ def parse_init
160
+ @textsrc ||= []
161
+ @datasrc ||= []
162
+ @cursource ||= @textsrc
163
+ super()
164
+ end
165
+
166
# Handles the section-switching directives (case-insensitive): '.text' and
# '.data' select which source list is current.
# Any other instruction is passed on to the superclass.
def parse_parser_instruction(instr)
  directive = instr.raw.downcase
  if directive == '.text'
    @cursource = @textsrc
  elsif directive == '.data'
    @cursource = @datasrc
  else
    # entrypoint is the 1st byte of .text
    super(instr)
  end
end
174
+
175
# Parses the arguments (when any are given), then assembles the pending text
# and data sources into their sections, in that order. Each source list is
# emptied once assembled.
# Returns self.
def assemble(*a)
  parse(*a) unless a.empty?
  [[@text, @textsrc], [@data, @datasrc]].each { |section, pending|
    section << assemble_sequence(pending, @cpu)
    pending.clear
  }
  self
end
183
+
184
+ def each_section
185
+ yield @text, @header.entry
186
+ yield @data, @header.data_start
187
+ end
188
+ end
189
+ end
@@ -0,0 +1,455 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+
9
+ module Metasm
10
+ # the COFF object file format
11
+ # mostly used on windows (PE/COFF)
12
+ class COFF < ExeFormat
13
+ CHARACTERISTIC_BITS = {
14
+ 0x0001 => 'RELOCS_STRIPPED', 0x0002 => 'EXECUTABLE_IMAGE',
15
+ 0x0004 => 'LINE_NUMS_STRIPPED', 0x0008 => 'LOCAL_SYMS_STRIPPED',
16
+ 0x0010 => 'AGGRESSIVE_WS_TRIM', 0x0020 => 'LARGE_ADDRESS_AWARE',
17
+ 0x0040 => 'x16BIT_MACHINE', 0x0080 => 'BYTES_REVERSED_LO',
18
+ 0x0100 => 'x32BIT_MACHINE', 0x0200 => 'DEBUG_STRIPPED',
19
+ 0x0400 => 'REMOVABLE_RUN_FROM_SWAP', 0x0800 => 'NET_RUN_FROM_SWAP',
20
+ 0x1000 => 'SYSTEM', 0x2000 => 'DLL',
21
+ 0x4000 => 'UP_SYSTEM_ONLY', 0x8000 => 'BYTES_REVERSED_HI'
22
+ }
23
+
24
+ MACHINE = {
25
+ 0x0 => 'UNKNOWN', 0x184 => 'ALPHA', 0x1c0 => 'ARM',
26
+ 0x1d3 => 'AM33', 0x8664=> 'AMD64', 0xebc => 'EBC',
27
+ 0x9041=> 'M32R', 0x1f1 => 'POWERPCFP',
28
+ 0x284 => 'ALPHA64', 0x14c => 'I386', 0x200 => 'IA64',
29
+ 0x268 => 'M68K', 0x266 => 'MIPS16', 0x366 => 'MIPSFPU',
30
+ 0x466 => 'MIPSFPU16', 0x1f0 => 'POWERPC', 0x162 => 'R3000',
31
+ 0x166 => 'R4000', 0x168 => 'R10000', 0x1a2 => 'SH3',
32
+ 0x1a3 => 'SH3DSP', 0x1a6 => 'SH4', 0x1a8 => 'SH5',
33
+ 0x1c2 => 'THUMB', 0x169 => 'WCEMIPSV2'
34
+ }
35
+
36
+ # PE+ is for 64bits address spaces
37
+ SIGNATURE = { 0x10b => 'PE', 0x20b => 'PE+', 0x107 => 'ROM' }
38
+
39
+ SUBSYSTEM = {
40
+ 0 => 'UNKNOWN', 1 => 'NATIVE', 2 => 'WINDOWS_GUI',
41
+ 3 => 'WINDOWS_CUI', 5 => 'OS/2_CUI', 7 => 'POSIX_CUI',
42
+ 8 => 'WIN9X_DRIVER', 9 => 'WINDOWS_CE_GUI',
43
+ 10 => 'EFI_APPLICATION',
44
+ 11 => 'EFI_BOOT_SERVICE_DRIVER', 12 => 'EFI_RUNTIME_DRIVER',
45
+ 13 => 'EFI_ROM', 14 => 'XBOX'
46
+ }
47
+
48
+ DLL_CHARACTERISTIC_BITS = {
49
+ 0x40 => 'DYNAMIC_BASE', 0x80 => 'FORCE_INTEGRITY', 0x100 => 'NX_COMPAT',
50
+ 0x200 => 'NO_ISOLATION', 0x400 => 'NO_SEH', 0x800 => 'NO_BIND',
51
+ 0x2000 => 'WDM_DRIVER', 0x8000 => 'TERMINAL_SERVER_AWARE'
52
+ }
53
+
54
+ BASE_RELOCATION_TYPE = { 0 => 'ABSOLUTE', 1 => 'HIGH', 2 => 'LOW', 3 => 'HIGHLOW',
55
+ 4 => 'HIGHADJ', 5 => 'MIPS_JMPADDR', 9 => 'MIPS_JMPADDR16', 10 => 'DIR64'
56
+ }
57
+
58
+ RELOCATION_TYPE = Hash.new({}).merge(
59
+ 'AMD64' => { 0 => 'ABSOLUTE', 1 => 'ADDR64', 2 => 'ADDR32', 3 => 'ADDR32NB',
60
+ 4 => 'REL32', 5 => 'REL32_1', 6 => 'REL32_2', 7 => 'REL32_3',
61
+ 8 => 'REL32_4', 9 => 'REL32_5', 10 => 'SECTION', 11 => 'SECREL',
62
+ 12 => 'SECREL7', 13 => 'TOKEN', 14 => 'SREL32', 15 => 'PAIR',
63
+ 16 => 'SSPAN32' },
64
+ 'ARM' => { 0 => 'ABSOLUTE', 1 => 'ADDR32', 2 => 'ADDR32NB', 3 => 'BRANCH24',
65
+ 4 => 'BRANCH11', 14 => 'SECTION', 15 => 'SECREL' },
66
+ 'I386' => { 0 => 'ABSOLUTE', 1 => 'DIR16', 2 => 'REL16', 6 => 'DIR32',
67
+ 7 => 'DIR32NB', 9 => 'SEG12', 10 => 'SECTION', 11 => 'SECREL',
68
+ 12 => 'TOKEN', 13 => 'SECREL7', 20 => 'REL32' }
69
+ )
70
+
71
+ # lsb of symbol type, unused
72
+ SYMBOL_BTYPE = { 0 => 'NULL', 1 => 'VOID', 2 => 'CHAR', 3 => 'SHORT',
73
+ 4 => 'INT', 5 => 'LONG', 6 => 'FLOAT', 7 => 'DOUBLE', 8 => 'STRUCT',
74
+ 9 => 'UNION', 10 => 'ENUM', 11 => 'MOE', 12 => 'BYTE', 13 => 'WORD',
75
+ 14 => 'UINT', 15 => 'DWORD'}
76
+ SYMBOL_TYPE = { 0 => 'NULL', 1 => 'POINTER', 2 => 'FUNCTION', 3 => 'ARRAY' }
77
+ SYMBOL_SECTION = { 0 => 'UNDEF', 0xffff => 'ABS', 0xfffe => 'DEBUG' }
78
+ SYMBOL_STORAGE = { 0xff => 'EOF', 0 => 'NULL', 1 => 'AUTO', 2 => 'EXTERNAL',
79
+ 3 => 'STATIC', 4 => 'REGISTER', 5 => 'EXT_DEF', 6 => 'LABEL',
80
+ 7 => 'UNDEF_LABEL', 8 => 'STRUCT_MEMBER', 9 => 'ARGUMENT', 10 => 'STRUCT_TAG',
81
+ 11 => 'UNION_MEMBER', 12 => 'UNION_TAG', 13 => 'TYPEDEF', 14 => 'UNDEF_STATIC',
82
+ 15 => 'ENUM_TAG', 16 => 'ENUM_MEMBER', 17 => 'REG_PARAM', 18 => 'BIT_FIELD',
83
+ 100 => 'BLOCK', 101 => 'FUNCTION', 102 => 'END_STRUCT',
84
+ 103 => 'FILE', 104 => 'SECTION', 105 => 'WEAK_EXT',
85
+ }
86
+
87
+ DEBUG_TYPE = { 0 => 'UNKNOWN', 1 => 'COFF', 2 => 'CODEVIEW', 3 => 'FPO', 4 => 'MISC',
88
+ 5 => 'EXCEPTION', 6 => 'FIXUP', 7 => 'OMAP_TO_SRC', 8 => 'OMAP_FROM_SRC',
89
+ 9 => 'BORLAND', 10 => 'RESERVED10', 11 => 'CLSID' }
90
+
91
+ DIRECTORIES = %w[export_table import_table resource_table exception_table certificate_table
92
+ base_relocation_table debug architecture global_ptr tls_table load_config
93
+ bound_import iat delay_import com_runtime reserved]
94
+
95
+ SECTION_CHARACTERISTIC_BITS = {
96
+ 0x20 => 'CONTAINS_CODE', 0x40 => 'CONTAINS_DATA', 0x80 => 'CONTAINS_UDATA',
97
+ 0x100 => 'LNK_OTHER', 0x200 => 'LNK_INFO', 0x800 => 'LNK_REMOVE',
98
+ 0x1000 => 'LNK_COMDAT', 0x8000 => 'GPREL',
99
+ 0x20000 => 'MEM_PURGEABLE|16BIT', 0x40000 => 'MEM_LOCKED', 0x80000 => 'MEM_PRELOAD',
100
+ 0x100000 => 'ALIGN_1BYTES', 0x200000 => 'ALIGN_2BYTES',
101
+ 0x300000 => 'ALIGN_4BYTES', 0x400000 => 'ALIGN_8BYTES',
102
+ 0x500000 => 'ALIGN_16BYTES', 0x600000 => 'ALIGN_32BYTES',
103
+ 0x700000 => 'ALIGN_64BYTES', 0x800000 => 'ALIGN_128BYTES',
104
+ 0x900000 => 'ALIGN_256BYTES', 0xA00000 => 'ALIGN_512BYTES',
105
+ 0xB00000 => 'ALIGN_1024BYTES', 0xC00000 => 'ALIGN_2048BYTES',
106
+ 0xD00000 => 'ALIGN_4096BYTES', 0xE00000 => 'ALIGN_8192BYTES',
107
+ 0x01000000 => 'LNK_NRELOC_OVFL', 0x02000000 => 'MEM_DISCARDABLE',
108
+ 0x04000000 => 'MEM_NOT_CACHED', 0x08000000 => 'MEM_NOT_PAGED',
109
+ 0x10000000 => 'MEM_SHARED', 0x20000000 => 'MEM_EXECUTE',
110
+ 0x40000000 => 'MEM_READ', 0x80000000 => 'MEM_WRITE'
111
+ }
112
+ # NRELOC_OVFL means there are more than 0xffff reloc
113
+ # the reloc count must be set to 0xffff, and the real reloc count
114
+ # is the VA of the first relocation
115
+
116
+ ORDINAL_REGEX = /^Ordinal_(\d+)$/
117
+
118
+ COMIMAGE_FLAGS = {
119
+ 1 => 'ILONLY', 2 => '32BITREQUIRED', 4 => 'IL_LIBRARY',
120
+ 8 => 'STRONGNAMESIGNED', 16 => 'NATIVE_ENTRYPOINT',
121
+ 0x10000 => 'TRACKDEBUGDATA'
122
+ }
123
+
124
+ class SerialStruct < Metasm::SerialStruct
125
+ new_int_field :xword
126
+ end
127
+
128
+ class Header < SerialStruct
129
+ half :machine, 'I386', MACHINE
130
+ half :num_sect
131
+ words :time, :ptr_sym, :num_sym
132
+ half :size_opthdr
133
+ half :characteristics
134
+ fld_bits :characteristics, CHARACTERISTIC_BITS
135
+ end
136
+
137
+ # present in linked files (exe/dll/kmod)
138
+ class OptionalHeader < SerialStruct
139
+ half :signature, 'PE', SIGNATURE
140
+ bytes :link_ver_maj, :link_ver_min
141
+ words :code_size, :data_size, :udata_size, :entrypoint, :base_of_code
142
+ # base_of_data does not exist in 64-bit
143
+ new_field(:base_of_data, lambda { |exe, hdr| exe.decode_word if exe.bitsize != 64 }, lambda { |exe, hdr, val| exe.encode_word(val) if exe.bitsize != 64 }, 0)
144
+ # NT-specific fields
145
+ xword :image_base
146
+ words :sect_align, :file_align
147
+ halfs :os_ver_maj, :os_ver_min, :img_ver_maj, :img_ver_min, :subsys_maj, :subsys_min
148
+ words :reserved, :image_size, :headers_size, :checksum
149
+ half :subsystem, 0, SUBSYSTEM
150
+ half :dll_characts
151
+ fld_bits :dll_characts, DLL_CHARACTERISTIC_BITS
152
+ xwords :stack_reserve, :stack_commit, :heap_reserve, :heap_commit
153
+ words :ldrflags, :numrva
154
+ end
155
+
156
+ # COFF relocatable object symbol (table offset found in the Header.ptr_sym)
157
+ class Symbol < SerialStruct
158
+ str :name, 8 # if the 1st 4 bytes are 0, the word at 4...8 is the name index in the string table
159
+ word :value
160
+ half :sec_nr
161
+ fld_enum :sec_nr, SYMBOL_SECTION
162
+ bitfield :half, 0 => :type_base, 4 => :type
163
+ fld_enum :type_base, SYMBOL_BTYPE
164
+ fld_enum :type, SYMBOL_TYPE
165
+ bytes :storage, :nr_aux
166
+ fld_enum :storage, SYMBOL_STORAGE
167
+
168
+ attr_accessor :aux
169
+ end
170
+
171
+ class Section < SerialStruct
172
+ str :name, 8
173
+ words :virtsize, :virtaddr, :rawsize, :rawaddr, :relocaddr, :linenoaddr
174
+ halfs :relocnr, :linenonr
175
+ word :characteristics
176
+ fld_bits :characteristics, SECTION_CHARACTERISTIC_BITS
177
+
178
+ attr_accessor :encoded, :relocs
179
+ end
180
+
181
+ # COFF relocatable object relocation (per section, see relocaddr/relocnr)
182
+ class RelocObj < SerialStruct
183
+ word :va
184
+ word :symidx
185
+ half :type
186
+ fld_enum(:type) { |coff, rel| RELOCATION_TYPE[coff.header.machine] || {} }
187
+ attr_accessor :sym
188
+ end
189
+
190
+ # lists the functions/addresses exported to the OS (pendant of ImportDirectory)
191
+ class ExportDirectory < SerialStruct
192
+ words :reserved, :timestamp
193
+ halfs :version_major, :version_minor
194
+ words :libname_p, :ordinal_base, :num_exports, :num_names, :func_p, :names_p, :ord_p
195
+ attr_accessor :libname, :exports
196
+
197
+ class Export
198
+ attr_accessor :forwarder_lib, :forwarder_ordinal, :forwarder_name, :target, :target_rva, :name_p, :name, :ordinal
199
+ end
200
+ end
201
+
202
+ # contains the name of dynamic libraries required by the program, and the function to import from them
203
+ class ImportDirectory < SerialStruct
204
+ words :ilt_p, :timestamp, :firstforwarder, :libname_p, :iat_p
205
+ fld_default :firstforwarder, 0xffff_ffff
206
+ attr_accessor :libname, :imports, :iat
207
+
208
+ class Import
209
+ attr_accessor :ordinal, :hint, :hintname_p, :name, :target, :thunk
210
+ end
211
+ end
212
+
213
+ # tree-like structure, holds all misc data the program might need (icons, cursors, version information)
214
+ # conventionnally structured in a 3-level depth structure:
215
+ # I resource type (icon/cursor/etc, see +TYPES+)
216
+ # II resource id (icon n1, icon 'toto', ...)
217
+ # III language-specific version (icon n1 en, icon n1 en-dvorak...)
218
+ class ResourceDirectory < SerialStruct
219
+ words :characteristics, :timestamp
220
+ halfs :major_version, :minor_version, :nr_names, :nr_id
221
+ attr_accessor :entries
222
+ attr_accessor :curoff_label # internal use, in encoder
223
+
224
+ class Entry
225
+ attr_accessor :name_p, :name, :name_w,
226
+ :id, :subdir_p, :subdir, :dataentry_p,
227
+ :data_p, :data, :codepage, :reserved
228
+ end
229
+ end
230
+
231
+ # array of relocations to apply to an executable file
232
+ # when it is loaded at an address that is not its preferred_base_address
233
+ class RelocationTable < SerialStruct
234
+ word :base_addr
235
+ attr_accessor :relocs
236
+
237
+ class Relocation < SerialStruct
238
+ bitfield :half, 0 => :offset, 12 => :type
239
+ fld_enum :type, BASE_RELOCATION_TYPE
240
+ end
241
+ end
242
+
243
+ class DebugDirectory < SerialStruct
244
+ words :characteristics, :timestamp
245
+ halfs :major_version, :minor_version
246
+ words :type, :size_of_data, :addr, :pointer
247
+ fld_enum :type, DEBUG_TYPE
248
+
249
+ attr_accessor :data
250
+
251
+ class NB10 < SerialStruct
252
+ word :offset
253
+ word :signature
254
+ word :age
255
+ strz :pdbfilename
256
+ end
257
+
258
+ class RSDS < SerialStruct
259
+ mem :guid, 16
260
+ word :age
261
+ strz :pdbfilename
262
+ end
263
+ end
264
+
265
+ class TLSDirectory < SerialStruct
266
+ xwords :start_va, :end_va, :index_addr, :callback_p
267
+ words :zerofill_sz, :characteristics
268
+
269
+ attr_accessor :callbacks
270
+ end
271
+
272
+ # the 'load configuration' directory (used for SafeSEH)
273
+ class LoadConfig < SerialStruct
274
+ words :signature, :timestamp
275
+ halfs :major_version, :minor_version
276
+ words :globalflags_clear, :globalflags_set, :critsec_timeout
277
+ # lockpfxtable is an array of VA of LOCK prefixes, to be nopped on singleproc machines (!)
278
+ xwords :decommitblock, :decommittotal, :lockpfxtable, :maxalloc, :maxvirtmem, :process_affinity_mask
279
+ word :process_heap_flags
280
+ halfs :service_pack_id, :reserved
281
+ xwords :editlist, :security_cookie, :sehtable_p, :sehcount
282
+
283
+ attr_accessor :safeseh
284
+ end
285
+
286
+ class DelayImportDirectory < SerialStruct
287
+ words :attributes, :libname_p, :handle_p, :iat_p, :int_p, :biat_p, :uiat_p, :timestamp
288
+
289
+ attr_accessor :libname
290
+ end
291
+
292
+ # structure defining entrypoints and stuff for .net binaries
293
+ class Cor20Header < SerialStruct
294
+ word :size
295
+ halfs :major_version, :minor_version # runtime version
296
+ words :metadata_rva, :metadata_sz
297
+ word :flags
298
+ fld_bits :flags, COMIMAGE_FLAGS
299
+ word :entrypoint # RVA to native or managed ep, depending on flags
300
+ words :resources_rva, :resources_sz
301
+ words :strongnamesig_rva, :strongnamesig_sz
302
+ words :codemgr_rva, :codemgr_sz
303
+ words :vtfixup_rva, :vtfixup_sz
304
+ words :eatjumps_rva, :eatjumps_sz
305
+ words :managednativehdr_rva, :managednativehdr_sz
306
+
307
+ attr_accessor :metadata, :resources, :strongnamesig, :codemgr, :vtfixup, :eatjumps, :managednativehdr
308
+ end
309
+
310
+ # for the icon, the one that appears in the explorer is
311
+ # (NT) the one with the lowest ID
312
+ # (98) the first to appear in the table
313
+ class ResourceDirectory
314
# Converts this directory, recursively, to a Hash.
# Keys are the entry id (translated through TYPE at depth 0 when known),
# or else the entry name, or else its name_w.
# Values are the sub-directory's hash for directory entries, the entry data
# otherwise.
def to_hash(depth=0)
  # only the top level (resource type) has symbolic names
  map = (depth == 0) ? TYPE : {}
  result = {}
  @entries.each { |e|
    key = e.id ? map.fetch(e.id, e.id) : (e.name || e.name_w)
    result[key] = e.subdir ? e.subdir.to_hash(depth+1) : e.data
  }
  result
end
327
+
328
# Builds a directory tree from a Hash shaped like the output of #to_hash.
# Integer keys become entry ids; at depth 0, keys naming a known TYPE are
# translated back to the type id; any other key becomes the entry name.
# Hash values become sub-directories, other values the entry data.
def self.from_hash(h, depth=0)
  # only the top level (resource type) has symbolic names
  map = (depth == 0) ? TYPE : {}
  ret = new
  ret.entries = h.map { |k, v|
    e = Entry.new
    if k.kind_of?(Integer)
      e.id = k
    # Hash#index no longer exists on Ruby >= 3.0; Hash#key is its replacement,
    # the fallback keeps very old interpreters working
    elsif (id = map.respond_to?(:key) ? map.key(k) : map.index(k))
      e.id = id
    else
      e.name = k # name_w ?
    end
    if v.kind_of?(Hash)
      e.subdir = from_hash(v, depth+1)
    else
      e.data = v
    end
    e
  }
  ret
end
344
+
345
+ # returns a string with the to_hash key tree
346
+ def to_s
347
+ to_s_a(0).join("\n")
348
+ end
349
+
350
# Returns the text lines describing this directory (used by #to_s).
# Each entry yields one line starting with its id (plus the TYPE name at
# depth 0 when known) or its inspected name. A sub-directory producing a
# single line is appended to the same line after ' | ', otherwise its lines
# follow, indented by one space. For data entries, the inspected data is
# appended, shortened to its first 8 bytes when longer than 16.
def to_s_a(depth)
  @entries.map { |e|
    label =
      if not e.id
        (e.name || e.name_w).inspect
      elsif depth == 0 and TYPE.has_key?(e.id)
        "#{e.id.to_s} (#{TYPE[e.id]})".ljust(18)
      else
        e.id.to_s.ljust(5)
      end
    lines = [label]
    if e.subdir
      sub = e.subdir.to_s_a(depth+1)
      if sub.length == 1
        label << " | #{sub.first}"
      else
        lines.concat(sub.map { |s| ' ' + s })
      end
    elsif e.data.length > 16
      label << " #{e.data[0, 8].inspect}... <#{e.data.length} bytes>"
    else
      label << ' ' << e.data.inspect
    end
    lines
  }.flatten
end
374
+
375
+ TYPE = {
376
+ 1 => 'CURSOR', 2 => 'BITMAP', 3 => 'ICON', 4 => 'MENU',
377
+ 5 => 'DIALOG', 6 => 'STRING', 7 => 'FONTDIR', 8 => 'FONT',
378
+ 9 => 'ACCELERATOR', 10 => 'RCADATA', 11 => 'MESSAGETABLE',
379
+ 12 => 'GROUP_CURSOR', 14 => 'GROUP_ICON', 16 => 'VERSION',
380
+ 17 => 'DLGINCLUDE', 19 => 'PLUGPLAY', 20 => 'VXD',
381
+ 21 => 'ANICURSOR', 22 => 'ANIICON', 23 => 'HTML',
382
+ 24 => 'MANIFEST'
383
+ }
384
+
385
+ ACCELERATOR_BITS = {
386
+ 1 => 'VIRTKEY', 2 => 'NOINVERT', 4 => 'SHIFT', 8 => 'CTRL',
387
+ 16 => 'ALT', 128 => 'LAST'
388
+ }
389
+
390
+ # cursor = raw data, cursor_group = header , pareil pour les icons
391
+ class Cursor
392
+ attr_accessor :xhotspot, :yhotspot, :data
393
+ end
394
+ end
395
+
396
+ attr_accessor :header, :optheader, :directory, :sections, :endianness, :symbols, :bitsize,
397
+ :export, :imports, :resource, :certificates, :relocations, :debug, :tls, :loadconfig, :delayimports, :com_header
398
+
399
+ # boolean, set to true to have #decode() ignore the base_relocs directory
400
+ attr_accessor :nodecode_relocs
401
+
402
# Accepts, in any order, an optional CPU instance and the optional
# :nodecode_relocs symbol (which sets #nodecode_relocs to true).
# Endianness and bitsize come from the cpu when one is given, and default to
# little-endian / 32 bits otherwise. The cpu (or nil) is passed to the
# superclass constructor.
def initialize(*a)
  cpu = a.find { |arg| CPU === arg }
  @nodecode_relocs = true if a.include?(:nodecode_relocs)

  @directory = {} # DIRECTORIES.key => [rva, size]
  @sections = []
  if cpu
    @endianness = cpu.endianness
    @bitsize = cpu.size
  else
    @endianness = :little
    @bitsize = 32
  end
  @header = Header.new
  @optheader = OptionalHeader.new
  super(cpu)
end
414
+
415
+ def shortname; 'coff'; end
416
+ end
417
+
418
+ # the COFF archive file format
419
+ # maybe used in .lib files (they hold binary import information for libraries)
420
+ # used for unix .a static library files (with no 2nd linker and newline-separated longnames)
421
+ class COFFArchive < ExeFormat
422
+ class Member < SerialStruct
423
+ mem :name, 16
424
+ mem :date, 12
425
+ mem :uid, 6
426
+ mem :gid, 6
427
+ mem :mode, 8
428
+ mem :size, 10
429
+ mem :eoh, 2
430
+
431
+ attr_accessor :offset, :encoded
432
+ end
433
+
434
+ class ImportHeader < SerialStruct
435
+ halfs :sig1, :sig2, :version, :machine
436
+ words :timestamp, :size_of_data
437
+ half :hint
438
+ bitfield :half, 0 => :reserved, 11 => :name_type, 14 => :type
439
+ #fld_enum :type, IMPORT_TYPE
440
+ #fld_enum :name_type, NAME_TYPE
441
+ strz :symname
442
+ strz :libname
443
+ end
444
+
445
+ attr_accessor :members, :signature, :first_linker, :second_linker, :longnames
446
+
447
+ # return the 1st member whose name is name
448
# Returns the first archive member whose name equals +name+,
# or nil when there is none.
def member(name)
  @members.each { |candidate| return candidate if candidate.name == name }
  nil
end
451
+ end
452
+ end
453
+
454
+ require 'metasm/exe_format/coff_encode'
455
+ require 'metasm/exe_format/coff_decode'