metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,82 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/exe_format/main'
7
+
8
+ module Metasm
9
+ # special class that decodes a PE, ELF, MachO or UnivBinary file from its signature
10
+ # XXX UnivBinary is not a real ExeFormat, just a container..
11
+ class AutoExe < ExeFormat
12
+ class UnknownSignature < InvalidExeFormat ; end
13
+
14
# Detects the executable class from the signature found in +str+ and
# delegates to that class' +autoexe_load+.
# +str+ may be a raw string or an EncodedData; in the latter case the
# signature is looked up in its #data, but the original +str+ object is
# what gets forwarded (along with the extra arguments and the block).
def self.load(str, *a, &b)
  raw = str.kind_of?(EncodedData) ? str.data : str
  execlass_from_signature(raw).autoexe_load(str, *a, &b)
end
20
+
21
# Returns the executable class whose registered signature matches +raw+.
#
# Each registered signature is either
# * a String: it matches when +raw+ starts with the same bytes, or
# * a Proc: it is called with +raw+ and matches when it returns a true value.
# The first matching entry wins. When nothing matches, the result of
# #unknown_signature(raw) is used instead.
#
# The value associated with the signature is then resolved: a String is
# looked up as a constant of the Metasm module, a Proc is called, anything
# else is returned as is.
def self.execlass_from_signature(raw)
  m = @signatures.find { |sig, exe|
    case sig
    when String
      # Compare raw bytes rather than Strings: on Ruby >= 2.0 a literal such
      # as "\xca\xfe\xba\xbe" is tagged UTF-8 while file contents are usually
      # binary, and String#== is always false for two non-ascii strings with
      # different encodings.
      want = sig.unpack('C*')
      raw[0, want.length].to_s.unpack('C*')[0, want.length] == want
    when Proc; sig[raw]
    end
  }
  e = m ? m[1] : unknown_signature(raw)
  case e
  when String; Metasm.const_get(e)
  when Proc; e.call
  else e
  end
end
37
+
38
# Appends a new file signature to the list of this class.
# +sig+ is the signature itself (see #execlass_from_signature for the
# supported kinds); the associated target is +exe+ when given, otherwise
# the block. Returns the signature list.
def self.register_signature(sig, exe=nil, &b)
  @signatures ||= []
  target = exe || b
  @signatures << [sig, target]
end
42
+
43
# Replaces the signature list of this class with +sig+ (an empty list by
# default). The array is stored as is, not copied.
def self.init_signatures(sig=[])
  @signatures = sig
end
46
+
47
# Fallback used by #execlass_from_signature when no signature matches:
# raises UnknownSignature, quoting the first 4 bytes of +raw+ in hex.
def self.unknown_signature(raw)
  magic = raw[0, 4].unpack('H*').first
  raise UnknownSignature, "unrecognized executable file format #{magic.inspect}"
end
51
+
52
# Built-in signature table.
# The magic values are copied here and the classes are only referenced from
# blocks, so that nothing is resolved until a file actually matches
# (avoids triggering the exefmt autorequire).
init_signatures

register_signature("\x7fELF") { ELF }

# PE: an MZ header whose dword at 0x3c points, inside the file, to "PE\0\0"
register_signature(lambda { |raw|
  raw[0, 2] == "MZ" and
    off = raw[0x3c, 4].to_s.unpack('V')[0] and
    off < raw.length and
    raw[off, 4] == "PE\0\0"
}) { PE }

# Mach-O magics, given as hex strings
%w[feedface cefaedfe feedfacf cffaedfe].each do |hexmagic|
  register_signature([hexmagic].pack('H*')) { MachO }
end

register_signature("\xca\xfe\xba\xbe") { UniversalBinary }
register_signature("dex\n") { DEX }
register_signature("dey\n") { DEY }
register_signature("\xfa\x70\x0e\x1f") { FatELF }
register_signature('Metasm.dasm') { Disassembler }
62
+
63
# Returns a replacement for this class whose #load falls back to a
# Shellcode when no signature matches.
# The Shellcode is built for +cpu+ when given; otherwise the block is
# called with the raw data and must return the cpu to use.
def self.orshellcode(cpu=nil, &b)
  # anonymous subclass whose singleton #unknown_signature returns a
  # Shellcode instead of raising
  klass = ::Class.new(self)
  meta = class << klass ; self ; end
  meta.send(:define_method, :unknown_signature) { |raw|
    Shellcode.withcpu(cpu || b[raw])
  }
  # the subclass starts with the signature list of the current class
  klass.init_signatures @signatures
  klass
end
74
+ end
75
+
76
# AutoExe variant for memory-mapped binaries: the signatures resolve to
# LoadedELF / LoadedPE instead of the on-disk formats.
class LoadedAutoExe < AutoExe
  # start from an empty list, the signatures of AutoExe are not wanted here
  init_signatures

  register_signature("\x7fELF") { LoadedELF }

  # PE: an MZ header whose dword at 0x3c points, inside the data, to "PE\0\0"
  register_signature(lambda { |raw|
    raw[0, 2] == "MZ" and
      off = raw[0x3c, 4].to_s.unpack('V')[0] and
      off < raw.length and
      raw[off, 4] == "PE\0\0"
  }) { LoadedPE }
end
82
+ end
@@ -0,0 +1,189 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/exe_format/main'
7
+ require 'metasm/encode'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ # BFLT is the binary flat format used by uClinux
12
+ class Bflt < ExeFormat
13
+ MAGIC = 'bFLT'
14
+ FLAGS = { 1 => 'RAM', 2 => 'GOTPIC', 4 => 'GZIP' }
15
+
16
+ attr_accessor :header, :text, :data, :reloc, :got
17
+
18
# The bFLT file header: a 4-byte magic, nine words and 24 bytes of padding.
class Header < SerialStruct
  mem :magic, 4
  words :rev, :entry, :data_start, :data_end, :bss_end, :stack_size,
    :reloc_start, :reloc_count, :flags
  mem :pad, 6*4
  fld_bits(:flags, FLAGS)

  # Decodes the header through the parent class, then rejects anything
  # whose magic is not MAGIC by raising InvalidExeFormat.
  def decode(exe)
    super(exe)

    raise InvalidExeFormat, "Bad bFLT signature #@magic" unless MAGIC == @magic
  end

  # Fills in every field that is still unset, deriving the layout from the
  # text/data sections and the reloc list of +exe+, then lets the parent
  # class handle the rest.
  def set_default_values(exe)
    @magic ||= MAGIC
    @rev ||= 4
    @entry ||= 0x40
    # data follows text, which starts at the entrypoint
    if exe.text
      @data_start ||= @entry + exe.text.length
    end
    if exe.data
      # data_end uses the length of the raw bytes, bss_end the section length
      @data_end ||= @data_start + exe.data.data.length
      @bss_end ||= @data_start + exe.data.length
    end
    @stack_size ||= 0x1000
    # the reloc table comes right after the data
    @reloc_start ||= @data_end
    @reloc_count ||= exe.reloc.length
    @flags ||= []

    super(exe)
  end
end
49
+
50
# Reads an unsigned 32-bit integer from +edata+ (default: the file being
# decoded), using the endianness of the file.
def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end

# Encodes +w+ as an unsigned 32-bit integer, using the endianness of the file.
def encode_word(w)
  Expression[w].encode(:u32, @endianness)
end
52
+
53
# Creates an empty bFLT with a blank header and empty text/data sections.
# The endianness is taken from +cpu+ when given, little-endian otherwise.
def initialize(cpu = nil)
  if cpu
    @endianness = cpu.endianness
  else
    @endianness = :little
  end
  @header = Header.new
  @text = EncodedData.new
  @data = EncodedData.new
  super(cpu)
end
60
+
61
# Decodes the file header, found at the very start of @encoded.
def decode_header
  @encoded.ptr = 0
  @header.decode(self)
end

# Decodes the whole file: header, text, data (extended with the bss size)
# and the relocation table, then interprets the relocations.
# Raises if the GZIP flag is set, as compressed files are not supported.
def decode
  decode_header

  # text runs from the entrypoint to data_start, data follows up to data_end
  @encoded.ptr = @header.entry
  @text = EncodedData.new << @encoded.read(@header.data_start - @header.entry)
  @data = EncodedData.new << @encoded.read(@header.data_end - @header.data_start)
  # the bss has no file content, it only extends the virtual size of data
  @data.virtsize += (@header.bss_end - @header.data_end)

  if @header.flags.include? 'GZIP'
    # TODO gzip
    raise 'bFLT decoder: gzip format not supported'
  end

  @reloc = []
  @encoded.ptr = @header.reloc_start
  @header.reloc_count.times { @reloc << decode_word }
  # the format revision is stored in the +rev+ header field (the header has
  # no +version+ field); for revision 2 only the low 30 bits of each entry
  # are kept
  if @header.rev == 2
    @reloc.map! { |r| r & 0x3fff_ffff }
  end

  decode_interpret_relocs
end
88
+
89
# Turns every entry of the decoded relocation table into a Relocation
# attached to the section that holds it.
#
# Each entry of @reloc is a file address: it is located in text or data,
# the word stored there is read, and a label is created at the address the
# word points to (in text, or in data/bss). Entries or targets that fall
# outside of the sections are skipped (with a message when $VERBOSE).
def decode_interpret_relocs
  @reloc.each { |r|
    # where the reloc is
    if r >= @header.entry and r < @header.data_start
      section = @text
      base = @header.entry
    elsif r >= @header.data_start and r < @header.data_end
      section = @data
      base = @header.data_start
    else
      puts "out of bounds reloc at #{Expression[r]}" if $VERBOSE
      next
    end

    # what it points to
    section.ptr = r-base
    target = decode_word(section)
    if target >= @header.entry and target < @header.data_start
      target = label_at(@text, target - @header.entry, "xref_#{Expression[target]}")
    elsif target >= @header.data_start and target < @header.bss_end
      target = label_at(@data, target - @header.data_start, "xref_#{Expression[target]}")
    else
      puts "out of bounds reloc target at #{Expression[r]}" if $VERBOSE
      next
    end

    # r-base is an offset inside +section+, so the relocation must be stored
    # in that section (not unconditionally in @text)
    section.reloc[r-base] = Relocation.new(Expression[target], :u32, @endianness)
  }
end
118
+
119
# Encodes the file and returns the resulting raw data: header, text, data,
# then the relocation table.
# The relocation table is rebuilt from the section relocations first.
# The GZIP flag is cleared, as compression is not supported.
def encode
  create_relocation_table

  # TODO got, gzip
  if @header.flags.include? 'GZIP'
    puts "W: bFLT: clearing gzip flag" if $VERBOSE
    @header.flags.delete 'GZIP'
  end

  @encoded = EncodedData.new
  @encoded << @header.encode(self)

  # resolve the labels against the file layout: text at entry, data at data_start
  binding = @text.binding(@header.entry).merge(@data.binding(@header.data_start))
  @encoded << @text << @data.data
  @encoded.fixup! binding
  @encoded.reloc.clear

  # create_relocation_table stores its result in @reloc
  @reloc.each { |r| @encoded << encode_word(r) }

  @encoded.data
end
140
+
141
# Rebuilds @reloc, the list of file addresses that need relocation, from the
# relocations attached to the text and data sections.
#
# A relocation is kept when it has the endianness of the file, is 32-bit
# (:u32, :a32 or :i32) and its target, once the section labels are bound
# relative to a common symbolic map address, is at a constant offset from
# that address. Anything else is ignored (with a message when $VERBOSE).
def create_relocation_table
  @reloc = []
  mapaddr = new_label('mapaddr')
  labels = @text.binding(mapaddr).merge(@data.binding(mapaddr))
  [@text, @data].each { |section|
    # file address of the start of the section
    base = @header.entry || 0x40
    if section == @data
      base = @header.data_start || base+@text.length
    end
    section.reloc.each { |o, r|
      supported = (r.endianness == @endianness) &&
        [:u32, :a32, :i32].include?(r.type) &&
        Expression[r.target.bind(labels), :-, mapaddr].reduce.kind_of?(::Integer)
      if supported
        @reloc << (base+o)
      else
        puts "bFLT: ignoring unsupported reloc #{r.inspect} at #{Expression[o]}" if $VERBOSE
      end
    }
  }
end
158
+
159
# Prepares the source buffers used while parsing assembly: one for .text,
# one for .data. Parsing starts in the text buffer unless a current buffer
# is already set.
def parse_init
  @textsrc ||= []
  @datasrc ||= []
  @cursource = @textsrc unless @cursource
  super()
end
165
+
166
# Handles the parser directives specific to this format: '.text' and
# '.data' (case-insensitive) switch the buffer receiving the source.
# Any other directive is left to the parent class.
def parse_parser_instruction(instr)
  directive = instr.raw.downcase
  if directive == '.text'
    @cursource = @textsrc
  elsif directive == '.data'
    @cursource = @datasrc
  else
    # entrypoint is the 1st byte of .text
    super(instr)
  end
end
174
+
175
# Assembles the pending source into the text and data sections and empties
# the source buffers. When arguments are given they are parsed first.
# Returns self.
def assemble(*a)
  parse(*a) unless a.empty?
  [[@text, @textsrc], [@data, @datasrc]].each { |section, source|
    section << assemble_sequence(source, @cpu)
    source.clear
  }
  self
end
183
+
184
# Yields each section together with its start address from the header:
# text at the entrypoint, then data at data_start.
def each_section
  yield(@text, @header.entry)
  yield(@data, @header.data_start)
end
188
+ end
189
+ end
@@ -0,0 +1,455 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+
9
+ module Metasm
10
+ # the COFF object file format
11
+ # mostly used on windows (PE/COFF)
12
+ class COFF < ExeFormat
13
+ CHARACTERISTIC_BITS = {
14
+ 0x0001 => 'RELOCS_STRIPPED', 0x0002 => 'EXECUTABLE_IMAGE',
15
+ 0x0004 => 'LINE_NUMS_STRIPPED', 0x0008 => 'LOCAL_SYMS_STRIPPED',
16
+ 0x0010 => 'AGGRESSIVE_WS_TRIM', 0x0020 => 'LARGE_ADDRESS_AWARE',
17
+ 0x0040 => 'x16BIT_MACHINE', 0x0080 => 'BYTES_REVERSED_LO',
18
+ 0x0100 => 'x32BIT_MACHINE', 0x0200 => 'DEBUG_STRIPPED',
19
+ 0x0400 => 'REMOVABLE_RUN_FROM_SWAP', 0x0800 => 'NET_RUN_FROM_SWAP',
20
+ 0x1000 => 'SYSTEM', 0x2000 => 'DLL',
21
+ 0x4000 => 'UP_SYSTEM_ONLY', 0x8000 => 'BYTES_REVERSED_HI'
22
+ }
23
+
24
+ MACHINE = {
25
+ 0x0 => 'UNKNOWN', 0x184 => 'ALPHA', 0x1c0 => 'ARM',
26
+ 0x1d3 => 'AM33', 0x8664=> 'AMD64', 0xebc => 'EBC',
27
+ 0x9041=> 'M32R', 0x1f1 => 'POWERPCFP',
28
+ 0x284 => 'ALPHA64', 0x14c => 'I386', 0x200 => 'IA64',
29
+ 0x268 => 'M68K', 0x266 => 'MIPS16', 0x366 => 'MIPSFPU',
30
+ 0x466 => 'MIPSFPU16', 0x1f0 => 'POWERPC', 0x162 => 'R3000',
31
+ 0x166 => 'R4000', 0x168 => 'R10000', 0x1a2 => 'SH3',
32
+ 0x1a3 => 'SH3DSP', 0x1a6 => 'SH4', 0x1a8 => 'SH5',
33
+ 0x1c2 => 'THUMB', 0x169 => 'WCEMIPSV2'
34
+ }
35
+
36
+ # PE+ is for 64bits address spaces
37
+ SIGNATURE = { 0x10b => 'PE', 0x20b => 'PE+', 0x107 => 'ROM' }
38
+
39
+ SUBSYSTEM = {
40
+ 0 => 'UNKNOWN', 1 => 'NATIVE', 2 => 'WINDOWS_GUI',
41
+ 3 => 'WINDOWS_CUI', 5 => 'OS/2_CUI', 7 => 'POSIX_CUI',
42
+ 8 => 'WIN9X_DRIVER', 9 => 'WINDOWS_CE_GUI',
43
+ 10 => 'EFI_APPLICATION',
44
+ 11 => 'EFI_BOOT_SERVICE_DRIVER', 12 => 'EFI_RUNTIME_DRIVER',
45
+ 13 => 'EFI_ROM', 14 => 'XBOX'
46
+ }
47
+
48
+ DLL_CHARACTERISTIC_BITS = {
49
+ 0x40 => 'DYNAMIC_BASE', 0x80 => 'FORCE_INTEGRITY', 0x100 => 'NX_COMPAT',
50
+ 0x200 => 'NO_ISOLATION', 0x400 => 'NO_SEH', 0x800 => 'NO_BIND',
51
+ 0x2000 => 'WDM_DRIVER', 0x8000 => 'TERMINAL_SERVER_AWARE'
52
+ }
53
+
54
+ BASE_RELOCATION_TYPE = { 0 => 'ABSOLUTE', 1 => 'HIGH', 2 => 'LOW', 3 => 'HIGHLOW',
55
+ 4 => 'HIGHADJ', 5 => 'MIPS_JMPADDR', 9 => 'MIPS_JMPADDR16', 10 => 'DIR64'
56
+ }
57
+
58
+ RELOCATION_TYPE = Hash.new({}).merge(
59
+ 'AMD64' => { 0 => 'ABSOLUTE', 1 => 'ADDR64', 2 => 'ADDR32', 3 => 'ADDR32NB',
60
+ 4 => 'REL32', 5 => 'REL32_1', 6 => 'REL32_2', 7 => 'REL32_3',
61
+ 8 => 'REL32_4', 9 => 'REL32_5', 10 => 'SECTION', 11 => 'SECREL',
62
+ 12 => 'SECREL7', 13 => 'TOKEN', 14 => 'SREL32', 15 => 'PAIR',
63
+ 16 => 'SSPAN32' },
64
+ 'ARM' => { 0 => 'ABSOLUTE', 1 => 'ADDR32', 2 => 'ADDR32NB', 3 => 'BRANCH24',
65
+ 4 => 'BRANCH11', 14 => 'SECTION', 15 => 'SECREL' },
66
+ 'I386' => { 0 => 'ABSOLUTE', 1 => 'DIR16', 2 => 'REL16', 6 => 'DIR32',
67
+ 7 => 'DIR32NB', 9 => 'SEG12', 10 => 'SECTION', 11 => 'SECREL',
68
+ 12 => 'TOKEN', 13 => 'SECREL7', 20 => 'REL32' }
69
+ )
70
+
71
+ # lsb of symbol type, unused
72
+ SYMBOL_BTYPE = { 0 => 'NULL', 1 => 'VOID', 2 => 'CHAR', 3 => 'SHORT',
73
+ 4 => 'INT', 5 => 'LONG', 6 => 'FLOAT', 7 => 'DOUBLE', 8 => 'STRUCT',
74
+ 9 => 'UNION', 10 => 'ENUM', 11 => 'MOE', 12 => 'BYTE', 13 => 'WORD',
75
+ 14 => 'UINT', 15 => 'DWORD'}
76
+ SYMBOL_TYPE = { 0 => 'NULL', 1 => 'POINTER', 2 => 'FUNCTION', 3 => 'ARRAY' }
77
+ SYMBOL_SECTION = { 0 => 'UNDEF', 0xffff => 'ABS', 0xfffe => 'DEBUG' }
78
+ SYMBOL_STORAGE = { 0xff => 'EOF', 0 => 'NULL', 1 => 'AUTO', 2 => 'EXTERNAL',
79
+ 3 => 'STATIC', 4 => 'REGISTER', 5 => 'EXT_DEF', 6 => 'LABEL',
80
+ 7 => 'UNDEF_LABEL', 8 => 'STRUCT_MEMBER', 9 => 'ARGUMENT', 10 => 'STRUCT_TAG',
81
+ 11 => 'UNION_MEMBER', 12 => 'UNION_TAG', 13 => 'TYPEDEF', 14 => 'UNDEF_STATIC',
82
+ 15 => 'ENUM_TAG', 16 => 'ENUM_MEMBER', 17 => 'REG_PARAM', 18 => 'BIT_FIELD',
83
+ 100 => 'BLOCK', 101 => 'FUNCTION', 102 => 'END_STRUCT',
84
+ 103 => 'FILE', 104 => 'SECTION', 105 => 'WEAK_EXT',
85
+ }
86
+
87
+ DEBUG_TYPE = { 0 => 'UNKNOWN', 1 => 'COFF', 2 => 'CODEVIEW', 3 => 'FPO', 4 => 'MISC',
88
+ 5 => 'EXCEPTION', 6 => 'FIXUP', 7 => 'OMAP_TO_SRC', 8 => 'OMAP_FROM_SRC',
89
+ 9 => 'BORLAND', 10 => 'RESERVED10', 11 => 'CLSID' }
90
+
91
+ DIRECTORIES = %w[export_table import_table resource_table exception_table certificate_table
92
+ base_relocation_table debug architecture global_ptr tls_table load_config
93
+ bound_import iat delay_import com_runtime reserved]
94
+
95
+ SECTION_CHARACTERISTIC_BITS = {
96
+ 0x20 => 'CONTAINS_CODE', 0x40 => 'CONTAINS_DATA', 0x80 => 'CONTAINS_UDATA',
97
+ 0x100 => 'LNK_OTHER', 0x200 => 'LNK_INFO', 0x800 => 'LNK_REMOVE',
98
+ 0x1000 => 'LNK_COMDAT', 0x8000 => 'GPREL',
99
+ 0x20000 => 'MEM_PURGEABLE|16BIT', 0x40000 => 'MEM_LOCKED', 0x80000 => 'MEM_PRELOAD',
100
+ 0x100000 => 'ALIGN_1BYTES', 0x200000 => 'ALIGN_2BYTES',
101
+ 0x300000 => 'ALIGN_4BYTES', 0x400000 => 'ALIGN_8BYTES',
102
+ 0x500000 => 'ALIGN_16BYTES', 0x600000 => 'ALIGN_32BYTES',
103
+ 0x700000 => 'ALIGN_64BYTES', 0x800000 => 'ALIGN_128BYTES',
104
+ 0x900000 => 'ALIGN_256BYTES', 0xA00000 => 'ALIGN_512BYTES',
105
+ 0xB00000 => 'ALIGN_1024BYTES', 0xC00000 => 'ALIGN_2048BYTES',
106
+ 0xD00000 => 'ALIGN_4096BYTES', 0xE00000 => 'ALIGN_8192BYTES',
107
+ 0x01000000 => 'LNK_NRELOC_OVFL', 0x02000000 => 'MEM_DISCARDABLE',
108
+ 0x04000000 => 'MEM_NOT_CACHED', 0x08000000 => 'MEM_NOT_PAGED',
109
+ 0x10000000 => 'MEM_SHARED', 0x20000000 => 'MEM_EXECUTE',
110
+ 0x40000000 => 'MEM_READ', 0x80000000 => 'MEM_WRITE'
111
+ }
112
+ # NRELOC_OVFL means there are more than 0xffff reloc
113
+ # the reloc count must be set to 0xffff, and the real reloc count
114
+ # is the VA of the first relocation
115
+
116
+ ORDINAL_REGEX = /^Ordinal_(\d+)$/
117
+
118
+ COMIMAGE_FLAGS = {
119
+ 1 => 'ILONLY', 2 => '32BITREQUIRED', 4 => 'IL_LIBRARY',
120
+ 8 => 'STRONGNAMESIGNED', 16 => 'NATIVE_ENTRYPOINT',
121
+ 0x10000 => 'TRACKDEBUGDATA'
122
+ }
123
+
124
+ class SerialStruct < Metasm::SerialStruct
125
+ new_int_field :xword
126
+ end
127
+
128
+ class Header < SerialStruct
129
+ half :machine, 'I386', MACHINE
130
+ half :num_sect
131
+ words :time, :ptr_sym, :num_sym
132
+ half :size_opthdr
133
+ half :characteristics
134
+ fld_bits :characteristics, CHARACTERISTIC_BITS
135
+ end
136
+
137
+ # present in linked files (exe/dll/kmod)
138
+ class OptionalHeader < SerialStruct
139
+ half :signature, 'PE', SIGNATURE
140
+ bytes :link_ver_maj, :link_ver_min
141
+ words :code_size, :data_size, :udata_size, :entrypoint, :base_of_code
142
+ # base_of_data does not exist in 64-bit
143
+ new_field(:base_of_data, lambda { |exe, hdr| exe.decode_word if exe.bitsize != 64 }, lambda { |exe, hdr, val| exe.encode_word(val) if exe.bitsize != 64 }, 0)
144
+ # NT-specific fields
145
+ xword :image_base
146
+ words :sect_align, :file_align
147
+ halfs :os_ver_maj, :os_ver_min, :img_ver_maj, :img_ver_min, :subsys_maj, :subsys_min
148
+ words :reserved, :image_size, :headers_size, :checksum
149
+ half :subsystem, 0, SUBSYSTEM
150
+ half :dll_characts
151
+ fld_bits :dll_characts, DLL_CHARACTERISTIC_BITS
152
+ xwords :stack_reserve, :stack_commit, :heap_reserve, :heap_commit
153
+ words :ldrflags, :numrva
154
+ end
155
+
156
+ # COFF relocatable object symbol (table offset found in the Header.ptr_sym)
157
+ class Symbol < SerialStruct
158
+ str :name, 8 # if the 1st 4 bytes are 0, the word at 4...8 is the name index in the string table
159
+ word :value
160
+ half :sec_nr
161
+ fld_enum :sec_nr, SYMBOL_SECTION
162
+ bitfield :half, 0 => :type_base, 4 => :type
163
+ fld_enum :type_base, SYMBOL_BTYPE
164
+ fld_enum :type, SYMBOL_TYPE
165
+ bytes :storage, :nr_aux
166
+ fld_enum :storage, SYMBOL_STORAGE
167
+
168
+ attr_accessor :aux
169
+ end
170
+
171
+ class Section < SerialStruct
172
+ str :name, 8
173
+ words :virtsize, :virtaddr, :rawsize, :rawaddr, :relocaddr, :linenoaddr
174
+ halfs :relocnr, :linenonr
175
+ word :characteristics
176
+ fld_bits :characteristics, SECTION_CHARACTERISTIC_BITS
177
+
178
+ attr_accessor :encoded, :relocs
179
+ end
180
+
181
+ # COFF relocatable object relocation (per section, see relocaddr/relocnr)
182
+ class RelocObj < SerialStruct
183
+ word :va
184
+ word :symidx
185
+ half :type
186
+ fld_enum(:type) { |coff, rel| RELOCATION_TYPE[coff.header.machine] || {} }
187
+ attr_accessor :sym
188
+ end
189
+
190
+ # lists the functions/addresses exported to the OS (counterpart of ImportDirectory)
191
+ class ExportDirectory < SerialStruct
192
+ words :reserved, :timestamp
193
+ halfs :version_major, :version_minor
194
+ words :libname_p, :ordinal_base, :num_exports, :num_names, :func_p, :names_p, :ord_p
195
+ attr_accessor :libname, :exports
196
+
197
+ class Export
198
+ attr_accessor :forwarder_lib, :forwarder_ordinal, :forwarder_name, :target, :target_rva, :name_p, :name, :ordinal
199
+ end
200
+ end
201
+
202
+ # contains the name of dynamic libraries required by the program, and the function to import from them
203
+ class ImportDirectory < SerialStruct
204
+ words :ilt_p, :timestamp, :firstforwarder, :libname_p, :iat_p
205
+ fld_default :firstforwarder, 0xffff_ffff
206
+ attr_accessor :libname, :imports, :iat
207
+
208
+ class Import
209
+ attr_accessor :ordinal, :hint, :hintname_p, :name, :target, :thunk
210
+ end
211
+ end
212
+
213
+ # tree-like structure, holds all misc data the program might need (icons, cursors, version information)
214
+ # conventionally structured in a 3-level depth structure:
215
+ # I resource type (icon/cursor/etc, see +TYPES+)
216
+ # II resource id (icon n1, icon 'toto', ...)
217
+ # III language-specific version (icon n1 en, icon n1 en-dvorak...)
218
+ class ResourceDirectory < SerialStruct
219
+ words :characteristics, :timestamp
220
+ halfs :major_version, :minor_version, :nr_names, :nr_id
221
+ attr_accessor :entries
222
+ attr_accessor :curoff_label # internal use, in encoder
223
+
224
+ class Entry
225
+ attr_accessor :name_p, :name, :name_w,
226
+ :id, :subdir_p, :subdir, :dataentry_p,
227
+ :data_p, :data, :codepage, :reserved
228
+ end
229
+ end
230
+
231
+ # array of relocations to apply to an executable file
232
+ # when it is loaded at an address that is not its preferred_base_address
233
+ class RelocationTable < SerialStruct
234
+ word :base_addr
235
+ attr_accessor :relocs
236
+
237
+ class Relocation < SerialStruct
238
+ bitfield :half, 0 => :offset, 12 => :type
239
+ fld_enum :type, BASE_RELOCATION_TYPE
240
+ end
241
+ end
242
+
243
+ class DebugDirectory < SerialStruct
244
+ words :characteristics, :timestamp
245
+ halfs :major_version, :minor_version
246
+ words :type, :size_of_data, :addr, :pointer
247
+ fld_enum :type, DEBUG_TYPE
248
+
249
+ attr_accessor :data
250
+
251
+ class NB10 < SerialStruct
252
+ word :offset
253
+ word :signature
254
+ word :age
255
+ strz :pdbfilename
256
+ end
257
+
258
+ class RSDS < SerialStruct
259
+ mem :guid, 16
260
+ word :age
261
+ strz :pdbfilename
262
+ end
263
+ end
264
+
265
+ class TLSDirectory < SerialStruct
266
+ xwords :start_va, :end_va, :index_addr, :callback_p
267
+ words :zerofill_sz, :characteristics
268
+
269
+ attr_accessor :callbacks
270
+ end
271
+
272
+ # the 'load configuration' directory (used for SafeSEH)
273
+ class LoadConfig < SerialStruct
274
+ words :signature, :timestamp
275
+ halfs :major_version, :minor_version
276
+ words :globalflags_clear, :globalflags_set, :critsec_timeout
277
+ # lockpfxtable is an array of VA of LOCK prefixes, to be nopped on singleproc machines (!)
278
+ xwords :decommitblock, :decommittotal, :lockpfxtable, :maxalloc, :maxvirtmem, :process_affinity_mask
279
+ word :process_heap_flags
280
+ halfs :service_pack_id, :reserved
281
+ xwords :editlist, :security_cookie, :sehtable_p, :sehcount
282
+
283
+ attr_accessor :safeseh
284
+ end
285
+
286
+ class DelayImportDirectory < SerialStruct
287
+ words :attributes, :libname_p, :handle_p, :iat_p, :int_p, :biat_p, :uiat_p, :timestamp
288
+
289
+ attr_accessor :libname
290
+ end
291
+
292
+ # structure defining entrypoints and stuff for .net binaries
293
+ class Cor20Header < SerialStruct
294
+ word :size
295
+ halfs :major_version, :minor_version # runtime version
296
+ words :metadata_rva, :metadata_sz
297
+ word :flags
298
+ fld_bits :flags, COMIMAGE_FLAGS
299
+ word :entrypoint # RVA to native or managed ep, depending on flags
300
+ words :resources_rva, :resources_sz
301
+ words :strongnamesig_rva, :strongnamesig_sz
302
+ words :codemgr_rva, :codemgr_sz
303
+ words :vtfixup_rva, :vtfixup_sz
304
+ words :eatjumps_rva, :eatjumps_sz
305
+ words :managednativehdr_rva, :managednativehdr_sz
306
+
307
+ attr_accessor :metadata, :resources, :strongnamesig, :codemgr, :vtfixup, :eatjumps, :managednativehdr
308
+ end
309
+
310
+ # for the icon, the one that appears in the explorer is
311
+ # (NT) the one with the lowest ID
312
+ # (98) the first to appear in the table
313
+ class ResourceDirectory
314
# Converts the directory tree to nested hashes.
# The key of an entry is its id (replaced by its TYPE name at the top
# level when there is one), else its name, else its name_w. The value is
# the converted subdirectory when the entry has one, the entry data
# otherwise.
def to_hash(depth=0)
  # only the first level (resource type) has symbolic names
  map = depth == 0 ? TYPE : {}
  result = {}
  @entries.each { |e|
    key = if e.id
      map.fetch(e.id, e.id)
    elsif e.name
      e.name
    else
      e.name_w
    end
    result[key] = e.subdir ? e.subdir.to_hash(depth+1) : e.data
  }
  result
end
327
+
328
# Builds a directory tree from nested hashes (the reverse of #to_hash).
# An Integer key becomes the entry id; at the top level a key that is a
# TYPE name is converted back to the matching id; any other key becomes
# the entry name. A Hash value becomes a subdirectory, anything else the
# entry data.
def self.from_hash(h, depth=0)
  # only the first level (resource type) has symbolic names
  map = depth == 0 ? TYPE : {}
  ret = new
  ret.entries = h.map { |k, v|
    e = Entry.new
    if k.kind_of?(Integer)
      e.id = k
    # Hash#index was removed in ruby 3.0, Hash#key is its replacement;
    # the old name is kept as a fallback for interpreters without #key
    elsif (id = map.respond_to?(:key) ? map.key(k) : map.index(k))
      e.id = id
    else
      e.name = k # name_w ?
    end
    if v.kind_of?(Hash)
      e.subdir = from_hash(v, depth+1)
    else
      e.data = v
    end
    e
  }
  ret
end
344
+
345
# Returns a multi-line string showing the key tree of the directory,
# one line per element returned by #to_s_a.
def to_s
  lines = to_s_a(0)
  lines.join("\n")
end
349
+
350
# Returns the lines of the textual tree for this directory, as an array
# of strings.
# Each entry is shown by its id (with its TYPE name at depth 0 when known)
# or by its inspected name. A subdirectory rendering to a single line is
# appended to the line of its entry; otherwise its lines follow, each
# prefixed with a space. An entry without subdirectory shows its data,
# truncated to the first 8 bytes when longer than 16.
def to_s_a(depth)
  @entries.map { |e|
    label = if e.id
      if depth == 0 and TYPE.has_key?(e.id)
        "#{e.id.to_s} (#{TYPE[e.id]})".ljust(18)
      else
        e.id.to_s.ljust(5)
      end
    else
      (e.name || e.name_w).inspect
    end
    lines = [label]

    if e.subdir
      sublines = e.subdir.to_s_a(depth+1)
      if sublines.length == 1
        label << " | #{sublines.first}"
      else
        lines << sublines.map { |s| ' ' + s }
      end
    elsif e.data.length > 16
      label << " #{e.data[0, 8].inspect}... <#{e.data.length} bytes>"
    else
      label << ' ' << e.data.inspect
    end

    lines
  }.flatten
end
374
+
375
+ TYPE = {
376
+ 1 => 'CURSOR', 2 => 'BITMAP', 3 => 'ICON', 4 => 'MENU',
377
+ 5 => 'DIALOG', 6 => 'STRING', 7 => 'FONTDIR', 8 => 'FONT',
378
+ 9 => 'ACCELERATOR', 10 => 'RCADATA', 11 => 'MESSAGETABLE',
379
+ 12 => 'GROUP_CURSOR', 14 => 'GROUP_ICON', 16 => 'VERSION',
380
+ 17 => 'DLGINCLUDE', 19 => 'PLUGPLAY', 20 => 'VXD',
381
+ 21 => 'ANICURSOR', 22 => 'ANIICON', 23 => 'HTML',
382
+ 24 => 'MANIFEST'
383
+ }
384
+
385
+ ACCELERATOR_BITS = {
386
+ 1 => 'VIRTKEY', 2 => 'NOINVERT', 4 => 'SHIFT', 8 => 'CTRL',
387
+ 16 => 'ALT', 128 => 'LAST'
388
+ }
389
+
390
+ # cursor = raw data, cursor_group = header; same for icons
391
+ class Cursor
392
+ attr_accessor :xhotspot, :yhotspot, :data
393
+ end
394
+ end
395
+
396
+ attr_accessor :header, :optheader, :directory, :sections, :endianness, :symbols, :bitsize,
397
+ :export, :imports, :resource, :certificates, :relocations, :debug, :tls, :loadconfig, :delayimports, :com_header
398
+
399
+ # boolean, set to true to have #decode() ignore the base_relocs directory
400
+ attr_accessor :nodecode_relocs
401
+
402
# Creates an empty COFF.
# The arguments may include a CPU (the first one found is used) and the
# symbol :nodecode_relocs, which sets the flag of the same name.
# Endianness and bitsize come from the cpu, defaulting to little-endian
# 32 bits when there is none.
def initialize(*a)
  cpu = a.grep(CPU).first
  @nodecode_relocs = true if a.include? :nodecode_relocs

  @directory = {} # DIRECTORIES.key => [rva, size]
  @sections = []
  if cpu
    @endianness = cpu.endianness
    @bitsize = cpu.size
  else
    @endianness = :little
    @bitsize = 32
  end
  @header = Header.new
  @optheader = OptionalHeader.new
  super(cpu)
end
414
+
415
# Short identifier of this executable format.
def shortname
  'coff'
end
416
+ end
417
+
418
+ # the COFF archive file format
419
+ # maybe used in .lib files (they hold binary import information for libraries)
420
+ # used for unix .a static library files (with no 2nd linker and newline-separated longnames)
421
+ class COFFArchive < ExeFormat
422
+ class Member < SerialStruct
423
+ mem :name, 16
424
+ mem :date, 12
425
+ mem :uid, 6
426
+ mem :gid, 6
427
+ mem :mode, 8
428
+ mem :size, 10
429
+ mem :eoh, 2
430
+
431
+ attr_accessor :offset, :encoded
432
+ end
433
+
434
+ class ImportHeader < SerialStruct
435
+ halfs :sig1, :sig2, :version, :machine
436
+ words :timestamp, :size_of_data
437
+ half :hint
438
+ bitfield :half, 0 => :reserved, 11 => :name_type, 14 => :type
439
+ #fld_enum :type, IMPORT_TYPE
440
+ #fld_enum :name_type, NAME_TYPE
441
+ strz :symname
442
+ strz :libname
443
+ end
444
+
445
+ attr_accessor :members, :signature, :first_linker, :second_linker, :longnames
446
+
447
# Returns the first member of the archive whose name is +name+,
# or nil when there is none.
def member(name)
  @members.each { |m| return m if m.name == name }
  nil
end
451
+ end
452
+ end
453
+
454
+ require 'metasm/exe_format/coff_encode'
455
+ require 'metasm/exe_format/coff_decode'