metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,827 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/encode'
9
+ require 'metasm/decode'
10
+
11
+ module Metasm
12
+ class MachO < ExeFormat
13
# File signatures. The 64-bit pair differs from the 32-bit pair in the last
# byte only; each CIGAM is its MAGIC with the bytes in reverse order.
MAGIC   = "\xfe\xed\xfa\xce" # 0xfeedface
MAGIC64 = "\xfe\xed\xfa\xcf" # 0xfeedfacf
CIGAM   = MAGIC.reverse      # 0xcefaedfe
CIGAM64 = MAGIC64.reverse    # 0xcffaedfe

# the four signatures above, gathered in one list
MAGICS = [MAGIC, CIGAM, MAGIC64, CIGAM64]
19
+
20
# cputype number => cpu name, as used by Header#cputype.
# 64-bit variants are the 32-bit number with bit 0x100_0000 set
# (0x100_0000 alone would be 'CPU_ARCH_ABI64').
CPU = {
  1  => 'VAX',
  2  => 'ROMP',
  4  => 'NS32032',
  5  => 'NS32332',
  6  => 'MC680x0',
  7  => 'I386',
  8  => 'MIPS',
  9  => 'NS32532',
  11 => 'HPPA',
  12 => 'ARM',
  13 => 'MC88000',
  14 => 'SPARC',
  15 => 'I860',
  16 => 'I860_LITTLE',
  17 => 'RS6000',
  18 => 'POWERPC',
  (0x100_0000 | 7)  => 'X86_64',
  (0x100_0000 | 18) => 'POWERPC64',
  255 => 'VEO',
  0xffff_ffff => 'ANY',
}
35
+
36
# cpu name (a value of CPU) => { cpusubtype number => subtype name }.
# Tables stored under a Symbol key have no matching entry in CPU.
#
# The MMAX subtypes used to sit in the same literal as the ROMP ones; both
# sets use the keys 0..3, so the MMAX names silently replaced the ROMP names.
# They now live under the NS32032/NS32332/NS32532 cpu types (the Mach headers
# list them as "32032/32332/32532 subtypes").
SUBCPU = {
  'VAX' => { 0 => 'ALL',
    1 => '780', 2 => '785', 3 => '750', 4 => '730',
    5 => 'UVAXI', 6 => 'UVAXII', 7 => '8200', 8 => '8500',
    9 => '8600', 10 => '8650', 11 => '8800', 12 => 'UVAXIII',
  },
  'ROMP' => { 0 => 'ALL', 1 => 'PC', 2 => 'APC', 3 => '135', },
  'NS32032' => { 0 => 'MMAX_ALL', 1 => 'MMAX_DPC', 2 => 'SQT',
    3 => 'MMAX_APC_FPU', 4 => 'MMAX_APC_FPA', 5 => 'MMAX_XPC',
  },
  'I386' => { 3 => 'ALL', 4 => '486', 4+128 => '486SX',
    0 => 'INTEL_MODEL_ALL', 10 => 'PENTIUM_4',
    5 => 'PENT', 0x16 => 'PENTPRO', 0x36 => 'PENTII_M3', 0x56 => 'PENTII_M5',
  },
  'MIPS' => { 0 => 'ALL', 1 => 'R2300', 2 => 'R2600', 3 => 'R2800', 4 => 'R2000a', },
  'MC680x0' => { 1 => 'ALL', 2 => 'MC68040', 3 => 'MC68030_ONLY', },
  'HPPA' => { 0 => 'ALL', 1 => '7100LC', },
  'ARM' => { 0 => 'ALL', 1 => 'A500_ARCH', 2 => 'A500', 3 => 'A440',
    4 => 'M4', 5 => 'A680', 6 => 'ARMV6', 9 => 'ARMV7',
  },
  'MC88000' => { 0 => 'ALL', 1 => 'MC88100', 2 => 'MC88110', },
  :wtf => { 0 => 'MC98000_ALL', 1 => 'MC98601', },
  'I860' => { 0 => 'ALL', 1 => '860', },
  'RS6000' => { 0 => 'ALL', 1 => 'RS6000', },
  :wtf2 => { 0 => 'SUN4_ALL', 1 => 'SUN4_260', 2 => 'SUN4_110', },
  'SPARC' => { 0 => 'SPARC_ALL', },
  'POWERPC' => { 0 => 'ALL', 1 => '601', 2 => '602', 3 => '603', 4 => '603e',
    5 => '603ev', 6 => '604', 7 => '604e', 8 => '620',
    9 => '750', 10 => '7400', 11 => '7450', 100 => '970',
  },
  'VEO' => { 1 => 'VEO_1', 2 => 'VEO_ALL', },
}
# cpu types that share another type's table get their own copy of it
SUBCPU['NS32332'] = SUBCPU['NS32032'].dup
SUBCPU['NS32532'] = SUBCPU['NS32032'].dup
SUBCPU['POWERPC64'] = SUBCPU['POWERPC'].dup
SUBCPU['X86_64'] = SUBCPU['I386'].dup
70
+
71
# flag bits stored in the top byte of the header cpusubtype word
SUBCPUFLAG = { 0x80 => 'LIB64' }


# header filetype: consecutive values starting at 1
FILETYPE = {}
%w[OBJECT EXECUTE FVMLIB CORE PRELOAD DYLIB DYLINKER BUNDLE DYLIB_STUB].each_with_index { |name, i|
  FILETYPE[i + 1] = name
}

# header flags: one name per bit, starting at bit 0
FLAGS = {}
%w[NOUNDEFS INCRLINK DYLDLINK BINDATLOAD
   PREBOUND SPLIT_SEGS LAZY_INIT TWOLEVEL
   FORCE_FLAT NOMULTIDEFS NOFIXPREBINDING PREBINDABLE
   ALLMODSBOUND SUBSECTIONS_VIA_SYMBOLS CANONICAL WEAK_DEFINES
   BINDS_TO_WEAK ALLOW_STACK_EXECUTION].each_with_index { |name, bit|
  FLAGS[1 << bit] = name
}

# segment protection bits (maxprot / initprot)
SEG_PROT = { 1 => 'READ', 2 => 'WRITE', 4 => 'EXECUTE' }

# load command numbers; 0x1..0x17 are consecutive
LOAD_COMMAND = {}
%w[SEGMENT SYMTAB SYMSEG THREAD
   UNIXTHREAD LOADFVMLIB IDFVMLIB IDENT
   FVMFILE PREPAGE DYSYMTAB LOAD_DYLIB
   ID_DYLIB LOAD_DYLINKER ID_DYLINKER PREBOUND_DYLIB
   ROUTINES SUB_FRAMEWORK SUB_UMBRELLA SUB_CLIENT
   SUB_LIBRARY TWOLEVEL_HINTS PREBIND_CKSUM].each_with_index { |name, i|
  LOAD_COMMAND[i + 1] = name
}
# some of the later commands carry the 0x8000_0000 bit ('REQ_DYLD')
LOAD_COMMAND.update({
  0x8000_0018 => 'LOAD_WEAK_DYLIB',
  0x19 => 'SEGMENT_64',
  0x1a => 'ROUTINES_64',
  0x1b => 'UUID',
  0x8000_001c => 'RPATH',
  0x1d => 'CODE_SIGNATURE_PTR',
  0x1e => 'CODE_SEGMENT_SPLIT_INFO',
  0x8000_001f => 'REEXPORT_DYLIB',
})

# cpu name => { thread flavor number => flavor name }
THREAD_FLAVOR = {
  'POWERPC' => {
    1 => 'THREAD_STATE',
    2 => 'FLOAT_STATE',
    3 => 'EXCEPTION_STATE',
    4 => 'VECTOR_STATE'
  },
  'I386' => {
    1 => 'NEW_THREAD_STATE',
    2 => 'FLOAT_STATE',
    3 => 'ISA_PORT_MAP_STATE',
    4 => 'V86_ASSIST_STATE',
    5 => 'REGS_SEGS_STATE',
    6 => 'THREAD_SYSCALL_STATE',
    7 => 'THREAD_STATE_NONE',
    8 => 'SAVED_STATE',
    # -1 .. -4 seen as unsigned 32-bit values
    0xffff_ffff => 'THREAD_STATE',
    0xffff_fffe => 'THREAD_FPSTATE',
    0xffff_fffd => 'THREAD_EXCEPTSTATE',
    0xffff_fffc => 'THREAD_CTHREADSTATE'
  }
}

# the three bitfields of the Symbol type byte
SYM_SCOPE = { 0 => 'LOCAL', 1 => 'GLOBAL' }
# keys are the values 0, 2, 0xa, 0xe, 0x1e divided by two
SYM_TYPE = { 0 => 'UNDF', 1 => 'ABS', 5 => 'INDR', 7 => 'SECT', 0xf => 'TYPE' }
SYM_STAB = { }
129
+
130
# Base class of every structure in this file.
# Registers an :xword integer field type on top of Metasm::SerialStruct
# (presumably backed by MachO#encode_xword / #decode_xword -- TODO confirm).
class SerialStruct < Metasm::SerialStruct
  new_int_field(:xword)
end
133
+
134
# The Mach-O file header.
# The 4-byte magic selects the word size and endianness of the whole file;
# 64-bit headers carry one more word (reserved) after the flags.
class Header < SerialStruct
  mem :magic, 4
  # NOTE(review): presumably invoked once :magic is read, so that the
  # following fields are decoded with the size/endianness set here.
  decode_hook { |m, h|
    case h.magic
    when MAGIC; m.size = 32 ; m.endianness = :big
    when CIGAM; m.size = 32 ; m.endianness = :little
    when MAGIC64; m.size = 64 ; m.endianness = :big
    when CIGAM64; m.size = 64 ; m.endianness = :little
    else raise InvalidExeFormat, "Invalid Mach-O signature #{h.magic.unpack('H*').first.inspect}"
    end
  }
  word :cputype
  bitfield :word, 0 => :cpusubtype, 24 => :cpusubtypeflag
  words :filetype, :ncmds, :sizeofcmds, :flags
  fld_enum :cputype, CPU
  fld_enum(:cpusubtype) { |m, h| SUBCPU[h.cputype] || {} }
  fld_bits :cpusubtypeflag, SUBCPUFLAG
  fld_enum :filetype, FILETYPE
  fld_bits :flags, FLAGS
  attr_accessor :reserved # word 64bit only

  # fills the fields the user left unset from the state of the MachO +m+
  def set_default_values(m)
    @magic ||= case [m.size, m.endianness]
               when [32, :big]; MAGIC
               when [32, :little]; CIGAM
               when [64, :big]; MAGIC64
               when [64, :little]; CIGAM64
               end
    @cpusubtype ||= 'ALL'
    @filetype ||= 'EXECUTE'
    @ncmds ||= m.commands.length
    # a label: MachO#encode binds it once the commands are serialised
    @sizeofcmds ||= m.new_label('sizeofcmds')
    super(m)
  end

  # decodes the common fields, then the trailing reserved word of 64-bit files
  def decode(m)
    super(m)
    @reserved = m.decode_word if m.size == 64
  end

  # mirror of #decode: 64-bit headers end with the reserved word
  # (written as 0 when unset), so an encoded header has the size #decode reads
  def encode(m)
    ed = super(m)
    ed << m.encode_word(@reserved || 0) if m.size == 64
    ed
  end
end
174
+
175
+ class LoadCommand < SerialStruct
176
# generic command header: the command type and its size
# (the size includes these two words, see #decode)
words(:cmd, :cmdsize)
fld_enum(:cmd, LOAD_COMMAND)
# the command-specific structure (an instance of one of the nested classes), if any
attr_accessor(:data)
179
+
180
# Reads the generic header, then lets the nested class named after the
# command (when there is one) decode the payload into @data.
# Whatever the payload decoder consumed, the cursor is finally moved to
# the end of the command as given by cmdsize.
def decode(m)
  super(m)
  payload_start = m.encoded.ptr
  if @cmd.kind_of?(String)
    known = self.class.constants.map { |c| c.to_s }
    @data = self.class.const_get(@cmd).decode(m) if known.include?(@cmd)
  end
  # cmdsize includes the 8 header bytes already consumed by super
  m.encoded.ptr = payload_start + @cmdsize - 8
end
188
+
189
# Default command size is the placeholder 'cmdsize' (resolved in #encode);
# default command name is the unqualified class name of the payload.
def set_default_values(m)
  @cmdsize ||= 'cmdsize'
  @cmd     ||= data.class.name.sub(/.*::/, '')
  super(m)
end
194
+
195
# Serialises header + payload, pads to the word size of the file, and
# resolves the size placeholder when cmdsize was left symbolic.
def encode(m)
  blob = super(m)
  blob << @data.encode(m) if @data
  blob.align(m.size >> 3)
  if @cmdsize.kind_of?(String)
    blob.fixup!(@cmdsize => blob.length)
  end
  blob
end
202
+
203
+
204
# payload of the UUID command: 16 raw bytes
class UUID < SerialStruct
  mem(:uuid, 16)
end
207
+
208
# A segment command: name, placement in memory and in the file, protections,
# followed by nsects SECTION headers.
class SEGMENT < SerialStruct
  str :name, 16
  xwords :virtaddr, :virtsize, :fileoff, :filesize
  words :maxprot, :initprot, :nsects, :flags
  fld_bits :maxprot, SEG_PROT
  fld_bits :initprot, SEG_PROT
  # sections: the SECTION list; encoded: the segment content
  attr_accessor :sections, :encoded

  # decodes the segment header then each of its section headers
  def decode(m)
    super(m)
    @sections = []
    @nsects.times do
      @sections.push SECTION.decode(m, self)
    end
  end

  # addresses/offsets default to fresh labels, sizes come from @encoded
  def set_default_values(m)
    # TODO (in the caller?) @encoded = @sections.map { |s| s.encoded }.join
    @virtaddr ||= m.new_label('virtaddr')
    @virtsize ||= @encoded.length
    @fileoff  ||= m.new_label('fileoff')
    @filesize ||= @encoded.rawsize
    @sections ||= []
    @nsects   ||= @sections.length
    @maxprot  ||= %w[READ WRITE EXECUTE]
    @initprot ||= %w[READ]
    super(m)
  end

  # segment header immediately followed by the section headers
  def encode(m)
    # super calls set_default_values, which guarantees @sections is set
    ed = super(m)
    @sections.each { |s| ed = ed << s.encode(m) }
    ed
  end
end
SEGMENT_64 = SEGMENT
241
+
242
# A section header, found inside a SEGMENT command.
# 64-bit files carry one more word (res3) at the end of the structure.
class SECTION < SerialStruct
  str :name, 16
  str :segname, 16
  xwords :addr, :size
  words :offset, :align, :reloff, :nreloc, :flags, :res1, :res2
  attr_accessor :res3 # word 64bit only
  # segment: the owning SEGMENT; encoded: the section content
  attr_accessor :segment, :encoded

  # +s+ is the SEGMENT being decoded
  def decode(m, s)
    super(m)
    @res3 = m.decode_word if m.size == 64
    @segment = s
  end

  # the segment name defaults to the name of the owning segment
  def set_default_values(m)
    @segname ||= @segment.name
    # addr, offset, etc = @segment.virtaddr + 42
    super(m)
  end

  # mirror of #decode: 64-bit section headers end with res3 (0 when unset),
  # so that an encoded header has the size #decode expects
  def encode(m)
    ed = super(m)
    ed << m.encode_word(@res3 || 0) if m.size == 64
    ed
  end

  # extracts the section content from the file, located through its address
  def decode_inner(m)
    @encoded = m.encoded[m.addr_to_off(@addr), @size]
  end
end
SECTION_64 = SECTION
267
+
268
# SYMTAB command: file offset and count of the symbols, file offset and
# size of the string table (used by MachO#decode_symbols)
class SYMTAB < SerialStruct
  words(:symoff, :nsyms, :stroff, :strsize)
end
271
+
272
# DYSYMTAB command: eighteen consecutive words (index/count or offset/count pairs)
class DYSYMTAB < SerialStruct
  words(:ilocalsym, :nlocalsym,
        :iextdefsym, :nextdefsym,
        :iundefsym, :nundefsym,
        :tocoff, :ntoc,
        :modtaboff, :nmodtab,
        :extrefsymoff, :nextrefsyms,
        :indirectsymoff, :nindirectsyms,
        :extreloff, :nextrel,
        :locreloff, :nlocrel)
end
277
+
278
# THREAD / UNIXTHREAD command: a register dump (@ctx, register name => value)
# preceded by its flavor and its length.
#
# Registers are read and written as xwords (32 or 64 bits depending on the
# file), while +count+ is expressed in 32-bit words: a 64-bit register
# accounts for two units. Previously #encode wrote 32-bit words even for
# 64-bit files (truncating e.g. rip) and the default count ignored the
# register width; for 32-bit files nothing changes.
class THREAD < SerialStruct
  words :flavor, :count
  fld_enum(:flavor) { |m, t| THREAD_FLAVOR[m.header.cputype] || {} }
  attr_accessor :ctx

  # value of the program counter register, nil if the cpu type is unknown
  def entrypoint(m)
    @ctx ||= {}
    case m.header.cputype
    when 'I386'; @ctx[:eip]
    when 'X86_64'; @ctx[:rip]
    when 'POWERPC'; @ctx[:srr0]
    when 'ARM'; @ctx[:pc]
    end
  end

  # stores +ep+ in the program counter register; no-op for an unknown cpu type
  def set_entrypoint(m, ep)
    @ctx ||= {}
    case m.header.cputype
    when 'I386'; @ctx[:eip] = ep
    when 'X86_64'; @ctx[:rip] = ep
    when 'POWERPC'; @ctx[:srr0] = ep
    when 'ARM'; @ctx[:pc] = ep
    end
  end

  # ordered list of register names (symbols) making up the dump;
  # for an unknown cpu type, generic names r1..r<count>
  def ctx_keys(m)
    case m.header.cputype
    when 'I386'; %w[eax ebx ecx edx edi esi ebp esp ss eflags eip cs ds es fs gs]
    when 'X86_64'; %w[rax rbx rcx rdx rdi rsi rbp rsp r8 r9 r10 r11 r12 r13 r14 r15 rip rflags cs fs gs]
    when 'POWERPC'; %w[srr0 srr1 r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 cr xer lr ctr mq vrsave]
    when 'ARM'; %w[r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 sp lr pc]
    else [*1..@count].map { |i| "r#{i}" }
    end.map { |k| k.to_sym }
  end

  # number of 32-bit count units taken by one register (1 or 2)
  def words_per_register(m)
    m.size / 32
  end

  # reads as many registers as +count+ allows, in ctx_keys order
  def decode(m)
    super(m)
    nregs = @count / words_per_register(m)
    @ctx = ctx_keys(m)[0, nregs].inject({}) { |ctx, r| ctx.update r => m.decode_xword }
  end

  # unset registers default to 0, count to the size of the full dump
  def set_default_values(m)
    @ctx ||= {}
    ctx_keys(m).each { |k| @ctx[k] ||= 0 }
    @count ||= @ctx.length * words_per_register(m)
    super(m)
  end

  # header followed by every register of ctx_keys, with the width #decode reads
  def encode(m)
    ctx_keys(m).inject(super(m)) { |ed, r| ed << m.encode_xword(@ctx[r]) }
  end
end
UNIXTHREAD = THREAD
330
+
331
# Base of the commands whose payload references a NUL-terminated string
# through an offset.
class STRING < SerialStruct
  word :stroff
  attr_accessor :str

  # decodes the fields, then jumps to the string and reads it into @str
  def decode(m)
    struct_start = m.encoded.ptr
    super(m)
    # the -8 presumably accounts for the generic command header that
    # precedes this structure (stroff counted from the command start)
    m.encoded.ptr = struct_start + @stroff - 8
    @str = m.decode_strz
  end
end
342
+
343
# dynamic library reference: string offset, then timestamp and versions
class DYLIB < STRING
  word(:stroff)
  words(:timestamp, :cur_version, :compat_version)
end
# both commands share the DYLIB layout
LOAD_DYLIB = DYLIB
ID_DYLIB   = DYLIB
349
+
350
# prebound library: string offset followed by two words
class PREBOUND_DYLIB < STRING
  word(:stroff)
  word(:nmodules)
  word(:linked_modules)
end
355
+
356
+ LOAD_DYLINKER = STRING
357
+ ID_DYLINKER = STRING
358
+
359
# ROUTINES command: eight xwords (two named fields, six reserved)
class ROUTINES < SerialStruct
  xwords(:init_addr, :init_module,
         :res1, :res2, :res3, :res4, :res5, :res6)
end
ROUTINES_64 = ROUTINES
363
+
364
# TWOLEVEL_HINTS command: an offset and a number of hints
class TWOLEVEL_HINTS < SerialStruct
  words(:offset, :nhints)
end
367
# one hint: a single word split in two bitfields (isub_image from bit 0, itoc from bit 8)
class TWOLEVEL_HINT < SerialStruct
  bitfield(:word, 0 => :isub_image, 8 => :itoc)
end
370
+
371
+ SUB_FRAMEWORK = STRING
372
+ SUB_UMBRELLA = STRING
373
+ SUB_LIBRARY = STRING
374
+ SUB_CLIENT = STRING
375
+
376
# Command pointing to the code signature: file offset and size of the blob.
class CODE_SIGNATURE_PTR < SerialStruct
  word :offset
  word :size
  # the decoded CODE_SIGNATURE found at +offset+
  attr_accessor :codesig

  # decodes the two fields, then the signature they point to
  def decode(m)
    struct_start = m.encoded.ptr
    super(m)
    m.encoded.ptr = @offset
    @codesig = CODE_SIGNATURE.decode(m)
    m.encoded.ptr = struct_start + @size
  end
end
389
+ end
390
+
391
# Header of the code signature blob: magic, size, and a table of +count+
# slot pointers (CS_SLOT_PTR) kept in @slots.
class CODE_SIGNATURE < SerialStruct
  word :magic
  word :size
  word :count
  attr_accessor :slots

  # The blob is decoded as big-endian regardless of the file endianness.
  # The endianness of +m+ is restored even when decoding raises, so a
  # malformed signature cannot leave the MachO object in big-endian mode.
  def decode(m)
    cs_base = m.encoded.ptr
    saved_endianness = m.endianness
    m.endianness = :big
    begin
      super(m)
      @slots = []
      # slot offsets are relative to the start of the blob
      @count.times { @slots << CS_SLOT_PTR.decode(m, cs_base) }
    ensure
      m.endianness = saved_endianness
    end
    saved_endianness
  end
end
408
+
409
# One entry of the code signature slot table: a type and the offset of the
# slot body, relative to the start of the signature blob.
class CS_SLOT_PTR < SerialStruct
  word :type
  word :offset
  # the decoded slot: CS_CODE_DIRECTORY for type 0, CS_SLOT otherwise
  attr_accessor :body

  # decodes the entry and the body it points to, then comes back right
  # after the entry so that the next one can be read
  def decode(m, cs_base)
    super(m)
    resume_at = m.encoded.ptr
    m.encoded.ptr = cs_base + @offset

    body_class = (@type == 0 ? CS_CODE_DIRECTORY : CS_SLOT)
    @body = body_class.decode(m)

    m.encoded.ptr = resume_at
  end
end
427
+
428
# Generic code signature slot: magic and size, then raw data.
class CS_SLOT < SerialStruct
  word :magic
  word :size
  attr_accessor :data

  # reads +size+ raw bytes following the two header words
  def decode(m)
    super(m)
    raw = m.encoded.read(@size)
    @data = raw
  end
end
438
+
439
# Code directory slot (slot type 0) of the code signature.
class CS_CODE_DIRECTORY < SerialStruct
  words :magic, :size, :version
  mem :unk1, 4
  word :hash_offset
  word :name_offset
  word :special_page_count
  word :code_page_count
  mem :unk3, 8
  attr_accessor :name, :cs_slots_hash, :code_hash

  # Decodes the fixed fields, then the name and hash data they reference.
  # The declared fields add up to 40 bytes, hence the "- 40" when turning
  # name_offset / hash_offset into positions relative to the current cursor.
  def decode(m)
    super(m)
    after_fields = m.encoded.ptr

    m.encoded.ptr = after_fields + @name_offset - 40
    @name = m.decode_strz
    # 20 bytes per special page, read right after the name
    @cs_slots_hash = m.encoded.read(@special_page_count * 20)

    m.encoded.ptr = after_fields + @hash_offset - 40
    @code_hash = m.encoded.read(@size - @hash_offset)

    m.encoded.ptr = after_fields
  end
end
463
+
464
# One symbol table entry.
class Symbol < SerialStruct
  word :nameoff
  bitfield :byte, 0 => :scope, 1 => :type, 5 => :stab
  fld_enum :scope, SYM_SCOPE
  fld_enum :type, SYM_TYPE
  fld_enum :stab, SYM_STAB
  byte :sect
  half :desc
  xword :value
  # the symbol name, resolved from the string table by #decode
  attr_accessor :name

  # Decodes the entry; when +buf+ (the raw string table) is given, also
  # resolves the name as the bytes from nameoff up to the next NUL.
  # The name is left unset when buf is nil or holds no NUL at/after nameoff.
  def decode(m, buf=nil)
    super(m)
    idx = buf.index(?\0, @nameoff) if buf
    @name = buf[@nameoff...idx] if idx
  end
end
481
+
482
# Integer (de)serialisation helpers, all using the endianness of the file.
# byte/half/word are 8/16/32 bits wide; xword is 32 bits when @size is 32
# and 64 bits otherwise.
def encode_byte(val)
  Expression[val].encode(:u8, @endianness)
end

def encode_half(val)
  Expression[val].encode(:u16, @endianness)
end

def encode_word(val)
  Expression[val].encode(:u32, @endianness)
end

def encode_xword(val)
  type = (@size == 32 ? :u32 : :u64)
  Expression[val].encode(type, @endianness)
end

# the decoders read from +edata+, by default the whole file content
def decode_byte(edata = @encoded)
  edata.decode_imm(:u8, @endianness)
end

def decode_half(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end

def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end

def decode_xword(edata= @encoded)
  type = (@size == 32 ? :u32 : :u64)
  edata.decode_imm(type, @endianness)
end
490
+
491
+
492
# endianness/size: byte order and word size (32/64) of the file
# header: the Header; commands: the LoadCommand list; segments: the SEGMENT
# payloads of the segment commands; symbols: filled by #decode_symbols;
# source: per-segment source sequences used by #assemble
attr_accessor :endianness, :size,
              :header, :source,
              :segments,
              :commands,
              :symbols
497
+
498
# Creates an empty Mach-O. Endianness and word size come from +cpu+ when
# given (unless already set), otherwise default to little-endian 32-bit.
def initialize(cpu=nil)
  super(cpu)
  if cpu
    @endianness ||= cpu.endianness
    @size ||= cpu.size
  else
    @endianness ||= :little
    @size ||= 32
  end
  @header = Header.new
  @commands = []
  @segments = []
end
506
+
507
# decodes the Mach header from the current offset in self.encoded,
# then the load commands that follow it; the payload of every
# SEGMENT / SEGMENT_64 command is also recorded in @segments
def decode_header
  @header.decode self
  @header.ncmds.times do
    @commands << LoadCommand.decode(self)
  end
  @commands.each do |cmd|
    @segments << cmd.data if %w[SEGMENT SEGMENT_64].include?(cmd.cmd)
  end
end
518
+
519
+ def decode
520
+ decode_header
521
+ @segments.each { |s| decode_segment(s) }
522
+ decode_symbols
523
+ decode_relocations
524
+ end
525
+
526
# Decodes the symbol table(s) into @symbols and turns what can be located
# into exports of the segment contents:
# * the entrypoint of each THREAD/UNIXTHREAD command, exported as
#   'entrypoint' ('entrypoint_<n>' from the second thread command on)
# * each named symbol with a non-zero value falling inside a segment
# Addresses outside of every segment are ignored. A thread command whose
# entrypoint is unknown (nil, e.g. unsupported cpu type) is skipped too;
# it used to raise NoMethodError on the address comparison.
def decode_symbols
  @symbols = []
  ep_count = 0
  # segment whose virtual range holds addr, or nil
  segment_at = lambda { |addr|
    @segments.find { |seg_| addr >= seg_.virtaddr and addr < seg_.virtaddr + seg_.virtsize }
  }
  @commands.each { |cmd|
    e = cmd.data
    case cmd.cmd
    when 'SYMTAB'
      # read the whole string table first, symbols reference it by offset
      @encoded.ptr = e.stroff
      buf = @encoded.read e.strsize
      @encoded.ptr = e.symoff
      e.nsyms.times { @symbols << Symbol.decode(self, buf) }
    when 'THREAD', 'UNIXTHREAD'
      ep_count += 1
      next if not ep = e.entrypoint(self)
      next if not seg = segment_at[ep]
      seg.encoded.add_export("entrypoint#{"_#{ep_count}" if ep_count >= 2 }", ep - seg.virtaddr)
    end
  }
  @symbols.each { |s|
    next if s.value == 0 or not s.name
    next if not seg = segment_at[s.value]
    seg.encoded.add_export(s.name, s.value - seg.virtaddr)
  }
end
550
+
551
# placeholder called at the end of #decode: does nothing
def decode_relocations
  nil
end
553
+
554
# Extracts the content of segment +s+ from the file (fileoff/filesize),
# gives it the virtual size of the segment, and extracts the content of
# each of its sections (offset/size).
def decode_segment(s)
  content = @encoded[s.fileoff, s.filesize]
  s.encoded = content
  content.virtsize = s.virtsize
  s.sections.each do |sect|
    sect.encoded = @encoded[sect.offset, sect.size]
  end
end
559
+
560
# yields the content and the virtual address of every segment
def each_section(&b)
  @segments.each do |seg|
    yield(seg.encoded, seg.virtaddr)
  end
end
563
+
564
# entrypoint of every THREAD / UNIXTHREAD command, in command order
def get_default_entrypoints
  thread_cmds = @commands.select { |cmd| %w[THREAD UNIXTHREAD].include?(cmd.cmd) }
  thread_cmds.map { |cmd| cmd.data.entrypoint(self) }
end
567
+
568
# returns a new cpu object matching the cputype of the header,
# raises for the cpu types without a counterpart here
def cpu_from_headers
  cpu_class = case @header.cputype
              when 'I386' then Ia32
              when 'X86_64' then X86_64
              when 'POWERPC' then PowerPC
              when 'ARM' then ARM
              end
  raise "unsupported cpu #{@header.cputype}" unless cpu_class
  cpu_class.new
end
577
+
578
# Serialises the file and returns the raw data.
#
# Steps: make sure every segment has its SEGMENT/SEGMENT_64 command, encode
# the header and the commands, prepend them to the content of the first
# segment holding raw data, then lay the segments out one after the other
# (each padded to 0x1000) while collecting the value of every label in
# +binding+, which is finally applied to the whole file.
#
# Raises if no segment holds raw data (there is nowhere to put the header);
# this used to fail with a NoMethodError on nil.
def encode(type=nil)
  @encoded = EncodedData.new

  init_header_cpu

  if false and maybeyoureallyneedthis
    segz = LoadCommand::SEGMENT.new
    segz.name = '__PAGEZERO'
    segz.encoded = EncodedData.new
    segz.encoded.virtsize = 0x1000
    segz.initprot = segz.maxprot = 0
    @segments.unshift segz
  end

  # TODO sections -> segments
  @segments.each { |seg|
    cname = (@size == 64 ? 'SEGMENT_64' : 'SEGMENT')
    if not @commands.find { |cmd| cmd.cmd == cname and cmd.data == seg }
      cmd = LoadCommand.new
      cmd.cmd = cname
      cmd.data = seg
      @commands << cmd
    end
  }

  binding = {}
  @encoded << @header.encode(self)

  first = @segments.find { |seg| seg.encoded.rawsize > 0 }
  raise 'MachO: no segment with raw data to hold the header' if not first

  # the sizes of the first segment change once the header is prepended:
  # make them labels, bound in the layout loop below
  first.virtsize = new_label('virtsize')
  first.filesize = new_label('filesize')

  hlen = @encoded.length
  @commands.each { |cmd| @encoded << cmd.encode(self) }
  binding[@header.sizeofcmds] = @encoded.length - hlen if @header.sizeofcmds.kind_of? String

  # put header in first segment
  first.encoded = @encoded << first.encoded

  @encoded = EncodedData.new

  addr = @encoded.length
  @segments.each { |seg|
    seg.encoded.align 0x1000
    binding[seg.virtaddr] = addr
    # bind each size only when it is itself a label (virtsize used to be
    # bound depending on the type of filesize)
    binding[seg.virtsize] = seg.encoded.length if seg.virtsize.kind_of? String
    binding[seg.fileoff] = @encoded.length
    binding[seg.filesize] = seg.encoded.rawsize if seg.filesize.kind_of? String
    binding.update seg.encoded.binding(addr)
    # only the raw part goes to the file
    @encoded << seg.encoded[0, seg.encoded.rawsize]
    @encoded.align 0x1000
    addr += seg.encoded.length
  }

  @encoded.fixup! binding
  @encoded.data
end
636
+
637
# Prepares the object for source parsing.
# Until a section is selected, @cursource is a stand-in object: the first
# thing appended to it switches to '.text' (through
# parse_parser_instruction) and is then forwarded to the real cursource.
def parse_init
  # allow the user to specify a section, falls back to .text if none specified
  unless defined?(@cursource) && @cursource
    stand_in = Object.new
    class << stand_in
      attr_accessor :exe
      def <<(*a)
        t = Preprocessor::Token.new(nil)
        t.raw = '.text'
        exe.parse_parser_instruction t
        exe.cursource.send(:<<, *a)
      end
    end
    stand_in.exe = self
    @cursource = stand_in
  end

  @source ||= {}

  init_header_cpu # for '.entrypoint'

  super()
end
659
+
660
# sets the header cputype from the shortname of @cpu, unless already set
# (left nil for a cpu without a known Mach-O name)
def init_header_cpu
  @header.cputype ||= {
    'ia32'    => 'I386',
    'x64'     => 'X86_64',
    'powerpc' => 'POWERPC',
    'arm'     => 'ARM',
  }[@cpu.shortname]
end
668
+
669
# handles macho meta-instructions
#
# syntax:
#   .section "<name>" [<perms>]
#     change current section (where normal instruction/data are put)
#     perms = list of 'r' 'w' 'x', may be prefixed by 'no'
#     each specifier adds to (or, with 'no', removes from) the initial
#     protection of the segment, using the SEG_PROT names
#     shortcuts: .text .data .rodata .bss
#   .entrypoint [<label>]
#     defines the program entrypoint to the specified label / current location
#
# anything else is forwarded to the parent class
def parse_parser_instruction(instr)
  # reads the next token, which must be a (quoted) string; returns its text
  readstr = lambda {
    @lexer.skip_space
    t = nil
    raise instr, "string expected, found #{t.raw.inspect if t}" if not t = @lexer.readtok or (t.type != :string and t.type != :quoted)
    t.value || t.raw
  }
  # raises unless the rest of the line is empty
  check_eol = lambda {
    @lexer.skip_space
    t = nil
    raise instr, "eol expected, found #{t.raw.inspect if t}" if t = @lexer.nexttok and t.type != :eol
  }

  case instr.raw.downcase
  when '.text', '.data', '.rodata', '.bss'
    # '.text' => '__TEXT', etc
    sname = instr.raw.upcase.sub('.', '__')
    if not @segments.find { |s| s.kind_of? LoadCommand::SEGMENT and s.name == sname }
      s = LoadCommand::SEGMENT.new
      s.name = sname
      s.encoded = EncodedData.new
      s.initprot = case sname
                   when '__TEXT'; %w[READ EXECUTE]
                   when '__DATA', '__BSS'; %w[READ WRITE]
                   when '__RODATA'; %w[READ]
                   end
      s.maxprot = %w[READ WRITE EXECUTE]
      @segments << s
    end
    @cursource = @source[sname] ||= []
    check_eol[] if instr.backtrace # special case for magic @cursource

  when '.section'
    # .section <section name|"section name"> [(no)wxalloc] [base=<expr>]
    sname = readstr[]
    if not s = @segments.find { |s_| s_.name == sname }
      s = LoadCommand::SEGMENT.new
      s.name = sname
      s.encoded = EncodedData.new
      s.initprot = %w[READ]
      s.maxprot = %w[READ WRITE EXECUTE]
      @segments << s
    end
    loop do
      @lexer.skip_space
      break if not tok = @lexer.nexttok or tok.type != :string
      case @lexer.readtok.raw.downcase
      when /^(no)?(r)?(w)?(x)?$/
        ar = []
        ar << 'READ' if $2
        ar << 'WRITE' if $3
        # must be a SEG_PROT name: 'EXECINSTR' (the ELF spelling) matched
        # nothing, so 'x' added an unknown flag and 'nox' removed nothing
        ar << 'EXECUTE' if $4
        if $1; s.initprot -= ar
        else s.initprot |= ar
        end
      else raise instr, 'unknown specifier'
      end
    end
    @cursource = @source[sname] ||= []
    check_eol[]

  when '.entrypoint' # XXX thread-specific
    # ".entrypoint <somelabel/expression>" or ".entrypoint" (here)
    @lexer.skip_space
    if tok = @lexer.nexttok and tok.type == :string
      raise instr if not entrypoint = Expression.parse(@lexer)
    else
      entrypoint = new_label('entrypoint')
      @cursource << Label.new(entrypoint, instr.backtrace.dup)
    end
    # reuse the existing thread command if any
    if not cmd = @commands.find { |cmd_| cmd_.cmd == 'THREAD' or cmd_.cmd == 'UNIXTHREAD' }
      cmd = LoadCommand.new
      cmd.cmd = 'UNIXTHREAD' # UNIXTHREAD creates a stack
      cmd.data = LoadCommand::THREAD.new
      cmd.data.ctx = {}
      cmd.data.flavor = 'NEW_THREAD_STATE' # XXX i386 specific
      @commands << cmd
    end
    cmd.data.set_entrypoint(self, entrypoint)
    check_eol[]

  else super(instr)
  end
end
762
+
763
# assembles the hash self.source to a section array:
# parses the arguments first when there are any, then appends the assembled
# form of each source sequence to the content of the segment of the same
# name, and empties the sequence. Raises if there is no such segment.
def assemble(*a)
  parse(*a) unless a.empty?
  @source.each do |segname, sequence|
    target = @segments.find { |s_| s_.name == segname }
    raise "no segment named #{segname} ?" unless target
    target.encoded << assemble_sequence(sequence, @cpu)
    sequence.clear
  end
end
772
+ end
773
+ MACHO = MachO
774
+
775
+
776
+
777
# A "fat" file: a header followed by a table of archive members, each one
# being a slice of the file tagged with a cpu type.
class UniversalBinary < ExeFormat
  MAGIC = "\xca\xfe\xba\xbe" # 0xcafebabe

  # file header: signature and number of members
  class Header < SerialStruct
    mem :magic, 4
    word :nfat_arch

    # raises InvalidExeFormat unless the signature is MAGIC
    def decode(u)
      super(u)
      raise InvalidExeFormat, "Invalid UniversalBinary signature #{@magic.unpack('H*').first.inspect}" if @magic != MAGIC
    end
  end
  # one entry of the member table
  class FatArch < SerialStruct
    words :cputype, :subcpu, :offset, :size, :align
    fld_enum :cputype, MachO::CPU
    fld_enum(:subcpu) { |x, a| MachO::SUBCPU[a.cputype] || {} }
    # the slice of the file described by offset/size
    attr_accessor :encoded
  end

  def encode_word(val) Expression[val].encode(:u32, @endianness) end
  def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end

  attr_accessor :endianness, :encoded, :header, :archive
  # the endianness is always big here
  def initialize
    @endianness = :big
    super()
  end

  # decodes the header and the member table, and slices the file content
  # for each member (empty content if the slice is out of the file)
  def decode
    @header = Header.decode(self)
    @archive = []
    @header.nfat_arch.times { @archive << FatArch.decode(self) }
    @archive.each { |a|
      a.encoded = @encoded[a.offset, a.size] || EncodedData.new
    }
  end

  # decodes the i-th member with AutoExe; nil if there is no such member
  # (including when nothing was decoded or added yet)
  def [](i) AutoExe.decode(@archive[i].encoded) if @archive and @archive[i] end
  # appends a member; the list is created on first use
  def <<(exe) (@archive ||= []) << exe end

  # AutoExe hook: decodes the fat file and loads its first member.
  # Raises InvalidExeFormat if the archive holds no member.
  def self.autoexe_load(*a)
    ub = super(*a)
    ub.decode
    # TODO have a global callback or whatever to prompt the user
    # which file he wants to load in the dasm
    raise InvalidExeFormat, 'UniversalBinary: empty archive' if not member = ub.archive[0]
    puts "UniversalBinary: using 1st archive member" if $VERBOSE
    AutoExe.load(member.encoded)
  end

end
827
+ end