metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,314 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+ require 'metasm/encode'
9
+
10
+ module Metasm
11
+ class Ia32
12
# raised when an indirection has no valid modrm encoding
# (ModRM#encode16 raises it for an unsupported 16bit base/index pair)
# note: derives from Exception, so a bare 'rescue' does not catch it
class InvalidModRM < Exception
end
13
+ class ModRM
14
# returns the modrm byte (an Integer) describing a register-direct operand
# the two top bits are set (0xc0), the register index goes in the low bits
# reg:: any object responding to +val+ (works with Reg/SimdReg)
# mregval:: value stored in the 'reg' field (bits 3..5), 0 by default
def self.encode_reg(reg, mregval = 0)
  reg_field = mregval << 3
  rm_field = reg.val
  0xc0 | reg_field | rm_field
end
19
+
20
# encodes the indirection, dispatching on the address size (@adsz)
# reg:: the 'reg' field of the modrm byte: an Integer, or an Argument
#       (in which case its +val+ is used)
# endianness:: forwarded to encode16 / encode32
#
# the caller is responsible for setting the adsz
# returns an array, 1 element per possible immediate size (for
# un-reduce()able Expression), or nil when @adsz is neither 16 nor 32
def encode(reg = 0, endianness = :little)
  reg = reg.val if reg.kind_of? Argument
  if @adsz == 16
    encode16(reg, endianness)
  elsif @adsz == 32
    encode32(reg, endianness)
  end
end
31
+
32
+ private
33
# encodes the indirection using the 16bit addressing forms
# reg:: Integer, value of the 'reg' field (bits 3..5) of the modrm byte
# endianness:: forwarded to the displacement encoders
#
# returns an array of EncodedData:
# - 1 element when there is no displacement or when its size is known
# - 2 elements (16bit displacement form first, then the 8bit form) when
#   Expression.in_range? returns nil for the displacement
# raises InvalidModRM if the base/index registers are not a valid 16bit pair
def encode16(reg, endianness)
  if not b
    # imm only: r/m field = 6, followed by the 16bit address
    return [EncodedData.new << (6 | (reg << 3)) << @imm.encode(:u16, endianness)]
  end

  imm = @imm.reduce if self.imm
  # a null displacement is not encoded
  imm = nil if imm == 0
  ret = EncodedData.new
  # r/m field, selected from the [base index, scaled index] register indexes
  # (16bit register indexes: 3 = bx, 5 = bp, 6 = si, 7 = di)
  ret <<
  case [@b.val, (@i.val if i)]
  when [3, 6], [6, 3]; 0	# bx+si
  when [3, 7], [7, 3]; 1	# bx+di
  when [5, 6], [6, 5]; 2	# bp+si
  when [5, 7], [7, 5]; 3	# bp+di
  when [6, nil]; 4	# si
  when [7, nil]; 5	# di
  when [5, nil]
    # bp alone: r/m 6 without displacement is the imm-only form above,
    # so force a (possibly null) displacement
    imm ||= 0
    6
  when [3, nil]; 7	# bx
  else raise InvalidModRM, 'invalid modrm16'
  end

  # add bits in the first octet of ret.data (1.9 compatibility layer)
  # ret.data[0] may be an Integer or a 1-char String depending on the ruby version
  or_bits = lambda { |v|
    if ret.data[0].kind_of? Integer
      ret.data[0] |= v
    else
      ret.data[0] = (ret.data[0].unpack('C').first | v).chr
    end
  }

  or_bits[reg << 3]

  if imm
    case Expression.in_range?(imm, :i8)
    when true
      # fits in 8 bits: top bits = 1
      or_bits[1 << 6]
      [ret << Expression.encode_imm(imm, :i8, endianness)]
    when false
      # needs 16 bits: top bits = 2
      or_bits[2 << 6]
      [ret << Expression.encode_imm(imm, :a16, endianness)]
    when nil
      # size unknown: generate both forms
      rets = ret.dup
      or_bits[1<<6]
      ret << @imm.encode(:i8, endianness)
      ret, rets = rets, ret	# or_bits uses ret
      or_bits[2<<6]
      ret << @imm.encode(:a16, endianness)
      [ret, rets]
    end
  else
    [ret]
  end
end
89
+
90
# encodes the indirection using the 32bit addressing forms
# reg:: Integer, value of the 'reg' field (bits 3..5) of the modrm byte
# endianness:: forwarded to the displacement encoders
#
# returns an array of EncodedData:
# - 1 element when there is no displacement or when its size is known
# - 2 elements (32bit displacement form first, then the 8bit form) when
#   Expression.in_range? returns nil for the displacement
# raises EncodeError if the index register has index 4 (esp)
def encode32(reg, endianness)
  # 0 => [ [0      ], [1      ], [2      ], [3      ], [:sib      ], [:i32   ], [6      ], [7      ] ], \
  # 1 => [ [0, :i8 ], [1, :i8 ], [2, :i8 ], [3, :i8 ], [:sib, :i8 ], [5, :i8 ], [6, :i8 ], [7, :i8 ] ], \
  # 2 => [ [0, :i32], [1, :i32], [2, :i32], [3, :i32], [:sib, :i32], [5, :i32], [6, :i32], [7, :i32] ]
  #
  # b => 0  1  2  3  4  5+i|i 6  7
  # i => 0  1  2  3 nil   5   6  7

  ret = EncodedData.new << (reg << 3)

  # add bits in the first octet of ret.data (1.9 compatibility layer)
  # ret.data[0] may be an Integer or a 1-char String depending on the ruby version
  or_bits = lambda { |v|
    if ret.data[0].kind_of? Integer
      ret.data[0] |= v
    else
      ret.data[0] = (ret.data[0].unpack('C').first | v).chr
    end
  }

  if not self.b and not self.i
    # displacement only: r/m field = 5, followed by the 32bit address
    or_bits[5]
    [ret << @imm.encode(:a32, endianness)]

  elsif not self.b and self.s != 1
    # sib with no b
    raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4
    or_bits[4]
    # scale factor => 2-bit scale field
    s = {8=>3, 4=>2, 2=>1}[@s]
    # this form always carries a 32bit displacement, null if none was given
    imm = self.imm || Expression[0]
    # sib byte: scale | index | base field 5
    fu = (s << 6) | (@i.val << 3) | 5
    fu = fu.chr if s >= 2	# rb1.9 encoding fix
    [ret << fu << imm.encode(:a32, endianness)]
  else
    imm = @imm.reduce if self.imm
    # a null displacement is not encoded
    imm = nil if imm == 0

    if not self.i or (not self.b and self.s == 1)
      # no sib byte (except for [esp])
      # an unscaled index with no base is encoded as a base register
      b = self.b || self.i

      or_bits[b.val]
      # r/m field 4 announces a sib byte: 0x24 is index field 4, base field 4
      ret << 0x24 if b.val == 4
    else
      # sib
      or_bits[4]

      i, b = @i, @b
      # with a scale of 1 base and index can be swapped, to avoid an index
      # of 4 (rejected below) or a base of 5 (needs a displacement)
      b, i = i, b if @s == 1 and (i.val == 4 or b.val == 5)

      raise EncodeError, "Invalid ModRM #{self}" if i.val == 4

      # scale factor => 2-bit scale field
      s = {8=>3, 4=>2, 2=>1, 1=>0}[@s]
      # sib byte: scale | index | base
      fu = (s << 6) | (i.val << 3) | b.val
      fu = fu.chr if s >= 2	# rb1.9 encoding fix
      ret << fu
    end

    # base 5 without displacement bits is used by the displacement-only
    # forms above, so force a (possibly null) displacement
    imm ||= 0 if b.val == 5
    if imm
      case Expression.in_range?(imm, :i8)
      when true
        # fits in 8 bits: top bits = 1
        or_bits[1<<6]
        [ret << Expression.encode_imm(imm, :i8, endianness)]
      when false
        # needs 32 bits: top bits = 2
        or_bits[2<<6]
        [ret << Expression.encode_imm(imm, :a32, endianness)]
      when nil
        # size unknown: generate both forms
        rets = ret.dup
        or_bits[1<<6]
        ret << @imm.encode(:i8, endianness)
        rets, ret = ret, rets	# or_bits[] modifies ret directly
        or_bits[2<<6]
        ret << @imm.encode(:a32, endianness)
        [ret, rets]
      end
    else
      [ret]
    end
  end
end
170
+ end
171
+
172
class Farptr
  # encodes the far pointer: the address (encoded as +atype+) followed by
  # the segment selector (encoded as :u16)
  # note the argument order: endianness comes first here
  def encode(endianness, atype)
    encoded = @addr.encode(atype, endianness)
    encoded << @seg.encode(:u16, endianness)
  end
end
177
+
178
# returns all forms of the encoding of instruction i using opcode op
# program may be used to create a new label for relative jump/call
#
# program:: only used through program.new_label, for opcodes with the :setip
#           property (except ret*) whose first argument is an Expression
# i:: the instruction: its prefix hash and args are read
# op:: the opcode: its bin, args, fields, props and name are read
#
# returns an EncodedData, or an Array of EncodedData when the ModRM argument
# has more than one possible encoding
# raises EncodeError when the argument sizes are incompatible, and
# SyntaxError when an opcode argument type is not handled
def encode_instr_op(program, i, op)
  base = op.bin.dup
  oi = op.args.zip(i.args)
  # ors the value v in the opcode bytes, at the position described by op.fields[f]
  set_field = lambda { |f, v|
    v ||= 0	# ST => ST(0)
    fld = op.fields[f]
    base[fld[0]] |= v << fld[1]
  }

  size = i.prefix[:sz] || @size

  #
  # handle prefixes and bit fields
  #
  pfx = i.prefix.map { |k, v|
    case k
    when :jmp;  {:jmp => 0x3e, :nojmp => 0x2e}[v]
    when :lock; 0xf0
    # 0xf3 is rep/repe/repz, 0xf2 is repne/repnz
    when :rep;  {'repnz' => 0xf2, 'repz' => 0xf3, 'rep' => 0xf3}[v]
    end
  }.compact.pack 'C*'
  pfx << op.props[:needpfx] if op.props[:needpfx]

  # operand size override: 48 - x maps 16 <=> 32
  if op.name == 'movsx' or op.name == 'movzx'
    pfx << 0x66 if size == 48-i.args[0].sz
  else
    opsz = op.props[:argsz]
    oi.each { |oa, ia|
      case oa
      when :reg, :reg_eax, :modrm, :modrmA, :mrm_imm
        raise EncodeError, "Incompatible arg size in #{i}" if ia.sz and opsz and opsz != ia.sz
        opsz = ia.sz
      end
    }
    pfx << 0x66 if (not op.props[:argsz] or opsz != op.props[:argsz]) and (
      (opsz and size == 48 - opsz) or (op.props[:opsz] and op.props[:opsz] != size))
    if op.props[:opsz] and size == 48 - op.props[:opsz]
      opsz = op.props[:opsz]
    end
  end
  opsz ||= size

  if op.props[:adsz] and size == 48 - op.props[:adsz]
    pfx << 0x67
    adsz = 48 - size
  end
  adsz ||= size
  # addrsize override / segment override
  if mrm = i.args.grep(ModRM).first
    if not op.props[:adsz] and ((mrm.b and mrm.b.sz != adsz) or (mrm.i and mrm.i.sz != adsz))
      pfx << 0x67
      adsz = 48 - adsz
    end
    # prefix bytes indexed by segment register index (es cs ss ds fs gs)
    pfx << [0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65][mrm.seg.val] if mrm.seg
  end


  #
  # encode embedded arguments
  #
  postponed = []
  oi.each { |oa, ia|
    case oa
    when :reg, :seg3, :seg3A, :seg2, :seg2A, :eeec, :eeed, :regfp, :regmmx, :regxmm
      # field arg
      set_field[oa, ia.val]
      pfx << 0x66 if oa == :regmmx and op.props[:xmmx] and ia.sz == 128
    when :imm_val1, :imm_val3, :reg_cl, :reg_eax, :reg_dx, :regfp0
      # implicit
    else
      postponed << [oa, ia]
    end
  }

  if !(op.args & [:modrm, :modrmA, :modrmxmm, :modrmmmx]).empty?
    # reg field of modrm
    # the last opcode byte only holds this field: extract it and drop the
    # byte, the real modrm byte is generated when encoding the argument
    regval = (base[-1] >> 3) & 7
    base.pop
  end

  # convert label name for jmp/call/loop to relative offset
  if op.props[:setip] and op.name[0, 3] != 'ret' and i.args.first.kind_of? Expression
    postlabel = program.new_label('post'+op.name)
    target = postponed.first[1]
    target = target.rexpr if target.kind_of? Expression and target.op == :+ and not target.lexpr
    postponed.first[1] = Expression[target, :-, postlabel]
  end

  #
  # append other arguments
  #
  ret = EncodedData.new(pfx + base.pack('C*'))

  postponed.each { |oa, ia|
    case oa
    when :farptr; ed = ia.encode(@endianness, "a#{opsz}".to_sym)
    when :modrm, :modrmA, :modrmmmx, :modrmxmm
      if ia.kind_of? ModRM
        ed = ia.encode(regval, @endianness)
        if ed.kind_of?(::Array)
          if ed.length > 1
            # we know that no opcode can have more than 1 modrm
            # ret becomes an Array: one copy of the current bytes per modrm form
            ary = []
            ed.each { |m|
              ary << (ret.dup << m)
            }
            ret = ary
            next
          else
            ed = ed.first
          end
        end
      else
        ed = ModRM.encode_reg(ia, regval)
      end
    when :mrm_imm; ed = ia.imm.encode("a#{adsz}".to_sym, @endianness)
    when :i8, :u8, :u16; ed = ia.encode(oa, @endianness)
    when :i; ed = ia.encode("a#{opsz}".to_sym, @endianness)
    else raise SyntaxError, "Internal error: want to encode field #{oa.inspect} as arg in #{i}"
    end

    if ret.kind_of?(::Array)
      ret.each { |e| e << ed }
    else
      ret << ed
    end
  }

  # we know that no opcode with setip accept both modrm and immediate arg, so ret is not an ::Array
  # postlabel marks the end of the instruction, the origin of the relative offset
  ret.add_export(postlabel, ret.virtsize) if postlabel

  ret
end
313
+ end
314
+ end
@@ -0,0 +1,233 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+
9
+ module Metasm
10
+
11
+ # The ia32 aka x86 CPU
12
+ # currently limited to 16 and 32bit modes
13
+ class Ia32 < CPU
14
+
15
# base class of the instruction arguments
# provides class-level helpers declaring an index <-> name association
# (used by the register classes)
class Argument
  class << self
    # lookup tables, set on the subclass calling simple_map / double_map
    attr_accessor :i_to_s, :s_to_i

    # declares a register family identified by a single index
    # a:: list of [index, name] pairs
    # defines #val, #initialize(index) and .from_str(name) on the calling class
    def simple_map(a)
      # { 1 => 'dr1' }
      @i_to_s = Hash[*a.flatten]
      # { 'dr1' => 1 }
      @s_to_i = @i_to_s.invert

      class_eval do
        attr_accessor :val

        def initialize(v)
          known = self.class.i_to_s[v]
          raise Exception, "invalid #{self.class} #{v}" unless known
          @val = v
        end

        def self.from_str(s)
          new(@s_to_i[s])
        end
      end
    end

    # declares a register family identified by an index and a size
    # h:: size => list of names, the position in the list being the index
    # defines #val, #sz, #initialize(index, size) and .from_str(name) on the
    # calling class; from_str raises on an unknown name
    def double_map(h)
      # { 32 => { 1 => 'ecx' } }
      @i_to_s = h
      # { 'ecx' => [1, 32] }
      @s_to_i = {}
      @i_to_s.each do |size, names|
        names.each_with_index { |regname, idx| @s_to_i[regname] = [idx, size] }
      end

      class_eval do
        attr_accessor :val, :sz

        def initialize(v, sz)
          known = (self.class.i_to_s[sz] and self.class.i_to_s[sz][v])
          raise Exception, "invalid #{self.class} #{sz}/#{v}" unless known
          @val = v
          @sz = sz
        end

        def self.from_str(s)
          x = @s_to_i[s]
          raise "Bad #{name} #{s.inspect}" if not x
          new(*x)
        end
      end
    end
  end
end
64
+
65
+
66
# segment registers, by index: es, cs, ss, ds, fs, gs
# indexes 6 and 7 are the theoretical segr6/segr7
class SegReg < Argument
  names = %w[es cs ss ds fs gs segr6 segr7]
  simple_map((0...names.length).zip(names))
end
70
+
71
# debug registers dr0..dr7 (dr0..dr3, dr6, dr7, and the theoretical dr4/5)
class DbgReg < Argument
  simple_map(Array.new(8) { |idx| [idx, "dr#{idx}"] })
end
75
+
76
# control registers cr0..cr7 (cr0, cr2, cr3, cr4, and the theoretical cr1/5/6/7)
class CtrlReg < Argument
  simple_map(Array.new(8) { |idx| [idx, "cr#{idx}"] })
end
80
+
81
# floating point registers ST(0)..ST(7)
# the nil index stands for the bare 'ST' name
class FpReg < Argument
  numbered = (0..7).map { |idx| [idx, "ST(#{idx})"] }
  simple_map(numbered + [[nil, 'ST']])
end
85
+
86
# a single operation multiple data register
# 64bit: mm0..mm7, 128bit: xmm0..xmm7
class SimdReg < Argument
  double_map(
    64  => Array.new(8) { |n| "mm#{n}" },
    128 => Array.new(8) { |n| "xmm#{n}" }
  )

  # symbolic representation: the string form of the register, as a Symbol
  # di is accepted for interface compatibility and ignored
  def symbolic(di=nil)
    to_s.to_sym
  end
end
92
+
93
# general purpose registers, all sizes (8, 16 and 32 bits)
class Reg < Argument
  double_map 8  => %w{ al  cl  dl  bl  ah  ch  dh  bh},
             16 => %w{ ax  cx  dx  bx  sp  bp  si  di},
             32 => %w{eax ecx edx ebx esp ebp esi edi}

  # the 32bit register names as Symbols, by register index
  Sym = @i_to_s[32].map(&:to_sym)

  # returns a symbolic representation of the register:
  # eax => :eax
  # cx => :ecx & 0xffff
  # ah => (:eax >> 8) & 0xff
  # di is accepted for interface compatibility and ignored
  def symbolic(di=nil)
    full = Sym[@val]
    if @sz == 8
      # the high byte registers (name ending in 'h') are stored at index + 4
      return Expression[[Sym[@val-4], :>>, 8], :&, 0xff] if to_s[-1] == ?h
      Expression[full, :&, 0xff]
    elsif @sz == 16
      Expression[full, :&, 0xffff]
    else
      full
    end
  end

  # checks if two registers have bits in common
  # the indexes are compared modulo half the register size, so that
  # ah..bh (indexes 4..7) map to the same slot as al..bl; two distinct
  # 8bit registers never share
  def share?(other)
    return false unless other.val % (other.sz >> 1) == @val % (@sz >> 1)
    other.sz != @sz or @sz != 8 or other.val == @val
  end
end
123
+
124
# a far pointer
# an immediate (numeric) pointer and an immediate segment selector
class Farptr < Argument
  attr_accessor :seg, :addr

  # seg:: the segment selector
  # addr:: the pointer
  def initialize(seg, addr)
    @seg = seg
    @addr = addr
  end
end
132
+
133
# ModRM represents indirections in x86 (eg dword ptr [eax+4*ebx+12h])
class ModRM < Argument
  # valid combinations for a modrm
  # ints are reg indexes, symbols are immediates, except :sib
  # layout: address size => top 2 bits of the modrm byte => list indexed
  # by the low 3 bits
  Sum = {
    16 => {
      0 => [ [3, 6], [3, 7], [5, 6], [5, 7], [6], [7], [:i16], [3] ],
      1 => [ [3, 6, :i8 ], [3, 7, :i8 ], [5, 6, :i8 ], [5, 7, :i8 ], [6, :i8 ], [7, :i8 ], [5, :i8 ], [3, :i8 ] ],
      2 => [ [3, 6, :i16], [3, 7, :i16], [5, 6, :i16], [5, 7, :i16], [6, :i16], [7, :i16], [5, :i16], [3, :i16] ]
    },
    32 => {
      0 => [ [0], [1], [2], [3], [:sib], [:i32], [6], [7] ],
      1 => [ [0, :i8 ], [1, :i8 ], [2, :i8 ], [3, :i8 ], [:sib, :i8 ], [5, :i8 ], [6, :i8 ], [7, :i8 ] ],
      2 => [ [0, :i32], [1, :i32], [2, :i32], [3, :i32], [:sib, :i32], [5, :i32], [6, :i32], [7, :i32] ]
    }
  }


  attr_accessor :adsz, :sz
  attr_accessor :seg
  attr_accessor :s, :i, :b, :imm

  # creates a new ModRM with the specified attributes:
  # - adsz (16/32), sz (8/16/32: byte ptr, word ptr, dword ptr)
  # - s, i, b, imm (scale, index register, base register, displacement)
  # - segment selector override
  # nil components are left unset; the scale is stored only with an index
  def initialize(adsz, sz, s, i, b, imm, seg = nil)
    @adsz = adsz
    @sz = sz
    if i
      @s = s
      @i = i
    end
    @b = b if b
    @imm = imm if imm
    @seg = seg if seg
  end

  # returns the symbolic representation of the ModRM (ie an Indirection)
  # segment selectors are represented as eg "segment_base_fs"
  # not present when same as implicit (ds:edx, ss:esp)
  def symbolic(di=nil)
    ptr = nil
    ptr = Expression[ptr, :+, @b.symbolic(di)] if b
    ptr = Expression[ptr, :+, [@s, :*, @i.symbolic(di)]] if i
    ptr = Expression[ptr, :+, @imm] if imm
    if seg
      # implicit segment: index 2 (ss) when the base register has index 4
      # or 5 (esp/ebp), index 3 (ds) otherwise
      implicit = (b && (@b.val == 4 || @b.val == 5)) ? 2 : 3
      ptr = Expression["segment_base_#@seg", :+, ptr] if seg.val != implicit
    end
    Indirection[ptr.reduce, @sz/8, (di.address if di)]
  end
end
179
+
180
+
181
# Create a new instance of an Ia32 cpu
# arguments (any order)
# - size in bits (16, 32) [32]
# - instruction set (386, 486, pentium...) [latest]
# - endianness [:little]
# raises if an argument is left over, or if the instruction set has no
# matching init_<family> method
def initialize(*a)
  super()
  sizes = a & [16, 32]
  @size = sizes.first || 32
  a.delete @size
  endians = a & [:big, :little]
  @endianness = endians.first || :little
  a.delete @endianness
  # whatever remains last is taken as the instruction set
  @family = a.pop || :latest
  raise "Invalid arguments #{a.inspect}" unless a.empty?
  raise "Invalid Ia32 family #{@family.inspect}" unless respond_to?("init_#{@family}")
end
196
+
197
# wrapper to transparently forward Ia32.new(64) to X86_64.new
# only applies when called on Ia32 itself, not on a subclass
def self.new(*a)
  if a.include? 64 and self == Ia32
    X86_64.new(*a)
  else
    super(*a)
  end
end
202
+
203
# initializes the @opcode_list according to @family, by calling the
# matching init_<family> method
# returns @opcode_list
def init_opcode_list
  initializer = "init_#{@family}"
  send(initializer)
  @opcode_list
end
208
+
209
# defines some preprocessor macros to say who we are:
# _M_IX86 = 500, _X86_, __i386__
# pass any value in nodefine to just call super w/o defining anything of our own
def tune_prepro(pp, nodefine = false)
  super(pp)
  unless nodefine
    pp.define_weak('_M_IX86', 500)
    pp.define_weak('_X86_')
    pp.define_weak('__i386__')
  end
end
219
+
220
# returns a Reg object if the arg is a valid register (eg 'ax' => Reg.new(0, 16))
# returns nil if str is invalid
def str_to_reg(str)
  return unless Reg.s_to_i.has_key?(str)
  Reg.from_str(str)
end
225
+
226
# short name of the cpu: 'ia32', with a '_16' suffix in 16bit mode
# and a '_be' suffix when big-endian
def shortname
  ['ia32', ('_16' if @size == 16), ('_be' if @endianness == :big)].join
end
229
+ end
230
+
231
+ X86 = Ia32
232
+
233
+ end