metasm 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -0,0 +1,220 @@
1
+ Expression
2
+ ==========
3
+
4
+ Metasm uses this class to represent arbitrary symbolic arithmetic expressions, e.g.
5
+ * `42`
6
+ * `eax + 12`
7
+ * `loc_4228h + 4*ebx - 12`
8
+
9
+ These expressions can include `Integers`, `Symbols`, and `Strings`.
10
+
11
+ The symbols and strings represent arbitrary variables, with the convention that
12
+ strings represent fixed quantities (eg addresses, labels), whereas symbols
13
+ represent more variable stuff (eg register values).
14
+
15
+ There is also a special symbol that may be used, `:unknown`, to represent a
16
+ value that is known to be unknown. See the `reduce` section.
17
+
18
+ See also <core/Indirection.txt>.
19
+
20
+ The Expression class holds all methods relative to Integer binary manipulation,
21
+ that is `encoding` and `decoding` from/to a binary blob (see also
22
+ <core/EncodedData.txt>)
23
+
24
+
25
+ Members
26
+ -------
27
+
28
+ Expressions hold exactly 3 members:
29
+ * `lexpr`, the left-hand side of the expression
30
+ * `rexpr`, the right-hand side
31
+ * `op`, the operator
32
+
33
+ `lexpr` and `rexpr` can be any value, most often String, Symbol, Integer or
34
+ Expression. For unary operators, `lexpr` is `nil`.
35
+
36
+ `op` is a Symbol representing the operation.
37
+ It should be from the list:
38
+ * arithmetic: `+ - / * >> << & | ^`
39
+ * boolean: `|| && == != > >= < <=`
40
+ * unary: `+ - ~ !`
41
+
42
+
43
+ Instantiation
44
+ -------------
45
+
46
+ In ruby code, use the class method `[]`. It takes 1 to 3 arguments, `lexpr`,
47
+ `op`, and `rexpr`. `lexpr` defaults to `nil`, and `op` defaults to `:+` (except
48
+ for negative numeric values, which are stored with `op` == `:-` and `rexpr` ==
49
+ abs).
50
+
51
+ If `lexpr` or `rexpr` are an `Array`, the `[]` constructor is called
52
+ recursively, to ease the definition of nested Expressions.
53
+
54
+ Examples:
55
+
56
+ Expression[42]
57
+ Expression[:eax, :+, 12]
58
+ Expression[:-, 'my_var']
59
+ Expression[[:eax, :-, 4], :*, [:ebx, :+, 0x12]]
60
+
61
+ The Expression class also includes a parser, to allow creating an expression
62
+ from a string. `parse_string!` will create an Expression and update its
63
+ argument to point after the last part read successfully into the expr.
64
+ The parser handles standard C operator precedence.
65
+
66
+ str = "1 + var"
67
+ Expression.parse_string!(str) # => Expression[1, :+, "var"]
68
+ str = "42 bla"
69
+ Expression.parse_string!(str) # => Expression[42]
70
+ str # => "bla"
71
+
72
+ Use `parse_string` without the ! to parse the string without updating it.
73
+
74
+ External variables
75
+ ------------------
76
+
77
+ The `externals` method will return all non-integer members of the Expression.
78
+
79
+ Expression[[:eax, :+, 42], :-, "bla"].externals # => [:eax, "bla"]
80
+
81
+
82
+ Pattern matching
83
+ ----------------
84
+
85
+ The `match` method allows checking an Expression against a pattern without
86
+ having to check individual members. The pattern should be an Expression,
87
+ whose variable members should be Strings or Symbols, which are also passed as
88
+ arguments to the match function. On successful match, the correspondence
89
+ between variable patterns and their actual value matched is returned as a Hash.
90
+
91
+ Expression[1, :+, 2].match(Expression['var', :+, 2], 'var')
92
+ # => { 'var' => 1 }
93
+ Expression[1, :+, 2].match(Expression['var', :+, 'var'], 'var')
94
+ # => nil
95
+ Expression[1, :+, 1].match(Expression['var', :op, 'var'], 'var', :op)
96
+ # => { 'var' => 1, :op => :+ }
97
+
98
+
99
+ Reduction
100
+ ---------
101
+
102
+ Metasm Expressions include a basic symbolic computation engine, that allows
103
+ some simple transformations of the Expression. The reduction will also
104
+ compute numerical values whenever possible. If the final result is fully
105
+ numeric, an Integer is returned, otherwise a new Expression is returned.
106
+
107
+ In this context, the special value `:unknown` has a particular meaning.
108
+
109
+ Expression[1, :+, 2].reduce
110
+ # => 3
111
+ Expression[:eax, :+, [:ebx, :-, :eax]].reduce
112
+ # => Expression[:ebx]
113
+ Expression[1, :+, [:eax, :+, 2]].reduce
114
+ # => Expression[:eax, :+, 3]
115
+ Expression[:unknown, :+, :eax].reduce
116
+ # => Expression[:unknown]
117
+
118
+ The symbolic engine operates mostly on additions/subtractions, and
119
+ no-operations (eg shift by 0). It also handles some boolean composition.
120
+
121
+ The detail can be found in the #replace_rec method body, in `metasm/main.rb`.
122
+
123
+ The reduce method can also take a block argument, which will be called at
124
+ every step in the recursive reduction, for custom operations. If the block
125
+ returns nil, the result is unchanged, otherwise the new value is used as
126
+ replacement. For example, if you operate on 32-bit values and want to get rid
127
+ of `bla & 0xffffffff`, use
128
+
129
+ some_expr.reduce { |e|
130
+ if e.kind_of?(Expression) and e.op == :& and e.rexpr == 0xffff_ffff
131
+ e.lexpr
132
+ end
133
+ }
134
+
135
+
136
+ Binding
137
+ -------
138
+
139
+ An expression involving variable externals can be bound using a Hash. This will
140
+ replace any occurrence of a key of the Hash by its value in the expression
141
+ members. The `bind` method will return a new Expression with the substitutions,
142
+ and the `bind!` method will update the Expression in-place.
143
+
144
+ Expression['val', :+, 'stuff'].bind('val' => 4, 'stuff' => 8).reduce
145
+ # => 12
146
+ Expression[:eax, :+, :ebx].bind(:ebx => 42)
147
+ # Expression[:eax, :+, 42]
148
+ Expression[:eax, :+, :ebx].bind(:ebx => :ecx)
149
+ # Expression[:eax, :+, :ecx]
150
+
151
+ You can use Expressions as keys, but they will only be used on perfect matches.
152
+
153
+
154
+ Binary packing
155
+ --------------
156
+
157
+ Encoding
158
+ ########
159
+
160
+ The `encode` method will generate an EncodedData holding the expression, either
161
+ as binary if it can reduce to an integral value, or as a relocation.
162
+ The arguments are the relocation type and the endianness, plus an optional
163
+ backtrace (to notify the user where an overflowing relocation comes from).
164
+
165
+ The `encode_imm` class method will generate a raw String for a given
166
+ integral value, a type and an endianness.
167
+ The type can be given as a byte size.
168
+
169
+ Expression.encode_imm(42, :u8, :little) # => "*"
170
+ Expression.encode_imm(42, 1, :big) # => "*"
171
+ Expression.encode_imm(256, :u8, :little) # raise EncodeError
172
+
173
+ On overflows (value cannot be encoded in the bit field) an EncodeError
174
+ exception is raised.
175
+
176
+ Decoding
177
+ ########
178
+
179
+ The `decode_imm` class method can be used to read a binary value into an
180
+ Integer, with an optional offset into the binary string.
181
+
182
+ Expression.decode_imm("*", :u8, :little) # => 42
183
+ Expression.decode_imm("bla\xfe\xff", :i16, :little, 3) # => -2
184
+
185
+
186
+ Arithmetic coercion
187
+ -------------------
188
+
189
+ Expression implements the `:+` and `:-` ruby methods, so that `expr + 4`
190
+ works as expected. The result is reduced.
191
+
192
+
193
+ Integer methods
194
+ ---------------
195
+
196
+ The Expression class offers a few methods to work with integers.
197
+
198
+ make_signed
199
+ ###########
200
+
201
+ `make_signed` will convert a raw unsigned to its equivalent signed value,
202
+ given a bit size.
203
+
204
+ Expression.make_signed(1, 16) # => 1
205
+ Expression.make_signed(0xffff, 16) # => -1
206
+
207
+
208
+ in_range?
209
+ #########
210
+
211
+ `in_range?` can check if a given numeric value would fit in a particular
212
+ <core/Relocation.txt> field. The method can return true or false if it
213
+ fits or not, or `nil` if the result is unknown (eg the expr has no numeric
214
+ value).
215
+
216
+ Expression.in_range?(42, :i8) # => true
217
+ Expression.in_range?(128, :i8) # => false
218
+ Expression.in_range?(-128, :i8) # => true
219
+ Expression.in_range?(Expression['bla'], :u32) # => nil
220
+
@@ -0,0 +1,27 @@
1
+ GNUExports
2
+ ==========
3
+
4
+ This class is defined in `metasm/os/gnu_exports.rb`
5
+
6
+ It defines an `EXPORT` constant, a Hash, whose keys
7
+ are the standard linux API symbol names, and values
8
+ are the library name where you can find this symbol.
9
+
10
+ The equivalent for windows is <core/WindowsExports.txt>
11
+
12
+ Usage
13
+ -----
14
+
15
+ The main usage of this class is the automatic generation
16
+ of the <core/ELF.txt> dynamic tag `DT_NEEDED` from the
17
+ external symbols referenced by a binary during compilation.
18
+
19
+ This is done in the `automagic_symbols` method.
20
+
21
+ Symbols
22
+ -------
23
+
24
+ The current version holds the symbols of the debian
25
+ glibc, from `libc.so.6` and `libdl.so.2`.
26
+
27
+ Ruby symbols are also defined, from `libruby1.8.so.1.8`.
@@ -0,0 +1,236 @@
1
+ Ia32
2
+ ====
3
+
4
+ The Ia32 architecture, aka *Intel_x86*, is the most advanced among the
5
+ architectures implemented in the framework. It is a subclass of the
6
+ generic <core/CPU.txt>.
7
+
8
+ It can handle binary code for the 16 and 32bits modes of the processor.
9
+
10
+ It is a superclass for the <core/X86_64.txt> object, a distinct processor
11
+ that handles 64-bit *long_mode* (aka *x64*, *amd64*, *em64t*)
12
+
13
+ The CPU `shortname` is `ia32` (`ia32_16` in 16-bit mode, and a `_be` suffix
14
+ if bigendian)
15
+
16
+ Opcodes
17
+ -------
18
+
19
+ The opcodes list can be customized to match that available on a specific
20
+ version of the processor. The possibilities are:
21
+
22
+ * 386_common
23
+ * 386
24
+ * 387
25
+ * 486
26
+ * pentium
27
+ * p6
28
+ * 3dnow
29
+ * sse
30
+ * sse2
31
+ * sse3
32
+ * vmx
33
+ * sse42
34
+
35
+ Most opcodes are available in the framework, with the notable exception of:
36
+
37
+ * most sse2 simd instructions
38
+ * the AVX instructions
39
+ * amd-specific instructions
40
+
41
+ The `386_common` family is the subset of 386 instructions that are most
42
+ commonly found in standard usermode programs (no `in`/`out`/bcd
43
+ arithmetic/far call/etc).
44
+ This can be useful when manipulating stuff that is not known to be i386
45
+ binary code.
46
+
47
+
48
+ Initialization
49
+ --------------
50
+
51
+ An Ia32 <core/CPU.txt> object can be created using the following code:
52
+
53
+ Metasm::Ia32.new
54
+
55
+ The `X86` alias may be used in place of `Ia32`.
56
+
57
+ The constructor accepts optional arguments to specify the CPU size, the
58
+ opcode family, and the endianness of the processor. The arguments can
59
+ be given in any order. For example,
60
+
61
+ Metasm::Ia32.new(16, 'pentium', :big)
62
+
63
+ will create a 16-bit mode cpu, with opcodes up to the 'pentium' CPU family,
64
+ in big-endian mode.
65
+
66
+ The Ia32 initializer has the convenience feature that it will create an
67
+ X86_64 instance when given the 64 bit size (e.g. `Ia32.new(64)` returns an
68
+ X86_64 instance)
69
+
70
+
71
+ Assembler
72
+ ---------
73
+
74
+ The parser handles only Intel-style asm syntax, *e.g.*
75
+
76
+ some_label:
77
+ mov eax, 10h
78
+ mov ecx, fs:[eax+16]
79
+ push dword ptr fs:[1Ch]
80
+ call ecx
81
+ test al, al
82
+ jnz some_label
83
+ ret
84
+ fmulp ST(4)
85
+
86
+
87
+ Instruction arguments
88
+ #####################
89
+
90
+ The parser recognizes standard registers, such as
91
+
92
+ * `eax`
93
+ * `ah`
94
+ * `mm4` (mmx 64bit register)
95
+ * `xmm2` (xmm 128bit register)
96
+ * `ST` (current top of the FPU stack)
97
+ * `ST(3)` (FPU reg nr.3)
98
+ * `cs` (segment register)
99
+ * `dr3` (debug register)
100
+ * `cr2` (control register)
101
+
102
+ It also supports nonexistent registers, such as
103
+
104
+ * `cr7`
105
+ * `dr4`
106
+ * `segr6` (segment register nr.6)
107
+
108
+ The indirections are called `ModRM`. They take the form:
109
+
110
+ * `[eax]` (memory pointed by `eax`)
111
+ * `byte ptr [eax]` (1-byte memory pointed by `eax`)
112
+ * `byte [eax]` (same as previous)
113
+ * `fs:[eax]` (offset `eax` from the base of the `fs` segment)
114
+ * `[fs:eax]` (same as previous)
115
+
116
+ The pointer itself can be:
117
+
118
+ * `[eax]` (any register)
119
+ * `[eax+12]` (base + numeric offset)
120
+ * `[eax+ebx]` (base + register index)
121
+ * `[eax + 4*ebx]` (base + 1,2,4 or 8 * index)
122
+ * `[eax + 2*ebx + 42]` (both)
123
+
124
+ Note that the form base + s*index cannot use `esp` as index with s != 1.
125
+
126
+ For indirection sizes, the size is taken from the size of other arguments
127
+ if it is not specified (eg `mov eax, [42]` will be 4 bytes, and `mov al, [42]`
128
+ will be 1). The explicit size specifier can be:
129
+
130
+ * `byte` (8bits)
131
+ * `word` (16)
132
+ * `dword` (32)
133
+ * `qword` (64)
134
+ * `oword` (128)
135
+ * `_12bits` (12, arbitrary numbers can be used)
136
+
137
+
138
+ Parser commands
139
+ ###############
140
+
141
+ The following commands are recognized in an asm source:
142
+
143
+ * `.mode`
144
+ * `.bits`
145
+
146
+ They are synonymous, and serve to change the mode of the processor to either
147
+ 16 or 32bits.
148
+
149
+ They should be the first instruction in the source; changing the mode during
150
+ parsing is not supported. This would change only the mode for the next
151
+ instructions to be parsed, and for all instructions (incl. those already parsed
152
+ at this point) when encoding, which is likely **not** what you want. See the
153
+ `codeXX` prefixes.
154
+
155
+ Note that changing the CPU size once it was created may have bad side-effects.
156
+ For example, some preprocessor macros may already have been generated according
157
+ to the original size of the CPU and will be incorrect from this point on.
158
+
159
+
160
+ Prefixes
161
+ ########
162
+
163
+ The following prefixes are handled:
164
+
165
+ * `lock`
166
+ * `rep`, `repz`, `repnz`, `repe`, `repne`
167
+ * `code16`, `code32`
168
+ * `hintjmp`, `hintnojmp` (aliases: `ht`, `hnt`)
169
+ * `seg_cs` ... `seg_gs`
170
+
171
+ The `repXX` prefixes are for string operations (`movsd` etc), but will be set
172
+ for any opcode. Only the last of the family will be encoded.
173
+
174
+ The `code16` will generate instructions to be run on a CPU in 16bit mode,
175
+ independently of the global CPU mode. For example,
176
+
177
+ code16 mov ax, 42h
178
+
179
+ will generate `"\xb8\x42\x00"` (no opsz override prefix), and will decode or
180
+ run incorrectly on a 32bit CPU.
181
+
182
+ The `hintjmp` prefix is useful for conditional jumps to give a hint to the
183
+ CPU branch predictor as to whether the branch is taken or not.
184
+
185
+ The `seg_cs` prefix family is used to declare arbitrary segment override.
186
+ These should be used only in instructions with no ModRM argument.
187
+
188
+
189
+ Suffixes
190
+ ########
191
+
192
+ The parser implements a specific feature to allow the differentiation of
193
+ otherwise ambiguous opcodes, in the form of instruction suffixes.
194
+
195
+ By default, the assembler will generate the shortest encoding for a given
196
+ instruction. To force encoding of another form you can add a specific
197
+ suffix to the instruction. In general, metasm will use e.g. register sizes
198
+ when possible to avoid this kind of situations, but with immediate-only
199
+ displacement this is necessary.
200
+
201
+ or.a16 [1234h], eax ; use a 16-bit address
202
+ or [bx], eax ; use a 16-bit address (implicit from the bx register)
203
+ or eax, 1 ; "\x83\xc8\x01"
204
+ or.i8 eax, 1 ; "\x83\xc8\x01" (same, shortest encoding)
205
+ or.i eax, 1 ; "\x81\xc8\x01\x00\x00\x00" (constant stored in a 32bit field)
206
+ movsd.a16 ; use a 16-bit address-size override prefix (copy dword [si] to [di])
207
+ push.i16 42h ; push a 16-bit integer
208
+
209
+ The suffixes are available as follows:
210
+
211
+ * if the opcode takes an integer argument that can be encoded as either a 8bits or <cpu size>bits, the `.i` and `.i8` variants are created
212
+ * if the opcode takes a memory indirection as argument, or is a string operation (`movsd`, `scasb`, etc) the `.a16` and `.a32` variants are created
213
+ * if the opcode takes a single integer argument, a far pointer, or is a return instruction, the `.i16` and `.i32` variants are created
214
+
215
+
216
+ C parser
217
+ --------
218
+
219
+ The Ia32 C parser will initialize the type sizes with the `ilp32` memory
220
+ model, which is:
221
+
222
+ * short = 16bits
223
+ * int = 32bits
224
+ * long = 32bits
225
+ * long long = 64bits
226
+ * pointer = 32bits
227
+
228
+ In 16bit mode, the model is `ilp16`, which may not be correct (the 16bits
229
+ compiler has not been tested anyway).
230
+
231
+ The following macros are defined (in the asm preprocessor too)
232
+
233
+ * `_M_IX86` = 500
234
+ * `_X86_`
235
+ * `__i386__`
236
+