metasm 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -0,0 +1,220 @@
1
+ Expression
2
+ ==========
3
+
4
+ Metasm uses this class to represent arbitrary symbolic arithmetic expressions, e.g.
5
+ * `42`
6
+ * `eax + 12`
7
+ * `loc_4228h + 4*ebx - 12`
8
+
9
+ These expressions can include `Integers`, `Symbols`, and `Strings`.
10
+
11
+ The symbols and strings represent arbitrary variables, with the convention that
12
+ strings represent fixed quantities (eg addresses, labels), whereas symbols
13
+ represent more variable stuff (eg register values).
14
+
15
+ There is also a special symbol that may be used, `:unknown`, to represent a
16
+ value that is known to be unknown. See the `reduce` section.
17
+
18
+ See also <core/Indirection.txt>.
19
+
20
+ The Expression class holds all methods relative to Integer binary manipulation,
21
+ that is `encoding` and `decoding` from/to a binary blob (see also
22
+ <core/EncodedData.txt>)
23
+
24
+
25
+ Members
26
+ -------
27
+
28
+ Expressions hold exactly 3 members:
29
+ * `lexpr`, the left-hand side of the expression
30
+ * `rexpr`, the right-hand side
31
+ * `op`, the operator
32
+
33
+ `lexpr` and `rexpr` can be any value, most often String, Symbol, Integer or
34
+ Expression. For unary operators, `lexpr` is `nil`.
35
+
36
+ `op` is a Symbol representing the operation.
37
+ It should be from the list:
38
+ * arithmetic: `+ - / * >> << & | ^`
39
+ * boolean: `|| && == != > >= < <=`
40
+ * unary: `+ - ~ !`
41
+
42
+
43
+ Instantiation
44
+ -------------
45
+
46
+ In ruby code, use the class method `[]`. It takes 1 to 3 arguments, `lexpr`,
47
+ `op`, and `rexpr`. `lexpr` defaults to `nil`, and `op` defaults to `:+` (except
48
+ for negative numeric values, which are stored with `op` == `:-` and `rexpr` ==
49
+ abs).
50
+
51
+ If `lexpr` or `rexpr` are an `Array`, the `[]` constructor is called
52
+ recursively, to ease the definition of nested Expressions.
53
+
54
+ Examples:
55
+
56
+ Expression[42]
57
+ Expression[:eax, :+, 12]
58
+ Expression[:-, 'my_var']
59
+ Expression[[:eax, :-, 4], :*, [:ebx, :+, 0x12]]
60
+
61
+ The Expression class also includes a parser, to allow creating an expression
62
+ from a string. `parse_string!` will create an Expression and update its
63
+ argument to point after the last part read successfully into the expr.
64
+ The parser handles standard C operator precedence.
65
+
66
+ str = "1 + var"
67
+ Expression.parse_string!(str) # => Expression[1, :+, "var"]
68
+ str = "42 bla"
69
+ Expression.parse_string!(str) # => Expression[42]
70
+ str # => "bla"
71
+
72
+ Use `parse_string` without the ! to parse the string without updating it.
73
+
74
+ External variables
75
+ ------------------
76
+
77
+ The `externals` method will return all non-integer members of the Expression.
78
+
79
+ Expression[[:eax, :+, 42], :-, "bla"].externals # => [:eax, "bla"]
80
+
81
+
82
+ Pattern matching
83
+ ----------------
84
+
85
+ The `match` method allows checking an Expression against a pattern without
86
+ having to check individual members. The pattern should be an Expression,
87
+ whose variable members should be Strings or Symbols, which are also passed as
88
+ arguments to the match function. On successful match, the correspondence
89
+ between variable patterns and their actual value matched is returned as a Hash.
90
+
91
+ Expression[1, :+, 2].match(Expression['var', :+, 2], 'var')
92
+ # => { 'var' => 1 }
93
+ Expression[1, :+, 2].match(Expression['var', :+, 'var'], 'var')
94
+ # => nil
95
+ Expression[1, :+, 1].match(Expression['var', :op, 'var'], 'var', :op)
96
+ # => { 'var' => 1, :op => :+ }
97
+
98
+
99
+ Reduction
100
+ ---------
101
+
102
+ Metasm Expressions include a basic symbolic computation engine, that allows
103
+ some simple transformations of the Expression. The reduction will also
104
+ compute numerical values whenever possible. If the final result is fully
105
+ numeric, an Integer is returned, otherwise a new Expression is returned.
106
+
107
+ In this context, the special value `:unknown` has a particular meaning.
108
+
109
+ Expression[1, :+, 2].reduce
110
+ # => 3
111
+ Expression[:eax, :+, [:ebx, :-, :eax]].reduce
112
+ # => Expression[:ebx]
113
+ Expression[1, :+, [:eax, :+, 2]].reduce
114
+ # => Expression[:eax, :+, 3]
115
+ Expression[:unknown, :+, :eax].reduce
116
+ # => Expression[:unknown]
117
+
118
+ The symbolic engine operates mostly on addition/subtractions, and
119
+ no-operations (eg shift by 0). It also handles some boolean composition.
120
+
121
+ The detail can be found in the #replace_rec method body, in `metasm/main.rb`.
122
+
123
+ The reduce method can also take a block argument, which will be called at
124
+ every step in the recursive reduction, for custom operations. If the block
125
+ returns nil, the result is unchanged, otherwise the new value is used as
126
+ replacement. For example, if you operate on 32-bit values and want to get rid
127
+ of `bla & 0xffffffff`, use
128
+
129
+ some_expr.reduce { |e|
130
+ if e.kind_of?(Expression) and e.op == :& and e.rexpr == 0xffff_ffff
131
+ e.lexpr
132
+ end
133
+ }
134
+
135
+
136
+ Binding
137
+ -------
138
+
139
+ An expression involving variable externals can be bound using a Hash. This will
140
+ replace any occurrence of a key of the Hash by its value in the expression
141
+ members. The `bind` method will return a new Expression with the substitutions,
142
+ and the `bind!` method will update the Expression in-place.
143
+
144
+ Expression['val', :+, 'stuff'].bind('val' => 4, 'stuff' => 8).reduce
145
+ # => 12
146
+ Expression[:eax, :+, :ebx].bind(:ebx => 42)
147
+ # Expression[:eax, :+, 42]
148
+ Expression[:eax, :+, :ebx].bind(:ebx => :ecx)
149
+ # Expression[:eax, :+, :ecx]
150
+
151
+ You can use Expressions as keys, but they will only be used on perfect matches.
152
+
153
+
154
+ Binary packing
155
+ --------------
156
+
157
+ Encoding
158
+ ########
159
+
160
+ The `encode` method will generate an EncodedData holding the expression, either
161
+ as binary if it can reduce to an integral value, or as a relocation.
162
+ The arguments are the relocation type and the endianness, plus an optional
163
+ backtrace (to notify the user where an overflowing relocation comes from).
164
+
165
+ The `encode_imm` class method will generate a raw String for a given
166
+ integral value, a type and an endianness.
167
+ The type can be given as a byte size.
168
+
169
+ Expression.encode_imm(42, :u8, :little) # => "*"
170
+ Expression.encode_imm(42, 1, :big) # => "*"
171
+ Expression.encode_imm(256, :u8, :little) # raise EncodeError
172
+
173
+ On overflows (value cannot be encoded in the bit field) an EncodeError
174
+ exception is raised.
175
+
176
+ Decoding
177
+ ########
178
+
179
+ The `decode_imm` class method can be used to read a binary value into an
180
+ Integer, with an optional offset into the binary string.
181
+
182
+ Expression.decode_imm("*", :u8, :little) # => 42
183
+ Expression.decode_imm("bla\xfe\xff", :i16, :little, 3) # => -2
184
+
185
+
186
+ Arithmetic coercion
187
+ -------------------
188
+
189
+ Expression implements the `:+` and `:-` ruby methods, so that `expr + 4`
190
+ works as expected. The result is reduced.
191
+
192
+
193
+ Integer methods
194
+ ---------------
195
+
196
+ The Expression class offers a few methods to work with integers.
197
+
198
+ make_signed
199
+ ###########
200
+
201
+ `make_signed` will convert a raw unsigned to its equivalent signed value,
202
+ given a bit size.
203
+
204
+ Expression.make_signed(1, 16) # => 1
205
+ Expression.make_signed(0xffff, 16) # => -1
206
+
207
+
208
+ in_range?
209
+ #########
210
+
211
+ `in_range?` can check if a given numeric value would fit in a particular
212
+ <core/Relocation.txt> field. The method can return true or false if it
213
+ fits or not, or `nil` if the result is unknown (eg the expr has no numeric
214
+ value).
215
+
216
+ Expression.in_range?(42, :i8) # => true
217
+ Expression.in_range?(128, :i8) # => false
218
+ Expression.in_range?(-128, :i8) # => true
219
+ Expression.in_range?(Expression['bla'], :u32) # => nil
220
+
@@ -0,0 +1,27 @@
1
+ GNUExports
2
+ ==========
3
+
4
+ This class is defined in `metasm/os/gnu_exports.rb`
5
+
6
+ It defines an `EXPORT` constant, a Hash, whose keys
7
+ are the standard linux API symbol names, and values
8
+ are the library name where you can find this symbol.
9
+
10
+ The equivalent for windows is <core/WindowsExports.txt>
11
+
12
+ Usage
13
+ -----
14
+
15
+ The main usage of this class is the automatic generation
16
+ of the <core/ELF.txt> dynamic tag `DT_NEEDED` from the
17
+ external symbols referenced by a binary during compilation.
18
+
19
+ This is done in the `automagic_symbols` method.
20
+
21
+ Symbols
22
+ -------
23
+
24
+ The current version holds the symbols of the debian
25
+ glibc, from `libc.so.6` and `libdl.so.2`.
26
+
27
+ Ruby symbols are also defined, from `libruby1.8.so.1.8`.
@@ -0,0 +1,236 @@
1
+ Ia32
2
+ ====
3
+
4
+ The Ia32 architecture, aka *Intel_x86*, is the most advanced among the
5
+ architectures implemented in the framework. It is a subclass of the
6
+ generic <core/CPU.txt>.
7
+
8
+ It can handle binary code for the 16 and 32bits modes of the processor.
9
+
10
+ It is a superclass for the <core/X86_64.txt> object, a distinct processor
11
+ that handles 64-bit *long_mode* (aka *x64*, *amd64*, *em64t*)
12
+
13
+ The CPU `shortname` is `ia32` (`ia32_16` in 16-bit mode, and a `_be` suffix
14
+ if bigendian)
15
+
16
+ Opcodes
17
+ -------
18
+
19
+ The opcodes list can be customized to match that available on a specific
20
+ version of the processor. The possibilities are:
21
+
22
+ * 386_common
23
+ * 386
24
+ * 387
25
+ * 486
26
+ * pentium
27
+ * p6
28
+ * 3dnow
29
+ * sse
30
+ * sse2
31
+ * sse3
32
+ * vmx
33
+ * sse42
34
+
35
+ Most opcodes are available in the framework, with the notable exception of:
36
+
37
+ * most sse2 simd instructions
38
+ * the AVX instructions
39
+ * amd-specific instructions
40
+
41
+ The `386_common` family is the subset of 386 instructions that are most
42
+ commonly found in standard usermode programs (no `in`/`out`/bcd
43
+ arithmetic/far call/etc).
44
+ This can be useful when manipulating stuff that is not known to be i386
45
+ binary code.
46
+
47
+
48
+ Initialization
49
+ --------------
50
+
51
+ An Ia32 <core/CPU.txt> object can be created using the following code:
52
+
53
+ Metasm::Ia32.new
54
+
55
+ The `X86` alias may be used in place of `Ia32`.
56
+
57
+ The constructor accepts optional arguments to specify the CPU size, the
58
+ opcode family, and the endianness of the processor. The arguments can
59
+ be given in any order. For example,
60
+
61
+ Metasm::Ia32.new(16, 'pentium', :big)
62
+
63
+ will create a 16-bit mode cpu, with opcodes up to the 'pentium' CPU family,
64
+ in big-endian mode.
65
+
66
+ The Ia32 initializer has the convenience feature that it will create an
67
+ X86_64 instance when given the 64 bit size (e.g. `Ia32.new(64)` returns an
68
+ X86_64 instance)
69
+
70
+
71
+ Assembler
72
+ ---------
73
+
74
+ The parser handles only Intel-style asm syntax, *e.g.*
75
+
76
+ some_label:
77
+ mov eax, 10h
78
+ mov ecx, fs:[eax+16]
79
+ push dword ptr fs:[1Ch]
80
+ call ecx
81
+ test al, al
82
+ jnz some_label
83
+ ret
84
+ fmulp ST(4)
85
+
86
+
87
+ Instruction arguments
88
+ #####################
89
+
90
+ The parser recognizes standard registers, such as
91
+
92
+ * `eax`
93
+ * `ah`
94
+ * `mm4` (mmx 64bit register)
95
+ * `xmm2` (xmm 128bit register)
96
+ * `ST` (current top of the FPU stack)
97
+ * `ST(3)` (FPU reg nr.3)
98
+ * `cs` (segment register)
99
+ * `dr3` (debug register)
100
+ * `cr2` (control register)
101
+
102
+ It also supports nonexistent registers, such as
103
+
104
+ * `cr7`
105
+ * `dr4`
106
+ * `segr6` (segment register nr.6)
107
+
108
+ The indirections are called `ModRM`. They take the form:
109
+
110
+ * `[eax]` (memory pointed by `eax`)
111
+ * `byte ptr [eax]` (1-byte memory pointed by `eax`)
112
+ * `byte [eax]` (same as previous)
113
+ * `fs:[eax]` (offset `eax` from the base of the `fs` segment)
114
+ * `[fs:eax]` (same as previous)
115
+
116
+ The pointer itself can be:
117
+
118
+ * `[eax]` (any register)
119
+ * `[eax+12]` (base + numeric offset)
120
+ * `[eax+ebx]` (base + register index)
121
+ * `[eax + 4*ebx]` (base + 1,2,4 or 8 * index)
122
+ * `[eax + 2*ebx + 42]` (both)
123
+
124
+ Note that the form base + s*index cannot use `esp` as index with s != 1.
125
+
126
+ For indirection sizes, the size is taken from the size of other arguments
127
+ if it is not specified (eg `mov eax, [42]` will be 4 bytes, and `mov al, [42]`
128
+ will be 1). The explicit size specifier can be:
129
+
130
+ * `byte` (8bits)
131
+ * `word` (16)
132
+ * `dword` (32)
133
+ * `qword` (64)
134
+ * `oword` (128)
135
+ * `_12bits` (12, arbitrary numbers can be used)
136
+
137
+
138
+ Parser commands
139
+ ###############
140
+
141
+ The following commands are recognized in an asm source:
142
+
143
+ * `.mode`
144
+ * `.bits`
145
+
146
+ They are synonymous, and serve to change the mode of the processor to either
147
+ 16 or 32bits.
148
+
149
+ They should be the first instruction in the source; changing the mode during
150
+ parsing is not supported. This would change only the mode for the next
151
+ instructions to be parsed, and for all instructions (incl. those already parsed
152
+ at this point) when encoding, which is likely **not** what you want. See the
153
+ `codeXX` prefixes.
154
+
155
+ Note that changing the CPU size once it was created may have bad side-effects.
156
+ For example, some preprocessor macros may already have been generated according
157
+ to the original size of the CPU and will be incorrect from this point on.
158
+
159
+
160
+ Prefixes
161
+ ########
162
+
163
+ The following prefixes are handled:
164
+
165
+ * `lock`
166
+ * `rep`, `repz`, `repnz`, `repe`, `repne`
167
+ * `code16`, `code32`
168
+ * `hintjmp`, `hintnojmp` (aliases: `ht`, `hnt`)
169
+ * `seg_cs` ... `seg_gs`
170
+
171
+ The `repXX` prefixes are for string operations (`movsd` etc), but will be set
172
+ for any opcode. Only the last of the family will be encoded.
173
+
174
+ The `code16` will generate instructions to be run on a CPU in 16bit mode,
175
+ independently of the global CPU mode. For example,
176
+
177
+ code16 mov ax, 42h
178
+
179
+ will generate `"\xb8\x42\x00"` (no opsz override prefix), and will decode or
180
+ run incorrectly on a 32bit CPU.
181
+
182
+ The `hintjmp` prefix is useful for conditional jumps to give a hint to the
183
+ CPU branch predictor as to whether the branch is taken or not.
184
+
185
+ The `seg_cs` prefix family is used to declare arbitrary segment override.
186
+ These should be used only in instructions with no ModRM argument.
187
+
188
+
189
+ Suffixes
190
+ ########
191
+
192
+ The parser implements a specific feature to allow the differentiation of
193
+ otherwise ambiguous opcodes, in the form of instruction suffixes.
194
+
195
+ By default, the assembler will generate the shortest encoding for a given
196
+ instruction. To force encoding of another form you can add a specific
197
+ suffix to the instruction. In general, metasm will use e.g. register sizes
198
+ when possible to avoid this kind of situations, but with immediate-only
199
+ displacement this is necessary.
200
+
201
+ or.a16 [1234h], eax ; use a 16-bit address
202
+ or [bx], eax ; use a 16-bit address (implicit from the bx register)
203
+ or eax, 1 ; "\x83\xc8\x01"
204
+ or.i8 eax, 1 ; "\x83\xc8\x01" (same, shortest encoding)
205
+ or.i eax, 1 ; "\x81\xc8\x01\x00\x00\x00" (constant stored in a 32bit field)
206
+ movsd.a16 ; use a 16-bit address-size override prefix (copy dword [si] to [di])
207
+ push.i16 42h ; push a 16-bit integer
208
+
209
+ The suffixes are available as follows:
210
+
211
+ * if the opcode takes an integer argument that can be encoded as either a 8bits or <cpu size>bits, the `.i` and `.i8` variants are created
212
+ * if the opcode takes a memory indirection as argument, or is a string operation (`movsd`, `scasb`, etc) the `.a16` and `.a32` variants are created
213
+ * if the opcode takes a single integer argument, a far pointer, or is a return instruction, the `.i16` and `.i32` variants are created
214
+
215
+
216
+ C parser
217
+ --------
218
+
219
+ The Ia32 C parser will initialize the type sizes with the `ilp32` memory
220
+ model, which is:
221
+
222
+ * short = 16bits
223
+ * int = 32bits
224
+ * long = 32bits
225
+ * long long = 64bits
226
+ * pointer = 32bits
227
+
228
+ In 16bit mode, the model is `ilp16`, which may not be correct (the 16bits
229
+ compiler has not been tested anyway).
230
+
231
+ The following macros are defined (in the asm preprocessor too)
232
+
233
+ * `_M_IX86` = 500
234
+ * `_X86_`
235
+ * `__i386__`
236
+