metasm 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.hgtags +3 -0
- data/Gemfile +1 -0
- data/INSTALL +61 -0
- data/LICENCE +458 -0
- data/README +29 -21
- data/Rakefile +10 -0
- data/TODO +10 -12
- data/doc/code_organisation.txt +2 -0
- data/doc/core/DynLdr.txt +247 -0
- data/doc/core/ExeFormat.txt +43 -0
- data/doc/core/Expression.txt +220 -0
- data/doc/core/GNUExports.txt +27 -0
- data/doc/core/Ia32.txt +236 -0
- data/doc/core/SerialStruct.txt +108 -0
- data/doc/core/VirtualString.txt +145 -0
- data/doc/core/WindowsExports.txt +61 -0
- data/doc/core/index.txt +1 -0
- data/doc/style.css +6 -3
- data/doc/usage/debugger.txt +327 -0
- data/doc/usage/index.txt +1 -0
- data/doc/use_cases.txt +2 -2
- data/metasm.gemspec +22 -0
- data/{lib/metasm.rb → metasm.rb} +11 -3
- data/{lib/metasm → metasm}/compile_c.rb +13 -7
- data/metasm/cpu/arc.rb +8 -0
- data/metasm/cpu/arc/decode.rb +425 -0
- data/metasm/cpu/arc/main.rb +191 -0
- data/metasm/cpu/arc/opcodes.rb +588 -0
- data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
- data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
- data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
- data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
- data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
- data/metasm/cpu/arm/opcodes.rb +324 -0
- data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
- data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
- data/metasm/cpu/arm64.rb +15 -0
- data/metasm/cpu/arm64/debug.rb +38 -0
- data/metasm/cpu/arm64/decode.rb +289 -0
- data/metasm/cpu/arm64/encode.rb +41 -0
- data/metasm/cpu/arm64/main.rb +105 -0
- data/metasm/cpu/arm64/opcodes.rb +232 -0
- data/metasm/cpu/arm64/parse.rb +20 -0
- data/metasm/cpu/arm64/render.rb +95 -0
- data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
- data/metasm/cpu/bpf/decode.rb +142 -0
- data/metasm/cpu/bpf/main.rb +60 -0
- data/metasm/cpu/bpf/opcodes.rb +81 -0
- data/metasm/cpu/bpf/render.rb +41 -0
- data/metasm/cpu/cy16.rb +9 -0
- data/metasm/cpu/cy16/decode.rb +253 -0
- data/metasm/cpu/cy16/main.rb +63 -0
- data/metasm/cpu/cy16/opcodes.rb +78 -0
- data/metasm/cpu/cy16/render.rb +41 -0
- data/metasm/cpu/dalvik.rb +11 -0
- data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
- data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
- data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
- data/metasm/cpu/ia32.rb +17 -0
- data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
- data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
- data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
- data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
- data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
- data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
- data/metasm/cpu/ia32/opcodes.rb +1424 -0
- data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
- data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
- data/metasm/cpu/mips.rb +14 -0
- data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
- data/metasm/cpu/mips/debug.rb +42 -0
- data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
- data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
- data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
- data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
- data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
- data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
- data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
- data/metasm/cpu/msp430/decode.rb +247 -0
- data/metasm/cpu/msp430/main.rb +62 -0
- data/metasm/cpu/msp430/opcodes.rb +101 -0
- data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
- data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
- data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
- data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
- data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
- data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
- data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
- data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
- data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
- data/metasm/cpu/ppc/parse.rb +55 -0
- data/metasm/cpu/python.rb +8 -0
- data/metasm/cpu/python/decode.rb +136 -0
- data/metasm/cpu/python/main.rb +36 -0
- data/metasm/cpu/python/opcodes.rb +180 -0
- data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
- data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
- data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
- data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
- data/metasm/cpu/x86_64.rb +15 -0
- data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
- data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
- data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
- data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
- data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
- data/metasm/cpu/x86_64/opcodes.rb +136 -0
- data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
- data/metasm/cpu/x86_64/render.rb +35 -0
- data/metasm/cpu/z80.rb +9 -0
- data/metasm/cpu/z80/decode.rb +313 -0
- data/metasm/cpu/z80/main.rb +67 -0
- data/metasm/cpu/z80/opcodes.rb +224 -0
- data/metasm/cpu/z80/render.rb +59 -0
- data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
- data/{lib/metasm → metasm}/decode.rb +35 -4
- data/{lib/metasm → metasm}/decompile.rb +15 -16
- data/{lib/metasm → metasm}/disassemble.rb +201 -45
- data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
- data/{lib/metasm → metasm}/dynldr.rb +220 -133
- data/{lib/metasm → metasm}/encode.rb +10 -1
- data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
- data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
- data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
- data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
- data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
- data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
- data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
- data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
- data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
- data/metasm/exe_format/gb.rb +65 -0
- data/metasm/exe_format/javaclass.rb +424 -0
- data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
- data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
- data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
- data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
- data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
- data/metasm/exe_format/pyc.rb +167 -0
- data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
- data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
- data/metasm/exe_format/shellcode_rwx.rb +114 -0
- data/metasm/exe_format/swf.rb +205 -0
- data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
- data/metasm/exe_format/zip.rb +335 -0
- data/metasm/gui.rb +13 -0
- data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
- data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
- data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
- data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
- data/metasm/gui/dasm_graph.rb +1695 -0
- data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
- data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
- data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
- data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
- data/{lib/metasm → metasm}/gui/debug.rb +93 -27
- data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
- data/{lib/metasm → metasm}/gui/qt.rb +12 -2
- data/{lib/metasm → metasm}/gui/win32.rb +179 -42
- data/{lib/metasm → metasm}/gui/x11.rb +59 -59
- data/{lib/metasm → metasm}/main.rb +389 -264
- data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
- data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
- data/{lib/metasm → metasm}/os/linux.rb +628 -151
- data/metasm/os/main.rb +330 -0
- data/{lib/metasm → metasm}/os/windows.rb +132 -42
- data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
- data/{lib/metasm → metasm}/parse.rb +26 -24
- data/{lib/metasm → metasm}/parse_c.rb +221 -116
- data/{lib/metasm → metasm}/preprocessor.rb +55 -40
- data/{lib/metasm → metasm}/render.rb +14 -38
- data/misc/hexdump.rb +2 -1
- data/misc/lint.rb +58 -0
- data/misc/txt2html.rb +9 -7
- data/samples/bindiff.rb +3 -4
- data/samples/dasm-plugins/bindiff.rb +15 -0
- data/samples/dasm-plugins/bookmark.rb +133 -0
- data/samples/dasm-plugins/c_constants.rb +57 -0
- data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
- data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
- data/samples/dasm-plugins/dasm_all.rb +70 -0
- data/samples/dasm-plugins/demangle_cpp.rb +31 -0
- data/samples/dasm-plugins/deobfuscate.rb +251 -0
- data/samples/dasm-plugins/dump_text.rb +35 -0
- data/samples/dasm-plugins/export_graph_svg.rb +86 -0
- data/samples/dasm-plugins/findgadget.rb +75 -0
- data/samples/dasm-plugins/hl_opcode.rb +32 -0
- data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
- data/samples/dasm-plugins/imm2off.rb +34 -0
- data/samples/dasm-plugins/match_libsigs.rb +93 -0
- data/samples/dasm-plugins/patch_file.rb +95 -0
- data/samples/dasm-plugins/scanfuncstart.rb +36 -0
- data/samples/dasm-plugins/scanxrefs.rb +26 -0
- data/samples/dasm-plugins/selfmodify.rb +197 -0
- data/samples/dasm-plugins/stringsxrefs.rb +28 -0
- data/samples/dasmnavig.rb +1 -1
- data/samples/dbg-apihook.rb +24 -9
- data/samples/dbg-plugins/heapscan.rb +283 -0
- data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
- data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
- data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
- data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
- data/samples/dbg-plugins/heapscan/winheap.h +174 -0
- data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
- data/samples/dbg-plugins/trace_func.rb +214 -0
- data/samples/disassemble-gui.rb +35 -5
- data/samples/disassemble.rb +31 -6
- data/samples/dump_upx.rb +24 -12
- data/samples/dynamic_ruby.rb +12 -3
- data/samples/exeencode.rb +6 -5
- data/samples/factorize-headers-peimports.rb +1 -1
- data/samples/lindebug.rb +175 -381
- data/samples/metasm-shell.rb +1 -2
- data/samples/peldr.rb +2 -2
- data/tests/all.rb +1 -1
- data/tests/arc.rb +26 -0
- data/tests/dynldr.rb +22 -4
- data/tests/expression.rb +55 -0
- data/tests/graph_layout.rb +285 -0
- data/tests/ia32.rb +79 -26
- data/tests/mips.rb +9 -2
- data/tests/x86_64.rb +66 -18
- metadata +330 -218
- data/lib/metasm/arm/opcodes.rb +0 -177
- data/lib/metasm/gui.rb +0 -23
- data/lib/metasm/gui/dasm_graph.rb +0 -1354
- data/lib/metasm/ia32.rb +0 -14
- data/lib/metasm/ia32/opcodes.rb +0 -873
- data/lib/metasm/ppc/parse.rb +0 -52
- data/lib/metasm/x86_64.rb +0 -12
- data/lib/metasm/x86_64/opcodes.rb +0 -118
- data/samples/gdbclient.rb +0 -583
- data/samples/rubstop.rb +0 -399
@@ -0,0 +1,220 @@
|
|
1
|
+
Expression
|
2
|
+
==========
|
3
|
+
|
4
|
+
Metasm uses this class to represent arbitrary symbolic arithmetic expressions, e.g.
|
5
|
+
* `42`
|
6
|
+
* `eax + 12`
|
7
|
+
* `loc_4228h + 4*ebx - 12`
|
8
|
+
|
9
|
+
These expressions can include `Integers`, `Symbols`, and `Strings`.
|
10
|
+
|
11
|
+
The symbols and strings represent arbitrary variables, with the convention that
|
12
|
+
strings represent fixed quantities (eg addresses, labels), whereas symbols
|
13
|
+
represent more variable stuff (eg register values).
|
14
|
+
|
15
|
+
There is also a special symbol that may be used, `:unknown`, to represent a
|
16
|
+
value that is known to be unknown. See the `reduce` section.
|
17
|
+
|
18
|
+
See also <core/Indirection.txt>.
|
19
|
+
|
20
|
+
The Expression class holds all methods relative to Integer binary manipulation,
|
21
|
+
that is `encoding` and `decoding` from/to a binary blob (see also
|
22
|
+
<core/EncodedData.txt>)
|
23
|
+
|
24
|
+
|
25
|
+
Members
|
26
|
+
-------
|
27
|
+
|
28
|
+
Expressions hold exactly 3 members:
|
29
|
+
* `lexpr`, the left-hand side of the expression
|
30
|
+
* `rexpr`, the right-hand side
|
31
|
+
* `op`, the operator
|
32
|
+
|
33
|
+
`lexpr` and `rexpr` can be any value, most often String, Symbol, Integer or
|
34
|
+
Expression. For unary operators, `lexpr` is `nil`.
|
35
|
+
|
36
|
+
`op` is a Symbol representing the operation.
|
37
|
+
It should be from the list:
|
38
|
+
* arithmetic: `+ - / * >> << & | ^`
|
39
|
+
* boolean: `|| && == != > >= < <=`
|
40
|
+
* unary: `+ - ~ !`
|
41
|
+
|
42
|
+
|
43
|
+
Instantiation
|
44
|
+
-------------
|
45
|
+
|
46
|
+
In ruby code, use the class method `[]`. It takes 1 to 3 arguments, `lexpr`,
|
47
|
+
`op`, and `rexpr`. `lexpr` defaults to `nil`, and `op` defaults to `:+` (except
|
48
|
+
for negative numeric values, which are stored with `op` == `:-` and `rexpr` ==
|
49
|
+
abs).
|
50
|
+
|
51
|
+
If `lexpr` or `rexpr` are an `Array`, the `[]` constructor is called
|
52
|
+
recursively, to ease the definition of nested Expressions.
|
53
|
+
|
54
|
+
Examples:
|
55
|
+
|
56
|
+
Expression[42]
|
57
|
+
Expression[:eax, :+, 12]
|
58
|
+
Expression[:-, 'my_var']
|
59
|
+
Expression[[:eax, :-, 4], :*, [:ebx, :+, 0x12]]
|
60
|
+
|
61
|
+
The Expression class also includes a parser, to allow creating an expression
|
62
|
+
from a string. `parse_string!` will create an Expression and update its
|
63
|
+
argument to point after the last part read successfully into the expr.
|
64
|
+
The parser handles standard C operator precedence.
|
65
|
+
|
66
|
+
str = "1 + var"
|
67
|
+
Expression.parse_string!(str) # => Expression[1, :+, "var"]
|
68
|
+
str = "42 bla"
|
69
|
+
Expression.parse_string!(str) # => Expression[42]
|
70
|
+
str # => "bla"
|
71
|
+
|
72
|
+
Use `parse_string` without the ! to parse the string without updating it.
|
73
|
+
|
74
|
+
External variables
|
75
|
+
------------------
|
76
|
+
|
77
|
+
The `externals` method will return all non-integer members of the Expression.
|
78
|
+
|
79
|
+
Expression[[:eax, :+, 42], :-, "bla"].externals # => [:eax, "bla"]
|
80
|
+
|
81
|
+
|
82
|
+
Pattern matching
|
83
|
+
----------------
|
84
|
+
|
85
|
+
The `match` method allows checking an Expression against a pattern without
|
86
|
+
having to check individual members. The pattern should be an Expression,
|
87
|
+
whose variable members should be Strings or Symbols, which are also passed as
|
88
|
+
arguments to the match function. On successful match, the correspondence
|
89
|
+
between variable patterns and their actual value matched is returned as a Hash.
|
90
|
+
|
91
|
+
Expression[1, :+, 2].match(Expression['var', :+, 2], 'var')
|
92
|
+
# => { 'var' => 1 }
|
93
|
+
Expression[1, :+, 2].match(Expression['var', :+, 'var'], 'var')
|
94
|
+
# => nil
|
95
|
+
Expression[1, :+, 1].match(Expression['var', :op, 'var'], 'var', :op)
|
96
|
+
# => { 'var' => 1, :op => :+ }
|
97
|
+
|
98
|
+
|
99
|
+
Reduction
|
100
|
+
---------
|
101
|
+
|
102
|
+
Metasm Expressions include a basic symbolic computation engine, that allows
|
103
|
+
some simple transformations of the Expression. The reduction will also
|
104
|
+
compute numerical values whenever possible. If the final result is fully
|
105
|
+
numeric, an Integer is returned, otherwise a new Expression is returned.
|
106
|
+
|
107
|
+
In this context, the special value `:unknown` has a particular meaning.
|
108
|
+
|
109
|
+
Expression[1, :+, 2].reduce
|
110
|
+
# => 3
|
111
|
+
Expression[:eax, :+, [:ebx, :-, :eax]].reduce
|
112
|
+
# => Expression[:ebx]
|
113
|
+
Expression[1, :+, [:eax, :+, 2]].reduce
|
114
|
+
# => Expression[:eax, :+, 3]
|
115
|
+
Expression[:unknown, :+, :eax].reduce
|
116
|
+
# => Expression[:unknown]
|
117
|
+
|
118
|
+
The symbolic engine operates mostly on additions/subtractions, and
|
119
|
+
no-operations (eg shift by 0). It also handles some boolean composition.
|
120
|
+
|
121
|
+
The detail can be found in the #replace_rec method body, in `metasm/main.rb`.
|
122
|
+
|
123
|
+
The reduce method can also take a block argument, which will be called at
|
124
|
+
every step in the recursive reduction, for custom operations. If the block
|
125
|
+
returns nil, the result is unchanged, otherwise the new value is used as
|
126
|
+
replacement. For example, if you operate on 32-bit values and want to get rid
|
127
|
+
of `bla & 0xffffffff`, use
|
128
|
+
|
129
|
+
some_expr.reduce { |e|
|
130
|
+
if e.kind_of?(Expression) and e.op == :& and e.rexpr == 0xffff_ffff
|
131
|
+
e.lexpr
|
132
|
+
end
|
133
|
+
}
|
134
|
+
|
135
|
+
|
136
|
+
Binding
|
137
|
+
-------
|
138
|
+
|
139
|
+
An expression involving variable externals can be bound using a Hash. This will
|
140
|
+
replace any occurrence of a key of the Hash by its value in the expression
|
141
|
+
members. The `bind` method will return a new Expression with the substitutions,
|
142
|
+
and the `bind!` method will update the Expression in-place.
|
143
|
+
|
144
|
+
Expression['val', :+, 'stuff'].bind('val' => 4, 'stuff' => 8).reduce
|
145
|
+
# => 12
|
146
|
+
Expression[:eax, :+, :ebx].bind(:ebx => 42)
|
147
|
+
# Expression[:eax, :+, 42]
|
148
|
+
Expression[:eax, :+, :ebx].bind(:ebx => :ecx)
|
149
|
+
# Expression[:eax, :+, :ecx]
|
150
|
+
|
151
|
+
You can use Expressions as keys, but they will only be used on perfect matches.
|
152
|
+
|
153
|
+
|
154
|
+
Binary packing
|
155
|
+
--------------
|
156
|
+
|
157
|
+
Encoding
|
158
|
+
########
|
159
|
+
|
160
|
+
The `encode` method will generate an EncodedData holding the expression, either
|
161
|
+
as binary if it can reduce to an integral value, or as a relocation.
|
162
|
+
The arguments are the relocation type and the endianness, plus an optional
|
163
|
+
backtrace (to notify the user where an overflowing relocation comes from).
|
164
|
+
|
165
|
+
The `encode_imm` class method will generate a raw String for a given
|
166
|
+
integral value, a type and an endianness.
|
167
|
+
The type can be given as a byte size.
|
168
|
+
|
169
|
+
Expression.encode_imm(42, :u8, :little) # => "*"
|
170
|
+
Expression.encode_imm(42, 1, :big) # => "*"
|
171
|
+
Expression.encode_imm(256, :u8, :little) # raise EncodeError
|
172
|
+
|
173
|
+
On overflows (value cannot be encoded in the bit field) an EncodeError
|
174
|
+
exception is raised.
|
175
|
+
|
176
|
+
Decoding
|
177
|
+
########
|
178
|
+
|
179
|
+
The `decode_imm` class method can be used to read a binary value into an
|
180
|
+
Integer, with an optional offset into the binary string.
|
181
|
+
|
182
|
+
Expression.decode_imm("*", :u8, :little) # => 42
|
183
|
+
Expression.decode_imm("bla\xfe\xff", :i16, :little, 3) # => -2
|
184
|
+
|
185
|
+
|
186
|
+
Arithmetic coercion
|
187
|
+
-------------------
|
188
|
+
|
189
|
+
Expression implements the `:+` and `:-` ruby methods, so that `expr + 4`
|
190
|
+
works as expected. The result is reduced.
|
191
|
+
|
192
|
+
|
193
|
+
Integer methods
|
194
|
+
---------------
|
195
|
+
|
196
|
+
The Expression class offers a few methods to work with integers.
|
197
|
+
|
198
|
+
make_signed
|
199
|
+
###########
|
200
|
+
|
201
|
+
`make_signed` will convert a raw unsigned to its equivalent signed value,
|
202
|
+
given a bit size.
|
203
|
+
|
204
|
+
Expression.make_signed(1, 16) # => 1
|
205
|
+
Expression.make_signed(0xffff, 16) # => -1
|
206
|
+
|
207
|
+
|
208
|
+
in_range?
|
209
|
+
#########
|
210
|
+
|
211
|
+
`in_range?` can check if a given numeric value would fit in a particular
|
212
|
+
<core/Relocation.txt> field. The method can return true or false if it
|
213
|
+
fits or not, or `nil` if the result is unknown (eg the expr has no numeric
|
214
|
+
value).
|
215
|
+
|
216
|
+
Expression.in_range?(42, :i8) # => true
|
217
|
+
Expression.in_range?(128, :i8) # => false
|
218
|
+
Expression.in_range?(-128, :i8) # => true
|
219
|
+
Expression.in_range?(Expression['bla'], :u32) # => nil
|
220
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
GNUExports
|
2
|
+
==========
|
3
|
+
|
4
|
+
This class is defined in `metasm/os/gnu_exports.rb`
|
5
|
+
|
6
|
+
It defines an `EXPORT` constant, a Hash, whose keys
|
7
|
+
are the standard linux API symbol names, and values
|
8
|
+
are the library name where you can find this symbol.
|
9
|
+
|
10
|
+
The equivalent for windows is <core/WindowsExports.txt>
|
11
|
+
|
12
|
+
Usage
|
13
|
+
-----
|
14
|
+
|
15
|
+
The main usage of this class is the automatic generation
|
16
|
+
of the <core/ELF.txt> dynamic tag `DT_NEEDED` from the
|
17
|
+
external symbols referenced by a binary during compilation.
|
18
|
+
|
19
|
+
This is done in the `automagic_symbols` method.
|
20
|
+
|
21
|
+
Symbols
|
22
|
+
-------
|
23
|
+
|
24
|
+
The current version holds the symbols of the debian
|
25
|
+
glibc, from `libc.so.6` and `libdl.so.2`.
|
26
|
+
|
27
|
+
Ruby symbols are also defined, from `libruby1.8.so.1.8`.
|
data/doc/core/Ia32.txt
ADDED
@@ -0,0 +1,236 @@
|
|
1
|
+
Ia32
|
2
|
+
====
|
3
|
+
|
4
|
+
The Ia32 architecture, aka *Intel_x86*, is the most advanced among the
|
5
|
+
architectures implemented in the framework. It is a subclass of the
|
6
|
+
generic <core/CPU.txt>.
|
7
|
+
|
8
|
+
It can handle binary code for the 16 and 32bits modes of the processor.
|
9
|
+
|
10
|
+
It is a superclass for the <core/X86_64.txt> object, a distinct processor
|
11
|
+
that handles 64-bit *long_mode* (aka *x64*, *amd64*, *em64t*)
|
12
|
+
|
13
|
+
The CPU `shortname` is `ia32` (`ia32_16` in 16-bit mode, and a `_be` suffix
|
14
|
+
if bigendian)
|
15
|
+
|
16
|
+
Opcodes
|
17
|
+
-------
|
18
|
+
|
19
|
+
The opcodes list can be customized to match that available on a specific
|
20
|
+
version of the processor. The possibilities are:
|
21
|
+
|
22
|
+
* 386_common
|
23
|
+
* 386
|
24
|
+
* 387
|
25
|
+
* 486
|
26
|
+
* pentium
|
27
|
+
* p6
|
28
|
+
* 3dnow
|
29
|
+
* sse
|
30
|
+
* sse2
|
31
|
+
* sse3
|
32
|
+
* vmx
|
33
|
+
* sse42
|
34
|
+
|
35
|
+
Most opcodes are available in the framework, with the notable exception of:
|
36
|
+
|
37
|
+
* most sse2 simd instructions
|
38
|
+
* the AVX instructions
|
39
|
+
* amd-specific instructions
|
40
|
+
|
41
|
+
The `386_common` family is the subset of 386 instructions that are most
|
42
|
+
commonly found in standard usermode programs (no `in`/`out`/bcd
|
43
|
+
arithmetic/far call/etc).
|
44
|
+
This can be useful when manipulating stuff that is not known to be i386
|
45
|
+
binary code.
|
46
|
+
|
47
|
+
|
48
|
+
Initialization
|
49
|
+
--------------
|
50
|
+
|
51
|
+
An Ia32 <core/CPU.txt> object can be created using the following code:
|
52
|
+
|
53
|
+
Metasm::Ia32.new
|
54
|
+
|
55
|
+
The `X86` alias may be used in place of `Ia32`.
|
56
|
+
|
57
|
+
The constructor accepts optional arguments to specify the CPU size, the
|
58
|
+
opcode family, and the endianness of the processor. The arguments can
|
59
|
+
be given in any order. For example,
|
60
|
+
|
61
|
+
Metasm::Ia32.new(16, 'pentium', :big)
|
62
|
+
|
63
|
+
will create a 16-bit mode cpu, with opcodes up to the 'pentium' CPU family,
|
64
|
+
in big-endian mode.
|
65
|
+
|
66
|
+
The Ia32 initializer has the convenience feature that it will create an
|
67
|
+
X86_64 instance when given the 64 bit size (e.g. `Ia32.new(64)` returns an
|
68
|
+
X86_64 instance)
|
69
|
+
|
70
|
+
|
71
|
+
Assembler
|
72
|
+
---------
|
73
|
+
|
74
|
+
The parser handles only Intel-style asm syntax, *e.g.*
|
75
|
+
|
76
|
+
some_label:
|
77
|
+
mov eax, 10h
|
78
|
+
mov ecx, fs:[eax+16]
|
79
|
+
push dword ptr fs:[1Ch]
|
80
|
+
call ecx
|
81
|
+
test al, al
|
82
|
+
jnz some_label
|
83
|
+
ret
|
84
|
+
fmulp ST(4)
|
85
|
+
|
86
|
+
|
87
|
+
Instruction arguments
|
88
|
+
#####################
|
89
|
+
|
90
|
+
The parser recognizes standard registers, such as
|
91
|
+
|
92
|
+
* `eax`
|
93
|
+
* `ah`
|
94
|
+
* `mm4` (mmx 64bit register)
|
95
|
+
* `xmm2` (xmm 128bit register)
|
96
|
+
* `ST` (current top of the FPU stack)
|
97
|
+
* `ST(3)` (FPU reg nr.3)
|
98
|
+
* `cs` (segment register)
|
99
|
+
* `dr3` (debug register)
|
100
|
+
* `cr2` (control register)
|
101
|
+
|
102
|
+
It also supports nonexistent registers, such as
|
103
|
+
|
104
|
+
* `cr7`
|
105
|
+
* `dr4`
|
106
|
+
* `segr6` (segment register nr.6)
|
107
|
+
|
108
|
+
The indirections are called `ModRM`. They take the form:
|
109
|
+
|
110
|
+
* `[eax]` (memory pointed by `eax`)
|
111
|
+
* `byte ptr [eax]` (1-byte memory pointed by `eax`)
|
112
|
+
* `byte [eax]` (same as previous)
|
113
|
+
* `fs:[eax]` (offset `eax` from the base of the `fs` segment)
|
114
|
+
* `[fs:eax]` (same as previous)
|
115
|
+
|
116
|
+
The pointer itself can be:
|
117
|
+
|
118
|
+
* `[eax]` (any register)
|
119
|
+
* `[eax+12]` (base + numeric offset)
|
120
|
+
* `[eax+ebx]` (base + register index)
|
121
|
+
* `[eax + 4*ebx]` (base + 1,2,4 or 8 * index)
|
122
|
+
* `[eax + 2*ebx + 42]` (both)
|
123
|
+
|
124
|
+
Note that the form base + s*index cannot use `esp` as index with s != 1.
|
125
|
+
|
126
|
+
For indirection sizes, the size is taken from the size of other arguments
|
127
|
+
if it is not specified (eg `mov eax, [42]` will be 4 bytes, and `mov al, [42]`
|
128
|
+
will be 1). The explicit size specifier can be:
|
129
|
+
|
130
|
+
* `byte` (8bits)
|
131
|
+
* `word` (16)
|
132
|
+
* `dword` (32)
|
133
|
+
* `qword` (64)
|
134
|
+
* `oword` (128)
|
135
|
+
* `_12bits` (12, arbitrary numbers can be used)
|
136
|
+
|
137
|
+
|
138
|
+
Parser commands
|
139
|
+
###############
|
140
|
+
|
141
|
+
The following commands are recognized in an asm source:
|
142
|
+
|
143
|
+
* `.mode`
|
144
|
+
* `.bits`
|
145
|
+
|
146
|
+
They are synonymous, and serve to change the mode of the processor to either
|
147
|
+
16 or 32bits.
|
148
|
+
|
149
|
+
They should be the first instruction in the source, changing the mode during
|
150
|
+
parsing is not supported. This would change only the mode for the next
|
151
|
+
instructions to be parsed, and for all instructions (incl. those already parsed
|
152
|
+
at this point) when encoding, which is likely **not** what you want. See the
|
153
|
+
`codeXX` prefixes.
|
154
|
+
|
155
|
+
Note that changing the CPU size once it was created may have bad side-effects.
|
156
|
+
For example, some preprocessor macros may already have been generated according
|
157
|
+
to the original size of the CPU and will be incorrect from this point on.
|
158
|
+
|
159
|
+
|
160
|
+
Prefixes
|
161
|
+
########
|
162
|
+
|
163
|
+
The following prefixes are handled:
|
164
|
+
|
165
|
+
* `lock`
|
166
|
+
* `rep`, `repz`, `repnz`, `repe`, `repne`
|
167
|
+
* `code16`, `code32`
|
168
|
+
* `hintjmp`, `hintnojmp` (aliases: `ht`, `hnt`)
|
169
|
+
* `seg_cs` ... `seg_gs`
|
170
|
+
|
171
|
+
The `repXX` prefixes are for string operations (`movsd` etc), but will be set
|
172
|
+
for any opcode. Only the last of the family will be encoded.
|
173
|
+
|
174
|
+
The `code16` will generate instructions to be run on a CPU in 16bit mode,
|
175
|
+
independently of the global CPU mode. For example,
|
176
|
+
|
177
|
+
code16 mov ax, 42h
|
178
|
+
|
179
|
+
will generate `"\xb8\x42\x00"` (no opsz override prefix), and will decode or
|
180
|
+
run incorrectly on a 32bit CPU.
|
181
|
+
|
182
|
+
The `hintjmp` prefix is useful for conditional jumps to give a hint to the
|
183
|
+
CPU branch predictor as to whether the branch is taken or not.
|
184
|
+
|
185
|
+
The `seg_cs` prefix family is used to declare arbitrary segment override.
|
186
|
+
These should be used only in instructions with no ModRM argument.
|
187
|
+
|
188
|
+
|
189
|
+
Suffixes
|
190
|
+
########
|
191
|
+
|
192
|
+
The parser implements a specific feature to allow the differentiation of
|
193
|
+
otherwise ambiguous opcodes, in the form of instruction suffixes.
|
194
|
+
|
195
|
+
By default, the assembler will generate the shortest encoding for a given
|
196
|
+
instruction. To force encoding of another form you can add a specific
|
197
|
+
suffix to the instruction. In general, metasm will use e.g. register sizes
|
198
|
+
when possible to avoid this kind of situations, but with immediate-only
|
199
|
+
displacement this is necessary.
|
200
|
+
|
201
|
+
or.a16 [1234h], eax ; use a 16-bit address
|
202
|
+
or [bx], eax ; use a 16-bit address (implicit from the bx register)
|
203
|
+
or eax, 1 ; "\x83\xc8\x01"
|
204
|
+
or.i8 eax, 1 ; "\x83\xc8\x01" (same, shortest encoding)
|
205
|
+
or.i eax, 1 ; "\x81\xc8\x01\x00\x00\x00" (constant stored in a 32bit field)
|
206
|
+
movsd.a16 ; use a 16-bit address-size override prefix (copy dword [si] to [di])
|
207
|
+
push.i16 42h ; push a 16-bit integer
|
208
|
+
|
209
|
+
The suffixes are available as follows:
|
210
|
+
|
211
|
+
* if the opcode takes an integer argument that can be encoded as either a 8bits or <cpu size>bits, the `.i` and `.i8` variants are created
|
212
|
+
* if the opcode takes a memory indirection as argument, or is a string operation (`movsd`, `scasb`, etc) the `.a16` and `.a32` variants are created
|
213
|
+
* if the opcode takes a single integer argument, a far pointer, or is a return instruction, the `.i16` and `.i32` variants are created
|
214
|
+
|
215
|
+
|
216
|
+
C parser
|
217
|
+
--------
|
218
|
+
|
219
|
+
The Ia32 C parser will initialize the type sizes with the `ilp32` memory
|
220
|
+
model, which is:
|
221
|
+
|
222
|
+
* short = 16bits
|
223
|
+
* int = 32bits
|
224
|
+
* long = 32bits
|
225
|
+
* long long = 64bits
|
226
|
+
* pointer = 32bits
|
227
|
+
|
228
|
+
In 16bit mode, the model is `ilp16`, which may not be correct (the 16bits
|
229
|
+
compiler has not been tested anyway).
|
230
|
+
|
231
|
+
The following macros are defined (in the asm preprocessor too)
|
232
|
+
|
233
|
+
* `_M_IX86` = 500
|
234
|
+
* `_X86_`
|
235
|
+
* `__i386__`
|
236
|
+
|