metasm 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
data/BUGS
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
List of known bugs/missing features, in no particular order:
|
2
|
+
|
3
|
+
PPC cpu cannot parse/encode code
|
4
|
+
Disassembler is sloooow
|
5
|
+
The GTK UI is quite sluggish too
|
6
|
+
Disassembler backtracker does weird things
|
7
|
+
Mach-O encoder does not work (binaries won't load on OSX)
|
8
|
+
ELF encoder may need tweaks to handle OpenBSD
|
9
|
+
Ia32 compile_c misses many features (divisions, bitfields), and needs a register allocator
|
10
|
+
Asm parser does not handle ';' comments well (eg "foo ; */* blargimdead") (c-style comments are parsed before asm-style, so multiline /* after ; is bad)
|
11
|
+
The BUGS file is incomplete
|
data/CREDITS
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
N: Yoann GUILLOT
|
2
|
+
E: yoann at ofjj.net
|
3
|
+
D: Lead developer
|
4
|
+
|
5
|
+
N: Julien TINNES
|
6
|
+
E: julien at cr0.org
|
7
|
+
D: Senior Product Manager
|
8
|
+
D: Ideas, bug hunting, Yoann-slapping
|
9
|
+
D: Metasploit integration
|
10
|
+
|
11
|
+
N: Arnaud CORNET
|
12
|
+
E: arnaud.cornet at gmail.com
|
13
|
+
D: Initial ELF support
|
14
|
+
|
15
|
+
N: Raphael RIGO
|
16
|
+
E: raphael at cr0.org
|
17
|
+
D: Initial MIPS support and misc stuff
|
data/README
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
Metasm, the Ruby assembly manipulation suite
|
2
|
+
============================================
|
3
|
+
|
4
|
+
* sample scripts in samples/ -- read comments at the beginning of the files
|
5
|
+
* all files are licensed under the terms of the LGPL
|
6
|
+
|
7
|
+
Author: Yoann Guillot <john at ofjj.net>
|
8
|
+
|
9
|
+
|
10
|
+
Basic overview:
|
11
|
+
|
12
|
+
Metasm allows you to interact with executable formats (ExeFormat):
|
13
|
+
PE, ELF, Mach-O, Shellcode, etc.
|
14
|
+
There are three approaches to an ExeFormat:
|
15
|
+
- compiling one up, from scratch
|
16
|
+
- decompiling an existing format
|
17
|
+
- manipulating the file structure
|
18
|
+
|
19
|
+
|
20
|
+
Ready-to-use scripts can be found in the samples/ subdirectory, check the
|
21
|
+
comments in the scripts headers. You can also try the --help argument if
|
22
|
+
you're feeling lucky.
|
23
|
+
|
24
|
+
|
25
|
+
Here is a short overview of the Metasm internals.
|
26
|
+
|
27
|
+
|
28
|
+
Assembly:
|
29
|
+
|
30
|
+
When compiling, you start from a source text (ruby String, consisting
|
31
|
+
mostly of a sequence of instructions/data/padding directives), which is parsed.
|
32
|
+
|
33
|
+
The string is handed to a Preprocessor instance (which handles #if, #ifdef,
|
34
|
+
#include, #define, /* */ etc, should be 100% compatible with gcc -E), which is
|
35
|
+
encapsulated in an AsmPreprocessor for assembler sources (to handle asm macro
|
36
|
+
definitions, 'equ' and asm ';' comments).
|
37
|
+
The interface to do that is ExeFormat#parse(text[, filename, lineno]) or
|
38
|
+
ExeFormat.assemble (which calls .new, #parse and #assemble).
|
39
|
+
|
40
|
+
The (Asm)Preprocessor returns tokens to the ExeFormat, which parses them as Data,
|
41
|
+
Padding, Labels or parser directives. Parser directives always start with a dot.
|
42
|
+
They can be generic (.pad, .offset...) or ExeFormat-specific (.section,
|
43
|
+
.import, .entrypoint...). They are handled by #parse_parser_instruction().
|
44
|
+
If the ExeFormat does not recognize a word, it is handed to its CPU instance,
|
45
|
+
which is responsible for parsing Instructions (or raising an exception).
|
46
|
+
All those tokens are stored in one or more arrays in the @source attribute of
|
47
|
+
the ExeFormat (Shellcode's @source is an Array, for PE/ELF it is a hash
|
48
|
+
[section name] => [Array of parsed data])
|
49
|
+
Every immediate value can be an arbitrary Expression (see later).
|
50
|
+
|
51
|
+
You can then assemble the source to binary sections using ExeFormat#assemble.
|
52
|
+
|
53
|
+
Once the section binaries are available, the whole binary executable can be
|
54
|
+
written to disk using ExeFormat#encode_file(filename[, format]).
|
55
|
+
|
56
|
+
PE and ELF include an autoimport feature that allows automatic creation of
|
57
|
+
import-related data for known OS-specific functions (e.g. unresolved calls to
|
58
|
+
'strcpy' will generate data so that the binary is linked against the libc
|
59
|
+
library at runtime).
|
60
|
+
|
61
|
+
The samples/{exe,pe,elf}encode.rb can take an asm source file as argument
|
62
|
+
and compile it to a working executable.
|
63
|
+
|
64
|
+
The CPU classes are responsible for parsing and encoding individual
|
65
|
+
instructions. The current Ia32 parser uses the Intel syntax (e.g. mov eax, 42).
|
66
|
+
The generic parser recognizes labels as a string at the beginning of a line
|
67
|
+
followed by a colon (e.g. 'some_label:'). GCC-style local labels may be used
|
68
|
+
(e.g. '1:', referred to using '1b' (backward) or '1f' (forward) ; may be
|
69
|
+
redefined as many times as needed.)
|
70
|
+
Data are specified using 'db'-style notation (e.g. 'dd 42h', 'db "blabla", 0')
|
71
|
+
See samples/asmsyntax.rb
|
72
|
+
|
73
|
+
|
74
|
+
EncodedData:
|
75
|
+
|
76
|
+
In Metasm all binary data is stored as an EncodedData.
|
77
|
+
EncodedData has 3 main attributes:
|
78
|
+
- #data which holds the raw binary data (generally a ruby String, but see
|
79
|
+
VirtualString)
|
80
|
+
- #export which is a hash associating an export name (label name) to an offset
|
81
|
+
within #data
|
82
|
+
- #reloc which is a hash whose keys are offsets within #data, and whose values
|
83
|
+
are Relocation objects.
|
84
|
+
A Relocation object has an endianness (:little/:big), a type (:u32 for unsigned
|
85
|
+
32bits) and a target (the intended value stored here).
|
86
|
+
The target is an arbitrary arithmetic/logic Expression.
|
87
|
+
|
88
|
+
EncodedData also has a #virtsize (for e.g. .bss sections), and a #ptr (internal
|
89
|
+
offset used when decoding things)
|
90
|
+
|
91
|
+
You can fixup an EncodedData, with a Hash variable name => value (value should
|
92
|
+
be an Expression or a numeric value). When you do that, each relocation's target
|
93
|
+
is bound using the binding, and if the result is calculable (no external variable
|
94
|
+
name used in the Expression), the result is encoded using the relocation's
|
95
|
+
size/sign/endianness information. If it overflows (try to store 128 in an 8bit
|
96
|
+
signed relocation), an EncodeError exception is raised. Use the :a32 type to
|
97
|
+
allow silent overflow truncating.
|
98
|
+
If the relocation's target is not numeric, the target is unchanged if you use
|
99
|
+
EncodedData#fixup, or it is replaced with the bound target with #fixup! .
|
100
|
+
|
101
|
+
|
102
|
+
Disassembly:
|
103
|
+
|
104
|
+
This code is found in the metasm/decode.rb source file, which defines the
|
105
|
+
Disassembler class.
|
106
|
+
|
107
|
+
The disassembler needs a decoded ExeFormat (to be able to say what data is at
|
108
|
+
which virtual address) and an entrypoint (a virtual address or export name).
|
109
|
+
It can then start to disassemble instructions. When it encounters an
|
110
|
+
Opcode marked as :setip, it asks the CPU for the jump destination (an
|
111
|
+
Expression that may involve register values, for e.g. jmp eax), and backtraces
|
112
|
+
instructions until it finds the numeric value.
|
113
|
+
|
114
|
+
On decoding, the Disassembler maintains a #decoded hash associating addresses
|
115
|
+
(expressions/integer #normalize()d) to DecodedInstructions.
|
116
|
+
|
117
|
+
The disassembly generates an InstructionBlock graph. Each block holds a list of
|
118
|
+
DecodedInstruction, and pointers to the next/previous block (by address).
|
119
|
+
|
120
|
+
The disassembler also traces data accesses by instructions, and stores Xrefs
|
121
|
+
for them.
|
122
|
+
The backtrace parameters can be tweaked, and the maximum depth to consider
|
123
|
+
can be specifically changed for :r/:w backtraces (instruction memory xrefs)
|
124
|
+
using #backtrace_maxblocks_data.
|
125
|
+
When an Expression is backtracked, each walked block is marked so that loops
|
126
|
+
are detected, and so that if a new code path is found to an existing block,
|
127
|
+
backtraces can be resumed using this new path.
|
128
|
+
|
129
|
+
The disassembler makes very few assumptions, and in particular does not
|
130
|
+
suppose that functions will return ; they will only if the backtrace of the
|
131
|
+
'ret' instructions is conclusive. This is quite powerful, but also implies
|
132
|
+
that any error in the backtracking process can lead to a full stop ; and also
|
133
|
+
means that the disassembler is quite slow.
|
134
|
+
|
135
|
+
The special method #disassemble_fast can be used to work around this when the
|
136
|
+
code is known to be well-formed (ie it assumes that all calls return)
|
137
|
+
|
138
|
+
When a subfunction is found, a special DecodedFunction is created, which holds
|
139
|
+
a summary of the function's effects (like a DecodedInstruction on steroids).
|
140
|
+
This allows the backtracker to 'step over' subfunctions, which greatly improves
|
141
|
+
speed. The DecodedFunctions may be callback-based, to allow a very dynamic
|
142
|
+
behaviour.
|
143
|
+
External function calls create dedicated DecodedFunctions, which hold some
|
144
|
+
API information (e.g. stack fixup information, basic parameter accesses...)
|
145
|
+
This information may be derived from a C header parsed beforehand.
|
146
|
+
If no C function prototype is available, a special 'default' entry is used,
|
147
|
+
which assumes that the function has a standard ABI.
|
148
|
+
|
149
|
+
Ia32 implements a specific :default entry, which handles automatic stack fixup
|
150
|
+
resolution, by assuming that the last 'call' instruction returns. This may lead
|
151
|
+
to unexpected results ; for maximum accuracy a C header holding information for
|
152
|
+
all external functions is recommended (see samples/factorize-headers-peimports
|
153
|
+
for a script to generate such a header from a full Visual Studio installation
|
154
|
+
and the target binary).
|
155
|
+
|
156
|
+
Ia32 also implements a specific GetProcAddress/dlsym callback, that will
|
157
|
+
yield the correct return value if the parameters can be backtraced.
|
158
|
+
|
159
|
+
The scripts implementing a full disassembler are samples/disassemble{-gui}.rb
|
160
|
+
See the comments for the GUI key bindings.
|
161
|
+
|
162
|
+
|
163
|
+
ExeFormat manipulation:
|
164
|
+
|
165
|
+
You can encode/decode an ExeFormat (ie decode sections, imports, headers etc)
|
166
|
+
|
167
|
+
Constructor: ExeFormat.decode_file(str), ExeFormat.decode_file_header(str)
|
168
|
+
Methods: ExeFormat#encode_file(filename), ExeFormat#encode_string
|
169
|
+
|
170
|
+
PE and ELF files have a LoadedPE/LoadedELF counterpart, that is able to work
|
171
|
+
with memory-mapped versions of those formats (e.g. to debug running
|
172
|
+
processes)
|
173
|
+
|
174
|
+
|
175
|
+
VirtualString:
|
176
|
+
|
177
|
+
A VirtualString is a String-like object: you can read and may rewrite slices of
|
178
|
+
it. It can be used as EncodedData#data, and thus allows virtualization
|
179
|
+
of most Metasm algorithms.
|
180
|
+
You cannot change a VirtualString length.
|
181
|
+
Taking a slice of a VirtualString will return either a String (for small sizes)
|
182
|
+
or another VirtualString (a 'window' into the other). You can force getting a
|
183
|
+
small VirtualString using the #dup(offset, length) method.
|
184
|
+
Any unimplemented method called on it is forwarded to a frozen String which is
|
185
|
+
a full copy of the VirtualString (should be avoided if possible, the underlying
|
186
|
+
string may be very big & slow to access).
|
187
|
+
|
188
|
+
There are currently 3 VirtualStrings implemented:
|
189
|
+
- VirtualFile, which loads a file by page-sized chunks on demand,
|
190
|
+
- WindowsRemoteString, which maps another process' virtual memory (uses the
|
191
|
+
windows debug api through WinDbgAPI)
|
192
|
+
- LinuxRemoteString, which maps another process' virtual memory (need ptrace
|
193
|
+
rights, memory reading is done using /proc/pid/mem)
|
194
|
+
|
195
|
+
The Win/Lin versions are quite powerful, and allow things like live process
|
196
|
+
disassembly/patching easily (using LoadedPE/LoadedELF as ExeFormat)
|
197
|
+
|
198
|
+
|
199
|
+
Debugging:
|
200
|
+
|
201
|
+
Metasm includes a few interfaces to allow live debugging.
|
202
|
+
The WinOS and LinOS classes offer access to the underlying OS processes (e.g.
|
203
|
+
OS.current.find_process('foobar') will retrieve a running process with foobar
|
204
|
+
in its filename ; then process.mem can be used to access its memory.)
|
205
|
+
|
206
|
+
The Windows and Linux debugging APIs (x86 only) have a basic ruby interface
|
207
|
+
(PTrace32, extended in samples/rubstop.rb ; and WinDBG, a simple mapping of the
|
208
|
+
windows debugging API) ; those will be more worked on/integrated in the future.
|
209
|
+
|
210
|
+
A linux console debugging interface is available in samples/lindebug.rb ; it
|
211
|
+
uses a SoftICE-like look and feel.
|
212
|
+
This interface can talk to a gdb-server through samples/gdbclient.rb ; use
|
213
|
+
[udp:]<host:port> as target.
|
214
|
+
|
215
|
+
The disassembler scripts allow live process interaction by using as target
|
216
|
+
'live:<pid or part of filename>'.
|
217
|
+
|
218
|
+
A generic debugging interface is available, it is defined in metasm/os/main.rb
|
219
|
+
It may be accessed using the Metasm::OS.current.create_debugger('foo')
|
220
|
+
|
221
|
+
It can be viewed in action using the GUI and 'open live' target.
|
222
|
+
|
223
|
+
|
224
|
+
C Parser:
|
225
|
+
|
226
|
+
Metasm includes a hand-written C Parser.
|
227
|
+
It handles all the constructs I am aware of, except hex floats:
|
228
|
+
- static const L"bla"
|
229
|
+
- variable arguments
|
230
|
+
- incomplete types
|
231
|
+
- __attribute__(()), __declspec()
|
232
|
+
- #pragma once
|
233
|
+
- #pragma pack()
|
234
|
+
- C99 declarators - type bla = { [ 2 ... 14 ].toto = 28 };
|
235
|
+
- Nested functions
|
236
|
+
- __int8 etc native types
|
237
|
+
- Label addresses (&&label)
|
238
|
+
Also note that all those things are parsed, but most of them will fail to
|
239
|
+
compile on the Ia32 backend (the only one implemented so far.)
|
240
|
+
|
241
|
+
When you parse a C String using C::Parser.parse(text), you receive a Parser
|
242
|
+
object. It holds a #toplevel field, which is a C::Block, which holds #structs,
|
243
|
+
#symbols and #statements. The top-level functions are found in the #symbol hash
|
244
|
+
whose keys are the symbol names, associated to a C::Variable object holding
|
245
|
+
the functions. The function parameter/attributes are accessible through
|
246
|
+
func.type, and the code is in func.initializer, which is itself a C::Block.
|
247
|
+
Under it you'll find a tree-like structure of C::Statements (If, While, Asm,
|
248
|
+
CExpressions...)
|
249
|
+
|
250
|
+
A C::Parser may be #precompiled to transform it into a simplified version that
|
251
|
+
is easier to compile: typedefs are removed, control sequences are transformed
|
252
|
+
in if () goto ; etc.
|
253
|
+
|
254
|
+
To compile a C program, use PE/ELF.compile_c, that will create a C::Parser with
|
255
|
+
exe-specific macros defined (eg __PE__ or __ELF__).
|
256
|
+
|
257
|
+
The preferred way to create a C::Parser is to initialize it with a CPU and the
|
258
|
+
desired ExeFormat, so that it is
|
259
|
+
correctly initialized (eg type sizes: is long 4 or 8 bytes? etc) ; and
|
260
|
+
may define preprocessor macros needed to correctly parse standard headers.
|
261
|
+
Vendor-specific headers may need to use either #pragma prepare_visualstudio
|
262
|
+
(to parse the Microsoft Visual Studio headers) or prepare_gcc (for gcc), the
|
263
|
+
latter may be auto-detected (or may not).
|
264
|
+
Vendor headers tested are VS2003 (incl. DDK) and gcc4 ; ymmv.
|
265
|
+
|
266
|
+
Currently the CPU#compilation of a C code will generate an asm source (text),
|
267
|
+
which may then be parsed & assembled to binary code.
|
268
|
+
|
269
|
+
See ExeFormat#compile_c, and samples/exeencode.rb
|
270
|
+
|
data/TODO
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
List of TODO items, by section, in random order
|
2
|
+
|
3
|
+
Ia32
|
4
|
+
emu fpu
|
5
|
+
add all sse2 instrs
|
6
|
+
realmode
|
7
|
+
|
8
|
+
X86_64
|
9
|
+
decompiler
|
10
|
+
|
11
|
+
CPU
|
12
|
+
Sparc
|
13
|
+
Cell
|
14
|
+
|
15
|
+
Parser
|
16
|
+
Allow single-file multiplexer (C code + Asm + asm16bit + ...)
|
17
|
+
Fix the asm prepro comment issue: '; a /* b\n c ; */' should see 'c'
|
18
|
+
|
19
|
+
Assembler
|
20
|
+
Handle cpu pseudo-instrs (mips 'li' -> lui high + ori low)
|
21
|
+
SplitReloc? (for pseudo-instrs)
|
22
|
+
Ia32 GAS syntax
|
23
|
+
Make the autoimport depend on the target platform and not on the exeformat
|
24
|
+
Encode FPU constants
|
25
|
+
|
26
|
+
Disasm
|
27
|
+
DecodedData
|
28
|
+
Exe decoding generate decodeddata ?
|
29
|
+
Function-local namespace (esp+12 -> esp+var_42)
|
30
|
+
Fix thunk detection (thunk: mov ecx, 42 jmp [iat_thiscall] is not a thunk)
|
31
|
+
Test with ET_REL style exe
|
32
|
+
Store stuff out of mem (to handle big binaries)
|
33
|
+
Better :default usage
|
34
|
+
good on call eax, but not on <600k instrs> ret
|
35
|
+
use binary personality ? (uses call vs uses pushret..)
|
36
|
+
Improve backtrace -> patch di.instr.args exprs
|
37
|
+
path-specific backtracking ( foo: call a ; a: jmp retloc ; bar: call b ; b: jmp retloc ; retloc: ret ; call foo ; ret : last ret trackback should only reach a:)
|
38
|
+
Decode pseudo/macro-instrs (mips 'li')
|
39
|
+
Deoptimizer (instr reordering for readability)
|
40
|
+
Optimizer (deobfuscating)
|
41
|
+
Per-instr context (allows to mix cell/ppc, x86 32/16bits, arm/armthumb..)
|
42
|
+
|
43
|
+
Compiler
|
44
|
+
Optimizer
|
45
|
+
Register allocator
|
46
|
+
Instr reordering
|
47
|
+
Asm intrinsics
|
48
|
+
Asm inline
|
49
|
+
inline functions
|
50
|
+
Separate partial compilation + linking (src1.c -> obj1.o, src2.c -> obj2.o, obj1.o+obj2.o -> bin)
|
51
|
+
Make generic compiler from cpu.instr_binding ?
|
52
|
+
create a cpu.what_instr_has_binding(:a => (:a + :b)) => 'add a, b' ?
|
53
|
+
Shellcode compiler (exit() => mov eax, 1 int 80h inline)
|
54
|
+
|
55
|
+
Decompiler
|
56
|
+
Fix decompiling on loaded savefile
|
57
|
+
Rewrite cpu-specific to really dumb
|
58
|
+
Just translate di.binding to C
|
59
|
+
maybe w/ trivial var dependency check for unused regs, but beware :incomplete instrs deps
|
60
|
+
Check interdependency ('xadd')
|
61
|
+
Move frame pointer checks / stack var detection to C code
|
62
|
+
Update asm listing from info in C (stack vars, stack var names..)
|
63
|
+
Handle renaming/retyping register vars / aliases
|
64
|
+
Handle switch() / computed goto
|
65
|
+
Fix inline asm reg dependencies
|
66
|
+
Handle direct syscalls (mov eax, 1 int 80h => exit())
|
67
|
+
Autodecode structs
|
68
|
+
FPU
|
69
|
+
Handle/hide compiler-generated stuff (getip, stack cookie setup/check..)
|
70
|
+
Handle call 1f ; 1: pop eax
|
71
|
+
More user control (force/forbid register arg, return type, etc)
|
72
|
+
|
73
|
+
Debugger
|
74
|
+
OSX
|
75
|
+
Detour-style functionality to patch binary code (also static to patch exe files?)
|
76
|
+
Move constants in a data/ folder (ptrace reg numbers, syscalls, etc)
|
77
|
+
Generic remote process manip
|
78
|
+
create blank state
|
79
|
+
linux virtualallocex
|
80
|
+
pax-compatible code patch through mmap
|
81
|
+
Remote debugging (small standalone C client)
|
82
|
+
Support dbghelp.dll (ms symbol server info)
|
83
|
+
Support debuggee function call (gdb 'call')
|
84
|
+
Manipulate memory through C struct casts
|
85
|
+
|
86
|
+
ExeFormat
|
87
|
+
Handle minor editing without decode/reencode (eg patch ELF entrypoint)
|
88
|
+
|
89
|
+
ELF
|
90
|
+
test encoding openbsd binaries
|
91
|
+
handle symbol versions
|
92
|
+
LoadedELF.dump
|
93
|
+
Check relocation encoding (eg samples/dynamic_ruby with cpu.generate_PIC=false)
|
94
|
+
|
95
|
+
MachO
|
96
|
+
|
97
|
+
PE
|
98
|
+
resource editor ?
|
99
|
+
rc compiler ?
|
100
|
+
add simple accessor for resource stuff (manifest, icon, ...)
|
101
|
+
|
102
|
+
GUI
|
103
|
+
debugger
|
104
|
+
specialize widgets
|
105
|
+
show breakpoints
|
106
|
+
show jump direction from current flag values
|
107
|
+
have a console frontend
|
108
|
+
better graph positioning fallback
|
109
|
+
zoom font when zooming graph
|
110
|
+
copy/paste, selection
|
111
|
+
map (part of) the binary & debug it (map a PE on a linux host & run it)
|
112
|
+
|
113
|
+
Ruby
|
114
|
+
compile ruby AST to native optimized code
|
@@ -0,0 +1,146 @@
|
|
1
|
+
Metasm source code organisation
|
2
|
+
===============================
|
3
|
+
|
4
|
+
The metasm source code takes advantage of the ruby language facilities,
|
5
|
+
which allows splitting the definition of a single class in multiple files.
|
6
|
+
|
7
|
+
Each file in the source tree holds code related to a particular feature of
|
8
|
+
the framework.
|
9
|
+
|
10
|
+
Directories
|
11
|
+
-----------
|
12
|
+
|
13
|
+
The top-level directories are :
|
14
|
+
|
15
|
+
* `doc/`: this documentation
|
16
|
+
* `metasm/`: the framework core
|
17
|
+
* `samples/`: a set of sample scripts showing various functionalities of the framework
|
18
|
+
* `tests/`: a few unit tests (too few..)
|
19
|
+
* `misc/`: misc ruby scripts, not directly related to metasm
|
20
|
+
|
21
|
+
The core
|
22
|
+
--------
|
23
|
+
|
24
|
+
The `metasm/` directory holds most of the code of the framework, along with the
|
25
|
+
main `metasm.rb` file in the top directory.
|
26
|
+
|
27
|
+
The top-level `metasm.rb` has code to load parts of the framework source on demand
|
28
|
+
in the ruby interpreter, which is implemented with ruby's <const_missing.txt>
|
29
|
+
|
30
|
+
|
31
|
+
Executable formats
|
32
|
+
##################
|
33
|
+
|
34
|
+
The `exe_format/` subdirectory contains the implementations of the various
|
35
|
+
binary file formats supported in the framework.
|
36
|
+
|
37
|
+
Three files have a special meaning here:
|
38
|
+
|
39
|
+
* `main.rb`: it defines the <core/ExeFormat.txt> class
|
40
|
+
* `serialstruct.rb`: here you'll find the definitions of <core/SerialStruct.txt>
|
41
|
+
* `autoexe.rb`: the implementation of <core/AutoExe.txt>, which allows the recognition of arbitrary files from their binary signature.
|
42
|
+
|
43
|
+
The `main.rb` file is included in all other formats, as all file classes
|
44
|
+
are subclasses of `ExeFormat`.
|
45
|
+
|
46
|
+
The `serialstruct.rb` implements a helper class to ease the description of
|
47
|
+
binary structures, and generate parsing/encoding functions for those.
|
48
|
+
|
49
|
+
All other files implement a specific file format handler. The bigger files
|
50
|
+
(`ELF` and `PE/COFF`) are split between the parsing/encoding functions and
|
51
|
+
decoding/disassembly.
|
52
|
+
|
53
|
+
|
54
|
+
CPUs
|
55
|
+
####
|
56
|
+
|
57
|
+
All supported architectures have a dedicated subdirectory, and a helper file
|
58
|
+
that will simply include all the arch-specific files.
|
59
|
+
|
60
|
+
All those files will contribute to add functions to the same class implementing
|
61
|
+
the CPU interface. Not all CPUs implement all those features. They are:
|
62
|
+
|
63
|
+
* `main.rb`: inner classes definitions (for registers etc), generic functions
|
64
|
+
* `opcodes.rb`: initializes the opcode list for the architecture
|
65
|
+
* `encode.rb`: methods to encode instructions
|
66
|
+
* `decode.rb`: methods to decode/emulate instructions
|
67
|
+
* `parse.rb`: methods to parse asm instructions from a source file
|
68
|
+
* `render.rb`: methods to output an instruction to a string
|
69
|
+
* `compile_c.rb`: the C compiler implementation
|
70
|
+
* `decompile.rb`: the arch-specific part of the generic decompiler
|
71
|
+
* `debug.rb`: arch-specific information used when debugging targets of this architecture
|
72
|
+
|
73
|
+
In some cases the files are small enough to be all merged into the `main.rb` file.
|
74
|
+
|
75
|
+
|
76
|
+
Operating systems
|
77
|
+
#################
|
78
|
+
|
79
|
+
The `os/` subdirectory holds the code used to abstract an operating system.
|
80
|
+
|
81
|
+
The files here define an API allowing to enumerate running processes, and interact
|
82
|
+
with them in various ways. The <core/Debugger.txt> class and subclasses are
|
83
|
+
defined there.
|
84
|
+
|
85
|
+
Those files also hold the list of known functions and in which system libraries
|
86
|
+
they can be found (see <core/WindowsExports.txt> or <core/GNUExports.txt>), which
|
87
|
+
are used when linking executable files.
|
88
|
+
|
89
|
+
|
90
|
+
Graphical user-interface
|
91
|
+
########################
|
92
|
+
|
93
|
+
The `gui/` subdirectory contains the code needed by the metasm graphical user-interfaces.
|
94
|
+
|
95
|
+
Currently those include the disassembler and the debugger (see the *samples* section).
|
96
|
+
|
97
|
+
Those GUI elements are implemented using a custom GUI abstraction, and reside in the
|
98
|
+
various `dasm_*.rb` and `debug.rb`.
|
99
|
+
|
100
|
+
The actual implementations of the GUI are found in:
|
101
|
+
|
102
|
+
* `win32.rb`: the native Win32 API backend
|
103
|
+
* `gtk.rb`: a Gtk2 backend, intended for unix platforms
|
104
|
+
* `qt.rb`: a Qt backend experiment
|
105
|
+
|
106
|
+
Please note that the Qt backend does not work *at all*.
|
107
|
+
|
108
|
+
The `gui.rb` file in the main directory is used to choose among the available GUI backends
|
109
|
+
the most appropriate for the current session.
|
110
|
+
|
111
|
+
|
112
|
+
Others
|
113
|
+
######
|
114
|
+
|
115
|
+
The other files directly in the `metasm/` directory are either support files
|
116
|
+
(eg `encode.rb`, `parse.rb`) that hold generic functions to be used by
|
117
|
+
specific cpu/exeformat instances, or implement arch-agnostic features.
|
118
|
+
Those include:
|
119
|
+
|
120
|
+
* `preprocessor.rb`: the C/asm preprocessor/lexer
|
121
|
+
* `parse_c.rb`: this is the implementation of the C parser
|
122
|
+
* `compile_c.rb`: this is a C precompiler, it generates a very simplified C from a standard source
|
123
|
+
* `decompile.rb`: the generic decompiler code, it uses arch-specific functions defined in the arch folder
|
124
|
+
* `dynldr.rb`: this module is used when interacting directly with the host operating system through <core/DynLdr.txt>
|
125
|
+
|
126
|
+
|
127
|
+
The samples
|
128
|
+
-----------
|
129
|
+
|
130
|
+
The `samples/` directory contains a lot of small files that intend to be
|
131
|
+
examples of how to use the framework. It also holds experiments and
|
132
|
+
work-in-progress for features that may later be integrated into the main
|
133
|
+
framework.
|
134
|
+
|
135
|
+
The comment at the beginning of the file should be clear about the purpose
|
136
|
+
of the script, and the scripts are expected to be copy/pasted and tweaked
|
137
|
+
for the specific task needed by the user (that's you).
|
138
|
+
|
139
|
+
Some of those files however are full-featured applications:
|
140
|
+
|
141
|
+
* `exeencode.rb`: a shellcode compiler, with its `peencode.rb`, `elfencode.rb`, `machoencode.rb` counterparts
|
142
|
+
* `disassemble.rb`: a disassembler
|
143
|
+
* `disassemble-gui.rb`: the graphical disassembler / debugger
|
144
|
+
|
145
|
+
The `samples/dasm-plugins/` subdirectory holds various plugins for the disassembler.
|
146
|
+
|