metasm 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
data/README CHANGED
@@ -21,6 +21,10 @@ Ready-to-use scripts can be found in the samples/ subdirectory, check the
21
21
  comments in the scripts headers. You can also try the --help argument if
22
22
  you're feeling lucky.
23
23
 
24
+ For more information, check the doc/ subdirectory. The text files can be
25
+ compiled to html using the misc/txt2html.rb script.
26
+
27
+
24
28
 
25
29
  Here is a short overview of the Metasm internals.
26
30
 
@@ -167,8 +171,8 @@ You can encode/decode an ExeFormat (ie decode sections, imports, headers etc)
167
171
  Constructor: ExeFormat.decode_file(str), ExeFormat.decode_file_header(str)
168
172
  Methods: ExeFormat#encode_file(filename), ExeFormat#encode_string
169
173
 
170
- PE and ELF files have a LoadedPE/LoadedELF counterpart, that is able to work
171
- with memory-mmaped versions of those formats (e.g. to debugging running
174
+ PE and ELF files have LoadedPE/LoadedELF counterparts, that are able to work
175
+ with memory-mapped versions of those formats (e.g. to debug running
172
176
  processes)
173
177
 
174
178
 
@@ -198,27 +202,31 @@ disassembly/patching easily (using LoadedPE/LoadedELF as ExeFormat)
198
202
 
199
203
  Debugging:
200
204
 
201
- Metasm includes a few interfaces to allow live debugging.
205
+ Metasm includes a few interfaces to handle debugging.
202
206
  The WinOS and LinOS classes offer access to the underlying OS processes (e.g.
203
207
  OS.current.find_process('foobar') will retrieve a running process with foobar
204
208
  in its filename ; then process.mem can be used to access its memory.)
205
209
 
206
- The Windows and Linux debugging APIs (x86 only) have a basic ruby interface
207
- (PTrace32, extended in samples/rubstop.rb ; and WinDBG, a simple mapping of the
208
- windows debugging API) ; those will be more worked on/integrated in the future.
210
+ The Windows and Linux low-level debugging APIs have a basic ruby interface
211
+ (PTrace and WinAPI) ; which are used by the unified high-end Debugger class.
212
+ Remote debugging is supported through the GDB server wire protocol.
209
213
 
210
- A linux console debugging interface is available in samples/lindebug.rb ; it
211
- uses a SoftICE-like look and feel.
212
- This interface can talk to a gdb-server through samples/gdbclient.rb ; use
213
- [udp:]<host:port> as target.
214
+ High-level debuggers can be created with the following ruby line:
215
+ Metasm::OS.current.create_debugger('foo')
214
216
 
215
- The disassembler scripts allow live process interaction by using as target
216
- 'live:<pid or part of filename>'.
217
+ Only one kind of host debugger class can exist at a time ; to debug multiple
218
+ processes, attach to other processes using the existing class. This is due
219
+ to the way the OS debugging API works on Windows and Linux.
217
220
 
218
- A generic debugging interface is available, it is defined in metasm/os/main.rb
219
- It may be accessed using the Metasm::OS.current.create_debugger('foo')
221
+ The low-level backends are defined in the os/ subdirectory, the front-end is
222
+ defined in debug.rb.
220
223
 
221
- It can be viewed in action using the GUI and 'open live' target.
224
+ A linux console debugging interface is available in samples/lindebug.rb ; it
225
+ uses a (simplified) SoftICE-like look and feel.
226
+ It can talk to a gdb-server socket ; use a [udp:]<host:port> target.
227
+
228
+ The disassembler-gui sample allows live process interaction when using as
229
+ target 'live:<pid or part of program name>'.
222
230
 
223
231
 
224
232
  C Parser:
@@ -236,7 +244,11 @@ It handles all the constructs i am aware of, except hex floats:
236
244
  - __int8 etc native types
237
245
  - Label addresses (&&label)
238
246
  Also note that all those things are parsed, but most of them will fail to
239
- compile on the Ia32 backend (the only one implemented so far.)
247
+ compile on the Ia32/X64 backends (the only ones implemented so far.)
248
+
249
+ Parsing C files should be done using an existing ExeFormat, with the
250
+ parse_c_file method. This ensures that format-specific macros/ABI are correctly
251
+ defined (ex: size of the 'long' type, ABI to pass parameters to functions, etc)
240
252
 
241
253
  When you parse a C String using C::Parser.parse(text), you receive a Parser
242
254
  object. It holds a #toplevel field, which is a C::Block, which holds #structs,
@@ -249,15 +261,11 @@ CExpressions...)
249
261
 
250
262
  A C::Parser may be #precompiled to transform it into a simplified version that
251
263
  is easier to compile: typedefs are removed, control sequences are transformed
252
- in if () goto ; etc.
264
+ into 'if (XX) goto YY;' etc.
253
265
 
254
266
  To compile a C program, use PE/ELF.compile_c, that will create a C::Parser with
255
267
  exe-specific macros defined (eg __PE__ or __ELF__).
256
268
 
257
- The prefered way to create a C::Parser is to initialize it with a CPU and the
258
- desired ExeFormat, so that it is
259
- correctly initialized (eg type sizes: is long 4 or 8 bytes? etc) ; and
260
- may define preprocessor macros needed to correctly parse standard headers.
261
269
  Vendor-specific headers may need to use either #pragma prepare_visualstudio
262
270
  (to parse the Microsoft Visual Studio headers) or prepare_gcc (for gcc), the
263
271
  latter may be auto-detected (or may not).
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "rake/testtask"
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.test_files = FileList['tests/*.rb']
7
+ end
8
+
9
+ task default: :test
10
+
data/TODO CHANGED
@@ -1,14 +1,13 @@
1
1
  List of TODO items, by section, in random order
2
2
 
3
3
  Ia32
4
- emu fpu
5
- add all sse2 instrs
6
4
  realmode
7
5
 
8
6
  X86_64
9
7
  decompiler
10
8
 
11
9
  CPU
10
+ Arm
12
11
  Sparc
13
12
  Cell
14
13
 
@@ -26,19 +25,20 @@ Assembler
26
25
  Disasm
27
26
  DecodedData
28
27
  Exe decoding generate decodeddata ?
29
- Function-local namespace (esp+12 -> esp+var_42)
30
28
  Fix thunk detection (thunk: mov ecx, 42 jmp [iat_thiscall] is not a thunk)
31
29
  Test with ET_REL style exe
32
30
  Store stuff out of mem (to handle big binaries)
33
31
  Better :default usage
34
32
  good on call eax, but not on <600k instrs> ret
35
33
  use binary personality ? (uses call vs uses pushret..)
36
- Improve backtrace -> patch di.instr.args exprs
34
+ Improve 'backtrace => patch di.instr.args'
37
35
  path-specific backtracking ( foo: call a ; a: jmp retloc ; bar: call b ; b: jmp retloc ; retloc: ret ; call foo ; ret : last ret trackback should only reach a:)
38
36
  Decode pseudo/macro-instrs (mips 'li')
39
37
  Deoptimizer (instr reordering for readability)
40
38
  Optimizer (deobfuscating)
41
39
  Per-instr context (allows to mix cell/ppc, x86 32/16bits, arm/armthumb..)
40
+ Better save/load dasm state
41
+ Parse symbol.map generated by IDA for ELF files
42
42
 
43
43
  Compiler
44
44
  Optimizer
@@ -69,6 +69,7 @@ Decompiler
69
69
  Handle/hide compiler-generated stuff (getip, stack cookie setup/check..)
70
70
  Handle call 1f ; 1: pop eax
71
71
  More user control (force/forbid register arg, return type, etc)
72
+ Preserve C decompiled line association to range of asm decoded addrs
72
73
 
73
74
  Debugger
74
75
  OSX
@@ -77,14 +78,9 @@ Debugger
77
78
  Generic remote process manip
78
79
  create blank state
79
80
  linux virtualallocex
80
- pax-compatible code patch through mmap
81
81
  Remote debugging (small standalone C client)
82
82
  Support dbghelp.dll (ms symbol server info)
83
83
  Support debugee function call (gdb 'call')
84
- Manipulate memory through C struct casts
85
-
86
- ExeFormat
87
- Handle minor editing without decode/reencode (eg patch ELF entrypoint)
88
84
 
89
85
  ELF
90
86
  test encoding openbsd binaries
@@ -98,6 +94,7 @@ PE
98
94
  resource editor ?
99
95
  rc compiler ?
100
96
  add simple accessor for resource stuff (manifest, icon, ...)
97
+ parse PDB
101
98
 
102
99
  GUI
103
100
  debugger
@@ -105,10 +102,11 @@ GUI
105
102
  show breakpoints
106
103
  show jump direction from current flag values
107
104
  have a console frontend
108
- better graph positionning fallback
109
105
  zoom font when zooming graph
110
- copy/paste, selection
106
+ text selection
107
+ copy/paste
111
108
  map (part of) the binary & debug it (map a PE on a linux host & run it)
109
+ html frontend
112
110
 
113
111
  Ruby
114
- compile ruby AST to native optimized code
112
+ write a fast ruby-like interpreter
@@ -54,6 +54,8 @@ decoding/disassembly.
54
54
  CPUs
55
55
  ####
56
56
 
57
+ The cpu-specific code is stored inside the `cpu/` subdirectory.
58
+
57
59
  All supported architectures have a dedicated subdirectory, and a helper file
58
60
  that will simply include all the arch-specific files.
59
61
 
@@ -0,0 +1,247 @@
1
+ DynLdr
2
+ ======
3
+
4
+ DynLdr is a class that uses metasm to dynamically add native methods,
5
+ or native method wrappers, available to the running ruby interpreter.
6
+
7
+ It leverages the built-in C parser / compiler.
8
+
9
+ It is implemented in `metasm/dynldr.rb`.
10
+
11
+ Currently only supported for <core/Ia32.txt> and <core/X86_64.txt> under
12
+ Windows and Linux.
13
+
14
+
15
+ Basics
16
+ ------
17
+
18
+ Native library wrapper
19
+ ######################
20
+
21
+ The main usage is to generate interfaces to native libraries.
22
+
23
+ This is done through the `#new_api_c` method.
24
+
25
+ The following example will read the specified C header fragment,
26
+ define ruby constants for all `#define`/`enum`, and define ruby
27
+ method wrappers to call the native functions whose prototype is
28
+ present in the header.
29
+
30
+ All referenced native functions must be exported by the given
31
+ library file.
32
+
33
+ class MyInterface < DynLdr
34
+ c_header = <<EOS
35
+ #define SomeConst 42
36
+ enum { V1, V2 };
37
+
38
+ __stdcall int methodist(char*, int);
39
+ EOS
40
+
41
+ new_api_c c_header, 'mylib.dll'
42
+ end
43
+
44
+ Then you can call, from the ruby:
45
+
46
+ MyInterface.methodist("lol", MyInterface::SOMECONST)
47
+
48
+ Constant/enum names are converted to full uppercase, and method
49
+ names are converted to full lowercase.
50
+
51
+ Dynamic native inline function
52
+ ##############################
53
+
54
+ You can also dynamically compile native functions, that are compiled
55
+ in memory and copied to RWX memory with the right ruby wrapper:
56
+
57
+ class MyInterface < DynLdr
58
+ new_func_c <<EOS
59
+ int bla(char*arg) {
60
+ if (strlen(arg) > 4)
61
+ return 1;
62
+ else
63
+ return 0;
64
+ }
65
+ EOS
66
+ end
67
+
68
+ References to external functions are allowed, and resolved automatically.
69
+
70
+ The ruby objects used as arguments to the wrapper method are
71
+ automatically converted to the right C type.
72
+
73
+
74
+ You can also write native functions in assembly, but you must specify a
75
+ C prototype, used for argument and return value conversion.
76
+
77
+ class MyInterface < DynLdr
78
+ new_func_asm "int increment(int i);", <<EOS
79
+ mov eax, [esp+4]
80
+ inc eax
81
+ ret
82
+ EOS
83
+
84
+ p increment(4)
85
+
86
+ end
87
+
88
+
89
+ Structures
90
+ ----------
91
+
92
+ `DynLdr` handles C structures.
93
+
94
+ Once a structure is specified in the C part, you can create a ruby object
95
+ using `MyClass.alloc_c_struct(structname)`, which will allocate an object of the
96
+ right size to hold all the structure members, and with the right accessors.
97
+
98
+ To access/modify struct members, you can either use a `Hash`-style access
99
+
100
+ structobj['membername'] = 42
101
+
102
+ or `Struct`-style access
103
+
104
+ structobj.membername = 42
105
+
106
+ Member names are matched case-insensitively, and nested structures/unions
107
+ are also searched.
108
+
109
+ The struct members can be initially populated by passing a `Hash` argument
110
+ to the `alloc_c_struct` constructor. Additionally, this hash may use the
111
+ special value `:size` to reference the byte size of the current structure.
112
+
113
+ class MyInterface < DynLdr
114
+ new_api_c <<EOS
115
+ struct sname {
116
+ int s_mysize;
117
+ int s_value;
118
+ union {
119
+ struct {
120
+ int s_bits:4;
121
+ int s_bits2:4;
122
+ };
123
+ int s_union;
124
+ }
125
+ };
126
+ EOS
127
+ end
128
+
129
+ # field s_mysize holds the size of the structure in bytes, ie 12
130
+ s_obj = MyInterface.alloc_c_struct('sname', :s_mysize => :size, :s_value => 42)
131
+
132
+ # we can access fields using Hash-style access
133
+ s_obj['s_UniOn'] = 0xa8
134
+
135
+ # or Struct-style access
136
+ puts '0x%x' % s_obj.s_BiTS2 # => '0xa'
137
+
138
+ This object can be directly passed as argument to a wrapped function, and
139
+ the native function will receive a pointer to this structure (that it can
140
+ freely modify).
141
+
142
+ This object is a `C::AllocStruct`, defined in `metasm/parse_c.rb`.
143
+ Internally, it is based on a ruby `String`, and has a reference to the parser's
144
+ `Struct` to find the mapping membername -> offsets/length.
145
+
146
+ See <core/CParser.txt> for more details.
147
+
148
+
149
+ Callbacks
150
+ ---------
151
+
152
+ `DynLdr` handles C callbacks, with arbitrary ABI.
153
+
154
+ Any number of callbacks can be defined at any time.
155
+
156
+ C callbacks are backed by a ruby `Proc`, eg `lambda {}`.
157
+
158
+
159
+ class MyInterface < DynLdr
160
+ new_api_c <<EOS
161
+ void qsort(void *, int, int, int(*)(void*, void*));
162
+ EOS
163
+
164
+ str = "sanotheusnaonetuh"
165
+ cmp = lambda { |p1, p2|
166
+ memory_read(p1, 1) <=> memory_read(p2, 1)
167
+ }
168
+ qsort(str, str.length, 1, cmp)
169
+ p str
170
+ end
171
+
172
+
173
+
174
+ Argument conversion
175
+ -------------------
176
+
177
+ Ruby objects passed to a wrapper method are converted to the corresponding
178
+ C type
179
+
180
+ * `Strings` are converted to a C pointer to the byte buffer (also directly
181
+ accessible from the ruby through `DynLdr.str_ptr(obj)`)
182
+ * `Integers` are converted to their C equivalent, according to the prototype
183
+ (`char`, `unsigned long long`, ...)
184
+ * `Procs` are converted to a C callback
185
+ * `Floats` are not supported for now.
186
+
187
+
188
+ Working with memory
189
+ -------------------
190
+
191
+ DynLdr provides different ways to allocate memory.
192
+
193
+ * `alloc_c_struct` to allocate a C structure
194
+ * `alloc_c_ary` to allocate C array of some type
195
+ * `alloc_c_ptr`, which is just an ary of size 1
196
+ * `memory_alloc` allocates memory from a new memory page
197
+
198
+ `memory_alloc` works by calling `mmap` under linux and `VirtualAlloc` under windows,
199
+ and is suitable for allocating memory where you want to control
200
+ the memory permissions (read, write, execute). This is done through `memory_perm`.
201
+
202
+ `memory_perm` takes as arguments the start address, the length, and the new permission, specified as a String (e.g. 'r', 'rwx').
203
+
204
+ To work with memory that may be returned by an API (e.g. `malloc`),
205
+ DynLdr provides ways to read and write arbitrary pointers from the ruby
206
+ interpreter memory.
207
+ Take care, those may generate faults when called with invalid addresses that
208
+ will crash the ruby interpreter.
209
+
210
+ * `memory_read` takes a pointer and a length, and returns a String
211
+ * `memory_read_int` takes a pointer, and returns an Integer (of pointer size,
212
+ e.g. 64 bit in a 64-bit interpreter)
213
+ * `memory_write` takes a pointer and a String, and writes it to memory
214
+ * `memory_write_int`
215
+
216
+
217
+ Hacking
218
+ -------
219
+
220
+ Internally, DynLdr relies on a number of features that are not directly
221
+ available from the ruby interpreter.
222
+
223
+ So the first thing done by the script is to generate a binary native module
224
+ that will act as a C extension to the ruby interpreter.
225
+ This binary is necessarily different depending on the interpreter.
226
+ The binary name includes the target architecture, in the format
227
+ dynldr-*arch*-*cpu*-*19*.so, e.g.
228
+
229
+ * dynldr-linux-ia32.so
230
+ * dynldr-windows-x64-19.so
231
+
232
+ This native module is (re)generated if it does not exist, or is older than the
233
+ `dynldr.rb` script.
234
+
235
+ A special trick is used in this module, as it does not know the actual name
236
+ of the ruby library used by the interpreter. So on linux, the `libruby` is
237
+ removed from the `DT_NEEDED` library list, and on windows a special stub
238
+ is assembled to manually resolve the ruby imports needed by the module from
239
+ any instance of `libruby` present in the running process.
240
+
241
+ The native file is written to a directory writable by the current user.
242
+ The following directories are tried, until a suitable one is found:
243
+
244
+ * the `metasm` directory itself
245
+ * the `$HOME`/`$APPDATA`/`$USERPROFILE` directory
246
+ * the `$TMP`/`$TEMP`/current directory
247
+
@@ -0,0 +1,43 @@
1
+ ExeFormat
2
+ =========
3
+
4
+ This class is the parent of all executable format handlers.
5
+
6
+ It is defined in `metasm/exe_format/main.rb`.
7
+
8
+ It defines some standard shortcut functions, such as:
9
+
10
+ * `Exe.decode_file(filename)`
11
+ * `Exe.assemble(cpu,asm_source)`
12
+ * `Exe.compile_c(cpu,c_source)`
13
+ * `Exe#encode_file(filename)`
14
+
15
+ These methods will instantiate a new Exe, and call the corresponding
16
+ methods, *e.g.* `load` with the file content, and `decode`.
17
+
18
+ The handling of the different structures in the binary format should be
19
+ done using the <core/SerialStruct.txt> facility.
20
+
21
+ The subclasses are expected to implement various functions, depending on the
22
+ usage (refer to the ELF and COFF implementations for more details):
23
+
24
+ File decoding/disassembly
25
+ -------------------------
26
+
27
+ * `#decode_header`: parse only the file header from the raw data in `#encoded`
28
+ * `#decode`: parse all the raw data in `#encoded`
29
+ * `#cpu_from_headers`: return a <core/CPU.txt> instance according to the exe header information
30
+ * `#get_default_entrypoints`: the list of entrypoints (exported functions, etc)
31
+ * `#dump_section_header`: return a string that may be assembled to recreate the specified section
32
+ * `#section_info`: return a list of generic section information for the disassembler
33
+
34
+
35
+ File encoding/source parsing
36
+ ----------------------------
37
+
38
+ * `#tune_prepro`: define exe-specific macros for the preprocessor (optional)
39
+ * `#parse_init`: initialize the `@cursource` array to receive the parsed asm source
40
+ * `#parse_parser_instruction`: parse exe-specific instructions, eg `.text`, `.import`...
41
+ * `#assemble`: assemble the content of `@cursource` into binary section contents
42
+ * `#encode`: assemble the various sections and a binary header into `@encoded`
43
+