metasm 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -0,0 +1,108 @@
1
+ SerialStruct
2
+ ============
3
+
4
+ This is a helper class to handle binary packed data, especially to
5
+ represent <core/ExeFormat.txt> structures.
6
+
7
+ The implementation is in `metasm/exe_format/serialstruct.rb`.
8
+
9
+ Basics
10
+ ------
11
+
12
+ The class defines some class methods, such as:
13
+
14
+ * `dword`
15
+ * `byte`
16
+ * `strz`
17
+
18
+ These methods can be used directly in subclass definitions, e.g.
19
+
20
+ class MyHeader < SerialStruct
21
+ dword :signature
22
+ dword :length
23
+ end
24
+
25
+ This will associate the sequence of fields to this structure, which
26
+ is used in the `#encode` and `#decode` methods.
27
+ These methods rely on an <core/ExeFormat.txt> instance to define
28
+ the corresponding `decode_dword` and `encode_dword` methods.
29
+
30
+ You can then simply call:
31
+
32
+ hdr = MyHeader.decode(myexefmt)
33
+
34
+ which will call `myexefmt.decode_dword` twice to populate the
35
+ `signature` and `length` fields of the MyHeader instance.
36
+
37
+ You can also redefine the `#decode` method to handle special cases.
38
+
39
+
40
+ The fields defined this way can be assigned a default value that
41
+ will be used when encoding the structure. The syntax is:
42
+
43
+ dword :fieldname, defaultvalue
44
+
45
+ If you have a long sequence of identically-typed fields, you can use
46
+ the plural form:
47
+
48
+ dwords :f1, :f2, :f3, :f4
49
+
50
+ To define your own field types, you should create a new subclass and call the
51
+ `new_field` class method. For integral fields, use `new_int_field(fldname)`
52
+ that will automatically define the decode/encode routines, and create the
53
+ plural form.
54
+
55
+ class MyStruct < SerialStruct
56
+ new_int_field :zword
57
+ zwords :offset, :length
58
+ end
59
+
60
+
61
+ Symbolic constants
62
+ ------------------
63
+
64
+ The class has built-in support for symbolic constants and bit fields.
65
+
66
+ For example, suppose you have a numeric `:type` field, which corresponds
67
+ to a set of numeric constants `TYPE_FOO TYPE_BAR TYPE_LOL`. You can use:
68
+
69
+ TYPES = { 2 => 'FOO', 3 => 'BAR', 4 => 'LOL' }
70
+
71
+ dword :type
72
+ fld_enum :type, TYPES
73
+
74
+ With this, the standard '#decode' method will first decode the numeric value
75
+ of the field, and then lookup the value in the enum hash to find the
76
+ corresponding symbol, and use it as the field value.
77
+ If there is no mapping, the numeric value is retained. The reverse operation
78
+ is done with `#encode`.
79
+
80
+ For the bitfields, the method is `fld_bits`, and the software will try to
81
+ match *OR-ed* values from the bitfield to generate an array of symbols.
82
+
83
+ BITS = { 1 => 'B1', 2 => 'B2', 4 => 'B4' }
84
+
85
+ dword :foo
86
+ fld_bits :foo, BITS
87
+
88
+ which will give, for the numeric value `0x15`, `["B1", "B4", 0x10]`
89
+
90
+ The hashes used for fld_bits or fld_enum can be dynamically determined, by
91
+ using the block version of those methods. The block will receive the ExeFormat
92
+ instance and the SerialStruct instance, and should return the Hash.
93
+ This can be useful when a bitfield's meaning varies depending on some generic
94
+ property of the exe, eg the target architecture.
95
+
96
+
97
+ Hooks
98
+ -----
99
+
100
+ It is also possible to define a hook that will be called at some point during
101
+ the object binary decoding. It will receive the exe and struct instances.
102
+
103
+ class Header < SerialStruct
104
+ dword :machine
105
+ decode_hook { |exe, hdr| raise "unknown machine" if hdr.machine > 4 }
106
+ dword :bodylength
107
+ end
108
+
@@ -0,0 +1,145 @@
1
+ VirtualString
2
+ =============
3
+
4
+ This class is an abstract representation of an arbitrary sized byte array
5
+ with methods to load parts of it on demand. It is useful to represent
6
+ a program virtual memory and allow metasm to work on it while only reading
7
+ bytes from it when actually needed.
8
+
9
+ The base class is defined in `metasm/os/main.rb`.
10
+
11
+
12
+ Basics
13
+ ------
14
+
15
+ The API of the object is designed to be compatible with a standard String (ASCII-8BIT).
16
+ The main restriction is that the size of this string cannot be changed:
17
+ concatenation / shortening is not supported.
18
+
19
+ The main operation on the object should be `[]` and `[]=`, that is,
20
+ reading some subpart of the string, or overwriting some substring.
21
+ The arguments are the same as for a String, with the exception that
22
+ rewrite raises an IndexError if the rewriting would change the string
23
+ length.
24
+
25
+ A few methods are written specifically with the VirtualString semantics,
26
+ others are redirected to a temporary real String generated with `realstring`.
27
+
28
+ The VirtualString works with a `page` concept, that represents some arbitrary
29
+ chunks of data that can be actually read from the underlying target, e.g. a
30
+ memory page (4096 bytes) when mapping a process virtual address space.
31
+ Instances get to define a `pagelength` that is sensible for the specific implementation.
32
+
33
+ Whenever a substring is requested from a VirtualString, if the substring
34
+ length is less than the page size, an actual read is made and a String is
35
+ returned.
36
+
37
+ If the length is greater however, a new VirtualString is created to map this
38
+ new *view* without actually reading.
39
+
40
+ To force the conversion to a String, use the `realstring` or `to_str` method.
41
+ The latter is preferred, as it works on both Strings and VirtualStrings.
42
+
43
+ To force the creation of a new VirtualString, use the `dup(start, len)` method.
44
+
45
+ When reading actual bytes, a local page cache is used. By default it has only 4
46
+ pages, and can be invalidated using `invalidate`.
47
+ The cache is automatically invalidated when part of the string is written to.
48
+
49
+ The VirtualString may index *invalid* pages (e.g. unmapped memory range in a
50
+ process address space) ; you can check that with `page_invalid?` with an index
51
+ as parameter.
52
+
53
+
54
+ Creation
55
+ --------
56
+
57
+ To create your own flavor of VirtualString, you must:
58
+
59
+ * define your subclass that inherits from `VirtualString`
60
+ * define your initializer, that takes whatever arguments make sense (e.g. a
61
+ *pid*, *handle*, Socket..)
62
+ * your initializer must call super(a, l) with arguments:
63
+ ** current view absolute address (should default to 0), will be saved in
64
+ `@addr_start`
65
+ ** current view size (should default to something sensible, like 1<<32), saved
66
+ in `@length`
67
+ * your initializer can override the default page size by defining the
68
+ `@pagelength` variable.
69
+ * implement a `dup` method that takes optional arguments:
70
+ ** new base address (default=`@addr_start`)
71
+ ** new length (default=`@length`)
72
+ ** returns a new instance of your class mapping over the specified window
73
+ * implement a `get_page` method, whose arguments are:
74
+ ** absolute page address (will always be page-aligned)
75
+ ** optional length, default=`@pagelength`
76
+ ** returns a String of `length` bytes, or `nil` (e.g. unmapped area)
77
+ * optionally implement a `rewrite_at` method, to make your string writeable.
78
+ Arguments are the absolute write address, and the data to write there (a String).
79
+
80
+ Feel free to override any other method with an optimized version.
81
+ For example, the default `realstring` will repeatedly call `get_page` with
82
+ each page in the range 0..`length`, you may have a more efficient alternative.
83
+
84
+ You can alter the cache size by rewriting the `@pagecache_len` variable
85
+ **after** calling `super()` in `initialize`. The default value is 4, which you
86
+ may want to increase.
87
+
88
+ See the `WindowsRemoteString` source for a simple example (ignore the `open_pid`
89
+ method).
90
+
91
+ Standard subclasses
92
+ -------------------
93
+
94
+ VirtualFile
95
+ ###########
96
+
97
+ Defined in `metasm/os/main.rb`.
98
+
99
+ This class maps over an open file descriptor, and allows reading data on-demand.
100
+ It implements the `read` class method, similar to `File.read`, with the
101
+ file opened in binary mode. For a small file (<=4096), the content is
102
+ directly returned, otherwise a VirtualString is created.
103
+
104
+ This class is used by the default <core/ExeFormat.txt> `decode_file[_header]`
105
+ methods.
106
+
107
+
108
+ LinuxRemoteString
109
+ #################
110
+
111
+ Defined in `metasm/os/linux.rb`.
112
+
113
+ This class maps over the virtual memory of a Linux process.
114
+ Accesses are done through the `/proc/<pid>/mem` for reading.
115
+ The linux kernel requires that the target process be ptraced before we can
116
+ read this file, so the object will use the debugger instance passed to the
117
+ constructor, or create a new <core/PTrace.txt> object to stop the process
118
+ and read its memory during `get_page`.
119
+
120
+ If a <core/Debugger.txt> object was given, `get_page` will return `nil` if the
121
+ debugger indicates that the target is not stopped.
122
+
123
+ Writing is done through `PTrace#writemem` using `PTRACE_POKEDATA`.
124
+
125
+
126
+ WindowsRemoteString
127
+ ###################
128
+
129
+ Defined in `metasm/os/windows.rb`.
130
+
131
+ This class maps over the virtual memory of a Windows process.
132
+
133
+ The memory accesses are done using the `Read/WriteProcessMemory` API.
134
+
135
+ The class method `open_pid` is defined, that will try to `OpenProcess`
136
+ first in read/write, and fallback to read-only mode.
137
+
138
+
139
+ GdbRemoteString
140
+ ###############
141
+
142
+ Defined in `metasm/os/gdbremote.rb`.
143
+
144
+ Maps over the virtual memory of a remote process debugged with a
145
+ <core/GdbClient.txt> instance, using `setmem` and `getmem`.
@@ -0,0 +1,61 @@
1
+ WindowsExports
2
+ ==============
3
+
4
+ This class is defined in `metasm/os/windows_exports.rb`
5
+
6
+ It defines an `EXPORT` constant, a Hash, whose keys
7
+ are the standard win32 API symbol names, and values
8
+ are the library name where you can find this symbol.
9
+
10
+ The equivalent for GNU/Linux is <core/GNUExports.txt>
11
+
12
+ Usage
13
+ -----
14
+
15
+ The main usage of this class is the automatic generation
16
+ of the <core/PE.txt> import directories from the
17
+ external symbols referenced by a binary during compilation.
18
+
19
+ This is done in the `automagic_symbols` method.
20
+
21
+ Symbols
22
+ -------
23
+
24
+ The current version holds the symbols available in the
25
+ Windows XP SP2 32-bit standard libraries:
26
+
27
+ * `ntdll`
28
+ * `kernel32`
29
+ * `user32`
30
+ * `gdi32`
31
+ * `advapi32`
32
+ * `ws2_32`
33
+ * `msvcrt`
34
+ * `comdlg32`
35
+ * `psapi`
36
+
37
+
38
+ Ruby symbols are also defined, from `msvcrt-ruby18`.
39
+
40
+
41
+ Ruby library name
42
+ -----------------
43
+
44
+ On creation, the current ruby library name is inferred
45
+ from the `RUBY_PLATFORM` constant, in an effort to
46
+ try to use the available ruby library filename.
47
+
48
+ The only transformation supported now is to rewrite
49
+ the ruby version number appearing in the filename for
50
+ msvcrt-compiled binaries, so that you get the correct
51
+ `msvcrt-ruby192` name, for example, under ruby1.9.
52
+
53
+ This is implemented in the `patch_rubylib_to_current_interpreter`
54
+ method (which is aptly named).
55
+
56
+ Warning
57
+ #######
58
+
59
+ Note that binaries compiled this way will not work on
60
+ other machines where the exact same library is unavailable.
61
+
@@ -0,0 +1 @@
1
+ See <core_classes.txt>
@@ -1,3 +1,6 @@
1
- span.quote {
2
- font-family: monospace;
3
- }
1
+ body { background-color: #002b36; color: #839496; }
2
+ a { text-decoration: none; color:#268bd2; }
3
+ a:hover, a:visited, a:active { color:#2aa198; }
4
+ a:hover { text-decoration: underline; }
5
+ a.brokenlink { color: #dc322f; }
6
+ span.quote { font-family: monospace; }
@@ -0,0 +1,327 @@
1
+ The Debugger
2
+ ============
3
+
4
+ Metasm includes functionality to communicate with some operating system
5
+ debugging infrastructures.
6
+
7
+ Currently supported:
8
+
9
+ * Windows (x86, x64)
10
+ * Linux (x86, x64)
11
+
12
+ Generic interface
13
+ -----------------
14
+
15
+ Metasm exposes a generic API that will work on all supported platforms.
16
+
17
+ This interface is implemented using system-specific classes, that you may
18
+ directly access for tighter control.
19
+
20
+ Global operating system interface is available through the <core/OS.txt> class.
21
+ It has methods to enumerate live processes, spawn new processes, and provide
22
+ process/thread access.
23
+
24
+ Individual process debugging is wrapped in the <core/Debugger.txt> class.
25
+
26
+ Windows debugging
27
+ -----------------
28
+
29
+ The windows debugger relies on <core/DynLdr.txt> to interface directly with
30
+ the *Win32_API*.
31
+
32
+ Support exists for 32-bit process, and, using a 64-bit ruby interpreter, for
33
+ 64-bit process and WoW64-process debugging.
34
+
35
+ The operating system wrapper is <core/WinOS.txt>, the debugger is
36
+ <core/WinDebugger.txt>.
37
+
38
+ Linux debugging
39
+ ---------------
40
+
41
+ The linux debugger relies on the */proc* filesystem for process and thread
42
+ enumeration, process memory access, etc ; and on the *ptrace* syscall
43
+ (through `Kernel.syscall()`) for actual debugging.
44
+
45
+ You'll need a 64-bit ruby interpreter to debug 64-bit target processes.
46
+
47
+ The operating system wrapper is <core/LinOS.txt>, the debugger is
48
+ <core/LinDebugger.txt>.
49
+
50
+ Due to linux limitations, the memory of a process is accessible for read or
51
+ write only if one of its threads is stopped by the debugger.
52
+
53
+ Remote debugging
54
+ ----------------
55
+
56
+ Metasm also implements a client for the *GdbServer* protocol.
57
+ See <core/GdbRemoteDebugger.txt>.
58
+
59
+
60
+ The debugging interface
61
+ =======================
62
+
63
+ The `Debugger` object is a generic interface to the low-level operating
64
+ system interface. It manages all the generic machinery to handle multi-process
65
+ and multi-thread debugging, conditional breakpoints, symbols, etc.
66
+
67
+ The debugger is asynchronous: you can issue a command to `run` the target
68
+ process, do whatever you want in your script, and check from time to time
69
+ if some debugging event happened, and then handle it.
70
+
71
+ The debugger object maintains some attributes for the target process.
72
+ The most important are:
73
+ * a <core/Disassembler.txt>
74
+ * an accessor to the process memory, as a <core/VirtualString.txt>
75
+
76
+ The process memory and register set is cached for faster access, use
77
+ the `invalidate` method to force a refresh. The debugger will automatically
78
+ invalidate on any debug event.
79
+
80
+ Multi-process / multi-thread
81
+ ----------------------------
82
+
83
+ The `Debugger` offers accessors for the state of the current active thread.
84
+
85
+ You can change the current active thread or process by using the `pid` and
86
+ `tid` accessors.
87
+
88
+ When handling a debugging event, the debugger will accept any event in
89
+ any of the debuggees, and return in the context of this thread.
90
+
91
+ To enumerate the processes or threads, use one of the following functions,
92
+ that will execute the block after setting pid/tid to each available value:
93
+ * `each_pid` (all pids)
94
+ * `each_tid` (all tids of current pid)
95
+ * `each_pid_tid` (all tids of all pids)
96
+
97
+ By default, the debugger will not attach to child processes spawned by a
98
+ debuggee. To do so, set the `trace_children` variable before you attach.
99
+ On Windows, this variable only has effect when set before a `create_process`.
100
+
101
+ Target manipulation
102
+ -------------------
103
+
104
+ You can check the state of the debuggee through the `state` accessor.
105
+ It can have one of the 3 values:
106
+ * `:stopped`, when the thread has stopped due to a debug event (breakpoint hit,
107
+ exception raised, ...)
108
+ * `:running`, when the thread is running
109
+ * `:dead`, when all supervised processes have ended
110
+
111
+ To update the state of a `:running` process, call the `check_target` (non
112
+ blocking) or `wait_target`.
113
+
114
+ When `:stopped`, the `info` attribute can give more specific information.
115
+ It consists of an arbitrary String.
116
+
117
+ Most of the other accessors require the target to be in the `:stopped` state.
118
+
119
+ To manipulate the value of a register, use `get_reg_value(:eax)` or
120
+ `set_reg_value(:eax,0x42)`.
121
+
122
+ To manipulate the memory, use `memory[address,length]`. You can also
123
+ use `memory_read_int(address)` to read/write integers with the target
124
+ endianness.
125
+
126
+ A shortcut method is available, through the `[]` method. When used with one
127
+ argument, it is interpreted as a register name to be retrieved, with two
128
+ arguments it is a memory range.
129
+
130
+ dbg[:eax] # read the 'eax' register
131
+ dbg[0x1234, 10] = 'hohohohoho' # patch the christmas spirit in memory
132
+
133
+ To optimize reading large sections of the process memory that you know to be
134
+ in a single memory mapping, use the `read_mapped_range(addr, len)` method ;
135
+ it will try to use a single OS-specific call instead of reading the range one
136
+ 4096-byte page at a time. This method returns a String directly.
137
+
138
+ You can manipulate complex expressions using the `resolve(expr)` method.
139
+ It accepts a String representation of an arbitrary expression. Any register,
140
+ symbol (function name) and/or memory dereference can be used inside.
141
+ You can put a `:` before a register name to force it to be parsed as a
142
+ register and not a symbol ; this can be useful for non-standard registers.
143
+
144
+ The memory functions accept such expressions in place of addresses most of the
145
+ time. For example:
146
+
147
+ dbg["some_pointer + eax + 4*[ecx]", 3] = 'foo'
148
+
149
+ Running the target
150
+ ------------------
151
+
152
+ When the debuggee is `:stopped`, you can resume execution using these methods:
153
+ * `continue`: resume execution until the next exception/breakpoint hit (alias: `run`)
154
+ * `singlestep`: executes one CPU instruction and break in the debugger
155
+ * `stepover`: same as singlestep, except if the instruction pointer is on a
156
+ subfunction call, then break only after the function returns.
157
+
158
+ These methods will set the target to `:running` and return immediately.
159
+ To wait for the end of the `singlestep`, you can use `wait_target`, it
160
+ will block until a debug event happens. Usually that means that the instruction
161
+ has been executed, but that could also mean that another thread/process under
162
+ supervision ran into a breakpoint, or that the instruction raised an exception.
163
+
164
+ If you have an active loop to run in your script, you can also call
165
+ `check_target` periodically, and check the value of `dbg.state` to detect
166
+ a debug event.
167
+
168
+ For convenience, you can call `continue_wait` that will call `continue` and
169
+ `wait_target`, `singlestep_wait`, etc.
170
+
171
+ When calling `singlestep`, you can pass a ruby block that will run when the
172
+ singlestep succeeds, even if many other debug events happen in between.
173
+
174
+ Breakpoints
175
+ -----------
176
+
177
+ Depending on the target architecture, you can have access to up to three types of
178
+ breakpoints: software, hardware, and memory.
179
+
180
+ A hardware breakpoint uses features of the cpu to gain control at a given time.
181
+ On x86/x64, they have these characteristics:
182
+ * you can have at most 4 hardware breakpoints active at one time
183
+ * a hardware breakpoint is specific to a thread, as they use special registers
184
+ * they can be set up to break on execution of a specific address, or
185
+ on read or writes of 1, 2 or 4 bytes in memory at a specific address
186
+
187
+ A software breakpoint consists in replacing an instruction in the memory space
188
+ of the target with a specific pattern that will raise an exception when run.
189
+ The advantage over hardware breakpoints is that you can have as many as you
190
+ wish at the same time. The disadvantage is that it changes the target address
191
+ space, which may be a problem; it also means that the breakpoint is active
192
+ for all threads of a given process.
193
+
194
+ Finally a memory breakpoint uses the virtual memory mechanism to take control
195
+ on reads or writes on arbitrary memory ranges.
196
+
197
+ Breakpoints
198
+ ***********
199
+
200
+ All breakpoints can be conditional. This means that whenever a breakpoint
201
+ hits, the debugger will evaluate an expression to determine if it should
202
+ ignore the breakpoint or handle it and give control to your script.
203
+
204
+ The expression can be any arithmetic expression, and should evaluate to 0
205
+ (ignore the breakpoint) or non-0 (break and give control to the script).
206
+
207
+ The arithmetic expression can refer to any register, memory dereference,
208
+ or symbol; additionally, the special registers `:tid` and `:pid` can
209
+ be used to check the current debuggee context (e.g. for a thread-specific
210
+ breakpoint).
211
+
212
+ All breakpoints can have a callback. This is a ruby block that will be run
213
+ whenever the breakpoint hits (and has a valid condition if applicable).
214
+ Your callback can do anything, including resuming the execution of the target.
215
+
216
+ Finally, all breakpoints can be singleshot. This means that whenever the
217
+ breakpoint hits, it is deleted (it will hit only once).
218
+
219
+ Hardware breakpoint (hwbp)
220
+ **************************
221
+
222
+ A hardware breakpoint is set using:
223
+
224
+ hwbp(addr, mtype=:x, mlen=1, oneshot=false, cond=nil, &callback)
225
+
226
+ * addr is the address of the breakpoint. Can be an expression (resolved now)
227
+ * mtype specifies the type of hw breakpoint: `:r`, `:w`, `:x`
228
+ * mlen specifies the size of the area. `:r/:w` mtype only, and must be in [1, 2, 4, 8]
229
+ * oneshot is a boolean, set to true for a singleshot breakpoint
230
+ * cond is the conditional expression, String or Expression (resolved at evaluation time)
231
+ * callback is a ruby block to run when the breakpoint hits
232
+
233
+ Example:
234
+
235
+ # run the block whenever any of the 4 bytes pointed to by eax+12 are read:
236
+ dbg.hwbp('eax+12', :r, 4) { puts "dont read me bro!" }
237
+
238
+
239
+ Software breakpoint (bpx)
240
+ *************************
241
+
242
+ To set a software breakpoint, use:
243
+
244
+ bpx(addr, oneshot=false, cond=nil, &callback)
245
+
246
+ Arguments are the same as `hwbp`.
247
+
248
+ A software breakpoint involves the modification of the target address space,
249
+ which impacts all threads of the process.
250
+ On a hit, only the affected thread is stopped.
251
+
252
+ When resuming the thread, it should see the original instruction that was
253
+ overwritten. If we temporarily restore the original code, there exists a race
254
+ condition, where another thread could use this window to run through the
255
+ code without hitting the breakpoint. To avoid that, metasm will try, when
256
+ possible, to emulate the effects of the original instruction on the active
257
+ thread. This works only when metasm knows the full effects of the replaced
258
+ instruction. If this is not the case, the old method to revert the original
259
+ code, run the target thread in singlestep, and re-insert the breakpoint is
260
+ used.
261
+
262
+ Memory breakpoint (bpm)
263
+ ***********************
264
+
265
+ To define a memory breakpoint, use:
266
+
267
+ bpm(addr, mtype=:r, mlen=4096, oneshot=false, cond=nil, &callback)
268
+
269
+ * mtype is `:r` or `:w`
270
+
271
+ A memory breakpoint is split in pages and managed internally by the framework.
272
+ If the range does not fit on page boundaries, an implicit condition is added to
273
+ check that the exception actually happens inside the watched range, and to ignore
274
+ the breakpoint otherwise.
275
+
276
+ Currently, memory breakpoints are not implemented.
277
+ Maybe someday on windows, using guard pages.
278
+
279
+ Breakpoint management
280
+ *********************
281
+
282
+ When setting up a breakpoint, a `Breakpoint` object is returned.
283
+ It shall be used to remove the breakpoint, using `del_bp`.
284
+
285
+ To enumerate breakpoints, use `all_breakpoints(addr=nil)`.
286
+ This will return an Array of the breakpoints defined for the current thread, i.e.
287
+ the current thread hwbp, the current process bpx, and the current process bpm
288
+ list.
289
+
290
+ Callbacks
291
+ ---------
292
+
293
+ It is possible to define a callback to be run when a specific debug event
294
+ occurs. Each callback is a ruby Proc that will be called when the event occurs, in
295
+ the context of the right thread, and will receive a Hash of information on the
296
+ event specifics. The Hash keys depend on the callback.
297
+
298
+ The list is:
299
+ * `callback_singlestep`
300
+ * `callback_bpx`
301
+ * `callback_hwbp`
302
+ * `callback_bpm`
303
+ * `callback_exception` - any other exception (memory corruption, division by 0,
304
+ breakpoint exception not matching any bpx, ...)
305
+ * `callback_newthread`
306
+ * `callback_endthread`
307
+ * `callback_newprocess`
308
+ * `callback_endprocess`
309
+
310
+ Linux-specific:
311
+ * `callback_syscall` - stopped after a `ptrace_syscall`
312
+ * `callback_exec` - target ran the `exec` syscall
313
+ * `callback_branch` - branch trace mode, experimental kernel feature, needs
314
+ recent CPU
315
+
316
+ Windows-specific:
317
+ * `callback_loadlibrary`
318
+ * `callback_unloadlibrary`
319
+ * `callback_debugstring`
320
+ * `callback_ripevent`
321
+
322
+ A few variables are available to change the default mode of stopping on any
323
+ debug event:
324
+ * `pass_all_exceptions` - do not stop on unknown exceptions, forward them to
325
+ the debuggee
326
+ * `ignore_newthread` - do not stop on new thread creation
327
+ * `ignore_endthread` - do not stop on thread deletion