metasm 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.hgtags +3 -0
  4. data/Gemfile +1 -0
  5. data/INSTALL +61 -0
  6. data/LICENCE +458 -0
  7. data/README +29 -21
  8. data/Rakefile +10 -0
  9. data/TODO +10 -12
  10. data/doc/code_organisation.txt +2 -0
  11. data/doc/core/DynLdr.txt +247 -0
  12. data/doc/core/ExeFormat.txt +43 -0
  13. data/doc/core/Expression.txt +220 -0
  14. data/doc/core/GNUExports.txt +27 -0
  15. data/doc/core/Ia32.txt +236 -0
  16. data/doc/core/SerialStruct.txt +108 -0
  17. data/doc/core/VirtualString.txt +145 -0
  18. data/doc/core/WindowsExports.txt +61 -0
  19. data/doc/core/index.txt +1 -0
  20. data/doc/style.css +6 -3
  21. data/doc/usage/debugger.txt +327 -0
  22. data/doc/usage/index.txt +1 -0
  23. data/doc/use_cases.txt +2 -2
  24. data/metasm.gemspec +22 -0
  25. data/{lib/metasm.rb → metasm.rb} +11 -3
  26. data/{lib/metasm → metasm}/compile_c.rb +13 -7
  27. data/metasm/cpu/arc.rb +8 -0
  28. data/metasm/cpu/arc/decode.rb +425 -0
  29. data/metasm/cpu/arc/main.rb +191 -0
  30. data/metasm/cpu/arc/opcodes.rb +588 -0
  31. data/{lib/metasm → metasm/cpu}/arm.rb +7 -5
  32. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  33. data/{lib/metasm → metasm/cpu}/arm/decode.rb +13 -12
  34. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  35. data/{lib/metasm → metasm/cpu}/arm/main.rb +0 -3
  36. data/metasm/cpu/arm/opcodes.rb +324 -0
  37. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  38. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  39. data/metasm/cpu/arm64.rb +15 -0
  40. data/metasm/cpu/arm64/debug.rb +38 -0
  41. data/metasm/cpu/arm64/decode.rb +289 -0
  42. data/metasm/cpu/arm64/encode.rb +41 -0
  43. data/metasm/cpu/arm64/main.rb +105 -0
  44. data/metasm/cpu/arm64/opcodes.rb +232 -0
  45. data/metasm/cpu/arm64/parse.rb +20 -0
  46. data/metasm/cpu/arm64/render.rb +95 -0
  47. data/{lib/metasm/ppc.rb → metasm/cpu/bpf.rb} +2 -4
  48. data/metasm/cpu/bpf/decode.rb +142 -0
  49. data/metasm/cpu/bpf/main.rb +60 -0
  50. data/metasm/cpu/bpf/opcodes.rb +81 -0
  51. data/metasm/cpu/bpf/render.rb +41 -0
  52. data/metasm/cpu/cy16.rb +9 -0
  53. data/metasm/cpu/cy16/decode.rb +253 -0
  54. data/metasm/cpu/cy16/main.rb +63 -0
  55. data/metasm/cpu/cy16/opcodes.rb +78 -0
  56. data/metasm/cpu/cy16/render.rb +41 -0
  57. data/metasm/cpu/dalvik.rb +11 -0
  58. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +35 -13
  59. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +51 -2
  60. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +19 -11
  61. data/metasm/cpu/ia32.rb +17 -0
  62. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +5 -7
  63. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +5 -5
  64. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +246 -59
  65. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +7 -7
  66. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  67. data/{lib/metasm → metasm/cpu}/ia32/main.rb +51 -8
  68. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  69. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +47 -16
  70. data/{lib/metasm → metasm/cpu}/ia32/render.rb +31 -4
  71. data/metasm/cpu/mips.rb +14 -0
  72. data/{lib/metasm → metasm/cpu}/mips/compile_c.rb +1 -1
  73. data/metasm/cpu/mips/debug.rb +42 -0
  74. data/{lib/metasm → metasm/cpu}/mips/decode.rb +46 -16
  75. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  76. data/{lib/metasm → metasm/cpu}/mips/main.rb +11 -4
  77. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +86 -17
  78. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  79. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  80. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  81. data/metasm/cpu/msp430/decode.rb +247 -0
  82. data/metasm/cpu/msp430/main.rb +62 -0
  83. data/metasm/cpu/msp430/opcodes.rb +101 -0
  84. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  85. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  86. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  87. data/{lib/metasm/mips.rb → metasm/cpu/ppc.rb} +4 -4
  88. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -12
  89. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  90. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  91. data/{lib/metasm → metasm/cpu}/ppc/main.rb +17 -12
  92. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -5
  93. data/metasm/cpu/ppc/parse.rb +55 -0
  94. data/metasm/cpu/python.rb +8 -0
  95. data/metasm/cpu/python/decode.rb +136 -0
  96. data/metasm/cpu/python/main.rb +36 -0
  97. data/metasm/cpu/python/opcodes.rb +180 -0
  98. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  99. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +48 -17
  100. data/{lib/metasm → metasm/cpu}/sh4/main.rb +13 -4
  101. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  102. data/metasm/cpu/x86_64.rb +15 -0
  103. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +28 -17
  104. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  105. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +57 -15
  106. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +55 -26
  107. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +14 -6
  108. data/metasm/cpu/x86_64/opcodes.rb +136 -0
  109. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +10 -2
  110. data/metasm/cpu/x86_64/render.rb +35 -0
  111. data/metasm/cpu/z80.rb +9 -0
  112. data/metasm/cpu/z80/decode.rb +313 -0
  113. data/metasm/cpu/z80/main.rb +67 -0
  114. data/metasm/cpu/z80/opcodes.rb +224 -0
  115. data/metasm/cpu/z80/render.rb +59 -0
  116. data/{lib/metasm/os/main.rb → metasm/debug.rb} +160 -401
  117. data/{lib/metasm → metasm}/decode.rb +35 -4
  118. data/{lib/metasm → metasm}/decompile.rb +15 -16
  119. data/{lib/metasm → metasm}/disassemble.rb +201 -45
  120. data/{lib/metasm → metasm}/disassemble_api.rb +651 -87
  121. data/{lib/metasm → metasm}/dynldr.rb +220 -133
  122. data/{lib/metasm → metasm}/encode.rb +10 -1
  123. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  124. data/{lib/metasm → metasm}/exe_format/autoexe.rb +1 -0
  125. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  126. data/{lib/metasm → metasm}/exe_format/coff.rb +11 -3
  127. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +53 -20
  128. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +11 -13
  129. data/{lib/metasm → metasm}/exe_format/dex.rb +13 -5
  130. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  131. data/{lib/metasm → metasm}/exe_format/elf.rb +93 -57
  132. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +143 -34
  133. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +122 -31
  134. data/metasm/exe_format/gb.rb +65 -0
  135. data/metasm/exe_format/javaclass.rb +424 -0
  136. data/{lib/metasm → metasm}/exe_format/macho.rb +204 -16
  137. data/{lib/metasm → metasm}/exe_format/main.rb +26 -3
  138. data/{lib/metasm → metasm}/exe_format/mz.rb +1 -0
  139. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  140. data/{lib/metasm → metasm}/exe_format/pe.rb +71 -8
  141. data/metasm/exe_format/pyc.rb +167 -0
  142. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  143. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  144. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  145. data/metasm/exe_format/swf.rb +205 -0
  146. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  147. data/metasm/exe_format/zip.rb +335 -0
  148. data/metasm/gui.rb +13 -0
  149. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  150. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  151. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +7 -20
  152. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  153. data/metasm/gui/dasm_graph.rb +1695 -0
  154. data/{lib/metasm → metasm}/gui/dasm_hex.rb +12 -8
  155. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  156. data/{lib/metasm → metasm}/gui/dasm_main.rb +310 -53
  157. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  158. data/{lib/metasm → metasm}/gui/debug.rb +93 -27
  159. data/{lib/metasm → metasm}/gui/gtk.rb +162 -40
  160. data/{lib/metasm → metasm}/gui/qt.rb +12 -2
  161. data/{lib/metasm → metasm}/gui/win32.rb +179 -42
  162. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  163. data/{lib/metasm → metasm}/main.rb +389 -264
  164. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  165. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  166. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  167. data/metasm/os/main.rb +330 -0
  168. data/{lib/metasm → metasm}/os/windows.rb +132 -42
  169. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  170. data/{lib/metasm → metasm}/parse.rb +26 -24
  171. data/{lib/metasm → metasm}/parse_c.rb +221 -116
  172. data/{lib/metasm → metasm}/preprocessor.rb +55 -40
  173. data/{lib/metasm → metasm}/render.rb +14 -38
  174. data/misc/hexdump.rb +2 -1
  175. data/misc/lint.rb +58 -0
  176. data/misc/txt2html.rb +9 -7
  177. data/samples/bindiff.rb +3 -4
  178. data/samples/dasm-plugins/bindiff.rb +15 -0
  179. data/samples/dasm-plugins/bookmark.rb +133 -0
  180. data/samples/dasm-plugins/c_constants.rb +57 -0
  181. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  182. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  183. data/samples/dasm-plugins/dasm_all.rb +70 -0
  184. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  185. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  186. data/samples/dasm-plugins/dump_text.rb +35 -0
  187. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  188. data/samples/dasm-plugins/findgadget.rb +75 -0
  189. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  190. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  191. data/samples/dasm-plugins/imm2off.rb +34 -0
  192. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  193. data/samples/dasm-plugins/patch_file.rb +95 -0
  194. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  195. data/samples/dasm-plugins/scanxrefs.rb +26 -0
  196. data/samples/dasm-plugins/selfmodify.rb +197 -0
  197. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  198. data/samples/dasmnavig.rb +1 -1
  199. data/samples/dbg-apihook.rb +24 -9
  200. data/samples/dbg-plugins/heapscan.rb +283 -0
  201. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  202. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  203. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  204. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  205. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  206. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  207. data/samples/dbg-plugins/trace_func.rb +214 -0
  208. data/samples/disassemble-gui.rb +35 -5
  209. data/samples/disassemble.rb +31 -6
  210. data/samples/dump_upx.rb +24 -12
  211. data/samples/dynamic_ruby.rb +12 -3
  212. data/samples/exeencode.rb +6 -5
  213. data/samples/factorize-headers-peimports.rb +1 -1
  214. data/samples/lindebug.rb +175 -381
  215. data/samples/metasm-shell.rb +1 -2
  216. data/samples/peldr.rb +2 -2
  217. data/tests/all.rb +1 -1
  218. data/tests/arc.rb +26 -0
  219. data/tests/dynldr.rb +22 -4
  220. data/tests/expression.rb +55 -0
  221. data/tests/graph_layout.rb +285 -0
  222. data/tests/ia32.rb +79 -26
  223. data/tests/mips.rb +9 -2
  224. data/tests/x86_64.rb +66 -18
  225. metadata +330 -218
  226. data/lib/metasm/arm/opcodes.rb +0 -177
  227. data/lib/metasm/gui.rb +0 -23
  228. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  229. data/lib/metasm/ia32.rb +0 -14
  230. data/lib/metasm/ia32/opcodes.rb +0 -873
  231. data/lib/metasm/ppc/parse.rb +0 -52
  232. data/lib/metasm/x86_64.rb +0 -12
  233. data/lib/metasm/x86_64/opcodes.rb +0 -118
  234. data/samples/gdbclient.rb +0 -583
  235. data/samples/rubstop.rb +0 -399
@@ -0,0 +1,108 @@
1
+ SerialStruct
2
+ ============
3
+
4
+ This is a helper class to handle binary packed data, especially to
5
+ represent <core/ExeFormat.txt> structures.
6
+
7
+ The implementation is in `metasm/exe_format/serialstruct.rb`.
8
+
9
+ Basics
10
+ ------
11
+
12
+ The class defines some class methods, such as:
13
+
14
+ * `dword`
15
+ * `byte`
16
+ * `strz`
17
+
18
+ These methods can be used directly in subclass definitions, e.g.
19
+
20
+ class MyHeader < SerialStruct
21
+ dword :signature
22
+ dword :length
23
+ end
24
+
25
+ This will associate the sequence of fields to this structure, which
26
+ is used in the `#encode` and `#decode` methods.
27
+ These methods rely on an <core/ExeFormat.txt> instance to define
28
+ the corresponding `decode_dword` and `encode_dword` methods.
29
+
30
+ You can then simply call:
31
+
32
+ hdr = MyHeader.decode(myexefmt)
33
+
34
+ which will call `myexefmt.decode_dword` twice to populate the
35
+ `signature` and `length` fields of the MyHeader instance.
36
+
37
+ You can also redefine the `#decode` method to handle special cases.
38
+
39
+
40
+ The fields defined this way can be assigned a default value that
41
+ will be used when encoding the structure. The syntax is:
42
+
43
+ dword :fieldname, defaultvalue
44
+
45
+ If you have a long sequence of identically-typed fields, you can use
46
+ the plural form:
47
+
48
+ dwords :f1, :f2, :f3, :f4
49
+
50
+ To define your own field types, you should create a new subclass and call the
51
+ `new_field` class method. For integral fields, use `new_int_field(fldname)`
52
+ that will automatically define the decode/encode routines, and create the
53
+ plural form.
54
+
55
+ class MyStruct < SerialStruct
56
+ new_int_field :zword
57
+ zwords :offset, :length
58
+ end
59
+
60
+
61
+ Symbolic constants
62
+ ------------------
63
+
64
+ The class has built-in support for symbolic constants and bit fields.
65
+
66
+ For example, suppose you have a numeric `:type` field, which corresponds
67
+ to a set of numeric constants `TYPE_FOO TYPE_BAR TYPE_LOL`. You can use:
68
+
69
+ TYPES = { 2 => 'FOO', 3 => 'BAR', 4 => 'LOL' }
70
+
71
+ dword :type
72
+ fld_enum :type, TYPES
73
+
74
+ With this, the standard '#decode' method will first decode the numeric value
75
+ of the field, and then lookup the value in the enum hash to find the
76
+ corresponding symbol, and use it as the field value.
77
+ If there is no mapping, the numeric value is retained. The reverse operation
78
+ is done with `#encode`.
79
+
80
+ For the bitfields, the method is `fld_bits`, and the software will try to
81
+ match *OR-ed* values from the bitfield to generate an array of symbols.
82
+
83
+ BITS = { 1 => 'B1', 2 => 'B2', 4 => 'B4' }
84
+
85
+ dword :foo
86
+ fld_bits :foo, BITS
87
+
88
+ which will give, for the numeric value `0x15`, `["B1", "B4", 0x10]`
89
+
90
+ The hashes used for fld_bits or fld_enum can be dynamically determined, by
91
+ using the block version of those methods. The block will receive the ExeFormat
92
+ instance and the SerialStruct instance, and should return the Hash.
93
+ This can be useful when a bitfield signification varies given some generic
94
+ property of the exe, eg the target architecture.
95
+
96
+
97
+ Hooks
98
+ -----
99
+
100
+ It is also possible to define a hook that will be called at some point during
101
+ the object binary decoding. It will receive the exe and struct instances.
102
+
103
+ class Header < SerialStruct
104
+ dword :machine
105
+ decode_hook { |exe, hdr| raise "unknown machine" if hdr.machine > 4 }
106
+ dword :bodylength
107
+ end
108
+
@@ -0,0 +1,145 @@
1
+ VirtualString
2
+ =============
3
+
4
+ This class is an abstract representation of an arbitrary sized byte array
5
+ with methods to load parts of it on demand. It is useful to represent
6
+ a program virtual memory and allow metasm to work on it while only reading
7
+ bytes from it when actually needed.
8
+
9
+ The base class is defined in `metasm/os/main.rb`.
10
+
11
+
12
+ Basics
13
+ ------
14
+
15
+ The API of the object is designed to be compatible with a standard String (ASCII-8BIT).
16
+ The main restriction is that the size of this string cannot be changed:
17
+ concatenation / shortening is not supported.
18
+
19
+ The main operation on the object should be `[]` and `[]=`, that is,
20
+ reading some subpart of the string, or overwriting some substring.
21
+ The arguments are the same as for a String, with the exception that
22
+ rewrite raises an IndexError if the rewriting would change the string
23
+ length.
24
+
25
+ A few methods are written specifically with the VirtualString semantics,
26
+ others are redirected to a temporary real String generated with `realstring`.
27
+
28
+ The VirtualString works with a `page` concept, that represents some arbitrary
29
+ chunks of data that can be actually read from the underlying target, e.g. a
30
+ memory page (4096 bytes) when mapping a process virtual address space.
31
+ Instances get to define a `pagelength` sound for the specific implementation.
32
+
33
+ Whenever a substring is requested from a VirtualString, if the substring
34
+ length is less than the page size, an actual read is made and a String is
35
+ returned.
36
+
37
+ If the length is greater however, a new VirtualString is created to map this
38
+ new *view* without actually reading.
39
+
40
+ To force the conversion to a String, use the `realstring` or `to_str` method.
41
+ The latter is preferred, as it works on both Strings and VirtualStrings.
42
+
43
+ To force the creation of a new VirtualString, use the `dup(start, len)` method.
44
+
45
+ When reading actual bytes, a local page cache is used. By default it has only 4
46
+ pages, and can be invalidated using `invalidate`.
47
+ The cache is automatically invalidated when part of the string is written to.
48
+
49
+ The VirtualString may index *invalid* pages (e.g. unmapped memory range in a
50
+ process address space) ; you can check that with `page_invalid?` with an index
51
+ as parameter.
52
+
53
+
54
+ Creation
55
+ --------
56
+
57
+ To create your own flavor of VirtualString, you must:
58
+
59
+ * define your subclass that inherits from `VirtualString`
60
+ * define your initializer, that takes whatever arguments make sense (e.g. a
61
+ *pid*, *handle*, Socket..)
62
+ * your initializer must call super(a, l) with arguments:
63
+ ** current view absolute address (should default to 0), will be saved in
64
+ `@addr_start`
65
+ ** current view size (should default to something sensible, like 1<<32), saved
66
+ in `@length`
67
+ * your initializer can override the default page size by defining the
68
+ `@pagelength` variable.
69
+ * implement a `dup` method that takes optional arguments:
70
+ ** new base address (default=`@addr_start`)
71
+ ** new length (default=`@length`)
72
+ ** returns a new instance of your class mapping over the specified window
73
+ * implement a `get_page` method, whose arguments are:
74
+ ** absolute page address (will always be page-aligned)
75
+ ** optional length, default=`@pagelength`
76
+ ** returns a String of `length` bytes, or `nil` (e.g. unmapped area)
77
+ * optionally implement a `rewrite_at` method, to make your string writeable.
78
+ Arguments are the absolute write address, and the data to write there (a String).
79
+
80
+ Feel free to override any other method with an optimized version.
81
+ For example, the default `realstring` will repeatedly call `get_page` with
82
+ each page in the range 0..`length`, you may have a more efficient alternative.
83
+
84
+ You can alter the cache size by rewriting the `@pagecache_len` variable
85
+ **after** calling `super()` in `initialize`. The default value is 4, which you
86
+ may want to increase.
87
+
88
+ See the `WindowsRemoteString` source for a simple example (ignore the `open_pid`
89
+ method).
90
+
91
+ Standard subclasses
92
+ -------------------
93
+
94
+ VirtualFile
95
+ ###########
96
+
97
+ Defined in `metasm/os/main.rb`.
98
+
99
+ This class maps over an open file descriptor, and allows reading data on-demand.
100
+ It implements the `read` class method, similar to `File.read`, with the
101
+ file opened in binary mode. For a small file (<=4096), the content is
102
+ directly returned, otherwise a VirtualString is created.
103
+
104
+ This class is used by the default <core/ExeFormat.txt> `decode_file[_header]`
105
+ methods.
106
+
107
+
108
+ LinuxRemoteString
109
+ #################
110
+
111
+ Defined in `metasm/os/linux.rb`.
112
+
113
+ This class maps over the virtual memory of a Linux process.
114
+ Accesses are done through the `/proc/<pid>/mem` for reading.
115
+ The linux kernel requires that the target process be ptraced before we can
116
+ read this file, so the object will use the debugger instance passed to the
117
+ constructor, or create a new <core/PTrace.txt> object to stop the process
118
+ and read its memory during `get_page`.
119
+
120
+ If a <core/Debugger.txt> object was given, `get_page` will return `nil` if the
121
+ debugger indicates that the target is not stopped.
122
+
123
+ Writing is done through `PTrace#writemem` using `PTRACE_POKEDATA`.
124
+
125
+
126
+ WindowsRemoteString
127
+ ###################
128
+
129
+ Defined in `metasm/os/windows.rb`.
130
+
131
+ This class maps over the virtual memory of a Windows process.
132
+
133
+ The memory accesses are done using the `Read/WriteProcessMemory` API.
134
+
135
+ The class method `open_pid` is defined, that will try to `OpenProcess`
136
+ first in read/write, and fallback to read-only mode.
137
+
138
+
139
+ GdbRemoteString
140
+ ###############
141
+
142
+ Defined in `metasm/os/gdbremote.rb`.
143
+
144
+ Maps over the virtual memory of a remote process debugged with a
145
+ <core/GdbClient.txt> instance, using `setmem` and `getmem`.
@@ -0,0 +1,61 @@
1
+ WindowsExports
2
+ ==============
3
+
4
+ This class is defined in `metasm/os/windows_exports.rb`
5
+
6
+ It defines an `EXPORT` constant, a Hash, whose keys
7
+ are the standard win32 API symbol names, and values
8
+ are the library name where you can find this symbol.
9
+
10
+ The equivalent for GNU/Linux is <core/GNUExports.txt>
11
+
12
+ Usage
13
+ -----
14
+
15
+ The main usage of this class is the automatic generation
16
+ of the <core/PE.txt> import directories from the
17
+ external symbols referenced by a binary during compilation.
18
+
19
+ This is done in the `automagic_symbols` method.
20
+
21
+ Symbols
22
+ -------
23
+
24
+ The current version holds the symbols available in the
25
+ Windows XP SP2 32-bit standard libraries:
26
+
27
+ * `ntdll`
28
+ * `kernel32`
29
+ * `user32`
30
+ * `gdi32`
31
+ * `advapi32`
32
+ * `ws2_32`
33
+ * `msvcrt`
34
+ * `comdlg32`
35
+ * `psapi`
36
+
37
+
38
+ Ruby symbols are also defined, from `msvcrt-ruby18`.
39
+
40
+
41
+ Ruby library name
42
+ -----------------
43
+
44
+ On creation, the current ruby library name is inferred
45
+ from the `RUBY_PLATFORM` constant, in an effort to
46
+ try to use the available ruby library filename.
47
+
48
+ The only transformation supported now is to rewrite
49
+ the ruby version number appearing in the filename for
50
+ msvcrt-compiled binaries, so that you get the correct
51
+ `msvcrt-ruby192` name for example under ruby1.9.
52
+
53
+ This is implemented in the `patch_rubylib_to_current_interpreter`
54
+ method (which is aptly named).
55
+
56
+ Warning
57
+ #######
58
+
59
+ Note that binaries compiled this way will not work on
60
+ other machines where the exact same library is unavailable.
61
+
@@ -0,0 +1 @@
1
+ See <core_classes.txt>
@@ -1,3 +1,6 @@
1
- span.quote {
2
- font-family: monospace;
3
- }
1
+ body { background-color: #002b36; color: #839496; }
2
+ a { text-decoration: none; color:#268bd2; }
3
+ a:hover, a:visited, a:active { color:#2aa198; }
4
+ a:hover { text-decoration: underline; }
5
+ a.brokenlink { color: #dc322f; }
6
+ span.quote { font-family: monospace; }
@@ -0,0 +1,327 @@
1
+ The Debugger
2
+ ============
3
+
4
+ Metasm includes functionalities to communicate with some operating system
5
+ debugging infrastructures.
6
+
7
+ Currently supported:
8
+
9
+ * Windows (x86, x64)
10
+ * Linux (x86, x64)
11
+
12
+ Generic interface
13
+ -----------------
14
+
15
+ Metasm exposes a generic API that will work on all supported platforms.
16
+
17
+ This interface is implemented using system-specific classes, that you may
18
+ directly access for tighter control.
19
+
20
+ Global operating system interface is available through the <core/OS.txt> class.
21
+ It has methods to enumerate live processes, spawn new processes, and provide
22
+ process/thread access.
23
+
24
+ Individual process debugging is wrapped in the <core/Debugger.txt> class.
25
+
26
+ Windows debugging
27
+ -----------------
28
+
29
+ The windows debugger relies on <core/DynLdr.txt> to interface directly with
30
+ the *Win32_API*.
31
+
32
+ Support exists for 32-bit process, and, using a 64-bit ruby interpreter, for
33
+ 64-bit process and WoW64-process debugging.
34
+
35
+ The operating system wrapper is <core/WinOS.txt>, the debugger is
36
+ <core/WinDebugger.txt>.
37
+
38
+ Linux debugging
39
+ ---------------
40
+
41
+ The linux debugger relies on the */proc* filesystem for process and thread
42
+ enumeration, process memory access, etc ; and on the *ptrace* syscall
43
+ (through `Kernel.syscall()`) for actual debugging.
44
+
45
+ You'll need a 64-bit ruby interpreter to debug 64-bit target processes.
46
+
47
+ The operating system wrapper is <core/LinOS.txt>, the debugger is
48
+ <core/LinDebugger.txt>.
49
+
50
+ Due to linux limitations, the memory of a process is accessible for read or
51
+ write only if one of its threads is stopped by the debugger.
52
+
53
+ Remote debugging
54
+ ----------------
55
+
56
+ Metasm also implements a client for the *GdbServer* protocol.
57
+ See <core/GdbRemoteDebugger.txt>.
58
+
59
+
60
+ The debugging interface
61
+ =======================
62
+
63
+ The `Debugger` object is a generic interface to the low-level operating
64
+ system interface. It manages all the generic machinery to handle multi-process
65
+ and multi-thread debugging, conditional breakpoints, symbols, etc.
66
+
67
+ The debugger is asynchronous: you can issue a command to `run` the target
68
+ process, do whatever you want in your script, and check from time to time
69
+ if some debugging event happened, and then handle it.
70
+
71
+ The debugger object maintains some attributes for the target process.
72
+ The most important are:
73
+ * a <core/Disassembler.txt>
74
+ * an accessor to the process memory, as a <core/VirtualString.txt>
75
+
76
+ The process memory and register set is cached for faster access, use
77
+ the `invalidate` method to force a refresh. The debugger will automatically
78
+ invalidate on any debug event.
79
+
80
+ Multi-process / multi-thread
81
+ ----------------------------
82
+
83
+ The `Debugger` offers accessors for the state of the current active thread.
84
+
85
+ You can change the current active thread or process by using the `pid` and
86
+ `tid` accessors.
87
+
88
+ When handling a debugging event, the debugger will accept any event in
89
+ any of the debuggees, and return in the context of this thread.
90
+
91
+ To enumerate the processes or threads, use one of the following functions,
92
+ that will execute the block after setting pid/tid to each available value:
93
+ * `each_pid` (all pids)
94
+ * `each_tid` (all tids of current pid)
95
+ * `each_pid_tid` (all tids of all pids)
96
+
97
+ By default, the debugger will not attach to child processes spawned by a
98
+ debuggee. To do so, set the `trace_children` variable before you attach.
99
+ On Windows, this variable only has effect when set before a `create_process`.
100
+
101
+ Target manipulation
102
+ -------------------
103
+
104
+ You can check the state of the debuggee through the `state` accessor.
105
+ It can have one of the 3 values:
106
+ * `:stopped`, when the thread has stopped due to a debug event (breakpoint hit,
107
+ exception raised, ...)
108
+ * `:running`, when the thread is running
109
+ * `:dead`, when all supervised processes have ended
110
+
111
+ To update the state of a `:running` process, call the `check_target` (non
112
+ blocking) or `wait_target`.
113
+
114
+ When `:stopped`, the `info` attribute can give more specific information.
115
+ It consists of an arbitrary String.
116
+
117
+ Most of the other accessors require the target to be in the `:stopped` state.
118
+
119
+ To manipulate the value of a register, use `get_reg_value(:eax)` or
120
+ `set_reg_value(:eax,0x42)`.
121
+
122
+ To manipulate the memory, use `memory[address,length]`. You can also
123
+ use `memory_read_int(address)` to read/write integers with the target
124
+ endianness.
125
+
126
+ A shortcut method is available, through the `[]` method. When used with one
127
+ argument, it is interpreted as a register name to be retrieved, with two
128
+ arguments it is a memory range.
129
+
130
+ dbg[:eax] # read the 'eax' register
131
+ dbg[0x1234, 10] = 'hohohohoho' # patch the christmas spirit in memory
132
+
133
+ To optimize reading large sections of the process memory that you know to be
134
+ in a single memory mapping, use the `read_mapped_range(addr, len)` method ;
135
+ it will try to use a single OS-specific call instead of reading the range one
136
+ 4096-byte page at a time. This method returns a String directly.
137
+
138
+ You can manipulate complex expressions using the `resolve(expr)` method.
139
+ It accepts a String representation of an arbitrary expression. Any register,
140
+ symbol (function name) and/or memory dereference can be used inside.
141
+ You can put a `:` before a register name to force it to be parsed as a
142
+ register and not a symbol ; this can be useful for non-standard registers.
143
+
144
+ The memory functions accept such expressions in place of addresses most of the
145
+ time. For example:
146
+
147
+ dbg["some_pointer + eax + 4*[ecx]", 3] = 'foo'
148
+
149
+ Running the target
150
+ ------------------
151
+
152
+ When the debuggee is `:stopped`, you can resume execution using these methods:
153
+ * `continue`: resume execution until the next exception/breakpoint hit (alias: `run`)
154
+ * `singlestep`: executes one CPU instruction and break in the debugger
155
+ * `stepover`: same as singlestep, except if the instruction pointer is on a
156
+ subfunction call, then break only after the function returns.
157
+
158
+ These methods will set the target to `:running` and return immediately.
159
+ To wait for the end of the `singlestep`, you can use `wait_target`, it
160
+ will block until a debug event happens. Usually that means that the instruction
161
+ has been executed, but that could also mean that another thread/process under
162
+ supervision ran into a breakpoint, or that the instruction raised an exception.
163
+
164
+ If you have an active loop to run in your script, you can also call
165
+ `check_target` periodically, and check the value of `dbg.state` to detect
166
+ a debug event.
167
+
168
+ For convenience, you can call `continue_wait` that will call `continue` and
169
+ `wait_target`, `singlestep_wait`, etc.
170
+
171
+ When calling `singlestep`, you can pass a ruby block that will run when the
172
+ singlestep succeeds, even if many other debug events happen in between.
173
+
174
+ Breakpoints
175
+ -----------
176
+
177
+ Depending on the target architecture, you can have access to up to three types of
178
+ breakpoints: software, hardware, and memory.
179
+
180
+ A hardware breakpoint uses features of the cpu to gain control at a given time.
181
+ On x86/x64, they have these characteristics:
182
+ * you can have at most 4 hardware breakpoints active at one time
183
+ * a hardware breakpoint is specific to a thread, as they use special registers
184
+ * they can be set up to break on execution of a specific address, or
185
+ on reads or writes of 1, 2 or 4 bytes in memory at a specific address
186
+
187
+ A software breakpoint consists of replacing an instruction in the memory space
188
+ of the target with a specific pattern that will raise an exception when run.
189
+ The advantage over hardware breakpoints is that you can have as many as you
190
+ wish at the same time. The disadvantage is that it changes the target address
191
+ space, which may be a problem; it also means that the breakpoint is active
192
+ for all threads of a given process.
193
+
194
+ Finally a memory breakpoint uses the virtual memory mechanism to take control
195
+ on reads or writes on arbitrary memory ranges.
196
+
197
+ Breakpoints
198
+ ***********
199
+
200
+ All breakpoints can be conditional. This means that whenever a breakpoint
201
+ hits, the debugger will evaluate an expression to determine if it should
202
+ ignore the breakpoint or handle it and give control to your script.
203
+
204
+ The expression can be any arithmetic expression, and should evaluate to 0
205
+ (ignore the breakpoint) or non-0 (break and give control to the script).
206
+
207
+ The arithmetic expression can refer to any register, memory dereference,
208
+ or symbol; additionally, the special registers `:tid` and `:pid` can
209
+ be used to check the current debuggee context (e.g. for a thread-specific
210
+ breakpoint).
211
+
212
+ All breakpoints can have a callback. This is a ruby block that will be run
213
+ whenever the breakpoint hits (and its condition, if any, evaluates to non-0).
214
+ Your callback can do anything, including resuming the execution of the target.
215
+
216
+ Finally, all breakpoints can be singleshot. This means that whenever the
217
+ breakpoint hits, it is deleted (it will hit only once).
218
+
219
+ Hardware breakpoint (hwbp)
220
+ **************************
221
+
222
+ A hardware breakpoint is set using:
223
+
224
+ hwbp(addr, mtype=:x, mlen=1, oneshot=false, cond=nil, &callback)
225
+
226
+ * addr is the address of the breakpoint. Can be an expression (resolved now)
227
+ * mtype specifies the type of hw breakpoint: `:r`, `:w`, `:x`
228
+ * mlen specifies the size of the area. `:r/:w` mtype only, and must be in [1, 2, 4, 8]
229
+ * oneshot is a boolean, set to true for a singleshot breakpoint
230
+ * cond is the conditional expression, String or Expression (resolved at evaluation time)
231
+ * callback is a ruby block to run when the breakpoint hits
232
+
233
+ Example:
234
+
235
+ # run the block whenever any of the 4 bytes pointed to by eax+12 are read:
236
+ dbg.hwbp('eax+12', :r, 4) { puts "dont read me bro!" }
237
+
238
+
239
+ Software breakpoint (bpx)
240
+ *************************
241
+
242
+ To set a software breakpoint, use:
243
+
244
+ bpx(addr, oneshot=false, cond=nil, &callback)
245
+
246
+ The arguments have the same meaning as for `hwbp` (there is no `mtype` or `mlen`).
247
+
248
+ A software breakpoint involves the modification of the target address space,
249
+ which impacts all threads of the process.
250
+ On a hit, only the affected thread is stopped.
251
+
252
+ When resuming the thread, it should see the original instruction that was
253
+ overwritten. If we temporarily restore the original code, there exists a race
254
+ condition, where another thread could use this window to run through the
255
+ code without hitting the breakpoint. To avoid that, metasm will try, when
256
+ possible, to emulate the effects of the original instruction on the active
257
+ thread. This works only when metasm knows the full effects of the replaced
258
+ instruction. If this is not the case, the old method is used: revert the
258
+ original code, run the target thread in singlestep, and re-insert the
259
+ breakpoint.
261
+
262
+ Memory breakpoint (bpm)
263
+ ***********************
264
+
265
+ To define a memory breakpoint, use:
266
+
267
+ bpm(addr, mtype=:r, mlen=4096, oneshot=false, cond=nil, &callback)
268
+
269
+ * mtype is `:r` or `:w`
270
+
271
+ A memory breakpoint is split in pages and managed internally by the framework.
272
+ If the range is not aligned on page boundaries, an implicit condition is added to
273
+ check that the exception actually happens inside the watched range, and ignore
274
+ the breakpoint otherwise.
275
+
276
+ Currently, memory breakpoints are not implemented.
277
+ Maybe someday on Windows, using guard pages.
278
+
279
+ Breakpoint management
280
+ *********************
281
+
282
+ When setting up a breakpoint, a `Breakpoint` object is returned.
283
+ It shall be used to remove the breakpoint, using `del_bp`.
284
+
285
+ To enumerate breakpoints, use `all_breakpoints(addr=nil)`.
286
+ This will return an Array of the breakpoints defined for the current thread, i.e.
287
+ the current thread hwbp, the current process bpx, and the current process bpm
288
+ list.
289
+
290
+ Callbacks
291
+ ---------
292
+
293
+ It is possible to define a callback to be run when a specific debug event
294
+ occurs. Each callback is a ruby Proc that will be called when the event occurs, in
295
+ the context of the right thread, and will receive a Hash of information on the
296
+ event specifics. The Hash keys depend on the callback.
297
+
298
+ The list is:
299
+ * `callback_singlestep`
300
+ * `callback_bpx`
301
+ * `callback_hwbp`
302
+ * `callback_bpm`
303
+ * `callback_exception` - any other exception (memory corruption, division by 0,
304
+ breakpoint exception not matching any bpx, ...)
305
+ * `callback_newthread`
306
+ * `callback_endthread`
307
+ * `callback_newprocess`
308
+ * `callback_endprocess`
309
+
310
+ Linux-specific:
311
+ * `callback_syscall` - stopped after a `ptrace_syscall`
312
+ * `callback_exec` - target ran the `exec` syscall
313
+ * `callback_branch` - branch trace mode, experimental kernel feature, needs
314
+ recent CPU
315
+
316
+ Windows-specific:
317
+ * `callback_loadlibrary`
318
+ * `callback_unloadlibrary`
319
+ * `callback_debugstring`
320
+ * `callback_ripevent`
321
+
322
+ A few variables are available to change the default mode of stopping on any
323
+ debug event:
324
+ * `pass_all_exceptions` - do not stop on unknown exceptions, forward them to
325
+ the debuggee
326
+ * `ignore_newthread` - do not stop on new thread creation
327
+ * `ignore_endthread` - do not stop on thread deletion