metasm 1.0.0 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (276) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +3 -0
  4. data/.gitignore +3 -0
  5. data/.hgtags +3 -0
  6. data/Gemfile +3 -0
  7. data/INSTALL +61 -0
  8. data/LICENCE +458 -0
  9. data/README +29 -21
  10. data/Rakefile +10 -0
  11. data/TODO +10 -12
  12. data/doc/code_organisation.txt +3 -1
  13. data/doc/core/DynLdr.txt +247 -0
  14. data/doc/core/ExeFormat.txt +43 -0
  15. data/doc/core/Expression.txt +220 -0
  16. data/doc/core/GNUExports.txt +27 -0
  17. data/doc/core/Ia32.txt +236 -0
  18. data/doc/core/SerialStruct.txt +108 -0
  19. data/doc/core/VirtualString.txt +145 -0
  20. data/doc/core/WindowsExports.txt +61 -0
  21. data/doc/core/index.txt +1 -0
  22. data/doc/style.css +6 -3
  23. data/doc/usage/debugger.txt +327 -0
  24. data/doc/usage/index.txt +1 -0
  25. data/doc/use_cases.txt +2 -2
  26. data/metasm.gemspec +23 -0
  27. data/{lib/metasm.rb → metasm.rb} +15 -3
  28. data/{lib/metasm → metasm}/compile_c.rb +15 -9
  29. data/metasm/cpu/arc.rb +8 -0
  30. data/metasm/cpu/arc/decode.rb +404 -0
  31. data/metasm/cpu/arc/main.rb +191 -0
  32. data/metasm/cpu/arc/opcodes.rb +588 -0
  33. data/metasm/cpu/arm.rb +14 -0
  34. data/{lib/metasm → metasm/cpu}/arm/debug.rb +2 -2
  35. data/{lib/metasm → metasm/cpu}/arm/decode.rb +15 -18
  36. data/{lib/metasm → metasm/cpu}/arm/encode.rb +23 -8
  37. data/{lib/metasm → metasm/cpu}/arm/main.rb +3 -6
  38. data/metasm/cpu/arm/opcodes.rb +324 -0
  39. data/{lib/metasm → metasm/cpu}/arm/parse.rb +25 -13
  40. data/{lib/metasm → metasm/cpu}/arm/render.rb +2 -2
  41. data/metasm/cpu/arm64.rb +15 -0
  42. data/metasm/cpu/arm64/debug.rb +38 -0
  43. data/metasm/cpu/arm64/decode.rb +285 -0
  44. data/metasm/cpu/arm64/encode.rb +41 -0
  45. data/metasm/cpu/arm64/main.rb +105 -0
  46. data/metasm/cpu/arm64/opcodes.rb +232 -0
  47. data/metasm/cpu/arm64/parse.rb +20 -0
  48. data/metasm/cpu/arm64/render.rb +95 -0
  49. data/{lib/metasm/mips/compile_c.rb → metasm/cpu/bpf.rb} +4 -2
  50. data/metasm/cpu/bpf/decode.rb +110 -0
  51. data/metasm/cpu/bpf/main.rb +60 -0
  52. data/metasm/cpu/bpf/opcodes.rb +81 -0
  53. data/metasm/cpu/bpf/render.rb +30 -0
  54. data/{lib/metasm/ppc.rb → metasm/cpu/cy16.rb} +2 -4
  55. data/metasm/cpu/cy16/decode.rb +247 -0
  56. data/metasm/cpu/cy16/main.rb +63 -0
  57. data/metasm/cpu/cy16/opcodes.rb +78 -0
  58. data/metasm/cpu/cy16/render.rb +30 -0
  59. data/metasm/cpu/dalvik.rb +11 -0
  60. data/{lib/metasm → metasm/cpu}/dalvik/decode.rb +34 -34
  61. data/{lib/metasm → metasm/cpu}/dalvik/main.rb +71 -4
  62. data/{lib/metasm → metasm/cpu}/dalvik/opcodes.rb +21 -12
  63. data/{lib/metasm/mips.rb → metasm/cpu/ebpf.rb} +3 -4
  64. data/metasm/cpu/ebpf/debug.rb +61 -0
  65. data/metasm/cpu/ebpf/decode.rb +142 -0
  66. data/metasm/cpu/ebpf/main.rb +58 -0
  67. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  68. data/metasm/cpu/ebpf/render.rb +36 -0
  69. data/metasm/cpu/ia32.rb +17 -0
  70. data/{lib/metasm → metasm/cpu}/ia32/compile_c.rb +23 -9
  71. data/{lib/metasm → metasm/cpu}/ia32/debug.rb +44 -6
  72. data/{lib/metasm → metasm/cpu}/ia32/decode.rb +342 -128
  73. data/{lib/metasm → metasm/cpu}/ia32/decompile.rb +75 -53
  74. data/{lib/metasm → metasm/cpu}/ia32/encode.rb +19 -13
  75. data/{lib/metasm → metasm/cpu}/ia32/main.rb +66 -8
  76. data/metasm/cpu/ia32/opcodes.rb +1424 -0
  77. data/{lib/metasm → metasm/cpu}/ia32/parse.rb +55 -17
  78. data/{lib/metasm → metasm/cpu}/ia32/render.rb +32 -5
  79. data/metasm/cpu/mcs51.rb +8 -0
  80. data/metasm/cpu/mcs51/decode.rb +99 -0
  81. data/metasm/cpu/mcs51/main.rb +87 -0
  82. data/metasm/cpu/mcs51/opcodes.rb +120 -0
  83. data/metasm/cpu/mips.rb +14 -0
  84. data/metasm/cpu/mips/debug.rb +42 -0
  85. data/{lib/metasm → metasm/cpu}/mips/decode.rb +59 -38
  86. data/{lib/metasm → metasm/cpu}/mips/encode.rb +4 -3
  87. data/{lib/metasm → metasm/cpu}/mips/main.rb +13 -6
  88. data/{lib/metasm → metasm/cpu}/mips/opcodes.rb +87 -18
  89. data/{lib/metasm → metasm/cpu}/mips/parse.rb +1 -1
  90. data/{lib/metasm → metasm/cpu}/mips/render.rb +1 -1
  91. data/{lib/metasm/dalvik.rb → metasm/cpu/msp430.rb} +1 -1
  92. data/metasm/cpu/msp430/decode.rb +243 -0
  93. data/metasm/cpu/msp430/main.rb +62 -0
  94. data/metasm/cpu/msp430/opcodes.rb +101 -0
  95. data/metasm/cpu/openrisc.rb +11 -0
  96. data/metasm/cpu/openrisc/debug.rb +106 -0
  97. data/metasm/cpu/openrisc/decode.rb +182 -0
  98. data/metasm/cpu/openrisc/decompile.rb +350 -0
  99. data/metasm/cpu/openrisc/main.rb +70 -0
  100. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  101. data/metasm/cpu/openrisc/render.rb +37 -0
  102. data/{lib/metasm → metasm/cpu}/pic16c/decode.rb +6 -7
  103. data/{lib/metasm → metasm/cpu}/pic16c/main.rb +0 -0
  104. data/{lib/metasm → metasm/cpu}/pic16c/opcodes.rb +1 -1
  105. data/metasm/cpu/ppc.rb +11 -0
  106. data/{lib/metasm → metasm/cpu}/ppc/decode.rb +18 -37
  107. data/{lib/metasm → metasm/cpu}/ppc/decompile.rb +3 -3
  108. data/{lib/metasm → metasm/cpu}/ppc/encode.rb +2 -2
  109. data/{lib/metasm → metasm/cpu}/ppc/main.rb +23 -18
  110. data/{lib/metasm → metasm/cpu}/ppc/opcodes.rb +11 -6
  111. data/metasm/cpu/ppc/parse.rb +55 -0
  112. data/metasm/cpu/python.rb +8 -0
  113. data/metasm/cpu/python/decode.rb +116 -0
  114. data/metasm/cpu/python/main.rb +36 -0
  115. data/metasm/cpu/python/opcodes.rb +180 -0
  116. data/{lib/metasm → metasm/cpu}/sh4.rb +1 -1
  117. data/{lib/metasm → metasm/cpu}/sh4/decode.rb +50 -23
  118. data/{lib/metasm → metasm/cpu}/sh4/main.rb +38 -27
  119. data/{lib/metasm → metasm/cpu}/sh4/opcodes.rb +7 -8
  120. data/metasm/cpu/st20.rb +9 -0
  121. data/metasm/cpu/st20/decode.rb +173 -0
  122. data/metasm/cpu/st20/decompile.rb +283 -0
  123. data/metasm/cpu/st20/main.rb +37 -0
  124. data/metasm/cpu/st20/opcodes.rb +140 -0
  125. data/{lib/metasm/arm.rb → metasm/cpu/webasm.rb} +4 -5
  126. data/metasm/cpu/webasm/debug.rb +31 -0
  127. data/metasm/cpu/webasm/decode.rb +321 -0
  128. data/metasm/cpu/webasm/decompile.rb +386 -0
  129. data/metasm/cpu/webasm/encode.rb +104 -0
  130. data/metasm/cpu/webasm/main.rb +81 -0
  131. data/metasm/cpu/webasm/opcodes.rb +214 -0
  132. data/metasm/cpu/x86_64.rb +15 -0
  133. data/{lib/metasm → metasm/cpu}/x86_64/compile_c.rb +40 -25
  134. data/{lib/metasm → metasm/cpu}/x86_64/debug.rb +4 -4
  135. data/{lib/metasm → metasm/cpu}/x86_64/decode.rb +58 -15
  136. data/{lib/metasm → metasm/cpu}/x86_64/encode.rb +59 -28
  137. data/{lib/metasm → metasm/cpu}/x86_64/main.rb +18 -6
  138. data/metasm/cpu/x86_64/opcodes.rb +138 -0
  139. data/{lib/metasm → metasm/cpu}/x86_64/parse.rb +12 -4
  140. data/metasm/cpu/x86_64/render.rb +35 -0
  141. data/metasm/cpu/z80.rb +9 -0
  142. data/metasm/cpu/z80/decode.rb +286 -0
  143. data/metasm/cpu/z80/main.rb +67 -0
  144. data/metasm/cpu/z80/opcodes.rb +224 -0
  145. data/metasm/cpu/z80/render.rb +48 -0
  146. data/{lib/metasm/os/main.rb → metasm/debug.rb} +201 -407
  147. data/{lib/metasm → metasm}/decode.rb +104 -24
  148. data/{lib/metasm → metasm}/decompile.rb +804 -478
  149. data/{lib/metasm → metasm}/disassemble.rb +385 -170
  150. data/{lib/metasm → metasm}/disassemble_api.rb +684 -105
  151. data/{lib/metasm → metasm}/dynldr.rb +231 -138
  152. data/{lib/metasm → metasm}/encode.rb +20 -5
  153. data/{lib/metasm → metasm}/exe_format/a_out.rb +9 -6
  154. data/{lib/metasm → metasm}/exe_format/autoexe.rb +3 -0
  155. data/{lib/metasm → metasm}/exe_format/bflt.rb +57 -27
  156. data/{lib/metasm → metasm}/exe_format/coff.rb +35 -7
  157. data/{lib/metasm → metasm}/exe_format/coff_decode.rb +70 -23
  158. data/{lib/metasm → metasm}/exe_format/coff_encode.rb +24 -22
  159. data/{lib/metasm → metasm}/exe_format/dex.rb +26 -8
  160. data/{lib/metasm → metasm}/exe_format/dol.rb +1 -0
  161. data/{lib/metasm → metasm}/exe_format/elf.rb +108 -58
  162. data/{lib/metasm → metasm}/exe_format/elf_decode.rb +202 -36
  163. data/{lib/metasm → metasm}/exe_format/elf_encode.rb +126 -32
  164. data/metasm/exe_format/gb.rb +65 -0
  165. data/metasm/exe_format/javaclass.rb +424 -0
  166. data/{lib/metasm → metasm}/exe_format/macho.rb +218 -16
  167. data/{lib/metasm → metasm}/exe_format/main.rb +28 -3
  168. data/{lib/metasm → metasm}/exe_format/mz.rb +2 -0
  169. data/{lib/metasm → metasm}/exe_format/nds.rb +7 -4
  170. data/{lib/metasm → metasm}/exe_format/pe.rb +96 -11
  171. data/metasm/exe_format/pyc.rb +167 -0
  172. data/{lib/metasm → metasm}/exe_format/serialstruct.rb +67 -14
  173. data/{lib/metasm → metasm}/exe_format/shellcode.rb +7 -3
  174. data/metasm/exe_format/shellcode_rwx.rb +114 -0
  175. data/metasm/exe_format/swf.rb +205 -0
  176. data/metasm/exe_format/wasm.rb +402 -0
  177. data/{lib/metasm → metasm}/exe_format/xcoff.rb +7 -7
  178. data/metasm/exe_format/zip.rb +335 -0
  179. data/metasm/gui.rb +13 -0
  180. data/{lib/metasm → metasm}/gui/cstruct.rb +35 -41
  181. data/{lib/metasm → metasm}/gui/dasm_coverage.rb +11 -11
  182. data/{lib/metasm → metasm}/gui/dasm_decomp.rb +177 -114
  183. data/{lib/metasm → metasm}/gui/dasm_funcgraph.rb +0 -0
  184. data/metasm/gui/dasm_graph.rb +1754 -0
  185. data/{lib/metasm → metasm}/gui/dasm_hex.rb +16 -12
  186. data/{lib/metasm → metasm}/gui/dasm_listing.rb +43 -28
  187. data/{lib/metasm → metasm}/gui/dasm_main.rb +360 -77
  188. data/{lib/metasm → metasm}/gui/dasm_opcodes.rb +5 -19
  189. data/{lib/metasm → metasm}/gui/debug.rb +109 -34
  190. data/{lib/metasm → metasm}/gui/gtk.rb +174 -44
  191. data/{lib/metasm → metasm}/gui/qt.rb +14 -4
  192. data/{lib/metasm → metasm}/gui/win32.rb +180 -43
  193. data/{lib/metasm → metasm}/gui/x11.rb +59 -59
  194. data/{lib/metasm → metasm}/main.rb +421 -286
  195. data/metasm/os/emulator.rb +175 -0
  196. data/{lib/metasm/os/remote.rb → metasm/os/gdbremote.rb} +146 -54
  197. data/{lib/metasm → metasm}/os/gnu_exports.rb +1 -1
  198. data/{lib/metasm → metasm}/os/linux.rb +628 -151
  199. data/metasm/os/main.rb +335 -0
  200. data/{lib/metasm → metasm}/os/windows.rb +151 -58
  201. data/{lib/metasm → metasm}/os/windows_exports.rb +141 -0
  202. data/{lib/metasm → metasm}/parse.rb +49 -36
  203. data/{lib/metasm → metasm}/parse_c.rb +405 -246
  204. data/{lib/metasm → metasm}/preprocessor.rb +71 -41
  205. data/{lib/metasm → metasm}/render.rb +14 -38
  206. data/misc/hexdump.rb +4 -3
  207. data/misc/lint.rb +58 -0
  208. data/misc/objdiff.rb +4 -1
  209. data/misc/objscan.rb +1 -1
  210. data/misc/openrisc-parser.rb +79 -0
  211. data/misc/txt2html.rb +9 -7
  212. data/samples/bindiff.rb +3 -4
  213. data/samples/dasm-plugins/bindiff.rb +15 -0
  214. data/samples/dasm-plugins/bookmark.rb +133 -0
  215. data/samples/dasm-plugins/c_constants.rb +57 -0
  216. data/samples/dasm-plugins/colortheme_solarized.rb +125 -0
  217. data/samples/dasm-plugins/cppobj_funcall.rb +60 -0
  218. data/samples/dasm-plugins/dasm_all.rb +70 -0
  219. data/samples/dasm-plugins/demangle_cpp.rb +31 -0
  220. data/samples/dasm-plugins/deobfuscate.rb +251 -0
  221. data/samples/dasm-plugins/dump_text.rb +35 -0
  222. data/samples/dasm-plugins/export_graph_svg.rb +86 -0
  223. data/samples/dasm-plugins/findgadget.rb +75 -0
  224. data/samples/dasm-plugins/hl_opcode.rb +32 -0
  225. data/samples/dasm-plugins/hotfix_gtk_dbg.rb +19 -0
  226. data/samples/dasm-plugins/imm2off.rb +34 -0
  227. data/samples/dasm-plugins/match_libsigs.rb +93 -0
  228. data/samples/dasm-plugins/patch_file.rb +95 -0
  229. data/samples/dasm-plugins/scanfuncstart.rb +36 -0
  230. data/samples/dasm-plugins/scanxrefs.rb +29 -0
  231. data/samples/dasm-plugins/selfmodify.rb +197 -0
  232. data/samples/dasm-plugins/stringsxrefs.rb +28 -0
  233. data/samples/dasmnavig.rb +1 -1
  234. data/samples/dbg-apihook.rb +24 -9
  235. data/samples/dbg-plugins/heapscan.rb +283 -0
  236. data/samples/dbg-plugins/heapscan/compiled_heapscan_lin.c +155 -0
  237. data/samples/dbg-plugins/heapscan/compiled_heapscan_win.c +128 -0
  238. data/samples/dbg-plugins/heapscan/graphheap.rb +616 -0
  239. data/samples/dbg-plugins/heapscan/heapscan.rb +709 -0
  240. data/samples/dbg-plugins/heapscan/winheap.h +174 -0
  241. data/samples/dbg-plugins/heapscan/winheap7.h +307 -0
  242. data/samples/dbg-plugins/trace_func.rb +214 -0
  243. data/samples/disassemble-gui.rb +48 -7
  244. data/samples/disassemble.rb +31 -6
  245. data/samples/dump_upx.rb +24 -12
  246. data/samples/dynamic_ruby.rb +35 -27
  247. data/samples/elfencode.rb +15 -0
  248. data/samples/emubios.rb +251 -0
  249. data/samples/emudbg.rb +127 -0
  250. data/samples/exeencode.rb +6 -5
  251. data/samples/factorize-headers-peimports.rb +1 -1
  252. data/samples/lindebug.rb +186 -391
  253. data/samples/metasm-shell.rb +68 -57
  254. data/samples/peldr.rb +2 -2
  255. data/tests/all.rb +1 -1
  256. data/tests/arc.rb +26 -0
  257. data/tests/dynldr.rb +22 -4
  258. data/tests/expression.rb +57 -0
  259. data/tests/graph_layout.rb +285 -0
  260. data/tests/ia32.rb +80 -26
  261. data/tests/mcs51.rb +27 -0
  262. data/tests/mips.rb +10 -3
  263. data/tests/preprocessor.rb +18 -0
  264. data/tests/x86_64.rb +66 -18
  265. metadata +465 -219
  266. metadata.gz.sig +2 -0
  267. data/lib/metasm/arm/opcodes.rb +0 -177
  268. data/lib/metasm/gui.rb +0 -23
  269. data/lib/metasm/gui/dasm_graph.rb +0 -1354
  270. data/lib/metasm/ia32.rb +0 -14
  271. data/lib/metasm/ia32/opcodes.rb +0 -872
  272. data/lib/metasm/ppc/parse.rb +0 -52
  273. data/lib/metasm/x86_64.rb +0 -12
  274. data/lib/metasm/x86_64/opcodes.rb +0 -118
  275. data/samples/gdbclient.rb +0 -583
  276. data/samples/rubstop.rb +0 -399
@@ -0,0 +1,43 @@
1
+ ExeFormat
2
+ =========
3
+
4
+ This class is the parent of all executable format handlers.
5
+
6
+ It is defined in `metasm/exe_format/main.rb`.
7
+
8
+ It defines some standard shortcut functions, such as:
9
+
10
+ * `Exe.decode_file(filename)`
11
+ * `Exe.assemble(cpu,asm_source)`
12
+ * `Exe.compile_c(cpu,c_source)`
13
+ * `Exe#encode_file(filename)`
14
+
15
+ These methods will instantiate a new Exe, and call the corresponding
16
+ methods, *e.g.* `load` with the file content, and `decode`.
17
+
18
+ The handling of the different structures in the binary format should be
19
+ done using the <core/SerialStruct.txt> facility.
20
+
21
+ The subclasses are expected to implement various functions, depending on the
22
+ usage (refer to the ELF and COFF implementations for more details):
23
+
24
+ File decoding/disassembly
25
+ -------------------------
26
+
27
+ * `#decode_header`: parse only the file header from the raw data in `#encoded`
28
+ * `#decode`: parse all the raw data in `#encoded`
29
+ * `#cpu_from_headers`: return a <core/CPU.txt> instance according to the exe header information
30
+ * `#get_default_entrypoints`: the list of entrypoints (exported functions, etc)
31
+ * `#dump_section_header`: return a string that may be assembled to recreate the specified section
32
+ * `#section_info`: return a list of generic section information for the disassembler
33
+
34
+
35
+ File encoding/source parsing
36
+ ----------------------------
37
+
38
+ * `#tune_prepro`: define exe-specific macros for the preprocessor (optional)
39
+ * `#parse_init`: initialize the `@cursource` array to receive the parsed asm source
40
+ * `#parse_parser_instruction`: parse exe-specific instructions, eg `.text`, `.import`...
41
+ * `#assemble`: assemble the content of the @cursource into binary section contents
42
+ * `#encode`: assemble the various sections and a binary header into `@encoded`
43
+
@@ -0,0 +1,220 @@
1
+ Expression
2
+ ==========
3
+
4
+ Metasm uses this class to represent arbitrary symbolic arithmetic expressions, e.g.
5
+ * `42`
6
+ * `eax + 12`
7
+ * `loc_4228h + 4*ebx - 12`
8
+
9
+ These expressions can include `Integers`, `Symbols`, and `Strings`.
10
+
11
+ The symbols and strings represent arbitrary variables, with the convention that
12
+ strings represent fixed quantities (eg addresses, labels), whereas symbols
13
+ represent more variable stuff (eg register values).
14
+
15
+ There is also a special symbol that may be used, `:unknown`, to represent a
16
+ value that is known to be unknown. See the `reduce` section.
17
+
18
+ See also <core/Indirection.txt>.
19
+
20
+ The Expression class holds all methods relative to Integer binary manipulation,
21
+ that is `encoding` and `decoding` from/to a binary blob (see also
22
+ <core/EncodedData.txt>)
23
+
24
+
25
+ Members
26
+ -------
27
+
28
+ Expressions hold exactly 3 members:
29
+ * `lexpr`, the left-hand side of the expression
30
+ * `rexpr`, the right-hand side
31
+ * `op`, the operator
32
+
33
+ `lexpr` and `rexpr` can be any value, most often String, Symbol, Integer or
34
+ Expression. For unary operators, `lexpr` is `nil`.
35
+
36
+ `op` is a Symbol representing the operation.
37
+ It should be from the list:
38
+ * arithmetic: `+ - / * >> << & | ^`
39
+ * boolean: `|| && == != > >= < <=`
40
+ * unary: `+ - ~ !`
41
+
42
+
43
+ Instantiation
44
+ -------------
45
+
46
+ In ruby code, use the class method `[]`. It takes 1 to 3 arguments, `lexpr`,
47
+ `op`, and `rexpr`. `lexpr` defaults to `nil`, and `op` defaults to `:+` (except
48
+ for negative numeric values, which are stored with `op` == `:-` and `rexpr` ==
49
+ abs).
50
+
51
+ If `lexpr` or `rexpr` are an `Array`, the `[]` constructor is called
52
+ recursively, to ease the definition of nested Expressions.
53
+
54
+ Examples:
55
+
56
+ Expression[42]
57
+ Expression[:eax, :+, 12]
58
+ Expression[:-, 'my_var']
59
+ Expression[[:eax, :-, 4], :*, [:ebx, :+, 0x12]]
60
+
61
+ The Expression class also includes a parser, to allow creating an expression
62
+ from a string. `parse_string!` will create an Expression and update its
63
+ argument to point after the last part read successfully into the expr.
64
+ The parser handles standard C operator precedence.
65
+
66
+ str = "1 + var"
67
+ Expression.parse_string!(str) # => Expression[1, :+, "var"]
68
+ str = "42 bla"
69
+ Expression.parse_string!(str) # => Expression[42]
70
+ str # => "bla"
71
+
72
+ Use `parse_string` without the ! to parse the string without updating it.
73
+
74
+ External variables
75
+ ------------------
76
+
77
+ The `externals` method will return all non-integer members of the Expression.
78
+
79
+ Expression[[:eax, :+, 42], :-, "bla"].externals # => [:eax, "bla"]
80
+
81
+
82
+ Pattern matching
83
+ ----------------
84
+
85
+ The `match` method allows checking an Expression against a pattern without
86
+ having to check individual members. The pattern should be an Expression,
87
+ whose variable members should be Strings or Symbols, which are also passed as
88
+ arguments to the match function. On successful match, the correspondence
89
+ between variable patterns and their actual value matched is returned as a Hash.
90
+
91
+ Expression[1, :+, 2].match(Expression['var', :+, 2], 'var')
92
+ # => { 'var' => 1 }
93
+ Expression[1, :+, 2].match(Expression['var', :+, 'var'], 'var')
94
+ # => nil
95
+ Expression[1, :+, 1].match(Expression['var', :op, 'var'], 'var', :op)
96
+ # => { 'var' => 1, :op => :+ }
97
+
98
+
99
+ Reduction
100
+ ---------
101
+
102
+ Metasm Expressions include a basic symbolic computation engine, that allows
103
+ some simple transformations of the Expression. The reduction will also
104
+ compute numerical values whenever possible. If the final result is fully
105
+ numeric, an Integer is returned, otherwise a new Expression is returned.
106
+
107
+ In this context, the special value `:unknown` has a particular meaning.
108
+
109
+ Expression[1, :+, 2].reduce
110
+ # => 3
111
+ Expression[:eax, :+, [:ebx, :-, :eax]].reduce
112
+ # => Expression[:ebx]
113
+ Expression[1, :+, [:eax, :+, 2]].reduce
114
+ # => Expression[:eax, :+, 3]
115
+ Expression[:unknown, :+, :eax].reduce
116
+ # => Expression[:unknown]
117
+
118
+ The symbolic engine operates mostly on additions/subtractions, and
119
+ no-operations (eg shift by 0). It also handles some boolean composition.
120
+
121
+ The detail can be found in the #replace_rec method body, in `metasm/main.rb`.
122
+
123
+ The reduce method can also take a block argument, which will be called at
124
+ every step in the recursive reduction, for custom operations. If the block
125
+ returns nil, the result is unchanged, otherwise the new value is used as
126
+ replacement. For example, if you operate on 32-bit values and want to get rid
127
+ of `bla & 0xffffffff`, use
128
+
129
+ some_expr.reduce { |e|
130
+ if e.kind_of?(Expression) and e.op == :& and e.rexpr == 0xffff_ffff
131
+ e.lexpr
132
+ end
133
+ }
134
+
135
+
136
+ Binding
137
+ -------
138
+
139
+ An expression involving variable externals can be bound using a Hash. This will
140
+ replace any occurrence of a key of the Hash by its value in the expression
141
+ members. The `bind` method will return a new Expression with the substitutions,
142
+ and the `bind!` method will update the Expression in-place.
143
+
144
+ Expression['val', :+, 'stuff'].bind('val' => 4, 'stuff' => 8).reduce
145
+ # => 12
146
+ Expression[:eax, :+, :ebx].bind(:ebx => 42)
147
+ # Expression[:eax, :+, 42]
148
+ Expression[:eax, :+, :ebx].bind(:ebx => :ecx)
149
+ # Expression[:eax, :+, :ecx]
150
+
151
+ You can use Expressions as keys, but they will only be used on perfect matches.
152
+
153
+
154
+ Binary packing
155
+ --------------
156
+
157
+ Encoding
158
+ ########
159
+
160
+ The `encode` method will generate an EncodedData holding the expression, either
161
+ as binary if it can reduce to an integral value, or as a relocation.
162
+ The arguments are the relocation type and the endianness, plus an optional
163
+ backtrace (to notify the user where an overflowing relocation comes from).
164
+
165
+ The `encode_imm` class method will generate a raw String for a given
166
+ integral value, a type and an endianness.
167
+ The type can be given as a byte size.
168
+
169
+ Expression.encode_imm(42, :u8, :little) # => "*"
170
+ Expression.encode_imm(42, 1, :big) # => "*"
171
+ Expression.encode_imm(256, :u8, :little) # raise EncodeError
172
+
173
+ On overflows (value cannot be encoded in the bit field) an EncodeError
174
+ exception is raised.
175
+
176
+ Decoding
177
+ ########
178
+
179
+ The `decode_imm` class method can be used to read a binary value into an
180
+ Integer, with an optional offset into the binary string.
181
+
182
+ Expression.decode_imm("*", :u8, :little) # => 42
183
+ Expression.decode_imm("bla\xfe\xff", :i16, :little, 3) # => -2
184
+
185
+
186
+ Arithmetic coercion
187
+ -------------------
188
+
189
+ Expression implements the `:+` and `:-` ruby methods, so that `expr + 4`
190
+ works as expected. The result is reduced.
191
+
192
+
193
+ Integer methods
194
+ ---------------
195
+
196
+ The Expression class offers a few methods to work with integers.
197
+
198
+ make_signed
199
+ ###########
200
+
201
+ `make_signed` will convert a raw unsigned to its equivalent signed value,
202
+ given a bit size.
203
+
204
+ Expression.make_signed(1, 16) # => 1
205
+ Expression.make_signed(0xffff, 16) # => -1
206
+
207
+
208
+ in_range?
209
+ #########
210
+
211
+ `in_range?` can check if a given numeric value would fit in a particular
212
+ <core/Relocation.txt> field. The method can return true or false if it
213
+ fits or not, or `nil` if the result is unknown (eg the expr has no numeric
214
+ value).
215
+
216
+ Expression.in_range?(42, :i8) # => true
217
+ Expression.in_range?(128, :i8) # => false
218
+ Expression.in_range?(-128, :i8) # => true
219
+ Expression.in_range?(Expression['bla'], :u32) # => nil
220
+
@@ -0,0 +1,27 @@
1
+ GNUExports
2
+ ==========
3
+
4
+ This class is defined in `metasm/os/gnu_exports.rb`
5
+
6
+ It defines an `EXPORT` constant, a Hash, whose keys
7
+ are the standard linux API symbol names, and values
8
+ are the library name where you can find this symbol.
9
+
10
+ The equivalent for Windows is <core/WindowsExports.txt>
11
+
12
+ Usage
13
+ -----
14
+
15
+ The main usage of this class is the automatic generation
16
+ of the <core/ELF.txt> dynamic tag `DT_NEEDED` from the
17
+ external symbols referenced by a binary during compilation.
18
+
19
+ This is done in the `automagic_symbols` method.
20
+
21
+ Symbols
22
+ -------
23
+
24
+ The current version holds the symbols of the debian
25
+ glibc, from `libc.so.6` and `libdl.so.2`.
26
+
27
+ Ruby symbols are also defined, from `libruby1.8.so.1.8`.
data/doc/core/Ia32.txt ADDED
@@ -0,0 +1,236 @@
1
+ Ia32
2
+ ====
3
+
4
+ The Ia32 architecture, aka *Intel_x86*, is the most advanced among the
5
+ architectures implemented in the framework. It is a subclass of the
6
+ generic <core/CPU.txt>.
7
+
8
+ It can handle binary code for the 16 and 32bits modes of the processor.
9
+
10
+ It is a superclass for the <core/X86_64.txt> object, a distinct processor
11
+ that handles 64-bit *long_mode* (aka *x64*, *amd64*, *em64t*)
12
+
13
+ The CPU `shortname` is `ia32` (`ia32_16` in 16-bit mode, and a `_be` suffix
14
+ if bigendian)
15
+
16
+ Opcodes
17
+ -------
18
+
19
+ The opcodes list can be customized to match that available on a specific
20
+ version of the processor. The possibilities are:
21
+
22
+ * 386_common
23
+ * 386
24
+ * 387
25
+ * 486
26
+ * pentium
27
+ * p6
28
+ * 3dnow
29
+ * sse
30
+ * sse2
31
+ * sse3
32
+ * vmx
33
+ * sse42
34
+
35
+ Most opcodes are available in the framework, with the notable exception of:
36
+
37
+ * most sse2 simd instructions
38
+ * the AVX instructions
39
+ * amd-specific instructions
40
+
41
+ The `386_common` family is the subset of 386 instructions that are most
42
+ commonly found in standard usermode programs (no `in`/`out`/bcd
43
+ arithmetic/far call/etc).
44
+ This can be useful when manipulating stuff that is not known to be i386
45
+ binary code.
46
+
47
+
48
+ Initialization
49
+ --------------
50
+
51
+ An Ia32 <core/CPU.txt> object can be created using the following code:
52
+
53
+ Metasm::Ia32.new
54
+
55
+ The `X86` alias may be used in place of `Ia32`.
56
+
57
+ The constructor accepts optional arguments to specify the CPU size, the
58
+ opcode family, and the endianness of the processor. The arguments can
59
+ be given in any order. For example,
60
+
61
+ Metasm::Ia32.new(16, 'pentium', :big)
62
+
63
+ will create a 16-bit mode cpu, with opcodes up to the 'pentium' CPU family,
64
+ in big-endian mode.
65
+
66
+ The Ia32 initializer has the convenience feature that it will create an
67
+ X86_64 instance when given the 64 bit size (e.g. `Ia32.new(64)` returns an
68
+ X86_64 instance)
69
+
70
+
71
+ Assembler
72
+ ---------
73
+
74
+ The parser handles only Intel-style asm syntax, *e.g.*
75
+
76
+ some_label:
77
+ mov eax, 10h
78
+ mov ecx, fs:[eax+16]
79
+ push dword ptr fs:[1Ch]
80
+ call ecx
81
+ test al, al
82
+ jnz some_label
83
+ ret
84
+ fmulp ST(4)
85
+
86
+
87
+ Instruction arguments
88
+ #####################
89
+
90
+ The parser recognizes standard registers, such as
91
+
92
+ * `eax`
93
+ * `ah`
94
+ * `mm4` (mmx 64bit register)
95
+ * `xmm2` (xmm 128bit register)
96
+ * `ST` (current top of the FPU stack)
97
+ * `ST(3)` (FPU reg nr.3)
98
+ * `cs` (segment register)
99
+ * `dr3` (debug register)
100
+ * `cr2` (control register)
101
+
102
+ It also supports nonexistent registers, such as
103
+
104
+ * `cr7`
105
+ * `dr4`
106
+ * `segr6` (segment register nr.6)
107
+
108
+ The indirections are called `ModRM`. They take the form:
109
+
110
+ * `[eax]` (memory pointed by `eax`)
111
+ * `byte ptr [eax]` (1-byte memory pointed by `eax`)
112
+ * `byte [eax]` (same as previous)
113
+ * `fs:[eax]` (offset `eax` from the base of the `fs` segment)
114
+ * `[fs:eax]` (same as previous)
115
+
116
+ The pointer itself can be:
117
+
118
+ * `[eax]` (any register)
119
+ * `[eax+12]` (base + numeric offset)
120
+ * `[eax+ebx]` (base + register index)
121
+ * `[eax + 4*ebx]` (base + 1,2,4 or 8 * index)
122
+ * `[eax + 2*ebx + 42]` (both)
123
+
124
+ Note that the form base + s*index cannot use `esp` as index with s != 1.
125
+
126
+ For indirection sizes, the size is taken from the size of other arguments
127
+ if it is not specified (eg `mov eax, [42]` will be 4 bytes, and `mov al, [42]`
128
+ will be 1). The explicit size specifier can be:
129
+
130
+ * `byte` (8bits)
131
+ * `word` (16)
132
+ * `dword` (32)
133
+ * `qword` (64)
134
+ * `oword` (128)
135
+ * `_12bits` (12, arbitrary numbers can be used)
136
+
137
+
138
+ Parser commands
139
+ ###############
140
+
141
+ The following commands are recognized in an asm source:
142
+
143
+ * `.mode`
144
+ * `.bits`
145
+
146
+ They are synonymous, and serve to change the mode of the processor to either
147
+ 16 or 32bits.
148
+
149
+ They should be the first instruction in the source; changing the mode during
150
+ parsing is not supported. This would change only the mode for the next
151
+ instructions to be parsed, and for all instructions (incl. those already parsed
152
+ at this point) when encoding, which is likely **not** what you want. See the
153
+ `codeXX` prefixes.
154
+
155
+ Note that changing the CPU size once it was created may have bad side-effects.
156
+ For example, some preprocessor macros may already have been generated according
157
+ to the original size of the CPU and will be incorrect from this point on.
158
+
159
+
160
+ Prefixes
161
+ ########
162
+
163
+ The following prefixes are handled:
164
+
165
+ * `lock`
166
+ * `rep`, `repz`, `repnz`, `repe`, `repne`
167
+ * `code16`, `code32`
168
+ * `hintjmp`, `hintnojmp` (aliases: `ht`, `hnt`)
169
+ * `seg_cs` ... `seg_gs`
170
+
171
+ The `repXX` prefixes are for string operations (`movsd` etc), but will be set
172
+ for any opcode. Only the last of the family will be encoded.
173
+
174
+ The `code16` will generate instructions to be run on a CPU in 16bit mode,
175
+ independently of the global CPU mode. For example,
176
+
177
+ code16 mov ax, 42h
178
+
179
+ will generate `"\xb8\x42\x00"` (no opsz override prefix), and will decode or
180
+ run incorrectly on a 32bit CPU.
181
+
182
+ The `hintjmp` prefix is useful for conditional jumps to give a hint to the
183
+ CPU branch predictor as to whether the branch is taken or not.
184
+
185
+ The `seg_cs` prefix family is used to declare arbitrary segment override.
186
+ These should be used only in instructions with no ModRM argument.
187
+
188
+
189
+ Suffixes
190
+ ########
191
+
192
+ The parser implements a specific feature to allow the differentiation of
193
+ otherwise ambiguous opcodes, in the form of instruction suffixes.
194
+
195
+ By default, the assembler will generate the shortest encoding for a given
196
+ instruction. To force encoding of another form you can add a specific
197
+ suffix to the instruction. In general, metasm will use e.g. register sizes
198
+ when possible to avoid this kind of situation, but with immediate-only
199
+ displacement this is necessary.
200
+
201
+ or.a16 [1234h], eax ; use a 16-bit address
202
+ or [bx], eax ; use a 16-bit address (implicit from the bx register)
203
+ or eax, 1 ; "\x83\xc8\x01"
204
+ or.i8 eax, 1 ; "\x83\xc8\x01" (same, shortest encoding)
205
+ or.i eax, 1 ; "\x81\xc8\x01\x00\x00\x00" (constant stored in a 32bit field)
206
+ movsd.a16 ; use a 16-bit address-size override prefix (copy dword [si] to [di])
207
+ push.i16 42h ; push a 16-bit integer
208
+
209
+ The suffixes are available as follows:
210
+
211
+ * if the opcode takes an integer argument that can be encoded as either an 8-bit or a <cpu size>-bit value, the `.i` and `.i8` variants are created
212
+ * if the opcode takes a memory indirection as argument, or is a string operation (`movsd`, `scasb`, etc) the `.a16` and `.a32` variants are created
213
+ * if the opcode takes a single integer argument, a far pointer, or is a return instruction, the `.i16` and `.i32` variants are created
214
+
215
+
216
+ C parser
217
+ --------
218
+
219
+ The Ia32 C parser will initialize the type sizes with the `ilp32` memory
220
+ model, which is:
221
+
222
+ * short = 16bits
223
+ * int = 32bits
224
+ * long = 32bits
225
+ * long long = 64bits
226
+ * pointer = 32bits
227
+
228
+ In 16bit mode, the model is `ilp16`, which may not be correct (the 16bits
229
+ compiler has not been tested anyway).
230
+
231
+ The following macros are defined (in the asm preprocessor too)
232
+
233
+ * `_M_IX86` = 500
234
+ * `_X86_`
235
+ * `__i386__`
236
+