metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,42 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/pic16c/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
class Pic16c
  # Computes +op.bin_mask+: one integer per element of +op.bin+, where a bit
  # is 1 if it is fixed by the opcode and 0 if it can be changed by a field value.
  #
  # +op.fields+ is iterated as <tt>field name => [octet index, bit offset]</tt>;
  # the bits occupied by each field are looked up in +@fields_mask+.
  # Returns the new mask array.
  def build_opcode_bin_mask(op)
    op.bin_mask = Array.new(op.bin.length, 0)
    # first collect, per octet, the bits that belong to some field...
    op.fields.each { |f, (oct, off)|
      op.bin_mask[oct] |= (@fields_mask[f] << off)
    }
    # ...then invert each octet so that set bits mark the fixed part
    op.bin_mask.map! { |v| 255 ^ v }
  end

  # Builds and returns the first-byte lookup table: an Array of 256 lists,
  # where entry +i+ holds every opcode of +@opcode_list+ whose first byte
  # may be +i+.
  # +op.bin_mask+ is (re)built for every opcode as a side effect.
  def build_bin_lookaside
    lookaside = Array.new(256) { [] }
    @opcode_list.each { |op|
      build_opcode_bin_mask op

      b   = op.bin[0]
      msk = op.bin_mask[0]

      # candidates range from the bare pattern up to the pattern with every
      # variable bit set; values in between that alter a fixed bit are skipped
      for i in b..(b | (255^msk))
        next if i & msk != b & msk

        lookaside[i] << op
      end
    }
    lookaside
  end
end
42
+ end
@@ -0,0 +1,17 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+
9
+ module Metasm
10
# CPU subclass for the Pic16c architecture.
+ class Pic16c < CPU
11
# Runs the CPU base initializer without arguments, records +endianness+
# (default :big) in @endianness, then calls +init+, which is not defined
# in this file.
+ def initialize(endianness = :big)
12
+ super()
13
+ @endianness = endianness
14
+ init
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,68 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/pic16c/main'
8
+
9
+ module Metasm
10
class Pic16c
  # Registers one opcode in +@opcode_list+.
  # name:: mnemonic
  # bin::  encoding of the instruction, as given in the table below
  # l::    list of symbols; a symbol present in +@props_allowed+ is recorded
  #        as an opcode property, any other symbol is appended to the opcode
  #        arguments and its bit offset is taken from +@fields_off+
  def addop(name, bin, *l)
    o = Opcode.new name, bin
    l.each { |ll|
      if @props_allowed[ll]
        o.props[ll] = true
      else
        o.args << ll
        o.fields[ll] = @fields_off[ll]
      end
    }
    @opcode_list << o
  end

  # Sets up the field descriptions (+@fields_mask+: bits of each field,
  # +@fields_off+: bit offset of each field), the set of recognised opcode
  # properties, and fills the opcode table through +addop+.
  # The trailing comments on some entries are kept from the original table;
  # they look like don't-care bit patterns (TODO confirm against the datasheet).
  def init
    @fields_mask = {:f => 0x7f, :b => 0x7, :k => 0xff, :klong => 0x3ff, :d => 1 }
    @props_allowed = {:setip => true, :saveip => true, :stopexec => true }
    # each field appears once (the table used to list :d twice, which made
    # ruby warn about a duplicated hash key)
    @fields_off = { :f => 0, :b => 7, :k => 0, :klong => 0, :d => 7 }

    addop 'addwf', 0b00_0111_0000_0000, :f, :d
    addop 'andwf', 0b00_0101_0000_0000, :f, :d
    addop 'clrf',  0b00_0001_1000_0000, :f
    addop 'clrw',  0b00_0001_0000_0000		# 00_0001_0xxx_xxxx
    addop 'comf',  0b00_1001_0000_0000, :f, :d
    addop 'decf',  0b00_0011_0000_0000, :f, :d
    addop 'decfsz',0b00_1011_0000_0000, :f, :d
    addop 'incf',  0b00_1010_0000_0000, :f, :d
    addop 'incfsz',0b00_1111_0000_0000, :f, :d
    addop 'iorwf', 0b00_0100_0000_0000, :f, :d
    addop 'movf',  0b00_1000_0000_0000, :f, :d
    addop 'movwf', 0b00_0000_1000_0000, :f
    addop 'nop',   0b00_0000_0000_0000		# 00_0000_0xx0_0000
    addop 'rlf',   0b00_1101_0000_0000, :f, :d
    addop 'rrf',   0b00_1100_0000_0000, :f, :d
    addop 'subwf', 0b00_0010_0000_0000, :f, :d
    addop 'swapf', 0b00_1110_0000_0000, :f, :d
    addop 'xorwf', 0b00_0110_0000_0000, :f, :d

    addop 'bcf',   0b01_0000_0000_0000, :f, :b
    addop 'bsf',   0b01_0100_0000_0000, :f, :b
    addop 'btfsc', 0b01_1000_0000_0000, :f, :b, :setip
    addop 'btfss', 0b01_1100_0000_0000, :f, :b, :setip

    addop 'addlw', 0b11_1110_0000_0000, :k	# 00_000x_0000_0000
    addop 'andlw', 0b11_1001_0000_0000, :k
    addop 'call',  0b10_0000_0000_0000, :klong, :setip, :stopexec, :saveip
    addop 'clrwdt',0b00_0000_0110_0100
    addop 'goto',  0b10_1000_0000_0000, :klong, :setip, :stopexec
    addop 'iorlw', 0b11_1000_0000_0000, :k
    addop 'movlw', 0b11_0000_0000_0000, :k	# 00_00xx_0000_0000
    addop 'retfie',0b00_0000_0000_1001, :setip, :stopexec
    addop 'retlw', 0b11_0100_0000_0000, :k, :setip, :stopexec	# 00_00xx_0000_0000
    addop 'return',0b00_0000_0000_1000, :setip, :stopexec
    addop 'sleep', 0b00_0000_0110_0011
    addop 'sublw', 0b11_1100_0000_0000, :k	# 00_000x_0000_0000
    addop 'xorlw', 0b11_1010_0000_0000, :k
  end
end
68
+ end
@@ -0,0 +1,11 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/ppc/parse'
9
+ require 'metasm/ppc/encode'
10
+ require 'metasm/ppc/decode'
11
+ require 'metasm/ppc/decompile'
@@ -0,0 +1,264 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ppc/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class PowerPC
12
# Sets +op.bin_mask+ for an opcode whose encoding is an Integer.
# In the resulting 32-bit mask a bit is 1 when it is fixed by the opcode
# and 0 when it belongs to one of the argument fields of +op.args+.
# Opcodes with a non-Integer +bin+ are left untouched (returns nil).
def build_opcode_bin_mask(op)
  return unless op.bin.kind_of? Integer

  # gather the bits covered by every argument field...
  variable_bits = op.args.inject(0) { |acc, field|
    acc | (@fields_mask[field] << @fields_shift[field])
  }
  # ...everything else in the word is fixed
  op.bin_mask = 0xffff_ffff ^ variable_bits
end
21
+
22
# Returns an Array of 256 lists indexed by the most significant byte of an
# instruction word; each list holds the opcodes whose fixed bits are
# compatible with that byte. Opcodes with a non-Integer encoding are ignored.
# The bin_mask of every indexed opcode is built on the way.
def build_bin_lookaside
  table = Array.new(256) { [] }
  opcode_list.each { |op|
    next unless op.bin.kind_of? Integer
    build_opcode_bin_mask op

    top      = op.bin >> 24
    top_mask = op.bin_mask >> 24

    # enumerate from the bare pattern up to the pattern with all variable
    # bits set, keeping only the bytes that leave the fixed bits intact
    (top..(top | (255 ^ top_mask))).each { |byte|
      table[byte] << op if byte & top_mask == top & top_mask
    }
  }
  table
end
38
+
39
# Looks up the opcode matching the 32-bit word at the current position of
# +edata+, without consuming it.
# Returns a new DecodedInstruction with its opcode set, or nil when the
# pointer is at/after the end of the data or when no opcode matches.
def decode_findopcode(edata)
  return if edata.ptr >= edata.data.length

  di = DecodedInstruction.new(self)
  # peek: read the word, then rewind the pointer
  word = edata.decode_imm(:u32, @endianness)
  edata.ptr -= 4

  candidates = @bin_lookaside[word >> 24]
  found = candidates.find { |op| (op.bin & op.bin_mask) == (word & op.bin_mask) }
  di.opcode = found
  di if found
end
48
+
49
# Decodes the arguments of the instruction whose opcode was already stored in
# +di.opcode+: reads one 32-bit word from +edata+, appends one object per
# opcode argument to +di.instruction.args+, adds the number of bytes consumed
# to +di.bin_length+, applies +decode_aliases+ and returns +di+.
# Raises SyntaxError for an argument type that is not handled below.
+ def decode_instr_op(edata, di)
50
+ before_ptr = edata.ptr
51
+ op = di.opcode
52
+ di.instruction.opname = op.name
53
+ val = edata.decode_imm(:u32, @endianness)
54
+
55
# extracts the raw value of field +f+ from the word; the displacement and
# immediate fields are shifted back to their position in the word and passed
# through Expression.make_signed with a 16 or 26 bit width
# (presumably sign extension - confirm against Expression)
+ field_val = lambda { |f|
56
+ r = (val >> @fields_shift[f]) & @fields_mask[f]
57
+ case f
58
+ when :bd, :d, :ds, :dq, :si, :ui; r = Expression.make_signed(r<<@fields_shift[f], 16)
59
+ when :li; r = Expression.make_signed(r<<@fields_shift[f], 26)
60
+ else r
61
+ end
62
+ }
63
+
64
+ op.args.each { |a|
65
+ di.instruction.args << case a
66
+ when :ra, :rb, :rs, :rt; GPR.new field_val[a]
67
+ when :fra, :frb, :frc, :frs, :frt; FPR.new field_val[a]
68
+ when :ra_i16, :ra_i16s, :ra_i16q
69
# memory operand: base register from :ra, displacement from the matching field
+ i = field_val[{:ra_i16 => :d, :ra_i16s => :ds, :ra_i16q => :dq}[a]]
70
+ Memref.new GPR.new(field_val[:ra]), Expression[i]
71
+ when :bd, :d, :ds, :dq, :si, :ui, :li, :sh, :ma, :mb, :me, :ma_, :mb_, :me_; Expression[field_val[a]]
72
# these fields produce no visible argument: +next+ leaves the block before
# anything is appended
+ when :ign_bo_zzz, :ign_bo_z, :ign_bo_at, :ign_bo_at2, :ign_bi, :aa, :lk, :oe, :rc, :l; next
73
+ else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}"
74
+ end
75
+ }
76
+ di.bin_length += edata.ptr - before_ptr
77
+
78
+ decode_aliases(di.instruction)
79
+
80
+ di
81
+ end
82
+
83
# Rewrites the freshly decoded instruction +i+ in place into a simpler form:
# * or/nor (with optional dot) whose two sources are equal become mr/not
# * addi* with a negative integer immediate becomes the matching subi* with
#   the immediate negated
# * 3-operand arithmetic/logic whose first two operands are equal drops the
#   first operand
def decode_aliases(i)
  if /^n?or\.?$/ =~ i.opname
    if i.args[1] == i.args[2]
      i.args.pop
      simplified = {'or' => 'mr', 'or.' => 'mr.', 'nor' => 'not', 'nor.' => 'not.'}
      i.opname = simplified[i.opname]
    end
  elsif /^addi/ =~ i.opname
    imm = i.args[2].reduce
    if imm and imm.kind_of? Integer and imm < 0
      i.args[2] = Expression[-imm]
      i.opname = i.opname.sub('addi', 'subi')
    end
  end

  if /^(add|sub|xor|and|or|div|mul|nand)/ =~ i.opname
    i.args.shift if i.args.length == 3 and i.args[0] == i.args[1]
  end
end
105
+
106
# Converts the relative target of a branch into an absolute address.
# When the opcode has the :setip property, its last argument is an
# Expression, and the opcode name neither starts with 't' nor ends with 'a',
# the last argument is replaced by the reduced sum of +addr+ and that argument.
# Returns +di+.
# Do not call twice on the same di, +addr+ would be added again.
def decode_instr_interpret(di, addr)
  op = di.opcode
  args = di.instruction.args
  if op.props[:setip] and args.last.kind_of? Expression and op.name[0] != ?t and op.name[-1] != ?a
    absolute = Expression[addr, :+, args.last].reduce
    args[-1] = Expression[absolute]
  end

  di
end
118
+
119
+ # TODO
120
# Fills +f.backtrace_binding+ for the function at +faddr+: for every wanted
# register (all of GPR::Sym when +wantregs+ is empty) the register is
# backtraced from each return address up to the function start.
# A register gets the backtraced value when exactly one distinct value is
# found, Expression::Unknown otherwise.
+ def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
121
# replace each decoded return address by the address of the last instruction
# of its block (map! mutates the array returned by to_a)
+ retaddrlist.to_a.map! { |retaddr| dasm.decoded[retaddr] ? dasm.decoded[retaddr].block.list.last.address : retaddr }
122
+ b = f.backtrace_binding
123
+
124
# backtraces register +r+ from every return address and stores the result in b
+ bt_val = lambda { |r|
125
+ bt = []
126
+ retaddrlist.to_a.each { |retaddr|
127
+ bt |= dasm.backtrace(Expression[r], retaddr,
128
+ :include_start => true, :snapshot_addr => faddr, :origin => retaddr)
129
+ }
130
+ b[r] = ((bt.length == 1) ? bt.first : Expression::Unknown)
131
+ }
132
+ wantregs = GPR::Sym if wantregs.empty?
133
+ wantregs.map { |r| r.to_sym }.each(&bt_val)
134
+
135
+ #puts "update_func_bind: #{Expression[faddr]} has sp -> #{b[:$sp]}" if not Expression[b[:$sp], :-, :$sp].reduce.kind_of?(::Integer) if $VERBOSE
136
+ end
137
+
138
# Returns true when +expr+, once reduced with reduce_rec, is exactly :lr.
# +di+ is accepted but not used here.
+ def backtrace_is_function_return(expr, di=nil)
139
+ expr.reduce_rec == :lr
140
+ end
141
+
142
# Returns true when :sp appears among the expr_externals of +expr+.
+ def backtrace_is_stack_address(expr)
143
+ Expression[expr].expr_externals.include? :sp
144
+ end
145
+
146
# Replaces the immediate +old+ by +new+ in the arguments of instruction +i+,
# in place. Expression arguments and the Expression offsets of Memref
# arguments are updated; an expression equal to +old+ becomes +new+ itself,
# any other one is rebuilt from its binding old => new, reduced.
# Other arguments are left as they are. Returns the argument array.
def replace_instr_arg_immediate(i, old, new)
  substitute = lambda { |e|
    e == old ? new : Expression[e.bind(old => new).reduce]
  }

  i.args.map! { |a|
    if Expression === a
      substitute[a]
    elsif Memref === a
      a.offset = substitute[a.offset] if a.offset.kind_of? Expression
      a
    else
      a
    end
  }
end
157
+
158
# Builds the DecodedFunction used for functions about which nothing is known:
# * every register r0..r31 except r1 is bound to Expression::Unknown
# * :lr is registered as backtracked for execution (:x)
# * the btfor callback keeps the backtrace list for non-default functions and
#   for call sites whose opcode has the :saveip property, and returns an
#   empty list otherwise
def disassembler_default_func
  df = DecodedFunction.new

  unknown_regs = {}
  32.times { |r|
    unknown_regs["r#{r}".to_sym] = Expression::Unknown unless r == 1
  }
  df.backtrace_binding = unknown_regs

  df.backtracked_for = [BacktraceTrace.new(Expression[:lr], :default, Expression[:lr], :x)]

  df.btfor_callback = lambda { |dasm, btfor, funcaddr, calladdr|
    next btfor if funcaddr != :default
    di = dasm.decoded[calladdr]
    (di and di.opcode.props[:saveip]) ? btfor : []
  }

  df
end
172
+
173
+ # hash opname => lambda { |di, *sym_args| binding }
# built by init_backtrace_binding on first access and cached in @backtrace_binding
174
+ def backtrace_binding
175
+ @backtrace_binding ||= init_backtrace_binding
176
+ end
177
# replaces the cached table with +b+
+ def backtrace_binding=(b) @backtrace_binding = b end
178
+
179
# Builds @backtrace_binding, the table opname => lambda { |di, *args| ... }.
# Each lambda returns a hash { written location => Expression of its new value }.
# Only the opcode names handled in the +case+ below get an entry, and an
# entry already present in @backtrace_binding is kept (||=).
# Returns the table.
+ def init_backtrace_binding
180
+ @backtrace_binding ||= {}
181
+ opcode_list.map { |ol| ol.name }.uniq.each { |op|
182
+ binding = case op
183
+ when 'mr', 'li', 'la'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
184
+ when 'lis'; lambda { |di, a0, a1| { a0 => Expression[a1, :<<, 16] } }
185
+ when 'mtctr'; lambda { |di, a0| { :ctr => Expression[a0] } }
186
+ when 'mfctr'; lambda { |di, a0| { a0 => Expression[:ctr] } }
187
+ when 'mtlr'; lambda { |di, a0| { :lr => Expression[a0] } }
188
+ when 'mflr'; lambda { |di, a0| { a0 => Expression[:lr] } }
189
# load with update: besides the load, the first symbol found in the pointer
# expression is assigned the pointer, unless it is the destination itself
+ when 'lwzu'; lambda { |di, a0, m|
190
+ ret = { a0 => Expression[m] }
191
+ ptr = m.pointer.externals.grep(Symbol).first
192
+ ret[ptr] = m.pointer if ptr != a0
193
+ ret
194
+ }
195
+ when 'lwz'; lambda { |di, a0, m| { a0 => Expression[m] } }
196
# store with update: the store plus the pointer written to the first symbol
# of the pointer expression
+ when 'stwu'; lambda { |di, a0, m|
197
+ { m => Expression[a0], m.pointer.externals.grep(Symbol).first => m.pointer }
198
+ }
199
+ when 'stw'; lambda { |di, a0, m| { m => Expression[a0] } }
200
# a0 = rotate_left(a1, sh) & mask, where the mask is derived from mb/me
# (bit positions are counted from @size, see the three cases below)
+ when 'rlwinm'; lambda { |di, a0, a1, sh, mb, me|
201
+ mb, me = mb.reduce, me.reduce
202
+ cpmsk = (1<<@size) - 1
203
+ a1 = Expression[a1, :&, cpmsk]
204
+ rol = Expression[[a1, :<<, sh], :|, [a1, :>>, [@size, :-, sh]]]
205
+ if mb == me+1
206
+ msk = cpmsk
207
+ elsif mb < me+1
208
+ msk = (((1 << ((me+1)-mb)) - 1) << (@size-(me+1)))
209
+ else
210
+ msk = (((1 << (mb-(me+1))) - 1) << (@size-mb)) ^ cpmsk
211
+ end
212
+ { a0 => Expression[Expression[rol, :&, msk].reduce] }
213
+ }
214
+
215
# arithmetic: the destination is the first argument, the operands are the
# last two (this also covers the 2-argument form left by decode_aliases)
+ when 'add', 'addi', 'add.', 'addi.'; lambda { |di, *a| { a[0] => Expression[a[-2], :+, a[-1]] } }
216
+ when 'addis', 'addis.'; lambda { |di, *a| { a[0] => Expression[a[-2], :+, [a[-1], :<<, 16]] } }
217
+ when 'sub', 'subi', 'sub.', 'subi.'; lambda { |di, *a| { a[0] => Expression[a[-2], :-, a[-1]] } }
218
+ when 'subis', 'subis.'; lambda { |di, *a| { a[0] => Expression[a[-2], :-, [a[-1], :<<, 16]] } }
219
# branches whose name ends in 'l' or 'la' set :lr to the next address;
# this must stay before the generic /^b/ entry
+ when /^b.*la?$/; lambda { |di, *a| { :lr => Expression[di.next_addr] } }
220
+ when 'nop', /^cmp/, /^b/; lambda { |di, *a| {} }
221
+ end
222
+
223
+ @backtrace_binding[op] ||= binding if binding
224
+ }
225
+ @backtrace_binding
226
+ end
227
+
228
# Returns the binding (hash of effects) of the decoded instruction +di+.
# The arguments are first turned symbolic (Memref and Reg through their
# +symbolic+ method, anything else unchanged), then handed with +di+ to the
# lambda registered for the mnemonic in +backtrace_binding+.
# An unknown mnemonic yields an empty hash, with a message when $VERBOSE.
def get_backtrace_binding(di)
  sym_args = di.instruction.args.map { |arg|
    if Memref === arg
      arg.symbolic(di.address)
    elsif Reg === arg
      arg.symbolic
    else
      arg
    end
  }

  handler = backtrace_binding[di.instruction.opname]
  return handler[di, *sym_args] if handler

  puts "unknown instruction to emu #{di}" if $VERBOSE
  {}
end
246
+
247
# Returns the list of execution targets of +di+ as a one-element array
# holding an Expression, or an empty array when the opcode does not have
# the :setip property.
# bctr/bctrl target :ctr, blr/blrl target :lr, any other instruction targets
# its last argument (a Memref becomes an Indirection, a Reg its symbol).
def get_xrefs_x(dasm, di)
  return [] unless di.opcode.props[:setip]

  implicit = { 'bctr' => :ctr, 'bctrl' => :ctr, 'blr' => :lr, 'blrl' => :lr }
  target = implicit.fetch(di.instruction.opname) { di.instruction.args.last }

  resolved =
    if Memref === target
      Indirection[[target.base.to_s.to_sym, :+, target.offset], @size/8, di.address]
    elsif Reg === target
      target.to_s.to_sym
    else
      target
    end

  [Expression[resolved]]
end
263
+ end
264
+ end
@@ -0,0 +1,251 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ppc/main'
8
+
9
+ module Metasm
10
+ class PowerPC
11
# Temporarily sets up dasm.address_binding so that backtracking
# stack-related offsets resolves in :frameptr (relative to func start),
# then yields every element of +blocks+ in turn.
#
# The binding of +funcstart+ is put back to its previous state when the
# method exits, including when the block raises: the previous binding is
# restored if there was one, and the temporary entry is removed otherwise.
# Returns the previous binding of +funcstart+, or nil if there was none.
def decompile_makestackvars(dasm, funcstart, blocks)
  oldfuncbd = dasm.address_binding[funcstart]
  dasm.address_binding[funcstart] = { :sp => :frameptr }

  blocks.each { |block|
    yield block
  }

  oldfuncbd
ensure
  if oldfuncbd
    dasm.address_binding[funcstart] = oldfuncbd
  else
    # no binding existed before the call: do not leave ours behind
    dasm.address_binding.delete funcstart
  end
end
23
+
24
+ # list variable dependency for each block, remove useless writes
25
+ # returns { blockaddr => [list of vars that are needed by a following block] }
26
# +blocks+ is iterated as pairs [block address, list of following block addresses].
# The returned hash maps each block address to the symbols written in that
# block which a following block reads before overwriting them.
# +func+ is not used in this method.
+ def decompile_func_finddeps(dcmp, blocks, func)
27
# deps_r: symbols read by a block before it writes them, deps_w: symbols
# written by a block, deps_to: successors of a block
+ deps_r = {} ; deps_w = {} ; deps_to = {}
28
+ deps_subfunc = {} # things read/written by subfuncs
29
+
30
+ # find read/writes by each block
31
+ blocks.each { |b, to|
32
+ deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to
33
+ deps_subfunc[b] = []
34
+
35
+ blk = dcmp.dasm.decoded[b].block
36
+ blk.list.each { |di|
37
# values of the instruction binding are reads, its Symbol keys are writes;
# any other key (memory) adds the externals of its address to the reads
+ a = di.backtrace_binding.values
38
+ w = []
39
+ di.backtrace_binding.keys.each { |k|
40
+ case k
41
+ when ::Symbol; w |= [k]
42
+ else a |= Expression[k].externals # if dword [eax] <- 42, eax is read
43
+ end
44
+ }
45
+ #a << :eax if di.opcode.name == 'ret' # standard ABI
46
+
47
+ deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b]
48
+ deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
49
+ }
50
# stackoff is only used as a flag below: it gets set when a normal successor
# of the block resolves to a symbol of the C toplevel
+ stackoff = nil
51
+ blk.each_to_normal { |t|
52
+ t = dcmp.backtrace_target(t, blk.list.last.address)
53
+ next if not t = dcmp.c_parser.toplevel.symbol[t]
54
+ t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function # XXX this may seem a bit extreme, and yes, it is.
55
+ stackoff ||= Expression[dcmp.dasm.backtrace(:sp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :sp].reduce
56
+ }
57
+ if stackoff # last block instr == subfunction call
58
+ deps_r[b] |= deps_subfunc[b] - deps_w[b]
59
+ #deps_w[b] |= [:eax, :ecx, :edx] # standard ABI
60
+ end
61
+ }
62
+
63
+
64
+
65
+ # find regs read and never written (must have been set by caller and are part of the func ABI)
66
# NOTE(review): this lambda is defined but never called in this method
+ uninitialized = lambda { |b, r, done|
67
+ from = deps_to.keys.find_all { |f| deps_to[f].include? b } - done
68
+ from.empty? or from.find { |f|
69
+ !deps_w[f].include?(r) and uninitialized[f, r, done + [b]]
70
+ }
71
+ }
72
+
73
+ # remove writes from a block if no following block read the value
74
+ dw = {}
75
+ deps_w.each { |b, deps|
76
+ dw[b] = deps.reject { |dep|
77
# walk the successors: the write is kept (ret = false) as soon as a reachable
# block reads dep, or has no entry in deps_r; a block that overwrites dep
# stops the walk on that path
+ ret = true
78
+ done = []
79
+ todo = deps_to[b].dup
80
+ while a = todo.pop
81
+ next if done.include? a
82
+ done << a
83
+ if not deps_r[a] or deps_r[a].include? dep
84
+ ret = false
85
+ break
86
+ elsif not deps_w[a].include? dep
87
+ todo.concat deps_to[a]
88
+ end
89
+ end
90
+ ret
91
+ }
92
+ }
93
+
94
+ dw
95
+ end
96
+
97
# Appends to the body of +func+ (func.initializer.statements) the C
# statements for every block of +myblocks+, which is consumed (shifted) as
# pairs [block address, list of following block addresses].
# +deps+ maps a block address to the registers whose final value must be
# written out for that block.
# +nextaddr+ is compared with the single successor of the last block to
# decide whether a trailing goto is needed.
# NOTE(review): several names in this method (:eax, 'ret', 'jmp') look
# carried over from an x86 version - confirm they are intended for PowerPC.
+ def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
98
+ scope = func.initializer
99
+ func.type.args.each { |a| scope.symbol[a.name] = a }
100
+ stmts = scope.statements
101
+ func_entry = myblocks.first[0]
102
+ until myblocks.empty?
103
+ b, to = myblocks.shift
104
+ if l = dcmp.dasm.get_label_at(b)
105
+ stmts << C::Label.new(l)
106
+ end
107
+
108
+ # list of assignments [[dest reg, expr assigned]]
109
+ ops = []
110
+ # reg binding (reg => value, values.externals = regs at block start)
111
+ binding = {}
112
+ # Expr => CExpr
113
+ ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
114
+ # Expr => Expr.bind(binding) => CExpr
115
+ ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }
116
+
117
+ # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
118
+ commit = lambda {
119
# for each register of deps[b], find its last assignment in ops and process
# the registers in order of that index
+ deps[b].map { |k|
120
+ [k, ops.rindex(ops.reverse.find { |r, v| r == k })]
121
+ }.sort_by { |k, i| i.to_i }.each { |k, i|
122
+ next if not i or not binding[k]
123
+ e = k
124
+ final = []
125
# replay the assignments backwards to express k from the values at block
# start; a register whose entry was already committed (nil value) is no
# longer substituted
+ ops[0..i].reverse_each { |r, v|
126
+ final << r if not v
127
+ e = Expression[e].bind(r => v).reduce if not final.include? r
128
+ }
129
+ ops[i][1] = nil
130
+ binding.delete k
131
+ stmts << ce[k, :'=', e] if k != e
132
+ }
133
+ }
134
+
135
+ # go !
136
+ dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
137
+ a = di.instruction.args
138
+ if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
139
+ # conditional jump
140
+ commit[]
141
+ n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
142
+ #cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
143
# the condition is emitted as the placeholder symbol :condjmp
+ cc = ceb[:condjmp]
144
+ stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
145
+ to.delete dcmp.dasm.normalize(n)
146
+ next
147
+ end
148
+
149
+ case di.opcode.name
150
+ when 'blr'
151
+ commit[]
152
+ stmts << C::Return.new(nil)
153
+ when 'bl' # :saveip
154
+ n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
155
+ args = []
156
+ if t = dcmp.c_parser.toplevel.symbol[n] and t.type.args
157
# when the callee prototype is known, one argument per declared parameter is
# read from consecutive stack slots (or 0 if the stack offset is unknown)
+ stackoff = Expression[dcmp.dasm.backtrace(:sp, di.address, :snapshot_addr => func_entry), :-, :sp].bind(:sp => :frameptr).reduce rescue nil
158
+ args_todo = t.type.args.dup
159
+ args = []
160
+ args_todo.each {
161
+ if stackoff.kind_of? Integer
162
+ var = Indirection[[:frameptr, :+, stackoff], @size/8]
163
+ stackoff += @size/8
164
+ else
165
+ var = 0
166
+ end
167
+ args << ceb[var]
168
+ binding.delete var
169
+ }
170
+ end
171
+ commit[]
172
+ #next if not di.block.to_subfuncret
173
+
174
+ if n.kind_of? ::String
175
+ if not f = dcmp.c_parser.toplevel.symbol[n]
176
+ # internal functions are predeclared, so this one is extern
177
+ f = dcmp.c_parser.toplevel.symbol[n] = C::Variable.new
178
+ f.name = n
179
+ f.type = C::Function.new(C::BaseType.new(:int))
180
+ dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
181
+ end
182
+ commit[]
183
+ else
184
+ # indirect funcall
185
+ fptr = ceb[n]
186
+ binding.delete n
187
+ commit[]
188
+ proto = C::Function.new(C::BaseType.new(:int))
189
+ f = C::CExpression[[fptr], proto]
190
+ end
191
+ binding.delete :eax
192
+ e = C::CExpression[f, :funcall, args]
193
+ e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void)
194
+ stmts << e
195
+ when 'b'
196
+ a = di.instruction.args.first
197
# a direct branch (Expression target) emits nothing here
+ if a.kind_of? Expression
198
+ else
199
+ # indirect jmp, convert to return (*fptr)();
200
+ n = di.instruction.args.first.symbolic
201
+ fptr = ceb[n]
202
+ binding.delete n
203
+ commit[]
204
+ proto = C::Function.new(C::BaseType.new(:void))
205
+ ret = C::Return.new(C::CExpression[[[fptr], C::Pointer.new(proto)], :funcall, []])
206
+ class << ret ; attr_accessor :from_instr end
207
+ ret.from_instr = di
208
+ stmts << ret
209
+ to = []
210
+ end
211
+ else
212
# any other instruction: use its forward-emulation binding
# (get_fwdemu_binding is not defined in this file)
+ bd = get_fwdemu_binding(di)
213
+ if di.backtrace_binding[:incomplete_binding]
214
+ commit[]
215
+ stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
216
+ else
217
# register writes are queued in ops, memory writes are emitted right away
+ bd.each { |k, v|
218
+ if k.kind_of? ::Symbol
219
+ ops << [k, v]
220
+ else # memory
221
+ stmts << ceb[k, :'=', v]
222
+ binding.delete k
223
+ end
224
+ }
225
# the new register values are all computed against the binding as it was
# before this instruction, then merged in one go
+ update = {}
226
+ bd.each { |k, v|
227
+ next if not k.kind_of? ::Symbol
228
+ update[k] = Expression[Expression[v].bind(binding).reduce]
229
+ }
230
+ binding.update update
231
+ end
232
+ end
233
+ }
234
+ commit[]
235
+
236
# emit an explicit goto when the single successor is not the block that
# follows in the output
+ case to.length
237
+ when 0
238
+ if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
239
+ puts " block #{Expression[b]} has no to and don't end in ret"
240
+ end
241
+ when 1
242
+ if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
243
+ stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
244
+ end
245
+ else
246
+ puts " block #{Expression[b]} with multiple to"
247
+ end
248
+ end
249
+ end
250
+ end
251
+ end