metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,42 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/pic16c/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class Pic16c
12
# Builds op.bin_mask, an array with one entry per element of op.bin.
# In every entry a bit is 1 when it is fixed by the opcode and 0 when a
# field value may change it.
#
# NOTE(review): this code indexes op.bin like an array and expects op.fields
# to map field => [index, shift], whereas addop (pic16c/opcodes.rb) passes an
# Integer for bin and stores a bare shift in op.fields -- confirm which
# representation Opcode really uses.
def build_opcode_bin_mask(op)
  op.bin_mask = Array.new(op.bin.length, 0)
  op.fields.each do |field, (index, shift)|
    # collect the bits owned by this field
    op.bin_mask[index] |= @fields_mask[field] << shift
  end
  # invert: field-owned bits become 0, opcode-fixed bits become 1
  op.bin_mask.map! { |variable| variable ^ 255 }
end
20
+
21
# Builds the first-byte lookup table used to find candidate opcodes.
#
# Returns an Array of 256 lists; entry i holds every opcode of @opcode_list
# whose first element may be equal to i once its field bits are filled in.
# opcode.bin_mask is (re)built here through build_opcode_bin_mask.
#
# NOTE(review): op.bin[0] / op.bin_mask[0] assume array-like values, while
# addop in pic16c/opcodes.rb is called with Integer encodings -- confirm.
def build_bin_lookaside
  lookaside = Array.new(256) { [] }
  @opcode_list.each do |op|
    build_opcode_bin_mask op

    first = op.bin[0]
    mask = op.bin_mask[0]

    # walk from the base value up to the value with every variable bit set,
    # keeping only the values that agree with the opcode on its fixed bits
    (first..(first | (255 ^ mask))).each do |byte|
      # was `ext if ...`, an undefined identifier raising NameError
      next if byte & mask != first & mask

      lookaside[byte] << op
    end
  end
  lookaside
end
41
+ end
42
+ end
@@ -0,0 +1,17 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+
9
+ module Metasm
10
+ class Pic16c < CPU
11
# Sets up a Pic16c instance.
# +endianness+ is stored in @endianness (defaults to :big); the parent
# initializer is called without arguments, then init fills in the
# field/property tables and the opcode list.
def initialize(endianness = :big)
  super()
  @endianness = endianness
  init
end
16
+ end
17
+ end
@@ -0,0 +1,68 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/pic16c/main'
8
+
9
+ module Metasm
10
+ class Pic16c
11
# Registers one opcode.
# +name+ and +bin+ are handed to Opcode.new; every extra symbol in +l+ is
# either a property (when listed in @props_allowed) or an argument field,
# which is appended to the opcode args and recorded in its fields table
# with the offset found in @fields_off.
# Returns @opcode_list (the opcode is appended to it).
def addop(name, bin, *l)
  opcode = Opcode.new name, bin
  properties, operands = l.partition { |spec| @props_allowed[spec] }
  properties.each { |prop| opcode.props[prop] = true }
  operands.each do |field|
    opcode.args << field
    opcode.fields[field] = @fields_off[field]
  end
  @opcode_list << opcode
end
23
+
24
# Fills in the field/property tables and registers every opcode through addop.
#
# @fields_mask   field => bit mask of the field (before shifting)
# @props_allowed symbols that addop treats as opcode properties
# @fields_off    field => bit offset of the field in the encoding
#
# The trailing comments on some opcodes are kept from the original source;
# they appear to mark don't-care bits of the encoding.
#
# NOTE(review): :klong is masked with 0x3ff (10 bits); the PIC16 call/goto
# operand is commonly documented as 11 bits -- confirm against the datasheet.
def init
  @fields_mask = { :f => 0x7f, :b => 0x7, :k => 0xff, :klong => 0x3ff, :d => 1 }
  @props_allowed = { :setip => true, :saveip => true, :stopexec => true }
  # the original literal listed `:d => 7` twice (duplicate hash key)
  @fields_off = { :f => 0, :b => 7, :k => 0, :klong => 0, :d => 7 }

  # byte-oriented file register operations
  addop 'addwf', 0b00_0111_0000_0000, :f, :d
  addop 'andwf', 0b00_0101_0000_0000, :f, :d
  addop 'clrf', 0b00_0001_1000_0000, :f
  addop 'clrw', 0b00_0001_0000_0000 # 00_0001_0xxx_xxxx
  addop 'comf', 0b00_1001_0000_0000, :f, :d
  addop 'decf', 0b00_0011_0000_0000, :f, :d
  addop 'decfsz', 0b00_1011_0000_0000, :f, :d
  addop 'incf', 0b00_1010_0000_0000, :f, :d
  addop 'incfsz', 0b00_1111_0000_0000, :f, :d
  addop 'iorwf', 0b00_0100_0000_0000, :f, :d
  addop 'movf', 0b00_1000_0000_0000, :f, :d
  addop 'movwf', 0b00_0000_1000_0000, :f
  addop 'nop', 0b00_0000_0000_0000 # 00_0000_0xx0_0000
  addop 'rlf', 0b00_1101_0000_0000, :f, :d
  addop 'rrf', 0b00_1100_0000_0000, :f, :d
  addop 'subwf', 0b00_0010_0000_0000, :f, :d
  addop 'swapf', 0b00_1110_0000_0000, :f, :d
  addop 'xorwf', 0b00_0110_0000_0000, :f, :d

  # bit-oriented file register operations
  addop 'bcf', 0b01_0000_0000_0000, :f, :b
  addop 'bsf', 0b01_0100_0000_0000, :f, :b
  addop 'btfsc', 0b01_1000_0000_0000, :f, :b, :setip
  addop 'btfss', 0b01_1100_0000_0000, :f, :b, :setip

  # literal and control operations
  addop 'addlw', 0b11_1110_0000_0000, :k # 00_000x_0000_0000
  addop 'andlw', 0b11_1001_0000_0000, :k
  addop 'call', 0b10_0000_0000_0000, :klong, :setip, :stopexec, :saveip
  addop 'clrwdt', 0b00_0000_0110_0100
  addop 'goto', 0b10_1000_0000_0000, :klong, :setip, :stopexec
  addop 'iorlw', 0b11_1000_0000_0000, :k
  addop 'movlw', 0b11_0000_0000_0000, :k # 00_00xx_0000_0000
  addop 'retfie', 0b00_0000_0000_1001, :setip, :stopexec
  addop 'retlw', 0b11_0100_0000_0000, :k, :setip, :stopexec # 00_00xx_0000_0000
  addop 'return', 0b00_0000_0000_1000, :setip, :stopexec
  addop 'sleep', 0b00_0000_0110_0011
  addop 'sublw', 0b11_1100_0000_0000, :k # 00_000x_0000_0000
  addop 'xorlw', 0b11_1010_0000_0000, :k
end
67
+ end
68
+ end
@@ -0,0 +1,11 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/ppc/parse'
9
+ require 'metasm/ppc/encode'
10
+ require 'metasm/ppc/decode'
11
+ require 'metasm/ppc/decompile'
@@ -0,0 +1,264 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ppc/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class PowerPC
12
# Computes op.bin_mask for an opcode with an Integer encoding.
# A bit of the mask is 1 when it is fixed by the opcode and 0 when one of
# the opcode arguments (looked up in @fields_mask / @fields_shift) owns it.
# Opcodes whose bin is not an Integer are left untouched (returns nil).
def build_opcode_bin_mask(op)
  return unless op.bin.kind_of? Integer

  op.bin_mask = 0
  op.args.each do |field|
    op.bin_mask |= @fields_mask[field] << @fields_shift[field]
  end
  # flip within 32 bits: argument bits -> 0, fixed bits -> 1
  op.bin_mask ^= 0xffff_ffff
end
21
+
22
# Builds the lookup table indexed by the top byte (bits 24..31) of an
# instruction word.
# Returns an Array of 256 lists; entry i holds every opcode of opcode_list
# whose top byte can be i once its argument bits are filled in.
# Opcodes whose bin is not an Integer are skipped; bin_mask is computed here.
def build_bin_lookaside
  table = Array.new(256) { [] }
  opcode_list.each do |op|
    next unless op.bin.kind_of? Integer

    build_opcode_bin_mask op

    top = op.bin >> 24
    top_mask = op.bin_mask >> 24

    # candidates range from the base byte to the byte with all variable
    # bits set; keep those agreeing with the opcode on the fixed bits
    (top..(top | (255 ^ top_mask))).each do |byte|
      table[byte] << op if byte & top_mask == top & top_mask
    end
  end
  table
end
38
+
39
# Looks up the opcode of the 32-bit word found at edata.ptr.
# Returns a new DecodedInstruction with its opcode set, or nil when edata
# is exhausted or when no opcode of @bin_lookaside matches the word.
# edata.ptr is moved back by 4 after the word has been read.
def decode_findopcode(edata)
  return if edata.ptr >= edata.data.length

  di = DecodedInstruction.new(self)
  word = edata.decode_imm(:u32, @endianness)
  edata.ptr -= 4

  # candidates are indexed by the top byte; compare on the fixed bits only
  match = @bin_lookaside[word >> 24].find { |candidate|
    (candidate.bin & candidate.bin_mask) == (word & candidate.bin_mask)
  }
  di.opcode = match
  match ? di : nil
end
48
+
49
# Decodes the arguments of the instruction whose opcode was found by
# decode_findopcode.
# Reads one 32-bit word from edata, sets di.instruction.opname, appends one
# decoded argument per opcode arg (flag-like args are skipped), adds the
# number of bytes consumed to di.bin_length, applies decode_aliases and
# returns di.
# Raises SyntaxError on an opcode argument this method does not know.
def decode_instr_op(edata, di)
  before_ptr = edata.ptr
  op = di.opcode
  di.instruction.opname = op.name
  word = edata.decode_imm(:u32, @endianness)

  # extracts the value of field f from the instruction word
  field_val = lambda { |f|
    shift = @fields_shift[f]
    raw = (word >> shift) & @fields_mask[f]
    case f
    when :bd, :d, :ds, :dq, :si, :ui then Expression.make_signed(raw << shift, 16)
    when :li then Expression.make_signed(raw << shift, 26)
    else raw
    end
  }

  # register+displacement argument => name of its displacement field
  displacement_field = { :ra_i16 => :d, :ra_i16s => :ds, :ra_i16q => :dq }

  op.args.each do |a|
    decoded = case a
              when :ra, :rb, :rs, :rt
                GPR.new(field_val[a])
              when :fra, :frb, :frc, :frs, :frt
                FPR.new(field_val[a])
              when :ra_i16, :ra_i16s, :ra_i16q
                i = field_val[displacement_field[a]]
                Memref.new(GPR.new(field_val[:ra]), Expression[i])
              when :bd, :d, :ds, :dq, :si, :ui, :li, :sh, :ma, :mb, :me, :ma_, :mb_, :me_
                Expression[field_val[a]]
              when :ign_bo_zzz, :ign_bo_z, :ign_bo_at, :ign_bo_at2, :ign_bi, :aa, :lk, :oe, :rc, :l
                next # produces no visible argument
              else
                raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}"
              end
    di.instruction.args << decoded
  end
  di.bin_length += edata.ptr - before_ptr

  decode_aliases(di.instruction)

  di
end
82
+
83
# Rewrites instruction +i+ in place into a shorter equivalent form:
# * or/nor (with optional '.') whose 2nd and 3rd args are equal become
#   mr/not with the last arg dropped
# * addi* with a negative Integer immediate becomes subi* with the negated
#   immediate
# * 3-arg add/sub/xor/and/or/div/mul/nand* whose first two args are equal
#   lose their first arg
def decode_aliases(i)
  case i.opname
  when /^n?or\.?$/
    if i.args[1] == i.args[2]
      i.args.pop
      renamed = { 'or' => 'mr', 'or.' => 'mr.', 'nor' => 'not', 'nor.' => 'not.' }
      i.opname = renamed[i.opname]
    end
  when /^addi/
    imm = i.args[2].reduce
    if imm && imm.kind_of?(Integer) && imm < 0
      i.args[2] = Expression[-imm]
      i.opname = i.opname.sub('addi', 'subi')
    end
  end

  # second pass: works on the possibly renamed instruction
  case i.opname
  when /^(add|sub|xor|and|or|div|mul|nand)/
    i.args.shift if i.args.length == 3 && i.args[0] == i.args[1]
  end
end
105
+
106
# Converts a relative branch offset into an absolute address.
# When the opcode has the :setip property, its last argument is an
# Expression, and the opcode name neither starts with 't' nor ends with 'a',
# the last argument is replaced by the reduced value of +addr+ + argument.
# (presumably the excluded names are traps and absolute branches -- confirm
# against ppc/opcodes.rb)
# Returns di. Do not call twice on the same di: the offset would be added
# twice.
def decode_instr_interpret(di, addr)
  return di unless di.opcode.props[:setip]

  target = di.instruction.args.last
  return di unless target.kind_of? Expression

  opname = di.opcode.name
  return di if opname[0] == ?t || opname[-1] == ?a

  absolute = Expression[addr, :+, target].reduce
  di.instruction.args[-1] = Expression[absolute]

  di
end
118
+
119
# TODO
# Updates f.backtrace_binding for the function at +faddr+.
# Every address of +retaddrlist+ that is decoded is first replaced (in
# place) by the address of the last instruction of its block. Then each
# wanted register (GPR::Sym when none is given) is backtraced from every
# return address: when exactly one value is found overall it is stored in
# the binding, otherwise Expression::Unknown is stored.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  retaddrlist.to_a.map! do |retaddr|
    decoded = dasm.decoded[retaddr]
    decoded ? dasm.decoded[retaddr].block.list.last.address : retaddr
  end
  b = f.backtrace_binding

  wantregs = GPR::Sym if wantregs.empty?
  wantregs.map { |r| r.to_sym }.each do |reg|
    found = []
    retaddrlist.to_a.each do |retaddr|
      found |= dasm.backtrace(Expression[reg], retaddr,
                              :include_start => true, :snapshot_addr => faddr, :origin => retaddr)
    end
    b[reg] = (found.length == 1 ? found.first : Expression::Unknown)
  end

  #puts "update_func_bind: #{Expression[faddr]} has sp -> #{b[:$sp]}" if not Expression[b[:$sp], :-, :$sp].reduce.kind_of?(::Integer) if $VERBOSE
end
137
+
138
# Tells whether +expr+ denotes a function return address: true when it
# reduces (reduce_rec) to the :lr symbol. +di+ is accepted but not used.
def backtrace_is_function_return(expr, di = nil)
  reduced = expr.reduce_rec
  reduced == :lr
end
141
+
142
# Tells whether +expr+ refers to the stack: true when :sp is among the
# externals (expr_externals) of Expression[expr].
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  externals.include?(:sp)
end
145
+
146
# Replaces +old+ by +new+ in the arguments of instruction +i+ (in place).
# An Expression argument equal to +old+ becomes +new+ itself; any other
# Expression is rebuilt with +old+ bound to +new+ and reduced. For a Memref
# the same substitution is applied to its offset when that offset is an
# Expression. Other arguments are kept as they are.
def replace_instr_arg_immediate(i, old, new)
  substitute = lambda { |expr|
    expr == old ? new : Expression[expr.bind(old => new).reduce]
  }

  i.args.map! do |arg|
    case arg
    when Expression
      substitute[arg]
    when Memref
      arg.offset = substitute[arg.offset] if arg.offset.kind_of? Expression
      arg
    else
      arg
    end
  end
end
157
+
158
# Returns the DecodedFunction used for functions with no known binding.
# * backtrace_binding maps r0 and r2..r31 (r1 is left out) to
#   Expression::Unknown
# * backtracked_for holds a single BacktraceTrace on :lr
# * btfor_callback keeps the trace for non-default functions, and for the
#   default one only when the calling instruction has the :saveip property
def disassembler_default_func
  df = DecodedFunction.new

  clobbered = {}
  (0..31).each do |r|
    clobbered["r#{r}".to_sym] = Expression::Unknown unless r == 1
  end
  df.backtrace_binding = clobbered

  df.backtracked_for = [BacktraceTrace.new(Expression[:lr], :default, Expression[:lr], :x)]

  callback = lambda do |dasm, btfor, funcaddr, calladdr|
    next btfor if funcaddr != :default

    call_di = dasm.decoded[calladdr]
    call_di && call_di.opcode.props[:saveip] ? btfor : []
  end
  df.btfor_callback = callback

  df
end
172
+
173
# hash opname => lambda { |di, *sym_args| binding }
# Built by init_backtrace_binding on first use, then cached in
# @backtrace_binding.
def backtrace_binding
  return @backtrace_binding if @backtrace_binding

  @backtrace_binding = init_backtrace_binding
end
177
# Replaces the cached opname => lambda table with +b+.
def backtrace_binding=(b)
  @backtrace_binding = b
end
178
+
179
# Fills @backtrace_binding with one lambda per known opcode name.
# Each lambda takes the decoded instruction followed by its symbolic
# arguments and returns a hash { written thing => value expression }.
# Entries already present in @backtrace_binding are kept; opcode names with
# no rule below get no entry. Returns @backtrace_binding.
def init_backtrace_binding
  @backtrace_binding ||= {}
  opcode_list.map { |ol| ol.name }.uniq.each do |opname|
    # the order of the branches matters: string rules come before the
    # catch-all regexps
    handler = case opname
              when 'mr', 'li', 'la'
                lambda { |di, a0, a1| { a0 => Expression[a1] } }
              when 'lis'
                lambda { |di, a0, a1| { a0 => Expression[a1, :<<, 16] } }
              when 'mtctr'
                lambda { |di, a0| { :ctr => Expression[a0] } }
              when 'mfctr'
                lambda { |di, a0| { a0 => Expression[:ctr] } }
              when 'mtlr'
                lambda { |di, a0| { :lr => Expression[a0] } }
              when 'mflr'
                lambda { |di, a0| { a0 => Expression[:lr] } }
              when 'lwzu'
                # load, then write the pointer back to its base register
                lambda { |di, a0, m|
                  ret = { a0 => Expression[m] }
                  ptr = m.pointer.externals.grep(Symbol).first
                  ret[ptr] = m.pointer if ptr != a0
                  ret
                }
              when 'lwz'
                lambda { |di, a0, m| { a0 => Expression[m] } }
              when 'stwu'
                lambda { |di, a0, m|
                  { m => Expression[a0], m.pointer.externals.grep(Symbol).first => m.pointer }
                }
              when 'stw'
                lambda { |di, a0, m| { m => Expression[a0] } }
              when 'rlwinm'
                # rotate left by sh, then keep the bits selected by mb..me
                lambda { |di, a0, a1, sh, mb, me|
                  mb, me = mb.reduce, me.reduce
                  stop = me + 1
                  cpmsk = (1 << @size) - 1
                  a1 = Expression[a1, :&, cpmsk]
                  rol = Expression[[a1, :<<, sh], :|, [a1, :>>, [@size, :-, sh]]]
                  msk = if mb == stop
                          cpmsk
                        elsif mb < stop
                          (((1 << (stop - mb)) - 1) << (@size - stop))
                        else
                          (((1 << (mb - stop)) - 1) << (@size - mb)) ^ cpmsk
                        end
                  { a0 => Expression[Expression[rol, :&, msk].reduce] }
                }

              when 'add', 'addi', 'add.', 'addi.'
                lambda { |di, *a| { a[0] => Expression[a[-2], :+, a[-1]] } }
              when 'addis', 'addis.'
                lambda { |di, *a| { a[0] => Expression[a[-2], :+, [a[-1], :<<, 16]] } }
              when 'sub', 'subi', 'sub.', 'subi.'
                lambda { |di, *a| { a[0] => Expression[a[-2], :-, a[-1]] } }
              when 'subis', 'subis.'
                lambda { |di, *a| { a[0] => Expression[a[-2], :-, [a[-1], :<<, 16]] } }
              when /^b.*la?$/
                lambda { |di, *a| { :lr => Expression[di.next_addr] } }
              when 'nop', /^cmp/, /^b/
                lambda { |di, *a| {} }
              end

    @backtrace_binding[opname] ||= handler if handler
  end
  @backtrace_binding
end
227
+
228
# Returns the binding (hash written thing => value) of instruction +di+.
# Memref and Reg arguments are first converted to their symbolic form, then
# handed with +di+ to the lambda registered for the instruction name in
# backtrace_binding. Unknown instructions yield an empty hash (and a
# message when $VERBOSE is set).
def get_backtrace_binding(di)
  symbolic_args = di.instruction.args.map do |arg|
    case arg
    when Memref then arg.symbolic(di.address)
    when Reg then arg.symbolic
    else arg
    end
  end

  handler = backtrace_binding[di.instruction.opname]
  return handler[di, *symbolic_args] if handler

  puts "unknown instruction to emu #{di}" if $VERBOSE
  {}
end
246
+
247
# Returns the list of expressions instruction +di+ may transfer execution
# to: empty unless the opcode has the :setip property, otherwise a single
# Expression built from :ctr (bctr/bctrl), :lr (blr/blrl) or the last
# argument (a Memref becomes an Indirection of @size/8 bytes, a Reg becomes
# its name as a symbol). +dasm+ is not used here.
def get_xrefs_x(dasm, di)
  return [] unless di.opcode.props[:setip]

  arg = case di.instruction.opname
        when 'bctr', 'bctrl' then :ctr
        when 'blr', 'blrl' then :lr
        else di.instruction.args.last
        end

  target = case arg
           when Memref then Indirection[[arg.base.to_s.to_sym, :+, arg.offset], @size/8, di.address]
           when Reg then arg.to_s.to_sym
           else arg
           end

  [Expression[target]]
end
263
+ end
264
+ end
@@ -0,0 +1,251 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ppc/main'
8
+
9
+ module Metasm
10
+ class PowerPC
11
# Temporarily sets dasm.address_binding[funcstart] to { :sp => :frameptr },
# so that backtracking stack-related offsets resolves in :frameptr (relative
# to func start), and yields every block of +blocks+.
#
# The previous binding of +funcstart+ is put back afterwards; when there was
# none the temporary entry is removed (the original code left it in place).
# The cleanup also runs when the block raises.
# Returns the previous binding (nil when there was none).
def decompile_makestackvars(dasm, funcstart, blocks)
  oldfuncbd = dasm.address_binding[funcstart]
  dasm.address_binding[funcstart] = { :sp => :frameptr }

  begin
    blocks.each { |block| yield block }
  ensure
    if oldfuncbd
      dasm.address_binding[funcstart] = oldfuncbd
    else
      dasm.address_binding.delete funcstart
    end
  end

  oldfuncbd
end
23
+
24
# list variable dependency for each block, remove useless writes
# returns { blockaddr => [list of vars that are needed by a following block] }
#
# +blocks+ is a list of [block address, list of successor addresses].
# As a side effect, symbols reached by a block and known to the C parser get
# a default int-returning function type when they are not functions yet.
# +func+ is not used here.
def decompile_func_finddeps(dcmp, blocks, func)
  deps_r = {}
  deps_w = {}
  deps_to = {}
  deps_subfunc = {} # things read/written by subfuncs (only ever holds empty lists here)

  # find read/writes by each block
  blocks.each do |b, to|
    deps_r[b] = []
    deps_w[b] = []
    deps_to[b] = to
    deps_subfunc[b] = []

    blk = dcmp.dasm.decoded[b].block
    blk.list.each do |di|
      reads = di.backtrace_binding.values
      writes = []
      di.backtrace_binding.keys.each do |k|
        if k.kind_of?(::Symbol)
          writes |= [k]
        else
          reads |= Expression[k].externals # if dword [eax] <- 42, eax is read
        end
      end
      #a << :eax if di.opcode.name == 'ret' # standard ABI

      # a register read after being written in the same block is not a dependency
      deps_r[b] |= reads.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b]
      deps_w[b] |= writes.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
    end

    stackoff = nil
    blk.each_to_normal do |t|
      t = dcmp.backtrace_target(t, blk.list.last.address)
      sym = dcmp.c_parser.toplevel.symbol[t]
      next unless sym

      sym.type = C::Function.new(C::BaseType.new(:int)) if not sym.type.kind_of? C::Function # XXX this may seem a bit extreme, and yes, it is.
      stackoff ||= Expression[dcmp.dasm.backtrace(:sp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :sp].reduce
    end
    if stackoff # last block instr == subfunction call
      deps_r[b] |= deps_subfunc[b] - deps_w[b]
      #deps_w[b] |= [:eax, :ecx, :edx] # standard ABI
    end
  end

  # find regs read and never written (must have been set by caller and are part of the func ABI)
  # (this lambda is defined but not invoked anywhere in this method)
  uninitialized = lambda { |b, r, done|
    from = deps_to.keys.find_all { |f| deps_to[f].include? b } - done
    from.empty? or from.find { |f|
      !deps_w[f].include?(r) and uninitialized[f, r, done + [b]]
    }
  }

  # remove writes from a block if no following block read the value
  dw = {}
  deps_w.each do |b, deps|
    dw[b] = deps.reject { |dep|
      unused = true
      visited = []
      pending = deps_to[b].dup
      while (succ = pending.pop)
        next if visited.include? succ

        visited << succ
        if !deps_r[succ] || deps_r[succ].include?(dep)
          # unknown block or value read: the write must be kept
          unused = false
          break
        elsif !deps_w[succ].include?(dep)
          # not overwritten there: keep following the successors
          pending.concat deps_to[succ]
        end
      end
      unused
    }
  end

  dw
end
96
+
97
# Turns the disassembled blocks of a function into C statements appended to
# func.initializer.statements.
#
# +myblocks+ is a list of [block address, successor addresses] and is
# consumed (shifted) by this method; +deps+ maps a block address to the
# registers that must be committed for the following blocks; +nextaddr+ is
# compared with the single successor of the last block to decide whether a
# goto is needed.
#
# NOTE(review): :eax and the 'ret'/'jmp' names below look like leftovers
# from an x86 version of this code -- confirm what they should be on PowerPC.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  scope = func.initializer
  func.type.args.each { |arg| scope.symbol[arg.name] = arg }
  stmts = scope.statements
  func_entry = myblocks.first[0]

  until myblocks.empty?
    b, to = myblocks.shift
    label = dcmp.dasm.get_label_at(b)
    stmts << C::Label.new(label) if label

    # list of assignments [[dest reg, expr assigned]]
    ops = []
    # reg binding (reg => value, values.externals = regs at block start)
    reg_state = {}
    # Expr => CExpr
    ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
    # Expr => Expr.bind(reg_state) => CExpr
    ceb = lambda { |*e| ce[Expression[*e].bind(reg_state)] }

    # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
    commit = lambda {
      last_writes = deps[b].map { |k| [k, ops.rindex(ops.reverse.find { |r, _v| r == k })] }
      last_writes.sort_by { |_k, i| i.to_i }.each { |k, i|
        next unless i && reg_state[k]

        value = k
        committed = []
        ops[0..i].reverse_each { |r, v|
          committed << r unless v
          value = Expression[value].bind(r => v).reduce unless committed.include?(r)
        }
        ops[i][1] = nil
        reg_state.delete k
        stmts << ce[k, :'=', value] if k != value
      }
    }

    # go !
    dcmp.dasm.decoded[b].block.list.each_with_index { |di, _didx|
      a = di.instruction.args
      if di.opcode.props[:setip] && !di.opcode.props[:stopexec]
        # conditional jump
        commit.call
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        #cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
        cc = ceb[:condjmp]
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
        to.delete dcmp.dasm.normalize(n)
        next
      end

      case di.opcode.name
      when 'blr'
        commit.call
        stmts << C::Return.new(nil)
      when 'bl' # :saveip
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        args = []
        callee = dcmp.c_parser.toplevel.symbol[n]
        if callee && callee.type.args
          stackoff = Expression[dcmp.dasm.backtrace(:sp, di.address, :snapshot_addr => func_entry), :-, :sp].bind(:sp => :frameptr).reduce rescue nil
          args_todo = callee.type.args.dup
          args = []
          # one stack slot per declared argument
          args_todo.each {
            if stackoff.kind_of? Integer
              var = Indirection[[:frameptr, :+, stackoff], @size/8]
              stackoff += @size/8
            else
              var = 0
            end
            args << ceb[var]
            reg_state.delete var
          }
        end
        commit.call
        #next if not di.block.to_subfuncret

        if n.kind_of? ::String
          f = dcmp.c_parser.toplevel.symbol[n]
          unless f
            # internal functions are predeclared, so this one is extern
            f = dcmp.c_parser.toplevel.symbol[n] = C::Variable.new
            f.name = n
            f.type = C::Function.new(C::BaseType.new(:int))
            dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
          end
          commit.call
        else
          # indirect funcall
          fptr = ceb[n]
          reg_state.delete n
          commit.call
          proto = C::Function.new(C::BaseType.new(:int))
          f = C::CExpression[[fptr], proto]
        end
        reg_state.delete :eax
        e = C::CExpression[f, :funcall, args]
        e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include?(:eax) && f.type.type != C::BaseType.new(:void)
        stmts << e
      when 'b'
        a = di.instruction.args.first
        unless a.kind_of? Expression
          # indirect jmp, convert to return (*fptr)();
          n = di.instruction.args.first.symbolic
          fptr = ceb[n]
          reg_state.delete n
          commit.call
          proto = C::Function.new(C::BaseType.new(:void))
          ret = C::Return.new(C::CExpression[[[fptr], C::Pointer.new(proto)], :funcall, []])
          class << ret ; attr_accessor :from_instr end
          ret.from_instr = di
          stmts << ret
          to = []
        end
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          commit.call
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
        else
          bd.each { |k, v|
            if k.kind_of? ::Symbol
              ops << [k, v]
            else # memory
              stmts << ceb[k, :'=', v]
              reg_state.delete k
            end
          }
          # all new register values are computed from the state before
          # this instruction, then applied together
          update = {}
          bd.each { |k, v|
            next unless k.kind_of? ::Symbol

            update[k] = Expression[Expression[v].bind(reg_state).reduce]
          }
          reg_state.update update
        end
      end
    }
    commit.call

    case to.length
    when 0
      if !myblocks.empty? && !%w[ret jmp].include?(dcmp.dasm.decoded[b].block.list.last.instruction.opname)
        puts " block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts " block #{Expression[b]} with multiple to"
    end
  end
end
250
+ end
251
+ end