metasm 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile +1 -0
- data/doc/code_organisation.txt +1 -1
- data/metasm.gemspec +1 -1
- data/metasm.rb +2 -1
- data/metasm/cpu/arc/decode.rb +3 -3
- data/metasm/cpu/arm/decode.rb +2 -2
- data/metasm/cpu/ia32/compile_c.rb +18 -2
- data/metasm/cpu/ia32/decode.rb +9 -4
- data/metasm/cpu/ia32/decompile.rb +22 -8
- data/metasm/cpu/ia32/opcodes.rb +5 -5
- data/metasm/cpu/mcs51.rb +8 -0
- data/metasm/cpu/mcs51/decode.rb +99 -0
- data/metasm/cpu/mcs51/main.rb +76 -0
- data/metasm/cpu/mcs51/opcodes.rb +120 -0
- data/metasm/cpu/mips/decode.rb +5 -4
- data/metasm/cpu/st20.rb +9 -0
- data/metasm/cpu/st20/decode.rb +180 -0
- data/metasm/cpu/st20/decompile.rb +283 -0
- data/metasm/cpu/st20/main.rb +37 -0
- data/metasm/cpu/st20/opcodes.rb +140 -0
- data/metasm/cpu/x86_64/encode.rb +4 -2
- data/metasm/cpu/x86_64/opcodes.rb +4 -2
- data/metasm/decode.rb +16 -15
- data/metasm/decompile.rb +1 -1
- data/metasm/disassemble.rb +3 -1
- data/metasm/disassemble_api.rb +3 -1
- data/metasm/dynldr.rb +9 -3
- data/metasm/encode.rb +2 -2
- data/metasm/exe_format/coff.rb +3 -1
- data/metasm/exe_format/coff_decode.rb +5 -3
- data/metasm/exe_format/elf.rb +4 -0
- data/metasm/exe_format/elf_decode.rb +1 -2
- data/metasm/exe_format/elf_encode.rb +4 -1
- data/metasm/exe_format/macho.rb +20 -6
- data/metasm/exe_format/pe.rb +1 -1
- data/metasm/exe_format/serialstruct.rb +1 -1
- data/metasm/gui.rb +1 -1
- data/metasm/gui/dasm_hex.rb +2 -2
- data/metasm/gui/dasm_main.rb +8 -8
- data/metasm/gui/debug.rb +4 -4
- data/metasm/gui/gtk.rb +1 -1
- data/metasm/gui/qt.rb +2 -2
- data/metasm/gui/win32.rb +1 -1
- data/metasm/main.rb +11 -6
- data/metasm/os/windows.rb +26 -23
- data/misc/hexdump.rb +2 -2
- data/misc/objdiff.rb +4 -1
- data/misc/objscan.rb +1 -1
- data/samples/dasm-plugins/bindiff.rb +1 -1
- data/samples/dasm-plugins/scanxrefs.rb +2 -1
- data/samples/dynamic_ruby.rb +24 -25
- data/samples/elfencode.rb +15 -0
- data/samples/exeencode.rb +2 -2
- data/samples/metasm-shell.rb +67 -55
- data/tests/mcs51.rb +27 -0
- metadata +13 -2
@@ -0,0 +1,120 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2015-2016 Google
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
require 'metasm/cpu/mcs51/main'
|
7
|
+
|
8
|
+
module Metasm
|
9
|
+
|
10
|
+
class MCS51
	# Registers one opcode description in @opcode_list.
	#
	# name:: mnemonic string
	# bin::  opcode byte value handed to Opcode.new
	# args:: operand symbols; each one must be a key of @valid_args or @fields_mask
	#
	# Operands that have an entry in @fields_mask are also recorded in the
	# opcode's fields hash together with their shift from @fields_shift.
	# Raises a RuntimeError ("unknown ...") for an operand symbol that is
	# neither a valid arg nor a known field.
	def addop(name, bin, *args)
		o = Opcode.new name, bin
		args.each { |a|
			# reject unknown operands before touching the opcode
			raise "unknown #{a.inspect}" unless @valid_args[a] or @fields_mask[a]
			o.args << a
			o.fields[a] = @fields_shift[a] if @fields_mask[a]
		}
		@opcode_list << o
	end

	# Resets @opcode_list and fills it with the MCS-51 opcode table.
	#
	# Declares the operand symbols accepted by addop, then the two operands
	# that are encoded as bit fields inside the opcode byte:
	# :rd (mask 15, shift 0) and :addr_11 (mask 7, shift 5).
	def init_mcs51
		@opcode_list = []
		@valid_args.update [:rd, :r_a, :r_b, :r_c, :d8, :rel8, :m8,
				    :addr_11, :addr_16].inject({}) { |h, v| h.update v => true }
		@fields_mask.update :rd => 15, :addr_11 => 7
		@fields_shift.update :rd => 0, :addr_11 => 5

		addop 'nop',  0x00
		addop 'ret',  0x22
		addop 'reti', 0x32
		addop 'swap', 0xc4, :r_a
		addop '???',  0xa5
		addop 'rr',   0x03, :r_a
		addop 'rrc',  0x13, :r_a
		addop 'rl',   0x23, :r_a
		addop 'rlc',  0x33, :r_a

		addop 'jc',   0x40, :rel8
		addop 'jnc',  0x50, :rel8
		addop 'jz',   0x60, :rel8
		addop 'jnz',  0x70, :rel8
		addop 'sjmp', 0x80, :rel8

		addop 'div',  0x84, :r_a, :r_b
		addop 'mul',  0xa4, :r_a, :r_b

		addop 'push', 0xc0, :m8
		addop 'pop',  0xd0, :m8

		addop 'clr',  0xc3, :r_c
		addop 'clr',  0xe4, :r_a
		addop 'cpl',  0xb3, :r_c
		addop 'cpl',  0xf4, :r_a
		# decimal adjust always operates on the accumulator, list it like
		# the other accumulator-only opcodes (swap, rr, clr, cpl...)
		addop 'da',   0xd4, :r_a

		addop 'ajmp',  0x01, :addr_11
		addop 'acall', 0x11, :addr_11
		addop 'ljmp',  0x02, :addr_16
		addop 'lcall', 0x12, :addr_16

		addop 'inc',  0x04, :r_a
		addop 'inc',  0x05, :m8
		addop 'inc',  0x00, :rd

		addop 'dec',  0x14, :r_a
		addop 'dec',  0x15, :m8
		addop 'dec',  0x10, :rd

		addop 'add',  0x24, :r_a, :d8
		addop 'add',  0x25, :r_a, :m8
		addop 'add',  0x20, :r_a, :rd

		addop 'addc', 0x34, :r_a, :d8
		addop 'addc', 0x35, :r_a, :m8
		addop 'addc', 0x30, :r_a, :rd

		addop 'orl',  0x42, :m8, :r_a
		addop 'orl',  0x43, :m8, :d8
		addop 'orl',  0x44, :r_a, :d8
		addop 'orl',  0x45, :r_a, :m8
		addop 'orl',  0x40, :r_a, :rd

		addop 'anl',  0x52, :m8, :r_a
		addop 'anl',  0x53, :m8, :d8
		addop 'anl',  0x54, :r_a, :d8
		addop 'anl',  0x55, :r_a, :m8
		addop 'anl',  0x50, :r_a, :rd

		addop 'xrl',  0x62, :m8, :r_a
		addop 'xrl',  0x63, :m8, :d8
		addop 'xrl',  0x64, :r_a, :d8
		addop 'xrl',  0x65, :r_a, :m8
		addop 'xrl',  0x60, :r_a, :rd

		addop 'mov',  0x74, :r_a, :d8
		addop 'mov',  0x75, :m8, :d8
		addop 'mov',  0x70, :rd, :d8
		addop 'mov',  0xa0, :rd, :m8
		addop 'mov',  0x85, :m8, :m8
		addop 'mov',  0x80, :m8, :rd
		addop 'mov',  0xe0, :r_a, :rd
		addop 'mov',  0xf0, :rd, :r_a

		addop 'subb', 0x94, :r_a, :d8
		addop 'subb', 0x95, :r_a, :m8
		addop 'subb', 0x90, :r_a, :rd

		# compare and jump if not equal: the mnemonic is 'cjne'
		addop 'cjne', 0xb4, :r_a, :d8, :rel8
		addop 'cjne', 0xb5, :r_a, :m8, :rel8
		addop 'cjne', 0xb0, :rd, :d8, :rel8

		addop 'xch',  0xc5, :r_a, :m8
		addop 'xch',  0xc0, :r_a, :rd

		addop 'djnz', 0xd5, :m8, :rel8
		addop 'djnz', 0xd0, :rd, :rel8
	end
end
|
120
|
+
end
|
data/metasm/cpu/mips/decode.rb
CHANGED
@@ -149,11 +149,12 @@ class MIPS
|
|
149
149
|
when 'and', 'andi'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } }
|
150
150
|
when 'or', 'ori'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } }
|
151
151
|
when 'nor'; lambda { |di, a0, a1, a2| { a0 => Expression[:~, [a1, :|, a2]] } }
|
152
|
+
when 'not'; lambda { |di, a0, a1| { a0 => Expression[:~, a1] } }
|
152
153
|
when 'xor'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, a2] } }
|
153
154
|
when 'sll', 'sllv'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :>>, a2] } }
|
154
155
|
when 'srl', 'srlv', 'sra', 'srav'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<<, a2] } } # XXX sign-extend
|
155
|
-
when 'lw';
|
156
|
-
when 'sw';
|
156
|
+
when 'lw', 'lwl', 'lwr'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
|
157
|
+
when 'sw', 'swl', 'swr'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
|
157
158
|
when 'lh', 'lhu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } # XXX sign-extend
|
158
159
|
when 'sh'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
|
159
160
|
when 'lb', 'lbu'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
|
@@ -161,8 +162,8 @@ class MIPS
|
|
161
162
|
when /^slti?u?/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } # XXX signedness
|
162
163
|
when 'mfhi'; lambda { |di, a0| { a0 => Expression[:hi] } }
|
163
164
|
when 'mflo'; lambda { |di, a0| { a0 => Expression[:lo] } }
|
164
|
-
when 'mult'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
|
165
|
-
when 'div'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
|
165
|
+
when 'mult', 'multu'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
|
166
|
+
when 'div', 'divu'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
|
166
167
|
when 'jal', 'jalr'; lambda { |di, a0| { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } }
|
167
168
|
when 'li', 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
|
168
169
|
when 'syscall'; lambda { |di, *a| { :$v0 => Expression::Unknown } }
|
data/metasm/cpu/st20.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/main'
|
8
|
+
require 'metasm/cpu/st20/decode'
|
9
|
+
require 'metasm/cpu/st20/decompile'
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/cpu/st20/opcodes'
|
8
|
+
require 'metasm/decode'
|
9
|
+
|
10
|
+
module Metasm
|
11
|
+
class ST20
	# decodes the instruction at edata.ptr, mapped at virtual address addr
	# returns nil if edata.ptr is already at or past edata.length,
	# otherwise the DecodedInstruction after target fixup
	def decode_instruction(edata, addr)
		return if edata.ptr >= edata.length
		di = DecodedInstruction.new self
		di.address = addr
		di = decode_instr_op(edata, di)
		decode_instr_interpret(di, addr)
	end

	# decode one byte from the bitstream, recurse if the byte is a prefix
	# pfx is the operand word accumulated from the previous prefix bytes
	# returns di, with opcode, opname, bin_length and args filled in
	def decode_instr_op(edata, di, pfx=0)
		# out of data, or 4 bytes already consumed: stop here and keep
		# the accumulated prefix word as the argument
		if edata.ptr >= edata.length or di.bin_length >= 4
			di.instruction.args << Expression[pfx]
			return di
		end

		# bytestream structure :
		#  sequence of prefixes, which build a word 4 bits at a time
		#  last element = function code
		#  'opr' is a special function, means use the prefix word as an opcode number from 'operate'
		byte = edata.read(1).unpack('C')[0]
		fcode = byte & 0xf0
		arg   = byte & 0x0f
		pfx = (pfx << 4) | arg
		di.opcode = @opcode_list[fcode >> 4]
		di.instruction.opname = di.opcode.name
		di.bin_length += 1

		case di.instruction.opname
		when 'pfix'
			return decode_instr_op(edata, di, pfx)

		when 'nfix'
			# complement the whole accumulated word
			pfx ^= -1
			di.instruction.opname = 'pfix'	# will be displayed on EOS, and we cannot represent the whole decoded pfx with 'nfix'
			return decode_instr_op(edata, di, pfx)

		when 'opr'
			if op = @op_operate[pfx]
				# operands have no arg (they work on the implicit 3-register stack A B C)
				di.instruction.opname = op
				di.opcode = @opc_operate[op] || di.opcode
			else
				# unknown operand, keep the generic form
				di.instruction.args << Expression[pfx]
			end
		else
			di.instruction.args << Expression[pfx]
		end

		di
	end

	# converts the relative operand of 'j', 'cj' and 'fcall' into an
	# absolute address (addr + instruction length + operand)
	# returns di
	def decode_instr_interpret(di, addr)
		case di.instruction.opname
		when 'j', 'cj', 'fcall'
			delta = di.instruction.args.last.reduce
			arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce
			di.instruction.args[-1] = Expression[arg]
		end

		di
	end

	# returns the hash { target => new value } describing the effect of di
	# :a, :b, :c are the implicit register stack, :wspace the workspace pointer
	# unhandled opnames yield a binding flagged with :incomplete_binding
	def get_backtrace_binding(di)
		arg = di.instruction.args[0]
		sz = @size/8
		unk = Expression::Unknown
		case di.instruction.opname
		when 'j';     {}
		when 'ldlp';  { :a => Expression[:wspace, :+, [sz, :*, arg]], :b => :a, :c => :b }
		when 'ldnl';  { :a => Indirection[[:a, :+, [sz, :*, arg]], sz, di] }
		when 'ldc';   { :a => arg, :b => :a, :c => :b }
		when 'ldnlp'; { :a => Expression[:a, :+, [sz, :*, arg]] }
		when 'ldl';   { :a => Indirection[[:wspace, :+, [sz, :*, arg]], sz, di], :b => :a, :c => :b }
		when 'adc';   { :a => Expression[:a, :+, arg] }
		when 'fcall'; {
			:a => Expression[di.next_addr],
			:wspace => Expression[:wspace, :-, [4*sz]],
			Indirection[[:wspace, :-, [4*sz]], sz, di] => di.next_addr,
			Indirection[[:wspace, :-, [3*sz]], sz, di] => :a,
			Indirection[[:wspace, :-, [2*sz]], sz, di] => :b,
			Indirection[[:wspace, :-, [1*sz]], sz, di] => :c,
		}
		# cj+(:a != 0) => a=b, b=c, c=unk ; (:a == 0) => jump, a=a, b=b, c=c
		when 'cj';    { :a => unk, :b => unk, :c => unk }
		# workspace is adjusted by arg words: use the word size like
		# ldlp/ldl/stl do instead of a hard-coded 4
		when 'ajw';   { :wspace => Expression[:wspace, :+, [sz, :*, arg]] }
		when 'eqc';   { :a => Expression[:a, :==, arg] }
		when 'stl';   { Indirection[[:wspace, :+, [sz, :*, arg]], sz, di] => :a, :a => :b, :b => :c, :c => unk }
		when 'stnl';  { Indirection[[:a, :+, [sz, :*, arg]], sz, di] => :b, :a => :c, :b => unk, :c => unk }

		when 'add';   { :a => Expression[:b, :+, :a], :b => :c, :c => unk }
		when 'sub';   { :a => Expression[:b, :-, :a], :b => :c, :c => unk }
		when 'prod';  { :a => Expression[:b, :*, :a], :b => :c, :c => unk }
		when 'xor';   { :a => Expression[:b, :^, :a], :b => :c, :c => unk }
		when 'ldpi';  { :a => Indirection[[di.next_addr, :+, :a], sz, di] }
		when 'mint';  { :a => Expression[-1 << (@size-1)], :b => :a, :c => :b }
		when 'in';    { :a => unk, :b => unk, :c => unk }	# read a bytes from channel b at buffer c
		when 'out';   { :a => unk, :b => unk, :c => unk }	# write a bytes to channel b from buffer c
		when 'lb';    { :a => Indirection[:a, 1, di] }
		when 'sb';    { Indirection[:a, 1, di] => Expression[:b, :&, 0xff], :a => :c, :b => unk, :c => unk }
		when 'bsub';  { :a => Expression[:a, :+, :b], :b => :c, :c => unk }
		when 'ssub';  { :a => Expression[:a, :+, [2, :*, :b]], :b => :c, :c => unk }
		when 'wsub';  { :a => Expression[:a, :+, [sz, :*, :b]], :b => :c, :c => unk }
		when 'gajw';  { :wspace => Expression[:a], :a => Expression[:wspace] }
		when 'dup';   { :b => :a, :c => :b }
		else
			puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
			{ :incomplete_binding => Expression[1], :a => unk, :b => unk, :c => unk }
		end
	end

	# returns the list of expressions di may transfer execution to
	# empty unless the opcode has the :setip property
	def get_xrefs_x(dasm, di)
		return [] if not di.opcode.props[:setip]

		case di.opcode.basename
		when 'j', 'cj', 'fcall'
			# decode_instr_interpret stores the absolute target of these
			# three opcodes in the instruction argument
			[Expression[di.instruction.args.first]]
		#when 'ret'
			#[Indirection[:sp, 2, di.address]]
		else
			[]
		end
	end

	# checks if expr is a valid return expression matching the :saveip instruction
	# NOTE(review): tests a 2-byte indirection on :sp, while the bindings of this
	# cpu only use :wspace - looks inherited from another cpu, confirm
	def backtrace_is_function_return(expr, di=nil)
		expr = Expression[expr].reduce_rec
		expr.kind_of?(Indirection) and expr.len == 2 and expr.target == Expression[:sp]
	end

	# updates the function backtrace_binding
	# for each register in wantregs, backtraces its value from every address of
	# retaddrlist; the binding is the backtraced expression when exactly one
	# distinct result is found, Expression::Unknown otherwise
	# does nothing for the registers when retaddrlist is nil
	# returns the binding hash
	def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
		b = f.backtrace_binding

		bt_val = lambda { |r|
			next if not retaddrlist
			# placeholder value while the backtrace runs
			b[r] = Expression::Unknown
			bt = []
			retaddrlist.each { |retaddr|
				bt |= dasm.backtrace(Expression[r], retaddr, :include_start => true,
					     :snapshot_addr => faddr, :origin => retaddr)
			}
			if bt.length != 1
				b[r] = Expression::Unknown
			else
				b[r] = bt.first
			end
		}

		wantregs.each(&bt_val)

		b
	end

	# returns true if the expression is an address on the stack
	# NOTE(review): checks for :sp, the workspace register used elsewhere in this
	# class is :wspace - confirm whether :sp can ever appear here
	def backtrace_is_stack_address(expr)
		Expression[expr].expr_externals.include?(:sp)
	end

	# updates an instruction's argument replacing an expression with another (eg label renamed)
	def replace_instr_arg_immediate(i, old, new)
		i.args.map! { |a|
			a == old ? new : Expression[a.bind(old => new).reduce]
		}
	end
end
|
180
|
+
end
|
@@ -0,0 +1,283 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/cpu/st20/main'
|
8
|
+
|
9
|
+
module Metasm
|
10
|
+
class ST20
	# temporarily setup dasm.address_binding so that backtracking
	# stack-related offsets resolve in :frameptr (relative to func start)
	# yields each element of blocks, then puts the previous binding of
	# funcstart back (also when the block raises)
	# returns the previous binding
	def decompile_makestackvars(dasm, funcstart, blocks)
		oldfuncbd = dasm.address_binding[funcstart]
		dasm.address_binding[funcstart] = { :wspace => :frameptr }
		begin
			blocks.each { |block| yield block }
		ensure
			# never leave the temporary binding behind
			dasm.address_binding[funcstart] = oldfuncbd
		end
		oldfuncbd
	end

	# add di-specific registry written/accessed
	# a is the list of values read by di, w the list of values written
	def decompile_func_finddeps_di(dcmp, func, di, a, w)
		case di.instruction.opname
		when 'ret'
			# the return value lives in :a, unless the function returns void
			# (same test on func.type.type as the 'ret' case of decompile_blocks)
			a << :a if not func.type.type.kind_of? C::BaseType or func.type.type.name != :void	# standard ABI
		when 'in', 'out'
			a << :a << :b << :c
		end
	end

	# list variable dependency for each block, remove useless writes
	# returns { blockaddr => [list of vars that are needed by a following block] }
	# also appends to func.type.args one register-attributed variable for every
	# register that is read before being written on some path from a root block
	def decompile_func_finddeps(dcmp, blocks, func)
		deps_r = {} ; deps_w = {} ; deps_to = {}
		deps_subfunc = {}	# things read/written by subfuncs

		# find read/writes by each block
		blocks.each { |b, to|
			deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to
			deps_subfunc[b] = []

			blk = dcmp.dasm.decoded[b].block
			blk.list.each { |di|
				a = di.backtrace_binding.values
				w = []
				di.backtrace_binding.keys.each { |k|
					case k
					when ::Symbol; w |= [k]
					else a |= Expression[k].externals
					end
				}
				decompile_func_finddeps_di(dcmp, func, di, a, w)

				# a read only counts if the block has not already written the value
				deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b]
				deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
			}
			blk.each_to_normal { |t|
				t = dcmp.backtrace_target(t, blk.list.last.address)
				next if not t = dcmp.c_parser.toplevel.symbol[t]
				t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function
				t.type.args.to_a.each { |arg|
					# NOTE(review): the rest of this class reads the register name
					# with has_attribute_var('register'), and deps_subfunc is not
					# read again in this method - confirm this is intended
					if reg = arg.has_attribute('register')
						deps_subfunc[b] |= [reg.to_sym]
					end
				}
			}
		}

		bt = blocks.transpose
		roots = bt[0] - bt[1].flatten	# XXX jmp 1stblock ?

		# find regs read and never written (must have been set by caller and are part of the func ABI)
		uninitialized = lambda { |b, r, done|
			if not deps_r[b]
			elsif deps_r[b].include?(r)
				true
			elsif deps_w[b].include?(r)
			else
				done << b
				(deps_to[b] - done).find { |tb| uninitialized[tb, r, done] }
			end
		}

		regargs = []
		register_symbols.each { |r|
			if roots.find { |root| uninitialized[root, r, []] }
				regargs << r
			end
		}

		# TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
		regargs.sort_by { |r| r.to_s }.each { |r|
			a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
			a.add_attribute("register(#{r})")
			func.type.args << a
		}

		# remove writes from a block if no following block read the value
		dw = {}
		deps_w.each { |b, deps|
			dw[b] = deps.reject { |dep|
				ret = true
				done = []
				todo = deps_to[b].dup
				while a = todo.pop
					next if done.include? a
					done << a
					if not deps_r[a] or deps_r[a].include? dep
						# unknown block, or a block reading the value: keep the write
						ret = false
						break
					elsif not deps_w[a].include? dep
						# value flows through this block untouched, look further
						todo.concat deps_to[a]
					end
				end
				ret
			}
		}

		dw
	end

	# describes a function call: return value in :a, every register is changed
	def abi_funcall
		{ :retval => :a, :changed => register_symbols }
	end

	# appends to func.initializer.statements the C statements for every
	# [block address, successors] pair of myblocks (myblocks is emptied)
	# deps is the { blockaddr => [vars needed later] } hash
	# nextaddr is the address following the last block, used to decide whether
	# a trailing goto is needed
	def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
		scope = func.initializer
		func.type.args.each { |a| scope.symbol[a.name] = a }
		stmts = scope.statements
		blocks_toclean = myblocks.dup
		until myblocks.empty?
			b, to = myblocks.shift
			if l = dcmp.dasm.get_label_at(b)
				stmts << C::Label.new(l)
			end

			# list of assignments [[dest reg, expr assigned]]
			ops = []
			# reg binding (reg => value, values.externals = regs at block start)
			binding = {}
			# Expr => CExpr
			ce  = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
			# Expr => Expr.bind(binding) => CExpr
			ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }

			# dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
			commit = lambda {
				deps[b].map { |k|
					[k, ops.rindex(ops.reverse.find { |r, v| r == k })]
				}.sort_by { |k, i| i.to_i }.each { |k, i|
					next if not i or not binding[k]
					e = k
					final = []
					ops[0..i].reverse_each { |r, v|
						final << r if not v
						e = Expression[e].bind(r => v).reduce if not final.include? r
					}
					ops[i][1] = nil
					binding.delete k
					stmts << ce[k, :'=', e] if k != e
				}
			}

			# returns an array to use as funcall arguments
			get_func_args = lambda { |di, f|
				# XXX see remarks in #finddeps
				args_todo = f.type.args.to_a.dup
				args = []
				args_todo.each { |a_|
					if r = a_.has_attribute_var('register')
						args << Expression[r.to_sym]
					else
						args << Expression[0]
					end
				}
				args.map { |e| ceb[e] }
			}

			# go !
			dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
				if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
					# conditional jump
					commit[]
					n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
					# cj takes the branch when a == 0
					cc = ceb[:a, :'==', 0]
					# XXX switch/indirect/multiple jmp
					stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
					to.delete dcmp.dasm.normalize(n)
					next
				end

				case di.instruction.opname
				when 'ret'
					commit[]
					ret = nil
					ret = C::CExpression[ceb[:a]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
					stmts << C::Return.new(ret)
				when 'fcall'	# :saveip
					n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
					args = []
					if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
						args = get_func_args[di, f]
					end
					commit[]
					#next if not di.block.to_subfuncret

					if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function)
						# indirect funcall
						fptr = ceb[n]
						binding.delete n
						proto = C::Function.new(C::BaseType.new(:int))
						proto = f.type if f and f.type.kind_of? C::Function
						f = C::CExpression[[fptr], C::Pointer.new(proto)]
					elsif not f
						# internal functions are predeclared, so this one is extern
						f = C::Variable.new
						f.name = n
						f.type = C::Function.new(C::BaseType.new(:int))
						if dcmp.recurse > 0
							dcmp.c_parser.toplevel.symbol[n] = f
							dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
						end
					end
					commit[]
					binding.delete :a
					e = C::CExpression[f, :funcall, args]
					e = C::CExpression[ce[:a], :'=', e, f.type.type] if deps[b].include? :a and f.type.type != C::BaseType.new(:void)
					stmts << e
				when 'in', 'out'
					# channel i/o is rendered as a call to an intrinsic declared on first use
					if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
						dcmp.c_parser.parse("void intrinsic_#{di.instruction.opname}(unsigned int len, unsigned int channel, char *buf);")
					end
					f = dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
					stmts << C::CExpression.new(f, :funcall, [ceb[:a], ceb[:b], ceb[:c]], f.type.type)
				else
					bd = get_fwdemu_binding(di)
					if di.backtrace_binding[:incomplete_binding]
						# effect unknown: flush pending assignments and emit raw asm
						commit[]
						stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
					else
						update = {}
						bd.each { |k, v|
							if k.kind_of? ::Symbol and not deps[b].include? k
								ops << [k, v]
								update[k] = Expression[Expression[v].bind(binding).reduce]
							else
								stmts << ceb[k, :'=', v]
								stmts.pop if stmts.last.kind_of? C::Variable	# [:eflag_s, :=, :unknown].reduce
							end
						}
						binding.update update
					end
				end
			}
			commit[]

			case to.length
			when 0
				if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
					puts "  block #{Expression[b]} has no to and don't end in ret"
				end
			when 1
				# a goto is only needed when the successor is not the next block emitted
				if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
					stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
				end
			else
				puts "  block #{Expression[b]} with multiple to"
			end
		end

		# cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
		blocks_toclean.each { |b_, to_|
			dcmp.dasm.decoded[b_].block.list.each { |di|
				di.backtrace_binding = nil
			}
		}
	end

	# removes from the function prototype the register arguments flagged 'unused'
	def decompile_check_abi(dcmp, entry, func)
		a = func.type.args || []
		a.delete_if { |arg| arg.has_attribute_var('register') and arg.has_attribute('unused') }
	end
end
|
283
|
+
end
|