metasm 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile +1 -0
- data/doc/code_organisation.txt +1 -1
- data/metasm.gemspec +1 -1
- data/metasm.rb +2 -1
- data/metasm/cpu/arc/decode.rb +3 -3
- data/metasm/cpu/arm/decode.rb +2 -2
- data/metasm/cpu/ia32/compile_c.rb +18 -2
- data/metasm/cpu/ia32/decode.rb +9 -4
- data/metasm/cpu/ia32/decompile.rb +22 -8
- data/metasm/cpu/ia32/opcodes.rb +5 -5
- data/metasm/cpu/mcs51.rb +8 -0
- data/metasm/cpu/mcs51/decode.rb +99 -0
- data/metasm/cpu/mcs51/main.rb +76 -0
- data/metasm/cpu/mcs51/opcodes.rb +120 -0
- data/metasm/cpu/mips/decode.rb +5 -4
- data/metasm/cpu/st20.rb +9 -0
- data/metasm/cpu/st20/decode.rb +180 -0
- data/metasm/cpu/st20/decompile.rb +283 -0
- data/metasm/cpu/st20/main.rb +37 -0
- data/metasm/cpu/st20/opcodes.rb +140 -0
- data/metasm/cpu/x86_64/encode.rb +4 -2
- data/metasm/cpu/x86_64/opcodes.rb +4 -2
- data/metasm/decode.rb +16 -15
- data/metasm/decompile.rb +1 -1
- data/metasm/disassemble.rb +3 -1
- data/metasm/disassemble_api.rb +3 -1
- data/metasm/dynldr.rb +9 -3
- data/metasm/encode.rb +2 -2
- data/metasm/exe_format/coff.rb +3 -1
- data/metasm/exe_format/coff_decode.rb +5 -3
- data/metasm/exe_format/elf.rb +4 -0
- data/metasm/exe_format/elf_decode.rb +1 -2
- data/metasm/exe_format/elf_encode.rb +4 -1
- data/metasm/exe_format/macho.rb +20 -6
- data/metasm/exe_format/pe.rb +1 -1
- data/metasm/exe_format/serialstruct.rb +1 -1
- data/metasm/gui.rb +1 -1
- data/metasm/gui/dasm_hex.rb +2 -2
- data/metasm/gui/dasm_main.rb +8 -8
- data/metasm/gui/debug.rb +4 -4
- data/metasm/gui/gtk.rb +1 -1
- data/metasm/gui/qt.rb +2 -2
- data/metasm/gui/win32.rb +1 -1
- data/metasm/main.rb +11 -6
- data/metasm/os/windows.rb +26 -23
- data/misc/hexdump.rb +2 -2
- data/misc/objdiff.rb +4 -1
- data/misc/objscan.rb +1 -1
- data/samples/dasm-plugins/bindiff.rb +1 -1
- data/samples/dasm-plugins/scanxrefs.rb +2 -1
- data/samples/dynamic_ruby.rb +24 -25
- data/samples/elfencode.rb +15 -0
- data/samples/exeencode.rb +2 -2
- data/samples/metasm-shell.rb +67 -55
- data/tests/mcs51.rb +27 -0
- metadata +13 -2
@@ -0,0 +1,120 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2015-2016 Google
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
require 'metasm/cpu/mcs51/main'
|
7
|
+
|
8
|
+
module Metasm
|
9
|
+
|
10
|
+
class MCS51
|
11
|
+
# Builds an Opcode for +name+ / +bin+ and appends it to @opcode_list.
# Every symbol in +args+ must be known to @fields_mask or @valid_args;
# it is recorded as an opcode argument, and symbols present in
# @fields_mask additionally get their shift stored in the opcode fields.
# Raises RuntimeError ("unknown ...") on an unrecognised symbol, in which
# case the opcode is not added to the list.
def addop(name, bin, *args)
  opcode = Opcode.new(name, bin)
  args.each do |spec|
    is_field = @fields_mask[spec]
    raise "unknown #{spec.inspect}" unless is_field or @valid_args[spec]

    opcode.args << spec
    opcode.fields[spec] = @fields_shift[spec] if is_field
  end
  @opcode_list << opcode
end
|
20
|
+
|
21
|
+
# Fills @opcode_list with the MCS-51 opcode table.
#
# Registers the argument symbols accepted by addop in @valid_args and the
# bit fields (:rd, :addr_11) in @fields_mask / @fields_shift, then declares
# the opcodes one by one. The declaration order is kept as-is.
#
# The compare-and-jump instruction is registered under its standard 8051
# mnemonic 'cjne' (it was previously misspelled 'cnje').
def init_mcs51
  @opcode_list = []
  @valid_args.update [:rd, :r_a, :r_b, :r_c, :d8, :rel8, :m8,
                      :addr_11, :addr_16].inject({}) { |h, v| h.update v => true }
  @fields_mask.update :rd => 15, :addr_11 => 7
  @fields_shift.update :rd => 0, :addr_11 => 5

  addop 'nop',   0x00
  addop 'ret',   0x22
  addop 'reti',  0x32
  addop 'swap',  0xc4, :r_a
  addop '???',   0xa5
  addop 'rr',    0x03, :r_a
  addop 'rrc',   0x13, :r_a
  addop 'rl',    0x23, :r_a
  addop 'rlc',   0x33, :r_a

  # relative jumps
  addop 'jc',    0x40, :rel8
  addop 'jnc',   0x50, :rel8
  addop 'jz',    0x60, :rel8
  addop 'jnz',   0x70, :rel8
  addop 'sjmp',  0x80, :rel8

  addop 'div',   0x84, :r_a, :r_b
  addop 'mul',   0xa4, :r_a, :r_b

  addop 'push',  0xc0, :m8
  addop 'pop',   0xd0, :m8

  addop 'clr',   0xc3, :r_c
  addop 'clr',   0xe4, :r_a
  addop 'cpl',   0xb3, :r_c
  addop 'cpl',   0xf4, :r_a
  addop 'da',    0xd4

  # absolute jumps / calls
  addop 'ajmp',  0x01, :addr_11
  addop 'acall', 0x11, :addr_11
  addop 'ljmp',  0x02, :addr_16
  addop 'lcall', 0x12, :addr_16

  addop 'inc',   0x04, :r_a
  addop 'inc',   0x05, :m8
  addop 'inc',   0x00, :rd

  addop 'dec',   0x14, :r_a
  addop 'dec',   0x15, :m8
  addop 'dec',   0x10, :rd

  addop 'add',   0x24, :r_a, :d8
  addop 'add',   0x25, :r_a, :m8
  addop 'add',   0x20, :r_a, :rd

  addop 'addc',  0x34, :r_a, :d8
  addop 'addc',  0x35, :r_a, :m8
  addop 'addc',  0x30, :r_a, :rd

  addop 'orl',   0x42, :m8, :r_a
  addop 'orl',   0x43, :m8, :d8
  addop 'orl',   0x44, :r_a, :d8
  addop 'orl',   0x45, :r_a, :m8
  addop 'orl',   0x40, :r_a, :rd

  addop 'anl',   0x52, :m8, :r_a
  addop 'anl',   0x53, :m8, :d8
  addop 'anl',   0x54, :r_a, :d8
  addop 'anl',   0x55, :r_a, :m8
  addop 'anl',   0x50, :r_a, :rd

  addop 'xrl',   0x62, :m8, :r_a
  addop 'xrl',   0x63, :m8, :d8
  addop 'xrl',   0x64, :r_a, :d8
  addop 'xrl',   0x65, :r_a, :m8
  addop 'xrl',   0x60, :r_a, :rd

  addop 'mov',   0x74, :r_a, :d8
  addop 'mov',   0x75, :m8, :d8
  addop 'mov',   0x70, :rd, :d8
  addop 'mov',   0xa0, :rd, :m8
  addop 'mov',   0x85, :m8, :m8
  addop 'mov',   0x80, :m8, :rd
  addop 'mov',   0xe0, :r_a, :rd
  addop 'mov',   0xf0, :rd, :r_a

  addop 'subb',  0x94, :r_a, :d8
  addop 'subb',  0x95, :r_a, :m8
  addop 'subb',  0x90, :r_a, :rd

  # compare and jump if not equal
  addop 'cjne',  0xb4, :r_a, :d8, :rel8
  addop 'cjne',  0xb5, :r_a, :m8, :rel8
  addop 'cjne',  0xb0, :rd, :d8, :rel8

  addop 'xch',   0xc5, :r_a, :m8
  addop 'xch',   0xc0, :r_a, :rd

  addop 'djnz',  0xd5, :m8, :rel8
  addop 'djnz',  0xd0, :rd, :rel8
end
|
119
|
+
end
|
120
|
+
end
|
data/metasm/cpu/mips/decode.rb
CHANGED
@@ -149,11 +149,12 @@ class MIPS
|
|
149
149
|
when 'and', 'andi'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } }
|
150
150
|
when 'or', 'ori'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } }
|
151
151
|
when 'nor'; lambda { |di, a0, a1, a2| { a0 => Expression[:~, [a1, :|, a2]] } }
|
152
|
+
when 'not'; lambda { |di, a0, a1| { a0 => Expression[:~, a1] } }
|
152
153
|
when 'xor'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, a2] } }
|
153
154
|
when 'sll', 'sllv'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :>>, a2] } }
|
154
155
|
when 'srl', 'srlv', 'sra', 'srav'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<<, a2] } } # XXX sign-extend
|
155
|
-
when 'lw';
|
156
|
-
when 'sw';
|
156
|
+
when 'lw', 'lwl', 'lwr'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
|
157
|
+
when 'sw', 'swl', 'swr'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
|
157
158
|
when 'lh', 'lhu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } # XXX sign-extend
|
158
159
|
when 'sh'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
|
159
160
|
when 'lb', 'lbu'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
|
@@ -161,8 +162,8 @@ class MIPS
|
|
161
162
|
when /^slti?u?/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } # XXX signedness
|
162
163
|
when 'mfhi'; lambda { |di, a0| { a0 => Expression[:hi] } }
|
163
164
|
when 'mflo'; lambda { |di, a0| { a0 => Expression[:lo] } }
|
164
|
-
when 'mult'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
|
165
|
-
when 'div'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
|
165
|
+
when 'mult', 'multu'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
|
166
|
+
when 'div', 'divu'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
|
166
167
|
when 'jal', 'jalr'; lambda { |di, a0| { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } }
|
167
168
|
when 'li', 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
|
168
169
|
when 'syscall'; lambda { |di, *a| { :$v0 => Expression::Unknown } }
|
data/metasm/cpu/st20.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/main'
|
8
|
+
require 'metasm/cpu/st20/decode'
|
9
|
+
require 'metasm/cpu/st20/decompile'
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/cpu/st20/opcodes'
|
8
|
+
require 'metasm/decode'
|
9
|
+
|
10
|
+
module Metasm
|
11
|
+
class ST20
|
12
|
+
# Decodes the instruction located at edata.ptr and records +addr+ as its
# address. Returns nil when edata.ptr is already at or past edata.length,
# otherwise the value returned by decode_instr_interpret.
def decode_instruction(edata, addr)
  return unless edata.ptr < edata.length

  di = DecodedInstruction.new(self)
  di.address = addr
  decode_instr_interpret(decode_instr_op(edata, di), addr)
end
|
20
|
+
|
21
|
+
# Reads bytes from edata until a complete instruction is decoded into +di+.
#
# Bytestream structure:
#  - each byte holds a function code (high nibble) and 4 bits of data (low nibble)
#  - 'pfix' / 'nfix' bytes are prefixes that build the operand word 4 bits
#    at a time ('nfix' complements the word accumulated so far)
#  - the last byte carries the actual function code
#  - 'opr' means: use the operand word as an index into @op_operate
#
# +pfx+ is the operand word accumulated so far. Decoding stops early, with
# the current word appended as argument, when the stream is exhausted or
# once di.bin_length has reached 4. Returns +di+.
def decode_instr_op(edata, di, pfx=0)
  loop do
    if edata.ptr >= edata.length or di.bin_length >= 4
      di.instruction.args << Expression[pfx]
      return di
    end

    byte = edata.read(1).unpack('C')[0]
    pfx = (pfx << 4) | (byte & 0x0f)
    di.opcode = @opcode_list[(byte & 0xf0) >> 4]
    di.instruction.opname = di.opcode.name
    di.bin_length += 1

    case di.instruction.opname
    when 'pfix'
      next
    when 'nfix'
      # shown as 'pfix' if the stream ends here: the whole decoded word
      # cannot be represented with 'nfix'
      di.instruction.opname = 'pfix'
      pfx = ~pfx
      next
    when 'opr'
      if (op = @op_operate[pfx])
        # operations take no argument, they work on the implicit A B C stack
        di.instruction.opname = op
        di.opcode = @opc_operate[op] || di.opcode
      else
        # unknown operation number, keep the generic form
        di.instruction.args << Expression[pfx]
      end
    else
      di.instruction.args << Expression[pfx]
    end

    return di
  end
end
|
65
|
+
|
66
|
+
# Post-processes a decoded instruction: for 'j', 'cj' and 'fcall' the last
# argument is a displacement, which is replaced by
# addr + di.bin_length + displacement (reduced). Other instructions are
# left untouched. Returns +di+.
def decode_instr_interpret(di, addr)
  if %w[j cj fcall].include?(di.instruction.opname)
    args = di.instruction.args
    delta = args.last.reduce
    target = Expression[[addr, :+, di.bin_length], :+, delta].reduce
    args[-1] = Expression[target]
  end

  di
end
|
76
|
+
|
77
|
+
# Returns the backtrace binding of +di+: a hash mapping each location
# written by the instruction (the evaluation stack registers :a, :b, :c,
# :wspace, or an Indirection) to its new value, expressed in terms of the
# values held before the instruction runs.
#
# +sz+ is the word size in bytes (@size/8); all word-scaled offsets use it,
# including 'ajw' (which used a hard-coded 4 before).
#
# Unhandled instructions yield a binding flagged with :incomplete_binding
# and with :a, :b, :c marked unknown (a message is printed under $VERBOSE).
def get_backtrace_binding(di)
  arg = di.instruction.args[0]
  sz = @size/8
  unk = Expression::Unknown
  case di.instruction.opname
  when 'j'; {}
  when 'ldlp';  { :a => Expression[:wspace, :+, [sz, :*, arg]], :b => :a, :c => :b }
  when 'ldnl';  { :a => Indirection[[:a, :+, [sz, :*, arg]], sz, di] }
  when 'ldc';   { :a => arg, :b => :a, :c => :b }
  when 'ldnlp'; { :a => Expression[:a, :+, [sz, :*, arg]] }
  when 'ldl';   { :a => Indirection[[:wspace, :+, [sz, :*, arg]], sz, di], :b => :a, :c => :b }
  when 'adc';   { :a => Expression[:a, :+, arg] }
  when 'fcall'; {
      :a => Expression[di.next_addr],
      :wspace => Expression[:wspace, :-, [4*sz]],
      Indirection[[:wspace, :-, [4*sz]], sz, di] => di.next_addr,
      Indirection[[:wspace, :-, [3*sz]], sz, di] => :a,
      Indirection[[:wspace, :-, [2*sz]], sz, di] => :b,
      Indirection[[:wspace, :-, [1*sz]], sz, di] => :c,
    }
  # cj+(:a != 0) => a=b, b=c, c=unk ; (:a == 0) => jump, a=a, b=b, c=c
  when 'cj';    { :a => unk, :b => unk, :c => unk }
  when 'ajw';   { :wspace => Expression[:wspace, :+, [sz, :*, arg]] }
  when 'eqc';   { :a => Expression[:a, :==, arg] }
  when 'stl';   { Indirection[[:wspace, :+, [sz, :*, arg]], sz, di] => :a, :a => :b, :b => :c, :c => unk }
  when 'stnl';  { Indirection[[:a, :+, [sz, :*, arg]], sz, di] => :b, :a => :c, :b => unk, :c => unk }

  when 'add';   { :a => Expression[:b, :+, :a], :b => :c, :c => unk }
  when 'sub';   { :a => Expression[:b, :-, :a], :b => :c, :c => unk }
  when 'prod';  { :a => Expression[:b, :*, :a], :b => :c, :c => unk }
  when 'xor';   { :a => Expression[:b, :^, :a], :b => :c, :c => unk }
  when 'ldpi';  { :a => Indirection[[di.next_addr, :+, :a], sz, di] }
  when 'mint';  { :a => Expression[-1 << (@size-1)], :b => :a, :c => :b }
  when 'in';    { :a => unk, :b => unk, :c => unk }	# read a bytes from channel b at buffer c
  when 'out';   { :a => unk, :b => unk, :c => unk }	# write a bytes to channel b from buffer c
  when 'lb';    { :a => Indirection[:a, 1, di] }
  when 'sb';    { Indirection[:a, 1, di] => Expression[:b, :&, 0xff], :a => :c, :b => unk, :c => unk }
  when 'bsub';  { :a => Expression[:a, :+, :b], :b => :c, :c => unk }
  when 'ssub';  { :a => Expression[:a, :+, [2, :*, :b]], :b => :c, :c => unk }
  when 'wsub';  { :a => Expression[:a, :+, [sz, :*, :b]], :b => :c, :c => unk }
  when 'gajw';  { :wspace => Expression[:a], :a => Expression[:wspace] }
  when 'dup';   { :b => :a, :c => :b }
  else
    puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
    { :incomplete_binding => Expression[1], :a => unk, :b => unk, :c => unk }
  end
end
|
124
|
+
|
125
|
+
# Returns the list of addresses execution may be transferred to by +di+.
#
# Empty unless the opcode has the :setip property. For 'j', 'cj' and
# 'fcall' the target is the first instruction argument (made absolute by
# decode_instr_interpret); 'fcall' is included so that call targets are
# reported like jump targets. Any other opcode yields an empty list
# ('ret' is not handled).
def get_xrefs_x(dasm, di)
  return [] if not di.opcode.props[:setip]

  case di.opcode.basename
  when 'j', 'cj', 'fcall'
    [Expression[di.instruction.args.first]]
  else
    []
  end
end
|
137
|
+
|
138
|
+
# Tells whether +expr+ is a valid return expression matching the :saveip
# instruction: after recursive reduction it must be a 2-byte Indirection
# whose target is :sp.
# NOTE(review): :sp is not written by any binding in get_backtrace_binding
# (which uses :wspace) - confirm this test is the intended one for ST20.
def backtrace_is_function_return(expr, di=nil)
  reduced = Expression[expr].reduce_rec
  return false unless reduced.kind_of?(Indirection)
  return false unless reduced.len == 2

  reduced.target == Expression[:sp]
end
|
143
|
+
|
144
|
+
# Updates the backtrace binding of function +f+ (starting at +faddr+) for
# every register listed in +wantregs+.
#
# Each register is backtraced from every address in +retaddrlist+; when all
# return addresses agree on exactly one value, that value is stored,
# otherwise the register is bound to Expression::Unknown. Nothing is
# changed when +retaddrlist+ is nil. Returns the binding hash.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  func_binding = f.backtrace_binding

  if retaddrlist
    wantregs.each do |reg|
      # set before backtracing, as the original code does
      func_binding[reg] = Expression::Unknown

      values = retaddrlist.inject([]) do |acc, retaddr|
        acc | dasm.backtrace(Expression[reg], retaddr, :include_start => true,
                             :snapshot_addr => faddr, :origin => retaddr)
      end

      func_binding[reg] = (values.length == 1 ? values.first : Expression::Unknown)
    end
  end

  func_binding
end
|
167
|
+
|
168
|
+
# Tells whether +expr+ is an address on the stack, i.e. whether :sp is
# among the externals of the expression.
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  externals.include?(:sp)
end
|
172
|
+
|
173
|
+
# Rewrites the arguments of instruction +i+ in place, substituting +new+
# for +old+ (eg when a label is renamed): an argument equal to +old+ is
# replaced outright, any other argument is rebuilt from its binding of
# old => new, reduced. Returns the updated argument list.
def replace_instr_arg_immediate(i, old, new)
  i.args.map! do |arg|
    if arg == old
      new
    else
      Expression[arg.bind(old => new).reduce]
    end
  end
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,283 @@
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
3
|
+
#
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
5
|
+
|
6
|
+
|
7
|
+
require 'metasm/cpu/st20/main'
|
8
|
+
|
9
|
+
module Metasm
|
10
|
+
class ST20
|
11
|
+
# Temporarily sets dasm.address_binding[funcstart] to
# { :wspace => :frameptr } so that backtracking stack-related offsets
# resolves in :frameptr (relative to the function start), yields every
# element of +blocks+, then puts the previous binding back.
#
# The previous binding is restored even when the yielded block raises.
# Returns the previous binding, as before.
def decompile_makestackvars(dasm, funcstart, blocks)
  oldfuncbd = dasm.address_binding[funcstart]
  dasm.address_binding[funcstart] = { :wspace => :frameptr }
  begin
    blocks.each { |block| yield block }
  ensure
    dasm.address_binding[funcstart] = oldfuncbd
  end
  oldfuncbd
end
|
19
|
+
|
20
|
+
# Adds the registers accessed by +di+ that do not show in its backtrace
# binding to the read list +a+ (+w+, the written list, is not modified):
#  - 'ret' reads :a (standard ABI return value)
#  - 'in' / 'out' read :a, :b and :c
# NOTE(review): the 'ret' condition tests func.type (not func.type.type)
# against C::BaseType, as the original does - confirm this is intended.
def decompile_func_finddeps_di(dcmp, func, di, a, w)
  opname = di.instruction.opname
  if opname == 'ret'
    a << :a if !func.type.kind_of?(C::BaseType) || func.type.type.name != :void	# standard ABI
  elsif opname == 'in' || opname == 'out'
    a.push(:a, :b, :c)
  end
end
|
29
|
+
|
30
|
+
# Lists the variable dependencies of each block and drops useless writes.
#
# +blocks+ is a list of [block address, [successor addresses]] pairs.
# Registers that are read before being written on some path from a root
# block are appended to func.type.args as 'register' arguments.
# Returns { blockaddr => [list of vars that are needed by a following block] }
def decompile_func_finddeps(dcmp, blocks, func)
  deps_r = {}
  deps_w = {}
  deps_to = {}
  deps_subfunc = {}	# things read/written by subfuncs

  # symbols referenced by a list of expressions, :unknown excluded
  symbols_of = lambda { |exprs|
    exprs.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
  }

  # find read/writes by each block
  blocks.each do |b, to|
    deps_r[b] = []
    deps_w[b] = []
    deps_to[b] = to
    deps_subfunc[b] = []

    blk = dcmp.dasm.decoded[b].block
    blk.list.each do |di|
      bd = di.backtrace_binding
      a = bd.values
      w = []
      bd.keys.each do |key|
        if ::Symbol === key
          w |= [key]
        else
          a |= Expression[key].externals
        end
      end
      decompile_func_finddeps_di(dcmp, func, di, a, w)

      # a read only counts if the block has not already written the register
      deps_r[b] |= symbols_of[a] - deps_w[b]
      deps_w[b] |= symbols_of[w]
    end

    blk.each_to_normal do |t|
      target = dcmp.backtrace_target(t, blk.list.last.address)
      sym = dcmp.c_parser.toplevel.symbol[target]
      next if not sym

      sym.type = C::Function.new(C::BaseType.new(:int)) if not sym.type.kind_of? C::Function
      sym.type.args.to_a.each do |arg|
        reg = arg.has_attribute('register')
        deps_subfunc[b] |= [reg.to_sym] if reg
      end
    end
  end

  bt = blocks.transpose
  roots = bt[0] - bt[1].flatten	# XXX jmp 1stblock ?

  # find regs read and never written (must have been set by caller and are part of the func ABI)
  uninitialized = lambda do |b, r, done|
    reads = deps_r[b]
    next nil unless reads
    next true if reads.include?(r)
    next nil if deps_w[b].include?(r)

    done << b
    (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] }
  end

  regargs = []
  register_symbols.each do |r|
    regargs << r if roots.find { |root| uninitialized[root, r, []] }
  end

  # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
  regargs.sort_by { |r| r.to_s }.each do |r|
    var = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
    var.add_attribute("register(#{r})")
    func.type.args << var
  end

  # remove writes from a block if no following block read the value
  dw = {}
  deps_w.each do |b, written|
    dw[b] = written.reject do |dep|
      unread = true
      seen = []
      pending = deps_to[b].dup
      while (succ = pending.pop)
        next if seen.include?(succ)

        seen << succ
        if not deps_r[succ] or deps_r[succ].include?(dep)
          # unknown successor, or a successor reads the value: keep the write
          unread = false
          break
        elsif not deps_w[succ].include?(dep)
          # not overwritten here, keep looking further down
          pending.concat deps_to[succ]
        end
      end
      unread
    end
  end

  dw
end
|
120
|
+
|
121
|
+
# Describes the function call ABI: the return value is held in :a and
# every register listed by register_symbols is considered changed.
def abi_funcall
  abi = {}
  abi[:retval] = :a
  abi[:changed] = register_symbols
  abi
end
|
124
|
+
|
125
|
+
# Turns the disassembled blocks of a function into C statements appended to
# the body (func.initializer) of +func+.
#
# +myblocks+ is a list of [block address, [successor addresses]] pairs; it
# is consumed by this method. +deps+ maps a block address to the registers
# that must be committed (assigned) before leaving the block. +nextaddr+
# is the address following the last block, used to decide whether a
# trailing goto is needed.
#
# The conditional jump is emitted as `if (a == 0) goto target`, matching
# the 'cj' semantics described in get_backtrace_binding (jump taken when
# :a is 0); it was emitted with the inverted test before.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  scope = func.initializer
  func.type.args.each { |a| scope.symbol[a.name] = a }
  stmts = scope.statements
  blocks_toclean = myblocks.dup
  until myblocks.empty?
    b, to = myblocks.shift
    if l = dcmp.dasm.get_label_at(b)
      stmts << C::Label.new(l)
    end

    # list of assignments [[dest reg, expr assigned]]
    ops = []
    # reg binding (reg => value, values.externals = regs at block start)
    binding = {}
    # Expr => CExpr
    ce  = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
    # Expr => Expr.bind(binding) => CExpr
    ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }

    # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
    commit = lambda {
      deps[b].map { |k|
        [k, ops.rindex(ops.reverse.find { |r, v| r == k })]
      }.sort_by { |k, i| i.to_i }.each { |k, i|
        next if not i or not binding[k]
        e = k
        final = []
        ops[0..i].reverse_each { |r, v|
          final << r if not v
          e = Expression[e].bind(r => v).reduce if not final.include? r
        }
        ops[i][1] = nil
        binding.delete k
        stmts << ce[k, :'=', e] if k != e
      }
    }

    # returns an array to use as funcall arguments
    get_func_args = lambda { |di, f|
      # XXX see remarks in #finddeps
      args_todo = f.type.args.to_a.dup
      args = []
      args_todo.each { |a_|
        if r = a_.has_attribute_var('register')
          args << Expression[r.to_sym]
        else
          args << Expression[0]
        end
      }
      args.map { |e| ceb[e] }
    }

    # go !
    dcmp.dasm.decoded[b].block.list.each { |di|
      if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
        # conditional jump: taken when :a is 0
        commit[]
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        cc = ceb[:a, :'==', 0]
        # XXX switch/indirect/multiple jmp
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
        to.delete dcmp.dasm.normalize(n)
        next
      end

      case di.instruction.opname
      when 'ret'
        commit[]
        ret = nil
        ret = C::CExpression[ceb[:a]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
        stmts << C::Return.new(ret)
      when 'fcall'	# :saveip
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        args = []
        if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
          args = get_func_args[di, f]
        end
        commit[]
        #next if not di.block.to_subfuncret

        if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function)
          # indirect funcall
          fptr = ceb[n]
          binding.delete n
          proto = C::Function.new(C::BaseType.new(:int))
          proto = f.type if f and f.type.kind_of? C::Function
          f = C::CExpression[[fptr], C::Pointer.new(proto)]
        elsif not f
          # internal functions are predeclared, so this one is extern
          f = C::Variable.new
          f.name = n
          f.type = C::Function.new(C::BaseType.new(:int))
          if dcmp.recurse > 0
            dcmp.c_parser.toplevel.symbol[n] = f
            dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
          end
        end
        commit[]
        binding.delete :a
        e = C::CExpression[f, :funcall, args]
        e = C::CExpression[ce[:a], :'=', e, f.type.type] if deps[b].include? :a and f.type.type != C::BaseType.new(:void)
        stmts << e
      when 'in', 'out'
        # channel I/O is rendered as a call to an intrinsic, declared on first use
        if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
          dcmp.c_parser.parse("void intrinsic_#{di.instruction.opname}(unsigned int len, unsigned int channel, char *buf);")
        end
        f = dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
        stmts << C::CExpression.new(f, :funcall, [ceb[:a], ceb[:b], ceb[:c]], f.type.type)
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          # no usable semantics: emit the instruction as inline asm
          commit[]
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
        else
          update = {}
          bd.each { |k, v|
            if k.kind_of? ::Symbol and not deps[b].include? k
              ops << [k, v]
              update[k] = Expression[Expression[v].bind(binding).reduce]
            else
              stmts << ceb[k, :'=', v]
              stmts.pop if stmts.last.kind_of? C::Variable	# [:eflag_s, :=, :unknown].reduce
            end
          }
          binding.update update
        end
      end
    }
    commit[]

    case to.length
    when 0
      if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
        puts "  block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts "  block #{Expression[b]} with multiple to"
    end
  end

  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  blocks_toclean.each { |b_, to_|
    dcmp.dasm.decoded[b_].block.list.each { |di|
      di.backtrace_binding = nil
    }
  }
end
|
277
|
+
|
278
|
+
# Removes from the argument list of +func+ every argument carrying both a
# 'register' attribute and the 'unused' attribute. The list is modified
# in place and returned (an empty array when the function has no list).
def decompile_check_abi(dcmp, entry, func)
  arglist = func.type.args
  arglist = [] unless arglist
  arglist.reject! { |arg| arg.has_attribute_var('register') && arg.has_attribute('unused') }
  arglist
end
|
282
|
+
end
|
283
|
+
end
|