metasm 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile +1 -0
  4. data/doc/code_organisation.txt +1 -1
  5. data/metasm.gemspec +1 -1
  6. data/metasm.rb +2 -1
  7. data/metasm/cpu/arc/decode.rb +3 -3
  8. data/metasm/cpu/arm/decode.rb +2 -2
  9. data/metasm/cpu/ia32/compile_c.rb +18 -2
  10. data/metasm/cpu/ia32/decode.rb +9 -4
  11. data/metasm/cpu/ia32/decompile.rb +22 -8
  12. data/metasm/cpu/ia32/opcodes.rb +5 -5
  13. data/metasm/cpu/mcs51.rb +8 -0
  14. data/metasm/cpu/mcs51/decode.rb +99 -0
  15. data/metasm/cpu/mcs51/main.rb +76 -0
  16. data/metasm/cpu/mcs51/opcodes.rb +120 -0
  17. data/metasm/cpu/mips/decode.rb +5 -4
  18. data/metasm/cpu/st20.rb +9 -0
  19. data/metasm/cpu/st20/decode.rb +180 -0
  20. data/metasm/cpu/st20/decompile.rb +283 -0
  21. data/metasm/cpu/st20/main.rb +37 -0
  22. data/metasm/cpu/st20/opcodes.rb +140 -0
  23. data/metasm/cpu/x86_64/encode.rb +4 -2
  24. data/metasm/cpu/x86_64/opcodes.rb +4 -2
  25. data/metasm/decode.rb +16 -15
  26. data/metasm/decompile.rb +1 -1
  27. data/metasm/disassemble.rb +3 -1
  28. data/metasm/disassemble_api.rb +3 -1
  29. data/metasm/dynldr.rb +9 -3
  30. data/metasm/encode.rb +2 -2
  31. data/metasm/exe_format/coff.rb +3 -1
  32. data/metasm/exe_format/coff_decode.rb +5 -3
  33. data/metasm/exe_format/elf.rb +4 -0
  34. data/metasm/exe_format/elf_decode.rb +1 -2
  35. data/metasm/exe_format/elf_encode.rb +4 -1
  36. data/metasm/exe_format/macho.rb +20 -6
  37. data/metasm/exe_format/pe.rb +1 -1
  38. data/metasm/exe_format/serialstruct.rb +1 -1
  39. data/metasm/gui.rb +1 -1
  40. data/metasm/gui/dasm_hex.rb +2 -2
  41. data/metasm/gui/dasm_main.rb +8 -8
  42. data/metasm/gui/debug.rb +4 -4
  43. data/metasm/gui/gtk.rb +1 -1
  44. data/metasm/gui/qt.rb +2 -2
  45. data/metasm/gui/win32.rb +1 -1
  46. data/metasm/main.rb +11 -6
  47. data/metasm/os/windows.rb +26 -23
  48. data/misc/hexdump.rb +2 -2
  49. data/misc/objdiff.rb +4 -1
  50. data/misc/objscan.rb +1 -1
  51. data/samples/dasm-plugins/bindiff.rb +1 -1
  52. data/samples/dasm-plugins/scanxrefs.rb +2 -1
  53. data/samples/dynamic_ruby.rb +24 -25
  54. data/samples/elfencode.rb +15 -0
  55. data/samples/exeencode.rb +2 -2
  56. data/samples/metasm-shell.rb +67 -55
  57. data/tests/mcs51.rb +27 -0
  58. metadata +13 -2
@@ -0,0 +1,120 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2015-2016 Google
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/cpu/mcs51/main'
7
+
8
+ module Metasm
9
+
10
+ class MCS51
11
# Builds an Opcode for +name+/+bin+ and appends it to @opcode_list.
# Every symbol in +args+ must be known to @valid_args or @fields_mask;
# symbols that have a field mask also get their bit shift recorded in
# the opcode's fields.
# Raises RuntimeError ("unknown <sym>") on an unrecognised operand symbol.
def addop(name, bin, *args)
  opcode = Opcode.new(name, bin)
  args.each do |arg|
    has_field = @fields_mask[arg]
    raise "unknown #{arg.inspect}" unless has_field or @valid_args[arg]
    opcode.args << arg
    opcode.fields[arg] = @fields_shift[arg] if has_field
  end
  @opcode_list << opcode
end
20
+
21
# Populates @opcode_list with the MCS-51 (8051) opcode table.
#
# Also registers the operand symbols accepted by addop in @valid_args and
# the two operands encoded inside the opcode byte itself:
#   :rd      - low 4 bits of the opcode byte
#   :addr_11 - 3 bits starting at bit 5
#
# Several entries share a base byte (e.g. 'push' 0xc0 and 'xch' 0xc0 with
# :rd); the table order is kept exactly as it was.
def init_mcs51
  @opcode_list = []
  [:rd, :r_a, :r_b, :r_c, :d8, :rel8, :m8,
   :addr_11, :addr_16].each { |sym| @valid_args[sym] = true }
  @fields_mask.update :rd => 15, :addr_11 => 7
  @fields_shift.update :rd => 0, :addr_11 => 5

  addop 'nop', 0x00
  addop 'ret', 0x22
  addop 'reti', 0x32
  addop 'swap', 0xc4, :r_a
  addop '???', 0xa5
  addop 'rr', 0x03, :r_a
  addop 'rrc', 0x13, :r_a
  addop 'rl', 0x23, :r_a
  addop 'rlc', 0x33, :r_a

  addop 'jc', 0x40, :rel8
  addop 'jnc', 0x50, :rel8
  addop 'jz', 0x60, :rel8
  addop 'jnz', 0x70, :rel8
  addop 'sjmp', 0x80, :rel8

  addop 'div', 0x84, :r_a, :r_b
  addop 'mul', 0xa4, :r_a, :r_b

  addop 'push', 0xc0, :m8
  addop 'pop', 0xd0, :m8

  addop 'clr', 0xc3, :r_c
  addop 'clr', 0xe4, :r_a
  addop 'cpl', 0xb3, :r_c
  addop 'cpl', 0xf4, :r_a
  # 'da' works on the accumulator like swap/rr/clr above, so it carries
  # the same implicit :r_a operand
  addop 'da', 0xd4, :r_a

  addop 'ajmp', 0x01, :addr_11
  addop 'acall', 0x11, :addr_11
  addop 'ljmp', 0x02, :addr_16
  addop 'lcall', 0x12, :addr_16

  addop 'inc', 0x04, :r_a
  addop 'inc', 0x05, :m8
  addop 'inc', 0x00, :rd

  addop 'dec', 0x14, :r_a
  addop 'dec', 0x15, :m8
  addop 'dec', 0x10, :rd

  addop 'add', 0x24, :r_a, :d8
  addop 'add', 0x25, :r_a, :m8
  addop 'add', 0x20, :r_a, :rd

  addop 'addc', 0x34, :r_a, :d8
  addop 'addc', 0x35, :r_a, :m8
  addop 'addc', 0x30, :r_a, :rd

  addop 'orl', 0x42, :m8, :r_a
  addop 'orl', 0x43, :m8, :d8
  addop 'orl', 0x44, :r_a, :d8
  addop 'orl', 0x45, :r_a, :m8
  addop 'orl', 0x40, :r_a, :rd

  addop 'anl', 0x52, :m8, :r_a
  addop 'anl', 0x53, :m8, :d8
  addop 'anl', 0x54, :r_a, :d8
  addop 'anl', 0x55, :r_a, :m8
  addop 'anl', 0x50, :r_a, :rd

  addop 'xrl', 0x62, :m8, :r_a
  addop 'xrl', 0x63, :m8, :d8
  addop 'xrl', 0x64, :r_a, :d8
  addop 'xrl', 0x65, :r_a, :m8
  addop 'xrl', 0x60, :r_a, :rd

  addop 'mov', 0x74, :r_a, :d8
  addop 'mov', 0x75, :m8, :d8
  addop 'mov', 0x70, :rd, :d8
  addop 'mov', 0xa0, :rd, :m8
  addop 'mov', 0x85, :m8, :m8
  addop 'mov', 0x80, :m8, :rd
  addop 'mov', 0xe0, :r_a, :rd
  addop 'mov', 0xf0, :rd, :r_a

  addop 'subb', 0x94, :r_a, :d8
  addop 'subb', 0x95, :r_a, :m8
  addop 'subb', 0x90, :r_a, :rd

  # compare and jump if not equal: the 8051 mnemonic is CJNE
  addop 'cjne', 0xb4, :r_a, :d8, :rel8
  addop 'cjne', 0xb5, :r_a, :m8, :rel8
  addop 'cjne', 0xb0, :rd, :d8, :rel8

  addop 'xch', 0xc5, :r_a, :m8
  addop 'xch', 0xc0, :r_a, :rd

  addop 'djnz', 0xd5, :m8, :rel8
  addop 'djnz', 0xd0, :rd, :rel8
end
119
+ end
120
+ end
@@ -149,11 +149,12 @@ class MIPS
149
149
  when 'and', 'andi'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } }
150
150
  when 'or', 'ori'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } }
151
151
  when 'nor'; lambda { |di, a0, a1, a2| { a0 => Expression[:~, [a1, :|, a2]] } }
152
+ when 'not'; lambda { |di, a0, a1| { a0 => Expression[:~, a1] } }
152
153
  when 'xor'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, a2] } }
153
154
  when 'sll', 'sllv'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :>>, a2] } }
154
155
  when 'srl', 'srlv', 'sra', 'srav'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<<, a2] } } # XXX sign-extend
155
- when 'lw'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
156
- when 'sw'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
156
+ when 'lw', 'lwl', 'lwr'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
157
+ when 'sw', 'swl', 'swr'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
157
158
  when 'lh', 'lhu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } # XXX sign-extend
158
159
  when 'sh'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
159
160
  when 'lb', 'lbu'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
@@ -161,8 +162,8 @@ class MIPS
161
162
  when /^slti?u?/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } # XXX signedness
162
163
  when 'mfhi'; lambda { |di, a0| { a0 => Expression[:hi] } }
163
164
  when 'mflo'; lambda { |di, a0| { a0 => Expression[:lo] } }
164
- when 'mult'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
165
- when 'div'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
165
+ when 'mult', 'multu'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
166
+ when 'div', 'divu'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
166
167
  when 'jal', 'jalr'; lambda { |di, a0| { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } }
167
168
  when 'li', 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
168
169
  when 'syscall'; lambda { |di, *a| { :$v0 => Expression::Unknown } }
@@ -0,0 +1,9 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/cpu/st20/decode'
9
+ require 'metasm/cpu/st20/decompile'
@@ -0,0 +1,180 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/st20/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class ST20
12
+ # decodes the instruction at edata.ptr, mapped at virtual address addr
13
# Decodes one instruction starting at edata.ptr and tags it with +addr+.
# Returns nil when the read pointer is already at or past the end of edata,
# otherwise whatever decode_instr_interpret returns for the decoded opcode.
def decode_instruction(edata, addr)
  return unless edata.ptr < edata.length

  insn = DecodedInstruction.new(self)
  insn.address = addr
  decode_instr_interpret(decode_instr_op(edata, insn), addr)
end
20
+
21
# Reads opcode bytes from edata until a non-prefix function code is found.
#
# Each byte is <function code:4><operand nibble:4>. The nibble is shifted
# into +pfx+; 'pfix' continues with the next byte, 'nfix' complements the
# accumulated word first. 'opr' uses the accumulated word as an index into
# @op_operate; any other function code takes the word as its single argument.
#
# Decoding stops early (keeping the last opname seen and pushing the
# accumulated word as argument) when edata is exhausted or 4 bytes have
# already been consumed. Returns +di+.
def decode_instr_op(edata, di, pfx=0)
  insn = di.instruction

  loop do
    if edata.ptr >= edata.length or di.bin_length >= 4
      insn.args << Expression[pfx]
      return di
    end

    byte = edata.read(1).unpack('C')[0]
    pfx = (pfx << 4) | (byte & 0x0f)
    di.opcode = @opcode_list[byte >> 4]
    insn.opname = di.opcode.name
    di.bin_length += 1

    case insn.opname
    when 'pfix'
      next
    when 'nfix'
      pfx = ~pfx
      # will be displayed on EOS, and the whole decoded pfx cannot be represented with 'nfix'
      insn.opname = 'pfix'
      next
    when 'opr'
      operate = @op_operate[pfx]
      if operate
        # operate instructions take no argument (implicit A B C register stack)
        insn.opname = operate
        di.opcode = @opc_operate[operate] || di.opcode
      else
        # unknown operate number, keep the generic form
        insn.args << Expression[pfx]
      end
    else
      insn.args << Expression[pfx]
    end

    return di
  end
end
65
+
66
# Post-processes a decoded instruction: for 'j', 'cj' and 'fcall' the last
# argument is a displacement, which is rewritten as
# addr + di.bin_length + displacement. Other instructions are left untouched.
# Returns +di+.
def decode_instr_interpret(di, addr)
  insn = di.instruction
  if %w[j cj fcall].include?(insn.opname)
    displacement = insn.args.last.reduce
    target = Expression[[addr, :+, di.bin_length], :+, displacement].reduce
    insn.args[-1] = Expression[target]
  end
  di
end
76
+
77
# Returns the symbolic effects of +di+ as a hash { written => new value }.
#
# Keys are register symbols (:a, :b, :c for the evaluation stack, :wspace
# for the workspace pointer) or Indirections for memory writes. Word
# offsets are scaled by sz = @size/8.
#
# Instructions without an entry yield a binding flagged with
# :incomplete_binding that clobbers :a, :b and :c.
def get_backtrace_binding(di)
  arg = di.instruction.args[0]
  sz = @size/8
  unk = Expression::Unknown
  case di.instruction.opname
  when 'j'; {}
  when 'ldlp'; { :a => Expression[:wspace, :+, [sz, :*, arg]], :b => :a, :c => :b }
  when 'ldnl'; { :a => Indirection[[:a, :+, [sz, :*, arg]], sz, di] }
  when 'ldc'; { :a => arg, :b => :a, :c => :b }
  when 'ldnlp'; { :a => Expression[:a, :+, [sz, :*, arg]] }
  when 'ldl'; { :a => Indirection[[:wspace, :+, [sz, :*, arg]], sz, di], :b => :a, :c => :b }
  when 'adc'; { :a => Expression[:a, :+, arg] }
  when 'fcall'; {
    :a => Expression[di.next_addr],
    :wspace => Expression[:wspace, :-, [4*sz]],
    Indirection[[:wspace, :-, [4*sz]], sz, di] => di.next_addr,
    Indirection[[:wspace, :-, [3*sz]], sz, di] => :a,
    Indirection[[:wspace, :-, [2*sz]], sz, di] => :b,
    Indirection[[:wspace, :-, [1*sz]], sz, di] => :c,
  }
  # cj+(:a != 0) => a=b, b=c, c=unk ; (:a == 0) => jump, a=a, b=b, c=c
  when 'cj'; { :a => unk, :b => unk, :c => unk }
  # workspace adjustment is counted in words, like every other offset here
  when 'ajw'; { :wspace => Expression[:wspace, :+, [sz, :*, arg]] }
  when 'eqc'; { :a => Expression[:a, :==, arg] }
  when 'stl'; { Indirection[[:wspace, :+, [sz, :*, arg]], sz, di] => :a, :a => :b, :b => :c, :c => unk }
  when 'stnl'; { Indirection[[:a, :+, [sz, :*, arg]], sz, di] => :b, :a => :c, :b => unk, :c => unk }

  when 'add'; { :a => Expression[:b, :+, :a], :b => :c, :c => unk }
  when 'sub'; { :a => Expression[:b, :-, :a], :b => :c, :c => unk }
  when 'prod'; { :a => Expression[:b, :*, :a], :b => :c, :c => unk }
  when 'xor'; { :a => Expression[:b, :^, :a], :b => :c, :c => unk }
  # ldpi yields a pointer (address of next instruction + a), it does not read memory
  when 'ldpi'; { :a => Expression[di.next_addr, :+, :a] }
  when 'mint'; { :a => Expression[-1 << (@size-1)], :b => :a, :c => :b }
  when 'in'; { :a => unk, :b => unk, :c => unk } # read a bytes from channel b at buffer c
  when 'out'; { :a => unk, :b => unk, :c => unk } # write a bytes to channel b from buffer c
  when 'lb'; { :a => Indirection[:a, 1, di] }
  when 'sb'; { Indirection[:a, 1, di] => Expression[:b, :&, 0xff], :a => :c, :b => unk, :c => unk }
  when 'bsub'; { :a => Expression[:a, :+, :b], :b => :c, :c => unk }
  when 'ssub'; { :a => Expression[:a, :+, [2, :*, :b]], :b => :c, :c => unk }
  when 'wsub'; { :a => Expression[:a, :+, [sz, :*, :b]], :b => :c, :c => unk }
  when 'gajw'; { :wspace => Expression[:a], :a => Expression[:wspace] }
  when 'dup'; { :b => :a, :c => :b }
  else
    puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
    { :incomplete_binding => Expression[1], :a => unk, :b => unk, :c => unk }
  end
end
124
+
125
# Returns the list of code addresses +di+ may transfer control to.
#
# Only opcodes flagged :setip are considered. For 'j', 'cj' and 'fcall' the
# target is the first instruction argument (already made absolute by
# decode_instr_interpret); any other :setip opcode yields an empty list.
# 'fcall' is included so that callers asking for the call target (e.g. the
# decompiler) get it from here too.
def get_xrefs_x(dasm, di)
  return [] if not di.opcode.props[:setip]

  case di.opcode.basename
  when 'j', 'cj', 'fcall'
    [Expression[di.instruction.args.first]]
  #when 'ret'
    #[Indirection[:sp, 2, di.address]]
  else
    []
  end
end
137
+
138
+ # checks if expr is a valid return expression matching the :saveip instruction
139
# True when +expr+ reduces to a 2-byte Indirection whose target is :sp.
# +di+ is accepted for interface compatibility and not used.
# NOTE(review): the other ST20 bindings use :wspace rather than :sp as the
# stack register - confirm :sp and the length of 2 are intended here.
def backtrace_is_function_return(expr, di=nil)
  reduced = Expression[expr].reduce_rec
  reduced.kind_of?(Indirection) and reduced.len == 2 and reduced.target == Expression[:sp]
end
143
+
144
+ # updates the function backtrace_binding
145
# Recomputes f.backtrace_binding for every register in +wantregs+.
#
# Each register is backtraced from every address in +retaddrlist+ up to
# the function start +faddr+. When all return sites agree on exactly one
# value it is stored, otherwise the register is bound to
# Expression::Unknown. Nothing is changed when +retaddrlist+ is nil.
# Returns the binding hash.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  bound = f.backtrace_binding
  return bound unless retaddrlist

  wantregs.each do |reg|
    # placeholder while the backtrace runs
    bound[reg] = Expression::Unknown
    values = retaddrlist.inject([]) do |found, retaddr|
      found | dasm.backtrace(Expression[reg], retaddr, :include_start => true,
                             :snapshot_addr => faddr, :origin => retaddr)
    end
    bound[reg] = (values.length == 1 ? values.first : Expression::Unknown)
  end

  bound
end
167
+
168
+ # returns true if the expression is an address on the stack
169
# True when :sp appears among the externals of +expr+.
# NOTE(review): the ST20 bindings elsewhere use :wspace as the stack
# register - confirm :sp is the intended symbol.
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  externals.include?(:sp)
end
172
+
173
+ # updates an instruction's argument replacing an expression with another (eg label renamed)
174
# Rewrites the arguments of instruction +i+ in place: an argument equal to
# +old+ becomes +new+; every other argument has +old+ rebound to +new+
# inside it and is reduced again. Returns the updated argument list.
def replace_instr_arg_immediate(i, old, new)
  i.args.map! do |arg|
    if arg == old
      new
    else
      Expression[arg.bind(old => new).reduce]
    end
  end
end
179
+ end
180
+ end
@@ -0,0 +1,283 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/st20/main'
8
+
9
+ module Metasm
10
+ class ST20
11
+ # temporarily setup dasm.address_binding so that backtracking
12
+ # stack-related offsets resolve in :frameptr (relative to func start)
13
# Yields every element of +blocks+ while dasm.address_binding[funcstart]
# is temporarily set to { :wspace => :frameptr }, so that workspace
# relative offsets backtrace to :frameptr.
#
# The previous binding is put back even if the yielded block raises, so a
# failed decompilation does not leave the override in the disassembler.
# Returns the previous binding.
def decompile_makestackvars(dasm, funcstart, blocks)
  oldfuncbd = dasm.address_binding[funcstart]
  dasm.address_binding[funcstart] = { :wspace => :frameptr }
  begin
    blocks.each { |block| yield block }
  ensure
    dasm.address_binding[funcstart] = oldfuncbd
  end
  oldfuncbd
end
19
+
20
+ # add di-specific registers written/accessed
21
# Adds to +a+ (registers accessed) the implicit register reads of +di+
# that are not part of its backtrace binding:
#   'ret'       reads :a, unless the function's return type is void
#   'in', 'out' read :a, :b and :c
# +w+ (registers written) is accepted for interface symmetry and left as is.
#
# The void test looks at the return type (func.type.type), matching the
# way 'ret' is handled in decompile_blocks.
def decompile_func_finddeps_di(dcmp, func, di, a, w)
  case di.instruction.opname
  when 'ret'
    rettype = func.type.type
    a << :a unless rettype.kind_of?(C::BaseType) and rettype.name == :void # standard ABI
  when 'in', 'out'
    a << :a << :b << :c
  end
end
29
+
30
+ # list variable dependency for each block, remove useless writes
31
+ # returns { blockaddr => [list of vars that are needed by a following block] }
32
# Dataflow pass over the blocks of one function.
# blocks: iterated as [block address, list of successor addresses] pairs
# func:   C function being built; register arguments are appended to func.type.args
# Returns a hash { block address => written registers that a later block may read }.
+ def decompile_func_finddeps(dcmp, blocks, func)
32
+ deps_r = {} ; deps_w = {} ; deps_to = {}
33
+ deps_subfunc = {} # things read/written by subfuncs
34
+
35
+ # find read/writes by each block
36
+ blocks.each { |b, to|
37
+ deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to
38
+ deps_subfunc[b] = []
39
+
40
+ blk = dcmp.dasm.decoded[b].block
41
+ blk.list.each { |di|
42
# binding values are what the instruction reads; Symbol keys are registers written,
# any other key (e.g. a memory destination) adds its externals to the reads
+ a = di.backtrace_binding.values
43
+ w = []
44
+ di.backtrace_binding.keys.each { |k|
45
+ case k
46
+ when ::Symbol; w |= [k]
47
+ else a |= Expression[k].externals
48
+ end
49
+ }
50
+ decompile_func_finddeps_di(dcmp, func, di, a, w)
51
+
52
# a register only counts as read by the block if no earlier instruction of the block wrote it
+ deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b]
53
+ deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
54
+ }
55
# record registers named by a 'register' attribute on the arguments of called functions
# NOTE(review): deps_subfunc is filled here but not read again in this method
+ blk.each_to_normal { |t|
56
+ t = dcmp.backtrace_target(t, blk.list.last.address)
57
+ next if not t = dcmp.c_parser.toplevel.symbol[t]
58
+ t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function
59
+ t.type.args.to_a.each { |arg|
60
+ if reg = arg.has_attribute('register')
61
+ deps_subfunc[b] |= [reg.to_sym]
62
+ end
63
+ }
64
+ }
65
+ }
66
+
67
# roots: block addresses that are not listed as a successor of any block
+ bt = blocks.transpose
68
+ roots = bt[0] - bt[1].flatten # XXX jmp 1stblock ?
69
+
70
+ # find regs read and never written (must have been set by caller and are part of the func ABI)
71
# truthy when r is read before being written on some path starting at b;
# done collects the blocks already visited so cycles terminate
+ uninitialized = lambda { |b, r, done|
72
+ if not deps_r[b]
73
+ elsif deps_r[b].include?(r)
74
+ true
75
+ elsif deps_w[b].include?(r)
76
+ else
77
+ done << b
78
+ (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] }
79
+ end
80
+ }
81
+
82
+ regargs = []
83
+ register_symbols.each { |r|
84
+ if roots.find { |root| uninitialized[root, r, []] }
85
+ regargs << r
86
+ end
87
+ }
88
+
89
+ # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
90
# declare each such register as an unsigned int argument tagged register(<name>), sorted by name
+ regargs.sort_by { |r| r.to_s }.each { |r|
91
+ a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
92
+ a.add_attribute("register(#{r})")
93
+ func.type.args << a
94
+ }
95
+
96
+ # remove writes from a block if no following block read the value
97
+ dw = {}
98
+ deps_w.each { |b, deps|
99
+ dw[b] = deps.reject { |dep|
100
# walk the successors: the write is kept (ret = false) as soon as a block reads dep,
# or a successor has no deps_r entry; a block that overwrites dep ends that path
+ ret = true
101
+ done = []
102
+ todo = deps_to[b].dup
103
+ while a = todo.pop
104
+ next if done.include? a
105
+ done << a
106
+ if not deps_r[a] or deps_r[a].include? dep
107
+ ret = false
108
+ break
109
+ elsif not deps_w[a].include? dep
110
+ todo.concat deps_to[a]
111
+ end
112
+ end
113
+ ret
114
+ }
115
+ }
116
+
117
+ dw
118
+ end
120
+
121
# Describes the calling convention used for function calls:
# the return value is in :a, and every register listed by
# register_symbols is reported as changed by the call.
def abi_funcall
  abi = {}
  abi[:retval] = :a
  abi[:changed] = register_symbols
  abi
end
124
+
125
# Translates the disassembled blocks of one function into C statements,
# appended to func.initializer.statements.
# myblocks: list of [block address, successor addresses]; consumed (shifted) while processing
# deps:     { block address => registers whose value must be committed } (see decompile_func_finddeps)
# nextaddr: address following the last block, used to decide whether a trailing goto is needed
# Once done, the backtrace_binding of every instruction of the processed blocks is reset to nil.
+ def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
126
+ scope = func.initializer
127
+ func.type.args.each { |a| scope.symbol[a.name] = a }
128
+ stmts = scope.statements
129
+ blocks_toclean = myblocks.dup
130
+ until myblocks.empty?
131
+ b, to = myblocks.shift
132
+ if l = dcmp.dasm.get_label_at(b)
133
+ stmts << C::Label.new(l)
134
+ end
135
+
136
+ # list of assignments [[dest reg, expr assigned]]
137
+ ops = []
138
+ # reg binding (reg => value, values.externals = regs at block start)
139
+ binding = {}
140
+ # Expr => CExpr
141
+ ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
142
+ # Expr => Expr.bind(binding) => CExpr
143
+ ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }
144
+
145
+ # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
146
# only registers listed in deps[b] are emitted, ordered by the index of their last pending assignment in ops
+ commit = lambda {
147
+ deps[b].map { |k|
148
+ [k, ops.rindex(ops.reverse.find { |r, v| r == k })]
149
+ }.sort_by { |k, i| i.to_i }.each { |k, i|
150
+ next if not i or not binding[k]
151
+ e = k
152
+ final = []
153
+ ops[0..i].reverse_each { |r, v|
154
+ final << r if not v
155
+ e = Expression[e].bind(r => v).reduce if not final.include? r
156
+ }
157
+ ops[i][1] = nil
158
+ binding.delete k
159
+ stmts << ce[k, :'=', e] if k != e
160
+ }
161
+ }
162
+
163
+ # returns an array to use as funcall arguments
164
# arguments carrying a 'register' attribute are read from that register, all others are passed as 0
+ get_func_args = lambda { |di, f|
165
+ # XXX see remarks in #finddeps
166
+ args_todo = f.type.args.to_a.dup
167
+ args = []
168
+ args_todo.each { |a_|
169
+ if r = a_.has_attribute_var('register')
170
+ args << Expression[r.to_sym]
171
+ else
172
+ args << Expression[0]
173
+ end
174
+ }
175
+ args.map { |e| ceb[e] }
176
+ }
177
+
178
+ # go !
179
+ dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
180
+ if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
181
+ # conditional jump
182
+ commit[]
183
+ n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
184
# NOTE(review): the binding comment for 'cj' in the ST20 decoder states the jump is taken
# when a == 0, while the condition emitted here is a != 0 - confirm the polarity
+ cc = ceb[:a, :'!=', 0]
185
+ # XXX switch/indirect/multiple jmp
186
+ stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
187
+ to.delete dcmp.dasm.normalize(n)
188
+ next
189
+ end
190
+
191
+ case di.instruction.opname
192
+ when 'ret'
193
+ commit[]
194
+ ret = nil
195
+ ret = C::CExpression[ceb[:a]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
196
+ stmts << C::Return.new(ret)
197
+ when 'fcall' # :saveip
198
+ n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
199
+ args = []
200
+ if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
201
+ args = get_func_args[di, f]
202
+ end
203
+ commit[]
204
+ #next if not di.block.to_subfuncret
205
+
206
+ if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function)
207
+ # indirect funcall
208
+ fptr = ceb[n]
209
+ binding.delete n
210
+ proto = C::Function.new(C::BaseType.new(:int))
211
+ proto = f.type if f and f.type.kind_of? C::Function
212
+ f = C::CExpression[[fptr], C::Pointer.new(proto)]
213
+ elsif not f
214
+ # internal functions are predeclared, so this one is extern
215
+ f = C::Variable.new
216
+ f.name = n
217
+ f.type = C::Function.new(C::BaseType.new(:int))
218
+ if dcmp.recurse > 0
219
+ dcmp.c_parser.toplevel.symbol[n] = f
220
+ dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
221
+ end
222
+ end
223
# NOTE(review): commit was already called above for this instruction, before the callee was resolved
+ commit[]
224
+ binding.delete :a
225
+ e = C::CExpression[f, :funcall, args]
226
+ e = C::CExpression[ce[:a], :'=', e, f.type.type] if deps[b].include? :a and f.type.type != C::BaseType.new(:void)
227
+ stmts << e
228
+ when 'in', 'out'
229
# channel i/o is emitted as a call to intrinsic_in / intrinsic_out, declared on first use
+ if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
230
+ dcmp.c_parser.parse("void intrinsic_#{di.instruction.opname}(unsigned int len, unsigned int channel, char *buf);")
231
+ end
232
+ f = dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
233
+ stmts << C::CExpression.new(f, :funcall, [ceb[:a], ceb[:b], ceb[:c]], f.type.type)
234
+ else
235
+ bd = get_fwdemu_binding(di)
236
+ if di.backtrace_binding[:incomplete_binding]
237
# no usable binding for this instruction: emit it verbatim as inline asm
+ commit[]
238
+ stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
239
+ else
240
+ update = {}
241
+ bd.each { |k, v|
242
# register writes not listed in deps[b] stay pending in ops/binding,
# every other effect is emitted immediately as a C assignment
+ if k.kind_of? ::Symbol and not deps[b].include? k
243
+ ops << [k, v]
244
+ update[k] = Expression[Expression[v].bind(binding).reduce]
245
+ else
246
+ stmts << ceb[k, :'=', v]
247
+ stmts.pop if stmts.last.kind_of? C::Variable # [:eflag_s, :=, :unknown].reduce
248
+ end
249
+ }
250
+ binding.update update
251
+ end
252
+ end
253
+ }
254
+ commit[]
255
+
256
+ case to.length
257
+ when 0
258
# NOTE(review): the ST20 decoder names its unconditional jump 'j'; 'jmp' here may be a leftover - confirm
+ if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
259
+ puts " block #{Expression[b]} has no to and don't end in ret"
260
+ end
261
+ when 1
262
# a goto is only needed when the single successor is not the block emitted next
+ if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
263
+ stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
264
+ end
265
+ else
266
+ puts " block #{Expression[b]} with multiple to"
267
+ end
268
+ end
269
+
270
+ # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
271
+ blocks_toclean.each { |b_, to_|
272
+ dcmp.dasm.decoded[b_].block.list.each { |di|
273
+ di.backtrace_binding = nil
274
+ }
275
+ }
276
+ end
277
+
278
# Drops from the function's argument list every argument that carries both
# a 'register' attribute and the 'unused' attribute.
# Returns the pruned list (a fresh empty array when the function has no
# argument list). +dcmp+ and +entry+ are not used.
def decompile_check_abi(dcmp, entry, func)
  arglist = func.type.args
  arglist = [] unless arglist
  arglist.delete_if do |arg|
    arg.has_attribute_var('register') and arg.has_attribute('unused')
  end
end
282
+ end
283
+ end