metasm 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile +1 -0
  4. data/doc/code_organisation.txt +1 -1
  5. data/metasm.gemspec +1 -1
  6. data/metasm.rb +2 -1
  7. data/metasm/cpu/arc/decode.rb +3 -3
  8. data/metasm/cpu/arm/decode.rb +2 -2
  9. data/metasm/cpu/ia32/compile_c.rb +18 -2
  10. data/metasm/cpu/ia32/decode.rb +9 -4
  11. data/metasm/cpu/ia32/decompile.rb +22 -8
  12. data/metasm/cpu/ia32/opcodes.rb +5 -5
  13. data/metasm/cpu/mcs51.rb +8 -0
  14. data/metasm/cpu/mcs51/decode.rb +99 -0
  15. data/metasm/cpu/mcs51/main.rb +76 -0
  16. data/metasm/cpu/mcs51/opcodes.rb +120 -0
  17. data/metasm/cpu/mips/decode.rb +5 -4
  18. data/metasm/cpu/st20.rb +9 -0
  19. data/metasm/cpu/st20/decode.rb +180 -0
  20. data/metasm/cpu/st20/decompile.rb +283 -0
  21. data/metasm/cpu/st20/main.rb +37 -0
  22. data/metasm/cpu/st20/opcodes.rb +140 -0
  23. data/metasm/cpu/x86_64/encode.rb +4 -2
  24. data/metasm/cpu/x86_64/opcodes.rb +4 -2
  25. data/metasm/decode.rb +16 -15
  26. data/metasm/decompile.rb +1 -1
  27. data/metasm/disassemble.rb +3 -1
  28. data/metasm/disassemble_api.rb +3 -1
  29. data/metasm/dynldr.rb +9 -3
  30. data/metasm/encode.rb +2 -2
  31. data/metasm/exe_format/coff.rb +3 -1
  32. data/metasm/exe_format/coff_decode.rb +5 -3
  33. data/metasm/exe_format/elf.rb +4 -0
  34. data/metasm/exe_format/elf_decode.rb +1 -2
  35. data/metasm/exe_format/elf_encode.rb +4 -1
  36. data/metasm/exe_format/macho.rb +20 -6
  37. data/metasm/exe_format/pe.rb +1 -1
  38. data/metasm/exe_format/serialstruct.rb +1 -1
  39. data/metasm/gui.rb +1 -1
  40. data/metasm/gui/dasm_hex.rb +2 -2
  41. data/metasm/gui/dasm_main.rb +8 -8
  42. data/metasm/gui/debug.rb +4 -4
  43. data/metasm/gui/gtk.rb +1 -1
  44. data/metasm/gui/qt.rb +2 -2
  45. data/metasm/gui/win32.rb +1 -1
  46. data/metasm/main.rb +11 -6
  47. data/metasm/os/windows.rb +26 -23
  48. data/misc/hexdump.rb +2 -2
  49. data/misc/objdiff.rb +4 -1
  50. data/misc/objscan.rb +1 -1
  51. data/samples/dasm-plugins/bindiff.rb +1 -1
  52. data/samples/dasm-plugins/scanxrefs.rb +2 -1
  53. data/samples/dynamic_ruby.rb +24 -25
  54. data/samples/elfencode.rb +15 -0
  55. data/samples/exeencode.rb +2 -2
  56. data/samples/metasm-shell.rb +67 -55
  57. data/tests/mcs51.rb +27 -0
  58. metadata +13 -2
@@ -0,0 +1,120 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2015-2016 Google
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/cpu/mcs51/main'
7
+
8
+ module Metasm
9
+
10
+ class MCS51
11
# Builds an Opcode called +name+ with base encoding +bin+ and appends it to
# @opcode_list.
#
# Each symbol in +args+ must be known to @valid_args or @fields_mask,
# otherwise a RuntimeError is raised. Known symbols are recorded in the
# opcode's argument list; symbols that also have a field mask get their
# shift (from @fields_shift) stored in the opcode's fields.
def addop(name, bin, *args)
  opcode = Opcode.new(name, bin)
  args.each do |arg|
    has_field = @fields_mask[arg]
    raise "unknown #{arg.inspect}" unless has_field or @valid_args[arg]
    opcode.args << arg
    opcode.fields[arg] = @fields_shift[arg] if has_field
  end
  @opcode_list << opcode
end
20
+
21
# Resets @opcode_list and fills it with the MCS51 opcode table.
#
# The table below is registered through addop in the listed order; the
# order is kept exactly as it was because several entries share a base
# encoding (e.g. 0x40 is used by both 'jc' and 'orl').
def init_mcs51
  @opcode_list = []

  # argument kinds accepted by addop
  [:rd, :r_a, :r_b, :r_c, :d8, :rel8, :m8, :addr_11, :addr_16].each { |kind|
    @valid_args[kind] = true
  }
  # arguments encoded inside the opcode byte itself
  @fields_mask.update(:rd => 15, :addr_11 => 7)
  @fields_shift.update(:rd => 0, :addr_11 => 5)

  table = [
    # no-operand / accumulator-only instructions
    ['nop',   0x00],
    ['ret',   0x22],
    ['reti',  0x32],
    ['swap',  0xc4, :r_a],
    ['???',   0xa5],
    ['rr',    0x03, :r_a],
    ['rrc',   0x13, :r_a],
    ['rl',    0x23, :r_a],
    ['rlc',   0x33, :r_a],

    # relative jumps
    ['jc',    0x40, :rel8],
    ['jnc',   0x50, :rel8],
    ['jz',    0x60, :rel8],
    ['jnz',   0x70, :rel8],
    ['sjmp',  0x80, :rel8],

    ['div',   0x84, :r_a, :r_b],
    ['mul',   0xa4, :r_a, :r_b],

    ['push',  0xc0, :m8],
    ['pop',   0xd0, :m8],

    ['clr',   0xc3, :r_c],
    ['clr',   0xe4, :r_a],
    ['cpl',   0xb3, :r_c],
    ['cpl',   0xf4, :r_a],
    ['da',    0xd4],

    # absolute / long jumps and calls
    ['ajmp',  0x01, :addr_11],
    ['acall', 0x11, :addr_11],
    ['ljmp',  0x02, :addr_16],
    ['lcall', 0x12, :addr_16],

    ['inc',   0x04, :r_a],
    ['inc',   0x05, :m8],
    ['inc',   0x00, :rd],

    ['dec',   0x14, :r_a],
    ['dec',   0x15, :m8],
    ['dec',   0x10, :rd],

    ['add',   0x24, :r_a, :d8],
    ['add',   0x25, :r_a, :m8],
    ['add',   0x20, :r_a, :rd],

    ['addc',  0x34, :r_a, :d8],
    ['addc',  0x35, :r_a, :m8],
    ['addc',  0x30, :r_a, :rd],

    ['orl',   0x42, :m8, :r_a],
    ['orl',   0x43, :m8, :d8],
    ['orl',   0x44, :r_a, :d8],
    ['orl',   0x45, :r_a, :m8],
    ['orl',   0x40, :r_a, :rd],

    ['anl',   0x52, :m8, :r_a],
    ['anl',   0x53, :m8, :d8],
    ['anl',   0x54, :r_a, :d8],
    ['anl',   0x55, :r_a, :m8],
    ['anl',   0x50, :r_a, :rd],

    ['xrl',   0x62, :m8, :r_a],
    ['xrl',   0x63, :m8, :d8],
    ['xrl',   0x64, :r_a, :d8],
    ['xrl',   0x65, :r_a, :m8],
    ['xrl',   0x60, :r_a, :rd],

    ['mov',   0x74, :r_a, :d8],
    ['mov',   0x75, :m8, :d8],
    ['mov',   0x70, :rd, :d8],
    ['mov',   0xa0, :rd, :m8],
    ['mov',   0x85, :m8, :m8],
    ['mov',   0x80, :m8, :rd],
    ['mov',   0xe0, :r_a, :rd],
    ['mov',   0xf0, :rd, :r_a],

    ['subb',  0x94, :r_a, :d8],
    ['subb',  0x95, :r_a, :m8],
    ['subb',  0x90, :r_a, :rd],

    # NOTE(review): the 8051 mnemonic for these encodings is usually
    # spelled CJNE; 'cnje' is kept because other files may match on this
    # name -- confirm before renaming.
    ['cnje',  0xb4, :r_a, :d8, :rel8],
    ['cnje',  0xb5, :r_a, :m8, :rel8],
    ['cnje',  0xb0, :rd, :d8, :rel8],

    ['xch',   0xc5, :r_a, :m8],
    ['xch',   0xc0, :r_a, :rd],

    ['djnz',  0xd5, :m8, :rel8],
    ['djnz',  0xd0, :rd, :rel8],
  ]
  table.each { |entry| addop(*entry) }

  @opcode_list
end
119
+ end
120
+ end
@@ -149,11 +149,12 @@ class MIPS
149
149
  when 'and', 'andi'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :&, a2] } }
150
150
  when 'or', 'ori'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :|, a2] } }
151
151
  when 'nor'; lambda { |di, a0, a1, a2| { a0 => Expression[:~, [a1, :|, a2]] } }
152
+ when 'not'; lambda { |di, a0, a1| { a0 => Expression[:~, a1] } }
152
153
  when 'xor'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :^, a2] } }
153
154
  when 'sll', 'sllv'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :>>, a2] } }
154
155
  when 'srl', 'srlv', 'sra', 'srav'; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<<, a2] } } # XXX sign-extend
155
- when 'lw'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
156
- when 'sw'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
156
+ when 'lw', 'lwl', 'lwr'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
157
+ when 'sw', 'swl', 'swr'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
157
158
  when 'lh', 'lhu'; lambda { |di, a0, a1| { a0 => Expression[a1] } } # XXX sign-extend
158
159
  when 'sh'; lambda { |di, a0, a1| { a1 => Expression[a0] } }
159
160
  when 'lb', 'lbu'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
@@ -161,8 +162,8 @@ class MIPS
161
162
  when /^slti?u?/; lambda { |di, a0, a1, a2| { a0 => Expression[a1, :<, a2] } } # XXX signedness
162
163
  when 'mfhi'; lambda { |di, a0| { a0 => Expression[:hi] } }
163
164
  when 'mflo'; lambda { |di, a0| { a0 => Expression[:lo] } }
164
- when 'mult'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
165
- when 'div'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
165
+ when 'mult', 'multu'; lambda { |di, a0, a1| { :hi => Expression[[a0, :*, a1], :>>, 32], :lo => Expression[[a0, :*, a1], :&, 0xffff_ffff] } }
166
+ when 'div', 'divu'; lambda { |di, a0, a1| { :hi => Expression[a0, :%, a1], :lo => Expression[a0, :/, a1] } }
166
167
  when 'jal', 'jalr'; lambda { |di, a0| { :$ra => Expression[Expression[di.address, :+, 2*di.bin_length].reduce] } }
167
168
  when 'li', 'mov'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
168
169
  when 'syscall'; lambda { |di, *a| { :$v0 => Expression::Unknown } }
@@ -0,0 +1,9 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/cpu/st20/decode'
9
+ require 'metasm/cpu/st20/decompile'
@@ -0,0 +1,180 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/st20/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class ST20
12
# Decodes the instruction found at edata.ptr; +addr+ is the address the
# instruction is mapped at.
# Returns nil when edata.ptr is at or past the end of edata, otherwise the
# DecodedInstruction after decode_instr_op and decode_instr_interpret.
def decode_instruction(edata, addr)
  return unless edata.ptr < edata.length

  di = DecodedInstruction.new(self)
  di.address = addr
  decode_instr_interpret(decode_instr_op(edata, di), addr)
end
20
+
21
# Reads bytes from +edata+ until a complete instruction is decoded into +di+.
#
# Bytestream structure:
#  * every byte is <function code: high nibble><data: low nibble>
#  * 'pfix'/'nfix' bytes are prefixes, building the operand word 4 bits at
#    a time in +pfx+ ('nfix' additionally complements the accumulated word)
#  * the first non-prefix byte is the actual function
#  * 'opr' means: use the operand word as an index in @op_operate
#
# Decoding stops early, with the accumulated word pushed as argument, when
# the data runs out or 4 bytes were already consumed.
# Returns +di+.
def decode_instr_op(edata, di, pfx=0)
  loop do
    if edata.ptr >= edata.length or di.bin_length >= 4
      di.instruction.args << Expression[pfx]
      return di
    end

    byte = edata.read(1).unpack('C').first
    pfx = (pfx << 4) | (byte & 0x0f)
    di.opcode = @opcode_list[(byte & 0xf0) >> 4]
    di.instruction.opname = di.opcode.name
    di.bin_length += 1

    case di.instruction.opname
    when 'pfix'
      next

    when 'nfix'
      pfx ^= -1
      # shown if the stream ends here: the whole decoded word cannot be
      # represented with 'nfix'
      di.instruction.opname = 'pfix'
      next

    when 'opr'
      operation = @op_operate[pfx]
      if operation
        # operations take no argument (they work on the implicit
        # 3-register stack A B C)
        di.instruction.opname = operation
        di.opcode = @opc_operate[operation] || di.opcode
      else
        # unknown operation, keep the generic form
        di.instruction.args << Expression[pfx]
      end

    else
      di.instruction.args << Expression[pfx]
    end

    return di
  end
end
65
+
66
# For 'j', 'cj' and 'fcall', replaces the last argument (a displacement)
# with addr + di.bin_length + displacement, reduced.
# Other instructions are left untouched. Returns +di+.
def decode_instr_interpret(di, addr)
  if %w[j cj fcall].include?(di.instruction.opname)
    operands = di.instruction.args
    displacement = operands.last.reduce
    target = Expression[[addr, :+, di.bin_length], :+, displacement].reduce
    operands[-1] = Expression[target]
  end

  di
end
76
+
77
# Returns the backtrace binding of +di+: a Hash { written location => new
# value }, where locations are the symbols :a, :b, :c (evaluation stack),
# :wspace (workspace pointer) or Indirections (memory).
#
# +sz+ is the machine word size in bytes (@size/8); word-indexed
# instructions scale their operand by it.
# Unhandled instructions yield a binding flagged :incomplete_binding with
# the whole evaluation stack unknown.
def get_backtrace_binding(di)
  arg = di.instruction.args[0]
  sz = @size/8
  unk = Expression::Unknown
  case di.instruction.opname
  when 'j'; {}
  when 'ldlp';  { :a => Expression[:wspace, :+, [sz, :*, arg]], :b => :a, :c => :b }
  when 'ldnl';  { :a => Indirection[[:a, :+, [sz, :*, arg]], sz, di] }
  when 'ldc';   { :a => arg, :b => :a, :c => :b }
  when 'ldnlp'; { :a => Expression[:a, :+, [sz, :*, arg]] }
  when 'ldl';   { :a => Indirection[[:wspace, :+, [sz, :*, arg]], sz, di], :b => :a, :c => :b }
  when 'adc';   { :a => Expression[:a, :+, arg] }
  when 'fcall'; {
      :a => Expression[di.next_addr],
      :wspace => Expression[:wspace, :-, [4*sz]],
      Indirection[[:wspace, :-, [4*sz]], sz, di] => di.next_addr,
      Indirection[[:wspace, :-, [3*sz]], sz, di] => :a,
      Indirection[[:wspace, :-, [2*sz]], sz, di] => :b,
      Indirection[[:wspace, :-, [1*sz]], sz, di] => :c,
    }
  # cj+(:a != 0) => a=b, b=c, c=unk ; (:a == 0) => jump, a=a, b=b, c=c
  when 'cj';    { :a => unk, :b => unk, :c => unk }
  # the operand counts words, like ldlp/ldl/stl: scale by sz, not a fixed 4
  when 'ajw';   { :wspace => Expression[:wspace, :+, [sz, :*, arg]] }
  when 'eqc';   { :a => Expression[:a, :==, arg] }
  when 'stl';   { Indirection[[:wspace, :+, [sz, :*, arg]], sz, di] => :a, :a => :b, :b => :c, :c => unk }
  when 'stnl';  { Indirection[[:a, :+, [sz, :*, arg]], sz, di] => :b, :a => :c, :b => unk, :c => unk }

  when 'add';   { :a => Expression[:b, :+, :a], :b => :c, :c => unk }
  when 'sub';   { :a => Expression[:b, :-, :a], :b => :c, :c => unk }
  when 'prod';  { :a => Expression[:b, :*, :a], :b => :c, :c => unk }
  when 'xor';   { :a => Expression[:b, :^, :a], :b => :c, :c => unk }
  # load pointer to instruction: computes an address, does not read memory
  when 'ldpi';  { :a => Expression[di.next_addr, :+, :a] }
  when 'mint';  { :a => Expression[-1 << (@size-1)], :b => :a, :c => :b }
  when 'in';    { :a => unk, :b => unk, :c => unk }  # read a bytes from channel b at buffer c
  when 'out';   { :a => unk, :b => unk, :c => unk }  # write a bytes to channel b from buffer c
  when 'lb';    { :a => Indirection[:a, 1, di] }
  when 'sb';    { Indirection[:a, 1, di] => Expression[:b, :&, 0xff], :a => :c, :b => unk, :c => unk }
  when 'bsub';  { :a => Expression[:a, :+, :b], :b => :c, :c => unk }
  when 'ssub';  { :a => Expression[:a, :+, [2, :*, :b]], :b => :c, :c => unk }
  when 'wsub';  { :a => Expression[:a, :+, [sz, :*, :b]], :b => :c, :c => unk }
  when 'gajw';  { :wspace => Expression[:a], :a => Expression[:wspace] }
  when 'dup';   { :b => :a, :c => :b }
  else
    puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
    { :incomplete_binding => Expression[1], :a => unk, :b => unk, :c => unk }
  end
end
124
+
125
# Returns the list of addresses +di+ may transfer execution to.
#
# Instructions whose opcode lacks the :setip property yield []. For 'j',
# 'cj' and 'fcall' the target is the first argument, which
# decode_instr_interpret has already turned into an absolute address.
# Any other :setip instruction (e.g. 'ret') is not resolved and yields [].
def get_xrefs_x(dasm, di)
  return [] if not di.opcode.props[:setip]

  case di.opcode.basename
  when 'j', 'cj', 'fcall'
    [Expression[di.instruction.args.first]]
  else
    []
  end
end
137
+
138
# Checks if +expr+ is a valid return expression matching the :saveip
# instruction.
#
# 'fcall' stores the return address in the word at the new workspace
# pointer (see its backtrace binding), so a return address is a word-sized
# Indirection whose target is exactly :wspace.
def backtrace_is_function_return(expr, di=nil)
  expr = Expression[expr].reduce_rec
  expr.kind_of?(Indirection) and expr.len == @size/8 and expr.target == Expression[:wspace]
end
143
+
144
# Updates the backtrace binding of function +f+ (starting at +faddr+) for
# each register in +wantregs+ and returns that binding.
#
# Nothing is computed when +retaddrlist+ is nil. Otherwise each register is
# first marked Unknown, then backtracked from every return address; the
# result is kept only if all return paths agree on one single value.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  binding_ = f.backtrace_binding

  wantregs.each do |reg|
    next unless retaddrlist

    binding_[reg] = Expression::Unknown
    values = []
    retaddrlist.each do |retaddr|
      values |= dasm.backtrace(Expression[reg], retaddr, :include_start => true,
                               :snapshot_addr => faddr, :origin => retaddr)
    end
    binding_[reg] = (values.length == 1 ? values.first : Expression::Unknown)
  end

  binding_
end
167
+
168
# Returns true if the expression is an address on the stack, i.e. if it
# depends on the workspace pointer :wspace (the base used by ldl/stl/fcall
# in the backtrace bindings).
def backtrace_is_stack_address(expr)
  Expression[expr].expr_externals.include?(:wspace)
end
172
+
173
# Rewrites in place the arguments of instruction +i+ (eg after a label was
# renamed): an argument equal to +old+ becomes +new+, any other argument is
# rebuilt with +old+ bound to +new+ and reduced.
def replace_instr_arg_immediate(i, old, new)
  i.args.map! do |operand|
    if operand == old
      new
    else
      Expression[operand.bind(old => new).reduce]
    end
  end
end
179
+ end
180
+ end
@@ -0,0 +1,283 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/st20/main'
8
+
9
+ module Metasm
10
+ class ST20
11
# Temporarily sets dasm.address_binding[funcstart] so that backtracking
# stack-related offsets resolves in terms of :frameptr (relative to func
# start), and yields every element of +blocks+ while this is in place.
#
# The previous binding is put back even if the block raises, so a failure
# while processing one function does not leave the override behind.
# Returns the previous binding.
def decompile_makestackvars(dasm, funcstart, blocks)
  oldfuncbd = dasm.address_binding[funcstart]
  dasm.address_binding[funcstart] = { :wspace => :frameptr }
  begin
    blocks.each { |block| yield block }
  ensure
    dasm.address_binding[funcstart] = oldfuncbd
  end
  oldfuncbd
end
19
+
20
+ # add di-specific registry written/accessed
21
+ def decompile_func_finddeps_di(dcmp, func, di, a, w)
22
+ case di.instruction.opname
23
+ when 'ret'
24
+ a << :a if not func.type.kind_of? C::BaseType or func.type.type.name != :void # standard ABI
25
+ when 'in', 'out'
26
+ a << :a << :b << :c
27
+ end
28
+ end
29
+
30
# Lists the variable dependencies of each block and removes useless writes.
#
# +blocks+ is a list of [block address, [successor addresses]].
# Registers found to be read before any write on a path from a root block
# are appended to func.type.args as unsigned int variables carrying a
# "register(name)" attribute.
#
# Returns { block address => [registers written by the block whose value is
# read by a following block] }.
def decompile_func_finddeps(dcmp, blocks, func)
  reads = {}         # block => symbols read before being written in the block
  writes = {}        # block => symbols written in the block
  successors = {}    # block => following blocks
  subfunc_deps = {}  # block => things read/written by subfuncs (collected only)

  # symbols referenced by a list of expressions, without :unknown
  symbols_of = lambda { |list|
    list.map { |e| Expression[e].externals.grep(::Symbol) }.flatten - [:unknown]
  }

  # find read/writes by each block
  blocks.each { |addr, to|
    reads[addr] = []
    writes[addr] = []
    successors[addr] = to
    subfunc_deps[addr] = []

    blk = dcmp.dasm.decoded[addr].block
    blk.list.each { |di|
      accessed = di.backtrace_binding.values
      written = []
      di.backtrace_binding.keys.each { |key|
        if key.kind_of?(::Symbol)
          written |= [key]
        else
          # a memory destination: its address expression is read
          accessed |= Expression[key].externals
        end
      }
      decompile_func_finddeps_di(dcmp, func, di, accessed, written)

      reads[addr] |= symbols_of[accessed] - writes[addr]
      writes[addr] |= symbols_of[written]
    }

    blk.each_to_normal { |target|
      name = dcmp.backtrace_target(target, blk.list.last.address)
      callee = dcmp.c_parser.toplevel.symbol[name]
      next unless callee
      callee.type = C::Function.new(C::BaseType.new(:int)) unless callee.type.kind_of?(C::Function)
      callee.type.args.to_a.each { |param|
        # NOTE(review): other methods read this attribute through
        # has_attribute_var('register') -- confirm which one is intended
        reg = param.has_attribute('register')
        subfunc_deps[addr] |= [reg.to_sym] if reg
      }
    }
  }

  starts, nexts = blocks.transpose
  roots = starts - nexts.flatten  # XXX jmp 1stblock ?

  # find regs read and never written (must have been set by caller and are part of the func ABI)
  read_before_write = lambda { |addr, reg, visited|
    if not reads[addr]
      nil
    elsif reads[addr].include?(reg)
      true
    elsif writes[addr].include?(reg)
      nil
    else
      visited << addr
      (successors[addr] - visited).find { |nxt| read_before_write[nxt, reg, visited] }
    end
  }

  regargs = register_symbols.select { |reg|
    roots.find { |root| read_before_write[root, reg, []] }
  }

  # TODO honor user-defined prototype if available
  regargs.sort_by { |reg| reg.to_s }.each { |reg|
    var = C::Variable.new(reg.to_s, C::BaseType.new(:int, :unsigned))
    var.add_attribute("register(#{reg})")
    func.type.args << var
  }

  # remove writes from a block if no following block read the value
  live_writes = {}
  writes.each { |addr, written|
    live_writes[addr] = written.reject { |dep|
      live = false
      visited = []
      pending = successors[addr].dup
      while nxt = pending.pop
        next if visited.include?(nxt)
        visited << nxt
        if not reads[nxt] or reads[nxt].include?(dep)
          # unknown block, or the value is read there
          live = true
          break
        elsif not writes[nxt].include?(dep)
          # neither read nor overwritten: keep following the flow
          pending.concat successors[nxt]
        end
      end
      !live
    }
  }

  live_writes
end
120
+
121
+ def abi_funcall
122
+ { :retval => :a, :changed => register_symbols }
123
+ end
124
+
125
# Appends to the body of +func+ (func.initializer) the C statements
# decompiled from the basic blocks in +myblocks+.
#
# dcmp::     the decompiler (its dasm, c_parser, recurse, backtrace_target
#            and decompile_cexpr are used)
# myblocks:: list of [block address, [successor addresses]]; it is consumed
#            (shifted until empty) by this method
# deps::     { block address => [registers] }; writes to these registers are
#            emitted as C assignments, other register writes are only
#            tracked in a per-block binding
#            (presumably the value returned by decompile_func_finddeps)
# nextaddr:: address of the code following the last block, used to decide
#            whether a trailing goto is needed
#
# Once done, the backtrace_binding of every instruction of the blocks is
# reset to nil.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  scope = func.initializer
  func.type.args.each { |a| scope.symbol[a.name] = a }
  stmts = scope.statements
  blocks_toclean = myblocks.dup
  until myblocks.empty?
    b, to = myblocks.shift
    if l = dcmp.dasm.get_label_at(b)
      stmts << C::Label.new(l)
    end

    # list of assignments [[dest reg, expr assigned]]
    ops = []
    # reg binding (reg => value, values.externals = regs at block start)
    binding = {}
    # Expr => CExpr
    ce  = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
    # Expr => Expr.bind(binding) => CExpr
    ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }

    # dumps the CExprs that implement the pending assignments to the regs
    # listed in deps[b] (uses ops[], patches op => [reg, nil])
    commit = lambda {
      deps[b].map { |k|
        [k, ops.rindex(ops.reverse.find { |r, v| r == k })]
      }.sort_by { |k, i| i.to_i }.each { |k, i|
        next if not i or not binding[k]
        e = k
        final = []
        ops[0..i].reverse_each { |r, v|
          final << r if not v
          e = Expression[e].bind(r => v).reduce if not final.include? r
        }
        ops[i][1] = nil
        binding.delete k
        stmts << ce[k, :'=', e] if k != e
      }
    }

    # returns an array to use as funcall arguments
    get_func_args = lambda { |di, f|
      # XXX see remarks in #finddeps
      args_todo = f.type.args.to_a.dup
      args = []
      args_todo.each { |a_|
        if r = a_.has_attribute_var('register')
          args << Expression[r.to_sym]
        else
          args << Expression[0]
        end
      }
      args.map { |e| ceb[e] }
    }

    # go !
    dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
      if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
        # conditional jump
        commit[]
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        # cj takes the jump when a == 0 (falls through when a != 0)
        cc = ceb[:a, :'==', 0]
        # XXX switch/indirect/multiple jmp
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
        to.delete dcmp.dasm.normalize(n)
        next
      end

      case di.instruction.opname
      when 'ret'
        commit[]
        ret = nil
        ret = C::CExpression[ceb[:a]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
        stmts << C::Return.new(ret)
      when 'fcall'  # :saveip
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        args = []
        if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
          args = get_func_args[di, f]
        end
        commit[]
        #next if not di.block.to_subfuncret

        if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function)
          # indirect funcall
          fptr = ceb[n]
          binding.delete n
          proto = C::Function.new(C::BaseType.new(:int))
          proto = f.type if f and f.type.kind_of? C::Function
          f = C::CExpression[[fptr], C::Pointer.new(proto)]
        elsif not f
          # internal functions are predeclared, so this one is extern
          f = C::Variable.new
          f.name = n
          f.type = C::Function.new(C::BaseType.new(:int))
          if dcmp.recurse > 0
            dcmp.c_parser.toplevel.symbol[n] = f
            dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
          end
        end
        commit[]
        binding.delete :a
        e = C::CExpression[f, :funcall, args]
        e = C::CExpression[ce[:a], :'=', e, f.type.type] if deps[b].include? :a and f.type.type != C::BaseType.new(:void)
        stmts << e
      when 'in', 'out'
        # channel I/O is rendered as a call to a declared-on-demand intrinsic
        if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
          dcmp.c_parser.parse("void intrinsic_#{di.instruction.opname}(unsigned int len, unsigned int channel, char *buf);")
        end
        f = dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
        stmts << C::CExpression.new(f, :funcall, [ceb[:a], ceb[:b], ceb[:c]], f.type.type)
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          # semantics unknown: keep the instruction as inline asm
          commit[]
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
        else
          update = {}
          bd.each { |k, v|
            if k.kind_of? ::Symbol and not deps[b].include? k
              ops << [k, v]
              update[k] = Expression[Expression[v].bind(binding).reduce]
            else
              stmts << ceb[k, :'=', v]
              # drop the result when it is a bare C::Variable rather than
              # an assignment expression
              stmts.pop if stmts.last.kind_of? C::Variable
            end
          }
          binding.update update
        end
      end
    }
    commit[]

    case to.length
    when 0
      # 'j' is the unconditional jump of this cpu
      if not myblocks.empty? and not %w[ret j].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
        puts "  block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts "  block #{Expression[b]} with multiple to"
    end
  end

  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  blocks_toclean.each { |b_, to_|
    dcmp.dasm.decoded[b_].block.list.each { |di|
      di.backtrace_binding = nil
    }
  }
end
277
+
278
# Removes from the argument list of +func+ every argument that carries both
# a 'register' attribute value and the 'unused' attribute.
# Works on an empty list when the function has no argument list.
def decompile_check_abi(dcmp, entry, func)
  params = func.type.args || []
  params.delete_if do |param|
    param.has_attribute_var('register') and param.has_attribute('unused')
  end
end
282
+ end
283
+ end