metasm 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/Gemfile +3 -3
  4. data/Rakefile +1 -1
  5. data/cortex.yaml +17 -0
  6. data/metasm/cpu/arm64/decode.rb +87 -11
  7. data/metasm/cpu/arm64/decompile.rb +142 -0
  8. data/metasm/cpu/arm64/opcodes.rb +53 -23
  9. data/metasm/cpu/arm64.rb +1 -0
  10. data/metasm/cpu/dwarf/debug.rb +39 -0
  11. data/metasm/cpu/dwarf/decode.rb +124 -0
  12. data/metasm/cpu/dwarf/decompile.rb +212 -0
  13. data/metasm/cpu/dwarf/encode.rb +49 -0
  14. data/metasm/cpu/dwarf/main.rb +37 -0
  15. data/metasm/cpu/dwarf/opcodes.rb +107 -0
  16. data/metasm/cpu/dwarf.rb +11 -0
  17. data/metasm/cpu/ia32/debug.rb +8 -0
  18. data/metasm/cpu/ia32/decode.rb +25 -1
  19. data/metasm/cpu/ia32/decompile.rb +205 -342
  20. data/metasm/cpu/mips/decode.rb +1 -1
  21. data/metasm/cpu/ppc/decode.rb +1 -1
  22. data/metasm/cpu/sh4/decode.rb +1 -1
  23. data/metasm/cpu/x86_64/decompile.rb +68 -0
  24. data/metasm/cpu/x86_64.rb +1 -0
  25. data/metasm/decode.rb +14 -0
  26. data/metasm/decompile.rb +51 -27
  27. data/metasm/disassemble.rb +24 -15
  28. data/metasm/dynldr.rb +23 -4
  29. data/metasm/encode.rb +11 -0
  30. data/metasm/exe_format/elf.rb +60 -2
  31. data/metasm/exe_format/elf_decode.rb +201 -6
  32. data/metasm/exe_format/shellcode.rb +39 -0
  33. data/metasm/gui/dasm_decomp.rb +1 -0
  34. data/metasm/os/emulator.rb +7 -0
  35. data/metasm/parse_c.rb +1 -1
  36. data/metasm.gemspec +1 -2
  37. data/metasm.rb +1 -1
  38. data/samples/disassemble-gui.rb +27 -11
  39. data/samples/disassemble.rb +9 -12
  40. data/samples/emudbg.rb +1 -1
  41. data/samples/factorize-headers-elfimports.rb +4 -1
  42. data/samples/lindebug.rb +16 -2
  43. data/tests/shellcode.rb +111 -0
  44. metadata +19 -102
  45. checksums.yaml.gz.sig +0 -3
  46. data.tar.gz.sig +0 -0
  47. metadata.gz.sig +0 -0
@@ -34,14 +34,14 @@ class Ia32
34
34
  patched_binding << blockstart
35
35
  dasm.address_binding[blockstart] = {}
36
36
  foo = dasm.backtrace(esp, blockstart, :snapshot_addr => funcstart)
37
- if foo.length == 1 and ee = foo.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or
38
- (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer))
37
+ if foo.length == 1 and ee = foo.first and ee.kind_of?(Expression) and (ee == Expression[:frameptr] or
38
+ (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer)))
39
39
  dasm.address_binding[blockstart][esp] = ee
40
40
  end
41
41
  if ebp_frame
42
42
  foo = dasm.backtrace(ebp, blockstart, :snapshot_addr => funcstart)
43
- if foo.length == 1 and ee = foo.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or
44
- (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer))
43
+ if foo.length == 1 and ee = foo.first and ee.kind_of?(Expression) and (ee == Expression[:frameptr] or
44
+ (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer)))
45
45
  dasm.address_binding[blockstart][ebp] = ee
46
46
  else
47
47
  ebp_frame = false # func does not use ebp as frame ptr, no need to bt for later blocks
@@ -59,143 +59,194 @@ class Ia32
59
59
 
60
60
  # add di-specific registry written/accessed
61
61
  def decompile_func_finddeps_di(dcmp, func, di, a, w)
62
- a << register_symbols[0] if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
62
+ a << register_symbols[0] if di.opcode.name == 'ret' and (not func.type.kind_of?(C::BaseType) or func.type.type.name != :void) # standard ABI
63
63
  end
64
64
 
65
65
  # list variable dependency for each block, remove useless writes
66
66
  # returns { blockaddr => [list of vars that are needed by a following block] }
67
67
  def decompile_func_finddeps(dcmp, blocks, func)
68
- deps_r = {} ; deps_w = {} ; deps_to = {}
69
- deps_subfunc = {} # things read/written by subfuncs
70
-
71
- # find read/writes by each block
72
- blocks.each { |b, to|
73
- deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to
74
- deps_subfunc[b] = []
75
-
76
- blk = dcmp.dasm.decoded[b].block
77
- blk.list.each { |di|
78
- a = di.backtrace_binding.values
79
- w = []
80
- di.backtrace_binding.keys.each { |k|
81
- case k
82
- when ::Symbol; w |= [k]
83
- else a |= Expression[k].externals # if dword [eax] <- 42, eax is read
84
- end
85
- }
86
- decompile_func_finddeps_di(dcmp, func, di, a, w)
87
-
88
- deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b]
89
- deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
90
- }
91
- stackoff = nil
92
- blk.each_to_normal { |t|
93
- t = dcmp.backtrace_target(t, blk.list.last.address)
94
- next if not t = dcmp.c_parser.toplevel.symbol[t]
95
- t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function # XXX this may seem a bit extreme, and yes, it is.
96
- stackoff ||= Expression[dcmp.dasm.backtrace(:esp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :esp].reduce
68
+ # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
69
+ #regargs.sort_by { |r| r.to_s }.each { |r|
70
+ #a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
71
+ #a.add_attribute("register(#{r})")
72
+ #func.type.args << a
73
+ #}
74
+ end
97
75
 
98
- # things that are needed by the subfunction
99
- if t.has_attribute('fastcall')
100
- a = t.type.args.to_a
101
- dep = [:ecx, :edx]
102
- dep.shift if not a[0] or a[0].has_attribute('unused')
103
- dep.pop if not a[1] or a[1].has_attribute('unused')
104
- deps_subfunc[b] |= dep
105
- end
106
- t.type.args.to_a.each { |arg|
107
- if reg = arg.has_attribute('register')
108
- deps_subfunc[b] |= [reg.to_sym]
109
- end
110
- }
111
- }
112
- if stackoff # last block instr == subfunction call
113
- deps_r[b] |= deps_subfunc[b] - deps_w[b]
114
- deps_w[b] |= register_symbols[0, 3] # standard ABI
76
+ # return the array of arguments (symbols, indirections wrt frameptr) to be used as arguments for decompilation of the function call in di
77
+ def decompile_get_func_args(dcmp, func_entry, di, f)
78
+ bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
79
+ stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil
80
+ args_todo = f.type.args.to_a.dup
81
+ args = []
82
+ if f.has_attribute('fastcall') # XXX DRY
83
+ if a = args_todo.shift
84
+ mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1
85
+ mask = 0 if a.has_attribute('unused')
86
+ args << Expression[:ecx, :&, mask]
87
+ end
88
+ if a = args_todo.shift
89
+ mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1 # char => dl
90
+ mask = 0 if a.has_attribute('unused')
91
+ args << Expression[:edx, :&, mask]
92
+ end
93
+ end
94
+ args_todo.each { |a_|
95
+ if r = a_.has_attribute_var('register')
96
+ args << Expression[r.to_sym]
97
+ elsif stackoff.kind_of?(Integer)
98
+ args << Indirection[[:frameptr, :+, stackoff], @size/8]
99
+ stackoff += [dcmp.sizeof(a_), @size/8].max
100
+ else
101
+ args << Expression[0]
115
102
  end
116
103
  }
117
104
 
118
-
119
- bt = blocks.transpose
120
- roots = bt[0] - bt[1].flatten # XXX jmp 1stblock ?
121
-
122
- # find regs read and never written (must have been set by caller and are part of the func ABI)
123
- uninitialized = lambda { |b, r, done|
124
- if not deps_r[b]
125
- elsif deps_r[b].include?(r)
126
- blk = dcmp.dasm.decoded[b].block
127
- bw = []
128
- rdi = blk.list.find { |di|
129
- a = di.backtrace_binding.values
130
- w = []
131
- di.backtrace_binding.keys.each { |k|
132
- case k
133
- when ::Symbol; w |= [k]
134
- else a |= Expression[k].externals # if dword [eax] <- 42, eax is read
135
- end
105
+ if f.type.varargs and f.type.args.last.type.pointer? and stackoff.kind_of?(Integer)
106
+ # check if last arg is a fmtstring
107
+ bt = dcmp.dasm.backtrace(args.last, di.address, :snapshot_addr => func_entry, :include_start => true)
108
+ if bt.length == 1 and s = dcmp.dasm.get_section_at(bt.first)
109
+ fmt = s[0].read(512)
110
+ fmt = fmt.unpack('v*').pack('C*') if dcmp.sizeof(f.type.args.last.type.untypedef.type) == 2
111
+ if fmt.index(?\0)
112
+ fmt = fmt[0...fmt.index(?\0)]
113
+ fmt.gsub('%%', '').count('%').times { # XXX %.*s etc..
114
+ args << Indirection[[:frameptr, :+, stackoff], @size/8]
115
+ stackoff += @size/8
136
116
  }
137
- decompile_func_finddeps_di(dcmp, func, di, a, w)
138
-
139
- next true if (a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - bw).include? r
140
- bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
141
- false
142
- }
143
- if r == register_symbols[0] and (rdi || blk.list.last).opcode.name == 'ret'
144
- func.type.type = C::BaseType.new(:void)
145
- false
146
- elsif rdi and rdi.backtrace_binding[r]
147
- false # mov al, 42 ; ret -> don't regarg eax
148
- else
149
- true
150
117
  end
151
- elsif deps_w[b].include?(r)
152
- else
153
- done << b
154
- (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] }
155
118
  end
156
- }
119
+ end
157
120
 
158
- regargs = []
159
- register_symbols.each { |r|
160
- if roots.find { |root| uninitialized[root, r, []] }
161
- regargs << r
162
- end
163
- }
121
+ args
122
+ end
164
123
 
165
- # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
166
- regargs.sort_by { |r| r.to_s }.each { |r|
167
- a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
168
- a.add_attribute("register(#{r})")
169
- func.type.args << a
124
+ def decompile_special_instr(dcmp, di, stmts, scope)
125
+ di_addr = di.address
126
+ ce = lambda { |*e|
127
+ ret = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
128
+ dcmp.walk_ce(ret) { |ee| ee.with_misc(:di_addr => di_addr) }
129
+ ret
170
130
  }
171
131
 
172
- # remove writes from a block if no following block read the value
173
- dw = {}
174
- deps_w.each { |b, deps|
175
- dw[b] = deps.reject { |dep|
176
- ret = true
177
- done = []
178
- todo = deps_to[b].dup
179
- while a = todo.pop
180
- next if done.include? a
181
- done << a
182
- if not deps_r[a] or deps_r[a].include? dep
183
- ret = false
184
- break
185
- elsif not deps_w[a].include? dep
186
- todo.concat deps_to[a]
187
- end
132
+ case di.opcode.name
133
+ when 'mov'
134
+ # mov cr0 etc
135
+ a1, a2 = di.instruction.args
136
+ case a1
137
+ when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg
138
+ sz = a1.kind_of?(Ia32::SegReg) ? 16 : 32
139
+ if not dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
140
+ dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});")
188
141
  end
189
- ret
190
- }
191
- }
142
+ f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
143
+ a2 = a2.symbolic(di)
144
+ a2 = [a2, :&, 0xffff] if sz == 16
145
+ stmts << ce[C::CExpression.new(f, :funcall, [ce[a2]], f.type.type)]
146
+ return true
147
+ end
148
+ case a2
149
+ when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg
150
+ if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
151
+ sz = a2.kind_of?(Ia32::SegReg) ? 16 : 32
152
+ dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);")
153
+ end
154
+ f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
155
+ t = f.type.type
156
+ stmts << ce[a1.symbolic(di), :'=', C::CExpression.new(f, :funcall, [], t)]
157
+ return true
158
+ end
159
+ when 'lgdt'
160
+ if not dcmp.c_parser.toplevel.struct['segment_descriptor']
161
+ dcmp.c_parser.parse('struct segment_descriptor { __int16 limit; __int16 base0_16; __int8 base16_24; __int8 flags1; __int8 flags2_limit_16_20; __int8 base24_32; };')
162
+ dcmp.c_parser.parse('struct segment_table { __int16 size; struct segment_descriptor *table; } __attribute__((pack(2)));')
163
+ end
164
+ if not dcmp.c_parser.toplevel.symbol['intrinsic_lgdt']
165
+ dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);')
166
+ end
167
+ # need a way to transform arg => :frameptr+12
168
+ arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
169
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ce[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
170
+ return true
171
+ when 'lidt'
172
+ if not dcmp.c_parser.toplevel.struct['interrupt_descriptor']
173
+ dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
174
+ dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));')
175
+ end
176
+ if not dcmp.c_parser.toplevel.symbol['intrinsic_lidt']
177
+ dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
178
+ end
179
+ arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
180
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ce[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
181
+ return true
182
+ when 'ltr', 'lldt'
183
+ if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
184
+ dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
185
+ end
186
+ arg = di.backtrace_binding.keys.first
187
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ce[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
188
+ return true
189
+ when 'out'
190
+ sz = di.instruction.args.find { |a_| a_.kind_of?(Ia32::Reg) and a_.val == 0 }.sz
191
+ if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
192
+ dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
193
+ end
194
+ port = di.instruction.args.grep(Expression).first || edx
195
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ce[port], ce[eax]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
196
+ return true
197
+ when 'in'
198
+ sz = di.instruction.args.find { |a_| a_.kind_of?(Ia32::Reg) and a_.val == 0 }.sz
199
+ if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
200
+ dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
201
+ end
202
+ port = di.instruction.args.grep(Expression).first || edx
203
+ f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
204
+ stmts << C::CExpression.new(ce[eax], :'=', C::CExpression.new(f, :funcall, [ce[port]], f.type.type), f.type.type).with_misc(:di_addr => di_addr)
205
+ return true
206
+ when 'sti', 'cli'
207
+ stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
208
+ return true
209
+ when /^(mov|sto|lod)s([bwdq])/
210
+ op, sz = $1, $2
211
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
212
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = eax, ecx, edx, ebx, esp, ebp, esi, edi
213
+ sz = { 'b' => 1, 'w' => 2, 'd' => 4, 'q' => 8 }[sz]
214
+ pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym))
215
+
216
+ blk = C::Block.new(scope)
217
+ case op
218
+ when 'mov'
219
+ blk.statements << C::CExpression[[:*, [[ce[edi]], pt]], :'=', [:*, [[ce[esi]], pt]]].with_misc(:di_addr => di_addr)
220
+ blk.statements << C::CExpression[ce[edi], :'=', [ce[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
221
+ blk.statements << C::CExpression[ce[esi], :'=', [ce[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
222
+ when 'sto'
223
+ blk.statements << C::CExpression[[:*, [[ce[edi]], pt]], :'=', ce[eax]].with_misc(:di_addr => di_addr)
224
+ blk.statements << C::CExpression[ce[edi], :'=', [ce[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
225
+ when 'lod'
226
+ blk.statements << C::CExpression[ce[eax], :'=', [:*, [[ce[esi]], pt]]].with_misc(:di_addr => di_addr)
227
+ blk.statements << C::CExpression[ce[esi], :'=', [ce[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
228
+ #when 'sca'
229
+ #when 'cmp'
230
+ end
231
+
232
+ case (di.instruction.prefix || {})[:rep]
233
+ when nil
234
+ stmts.concat blk.statements
235
+ when 'rep'
236
+ blk.statements << C::CExpression[ce[ecx], :'=', [ce[ecx], :-, [1]]].with_misc(:di_addr => di_addr)
237
+ stmts << C::While.new(C::CExpression[ce[ecx]], blk).with_misc(:di_addr => di_addr)
238
+ #when 'repz' # sca/cmp only
239
+ #when 'repnz'
240
+ end
241
+ return true
242
+ end
192
243
 
193
- dw
244
+ false
194
245
  end
195
246
 
196
247
  def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
197
- eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
198
- ebx, esp, ebp = ebx, esp, ebp # fix ruby unused var warning
248
+ eax, ecx, edx, _ = register_symbols
249
+ ebx, esp, ebp, esi, edi = ebx, esp, ebp # fix ruby unused var warning
199
250
  scope = func.initializer
200
251
  func.type.args.each { |a| scope.symbol[a.name] = a }
201
252
  stmts = scope.statements
@@ -208,86 +259,12 @@ class Ia32
208
259
  stmts << C::Label.new(l).with_misc(:di_addr => b)
209
260
  end
210
261
 
211
- # list of assignments [[dest reg, expr assigned]]
212
- ops = []
213
- # reg binding (reg => value, values.externals = regs at block start)
214
- binding = {}
215
262
  # Expr => CExpr
216
263
  ce = lambda { |*e|
217
264
  ret = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
218
265
  dcmp.walk_ce(ret) { |ee| ee.with_misc(:di_addr => di_addr) } if di_addr
219
266
  ret
220
267
  }
221
- # Expr => Expr.bind(binding) => CExpr
222
- ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }
223
-
224
- # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
225
- commit = lambda {
226
- deps[b].map { |k|
227
- [k, ops.rindex(ops.reverse.find { |r, v| r == k })]
228
- }.sort_by { |k, i| i.to_i }.each { |k, i|
229
- next if not i or not binding[k]
230
- e = k
231
- final = []
232
- ops[0..i].reverse_each { |r, v|
233
- final << r if not v
234
- e = Expression[e].bind(r => v).reduce if not final.include? r
235
- }
236
- ops[i][1] = nil
237
- binding.delete k
238
- stmts << ce[k, :'=', e] if k != e
239
- }
240
- }
241
-
242
- # returns an array to use as funcall arguments
243
- get_func_args = lambda { |di, f|
244
- # XXX see remarks in #finddeps
245
- # TODO x64
246
- bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
247
- stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil
248
- args_todo = f.type.args.to_a.dup
249
- args = []
250
- if f.has_attribute('fastcall') # XXX DRY
251
- if a = args_todo.shift
252
- mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1
253
- mask = 0 if a.has_attribute('unused')
254
- args << Expression[:ecx, :&, mask]
255
- end
256
- if a = args_todo.shift
257
- mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1 # char => dl
258
- mask = 0 if a.has_attribute('unused')
259
- args << Expression[:edx, :&, mask]
260
- end
261
- end
262
- args_todo.each { |a_|
263
- if r = a_.has_attribute_var('register')
264
- args << Expression[r.to_sym]
265
- elsif stackoff.kind_of? Integer
266
- args << Indirection[[:frameptr, :+, stackoff], @size/8]
267
- stackoff += [dcmp.sizeof(a_), @size/8].max
268
- else
269
- args << Expression[0]
270
- end
271
- }
272
-
273
- if f.type.varargs and f.type.args.last.type.pointer? and stackoff.kind_of? Integer
274
- # check if last arg is a fmtstring
275
- bt = dcmp.dasm.backtrace(args.last, di.address, :snapshot_addr => func_entry, :include_start => true)
276
- if bt.length == 1 and s = dcmp.dasm.get_section_at(bt.first)
277
- fmt = s[0].read(512)
278
- fmt = fmt.unpack('v*').pack('C*') if dcmp.sizeof(f.type.args.last.type.untypedef.type) == 2
279
- if fmt.index(?\0)
280
- fmt = fmt[0...fmt.index(?\0)]
281
- fmt.gsub('%%', '').count('%').times { # XXX %.*s etc..
282
- args << Indirection[[:frameptr, :+, stackoff], @size/8]
283
- stackoff += @size/8
284
- }
285
- end
286
- end
287
- end
288
-
289
- args.map { |e| ceb[e] }
290
- }
291
268
 
292
269
  # go !
293
270
  dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
@@ -295,13 +272,13 @@ class Ia32
295
272
  a = di.instruction.args
296
273
  if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
297
274
  # conditional jump
298
- commit[]
299
275
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
300
276
  if di.opcode.name =~ /^loop(.+)?/
301
- cx = C::CExpression[:'--', ceb[ecx]]
302
- cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx
277
+ loopcc = decode_cc_to_expr($1) if $1
278
+ cx = C::CExpression[:'--', ce[ecx]]
279
+ cc = loopcc ? C::CExpression[cx, :'&&', ce[loopcc]] : cx
303
280
  else
304
- cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
281
+ cc = ce[decode_cc_to_expr(di.opcode.name[1..-1])]
305
282
  end
306
283
  # XXX switch/indirect/multiple jmp
307
284
  stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
@@ -309,61 +286,36 @@ class Ia32
309
286
  next
310
287
  end
311
288
 
312
- if di.opcode.name == 'mov'
313
- # mov cr0 etc
314
- a1, a2 = di.instruction.args
315
- case a1
316
- when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg
317
- sz = a1.kind_of?(Ia32::SegReg) ? 16 : 32
318
- if not dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
319
- dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});")
320
- end
321
- f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
322
- a2 = a2.symbolic(di)
323
- a2 = [a2, :&, 0xffff] if sz == 16
324
- stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type).with_misc(:di_addr => di_addr)
325
- next
326
- end
327
- case a2
328
- when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg
329
- if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
330
- sz = a2.kind_of?(Ia32::SegReg) ? 16 : 32
331
- dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);")
332
- end
333
- f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
334
- t = f.type.type
335
- binding.delete a1.symbolic(di)
336
- stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t).with_misc(:di_addr => di_addr), t).with_misc(:di_addr => di_addr)
337
- next
338
- end
289
+ if decompile_special_instr(dcmp, di, stmts, scope)
290
+ next
339
291
  end
340
292
 
341
293
  case di.opcode.name
342
294
  when 'ret'
343
- commit[]
295
+ # TODO no to_normal => ret, else jmp
344
296
  ret = nil
345
- ret = C::CExpression[ceb[eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
297
+ ret = C::CExpression[ce[eax]] unless func.type.type.kind_of?(C::BaseType) and func.type.type.name == :void
346
298
  stmts << C::Return.new(ret).with_misc(:di_addr => di_addr)
347
299
  when 'call' # :saveip
300
+ # TODO check actual funcall VS call/pop
348
301
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
349
- args = []
350
- if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
351
- args = get_func_args[di, f]
352
- elsif defined? @dasm_func_default_off and o = @dasm_func_default_off[[dcmp.dasm, di.address]] and o.kind_of? Integer and o > @size/8
302
+ if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of?(C::Function) and f.type.args
303
+ elsif defined?(@dasm_func_default_off) and o = @dasm_func_default_off[[dcmp.dasm, di.address]] and o.kind_of?(::Integer) and o > @size/8
304
+ f = C::Variable.new
305
+ f.type = C::Function.new(C::BaseType.new(:int), [])
306
+ ((o/(@size/8))-1).times { f.type.args << C::Variable.new(nil, C::BaseType.new(:int)) }
307
+ else
353
308
  f = C::Variable.new
354
309
  f.type = C::Function.new(C::BaseType.new(:int), [])
355
- ((o/(@size/8))-1).times { f.type.args << C::Variable.new(nil,C::BaseType.new(:int)) }
356
- args = get_func_args[di, f]
357
310
  end
358
- commit[]
311
+ args = decompile_get_func_args(dcmp, func_entry, di, f).map { |arg| ce[arg] }
359
312
  #next if not di.block.to_subfuncret
360
313
 
361
- if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function)
314
+ if not n.kind_of?(::String) or (f and not f.type.kind_of?(C::Function))
362
315
  # indirect funcall
363
- fptr = ceb[n]
364
- binding.delete n
316
+ fptr = ce[n]
365
317
  proto = C::Function.new(C::BaseType.new(:int))
366
- proto = f.type if f and f.type.kind_of? C::Function
318
+ proto = f.type if f and f.type.kind_of?(C::Function)
367
319
  f = C::CExpression[[fptr], C::Pointer.new(proto)]
368
320
  elsif not f
369
321
  # internal functions are predeclared, so this one is extern
@@ -375,17 +327,13 @@ class Ia32
375
327
  dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
376
328
  end
377
329
  end
378
- commit[]
379
- binding.delete eax
380
330
  e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di_addr)
381
- e = C::CExpression[ce[eax], :'=', e, f.type.type].with_misc(:di_addr => di_addr) if deps[b].include? eax and f.type.type != C::BaseType.new(:void)
331
+ e = C::CExpression[ce[eax], :'=', e, f.type.type].with_misc(:di_addr => di_addr) if f.type.type != C::BaseType.new(:void)
382
332
  stmts << e
383
333
  when 'jmp'
384
- #if di.comment.to_a.include? 'switch'
334
+ #if di.comment.to_a.include?('switch')
385
335
  # n = di.instruction.args.first.symbolic(di)
386
- # fptr = ceb[n]
387
- # binding.delete n
388
- # commit[]
336
+ # fptr = ce[n]
389
337
  # sw = C::Switch.new(fptr, C::Block.new(scope))
390
338
  # di.block.to_normal.to_a.each { |addr|
391
339
  # addr = dcmp.dasm.normalize addr
@@ -395,17 +343,15 @@ class Ia32
395
343
  # }
396
344
  # stmts << sw
397
345
  a = di.instruction.args.first
398
- if a.kind_of? Expression
399
- elsif not a.respond_to? :symbolic
346
+ if a.kind_of?(Expression)
347
+ elsif not a.respond_to?(:symbolic)
400
348
  stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil).with_misc(:di_addr => di_addr)
401
349
  else
402
350
  n = di.instruction.args.first.symbolic(di)
403
- fptr = ceb[n]
404
- binding.delete n
405
- commit[]
406
- if fptr.kind_of? C::CExpression and fptr.type.pointer? and fptr.type.untypedef.type.kind_of? C::Function
351
+ fptr = ce[n]
352
+ if fptr.kind_of?(C::CExpression) and fptr.type.pointer? and fptr.type.untypedef.type.kind_of?(C::Function)
407
353
  proto = fptr.type.untypedef.type
408
- args = get_func_args[di, fptr.type]
354
+ args = decompile_get_func_args(dcmp, func_entry, di, fptr.type).map { |arg| ce[arg] }
409
355
  else
410
356
  proto = C::Function.new(C::BaseType.new(:void))
411
357
  fptr = C::CExpression[[fptr], C::Pointer.new(proto)].with_misc(:di_addr => di_addr)
@@ -415,110 +361,27 @@ class Ia32
415
361
  stmts << ret
416
362
  to = []
417
363
  end
418
- when 'lgdt'
419
- if not dcmp.c_parser.toplevel.struct['segment_descriptor']
420
- dcmp.c_parser.parse('struct segment_descriptor { __int16 limit; __int16 base0_16; __int8 base16_24; __int8 flags1; __int8 flags2_limit_16_20; __int8 base24_32; };')
421
- dcmp.c_parser.parse('struct segment_table { __int16 size; struct segment_descriptor *table; } __attribute__((pack(2)));')
422
- end
423
- if not dcmp.c_parser.toplevel.symbol['intrinsic_lgdt']
424
- dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);')
425
- end
426
- # need a way to transform arg => :frameptr+12
427
- arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
428
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
429
- when 'lidt'
430
- if not dcmp.c_parser.toplevel.struct['interrupt_descriptor']
431
- dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
432
- dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));')
433
- end
434
- if not dcmp.c_parser.toplevel.symbol['intrinsic_lidt']
435
- dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
436
- end
437
- arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
438
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
439
- when 'ltr', 'lldt'
440
- if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
441
- dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
442
- end
443
- arg = di.backtrace_binding.keys.first
444
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
445
- when 'out'
446
- sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
447
- if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
448
- dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
449
- end
450
- port = di.instruction.args.grep(Expression).first || edx
451
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[eax]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
452
- when 'in'
453
- sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
454
- if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
455
- dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
456
- end
457
- port = di.instruction.args.grep(Expression).first || edx
458
- f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
459
- binding.delete eax
460
- stmts << C::CExpression.new(ce[eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type).with_misc(:di_addr => di_addr)
461
- when 'sti', 'cli'
462
- stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
463
- when /^(mov|sto|lod)s([bwdq])/
464
- op, sz = $1, $2
465
- commit[]
466
- sz = { 'b' => 1, 'w' => 2, 'd' => 4, 'q' => 8 }[sz]
467
- pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym))
468
-
469
- blk = C::Block.new(scope)
470
- case op
471
- when 'mov'
472
- blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
473
- blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
474
- blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
475
- when 'sto'
476
- blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', ceb[eax]].with_misc(:di_addr => di_addr)
477
- blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
478
- when 'lod'
479
- blk.statements << C::CExpression[ceb[eax], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
480
- blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
481
- #when 'sca'
482
- #when 'cmp'
483
- end
484
-
485
- case (di.instruction.prefix || {})[:rep]
486
- when nil
487
- stmts.concat blk.statements
488
- when 'rep'
489
- blk.statements << C::CExpression[ceb[ecx], :'=', [ceb[ecx], :-, [1]]].with_misc(:di_addr => di_addr)
490
- stmts << C::While.new(C::CExpression[ceb[ecx]], blk).with_misc(:di_addr => di_addr)
491
- #when 'repz' # sca/cmp only
492
- #when 'repnz'
493
- end
494
- next
495
364
  else
496
365
  bd = get_fwdemu_binding(di)
497
366
  if di.backtrace_binding[:incomplete_binding]
498
- commit[]
499
367
  stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di_addr)
500
368
  else
501
369
  update = {}
502
370
  bd.each { |k, v|
503
- if k.kind_of? ::Symbol and not deps[b].include? k
504
- ops << [k, v]
505
- update[k] = Expression[Expression[v].bind(binding).reduce]
506
- else
507
- stmts << ceb[k, :'=', v]
508
- stmts.pop if stmts.last.kind_of? C::Variable # [:eflag_s, :=, :unknown].reduce
509
- end
371
+ stmts << ce[k, :'=', v]
372
+ stmts.pop if stmts.last.kind_of?(C::Variable) # [:eflag_s, :=, :unknown].reduce
510
373
  }
511
- binding.update update
512
374
  end
513
375
  end
514
376
  di_addr = nil
515
377
  }
516
- commit[]
517
378
 
518
379
  case to.length
519
380
  when 0
520
- if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
521
- puts " block #{Expression[b]} has no to and don't end in ret"
381
+ if not myblocks.empty? and not %w[ret jmp].include?(dcmp.dasm.decoded[b].block.list.last.instruction.opname)
382
+ if not stmts.last.kind_of?(C::CExpression) or stmts.last.op != :funcall or not stmts.last.lexpr.has_attribute('noreturn')
383
+ puts " block #{Expression[b]} has no to and don't end in ret"
384
+ end
522
385
  end
523
386
  when 1
524
387
  if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
@@ -554,7 +417,7 @@ class Ia32
554
417
  #func.add_attribute 'noreturn'
555
418
  else
556
419
  adj = f.return_address.map { |ra_| dcmp.dasm.backtrace(:esp, ra_, :include_start => true, :stopaddr => entry) }.flatten.uniq
557
- if adj.length == 1 and so = Expression[adj.first, :-, :esp].reduce and so.kind_of? ::Integer
420
+ if adj.length == 1 and so = Expression[adj.first, :-, :esp].reduce and so.kind_of?(::Integer)
558
421
  argsz = a.map { |fa|
559
422
  next if not fa.stackoff
560
423
  (fa.stackoff + [dcmp.sizeof(fa), dcmp.c_parser.typesize[:ptr]].max-1) / dcmp.c_parser.typesize[:ptr]