metasm 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/Gemfile +3 -3
  4. data/Rakefile +1 -1
  5. data/cortex.yaml +17 -0
  6. data/metasm/cpu/arm64/decode.rb +87 -11
  7. data/metasm/cpu/arm64/decompile.rb +142 -0
  8. data/metasm/cpu/arm64/opcodes.rb +53 -23
  9. data/metasm/cpu/arm64.rb +1 -0
  10. data/metasm/cpu/dwarf/debug.rb +39 -0
  11. data/metasm/cpu/dwarf/decode.rb +124 -0
  12. data/metasm/cpu/dwarf/decompile.rb +212 -0
  13. data/metasm/cpu/dwarf/encode.rb +49 -0
  14. data/metasm/cpu/dwarf/main.rb +37 -0
  15. data/metasm/cpu/dwarf/opcodes.rb +107 -0
  16. data/metasm/cpu/dwarf.rb +11 -0
  17. data/metasm/cpu/ia32/debug.rb +8 -0
  18. data/metasm/cpu/ia32/decode.rb +25 -1
  19. data/metasm/cpu/ia32/decompile.rb +205 -342
  20. data/metasm/cpu/mips/decode.rb +1 -1
  21. data/metasm/cpu/ppc/decode.rb +1 -1
  22. data/metasm/cpu/sh4/decode.rb +1 -1
  23. data/metasm/cpu/x86_64/decompile.rb +68 -0
  24. data/metasm/cpu/x86_64.rb +1 -0
  25. data/metasm/decode.rb +14 -0
  26. data/metasm/decompile.rb +51 -27
  27. data/metasm/disassemble.rb +24 -15
  28. data/metasm/dynldr.rb +23 -4
  29. data/metasm/encode.rb +11 -0
  30. data/metasm/exe_format/elf.rb +60 -2
  31. data/metasm/exe_format/elf_decode.rb +201 -6
  32. data/metasm/exe_format/shellcode.rb +39 -0
  33. data/metasm/gui/dasm_decomp.rb +1 -0
  34. data/metasm/os/emulator.rb +7 -0
  35. data/metasm/parse_c.rb +1 -1
  36. data/metasm.gemspec +1 -2
  37. data/metasm.rb +1 -1
  38. data/samples/disassemble-gui.rb +27 -11
  39. data/samples/disassemble.rb +9 -12
  40. data/samples/emudbg.rb +1 -1
  41. data/samples/factorize-headers-elfimports.rb +4 -1
  42. data/samples/lindebug.rb +16 -2
  43. data/tests/shellcode.rb +111 -0
  44. metadata +19 -102
  45. checksums.yaml.gz.sig +0 -3
  46. data.tar.gz.sig +0 -0
  47. metadata.gz.sig +0 -0
@@ -0,0 +1,212 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/dwarf/main'
8
+
9
+ module Metasm
10
+ class Dwarf
11
# Walks +blocks+ and records, for each of them, the value of :opstack
# expressed relative to :frameptr, then yields the block.
#
# dasm      - disassembler; its address_binding and backtrace are used
# funcstart - address of the function entry, where :opstack is seeded with Expression[:frameptr]
# blocks    - blocks of the function; the propagation comment below assumes code order
#
# While the method runs, dasm.address_binding[block.address] is replaced by
# a scratch hash for every block. The previous content is put back when the
# method exits, including when the caller's block raises, so a failure
# does not leave the scratch bindings behind in the disassembler.
#
# Side effects kept after return: the first instruction of each block gets
# misc[:opstack_before], the last one misc[:opstack_after].
#
# Returns the hash { block address => binding that was saved (nil if none) }.
def decompile_makestackvars(dasm, funcstart, blocks)
  oldbd = {}
  begin
    blocks.each { |block|
      oldbd[block.address] = dasm.address_binding[block.address]
      dasm.address_binding[block.address] = {}
    }

    dasm.address_binding[funcstart][:opstack] = Expression[:frameptr]
    blocks.each { |block|
      # cache the value of opstack wrt :frameptr at each block entry
      if not stkoff = dasm.address_binding[block.address][:opstack]
        stkoff = dasm.backtrace(:opstack, block.address, :snapshot_addr => funcstart)
        # conserve the minimum offset in case of conflicts
        stkoff = Expression[:frameptr] + stkoff.map { |so| so - :frameptr }.min
      end
      dasm.address_binding[block.address][:opstack] = stkoff
      block.list.first.misc ||= {}
      block.list.first.misc[:opstack_before] = stkoff

      # compute the value at the end of the block and propagate as start value for next blocks
      # allows coherent tracing along all paths if blocks are walked in code order, even with loops with stack leak/consume
      last_di = block.list.last
      stkoff = dasm.backtrace(:opstack, last_di.address, :snapshot_addr => funcstart, :include_start => true).first
      last_di.misc ||= {}
      last_di.misc[:opstack_after] = stkoff
      block.each_to_normal { |at|
        # only seed successors that belong to the blocks being processed and have no value yet
        dasm.address_binding[at][:opstack] ||= stkoff if dasm.address_binding[at]
      }

      yield block
    }
  ensure
    # put back whatever was saved, even on error; drop entries that did not exist before
    oldbd.each { |a, b| b ? dasm.address_binding[a] = b : dasm.address_binding.delete(a) }
  end
  oldbd
end
44
+
45
# Per-instruction dependency hook: intentionally does nothing for this
# CPU and evaluates to nil whatever the arguments are.
def decompile_func_finddeps_di(dcmp, func, di, a, w)
  nil
end
47
+
48
# Function-level dependency hook: no dependency is computed for this CPU,
# a new empty Hash is handed back on every call.
def decompile_func_finddeps(dcmp, blocks, func)
  Hash.new
end
51
+
52
# Translates the disassembled blocks of one function into C statements
# appended to func.initializer.statements.
#
# dcmp     - decompiler driving the translation (its dasm, decompile_cexpr,
#            walk_ce and backtrace_target are used)
# myblocks - array of [block address, [successor addresses]]; it is consumed
#            (shifted) while the blocks are processed
# deps     - not used by this implementation
# func     - C function receiving the statements and the local declarations
# nextaddr - address expected right after the last block; when the single
#            successor of the last block is this address no goto is emitted
#
# Every operand stack slot (offset from frameptr divided by 8) is replaced
# by a C local named loc_<slot>, declared the first time it is needed.
# A scratch local loc_tmp is used for 'swap' and 'rot'.
#
# Returns the list of blocks whose instructions had their
# backtrace_binding reset.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  scope = func.initializer
  stmts = scope.statements

  # TODO handle loops pushing/poping the stack (:opstack_before loop.first != :opstack_after loop.last)

  # opstack slot => current C variable
  opstack = {}

  # *(_int32*)(local_base+16) => 16
  # returns nil when ee is not a dereference of base +/- integer
  ce_ptr_offset = lambda { |ee, base|
    next unless ee.kind_of?(C::CExpression) && ee.op == :* && !ee.lexpr
    cast = ee.rexpr
    next unless cast.kind_of?(C::CExpression) && !cast.op
    ptr = cast.rexpr
    next unless ptr.kind_of?(C::CExpression)

    if !ptr.op && ptr.rexpr.kind_of?(C::Variable) && ptr.rexpr.name == base
      0
    elsif ptr.lexpr.kind_of?(C::Variable) && ptr.lexpr.name == base &&
          ptr.rexpr.kind_of?(C::CExpression) && !ptr.rexpr.op && ptr.rexpr.rexpr.kind_of?(::Integer)
      if ptr.op == :+
        ptr.rexpr.rexpr
      elsif ptr.op == :-
        -ptr.rexpr.rexpr
      end
    end
  }
  ce_opstack_offset = lambda { |ee| ce_ptr_offset[ee, 'frameptr'] }

  basetype = C::BaseType.new("__int#@size".to_sym)
  new_opstack_var = lambda { |off|
    varname = "loc_#{off}"
    ne = C::Variable.new(varname, basetype)
    scope.symbol[varname] = ne
    stmts << C::Declaration.new(ne)
    ne
  }
  get_opstack_var = lambda { |off|
    opstack[off] ||= new_opstack_var[off]
  }

  # lowest opstack slot written by a binding (its keys are scanned), nil if
  # no key designates an opstack slot
  min_bound_slot = lambda { |bd|
    offs = []
    bd.each_key { |k|
      cvar = dcmp.decompile_cexpr(Expression[Expression[k].reduce], scope)
      if soff = ce_opstack_offset[cvar]
        offs << (soff/8)
      end
    }
    offs.min
  }

  # statement used when an instruction cannot be expressed in C
  raw_asm = lambda { |di|
    C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di.address)
  }

  di_addr = nil

  # Expr => CExpr
  ce = lambda { |*e|
    c_expr = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
    dcmp.walk_ce(c_expr, true) { |ee|
      if soff = ce_opstack_offset[ee.rexpr]
        # must do soff.rexpr before lexpr in case of reaffectation !
        ee.rexpr = get_opstack_var[soff/8]
        ee.rexpr = C::CExpression[ee.rexpr] if not ee.op and ee.type.pointer?
      end
      if soff = ce_opstack_offset[ee.lexpr]
        ee.lexpr = get_opstack_var[soff/8]
      end
    }
    ret = if soff = ce_opstack_offset[c_expr]
      C::CExpression[get_opstack_var[soff/8]]
    else
      c_expr
    end
    dcmp.walk_ce(ret) { |ee| ee.with_misc :di_addr => di_addr } if di_addr
    ret
  }


  blocks_toclean = myblocks.dup
  until myblocks.empty?
    b, to = myblocks.shift
    if l = dcmp.dasm.get_label_at(b)
      stmts << C::Label.new(l).with_misc(:di_addr => b)
    end

    # go !
    di_list = dcmp.dasm.decoded[b].block.list.dup
    di_list.each { |di|
      di_addr = di.address
      bd = get_fwdemu_binding(di)
      case di.opcode.name
      when 'bra'
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        stacktop = ce[get_opstack_var[(bd[:opstack] - :frameptr + 8) / 8]]
        stmts << C::If.new(C::CExpression[stacktop], C::Goto.new(n).with_misc(:di_addr => di.address)).with_misc(:di_addr => di.address)
        stmts << ce[stacktop, :'=', 0]
        # XXX does not assign cond = 0 in if as decompiler may expect only single goto in if body
        to.delete dcmp.dasm.normalize(n)
      when 'swap'
        if off = min_bound_slot[bd]
          stmts << ce[get_opstack_var[:tmp], :'=', get_opstack_var[off]]
          stmts << ce[get_opstack_var[off], :'=', get_opstack_var[off+1]]
          stmts << ce[get_opstack_var[off+1], :'=', get_opstack_var[:tmp]]
          stmts << ce[get_opstack_var[:tmp], :'=', 0]
        else
          # no slot could be identified: keep the raw instruction instead of
          # failing on nil arithmetic
          stmts << raw_asm[di]
        end
      when 'rot'
        if off = min_bound_slot[bd]
          stmts << ce[get_opstack_var[:tmp], :'=', get_opstack_var[off]]
          stmts << ce[get_opstack_var[off], :'=', get_opstack_var[off+2]]
          stmts << ce[get_opstack_var[off+2], :'=', get_opstack_var[off+1]]
          stmts << ce[get_opstack_var[off+1], :'=', get_opstack_var[:tmp]]
          stmts << ce[get_opstack_var[:tmp], :'=', 0]
        else
          # same fallback as for 'swap'
          stmts << raw_asm[di]
        end
      else
        if di.backtrace_binding[:incomplete_binding]
          stmts << raw_asm[di]
        else
          bd.each { |k, v|
            next if k == :opstack
            e = ce[k, :'=', v]
            stmts << e if not e.kind_of?(C::Variable)	# [:eflag_s, :=, :unknown].reduce
          }
          rawbd = dcmp.disassembler.cpu.get_backtrace_binding(di)
          # the instruction released one stack slot: clear the variable that held it
          if rawbd[:opstack] == Expression[:opstack, :-, 8] and (bd[:opstack] - :frameptr).kind_of?(::Integer)
            stacktop = ce[get_opstack_var[(bd[:opstack] - :frameptr + 8) / 8]]
            stmts << ce[stacktop, :'=', 0]
          end
        end
      end
      di_addr = nil
    }

    case to.length
    when 0
      if not myblocks.empty? and not stmts.last.kind_of?(C::Return)
        puts " block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        if dcmp.dasm.decoded[to[0]]
          stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
        else
          # successor is not decoded code: return the slot designated by opstack at block end
          stmts << C::Return.new(C::CExpression[ce[get_opstack_var[(di_list.last.misc[:opstack_after] - :frameptr)/8]]])
        end
      end
    else
      puts " block #{Expression[b]} with multiple to"
    end
  end

  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  blocks_toclean.each { |b_, to_|
    dcmp.dasm.decoded[b_].block.list.each { |di|
      di.backtrace_binding = nil
    }
  }
end
208
+
209
# ABI verification hook: nothing is checked for this CPU, the call
# evaluates to nil.
def decompile_check_abi(dcmp, entry, func)
  nil
end
211
+ end
212
+ end
@@ -0,0 +1,49 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/dwarf/opcodes'
8
+ require 'metasm/parse'
9
+
10
+ module Metasm
11
+ class Dwarf
12
# Reads one instruction argument from +lexer+.
#
# lexer - token source; a String is first wrapped in an AsmPreprocessor
#
# Returns a Reg when the next token is exactly 'r' followed by decimal
# digits (r0, r12, ...), the result of Expression.parse otherwise, or nil
# when no token is left.
#
# The register pattern is anchored on both ends: an identifier that merely
# starts like a register name (e.g. a label called r2d2) must go through
# the expression parser instead of being truncated to a register.
def parse_argument(lexer)
  lexer = AsmPreprocessor.new(lexer) if lexer.kind_of?(String)
  lexer.skip_space
  return if not tok = lexer.readtok

  if tok.raw =~ /\Ar(\d+)\z/
    Reg.new(Regexp.last_match(1).to_i)
  else
    # not a register: give the token back and let the expression parser handle it
    lexer.unreadtok tok
    expr = Expression.parse(lexer)
    lexer.skip_space
    expr
  end
end
26
+
27
# Tells whether +arg+ is acceptable for the argument slot +spec+ of opcode +o+.
# Only presence is tested for now: the value is +arg+ when +spec+ is
# truthy, +spec+ itself (nil/false) otherwise.
def parse_arg_valid?(o, spec, arg)
  # TODO check :reg and :imm spec, :uXX range
  spec && arg
end
31
+
32
+
33
# Encodes instruction +i+ with the opcode definition +op+.
#
# program - not used by this implementation
# i       - instruction; i.args are matched positionally with op.args
# op      - opcode; op.bin is emitted first, packed with 'C*'
#
# Returns an EncodedData holding the opcode followed by its encoded operands.
# Raises RuntimeError for operand kinds that are not implemented.
def encode_instr_op(program, i, op)
  ed = EncodedData.new([op.bin].pack('C*'))
  op.args.zip(i.args).each do |kind, value|
    case kind
    when :reg
      # nothing is emitted for this operand kind
    when :imm
      raise "TODO encode imm"
    when :i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64
      ed << value.encode(kind, @endianness)
    when :addr
      # addresses use the unsigned integer type matching the CPU size
      ed << value.encode(:"u#{@size}", @endianness)
    when :uleb
      ed << value.encode_leb(false)
    when :sleb
      ed << value.encode_leb(true)
    else
      raise "TODO encode op #{kind} #{value}"
    end
  end
  ed
end
48
+ end
49
+ end
@@ -0,0 +1,37 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2010 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/main'
7
+
8
+ module Metasm
9
+ class Dwarf < CPU
10
# Accepts, in any order, an Integer (stored in @size, 64 when absent)
# and :little or :big (stored in @endianness, :little when absent).
def initialize(*args)
  super()
  @size = args.find { |a| a.kind_of?(Integer) } || 64
  endian = args.delete(:little)
  endian ||= args.delete(:big)
  @endianness = endian || :little
end
15
+
16
# A register operand, identified by its number.
class Reg
  include Renderable

  # register number
  attr_accessor :i

  def initialize(i)
    @i = i
  end

  # Symbol naming the register, e.g. :r3 (the di argument is ignored)
  def symbolic(di=nil)
    :"r#{@i}"
  end

  # elements used to display the register
  def render
    ["r#{@i}"]
  end
end
32
+
33
# Builds the opcode list by delegating to #init; the value of #init is
# handed back unchanged.
def init_opcode_list
  init()
end
36
+ end
37
+ end
@@ -0,0 +1,107 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2010 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/cpu/dwarf/main'
7
+
8
+ module Metasm
9
+ class Dwarf
10
# Declares one opcode and appends it to @opcode_list.
#
# name - mnemonic
# bin  - opcode value
# args - each extra argument is interpreted, in this order, as:
#        a key of @valid_props => stored as a true property,
#        any other Symbol      => appended to the opcode arguments,
#        a Hash                => merged into the opcode properties.
#
# Raises RuntimeError for any other kind of argument.
# Returns @opcode_list.
def addop(name, bin, *args)
  opcode = Opcode.new(name, bin)

  args.each do |arg|
    if @valid_props[arg]
      opcode.props[arg] = true
      next
    end

    case arg
    when ::Symbol
      opcode.args << arg
    when ::Hash
      opcode.props.update(arg)
    else
      raise "Internal error #{arg.inspect}"
    end
  end

  @opcode_list << opcode
end
27
+
28
# Declares a family of 0x20 consecutive opcodes named +name+, with values
# bin, bin+1, ... bin+0x1f. Each one gets an :imm argument placed before
# +args+ and an :imm property holding its index in the family.
def addop_20(name, bin, *args)
  0x20.times do |idx|
    addop(name, bin + idx, :imm, *args, :imm => idx)
  end
end
33
+
34
# Fills @opcode_list with the opcode definitions and sets the list of
# symbols that addop treats as properties (@valid_props).
#
# Operand kinds: :uN/:iN fixed size integers, :uleb/:sleb unsigned/signed
# LEB128, :addr an address. Several mnemonics are declared twice on the
# same value to provide an alias.
def init
  @opcode_list = []
  @valid_props = { :setip => true, :stopexec => true }

  addop 'addr', 0x03, :addr

  addop 'lit', 0x08, :u8
  addop 'lit', 0x09, :i8
  addop 'lit', 0x0A, :u16
  addop 'lit', 0x0B, :i16
  addop 'lit', 0x0C, :u32
  addop 'lit', 0x0D, :i32
  addop 'lit', 0x0E, :u64
  addop 'lit', 0x0F, :i64
  addop 'lit', 0x10, :uleb
  addop 'lit', 0x11, :sleb
  addop 'dup', 0x12
  addop 'drop', 0x13
  addop 'over', 0x14
  addop 'pick', 0x15, :u8
  addop 'swap', 0x16
  addop 'rot', 0x17
  addop 'xderef', 0x18

  # push(op(pop()))
  addop 'deref', 0x06
  addop 'abs', 0x19
  addop 'neg', 0x1F
  addop 'not', 0x20
  addop 'add_u', 0x23, :uleb	# real name plus_u
  addop 'plus_u', 0x23, :uleb
  addop 'deref_size', 0x94, :u8

  # push(op(stk[-2], stk[-1]))) pop args
  addop 'and', 0x1A
  addop 'div', 0x1B
  addop 'sub', 0x1C	# real name is 'minus'
  addop 'minus', 0x1C
  addop 'mod', 0x1D
  addop 'mul', 0x1E
  addop 'or', 0x21
  addop 'add', 0x22
  addop 'plus', 0x22
  addop 'shl', 0x24
  addop 'shr', 0x25
  addop 'shra', 0x26
  addop 'xor', 0x27

  addop 'bra', 0x28, :i16, :setip	# branch if top of stack not null

  # pop(op(stk[-1], stk[-2])) pop args
  addop 'eq', 0x29
  addop 'ge', 0x2A
  addop 'gt', 0x2B
  addop 'le', 0x2C
  addop 'lt', 0x2D
  addop 'ne', 0x2E

  addop 'skip', 0x2F, :i16, :setip, :stopexec

  addop_20 'lit', 0x30
  addop_20 'reg', 0x50, :reg
  addop_20 'breg', 0x70, :sleb, :reg

  addop 'reg', 0x90, :uleb, :reg
  # DW_OP_fbreg carries a *signed* LEB128 offset from the frame base
  # (locals usually sit at negative offsets), same operand kind as breg
  addop 'fbreg', 0x91, :sleb
  addop 'breg', 0x92, :uleb, :sleb, :reg
  # NOTE(review): DWARF defines a ULEB128 size operand for DW_OP_piece and
  # a 1-byte size operand for DW_OP_xderef_size; none is declared here.
  # Left as is since it changes the accepted syntax - to be confirmed
  # against the decoder.
  addop 'piece', 0x93
  addop 'xderef_size', 0x95
  addop 'nop', 0x96
  addop 'addr', 0xF1, :gnu	# GNU_encoded_addr
end
106
+ end
107
+ end
@@ -0,0 +1,11 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/cpu/dwarf/decode'
9
+ require 'metasm/cpu/dwarf/encode'
10
+ require 'metasm/cpu/dwarf/decompile'
11
+ require 'metasm/cpu/dwarf/debug'
@@ -227,5 +227,13 @@ class Ia32
227
227
  fbd[pc_reg] = di.next_addr
228
228
  end
229
229
  end
230
+
231
# Prepares an emulated debugger session: maps a zero-filled 0x10000 byte
# section in the disassembler and points one register into it.
#
# The section is placed at the first multiple of 0x10000 (starting at
# 0x10000) for which the disassembler reports no existing section.
# The register is entry 7 of dbg_register_list (presumably the stack
# pointer - TODO confirm) and receives section address + 0xf000.
def initialize_emudbg(dbg)
  stack = EncodedData.new("\x00" * 0x10000)

  stack_addr = 0x10000
  while dbg.disassembler.get_section_at(stack_addr)
    stack_addr += 0x10000
  end

  dbg.disassembler.add_section(stack, stack_addr)
  stack_top = stack_addr + 0xf000
  dbg.set_reg_value(dbg_register_list[7], stack_top)
end
230
238
  end
231
239
  end
@@ -542,7 +542,31 @@ class Ia32
542
542
  Expression[edx, :&, m] => Expression[[e, :>>, opsz(di)], :&, m] }
543
543
  end
544
544
  }
545
- when 'div', 'idiv'; lambda { |di, *a| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
545
+ when 'div', 'idiv'; lambda { |di, a0|
546
+ # TODO idiv => signed
547
+ case opsz(di)
548
+ when 8
549
+ src = Expression[eax, :&, 0xffff]
550
+ quot = Expression[[src, :/, a0], :&, 0xff]
551
+ rem = Expression[[src, :%, a0], :&, 0xff]
552
+ { Expression[eax, :&, 0xffff] => Expression[quot, :|, [rem, :<<, 8]] }
553
+ when 16
554
+ src = Expression[[eax, :&, 0xffff], :|, [[edx, :&, 0xffff], :<<, 16]]
555
+ quot = Expression[[src, :/, a0], :&, 0xffff]
556
+ rem = Expression[[src, :%, a0], :&, 0xffff]
557
+ { Expression[eax, :&, 0xffff] => quot, Expression[edx, :&, 0xffff] => rem }
558
+ when 32
559
+ src = Expression[[eax, :&, 0xffffffff], :|, [[edx, :&, 0xffffffff], :<<, 32]]
560
+ quot = Expression[[src, :/, a0], :&, 0xffffffff]
561
+ rem = Expression[[src, :%, a0], :&, 0xffffffff]
562
+ { Expression[eax, :&, 0xffffffff] => quot, Expression[edx, :&, 0xffffffff] => rem }
563
+ when 64
564
+ src = Expression[eax, :|, [edx, :<<, 64]]
565
+ quot = Expression[src, :/, a0]
566
+ rem = Expression[src, :%, a0]
567
+ { eax => quot, edx => rem }
568
+ end
569
+ }
546
570
  when 'rdtsc'; lambda { |di| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
547
571
  when /^(stos|movs|lods|scas|cmps)[bwdq]$/
548
572
  lambda { |di, *a|