metasm 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/Gemfile +3 -3
- data/Rakefile +1 -1
- data/cortex.yaml +17 -0
- data/metasm/cpu/arm64/decode.rb +87 -11
- data/metasm/cpu/arm64/decompile.rb +142 -0
- data/metasm/cpu/arm64/opcodes.rb +53 -23
- data/metasm/cpu/arm64.rb +1 -0
- data/metasm/cpu/dwarf/debug.rb +39 -0
- data/metasm/cpu/dwarf/decode.rb +124 -0
- data/metasm/cpu/dwarf/decompile.rb +212 -0
- data/metasm/cpu/dwarf/encode.rb +49 -0
- data/metasm/cpu/dwarf/main.rb +37 -0
- data/metasm/cpu/dwarf/opcodes.rb +107 -0
- data/metasm/cpu/dwarf.rb +11 -0
- data/metasm/cpu/ia32/debug.rb +8 -0
- data/metasm/cpu/ia32/decode.rb +25 -1
- data/metasm/cpu/ia32/decompile.rb +205 -342
- data/metasm/cpu/mips/decode.rb +1 -1
- data/metasm/cpu/ppc/decode.rb +1 -1
- data/metasm/cpu/sh4/decode.rb +1 -1
- data/metasm/cpu/x86_64/decompile.rb +68 -0
- data/metasm/cpu/x86_64.rb +1 -0
- data/metasm/decode.rb +14 -0
- data/metasm/decompile.rb +51 -27
- data/metasm/disassemble.rb +24 -15
- data/metasm/dynldr.rb +23 -4
- data/metasm/encode.rb +11 -0
- data/metasm/exe_format/elf.rb +60 -2
- data/metasm/exe_format/elf_decode.rb +201 -6
- data/metasm/exe_format/shellcode.rb +39 -0
- data/metasm/gui/dasm_decomp.rb +1 -0
- data/metasm/os/emulator.rb +7 -0
- data/metasm/parse_c.rb +1 -1
- data/metasm.gemspec +1 -2
- data/metasm.rb +1 -1
- data/samples/disassemble-gui.rb +27 -11
- data/samples/disassemble.rb +9 -12
- data/samples/emudbg.rb +1 -1
- data/samples/factorize-headers-elfimports.rb +4 -1
- data/samples/lindebug.rb +16 -2
- data/tests/shellcode.rb +111 -0
- metadata +19 -102
- checksums.yaml.gz.sig +0 -3
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
data/metasm/cpu/mips/decode.rb
CHANGED
data/metasm/cpu/ppc/decode.rb
CHANGED
data/metasm/cpu/sh4/decode.rb
CHANGED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
require 'metasm/cpu/x86_64/main'
|
|
8
|
+
|
|
9
|
+
module Metasm
# Decompiler hooks specific to x86_64: calling-convention description and
# argument recovery for function calls / function prototypes.
class X86_64
  # Describes how integer arguments are passed for the program being decompiled.
  #
  # Returns a Hash with:
  #   :args     => ordered list of argument registers (Symbols)
  #   :stackoff => offset (from :frameptr) of the first stack-passed argument
  #
  # The result is memoized in @func_abi: the first call decides the ABI for
  # this cpu object, later calls ignore +dcmp+.
  def func_abi(dcmp)
    # TODO check cpu.abi_funcall
    @func_abi ||=
      if dcmp.dasm.program.shortname == 'coff'
        # Microsoft x64 convention: rcx, rdx, r8, r9 (r10/r11 are scratch
        # registers, not argument registers).
        # NOTE(review): 32 is presumably the caller-allocated shadow space - confirm
        { :args => [:rcx, :rdx, :r8, :r9], :stackoff => 32 }	# TODO
      else
        # System V AMD64 convention: six integer argument registers
        { :args => [:rdi, :rsi, :rdx, :rcx, :r8, :r9], :stackoff => 0 }	# XXX saved rip offset ?
      end
  end

  # return the array of arguments (symbols, indirections wrt frameptr) to be used as arguments for decompilation of the function call in di
  #
  # dcmp is only used to select the ABI (see func_abi); func_entry and di are
  # not used here. f is the C function whose prototype (f.type.args) drives
  # the result. For each prototype argument, in order:
  # * a 'register' attribute forces that register (and removes it from the
  #   pool of ABI registers still available)
  # * a 'stackoff' attribute forces that frameptr-relative 8-byte slot
  # * otherwise the next free ABI register is used, and once these are
  #   exhausted, consecutive 8-byte stack slots starting at the ABI :stackoff
  def decompile_get_func_args(dcmp, func_entry, di, f)
    abi = func_abi(dcmp)
    free_regs = abi[:args].dup	# dup: the memoized ABI table must stay intact
    next_stackoff = abi[:stackoff]

    f.type.args.to_a.map { |a|
      if r = a.has_attribute_var('register')
        e = Expression[r.to_sym]
        free_regs.delete r.to_sym
        e
      elsif o = a.has_attribute_var('stackoff')
        Indirection[[:frameptr, :+, Integer(o)], 8]
      elsif free_regs.empty?
        slot = Indirection[[:frameptr, :+, next_stackoff], 8]
        next_stackoff += 8
        slot
      else
        Expression[free_regs.shift]
      end
    }
  end

  # Normalizes the argument list of the decompiled function +func+ against
  # the ABI: func.type.args is modified in place (nothing happens if it is nil).
  # entry is currently unused (see the TODO at the end).
  # Returns the (reordered) argument array.
  def decompile_check_abi(dcmp, entry, func)
    # 'register' attribute values are compared as Strings
    abi_regargs = func_abi(dcmp)[:args].map { |ra| ra.to_s }
    a = func.type.args || []

    # delete unused regs not part of the ABI
    a.delete_if { |arg| arg.has_attribute('unused') and ra = arg.has_attribute_var('register') and not abi_regargs.index(ra) }

    # delete last regs of the ABI if unused
    # (stop at the first ABI reg, from the end, that is really used:
    # every reg before it must stay to keep the argument positions)
    abi_regargs.reverse.each { |ra|
      break if a.find { |arg| arg.has_attribute_var('register') == ra and not arg.has_attribute('unused') }
      a.delete_if { |arg| arg.has_attribute('unused') and arg.has_attribute_var('register') == ra }
    }

    # reorder ABI regs according to ABI
    # non-ABI args keep their relative order, after the ABI regs
    # the position is carried along with each arg, so the array is not
    # searched (nor read at all) while it is being reordered
    ordered = a.each_with_index.sort_by { |arg, i|
      abi_regargs.index(arg.has_attribute_var('register')) || (1000 + i)
    }.map { |arg, i| arg }
    a.replace(ordered)

    # TODO
    #if not f = dcmp.dasm.function[entry] or not f.return_address
    #func.add_attribute 'noreturn'
    #end
  end
end
end
|
data/metasm/cpu/x86_64.rb
CHANGED
data/metasm/decode.rb
CHANGED
|
@@ -165,6 +165,20 @@ class EncodedData
|
|
|
165
165
|
Expression.decode_imm(read(isz/8), type, endianness)
|
|
166
166
|
end
|
|
167
167
|
alias decode_immediate decode_imm
|
|
168
|
+
|
|
169
|
+
# default cap on the number of bytes consumed by decode_leb
# (enough 7-bit groups to hold a 128-bit value)
LEB_MAX_BYTES = (128.0 / 7).ceil

# decode a LEB128-style variable-length integer at the current position
# each byte read through get_byte carries 7 payload bits, least significant
# group first; a clear high bit (0x80) marks the last byte
# reading also stops after max_bytes bytes, even without a terminating byte
# when signed is true, the value is passed through Expression.make_signed
# along with the number of payload bits read
def decode_leb(signed=false, max_bytes=LEB_MAX_BYTES)
  value = 0
  shift = 0
  bit_limit = 7 * max_bytes
  until shift >= bit_limit
    byte = get_byte
    value |= (byte & 0x7f) << shift
    shift += 7
    break if (byte & 0x80).zero?
  end
  signed ? Expression.make_signed(value, shift) : value
end
|
|
168
182
|
end
|
|
169
183
|
|
|
170
184
|
class Expression
|
data/metasm/decompile.rb
CHANGED
|
@@ -99,7 +99,7 @@ class Decompiler
|
|
|
99
99
|
|
|
100
100
|
# [esp+8] => [:frameptr-12]
|
|
101
101
|
# TODO slow
|
|
102
|
-
makestackvars
|
|
102
|
+
makestackvars(entry, myblocks.map { |b, to| @dasm.decoded[b].block })
|
|
103
103
|
|
|
104
104
|
# find registry dependencies between blocks
|
|
105
105
|
deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func)
|
|
@@ -107,6 +107,10 @@ class Decompiler
|
|
|
107
107
|
scope = func.initializer = C::Block.new(@c_parser.toplevel)
|
|
108
108
|
if df = @dasm.function[entry]
|
|
109
109
|
scope.decompdata = df.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
|
|
110
|
+
if df.noreturn
|
|
111
|
+
func.add_attribute('noreturn')
|
|
112
|
+
func.type.type = C::BaseType.new(:void)
|
|
113
|
+
end
|
|
110
114
|
else
|
|
111
115
|
scope.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
|
|
112
116
|
end
|
|
@@ -140,7 +144,11 @@ class Decompiler
|
|
|
140
144
|
@dasm.cpu.decompile_check_abi(self, entry, func)
|
|
141
145
|
|
|
142
146
|
case ret = scope.statements.last
|
|
143
|
-
when C::CExpression
|
|
147
|
+
when C::CExpression
|
|
148
|
+
if ret.op == :funcall and ret.lexpr.has_attribute('noreturn')
|
|
149
|
+
else
|
|
150
|
+
puts "no return at end of func" if $VERBOSE
|
|
151
|
+
end
|
|
144
152
|
when C::Return
|
|
145
153
|
if not ret.value
|
|
146
154
|
scope.statements.pop
|
|
@@ -368,6 +376,7 @@ class Decompiler
|
|
|
368
376
|
repl_bind = {} # di => bt_bd
|
|
369
377
|
|
|
370
378
|
@dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block|
|
|
379
|
+
blockstart = block.address
|
|
371
380
|
block.list.each { |di|
|
|
372
381
|
bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di)
|
|
373
382
|
newbd = repl_bind[di] = {}
|
|
@@ -423,18 +432,18 @@ class Decompiler
|
|
|
423
432
|
p = C::CExpression[[p], itype]
|
|
424
433
|
C::CExpression[:*, p]
|
|
425
434
|
when ::Integer
|
|
426
|
-
C::CExpression[e]
|
|
435
|
+
C::CExpression[e, C::BaseType.new("__int#{@dasm.cpu.size}".to_sym)]
|
|
427
436
|
when C::CExpression
|
|
428
437
|
e
|
|
429
438
|
else
|
|
430
439
|
name = e.to_s
|
|
431
440
|
if not s = scope.symbol_ancestors[name]
|
|
432
441
|
s = C::Variable.new
|
|
433
|
-
s.type = C::BaseType.new(
|
|
442
|
+
s.type = C::BaseType.new("__int#{@dasm.cpu.size}".to_sym)
|
|
434
443
|
case e
|
|
435
444
|
when ::String # edata relocation (rel.length = size of pointer)
|
|
436
|
-
return @c_parser.toplevel.symbol[e] || new_global_var(e, itype ||
|
|
437
|
-
when ::Symbol; s.
|
|
445
|
+
return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || s.type, scope)
|
|
446
|
+
when ::Symbol; s.add_attribute("register(#{name})")
|
|
438
447
|
else s.type.qualifier = [:volatile]
|
|
439
448
|
puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE
|
|
440
449
|
end
|
|
@@ -571,9 +580,9 @@ class Decompiler
|
|
|
571
580
|
e
|
|
572
581
|
when C::Goto
|
|
573
582
|
if e.target == brk
|
|
574
|
-
C::Break.new
|
|
583
|
+
C::Break.new.with_misc(e.misc)
|
|
575
584
|
elsif e.target == cnt
|
|
576
|
-
C::Continue.new
|
|
585
|
+
C::Continue.new.with_misc(e.misc)
|
|
577
586
|
else e
|
|
578
587
|
end
|
|
579
588
|
else e
|
|
@@ -591,7 +600,7 @@ class Decompiler
|
|
|
591
600
|
}
|
|
592
601
|
walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of?(C::Block) and sst.outer == i.bthen }
|
|
593
602
|
scope.statements.concat i.bthen.statements
|
|
594
|
-
i.bthen = C::Break.new
|
|
603
|
+
i.bthen = C::Break.new.with_misc(i.misc)
|
|
595
604
|
end
|
|
596
605
|
|
|
597
606
|
patch_test = lambda { |ce|
|
|
@@ -631,7 +640,7 @@ class Decompiler
|
|
|
631
640
|
ce.body = ce.body.statements.first
|
|
632
641
|
when 0
|
|
633
642
|
if ce.kind_of?(C::DoWhile) and i = ce.body.outer.statements.index(ce)
|
|
634
|
-
ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body)
|
|
643
|
+
ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body).with_misc(ce.misc)
|
|
635
644
|
end
|
|
636
645
|
ce.body = nil
|
|
637
646
|
end
|
|
@@ -645,13 +654,13 @@ class Decompiler
|
|
|
645
654
|
i = ce.body.statements.last
|
|
646
655
|
if i.kind_of?(C::If) and not i.belse and i.bthen.kind_of?(C::Break)
|
|
647
656
|
ce.body.statements.pop
|
|
648
|
-
next C::DoWhile.new(i.test.negate, ce.body)
|
|
657
|
+
next C::DoWhile.new(i.test.negate, ce.body).with_misc(ce.misc)
|
|
649
658
|
end
|
|
650
659
|
end
|
|
651
660
|
|
|
652
661
|
# if (a) b = 1; else b = 2; => b = a ? 1 : 2
|
|
653
662
|
if ce.kind_of?(C::If) and ce.belse.kind_of?(C::CExpression) and ce.belse.op == :'=' and ce.belse.lexpr.kind_of?(C::Variable) and ce.bthen.kind_of?(C::CExpression) and ce.bthen.op == :'=' and ce.bthen.lexpr == ce.belse.lexpr
|
|
654
|
-
next C::CExpression[ce.bthen.lexpr, :'=', [ce.test, :'?:', [ce.bthen.rexpr, ce.belse.rexpr]]]
|
|
663
|
+
next C::CExpression[ce.bthen.lexpr, :'=', [ce.test, :'?:', [ce.bthen.rexpr, ce.belse.rexpr]]].with_misc(ce.misc)
|
|
655
664
|
end
|
|
656
665
|
}
|
|
657
666
|
|
|
@@ -831,15 +840,15 @@ class Decompiler
|
|
|
831
840
|
ss.bthen.statements.pop
|
|
832
841
|
if l = ary[ssi+1] and l.kind_of?(C::Label)
|
|
833
842
|
ss.bthen.statements.grep(C::If).each { |it|
|
|
834
|
-
it.bthen = C::Break.new if it.bthen.kind_of?(C::Goto) and it.bthen.target == l.name
|
|
843
|
+
it.bthen = C::Break.new.with_misc(it.bthen.misc) if it.bthen.kind_of?(C::Goto) and it.bthen.target == l.name
|
|
835
844
|
}
|
|
836
845
|
end
|
|
837
|
-
ary[ssi] = C::While.new(ss.test, ss.bthen)
|
|
846
|
+
ary[ssi] = C::While.new(ss.test, ss.bthen).with_misc(ss.misc)
|
|
838
847
|
elsif ss.bthen.statements.last.kind_of?(C::Return) and gi = ((si+1)..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name }
|
|
839
848
|
# l: if (a) { b; return; } c; goto l; => while (!a) { c; } b; return;
|
|
840
849
|
wb = C::Block.new(scope)
|
|
841
850
|
wb.statements = decompile_cseq_while(ary[ssi+1...gi], wb)
|
|
842
|
-
w = C::While.new(C::CExpression.negate(ss.test), wb)
|
|
851
|
+
w = C::While.new(C::CExpression.negate(ss.test), wb).with_misc(ss)
|
|
843
852
|
ary[ssi..gi] = [w, *ss.bthen.statements]
|
|
844
853
|
finished = false ; break #retry
|
|
845
854
|
end
|
|
@@ -848,7 +857,7 @@ class Decompiler
|
|
|
848
857
|
# l: a; goto l; => while(1) { a; }
|
|
849
858
|
wb = C::Block.new(scope)
|
|
850
859
|
wb.statements = decompile_cseq_while(ary[si...gi], wb)
|
|
851
|
-
w = C::While.new(C::CExpression[1], wb)
|
|
860
|
+
w = C::While.new(C::CExpression[1], wb).with_misc(ary[gi].misc)
|
|
852
861
|
ary[si..gi] = [w]
|
|
853
862
|
finished = false ; break #retry
|
|
854
863
|
end
|
|
@@ -861,10 +870,10 @@ class Decompiler
|
|
|
861
870
|
if g.bthen.kind_of?(C::Block) and g.bthen.statements.length > 1
|
|
862
871
|
nary = ary[si...gi] + [C::If.new(C::CExpression.negate(g.test), C::Break.new)] + g.bthen.statements[0...-1]
|
|
863
872
|
wb.statements = decompile_cseq_while(nary, wb)
|
|
864
|
-
w = C::DoWhile.new(C::CExpression[1], wb)
|
|
873
|
+
w = C::DoWhile.new(C::CExpression[1], wb).with_misc(g.misc)
|
|
865
874
|
else
|
|
866
875
|
wb.statements = decompile_cseq_while(ary[si...gi], wb)
|
|
867
|
-
w = C::DoWhile.new(g.test, wb)
|
|
876
|
+
w = C::DoWhile.new(g.test, wb).with_misc(g.misc)
|
|
868
877
|
end
|
|
869
878
|
ary[si..gi] = [w]
|
|
870
879
|
finished = false ; break #retry
|
|
@@ -1001,10 +1010,18 @@ class Decompiler
|
|
|
1001
1010
|
vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 0 : 1 }
|
|
1002
1011
|
|
|
1003
1012
|
# find the domains of var aliases
|
|
1004
|
-
vars.each { |var|
|
|
1013
|
+
vars.each { |var|
|
|
1014
|
+
if unalias_var(var, scope, g)
|
|
1015
|
+
if not var.stackoff or var.stackoff > 0 # dont allow local vars as args
|
|
1016
|
+
func.type.args << var unless func.type.args.find { |aa| aa.name == var.name }
|
|
1017
|
+
scope.statements.delete_if { |sm| sm.kind_of?(C::Declaration) and sm.var.name == var.name }
|
|
1018
|
+
end
|
|
1019
|
+
end
|
|
1020
|
+
}
|
|
1005
1021
|
end
|
|
1006
1022
|
|
|
1007
1023
|
# duplicates a var per domain value
|
|
1024
|
+
# return var if used before being set (eg func arg)
|
|
1008
1025
|
def unalias_var(var, scope, g = c_to_graph(scope))
|
|
1009
1026
|
# [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1))
|
|
1010
1027
|
read = {}
|
|
@@ -1023,8 +1040,8 @@ class Decompiler
|
|
|
1023
1040
|
g_exprs.each { |label, exprs|
|
|
1024
1041
|
exprs.each_with_index { |ce, i|
|
|
1025
1042
|
if ce_read(ce, var)
|
|
1026
|
-
if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
|
|
1027
|
-
(ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
|
|
1043
|
+
if (ce.kind_of?(C::CExpression) and ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
|
|
1044
|
+
(ce.kind_of?(C::CExpression) and ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
|
|
1028
1045
|
(ro[label] ||= []) << i
|
|
1029
1046
|
(wo[label] ||= []) << i
|
|
1030
1047
|
unchecked << [label, i, :up] << [label, i, :down]
|
|
@@ -1109,6 +1126,8 @@ class Decompiler
|
|
|
1109
1126
|
end
|
|
1110
1127
|
}
|
|
1111
1128
|
|
|
1129
|
+
reach_func_top = false
|
|
1130
|
+
n_i = 0
|
|
1112
1131
|
# check it out
|
|
1113
1132
|
while o = unchecked.shift
|
|
1114
1133
|
dom = []
|
|
@@ -1147,15 +1166,17 @@ class Decompiler
|
|
|
1147
1166
|
|
|
1148
1167
|
unchecked -= dom + dom_wo + dom_ro
|
|
1149
1168
|
|
|
1150
|
-
|
|
1169
|
+
if func_top
|
|
1170
|
+
reach_func_top = true
|
|
1171
|
+
next
|
|
1172
|
+
end
|
|
1151
1173
|
|
|
1152
1174
|
# patch
|
|
1153
|
-
n_i = 0
|
|
1154
1175
|
n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"]
|
|
1155
1176
|
|
|
1156
1177
|
nv = var.dup
|
|
1157
1178
|
nv.misc = var.misc ? var.misc.dup : {}
|
|
1158
|
-
nv.storage = :register if nv.has_attribute_var('register')
|
|
1179
|
+
#nv.storage = :register if nv.has_attribute_var('register')
|
|
1159
1180
|
nv.attributes = nv.attributes.dup if nv.attributes
|
|
1160
1181
|
nv.name = newvarname
|
|
1161
1182
|
nv.misc[:unalias_name] = newvarname
|
|
@@ -1191,6 +1212,8 @@ class Decompiler
|
|
|
1191
1212
|
nv.add_attribute('out')
|
|
1192
1213
|
end
|
|
1193
1214
|
end
|
|
1215
|
+
|
|
1216
|
+
reach_func_top
|
|
1194
1217
|
end
|
|
1195
1218
|
|
|
1196
1219
|
# revert the unaliasing namechange of vars where no alias subsists
|
|
@@ -1433,7 +1456,7 @@ class Decompiler
|
|
|
1433
1456
|
f = f.pointed if f.pointer?
|
|
1434
1457
|
next if not f.kind_of?(C::Function)
|
|
1435
1458
|
# cast func args to arg prototypes
|
|
1436
|
-
f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] }
|
|
1459
|
+
f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| if arg ; ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] ; end }
|
|
1437
1460
|
elsif ce.op == :* and not ce.lexpr
|
|
1438
1461
|
if e = ce.rexpr and e.kind_of?(C::CExpression) and not e.op and e = e.rexpr and e.kind_of?(C::CExpression) and
|
|
1439
1462
|
e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable) and e.rexpr.stackoff
|
|
@@ -1582,7 +1605,7 @@ class Decompiler
|
|
|
1582
1605
|
walk_ce(scope) { |ce|
|
|
1583
1606
|
count_refs[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
|
|
1584
1607
|
count_refs[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
|
|
1585
|
-
if is_cast[ce] and ce.rexpr.rexpr.kind_of?(C::Variable)
|
|
1608
|
+
if is_cast[ce] and ce.type.pointer? and ce.rexpr.rexpr.kind_of?(C::Variable)
|
|
1586
1609
|
(uses[ce.rexpr.rexpr.name] ||= []) << ce.type.pointed
|
|
1587
1610
|
end
|
|
1588
1611
|
}
|
|
@@ -2039,6 +2062,7 @@ class Decompiler
|
|
|
2039
2062
|
when C::CExpression
|
|
2040
2063
|
@exprs[l_cur] = [stmt]
|
|
2041
2064
|
@to[l_cur] = [l_after]
|
|
2065
|
+
@to[l_cur] = [] if stmt.op == :funcall and stmt.lexpr.has_attribute('noreturn')
|
|
2042
2066
|
when C::Return
|
|
2043
2067
|
@exprs[l_cur] = [stmt.value] if stmt.value
|
|
2044
2068
|
@to[l_cur] = []
|
|
@@ -2884,7 +2908,7 @@ class Decompiler
|
|
|
2884
2908
|
rename = lambda { |var, name|
|
|
2885
2909
|
var = var.rexpr if var.kind_of?(C::CExpression) and not var.op
|
|
2886
2910
|
next if not var.kind_of?(C::Variable) or not scope.symbol[var.name] or not name
|
|
2887
|
-
next if (var.name !~ /^(var|arg)_/ and not var.
|
|
2911
|
+
next if (var.name !~ /^(var|arg)_/ and not var.has_attribute_var('register')) or not scope.symbol[var.name] or name =~ /^(var|arg)_/
|
|
2888
2912
|
s = scope.symbol_ancestors
|
|
2889
2913
|
n = name
|
|
2890
2914
|
i = 0
|
data/metasm/disassemble.rb
CHANGED
|
@@ -1220,6 +1220,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
1220
1220
|
# it is if all its end blocks are calls to noreturn functions
|
|
1221
1221
|
# if it is, create a @function[fa] with noreturn = true
|
|
1222
1222
|
# should only be called with fa = target of a call
|
|
1223
|
+
# populates function[fa].return_address
|
|
1223
1224
|
def check_noreturn_function(fa)
|
|
1224
1225
|
fb = function_blocks(fa, false, false)
|
|
1225
1226
|
return if fb.empty?
|
|
@@ -1234,6 +1235,14 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
1234
1235
|
# yay
|
|
1235
1236
|
@function[fa] ||= DecodedFunction.new
|
|
1236
1237
|
@function[fa].noreturn = true
|
|
1238
|
+
elsif @function[fa]
|
|
1239
|
+
lasts.each { |la|
|
|
1240
|
+
di = block_at(la).list.last
|
|
1241
|
+
if di.opcode.props[:stopexec] and di.opcode.props[:setip]
|
|
1242
|
+
(@function[fa].return_address ||= []) << di.address
|
|
1243
|
+
end
|
|
1244
|
+
}
|
|
1245
|
+
false
|
|
1237
1246
|
end
|
|
1238
1247
|
end
|
|
1239
1248
|
|
|
@@ -1520,7 +1529,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
|
|
|
1520
1529
|
# :log => Array, will be updated with the backtrace evolution
|
|
1521
1530
|
# :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto
|
|
1522
1531
|
# :no_check => don't use backtrace_check_found (will not backtrace indirection static values)
|
|
1523
|
-
# :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals)
|
|
1532
|
+
# :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals)
|
|
1524
1533
|
# :cpu_context => disassembler cpu_context
|
|
1525
1534
|
def backtrace(expr, start_addr, nargs={})
|
|
1526
1535
|
include_start = nargs.delete :include_start
|
|
@@ -1559,7 +1568,7 @@ puts " not backtracking stack address #{expr}" if debug_backtrace
|
|
|
1559
1568
|
end
|
|
1560
1569
|
|
|
1561
1570
|
if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
|
|
1562
|
-
di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr))
|
|
1571
|
+
di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr, terminals))
|
|
1563
1572
|
# no need to update backtracked_for
|
|
1564
1573
|
return vals
|
|
1565
1574
|
elsif maxdepth <= 0
|
|
@@ -1599,7 +1608,7 @@ puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr
|
|
|
1599
1608
|
if expr != oldexpr and not snapshot_addr and vals = (no_check ?
|
|
1600
1609
|
(!need_backtrace(expr, terminals) and [expr]) :
|
|
1601
1610
|
backtrace_check_found(expr, nil, origin, type, len,
|
|
1602
|
-
maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
|
|
1611
|
+
maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr, terminals))
|
|
1603
1612
|
result |= vals
|
|
1604
1613
|
next
|
|
1605
1614
|
end
|
|
@@ -1641,7 +1650,7 @@ puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#
|
|
|
1641
1650
|
|
|
1642
1651
|
if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
|
|
1643
1652
|
backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
|
|
1644
|
-
maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
|
|
1653
|
+
maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr, terminals))
|
|
1645
1654
|
if snapshot_addr
|
|
1646
1655
|
expr = StoppedExpr.new vals
|
|
1647
1656
|
next expr
|
|
@@ -1710,7 +1719,7 @@ puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_back
|
|
|
1710
1719
|
end
|
|
1711
1720
|
puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
|
|
1712
1721
|
if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
|
|
1713
|
-
h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
|
|
1722
|
+
h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr, terminals))
|
|
1714
1723
|
if snapshot_addr
|
|
1715
1724
|
expr = StoppedExpr.new vals
|
|
1716
1725
|
else
|
|
@@ -1827,7 +1836,7 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
|
|
|
1827
1836
|
# TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
|
|
1828
1837
|
# TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
|
|
1829
1838
|
# eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
|
|
1830
|
-
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr=nil)
|
|
1839
|
+
def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr=nil, terminals=[])
|
|
1831
1840
|
# only entrypoints or block starts called by a :saveip are checked for being a function
|
|
1832
1841
|
# want to execute [esp] from a block start
|
|
1833
1842
|
if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and (
|
|
@@ -1856,13 +1865,13 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
|
|
|
1856
1865
|
f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address }
|
|
1857
1866
|
end
|
|
1858
1867
|
|
|
1859
|
-
return if need_backtrace(expr)
|
|
1868
|
+
return if need_backtrace(expr, terminals)
|
|
1860
1869
|
if snapshot_addr
|
|
1861
1870
|
return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) }
|
|
1862
1871
|
end
|
|
1863
1872
|
|
|
1864
1873
|
puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
|
|
1865
|
-
result = backtrace_value(expr, maxdepth)
|
|
1874
|
+
result = backtrace_value(expr, maxdepth, terminals)
|
|
1866
1875
|
# keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
|
|
1867
1876
|
#result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler
|
|
1868
1877
|
result.uniq!
|
|
@@ -1876,14 +1885,14 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
|
|
|
1876
1885
|
end
|
|
1877
1886
|
|
|
1878
1887
|
# returns an array of expressions with Indirections resolved (recursive with backtrace_indirection)
|
|
1879
|
-
def backtrace_value(expr, maxdepth)
|
|
1888
|
+
def backtrace_value(expr, maxdepth, terminals=[])
|
|
1880
1889
|
# array of expression with all indirections resolved
|
|
1881
1890
|
result = [Expression[expr.reduce]]
|
|
1882
1891
|
|
|
1883
1892
|
# solve each indirection sequentially, clone expr for each value (aka cross-product)
|
|
1884
1893
|
result.first.expr_indirections.uniq.each { |i|
|
|
1885
1894
|
next_result = []
|
|
1886
|
-
backtrace_indirection(i, maxdepth).each { |rr|
|
|
1895
|
+
backtrace_indirection(i, maxdepth, terminals).each { |rr|
|
|
1887
1896
|
next_result |= result.map { |e| Expression[e.bind(i => rr).reduce] }
|
|
1888
1897
|
}
|
|
1889
1898
|
result = next_result
|
|
@@ -1897,7 +1906,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
|
|
|
1897
1906
|
# then backtraces from ind.origin until it finds an :w xref origin
|
|
1898
1907
|
# if no :w access is found, returns the value encoded in the raw section data
|
|
1899
1908
|
# TODO handle unaligned (partial?) writes
|
|
1900
|
-
def backtrace_indirection(ind, maxdepth)
|
|
1909
|
+
def backtrace_indirection(ind, maxdepth, terminals=[])
|
|
1901
1910
|
if not ind.origin
|
|
1902
1911
|
puts "backtrace_ind: no origin for #{ind}" if $VERBOSE
|
|
1903
1912
|
return [ind]
|
|
@@ -1915,7 +1924,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
|
|
|
1915
1924
|
}
|
|
1916
1925
|
|
|
1917
1926
|
# resolve pointers (they may include Indirections)
|
|
1918
|
-
backtrace_value(ind.target, maxdepth).each { |ptr|
|
|
1927
|
+
backtrace_value(ind.target, maxdepth, terminals).each { |ptr|
|
|
1919
1928
|
# find write xrefs to the ptr
|
|
1920
1929
|
refs = []
|
|
1921
1930
|
each_xref(ptr, :w) { |x|
|
|
@@ -1945,7 +1954,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
|
|
|
1945
1954
|
puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace
|
|
1946
1955
|
ret |= [Expression::Unknown]
|
|
1947
1956
|
when :end
|
|
1948
|
-
if not refs.empty? and (expr == true or not need_backtrace(expr))
|
|
1957
|
+
if not refs.empty? and (expr == true or not need_backtrace(expr, terminals))
|
|
1949
1958
|
if expr == true
|
|
1950
1959
|
# found a path avoiding the :w xrefs, read the encoded initial value
|
|
1951
1960
|
ret |= [decode_imm[ptr, ind.len]]
|
|
@@ -1975,7 +1984,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
|
|
|
1975
1984
|
# may have new indirections... recall bt_value ?
|
|
1976
1985
|
#if not need_backtrace(expr)
|
|
1977
1986
|
if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty?
|
|
1978
|
-
ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
|
|
1987
|
+
ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length, terminals)
|
|
1979
1988
|
false
|
|
1980
1989
|
else
|
|
1981
1990
|
expr
|
|
@@ -1985,7 +1994,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
|
|
|
1985
1994
|
expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length)
|
|
1986
1995
|
#if not need_backtrace(expr)
|
|
1987
1996
|
if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty?
|
|
1988
|
-
ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
|
|
1997
|
+
ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length, terminals)
|
|
1989
1998
|
false
|
|
1990
1999
|
else
|
|
1991
2000
|
expr
|
data/metasm/dynldr.rb
CHANGED
|
@@ -57,6 +57,12 @@ extern VALUE *rb_eArgError __attribute__((import));
|
|
|
57
57
|
#define DYNLDR_RUBY_19 #{RUBY_VERSION >= '1.9' ? 1 : 0}
|
|
58
58
|
#endif
|
|
59
59
|
|
|
60
|
+
// Ruby 3.2+ changed the RString/RArray struct layout (Variable Width Allocation).
|
|
61
|
+
// len is now a direct field, and embedded string data starts after len (at the ptr offset).
|
|
62
|
+
#ifndef DYNLDR_RUBY_32
|
|
63
|
+
#define DYNLDR_RUBY_32 #{RUBY_VERSION >= '3.2' ? 1 : 0}
|
|
64
|
+
#endif
|
|
65
|
+
|
|
60
66
|
#if #{RUBY_VERSION >= '2.0' ? 1 : 0}
|
|
61
67
|
// flonums. WHY?
|
|
62
68
|
// also breaks Qtrue/Qnil
|
|
@@ -69,11 +75,24 @@ extern VALUE *rb_eArgError __attribute__((import));
|
|
|
69
75
|
#define T_FIXNUM 0x15
|
|
70
76
|
#define T_MASK 0x1f
|
|
71
77
|
#define RSTRING_NOEMBED (1<<13)
|
|
72
|
-
#
|
|
73
|
-
|
|
78
|
+
#if DYNLDR_RUBY_32
|
|
79
|
+
// Ruby 3.2+: len is always a direct field; embedded string data starts at the ptr field offset
|
|
80
|
+
#define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->ptr)
|
|
81
|
+
#define STR_LEN(o) (RString(o)->len)
|
|
82
|
+
#else
|
|
83
|
+
// Ruby 1.9 - 3.1: embedded string data starts at the len field offset, length encoded in flags
|
|
84
|
+
#define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->len)
|
|
85
|
+
#define STR_LEN(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->len : (RString(o)->flags >> 14) & 0x1f)
|
|
86
|
+
#endif
|
|
74
87
|
#define RARRAY_EMBED (1<<13)
|
|
75
|
-
#
|
|
76
|
-
|
|
88
|
+
#if DYNLDR_RUBY_32
|
|
89
|
+
// Ruby 3.2+: embedded array data starts at the len field offset (unchanged), but length uses more bits
|
|
90
|
+
#define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
|
|
91
|
+
#define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 0x1f) : RArray(o)->len)
|
|
92
|
+
#else
|
|
93
|
+
#define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
|
|
94
|
+
#define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 3) : RArray(o)->len)
|
|
95
|
+
#endif
|
|
77
96
|
#else
|
|
78
97
|
#define T_STRING 0x07
|
|
79
98
|
#define T_ARRAY 0x09
|
data/metasm/encode.rb
CHANGED
|
@@ -290,6 +290,17 @@ class Expression
|
|
|
290
290
|
end
|
|
291
291
|
end
|
|
292
292
|
|
|
293
|
+
# encode the expression as a LEB128-style variable-length integer
# the expression must reduce to an Integer, otherwise an exception is raised
# 7 bits are emitted per byte, least significant group first, with the high
# bit (0x80) set on every byte but the last
# with signed set, positive values whose last group would have bit 6 set get
# one more byte, so that this bit is not mistaken for a sign bit
# returns the result of appending the last byte to a new EncodedData
def encode_leb(signed=false)
  val = reduce
  raise "need numeric value for #{self}" unless val.kind_of?(::Integer)

  edata = EncodedData.new
  loop do
    # done when the remaining value fits in the final 7-bit group
    break if val.between?(-0x40, 0x7f) && !(signed && val > 0x3f)
    edata << [(val & 0x7f) | 0x80].pack('C*')
    val >>= 7
  end
  edata << [val & 0x7f].pack('C*')
end
|
|
303
|
+
|
|
293
304
|
class << self
|
|
294
305
|
def encode_imm(val, type, endianness, backtrace=nil)
|
|
295
306
|
type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer
|