metasm 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/Gemfile +3 -3
  4. data/Rakefile +1 -1
  5. data/cortex.yaml +17 -0
  6. data/metasm/cpu/arm64/decode.rb +87 -11
  7. data/metasm/cpu/arm64/decompile.rb +142 -0
  8. data/metasm/cpu/arm64/opcodes.rb +53 -23
  9. data/metasm/cpu/arm64.rb +1 -0
  10. data/metasm/cpu/dwarf/debug.rb +39 -0
  11. data/metasm/cpu/dwarf/decode.rb +124 -0
  12. data/metasm/cpu/dwarf/decompile.rb +212 -0
  13. data/metasm/cpu/dwarf/encode.rb +49 -0
  14. data/metasm/cpu/dwarf/main.rb +37 -0
  15. data/metasm/cpu/dwarf/opcodes.rb +107 -0
  16. data/metasm/cpu/dwarf.rb +11 -0
  17. data/metasm/cpu/ia32/debug.rb +8 -0
  18. data/metasm/cpu/ia32/decode.rb +25 -1
  19. data/metasm/cpu/ia32/decompile.rb +205 -342
  20. data/metasm/cpu/mips/decode.rb +1 -1
  21. data/metasm/cpu/ppc/decode.rb +1 -1
  22. data/metasm/cpu/sh4/decode.rb +1 -1
  23. data/metasm/cpu/x86_64/decompile.rb +68 -0
  24. data/metasm/cpu/x86_64.rb +1 -0
  25. data/metasm/decode.rb +14 -0
  26. data/metasm/decompile.rb +51 -27
  27. data/metasm/disassemble.rb +24 -15
  28. data/metasm/dynldr.rb +23 -4
  29. data/metasm/encode.rb +11 -0
  30. data/metasm/exe_format/elf.rb +60 -2
  31. data/metasm/exe_format/elf_decode.rb +201 -6
  32. data/metasm/exe_format/shellcode.rb +39 -0
  33. data/metasm/gui/dasm_decomp.rb +1 -0
  34. data/metasm/os/emulator.rb +7 -0
  35. data/metasm/parse_c.rb +1 -1
  36. data/metasm.gemspec +1 -2
  37. data/metasm.rb +1 -1
  38. data/samples/disassemble-gui.rb +27 -11
  39. data/samples/disassemble.rb +9 -12
  40. data/samples/emudbg.rb +1 -1
  41. data/samples/factorize-headers-elfimports.rb +4 -1
  42. data/samples/lindebug.rb +16 -2
  43. data/tests/shellcode.rb +111 -0
  44. metadata +19 -102
  45. checksums.yaml.gz.sig +0 -3
  46. data.tar.gz.sig +0 -0
  47. metadata.gz.sig +0 -0
@@ -221,7 +221,7 @@ class MIPS
221
221
  end
222
222
 
223
223
  def backtrace_is_function_return(expr, di=nil)
224
- expr.reduce_rec == :$ra
224
+ Expression[expr].reduce_rec == :$ra
225
225
  end
226
226
 
227
227
  def backtrace_is_stack_address(expr)
@@ -142,7 +142,7 @@ class PowerPC
142
142
  end
143
143
 
144
144
  def backtrace_is_function_return(expr, di=nil)
145
- expr.reduce_rec == :lr
145
+ Expression[expr].reduce_rec == :lr
146
146
  end
147
147
 
148
148
  def backtrace_is_stack_address(expr)
@@ -341,7 +341,7 @@ class Sh4
341
341
  end
342
342
 
343
343
  def backtrace_is_function_return(expr, di=nil)
344
- expr.reduce_rec == :pr
344
+ Expression[expr].reduce_rec == :pr
345
345
  end
346
346
 
347
347
  def delay_slot(di=nil)
@@ -0,0 +1,68 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/x86_64/main'
8
+
9
module Metasm
  class X86_64
    # Returns the integer-argument calling convention used for decompilation,
    # as a Hash:
    #   :args     => ordered list of argument registers (Symbols)
    #   :stackoff => offset from :frameptr of the first stack-passed argument
    #
    # The convention is chosen from dcmp.dasm.program.shortname: 'coff' selects
    # the Microsoft x64 convention, anything else the System V AMD64 one.
    # The result is computed once and cached in @func_abi, so later calls
    # return the same Hash regardless of the dcmp passed.
    def func_abi(dcmp)
      # TODO check cpu.abi_funcall
      @func_abi ||=
        if dcmp.dasm.program.shortname == 'coff'
          # Microsoft x64: rcx, rdx, r8, r9, then the stack; the caller
          # reserves 32 bytes for the 4 register args, hence stackoff 32
          { :args => [:rcx, :rdx, :r8, :r9], :stackoff => 32 }
        else
          # System V AMD64: six integer argument registers, then the stack
          { :args => [:rdi, :rsi, :rdx, :rcx, :r8, :r9], :stackoff => 0 } # XXX saved rip offset ?
        end
    end

    # return the array of arguments (symbols, indirections wrt frameptr) to be used as arguments for decompilation of the function call in di
    #
    # f is the called function's C prototype holder (f.type.args is walked).
    # For each prototype argument, in order:
    #  - a 'register' attribute pins it to that register (which is then
    #    removed from the pool of ABI registers still available)
    #  - a 'stackoff' attribute pins it to [frameptr + offset]
    #  - otherwise it takes the next free ABI register, or the next 8-byte
    #    stack slot once the ABI registers are exhausted
    # func_entry and di are not used by this implementation.
    def decompile_get_func_args(dcmp, func_entry, di, f)
      abi = func_abi(dcmp)
      abi_args = abi[:args].dup	# dup: the cached ABI description must not be consumed
      stackoff = abi[:stackoff]

      args = []
      f.type.args.to_a.each { |a|
        if r = a.has_attribute_var('register')
          args << Expression[r.to_sym]
          abi_args.delete r.to_sym
        elsif o = a.has_attribute_var('stackoff')
          args << Indirection[[:frameptr, :+, Integer(o)], 8]
        elsif abi_args.empty?
          args << Indirection[[:frameptr, :+, stackoff], 8]
          stackoff += 8
        else
          args << Expression[abi_args.shift]
        end
      }

      args
    end

    # Cleans up the argument list of a decompiled function (func.type.args)
    # so that it matches the ABI returned by func_abi:
    #  - drops 'unused' register arguments whose register is not an ABI register
    #  - drops trailing ABI register arguments that are 'unused'
    #  - sorts the remaining arguments: ABI registers first, in ABI order,
    #    the others after them in their current relative order
    # The list is modified in place. entry is not used by this implementation.
    def decompile_check_abi(dcmp, entry, func)
      abi_regargs = func_abi(dcmp)[:args].map { |ra| ra.to_s }
      a = func.type.args || []

      # delete unused regs not part of the ABI
      a.delete_if { |arg| arg.has_attribute('unused') and ra = arg.has_attribute_var('register') and not abi_regargs.index(ra) }

      # delete last regs of the ABI if unused
      # (stop at the first ABI register, from the end, that is really used)
      abi_regargs.reverse.each { |ra|
        break if a.find { |arg| arg.has_attribute_var('register') == ra and not arg.has_attribute('unused') }
        a.delete_if { |arg| arg.has_attribute('unused') and arg.has_attribute_var('register') == ra }
      }

      # reorder ABI regs according to ABI
      # (non-ABI args get a key of 1000+position so they stay after the registers)
      a.sort_by! { |arg| ra = arg.has_attribute_var('register') ; abi_regargs.index(ra) || (1000 + a.index(arg)) }

      # TODO
      #if not f = dcmp.dasm.function[entry] or not f.return_address
      #func.add_attribute 'noreturn'
      #end
    end
  end
end
data/metasm/cpu/x86_64.rb CHANGED
@@ -13,3 +13,4 @@ require 'metasm/cpu/x86_64/decode'
13
13
  require 'metasm/cpu/x86_64/render'
14
14
  require 'metasm/cpu/x86_64/debug'
15
15
  require 'metasm/cpu/x86_64/compile_c'
16
+ require 'metasm/cpu/x86_64/decompile'
data/metasm/decode.rb CHANGED
@@ -165,6 +165,20 @@ class EncodedData
165
165
  Expression.decode_imm(read(isz/8), type, endianness)
166
166
  end
167
167
  alias decode_immediate decode_imm
168
+
169
# default cap on the number of bytes read by decode_leb: enough 7-bit groups for a 128-bit value
LEB_MAX_BYTES = (128.0/7).ceil

# decode a length-encoded immediate (LEB128 layout: 7 value bits per byte,
# least-significant group first, bit 7 set on every byte but the last)
#
# signed:    when true, the result is sign-extended from the number of bits
#            actually read, using Expression.make_signed
# max_bytes: stop after reading this many bytes even if the continuation bit
#            is still set
#
# bytes are fetched with get_byte; decoding also stops if get_byte returns nil
# NOTE(review): presumably get_byte returns nil once past the end of the data
# (not visible here) - in that case the bits decoded so far are returned
# instead of failing with a NoMethodError on nil
def decode_leb(signed=false, max_bytes=LEB_MAX_BYTES)
  v = s = 0
  while s < 7*max_bytes
    b = get_byte
    break if not b
    v |= (b & 0x7f) << s
    s += 7
    break if (b & 0x80) == 0
  end
  # nothing to sign-extend when no bit was read (v is 0 in that case)
  v = Expression.make_signed(v, s) if signed and s > 0
  v
end
168
182
  end
169
183
 
170
184
  class Expression
data/metasm/decompile.rb CHANGED
@@ -99,7 +99,7 @@ class Decompiler
99
99
 
100
100
  # [esp+8] => [:frameptr-12]
101
101
  # TODO slow
102
- makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block }
102
+ makestackvars(entry, myblocks.map { |b, to| @dasm.decoded[b].block })
103
103
 
104
104
  # find registry dependencies between blocks
105
105
  deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func)
@@ -107,6 +107,10 @@ class Decompiler
107
107
  scope = func.initializer = C::Block.new(@c_parser.toplevel)
108
108
  if df = @dasm.function[entry]
109
109
  scope.decompdata = df.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
110
+ if df.noreturn
111
+ func.add_attribute('noreturn')
112
+ func.type.type = C::BaseType.new(:void)
113
+ end
110
114
  else
111
115
  scope.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
112
116
  end
@@ -140,7 +144,11 @@ class Decompiler
140
144
  @dasm.cpu.decompile_check_abi(self, entry, func)
141
145
 
142
146
  case ret = scope.statements.last
143
- when C::CExpression; puts "no return at end of func" if $VERBOSE
147
+ when C::CExpression
148
+ if ret.op == :funcall and ret.lexpr.has_attribute('noreturn')
149
+ else
150
+ puts "no return at end of func" if $VERBOSE
151
+ end
144
152
  when C::Return
145
153
  if not ret.value
146
154
  scope.statements.pop
@@ -368,6 +376,7 @@ class Decompiler
368
376
  repl_bind = {} # di => bt_bd
369
377
 
370
378
  @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block|
379
+ blockstart = block.address
371
380
  block.list.each { |di|
372
381
  bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di)
373
382
  newbd = repl_bind[di] = {}
@@ -423,18 +432,18 @@ class Decompiler
423
432
  p = C::CExpression[[p], itype]
424
433
  C::CExpression[:*, p]
425
434
  when ::Integer
426
- C::CExpression[e]
435
+ C::CExpression[e, C::BaseType.new("__int#{@dasm.cpu.size}".to_sym)]
427
436
  when C::CExpression
428
437
  e
429
438
  else
430
439
  name = e.to_s
431
440
  if not s = scope.symbol_ancestors[name]
432
441
  s = C::Variable.new
433
- s.type = C::BaseType.new(:__int32)
442
+ s.type = C::BaseType.new("__int#{@dasm.cpu.size}".to_sym)
434
443
  case e
435
444
  when ::String # edata relocation (rel.length = size of pointer)
436
- return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope)
437
- when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})")
445
+ return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || s.type, scope)
446
+ when ::Symbol; s.add_attribute("register(#{name})")
438
447
  else s.type.qualifier = [:volatile]
439
448
  puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE
440
449
  end
@@ -571,9 +580,9 @@ class Decompiler
571
580
  e
572
581
  when C::Goto
573
582
  if e.target == brk
574
- C::Break.new
583
+ C::Break.new.with_misc(e.misc)
575
584
  elsif e.target == cnt
576
- C::Continue.new
585
+ C::Continue.new.with_misc(e.misc)
577
586
  else e
578
587
  end
579
588
  else e
@@ -591,7 +600,7 @@ class Decompiler
591
600
  }
592
601
  walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of?(C::Block) and sst.outer == i.bthen }
593
602
  scope.statements.concat i.bthen.statements
594
- i.bthen = C::Break.new
603
+ i.bthen = C::Break.new.with_misc(i.misc)
595
604
  end
596
605
 
597
606
  patch_test = lambda { |ce|
@@ -631,7 +640,7 @@ class Decompiler
631
640
  ce.body = ce.body.statements.first
632
641
  when 0
633
642
  if ce.kind_of?(C::DoWhile) and i = ce.body.outer.statements.index(ce)
634
- ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body)
643
+ ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body).with_misc(ce.misc)
635
644
  end
636
645
  ce.body = nil
637
646
  end
@@ -645,13 +654,13 @@ class Decompiler
645
654
  i = ce.body.statements.last
646
655
  if i.kind_of?(C::If) and not i.belse and i.bthen.kind_of?(C::Break)
647
656
  ce.body.statements.pop
648
- next C::DoWhile.new(i.test.negate, ce.body)
657
+ next C::DoWhile.new(i.test.negate, ce.body).with_misc(ce.misc)
649
658
  end
650
659
  end
651
660
 
652
661
  # if (a) b = 1; else b = 2; => b = a ? 1 : 2
653
662
  if ce.kind_of?(C::If) and ce.belse.kind_of?(C::CExpression) and ce.belse.op == :'=' and ce.belse.lexpr.kind_of?(C::Variable) and ce.bthen.kind_of?(C::CExpression) and ce.bthen.op == :'=' and ce.bthen.lexpr == ce.belse.lexpr
654
- next C::CExpression[ce.bthen.lexpr, :'=', [ce.test, :'?:', [ce.bthen.rexpr, ce.belse.rexpr]]]
663
+ next C::CExpression[ce.bthen.lexpr, :'=', [ce.test, :'?:', [ce.bthen.rexpr, ce.belse.rexpr]]].with_misc(ce.misc)
655
664
  end
656
665
  }
657
666
 
@@ -831,15 +840,15 @@ class Decompiler
831
840
  ss.bthen.statements.pop
832
841
  if l = ary[ssi+1] and l.kind_of?(C::Label)
833
842
  ss.bthen.statements.grep(C::If).each { |it|
834
- it.bthen = C::Break.new if it.bthen.kind_of?(C::Goto) and it.bthen.target == l.name
843
+ it.bthen = C::Break.new.with_misc(it.bthen.misc) if it.bthen.kind_of?(C::Goto) and it.bthen.target == l.name
835
844
  }
836
845
  end
837
- ary[ssi] = C::While.new(ss.test, ss.bthen)
846
+ ary[ssi] = C::While.new(ss.test, ss.bthen).with_misc(ss.misc)
838
847
  elsif ss.bthen.statements.last.kind_of?(C::Return) and gi = ((si+1)..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name }
839
848
  # l: if (a) { b; return; } c; goto l; => while (!a) { c; } b; return;
840
849
  wb = C::Block.new(scope)
841
850
  wb.statements = decompile_cseq_while(ary[ssi+1...gi], wb)
842
- w = C::While.new(C::CExpression.negate(ss.test), wb)
851
+ w = C::While.new(C::CExpression.negate(ss.test), wb).with_misc(ss)
843
852
  ary[ssi..gi] = [w, *ss.bthen.statements]
844
853
  finished = false ; break #retry
845
854
  end
@@ -848,7 +857,7 @@ class Decompiler
848
857
  # l: a; goto l; => while(1) { a; }
849
858
  wb = C::Block.new(scope)
850
859
  wb.statements = decompile_cseq_while(ary[si...gi], wb)
851
- w = C::While.new(C::CExpression[1], wb)
860
+ w = C::While.new(C::CExpression[1], wb).with_misc(ary[gi].misc)
852
861
  ary[si..gi] = [w]
853
862
  finished = false ; break #retry
854
863
  end
@@ -861,10 +870,10 @@ class Decompiler
861
870
  if g.bthen.kind_of?(C::Block) and g.bthen.statements.length > 1
862
871
  nary = ary[si...gi] + [C::If.new(C::CExpression.negate(g.test), C::Break.new)] + g.bthen.statements[0...-1]
863
872
  wb.statements = decompile_cseq_while(nary, wb)
864
- w = C::DoWhile.new(C::CExpression[1], wb)
873
+ w = C::DoWhile.new(C::CExpression[1], wb).with_misc(g.misc)
865
874
  else
866
875
  wb.statements = decompile_cseq_while(ary[si...gi], wb)
867
- w = C::DoWhile.new(g.test, wb)
876
+ w = C::DoWhile.new(g.test, wb).with_misc(g.misc)
868
877
  end
869
878
  ary[si..gi] = [w]
870
879
  finished = false ; break #retry
@@ -1001,10 +1010,18 @@ class Decompiler
1001
1010
  vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 0 : 1 }
1002
1011
 
1003
1012
  # find the domains of var aliases
1004
- vars.each { |var| unalias_var(var, scope, g) }
1013
+ vars.each { |var|
1014
+ if unalias_var(var, scope, g)
1015
+ if not var.stackoff or var.stackoff > 0 # dont allow local vars as args
1016
+ func.type.args << var unless func.type.args.find { |aa| aa.name == var.name }
1017
+ scope.statements.delete_if { |sm| sm.kind_of?(C::Declaration) and sm.var.name == var.name }
1018
+ end
1019
+ end
1020
+ }
1005
1021
  end
1006
1022
 
1007
1023
  # duplicates a var per domain value
1024
+ # return var if used before being set (eg func arg)
1008
1025
  def unalias_var(var, scope, g = c_to_graph(scope))
1009
1026
  # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1))
1010
1027
  read = {}
@@ -1023,8 +1040,8 @@ class Decompiler
1023
1040
  g_exprs.each { |label, exprs|
1024
1041
  exprs.each_with_index { |ce, i|
1025
1042
  if ce_read(ce, var)
1026
- if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
1027
- (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
1043
+ if (ce.kind_of?(C::CExpression) and ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
1044
+ (ce.kind_of?(C::CExpression) and ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
1028
1045
  (ro[label] ||= []) << i
1029
1046
  (wo[label] ||= []) << i
1030
1047
  unchecked << [label, i, :up] << [label, i, :down]
@@ -1109,6 +1126,8 @@ class Decompiler
1109
1126
  end
1110
1127
  }
1111
1128
 
1129
+ reach_func_top = false
1130
+ n_i = 0
1112
1131
  # check it out
1113
1132
  while o = unchecked.shift
1114
1133
  dom = []
@@ -1147,15 +1166,17 @@ class Decompiler
1147
1166
 
1148
1167
  unchecked -= dom + dom_wo + dom_ro
1149
1168
 
1150
- next if func_top
1169
+ if func_top
1170
+ reach_func_top = true
1171
+ next
1172
+ end
1151
1173
 
1152
1174
  # patch
1153
- n_i = 0
1154
1175
  n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"]
1155
1176
 
1156
1177
  nv = var.dup
1157
1178
  nv.misc = var.misc ? var.misc.dup : {}
1158
- nv.storage = :register if nv.has_attribute_var('register')
1179
+ #nv.storage = :register if nv.has_attribute_var('register')
1159
1180
  nv.attributes = nv.attributes.dup if nv.attributes
1160
1181
  nv.name = newvarname
1161
1182
  nv.misc[:unalias_name] = newvarname
@@ -1191,6 +1212,8 @@ class Decompiler
1191
1212
  nv.add_attribute('out')
1192
1213
  end
1193
1214
  end
1215
+
1216
+ reach_func_top
1194
1217
  end
1195
1218
 
1196
1219
  # revert the unaliasing namechange of vars where no alias subsists
@@ -1433,7 +1456,7 @@ class Decompiler
1433
1456
  f = f.pointed if f.pointer?
1434
1457
  next if not f.kind_of?(C::Function)
1435
1458
  # cast func args to arg prototypes
1436
- f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] }
1459
+ f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| if arg ; ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] ; end }
1437
1460
  elsif ce.op == :* and not ce.lexpr
1438
1461
  if e = ce.rexpr and e.kind_of?(C::CExpression) and not e.op and e = e.rexpr and e.kind_of?(C::CExpression) and
1439
1462
  e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable) and e.rexpr.stackoff
@@ -1582,7 +1605,7 @@ class Decompiler
1582
1605
  walk_ce(scope) { |ce|
1583
1606
  count_refs[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
1584
1607
  count_refs[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
1585
- if is_cast[ce] and ce.rexpr.rexpr.kind_of?(C::Variable)
1608
+ if is_cast[ce] and ce.type.pointer? and ce.rexpr.rexpr.kind_of?(C::Variable)
1586
1609
  (uses[ce.rexpr.rexpr.name] ||= []) << ce.type.pointed
1587
1610
  end
1588
1611
  }
@@ -2039,6 +2062,7 @@ class Decompiler
2039
2062
  when C::CExpression
2040
2063
  @exprs[l_cur] = [stmt]
2041
2064
  @to[l_cur] = [l_after]
2065
+ @to[l_cur] = [] if stmt.op == :funcall and stmt.lexpr.has_attribute('noreturn')
2042
2066
  when C::Return
2043
2067
  @exprs[l_cur] = [stmt.value] if stmt.value
2044
2068
  @to[l_cur] = []
@@ -2884,7 +2908,7 @@ class Decompiler
2884
2908
  rename = lambda { |var, name|
2885
2909
  var = var.rexpr if var.kind_of?(C::CExpression) and not var.op
2886
2910
  next if not var.kind_of?(C::Variable) or not scope.symbol[var.name] or not name
2887
- next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/
2911
+ next if (var.name !~ /^(var|arg)_/ and not var.has_attribute_var('register')) or not scope.symbol[var.name] or name =~ /^(var|arg)_/
2888
2912
  s = scope.symbol_ancestors
2889
2913
  n = name
2890
2914
  i = 0
@@ -1220,6 +1220,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1220
1220
  # it is if all its end blocks are calls to noreturn functions
1221
1221
  # if it is, create a @function[fa] with noreturn = true
1222
1222
  # should only be called with fa = target of a call
1223
+ # populates function[fa].return_address
1223
1224
  def check_noreturn_function(fa)
1224
1225
  fb = function_blocks(fa, false, false)
1225
1226
  return if fb.empty?
@@ -1234,6 +1235,14 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1234
1235
  # yay
1235
1236
  @function[fa] ||= DecodedFunction.new
1236
1237
  @function[fa].noreturn = true
1238
+ elsif @function[fa]
1239
+ lasts.each { |la|
1240
+ di = block_at(la).list.last
1241
+ if di.opcode.props[:stopexec] and di.opcode.props[:setip]
1242
+ (@function[fa].return_address ||= []) << di.address
1243
+ end
1244
+ }
1245
+ false
1237
1246
  end
1238
1247
  end
1239
1248
 
@@ -1520,7 +1529,7 @@ puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace
1520
1529
  # :log => Array, will be updated with the backtrace evolution
1521
1530
  # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto
1522
1531
  # :no_check => don't use backtrace_check_found (will not backtrace indirection static values)
1523
- # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check)
1532
+ # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals)
1524
1533
  # :cpu_context => disassembler cpu_context
1525
1534
  def backtrace(expr, start_addr, nargs={})
1526
1535
  include_start = nargs.delete :include_start
@@ -1559,7 +1568,7 @@ puts " not backtracking stack address #{expr}" if debug_backtrace
1559
1568
  end
1560
1569
 
1561
1570
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1562
- di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr))
1571
+ di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr, terminals))
1563
1572
  # no need to update backtracked_for
1564
1573
  return vals
1565
1574
  elsif maxdepth <= 0
@@ -1599,7 +1608,7 @@ puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr
1599
1608
  if expr != oldexpr and not snapshot_addr and vals = (no_check ?
1600
1609
  (!need_backtrace(expr, terminals) and [expr]) :
1601
1610
  backtrace_check_found(expr, nil, origin, type, len,
1602
- maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
1611
+ maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr, terminals))
1603
1612
  result |= vals
1604
1613
  next
1605
1614
  end
@@ -1641,7 +1650,7 @@ puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#
1641
1650
 
1642
1651
  if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) :
1643
1652
  backtrace_check_found(expr, @decoded[h[:from]], origin, type, len,
1644
- maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
1653
+ maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr, terminals))
1645
1654
  if snapshot_addr
1646
1655
  expr = StoppedExpr.new vals
1647
1656
  next expr
@@ -1710,7 +1719,7 @@ puts " backtrace: recursive function #{Expression[h[:funcaddr]]}" if debug_back
1710
1719
  end
1711
1720
  puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr
1712
1721
  if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr,
1713
- h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr))
1722
+ h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr, terminals))
1714
1723
  if snapshot_addr
1715
1724
  expr = StoppedExpr.new vals
1716
1725
  else
@@ -1827,7 +1836,7 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1827
1836
  # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names
1828
1837
  # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX
1829
1838
  # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4
1830
- def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr=nil)
1839
+ def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr=nil, terminals=[])
1831
1840
  # only entrypoints or block starts called by a :saveip are checked for being a function
1832
1841
  # want to execute [esp] from a block start
1833
1842
  if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and (
@@ -1856,13 +1865,13 @@ puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if
1856
1865
  f.backtracked_for |= @decoded[addr].block.backtracked_for.find_all { |btt| not btt.address }
1857
1866
  end
1858
1867
 
1859
- return if need_backtrace(expr)
1868
+ return if need_backtrace(expr, terminals)
1860
1869
  if snapshot_addr
1861
1870
  return if expr.expr_externals(true).find { |ee| ee.kind_of?(Indirection) }
1862
1871
  end
1863
1872
 
1864
1873
  puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expression[origin] if origin}" if debug_backtrace
1865
- result = backtrace_value(expr, maxdepth)
1874
+ result = backtrace_value(expr, maxdepth, terminals)
1866
1875
  # keep the ori pointer in the results to emulate volatile memory (eg decompiler prefers this)
1867
1876
  #result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler
1868
1877
  result.uniq!
@@ -1876,14 +1885,14 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
1876
1885
  end
1877
1886
 
1878
1887
  # returns an array of expressions with Indirections resolved (recursive with backtrace_indirection)
1879
- def backtrace_value(expr, maxdepth)
1888
+ def backtrace_value(expr, maxdepth, terminals=[])
1880
1889
  # array of expression with all indirections resolved
1881
1890
  result = [Expression[expr.reduce]]
1882
1891
 
1883
1892
  # solve each indirection sequentially, clone expr for each value (aka cross-product)
1884
1893
  result.first.expr_indirections.uniq.each { |i|
1885
1894
  next_result = []
1886
- backtrace_indirection(i, maxdepth).each { |rr|
1895
+ backtrace_indirection(i, maxdepth, terminals).each { |rr|
1887
1896
  next_result |= result.map { |e| Expression[e.bind(i => rr).reduce] }
1888
1897
  }
1889
1898
  result = next_result
@@ -1897,7 +1906,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
1897
1906
  # then backtraces from ind.origin until it finds an :w xref origin
1898
1907
  # if no :w access is found, returns the value encoded in the raw section data
1899
1908
  # TODO handle unaligned (partial?) writes
1900
- def backtrace_indirection(ind, maxdepth)
1909
+ def backtrace_indirection(ind, maxdepth, terminals=[])
1901
1910
  if not ind.origin
1902
1911
  puts "backtrace_ind: no origin for #{ind}" if $VERBOSE
1903
1912
  return [ind]
@@ -1915,7 +1924,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
1915
1924
  }
1916
1925
 
1917
1926
  # resolve pointers (they may include Indirections)
1918
- backtrace_value(ind.target, maxdepth).each { |ptr|
1927
+ backtrace_value(ind.target, maxdepth, terminals).each { |ptr|
1919
1928
  # find write xrefs to the ptr
1920
1929
  refs = []
1921
1930
  each_xref(ptr, :w) { |x|
@@ -1945,7 +1954,7 @@ puts "backtrace #{type} found #{expr} from #{di} orig #{@decoded[origin] || Expr
1945
1954
  puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtrace
1946
1955
  ret |= [Expression::Unknown]
1947
1956
  when :end
1948
- if not refs.empty? and (expr == true or not need_backtrace(expr))
1957
+ if not refs.empty? and (expr == true or not need_backtrace(expr, terminals))
1949
1958
  if expr == true
1950
1959
  # found a path avoiding the :w xrefs, read the encoded initial value
1951
1960
  ret |= [decode_imm[ptr, ind.len]]
@@ -1975,7 +1984,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1975
1984
  # may have new indirections... recall bt_value ?
1976
1985
  #if not need_backtrace(expr)
1977
1986
  if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty?
1978
- ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
1987
+ ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length, terminals)
1979
1988
  false
1980
1989
  else
1981
1990
  expr
@@ -1985,7 +1994,7 @@ puts " backtrace_indirection for #{ind.target} failed: #{ev}" if debug_backtra
1985
1994
  expr = backtrace_emu_subfunc(h[:func], h[:funcaddr], h[:addr], expr, ind.origin, maxdepth-h[:loopdetect].length)
1986
1995
  #if not need_backtrace(expr)
1987
1996
  if expr.expr_externals.all? { |e| @prog_binding[e] or @function[normalize(e)] } and expr.expr_indirections.empty?
1988
- ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length)
1997
+ ret |= backtrace_value(expr, maxdepth-1-h[:loopdetect].length, terminals)
1989
1998
  false
1990
1999
  else
1991
2000
  expr
data/metasm/dynldr.rb CHANGED
@@ -57,6 +57,12 @@ extern VALUE *rb_eArgError __attribute__((import));
57
57
  #define DYNLDR_RUBY_19 #{RUBY_VERSION >= '1.9' ? 1 : 0}
58
58
  #endif
59
59
 
60
+ // Ruby 3.2+ changed the RString/RArray struct layout (Variable Width Allocation).
61
+ // len is now a direct field, and embedded string data starts after len (at the ptr offset).
62
+ #ifndef DYNLDR_RUBY_32
63
+ #define DYNLDR_RUBY_32 #{RUBY_VERSION >= '3.2' ? 1 : 0}
64
+ #endif
65
+
60
66
  #if #{RUBY_VERSION >= '2.0' ? 1 : 0}
61
67
  // flonums. WHY?
62
68
  // also breaks Qtrue/Qnil
@@ -69,11 +75,24 @@ extern VALUE *rb_eArgError __attribute__((import));
69
75
  #define T_FIXNUM 0x15
70
76
  #define T_MASK 0x1f
71
77
  #define RSTRING_NOEMBED (1<<13)
72
- #define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->len)
73
- #define STR_LEN(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->len : (RString(o)->flags >> 14) & 0x1f)
78
+ #if DYNLDR_RUBY_32
79
+ // Ruby 3.2+: len is always a direct field; embedded string data starts at the ptr field offset
80
+ #define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->ptr)
81
+ #define STR_LEN(o) (RString(o)->len)
82
+ #else
83
+ // Ruby 1.9 - 3.1: embedded string data starts at the len field offset, length encoded in flags
84
+ #define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->len)
85
+ #define STR_LEN(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->len : (RString(o)->flags >> 14) & 0x1f)
86
+ #endif
74
87
  #define RARRAY_EMBED (1<<13)
75
- #define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
76
- #define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 3) : RArray(o)->len)
88
+ #if DYNLDR_RUBY_32
89
+ // Ruby 3.2+: embedded array data starts at the len field offset (unchanged), but length uses more bits
90
+ #define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
91
+ #define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 0x1f) : RArray(o)->len)
92
+ #else
93
+ #define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
94
+ #define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 3) : RArray(o)->len)
95
+ #endif
77
96
  #else
78
97
  #define T_STRING 0x07
79
98
  #define T_ARRAY 0x09
data/metasm/encode.rb CHANGED
@@ -290,6 +290,17 @@ class Expression
290
290
  end
291
291
  end
292
292
 
293
# encode the value of this expression as a length-encoded immediate
# (7 value bits per byte, least-significant group first, bit 7 set on every
# byte except the last one)
#
# signed: when true, a positive value whose last 7-bit group would have
#         bit 6 set gets one more byte, so that bit 6 of the final byte
#         always carries the sign
# negative values are always emitted down to their sign bit
#
# returns an EncodedData holding the bytes
# raises if the expression does not reduce to an Integer
def encode_leb(signed=false)
  value = reduce
  raise "need numeric value for #{self}" if not value.kind_of?(::Integer)

  # true while value does not fit in the final 7-bit group
  more_groups = lambda { |n| n > 0x7f or n < -0x40 or (signed and n > 0x3f) }

  edata = EncodedData.new
  while more_groups[value]
    group = value & 0x7f
    edata << [0x80 | group].pack('C*')
    value >>= 7
  end
  edata << [value & 0x7f].pack('C*')
end
303
+
293
304
  class << self
294
305
  def encode_imm(val, type, endianness, backtrace=nil)
295
306
  type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer