metasm 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -0
  3. data.tar.gz.sig +0 -0
  4. data/Gemfile +3 -2
  5. data/metasm.gemspec +3 -2
  6. data/metasm.rb +4 -1
  7. data/metasm/compile_c.rb +2 -2
  8. data/metasm/cpu/arc/decode.rb +0 -21
  9. data/metasm/cpu/arc/main.rb +4 -4
  10. data/metasm/cpu/arm/decode.rb +1 -5
  11. data/metasm/cpu/arm/main.rb +3 -3
  12. data/metasm/cpu/arm64/decode.rb +2 -6
  13. data/metasm/cpu/arm64/main.rb +5 -5
  14. data/metasm/cpu/bpf/decode.rb +3 -35
  15. data/metasm/cpu/bpf/main.rb +5 -5
  16. data/metasm/cpu/bpf/render.rb +1 -12
  17. data/metasm/cpu/cy16/decode.rb +0 -6
  18. data/metasm/cpu/cy16/main.rb +3 -3
  19. data/metasm/cpu/cy16/render.rb +0 -11
  20. data/metasm/cpu/dalvik/decode.rb +4 -26
  21. data/metasm/cpu/dalvik/main.rb +20 -2
  22. data/metasm/cpu/dalvik/opcodes.rb +3 -2
  23. data/metasm/cpu/{mips/compile_c.rb → ebpf.rb} +5 -2
  24. data/metasm/cpu/ebpf/debug.rb +61 -0
  25. data/metasm/cpu/ebpf/decode.rb +142 -0
  26. data/metasm/cpu/ebpf/main.rb +58 -0
  27. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  28. data/metasm/cpu/ebpf/render.rb +36 -0
  29. data/metasm/cpu/ia32/debug.rb +39 -1
  30. data/metasm/cpu/ia32/decode.rb +111 -90
  31. data/metasm/cpu/ia32/decompile.rb +45 -37
  32. data/metasm/cpu/ia32/main.rb +10 -0
  33. data/metasm/cpu/ia32/parse.rb +6 -0
  34. data/metasm/cpu/mcs51/decode.rb +1 -1
  35. data/metasm/cpu/mcs51/main.rb +11 -0
  36. data/metasm/cpu/mips/decode.rb +8 -18
  37. data/metasm/cpu/mips/main.rb +3 -3
  38. data/metasm/cpu/mips/opcodes.rb +1 -1
  39. data/metasm/cpu/msp430/decode.rb +2 -6
  40. data/metasm/cpu/msp430/main.rb +3 -3
  41. data/metasm/cpu/openrisc.rb +11 -0
  42. data/metasm/cpu/openrisc/debug.rb +106 -0
  43. data/metasm/cpu/openrisc/decode.rb +182 -0
  44. data/metasm/cpu/openrisc/decompile.rb +350 -0
  45. data/metasm/cpu/openrisc/main.rb +70 -0
  46. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  47. data/metasm/cpu/openrisc/render.rb +37 -0
  48. data/metasm/cpu/ppc/decode.rb +0 -25
  49. data/metasm/cpu/ppc/main.rb +6 -6
  50. data/metasm/cpu/ppc/opcodes.rb +3 -4
  51. data/metasm/cpu/python/decode.rb +0 -20
  52. data/metasm/cpu/python/main.rb +1 -1
  53. data/metasm/cpu/sh4/decode.rb +2 -6
  54. data/metasm/cpu/sh4/main.rb +25 -23
  55. data/metasm/cpu/st20/decode.rb +0 -7
  56. data/metasm/cpu/webasm.rb +11 -0
  57. data/metasm/cpu/webasm/debug.rb +31 -0
  58. data/metasm/cpu/webasm/decode.rb +321 -0
  59. data/metasm/cpu/webasm/decompile.rb +386 -0
  60. data/metasm/cpu/webasm/encode.rb +104 -0
  61. data/metasm/cpu/webasm/main.rb +81 -0
  62. data/metasm/cpu/webasm/opcodes.rb +214 -0
  63. data/metasm/cpu/x86_64/compile_c.rb +13 -9
  64. data/metasm/cpu/x86_64/parse.rb +1 -1
  65. data/metasm/cpu/z80/decode.rb +0 -27
  66. data/metasm/cpu/z80/main.rb +3 -3
  67. data/metasm/cpu/z80/render.rb +0 -11
  68. data/metasm/debug.rb +43 -8
  69. data/metasm/decode.rb +62 -14
  70. data/metasm/decompile.rb +793 -466
  71. data/metasm/disassemble.rb +188 -131
  72. data/metasm/disassemble_api.rb +30 -17
  73. data/metasm/dynldr.rb +2 -2
  74. data/metasm/encode.rb +8 -2
  75. data/metasm/exe_format/autoexe.rb +2 -0
  76. data/metasm/exe_format/coff.rb +21 -3
  77. data/metasm/exe_format/coff_decode.rb +12 -0
  78. data/metasm/exe_format/coff_encode.rb +6 -3
  79. data/metasm/exe_format/dex.rb +13 -3
  80. data/metasm/exe_format/elf.rb +12 -2
  81. data/metasm/exe_format/elf_decode.rb +59 -1
  82. data/metasm/exe_format/main.rb +2 -0
  83. data/metasm/exe_format/mz.rb +1 -0
  84. data/metasm/exe_format/pe.rb +25 -3
  85. data/metasm/exe_format/wasm.rb +402 -0
  86. data/metasm/gui/dasm_decomp.rb +171 -95
  87. data/metasm/gui/dasm_graph.rb +61 -2
  88. data/metasm/gui/dasm_hex.rb +2 -2
  89. data/metasm/gui/dasm_main.rb +45 -19
  90. data/metasm/gui/debug.rb +13 -4
  91. data/metasm/gui/gtk.rb +12 -4
  92. data/metasm/main.rb +108 -103
  93. data/metasm/os/emulator.rb +175 -0
  94. data/metasm/os/main.rb +11 -6
  95. data/metasm/parse.rb +23 -12
  96. data/metasm/parse_c.rb +189 -135
  97. data/metasm/preprocessor.rb +16 -1
  98. data/misc/openrisc-parser.rb +79 -0
  99. data/samples/dasm-plugins/scanxrefs.rb +6 -4
  100. data/samples/dasm-plugins/selfmodify.rb +8 -8
  101. data/samples/dbg-plugins/trace_func.rb +1 -1
  102. data/samples/disassemble-gui.rb +14 -3
  103. data/samples/emubios.rb +251 -0
  104. data/samples/emudbg.rb +127 -0
  105. data/samples/lindebug.rb +79 -78
  106. data/samples/metasm-shell.rb +8 -8
  107. data/tests/all.rb +1 -1
  108. data/tests/expression.rb +2 -0
  109. data/tests/graph_layout.rb +1 -1
  110. data/tests/ia32.rb +1 -0
  111. data/tests/mips.rb +1 -1
  112. data/tests/preprocessor.rb +18 -0
  113. metadata +124 -6
  114. metadata.gz.sig +0 -0
@@ -0,0 +1,36 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/ebpf/opcodes'
8
+ require 'metasm/render'
9
+
10
+ module Metasm
11
+ class EBPF
12
+ class Reg
13
+ include Renderable
14
+ def render ; ["r#@v"] end
15
+ end
16
+
17
+ class Memref
18
+ include Renderable
19
+ def render
20
+ r = []
21
+ r << { 1 => 'byte ', 2 => 'word ', 4 => 'dword ', 8 => 'qword ' }[@msz]
22
+ r << '['
23
+ r << @base if @base
24
+ r << '+' if @base and @offset
25
+ r << @offset if @offset
26
+ r << ']'
27
+ end
28
+ end
29
+
30
+ class Pktref
31
+ def render
32
+ ['pkt '] + super
33
+ end
34
+ end
35
+ end
36
+ end
@@ -66,9 +66,11 @@ class Ia32
66
66
  end
67
67
  end
68
68
 
69
+ DBG_BPX = "\xcc"
70
+ DBG_BPX.force_encoding('BINARY') if DBG_BPX.respond_to?(:force_encoding)
69
71
  def dbg_enable_bpx(dbg, bp)
70
72
  bp.internal[:previous] ||= dbg.memory[bp.address, 1]
71
- dbg.memory[bp.address, 1] = "\xcc"
73
+ dbg.memory[bp.address, 1] = DBG_BPX
72
74
  end
73
75
 
74
76
  def dbg_disable_bpx(dbg, bp)
@@ -189,5 +191,41 @@ class Ia32
189
191
  def dbg_func_arg_set(dbg, argnr, arg)
190
192
  dbg.memory_write_int(Expression[:esp, :+, 4*(argnr+1)], arg)
191
193
  end
194
+
195
+ def dbg_resolve_pc(di, fbd, pc_reg, dbg_ctx)
196
+ a = di.instruction.args.map { |aa| symbolic(aa) }
197
+
198
+ cond = case di.opcode.name
199
+ when 'jz', 'je'; dbg_ctx.get_flag(:z)
200
+ when 'jnz', 'jne'; !dbg_ctx.get_flag(:z)
201
+ when 'jo'; dbg_ctx.get_flag(:o)
202
+ when 'jno'; !dbg_ctx.get_flag(:o)
203
+ when 'js'; dbg_ctx.get_flag(:s)
204
+ when 'jns'; !dbg_ctx.get_flag(:s)
205
+ when 'jc', 'jb', 'jnae'; dbg_ctx.get_flag(:c)
206
+ when 'jnc', 'jnb', 'jae'; !dbg_ctx.get_flag(:c)
207
+ when 'jbe', 'jna'; dbg_ctx.get_flag(:c) or dbg_ctx.get_flag(:z)
208
+ when 'jnbe', 'ja'; !dbg_ctx.get_flag(:c) and !dbg_ctx.get_flag(:z)
209
+ when 'jl', 'jnge'; dbg_ctx.get_flag(:s) != dbg_ctx.get_flag(:o)
210
+ when 'jnl', 'jge'; dbg_ctx.get_flag(:s) == dbg_ctx.get_flag(:o)
211
+ when 'jle', 'jng'; dbg_ctx.get_flag(:z) or dbg_ctx.get_flag(:s) != dbg_ctx.get_flag(:o)
212
+ when 'jnle', 'jg'; !dbg_ctx.get_flag(:z) and dbg_ctx.get_flag(:s) == dbg_ctx.get_flag(:o)
213
+ when 'jp', 'jpe'; dbg_ctx.get_flag(:p)
214
+ when 'jnp', 'jpo'; !dbg_ctx.get_flag(:p)
215
+ when 'loop'; dbg_ctx[dbg_register_list[2]] != 0
216
+ when 'loopz', 'loope'; dbg_ctx[dbg_register_list[2]] != 0 and dbg_ctx.get_flag(:z)
217
+ when 'loopnz', 'loopne'; dbg_ctx[dbg_register_list[2]] != 0 and !dbg_ctx.get_flag(:z)
218
+ when 'jcxz', 'jecxz', 'jrcxz'
219
+ mask = {?c => 0xffff, ?e => 0xffff_ffff, ?r => -1}[di.opcode.name[1]]
220
+ dbg_ctx[dbg_register_list[2]] & mask == 0
221
+ else return super(di, fbd, pc_reg, dbg_ctx)
222
+ end
223
+
224
+ if cond
225
+ fbd[pc_reg] = a.last
226
+ else
227
+ fbd[pc_reg] = di.next_addr
228
+ end
229
+ end
192
230
  end
193
231
  end
@@ -323,12 +323,6 @@ class Ia32
323
323
  end
324
324
  end
325
325
 
326
- # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
327
- def backtrace_binding
328
- @backtrace_binding ||= init_backtrace_binding
329
- end
330
- def backtrace_binding=(b) @backtrace_binding = b end
331
-
332
326
  def opsz(di, op=nil)
333
327
  if di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 48-@size
334
328
  else @size
@@ -443,7 +437,7 @@ class Ia32
443
437
  lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] }
444
438
  { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } }
445
439
  when 'sahf'
446
- lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] }
440
+ lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, 8+pos], :&, 1] }
447
441
  { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } }
448
442
  when 'lahf'
449
443
  lambda { |di|
@@ -452,7 +446,7 @@ class Ia32
452
446
  bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a]
453
447
  bts[6, :eflag_z]
454
448
  bts[7, :eflag_s]
455
- { eax => efl }
449
+ { Expression[[eax, :>>, 8], :&, 0xff] => efl }
456
450
  }
457
451
  when 'pushad'
458
452
  lambda { |di|
@@ -663,6 +657,24 @@ class Ia32
663
657
  { a0 => 0 }
664
658
  end
665
659
  }
660
+ when 'movdqa', 'movdqu', 'movaps', 'movups'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
661
+ when 'cmpxchg'; lambda { |di, a0, a1| # eax == a0 ? a0 <= a1, zf <= 1 : eax <= a0, zf <= 0
662
+ eax_ = self.class::Reg.new(0, opsz(di)).symbolic
663
+ cmp = Expression[eax_, :==, a0]
664
+ { :eflag_z => cmp,
665
+ eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, a0]],
666
+ a0 => Expression[[cmp, :*, a1], :|, [[1, :-, cmp], :*, a0]] } }
667
+ when 'cmpxchg8b', 'cmpxchg16b'; lambda { |di, a0| # edx:eax == mem ? mem <= ecx:ebx, zf <= 1 : edx:eax <= mem, zf <= 0
668
+ sz = (di.opcode.name =~ /8b/ ? 32 : 64)
669
+ eax_ = self.class::Reg.new(0, sz).symbolic
670
+ ecx_ = self.class::Reg.new(1, sz).symbolic
671
+ edx_ = self.class::Reg.new(2, sz).symbolic
672
+ ebx_ = self.class::Reg.new(3, sz).symbolic
673
+ cmp = Expression[[[edx_, :<<, sz], :|, eax_], :==, a0]
674
+ { :eflag_z => cmp,
675
+ eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, [a0, :&, (1 << sz) - 1]]],
676
+ edx_ => Expression[[cmp, :*, edx_], :|, [[1, :-, cmp], :*, [a0, :>>, sz]]],
677
+ a0 => Expression[[cmp, :*, [[ecx_, :<<, sz], :|, ebx_]], :|, [[1, :-, cmp], :*, a0]] } }
666
678
  when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} }
667
679
  end
668
680
 
@@ -690,7 +702,7 @@ class Ia32
690
702
  end
691
703
  ret
692
704
  }
693
- when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
705
+ when 'inc', 'dec', 'neg', 'shl', 'shr', 'sal', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
694
706
  lambda { |di, a0, *a|
695
707
  ret = (binding ? binding[di, a0, *a] : {})
696
708
  res = ret[a0] || Expression::Unknown
@@ -699,6 +711,8 @@ class Ia32
699
711
  case op
700
712
  when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0]
701
713
  when 'inc', 'dec' # don't touch carry flag
714
+ when 'shr', 'sar', 'shrd'; ret[:eflag_c] = Expression[[a0, :>>, [a[0], :-, 1]], :&, 1] # XXX shr 0 => no touch flag
715
+ when 'shl', 'sal', 'shld'; ret[:eflag_c] = Expression[[a0, :>>, [di.instruction.args[0].sz, :-, a[0]]], :&, 1]
702
716
  else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ?
703
717
  end
704
718
  ret[:eflag_o] = case op
@@ -726,8 +740,8 @@ class Ia32
726
740
  @backtrace_binding
727
741
  end
728
742
 
729
- # returns the condition (bool Expression) under which a conditionnal jump is taken
730
- # returns nil if not a conditionnal jump
743
+ # returns the condition (bool Expression) under which a conditional jump is taken
744
+ # returns nil if not a conditional jump
731
745
  # backtrace for the condition must include the jump itself (eg loop -> ecx--)
732
746
  def get_jump_condition(di)
733
747
  ecx = register_symbols[1]
@@ -742,12 +756,7 @@ class Ia32
742
756
  end
743
757
 
744
758
  def get_backtrace_binding(di)
745
- a = di.instruction.args.map { |arg|
746
- case arg
747
- when ModRM, Reg, SimdReg; arg.symbolic(di)
748
- else arg
749
- end
750
- }
759
+ a = di.instruction.args.map { |arg| symbolic(arg, di) }
751
760
 
752
761
  if binding = backtrace_binding[di.opcode.basename]
753
762
  bd = binding[di, *a]
@@ -798,14 +807,24 @@ class Ia32
798
807
  end
799
808
 
800
809
  case di.opcode.name
801
- when 'push', 'call'
802
- fbd = fbd.dup
810
+ when /^push/, 'call'
811
+ ori = fbd
812
+ fbd = {}
803
813
  sz = opsz(di)/8
804
814
  esp = register_symbols[4]
805
- if i = fbd.delete(Indirection[esp, sz])
806
- fbd[Indirection[[esp, :-, sz], sz]] = i
815
+ if ori[esp] and ori[Indirection[esp, sz]]
816
+ ori.each { |k, v|
817
+ if k.kind_of?(Indirection)
818
+ fbd[k.bind(esp => ori[esp]).reduce_rec] = v
819
+ else
820
+ fbd[k] = v
821
+ end
822
+ }
823
+ else
824
+ fbd = ori.dup
825
+ fbd[:incomplete_binding] = Expression[1] # TODO
807
826
  end
808
- when 'pop', 'ret' # nothing to do
827
+ when /^pop/, 'ret' # nothing to do
809
828
  when /^(push|pop|call|ret|enter|leave|stos|movs|lods|scas|cmps)/
810
829
  fbd = fbd.dup
811
830
  fbd[:incomplete_binding] = Expression[1] # TODO
@@ -820,9 +839,8 @@ class Ia32
820
839
  case di.opcode.basename
821
840
  when 'ret'; return [Indirection[register_symbols[4], sz/8, di.address]]
822
841
  when 'jmp', 'call'
823
- a = di.instruction.args.first
824
- if dasm and a.kind_of?(ModRM) and a.imm and (a.s == sz/8 or a.s == 4) and not a.b and dasm.get_section_at(a.imm)
825
- return get_xrefs_x_jmptable(dasm, di, a, a.s*8)
842
+ if dasm and not di.instruction.args.first.kind_of?(Expression) and switch_table = get_xrefs_x_jmptable(dasm, di)
843
+ return switch_table
826
844
  end
827
845
  end
828
846
 
@@ -834,70 +852,57 @@ class Ia32
834
852
  when Expression, ::Integer; [Expression[tg]]
835
853
  when Farptr; tg.seg.reduce < 0x30 ? [tg.addr] : [Expression[[tg.seg, :*, 0x10], :+, tg.addr]]
836
854
  else
837
- puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG
855
+ puts "unhandled setip at #{Expression[di.address]} #{di.instruction}" if $DEBUG
838
856
  []
839
857
  end
840
858
  end
841
859
 
842
- # we detected a jmp table (jmp [base+4*idx])
843
- # try to return an accurate dest list
844
- def get_xrefs_x_jmptable(dasm, di, mrm, sz)
845
- # include the symbolic dest for backtrack stuff
846
- ret = [Expression[mrm.symbolic(di)]]
847
- i = mrm.i
848
- if di.block.list.length == 2 and di.block.list[0].opcode.name =~ /^mov/ and a0 = di.block.list[0].instruction.args[0] and
849
- a0.respond_to? :symbolic and a0.symbolic == i.symbolic
850
- i = di.block.list[0].instruction.args[1]
851
- end
852
- pb = di.block.from_normal.to_a
853
- if pb.length == 1 and pdi = dasm.decoded[pb[0]] and pdi.opcode.name =~ /^jn?be?/ and ppdi = pdi.block.list[-2] and ppdi.opcode.name == 'cmp' and
854
- ppdi.instruction.args[0].symbolic == i.symbolic and lim = Expression[ppdi.instruction.args[1]].reduce and lim.kind_of? Integer
855
- # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax]
856
- s = dasm.get_section_at(mrm.imm)
857
- lim += 1 if pdi.opcode.name[-1] == ?e
858
- lim.times { |v|
859
- dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8))
860
- ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
861
- s[0].read(sz/8)
862
- }
863
- l = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref')
864
- replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[l])
865
- # add 'case 1' comments
866
- cases = {}
867
- ret.each_with_index { |ind, idx|
868
- idx -= 1 # ret[0] = symbolic
869
- next if idx < 0
870
- a = dasm.backtrace(ind, di.address)
871
- if a.length == 1 and a[0].kind_of?(Expression) and addr = a[0].reduce and addr.kind_of?(::Integer)
872
- (cases[addr] ||= []) << idx
873
- end
860
+ # indirect call, try to match a switch table pattern (eg jmp [base+4*idx])
861
+ # return a list of target addresses if found, nil otherwise
862
+ def get_xrefs_x_jmptable(dasm, di)
863
+ puts "search jmptable for #{Expression[di.address]} #{di.instruction}" if $DEBUG
864
+ arg0 = di.instruction.args.first.symbolic(di)
865
+
866
+ bt_log = []
867
+ dasm.backtrace(arg0, di.address, :maxdepth => 3, :log => bt_log)
868
+
869
+ expr = nil
870
+ index = nil
871
+ index_max = nil
872
+
873
+ bt_log.each { |btl|
874
+ next if btl[0] != :up
875
+ last = dasm.di_at(btl[4])
876
+ break if not last or last.block.to_normal.length > 2
877
+ next if last.block.to_normal.length != 2
878
+ # search cmp eax, 42 ; ja too_big ; jmp [base+4*eax]
879
+ # XXX 256 cases switch => no cmp...
880
+ prelast = last.block.list.reverse.find { |pl| pl.opcode.name == 'cmp' }
881
+ break unless prelast and cmp_value = prelast.instruction.args.last and cmp_value.kind_of?(Expression) and cmp_value.reduce.kind_of?(::Integer)
882
+ cmp_value = cmp_value.reduce % (1 << prelast.instruction.args.first.sz) # cmp al, -12h ; jnbe => -12h is unsigned 0eeh
883
+ index = prelast.instruction.args.first.symbolic(prelast)
884
+ index = index.externals.first if index.kind_of?(Expression) # cmp bl, 13 => ebx
885
+ expr = Expression[btl[1], :&, ((1 << @size) - 1)] # XXX without the mask, additions may overflow (this breaks elsewhere too, need Expr32)
886
+ (expr.externals.grep(Symbol) - [index]).uniq.each { |r|
887
+ rv = dasm.backtrace(r, prelast.address, :maxdepth => 3)
888
+ expr = expr.bind(r => rv[0]) if rv.length == 1
874
889
  }
875
- cases.each { |addr, list|
876
- dasm.add_comment(addr, "case #{list.join(', ')}:")
877
- }
878
- return ret
879
- end
880
-
881
- puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE
882
- di.add_comment 'wildguess'
883
- if s = dasm.get_section_at(mrm.imm - 3*sz/8)
884
- v = -3
885
- else
886
- s = dasm.get_section_at(mrm.imm)
887
- v = 0
888
- end
889
- while s[0].ptr < s[0].length
890
- ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness)
891
- diff = Expression[ptr, :-, di.address].reduce
892
- if (diff.kind_of? ::Integer and diff.abs < 4096) or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr))
893
- dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8))
894
- ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
895
- elsif v > 0
896
- break
890
+ cmp_value = prelast.instruction.args.last.reduce % (1 << prelast.instruction.args.first.sz)
891
+ case last.opcode.name
892
+ when 'jae', 'jb', 'jnae', 'jnb'; index_max = cmp_value-1
893
+ when 'ja', 'jbe', 'jna', 'jnbe'; index_max = cmp_value
894
+ else; expr = nil
897
895
  end
898
- v += 1
896
+ break
897
+ }
898
+
899
+ if expr and expr.externals.grep(Symbol).uniq == [index]
900
+ # yay !
901
+ # include the symbolic dest for backtrace stuff
902
+ puts "found jmptable for #{Expression[di.address]} #{di.instruction} (#{index_max+1} entries)" if $VERBOSE
903
+ # TODO add labels / tables / xrefs etc
904
+ [Expression[arg0]] + (0..index_max).map { |i| expr.bind(index => i) }
899
905
  end
900
- ret
901
906
  end
902
907
 
903
908
  # checks if expr is a valid return expression matching the :saveip instruction
@@ -1239,11 +1244,15 @@ class Ia32
1239
1244
  # the binding will not include memory access from subfunctions
1240
1245
  # entry should be an entrypoint of the disassembler if finish is nil
1241
1246
  # the code sequence must have only one end, with no to_normal
1242
- def code_binding(dasm, entry, finish=nil)
1247
+ # options:
1248
+ # :include_flags => include EFLAGS in the returned binding
1249
+ def code_binding(dasm, entry, finish=nil, nargs={})
1250
+ include_flags = nargs.delete :include_flags
1251
+
1243
1252
  entry = dasm.normalize(entry)
1244
1253
  finish = dasm.normalize(finish) if finish
1245
1254
  lastdi = nil
1246
- binding = {}
1255
+ bd = {}
1247
1256
  bt = lambda { |from, expr, inc_start|
1248
1257
  ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start)
1249
1258
  ret.length == 1 ? ret.first : Expression::Unknown
@@ -1268,7 +1277,7 @@ class Ia32
1268
1277
  get_xrefs_w(dasm, di).each { |waddr, len|
1269
1278
  # we want the ptr expressed with reg values at entry
1270
1279
  ptr = bt[a, waddr, false]
1271
- binding[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true]
1280
+ bd[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true]
1272
1281
  }
1273
1282
  false
1274
1283
  end
@@ -1291,13 +1300,13 @@ class Ia32
1291
1300
  if lastdi.opcode.props[:setip]
1292
1301
  e = get_xrefs_x(dasm, lastdi)
1293
1302
  raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec]
1294
- binding[:ip] = bt[lastdi.address, e.first, false]
1303
+ bd[:ip] = bt[lastdi.address, e.first, false]
1295
1304
  elsif not lastdi.opcode.props[:stopexec]
1296
- binding[:ip] = lastdi.next_addr
1305
+ bd[:ip] = lastdi.next_addr
1297
1306
  end
1298
1307
  end
1299
1308
  end
1300
- binding.delete_if { |k, v| Expression[k] == Expression[v] }
1309
+ bd.delete_if { |k, v| Expression[k] == Expression[v] }
1301
1310
 
1302
1311
  # add register binding
1303
1312
  raise "no code_binding end" if not lastdi and not finish
@@ -1310,10 +1319,22 @@ class Ia32
1310
1319
  mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride)
1311
1320
  mask = 0xffff_ffff_ffff_ffff if @size == 64
1312
1321
  val = Expression[val, :&, mask].reduce
1313
- binding[reg] = Expression[val]
1322
+ bd[reg] = Expression[val]
1314
1323
  }
1315
1324
 
1316
- binding
1325
+ # add EFLAGS binding
1326
+ if include_flags
1327
+ [:eflag_z, :eflag_s, :eflag_c, :eflag_o].each { |eflag|
1328
+ val =
1329
+ if lastdi; bt[lastdi.address, eflag, true]
1330
+ else bt[finish, eflag, false]
1331
+ end
1332
+ next if val == Expression[eflag]
1333
+ bd[eflag] = Expression[val.reduce]
1334
+ }
1335
+ end
1336
+
1337
+ bd
1317
1338
  end
1318
1339
 
1319
1340
  # trace the stack pointer register across a function, rename occurences of esp+XX to esp+var_XX
@@ -59,7 +59,7 @@ class Ia32
59
59
 
60
60
  # add di-specific registry written/accessed
61
61
  def decompile_func_finddeps_di(dcmp, func, di, a, w)
62
- a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
62
+ a << register_symbols[0] if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
63
63
  end
64
64
 
65
65
  # list variable dependency for each block, remove useless writes
@@ -111,7 +111,7 @@ class Ia32
111
111
  }
112
112
  if stackoff # last block instr == subfunction call
113
113
  deps_r[b] |= deps_subfunc[b] - deps_w[b]
114
- deps_w[b] |= [:eax, :ecx, :edx] # standard ABI
114
+ deps_w[b] |= register_symbols[0, 3] # standard ABI
115
115
  end
116
116
  }
117
117
 
@@ -140,7 +140,7 @@ class Ia32
140
140
  bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
141
141
  false
142
142
  }
143
- if r == :eax and (rdi || blk.list.last).opcode.name == 'ret'
143
+ if r == register_symbols[0] and (rdi || blk.list.last).opcode.name == 'ret'
144
144
  func.type.type = C::BaseType.new(:void)
145
145
  false
146
146
  elsif rdi and rdi.backtrace_binding[r]
@@ -194,15 +194,18 @@ class Ia32
194
194
  end
195
195
 
196
196
  def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
197
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
198
+ ebx, esp, ebp = ebx, esp, ebp # fix ruby unused var warning
197
199
  scope = func.initializer
198
200
  func.type.args.each { |a| scope.symbol[a.name] = a }
199
201
  stmts = scope.statements
200
202
  blocks_toclean = myblocks.dup
201
203
  func_entry = myblocks.first[0]
204
+ di_addr = nil
202
205
  until myblocks.empty?
203
206
  b, to = myblocks.shift
204
207
  if l = dcmp.dasm.get_label_at(b)
205
- stmts << C::Label.new(l)
208
+ stmts << C::Label.new(l).with_misc(:di_addr => b)
206
209
  end
207
210
 
208
211
  # list of assignments [[dest reg, expr assigned]]
@@ -210,7 +213,11 @@ class Ia32
210
213
  # reg binding (reg => value, values.externals = regs at block start)
211
214
  binding = {}
212
215
  # Expr => CExpr
213
- ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
216
+ ce = lambda { |*e|
217
+ ret = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
218
+ dcmp.walk_ce(ret) { |ee| ee.with_misc(:di_addr => di_addr) } if di_addr
219
+ ret
220
+ }
214
221
  # Expr => Expr.bind(binding) => CExpr
215
222
  ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }
216
223
 
@@ -235,6 +242,7 @@ class Ia32
235
242
  # returns an array to use as funcall arguments
236
243
  get_func_args = lambda { |di, f|
237
244
  # XXX see remarks in #finddeps
245
+ # TODO x64
238
246
  bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
239
247
  stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil
240
248
  args_todo = f.type.args.to_a.dup
@@ -283,19 +291,20 @@ class Ia32
283
291
 
284
292
  # go !
285
293
  dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
294
+ di_addr = di.address
286
295
  a = di.instruction.args
287
296
  if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
288
297
  # conditional jump
289
298
  commit[]
290
299
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
291
300
  if di.opcode.name =~ /^loop(.+)?/
292
- cx = C::CExpression[:'--', ceb[:ecx]]
301
+ cx = C::CExpression[:'--', ceb[ecx]]
293
302
  cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx
294
303
  else
295
304
  cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
296
305
  end
297
306
  # XXX switch/indirect/multiple jmp
298
- stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
307
+ stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
299
308
  to.delete dcmp.dasm.normalize(n)
300
309
  next
301
310
  end
@@ -312,7 +321,7 @@ class Ia32
312
321
  f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
313
322
  a2 = a2.symbolic(di)
314
323
  a2 = [a2, :&, 0xffff] if sz == 16
315
- stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type)
324
+ stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type).with_misc(:di_addr => di_addr)
316
325
  next
317
326
  end
318
327
  case a2
@@ -324,7 +333,7 @@ class Ia32
324
333
  f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
325
334
  t = f.type.type
326
335
  binding.delete a1.symbolic(di)
327
- stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t)
336
+ stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t).with_misc(:di_addr => di_addr), t).with_misc(:di_addr => di_addr)
328
337
  next
329
338
  end
330
339
  end
@@ -333,8 +342,8 @@ class Ia32
333
342
  when 'ret'
334
343
  commit[]
335
344
  ret = nil
336
- ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
337
- stmts << C::Return.new(ret)
345
+ ret = C::CExpression[ceb[eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
346
+ stmts << C::Return.new(ret).with_misc(:di_addr => di_addr)
338
347
  when 'call' # :saveip
339
348
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
340
349
  args = []
@@ -367,9 +376,9 @@ class Ia32
367
376
  end
368
377
  end
369
378
  commit[]
370
- binding.delete :eax
371
- e = C::CExpression[f, :funcall, args]
372
- e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void)
379
+ binding.delete eax
380
+ e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di_addr)
381
+ e = C::CExpression[ce[eax], :'=', e, f.type.type].with_misc(:di_addr => di_addr) if deps[b].include? eax and f.type.type != C::BaseType.new(:void)
373
382
  stmts << e
374
383
  when 'jmp'
375
384
  #if di.comment.to_a.include? 'switch'
@@ -388,7 +397,7 @@ class Ia32
388
397
  a = di.instruction.args.first
389
398
  if a.kind_of? Expression
390
399
  elsif not a.respond_to? :symbolic
391
- stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
400
+ stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil).with_misc(:di_addr => di_addr)
392
401
  else
393
402
  n = di.instruction.args.first.symbolic(di)
394
403
  fptr = ceb[n]
@@ -399,12 +408,10 @@ class Ia32
399
408
  args = get_func_args[di, fptr.type]
400
409
  else
401
410
  proto = C::Function.new(C::BaseType.new(:void))
402
- fptr = C::CExpression[[fptr], C::Pointer.new(proto)]
411
+ fptr = C::CExpression[[fptr], C::Pointer.new(proto)].with_misc(:di_addr => di_addr)
403
412
  args = []
404
413
  end
405
- ret = C::Return.new(C::CExpression[fptr, :funcall, args])
406
- class << ret ; attr_accessor :from_instr end
407
- ret.from_instr = di
414
+ ret = C::Return.new(C::CExpression[fptr, :funcall, args].with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
408
415
  stmts << ret
409
416
  to = []
410
417
  end
@@ -418,7 +425,7 @@ class Ia32
418
425
  end
419
426
  # need a way to transform arg => :frameptr+12
420
427
  arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
421
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
428
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
422
429
  when 'lidt'
423
430
  if not dcmp.c_parser.toplevel.struct['interrupt_descriptor']
424
431
  dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
@@ -428,29 +435,29 @@ class Ia32
428
435
  dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
429
436
  end
430
437
  arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
431
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
438
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
432
439
  when 'ltr', 'lldt'
433
440
  if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
434
441
  dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
435
442
  end
436
443
  arg = di.backtrace_binding.keys.first
437
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void))
444
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
438
445
  when 'out'
439
446
  sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
440
447
  if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
441
448
  dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
442
449
  end
443
- port = di.instruction.args.grep(Expression).first || :edx
444
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void))
450
+ port = di.instruction.args.grep(Expression).first || edx
451
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[eax]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
445
452
  when 'in'
446
453
  sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
447
454
  if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
448
455
  dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
449
456
  end
450
- port = di.instruction.args.grep(Expression).first || :edx
457
+ port = di.instruction.args.grep(Expression).first || edx
451
458
  f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
452
- binding.delete :eax
453
- stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type)
459
+ binding.delete eax
460
+ stmts << C::CExpression.new(ce[eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type).with_misc(:di_addr => di_addr)
454
461
  when 'sti', 'cli'
455
462
  stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
456
463
  when /^(mov|sto|lod)s([bwdq])/
@@ -462,15 +469,15 @@ class Ia32
462
469
  blk = C::Block.new(scope)
463
470
  case op
464
471
  when 'mov'
465
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]]
466
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
467
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
472
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
473
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
474
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
468
475
  when 'sto'
469
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]]
470
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
476
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', ceb[eax]].with_misc(:di_addr => di_addr)
477
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
471
478
  when 'lod'
472
- blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]]
473
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
479
+ blk.statements << C::CExpression[ceb[eax], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
480
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
474
481
  #when 'sca'
475
482
  #when 'cmp'
476
483
  end
@@ -479,8 +486,8 @@ class Ia32
479
486
  when nil
480
487
  stmts.concat blk.statements
481
488
  when 'rep'
482
- blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]]
483
- stmts << C::While.new(C::CExpression[ceb[:ecx]], blk)
489
+ blk.statements << C::CExpression[ceb[ecx], :'=', [ceb[ecx], :-, [1]]].with_misc(:di_addr => di_addr)
490
+ stmts << C::While.new(C::CExpression[ceb[ecx]], blk).with_misc(:di_addr => di_addr)
484
491
  #when 'repz' # sca/cmp only
485
492
  #when 'repnz'
486
493
  end
@@ -489,7 +496,7 @@ class Ia32
489
496
  bd = get_fwdemu_binding(di)
490
497
  if di.backtrace_binding[:incomplete_binding]
491
498
  commit[]
492
- stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
499
+ stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di_addr)
493
500
  else
494
501
  update = {}
495
502
  bd.each { |k, v|
@@ -504,6 +511,7 @@ class Ia32
504
511
  binding.update update
505
512
  end
506
513
  end
514
+ di_addr = nil
507
515
  }
508
516
  commit[]
509
517