metasm 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -0
  3. data.tar.gz.sig +0 -0
  4. data/Gemfile +3 -2
  5. data/metasm.gemspec +3 -2
  6. data/metasm.rb +4 -1
  7. data/metasm/compile_c.rb +2 -2
  8. data/metasm/cpu/arc/decode.rb +0 -21
  9. data/metasm/cpu/arc/main.rb +4 -4
  10. data/metasm/cpu/arm/decode.rb +1 -5
  11. data/metasm/cpu/arm/main.rb +3 -3
  12. data/metasm/cpu/arm64/decode.rb +2 -6
  13. data/metasm/cpu/arm64/main.rb +5 -5
  14. data/metasm/cpu/bpf/decode.rb +3 -35
  15. data/metasm/cpu/bpf/main.rb +5 -5
  16. data/metasm/cpu/bpf/render.rb +1 -12
  17. data/metasm/cpu/cy16/decode.rb +0 -6
  18. data/metasm/cpu/cy16/main.rb +3 -3
  19. data/metasm/cpu/cy16/render.rb +0 -11
  20. data/metasm/cpu/dalvik/decode.rb +4 -26
  21. data/metasm/cpu/dalvik/main.rb +20 -2
  22. data/metasm/cpu/dalvik/opcodes.rb +3 -2
  23. data/metasm/cpu/{mips/compile_c.rb → ebpf.rb} +5 -2
  24. data/metasm/cpu/ebpf/debug.rb +61 -0
  25. data/metasm/cpu/ebpf/decode.rb +142 -0
  26. data/metasm/cpu/ebpf/main.rb +58 -0
  27. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  28. data/metasm/cpu/ebpf/render.rb +36 -0
  29. data/metasm/cpu/ia32/debug.rb +39 -1
  30. data/metasm/cpu/ia32/decode.rb +111 -90
  31. data/metasm/cpu/ia32/decompile.rb +45 -37
  32. data/metasm/cpu/ia32/main.rb +10 -0
  33. data/metasm/cpu/ia32/parse.rb +6 -0
  34. data/metasm/cpu/mcs51/decode.rb +1 -1
  35. data/metasm/cpu/mcs51/main.rb +11 -0
  36. data/metasm/cpu/mips/decode.rb +8 -18
  37. data/metasm/cpu/mips/main.rb +3 -3
  38. data/metasm/cpu/mips/opcodes.rb +1 -1
  39. data/metasm/cpu/msp430/decode.rb +2 -6
  40. data/metasm/cpu/msp430/main.rb +3 -3
  41. data/metasm/cpu/openrisc.rb +11 -0
  42. data/metasm/cpu/openrisc/debug.rb +106 -0
  43. data/metasm/cpu/openrisc/decode.rb +182 -0
  44. data/metasm/cpu/openrisc/decompile.rb +350 -0
  45. data/metasm/cpu/openrisc/main.rb +70 -0
  46. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  47. data/metasm/cpu/openrisc/render.rb +37 -0
  48. data/metasm/cpu/ppc/decode.rb +0 -25
  49. data/metasm/cpu/ppc/main.rb +6 -6
  50. data/metasm/cpu/ppc/opcodes.rb +3 -4
  51. data/metasm/cpu/python/decode.rb +0 -20
  52. data/metasm/cpu/python/main.rb +1 -1
  53. data/metasm/cpu/sh4/decode.rb +2 -6
  54. data/metasm/cpu/sh4/main.rb +25 -23
  55. data/metasm/cpu/st20/decode.rb +0 -7
  56. data/metasm/cpu/webasm.rb +11 -0
  57. data/metasm/cpu/webasm/debug.rb +31 -0
  58. data/metasm/cpu/webasm/decode.rb +321 -0
  59. data/metasm/cpu/webasm/decompile.rb +386 -0
  60. data/metasm/cpu/webasm/encode.rb +104 -0
  61. data/metasm/cpu/webasm/main.rb +81 -0
  62. data/metasm/cpu/webasm/opcodes.rb +214 -0
  63. data/metasm/cpu/x86_64/compile_c.rb +13 -9
  64. data/metasm/cpu/x86_64/parse.rb +1 -1
  65. data/metasm/cpu/z80/decode.rb +0 -27
  66. data/metasm/cpu/z80/main.rb +3 -3
  67. data/metasm/cpu/z80/render.rb +0 -11
  68. data/metasm/debug.rb +43 -8
  69. data/metasm/decode.rb +62 -14
  70. data/metasm/decompile.rb +793 -466
  71. data/metasm/disassemble.rb +188 -131
  72. data/metasm/disassemble_api.rb +30 -17
  73. data/metasm/dynldr.rb +2 -2
  74. data/metasm/encode.rb +8 -2
  75. data/metasm/exe_format/autoexe.rb +2 -0
  76. data/metasm/exe_format/coff.rb +21 -3
  77. data/metasm/exe_format/coff_decode.rb +12 -0
  78. data/metasm/exe_format/coff_encode.rb +6 -3
  79. data/metasm/exe_format/dex.rb +13 -3
  80. data/metasm/exe_format/elf.rb +12 -2
  81. data/metasm/exe_format/elf_decode.rb +59 -1
  82. data/metasm/exe_format/main.rb +2 -0
  83. data/metasm/exe_format/mz.rb +1 -0
  84. data/metasm/exe_format/pe.rb +25 -3
  85. data/metasm/exe_format/wasm.rb +402 -0
  86. data/metasm/gui/dasm_decomp.rb +171 -95
  87. data/metasm/gui/dasm_graph.rb +61 -2
  88. data/metasm/gui/dasm_hex.rb +2 -2
  89. data/metasm/gui/dasm_main.rb +45 -19
  90. data/metasm/gui/debug.rb +13 -4
  91. data/metasm/gui/gtk.rb +12 -4
  92. data/metasm/main.rb +108 -103
  93. data/metasm/os/emulator.rb +175 -0
  94. data/metasm/os/main.rb +11 -6
  95. data/metasm/parse.rb +23 -12
  96. data/metasm/parse_c.rb +189 -135
  97. data/metasm/preprocessor.rb +16 -1
  98. data/misc/openrisc-parser.rb +79 -0
  99. data/samples/dasm-plugins/scanxrefs.rb +6 -4
  100. data/samples/dasm-plugins/selfmodify.rb +8 -8
  101. data/samples/dbg-plugins/trace_func.rb +1 -1
  102. data/samples/disassemble-gui.rb +14 -3
  103. data/samples/emubios.rb +251 -0
  104. data/samples/emudbg.rb +127 -0
  105. data/samples/lindebug.rb +79 -78
  106. data/samples/metasm-shell.rb +8 -8
  107. data/tests/all.rb +1 -1
  108. data/tests/expression.rb +2 -0
  109. data/tests/graph_layout.rb +1 -1
  110. data/tests/ia32.rb +1 -0
  111. data/tests/mips.rb +1 -1
  112. data/tests/preprocessor.rb +18 -0
  113. metadata +124 -6
  114. metadata.gz.sig +0 -0
@@ -0,0 +1,36 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/ebpf/opcodes'
8
+ require 'metasm/render'
9
+
10
+ module Metasm
11
+ class EBPF
12
+ class Reg
13
+ include Renderable
14
+ def render ; ["r#@v"] end
15
+ end
16
+
17
+ class Memref
18
+ include Renderable
19
+ def render
20
+ r = []
21
+ r << { 1 => 'byte ', 2 => 'word ', 4 => 'dword ', 8 => 'qword ' }[@msz]
22
+ r << '['
23
+ r << @base if @base
24
+ r << '+' if @base and @offset
25
+ r << @offset if @offset
26
+ r << ']'
27
+ end
28
+ end
29
+
30
+ class Pktref
31
+ def render
32
+ ['pkt '] + super
33
+ end
34
+ end
35
+ end
36
+ end
@@ -66,9 +66,11 @@ class Ia32
66
66
  end
67
67
  end
68
68
 
69
+ DBG_BPX = "\xcc"
70
+ DBG_BPX.force_encoding('BINARY') if DBG_BPX.respond_to?(:force_encoding)
69
71
  def dbg_enable_bpx(dbg, bp)
70
72
  bp.internal[:previous] ||= dbg.memory[bp.address, 1]
71
- dbg.memory[bp.address, 1] = "\xcc"
73
+ dbg.memory[bp.address, 1] = DBG_BPX
72
74
  end
73
75
 
74
76
  def dbg_disable_bpx(dbg, bp)
@@ -189,5 +191,41 @@ class Ia32
189
191
  def dbg_func_arg_set(dbg, argnr, arg)
190
192
  dbg.memory_write_int(Expression[:esp, :+, 4*(argnr+1)], arg)
191
193
  end
194
+
195
+ def dbg_resolve_pc(di, fbd, pc_reg, dbg_ctx)
196
+ a = di.instruction.args.map { |aa| symbolic(aa) }
197
+
198
+ cond = case di.opcode.name
199
+ when 'jz', 'je'; dbg_ctx.get_flag(:z)
200
+ when 'jnz', 'jne'; !dbg_ctx.get_flag(:z)
201
+ when 'jo'; dbg_ctx.get_flag(:o)
202
+ when 'jno'; !dbg_ctx.get_flag(:o)
203
+ when 'js'; dbg_ctx.get_flag(:s)
204
+ when 'jns'; !dbg_ctx.get_flag(:s)
205
+ when 'jc', 'jb', 'jnae'; dbg_ctx.get_flag(:c)
206
+ when 'jnc', 'jnb', 'jae'; !dbg_ctx.get_flag(:c)
207
+ when 'jbe', 'jna'; dbg_ctx.get_flag(:c) or dbg_ctx.get_flag(:z)
208
+ when 'jnbe', 'ja'; !dbg_ctx.get_flag(:c) and !dbg_ctx.get_flag(:z)
209
+ when 'jl', 'jnge'; dbg_ctx.get_flag(:s) != dbg_ctx.get_flag(:o)
210
+ when 'jnl', 'jge'; dbg_ctx.get_flag(:s) == dbg_ctx.get_flag(:o)
211
+ when 'jle', 'jng'; dbg_ctx.get_flag(:z) or dbg_ctx.get_flag(:s) != dbg_ctx.get_flag(:o)
212
+ when 'jnle', 'jg'; !dbg_ctx.get_flag(:z) and dbg_ctx.get_flag(:s) == dbg_ctx.get_flag(:o)
213
+ when 'jp', 'jpe'; dbg_ctx.get_flag(:p)
214
+ when 'jnp', 'jpo'; !dbg_ctx.get_flag(:p)
215
+ when 'loop'; dbg_ctx[dbg_register_list[2]] != 0
216
+ when 'loopz', 'loope'; dbg_ctx[dbg_register_list[2]] != 0 and dbg_ctx.get_flag(:z)
217
+ when 'loopnz', 'loopne'; dbg_ctx[dbg_register_list[2]] != 0 and !dbg_ctx.get_flag(:z)
218
+ when 'jcxz', 'jecxz', 'jrcxz'
219
+ mask = {?c => 0xffff, ?e => 0xffff_ffff, ?r => -1}[di.opcode.name[1]]
220
+ dbg_ctx[dbg_register_list[2]] & mask == 0
221
+ else return super(di, fbd, pc_reg, dbg_ctx)
222
+ end
223
+
224
+ if cond
225
+ fbd[pc_reg] = a.last
226
+ else
227
+ fbd[pc_reg] = di.next_addr
228
+ end
229
+ end
192
230
  end
193
231
  end
@@ -323,12 +323,6 @@ class Ia32
323
323
  end
324
324
  end
325
325
 
326
- # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
327
- def backtrace_binding
328
- @backtrace_binding ||= init_backtrace_binding
329
- end
330
- def backtrace_binding=(b) @backtrace_binding = b end
331
-
332
326
  def opsz(di, op=nil)
333
327
  if di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 48-@size
334
328
  else @size
@@ -443,7 +437,7 @@ class Ia32
443
437
  lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] }
444
438
  { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } }
445
439
  when 'sahf'
446
- lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] }
440
+ lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, 8+pos], :&, 1] }
447
441
  { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } }
448
442
  when 'lahf'
449
443
  lambda { |di|
@@ -452,7 +446,7 @@ class Ia32
452
446
  bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a]
453
447
  bts[6, :eflag_z]
454
448
  bts[7, :eflag_s]
455
- { eax => efl }
449
+ { Expression[[eax, :>>, 8], :&, 0xff] => efl }
456
450
  }
457
451
  when 'pushad'
458
452
  lambda { |di|
@@ -663,6 +657,24 @@ class Ia32
663
657
  { a0 => 0 }
664
658
  end
665
659
  }
660
+ when 'movdqa', 'movdqu', 'movaps', 'movups'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
661
+ when 'cmpxchg'; lambda { |di, a0, a1| # eax == a0 ? a0 <= a1, zf <= 1 : eax <= a0, zf <= 0
662
+ eax_ = self.class::Reg.new(0, opsz(di)).symbolic
663
+ cmp = Expression[eax_, :==, a0]
664
+ { :eflag_z => cmp,
665
+ eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, a0]],
666
+ a0 => Expression[[cmp, :*, a1], :|, [[1, :-, cmp], :*, a0]] } }
667
+ when 'cmpxchg8b', 'cmpxchg16b'; lambda { |di, a0| # edx:eax == mem ? mem <= ecx:ebx, zf <= 1 : edx:eax <= mem, zf <= 0
668
+ sz = (di.opcode.name =~ /8b/ ? 32 : 64)
669
+ eax_ = self.class::Reg.new(0, sz).symbolic
670
+ ecx_ = self.class::Reg.new(1, sz).symbolic
671
+ edx_ = self.class::Reg.new(2, sz).symbolic
672
+ ebx_ = self.class::Reg.new(3, sz).symbolic
673
+ cmp = Expression[[[edx_, :<<, sz], :|, eax_], :==, a0]
674
+ { :eflag_z => cmp,
675
+ eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, [a0, :&, (1 << sz) - 1]]],
676
+ edx_ => Expression[[cmp, :*, edx_], :|, [[1, :-, cmp], :*, [a0, :>>, sz]]],
677
+ a0 => Expression[[cmp, :*, [[ecx_, :<<, sz], :|, ebx_]], :|, [[1, :-, cmp], :*, a0]] } }
666
678
  when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} }
667
679
  end
668
680
 
@@ -690,7 +702,7 @@ class Ia32
690
702
  end
691
703
  ret
692
704
  }
693
- when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
705
+ when 'inc', 'dec', 'neg', 'shl', 'shr', 'sal', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
694
706
  lambda { |di, a0, *a|
695
707
  ret = (binding ? binding[di, a0, *a] : {})
696
708
  res = ret[a0] || Expression::Unknown
@@ -699,6 +711,8 @@ class Ia32
699
711
  case op
700
712
  when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0]
701
713
  when 'inc', 'dec' # don't touch carry flag
714
+ when 'shr', 'sar', 'shrd'; ret[:eflag_c] = Expression[[a0, :>>, [a[0], :-, 1]], :&, 1] # XXX shr 0 => no touch flag
715
+ when 'shl', 'sal', 'shld'; ret[:eflag_c] = Expression[[a0, :>>, [di.instruction.args[0].sz, :-, a[0]]], :&, 1]
702
716
  else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ?
703
717
  end
704
718
  ret[:eflag_o] = case op
@@ -726,8 +740,8 @@ class Ia32
726
740
  @backtrace_binding
727
741
  end
728
742
 
729
- # returns the condition (bool Expression) under which a conditionnal jump is taken
730
- # returns nil if not a conditionnal jump
743
+ # returns the condition (bool Expression) under which a conditional jump is taken
744
+ # returns nil if not a conditional jump
731
745
  # backtrace for the condition must include the jump itself (eg loop -> ecx--)
732
746
  def get_jump_condition(di)
733
747
  ecx = register_symbols[1]
@@ -742,12 +756,7 @@ class Ia32
742
756
  end
743
757
 
744
758
  def get_backtrace_binding(di)
745
- a = di.instruction.args.map { |arg|
746
- case arg
747
- when ModRM, Reg, SimdReg; arg.symbolic(di)
748
- else arg
749
- end
750
- }
759
+ a = di.instruction.args.map { |arg| symbolic(arg, di) }
751
760
 
752
761
  if binding = backtrace_binding[di.opcode.basename]
753
762
  bd = binding[di, *a]
@@ -798,14 +807,24 @@ class Ia32
798
807
  end
799
808
 
800
809
  case di.opcode.name
801
- when 'push', 'call'
802
- fbd = fbd.dup
810
+ when /^push/, 'call'
811
+ ori = fbd
812
+ fbd = {}
803
813
  sz = opsz(di)/8
804
814
  esp = register_symbols[4]
805
- if i = fbd.delete(Indirection[esp, sz])
806
- fbd[Indirection[[esp, :-, sz], sz]] = i
815
+ if ori[esp] and ori[Indirection[esp, sz]]
816
+ ori.each { |k, v|
817
+ if k.kind_of?(Indirection)
818
+ fbd[k.bind(esp => ori[esp]).reduce_rec] = v
819
+ else
820
+ fbd[k] = v
821
+ end
822
+ }
823
+ else
824
+ fbd = ori.dup
825
+ fbd[:incomplete_binding] = Expression[1] # TODO
807
826
  end
808
- when 'pop', 'ret' # nothing to do
827
+ when /^pop/, 'ret' # nothing to do
809
828
  when /^(push|pop|call|ret|enter|leave|stos|movs|lods|scas|cmps)/
810
829
  fbd = fbd.dup
811
830
  fbd[:incomplete_binding] = Expression[1] # TODO
@@ -820,9 +839,8 @@ class Ia32
820
839
  case di.opcode.basename
821
840
  when 'ret'; return [Indirection[register_symbols[4], sz/8, di.address]]
822
841
  when 'jmp', 'call'
823
- a = di.instruction.args.first
824
- if dasm and a.kind_of?(ModRM) and a.imm and (a.s == sz/8 or a.s == 4) and not a.b and dasm.get_section_at(a.imm)
825
- return get_xrefs_x_jmptable(dasm, di, a, a.s*8)
842
+ if dasm and not di.instruction.args.first.kind_of?(Expression) and switch_table = get_xrefs_x_jmptable(dasm, di)
843
+ return switch_table
826
844
  end
827
845
  end
828
846
 
@@ -834,70 +852,57 @@ class Ia32
834
852
  when Expression, ::Integer; [Expression[tg]]
835
853
  when Farptr; tg.seg.reduce < 0x30 ? [tg.addr] : [Expression[[tg.seg, :*, 0x10], :+, tg.addr]]
836
854
  else
837
- puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG
855
+ puts "unhandled setip at #{Expression[di.address]} #{di.instruction}" if $DEBUG
838
856
  []
839
857
  end
840
858
  end
841
859
 
842
- # we detected a jmp table (jmp [base+4*idx])
843
- # try to return an accurate dest list
844
- def get_xrefs_x_jmptable(dasm, di, mrm, sz)
845
- # include the symbolic dest for backtrack stuff
846
- ret = [Expression[mrm.symbolic(di)]]
847
- i = mrm.i
848
- if di.block.list.length == 2 and di.block.list[0].opcode.name =~ /^mov/ and a0 = di.block.list[0].instruction.args[0] and
849
- a0.respond_to? :symbolic and a0.symbolic == i.symbolic
850
- i = di.block.list[0].instruction.args[1]
851
- end
852
- pb = di.block.from_normal.to_a
853
- if pb.length == 1 and pdi = dasm.decoded[pb[0]] and pdi.opcode.name =~ /^jn?be?/ and ppdi = pdi.block.list[-2] and ppdi.opcode.name == 'cmp' and
854
- ppdi.instruction.args[0].symbolic == i.symbolic and lim = Expression[ppdi.instruction.args[1]].reduce and lim.kind_of? Integer
855
- # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax]
856
- s = dasm.get_section_at(mrm.imm)
857
- lim += 1 if pdi.opcode.name[-1] == ?e
858
- lim.times { |v|
859
- dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8))
860
- ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
861
- s[0].read(sz/8)
862
- }
863
- l = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref')
864
- replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[l])
865
- # add 'case 1' comments
866
- cases = {}
867
- ret.each_with_index { |ind, idx|
868
- idx -= 1 # ret[0] = symbolic
869
- next if idx < 0
870
- a = dasm.backtrace(ind, di.address)
871
- if a.length == 1 and a[0].kind_of?(Expression) and addr = a[0].reduce and addr.kind_of?(::Integer)
872
- (cases[addr] ||= []) << idx
873
- end
860
+ # indirect call, try to match a switch table pattern (eg jmp [base+4*idx])
861
+ # return a list of target addresses if found, nil otherwise
862
+ def get_xrefs_x_jmptable(dasm, di)
863
+ puts "search jmptable for #{Expression[di.address]} #{di.instruction}" if $DEBUG
864
+ arg0 = di.instruction.args.first.symbolic(di)
865
+
866
+ bt_log = []
867
+ dasm.backtrace(arg0, di.address, :maxdepth => 3, :log => bt_log)
868
+
869
+ expr = nil
870
+ index = nil
871
+ index_max = nil
872
+
873
+ bt_log.each { |btl|
874
+ next if btl[0] != :up
875
+ last = dasm.di_at(btl[4])
876
+ break if not last or last.block.to_normal.length > 2
877
+ next if last.block.to_normal.length != 2
878
+ # search cmp eax, 42 ; ja too_big ; jmp [base+4*eax]
879
+ # XXX 256 cases switch => no cmp...
880
+ prelast = last.block.list.reverse.find { |pl| pl.opcode.name == 'cmp' }
881
+ break unless prelast and cmp_value = prelast.instruction.args.last and cmp_value.kind_of?(Expression) and cmp_value.reduce.kind_of?(::Integer)
882
+ cmp_value = cmp_value.reduce % (1 << prelast.instruction.args.first.sz) # cmp al, -12h ; jnbe => -12h is unsigned 0eeh
883
+ index = prelast.instruction.args.first.symbolic(prelast)
884
+ index = index.externals.first if index.kind_of?(Expression) # cmp bl, 13 => ebx
885
+ expr = Expression[btl[1], :&, ((1 << @size) - 1)] # XXX without the mask, additions may overflow (this breaks elsewhere too, need Expr32)
886
+ (expr.externals.grep(Symbol) - [index]).uniq.each { |r|
887
+ rv = dasm.backtrace(r, prelast.address, :maxdepth => 3)
888
+ expr = expr.bind(r => rv[0]) if rv.length == 1
874
889
  }
875
- cases.each { |addr, list|
876
- dasm.add_comment(addr, "case #{list.join(', ')}:")
877
- }
878
- return ret
879
- end
880
-
881
- puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE
882
- di.add_comment 'wildguess'
883
- if s = dasm.get_section_at(mrm.imm - 3*sz/8)
884
- v = -3
885
- else
886
- s = dasm.get_section_at(mrm.imm)
887
- v = 0
888
- end
889
- while s[0].ptr < s[0].length
890
- ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness)
891
- diff = Expression[ptr, :-, di.address].reduce
892
- if (diff.kind_of? ::Integer and diff.abs < 4096) or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr))
893
- dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8))
894
- ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
895
- elsif v > 0
896
- break
890
+ cmp_value = prelast.instruction.args.last.reduce % (1 << prelast.instruction.args.first.sz)
891
+ case last.opcode.name
892
+ when 'jae', 'jb', 'jnae', 'jnb'; index_max = cmp_value-1
893
+ when 'ja', 'jbe', 'jna', 'jnbe'; index_max = cmp_value
894
+ else; expr = nil
897
895
  end
898
- v += 1
896
+ break
897
+ }
898
+
899
+ if expr and expr.externals.grep(Symbol).uniq == [index]
900
+ # yay !
901
+ # include the symbolic dest for backtrace stuff
902
+ puts "found jmptable for #{Expression[di.address]} #{di.instruction} (#{index_max+1} entries)" if $VERBOSE
903
+ # TODO add labels / tables / xrefs etc
904
+ [Expression[arg0]] + (0..index_max).map { |i| expr.bind(index => i) }
899
905
  end
900
- ret
901
906
  end
902
907
 
903
908
  # checks if expr is a valid return expression matching the :saveip instruction
@@ -1239,11 +1244,15 @@ class Ia32
1239
1244
  # the binding will not include memory access from subfunctions
1240
1245
  # entry should be an entrypoint of the disassembler if finish is nil
1241
1246
  # the code sequence must have only one end, with no to_normal
1242
- def code_binding(dasm, entry, finish=nil)
1247
+ # options:
1248
+ # :include_flags => include EFLAGS in the returned binding
1249
+ def code_binding(dasm, entry, finish=nil, nargs={})
1250
+ include_flags = nargs.delete :include_flags
1251
+
1243
1252
  entry = dasm.normalize(entry)
1244
1253
  finish = dasm.normalize(finish) if finish
1245
1254
  lastdi = nil
1246
- binding = {}
1255
+ bd = {}
1247
1256
  bt = lambda { |from, expr, inc_start|
1248
1257
  ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start)
1249
1258
  ret.length == 1 ? ret.first : Expression::Unknown
@@ -1268,7 +1277,7 @@ class Ia32
1268
1277
  get_xrefs_w(dasm, di).each { |waddr, len|
1269
1278
  # we want the ptr expressed with reg values at entry
1270
1279
  ptr = bt[a, waddr, false]
1271
- binding[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true]
1280
+ bd[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true]
1272
1281
  }
1273
1282
  false
1274
1283
  end
@@ -1291,13 +1300,13 @@ class Ia32
1291
1300
  if lastdi.opcode.props[:setip]
1292
1301
  e = get_xrefs_x(dasm, lastdi)
1293
1302
  raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec]
1294
- binding[:ip] = bt[lastdi.address, e.first, false]
1303
+ bd[:ip] = bt[lastdi.address, e.first, false]
1295
1304
  elsif not lastdi.opcode.props[:stopexec]
1296
- binding[:ip] = lastdi.next_addr
1305
+ bd[:ip] = lastdi.next_addr
1297
1306
  end
1298
1307
  end
1299
1308
  end
1300
- binding.delete_if { |k, v| Expression[k] == Expression[v] }
1309
+ bd.delete_if { |k, v| Expression[k] == Expression[v] }
1301
1310
 
1302
1311
  # add register binding
1303
1312
  raise "no code_binding end" if not lastdi and not finish
@@ -1310,10 +1319,22 @@ class Ia32
1310
1319
  mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride)
1311
1320
  mask = 0xffff_ffff_ffff_ffff if @size == 64
1312
1321
  val = Expression[val, :&, mask].reduce
1313
- binding[reg] = Expression[val]
1322
+ bd[reg] = Expression[val]
1314
1323
  }
1315
1324
 
1316
- binding
1325
+ # add EFLAGS binding
1326
+ if include_flags
1327
+ [:eflag_z, :eflag_s, :eflag_c, :eflag_o].each { |eflag|
1328
+ val =
1329
+ if lastdi; bt[lastdi.address, eflag, true]
1330
+ else bt[finish, eflag, false]
1331
+ end
1332
+ next if val == Expression[eflag]
1333
+ bd[eflag] = Expression[val.reduce]
1334
+ }
1335
+ end
1336
+
1337
+ bd
1317
1338
  end
1318
1339
 
1319
1340
  # trace the stack pointer register across a function, rename occurences of esp+XX to esp+var_XX
@@ -59,7 +59,7 @@ class Ia32
59
59
 
60
60
  # add di-specific registry written/accessed
61
61
  def decompile_func_finddeps_di(dcmp, func, di, a, w)
62
- a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
62
+ a << register_symbols[0] if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
63
63
  end
64
64
 
65
65
  # list variable dependency for each block, remove useless writes
@@ -111,7 +111,7 @@ class Ia32
111
111
  }
112
112
  if stackoff # last block instr == subfunction call
113
113
  deps_r[b] |= deps_subfunc[b] - deps_w[b]
114
- deps_w[b] |= [:eax, :ecx, :edx] # standard ABI
114
+ deps_w[b] |= register_symbols[0, 3] # standard ABI
115
115
  end
116
116
  }
117
117
 
@@ -140,7 +140,7 @@ class Ia32
140
140
  bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
141
141
  false
142
142
  }
143
- if r == :eax and (rdi || blk.list.last).opcode.name == 'ret'
143
+ if r == register_symbols[0] and (rdi || blk.list.last).opcode.name == 'ret'
144
144
  func.type.type = C::BaseType.new(:void)
145
145
  false
146
146
  elsif rdi and rdi.backtrace_binding[r]
@@ -194,15 +194,18 @@ class Ia32
194
194
  end
195
195
 
196
196
  def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
197
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
198
+ ebx, esp, ebp = ebx, esp, ebp # fix ruby unused var warning
197
199
  scope = func.initializer
198
200
  func.type.args.each { |a| scope.symbol[a.name] = a }
199
201
  stmts = scope.statements
200
202
  blocks_toclean = myblocks.dup
201
203
  func_entry = myblocks.first[0]
204
+ di_addr = nil
202
205
  until myblocks.empty?
203
206
  b, to = myblocks.shift
204
207
  if l = dcmp.dasm.get_label_at(b)
205
- stmts << C::Label.new(l)
208
+ stmts << C::Label.new(l).with_misc(:di_addr => b)
206
209
  end
207
210
 
208
211
  # list of assignments [[dest reg, expr assigned]]
@@ -210,7 +213,11 @@ class Ia32
210
213
  # reg binding (reg => value, values.externals = regs at block start)
211
214
  binding = {}
212
215
  # Expr => CExpr
213
- ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
216
+ ce = lambda { |*e|
217
+ ret = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
218
+ dcmp.walk_ce(ret) { |ee| ee.with_misc(:di_addr => di_addr) } if di_addr
219
+ ret
220
+ }
214
221
  # Expr => Expr.bind(binding) => CExpr
215
222
  ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }
216
223
 
@@ -235,6 +242,7 @@ class Ia32
235
242
  # returns an array to use as funcall arguments
236
243
  get_func_args = lambda { |di, f|
237
244
  # XXX see remarks in #finddeps
245
+ # TODO x64
238
246
  bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
239
247
  stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil
240
248
  args_todo = f.type.args.to_a.dup
@@ -283,19 +291,20 @@ class Ia32
283
291
 
284
292
  # go !
285
293
  dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
294
+ di_addr = di.address
286
295
  a = di.instruction.args
287
296
  if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
288
297
  # conditional jump
289
298
  commit[]
290
299
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
291
300
  if di.opcode.name =~ /^loop(.+)?/
292
- cx = C::CExpression[:'--', ceb[:ecx]]
301
+ cx = C::CExpression[:'--', ceb[ecx]]
293
302
  cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx
294
303
  else
295
304
  cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
296
305
  end
297
306
  # XXX switch/indirect/multiple jmp
298
- stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
307
+ stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
299
308
  to.delete dcmp.dasm.normalize(n)
300
309
  next
301
310
  end
@@ -312,7 +321,7 @@ class Ia32
312
321
  f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
313
322
  a2 = a2.symbolic(di)
314
323
  a2 = [a2, :&, 0xffff] if sz == 16
315
- stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type)
324
+ stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type).with_misc(:di_addr => di_addr)
316
325
  next
317
326
  end
318
327
  case a2
@@ -324,7 +333,7 @@ class Ia32
324
333
  f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
325
334
  t = f.type.type
326
335
  binding.delete a1.symbolic(di)
327
- stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t)
336
+ stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t).with_misc(:di_addr => di_addr), t).with_misc(:di_addr => di_addr)
328
337
  next
329
338
  end
330
339
  end
@@ -333,8 +342,8 @@ class Ia32
333
342
  when 'ret'
334
343
  commit[]
335
344
  ret = nil
336
- ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
337
- stmts << C::Return.new(ret)
345
+ ret = C::CExpression[ceb[eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
346
+ stmts << C::Return.new(ret).with_misc(:di_addr => di_addr)
338
347
  when 'call' # :saveip
339
348
  n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
340
349
  args = []
@@ -367,9 +376,9 @@ class Ia32
367
376
  end
368
377
  end
369
378
  commit[]
370
- binding.delete :eax
371
- e = C::CExpression[f, :funcall, args]
372
- e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void)
379
+ binding.delete eax
380
+ e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di_addr)
381
+ e = C::CExpression[ce[eax], :'=', e, f.type.type].with_misc(:di_addr => di_addr) if deps[b].include? eax and f.type.type != C::BaseType.new(:void)
373
382
  stmts << e
374
383
  when 'jmp'
375
384
  #if di.comment.to_a.include? 'switch'
@@ -388,7 +397,7 @@ class Ia32
388
397
  a = di.instruction.args.first
389
398
  if a.kind_of? Expression
390
399
  elsif not a.respond_to? :symbolic
391
- stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
400
+ stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil).with_misc(:di_addr => di_addr)
392
401
  else
393
402
  n = di.instruction.args.first.symbolic(di)
394
403
  fptr = ceb[n]
@@ -399,12 +408,10 @@ class Ia32
399
408
  args = get_func_args[di, fptr.type]
400
409
  else
401
410
  proto = C::Function.new(C::BaseType.new(:void))
402
- fptr = C::CExpression[[fptr], C::Pointer.new(proto)]
411
+ fptr = C::CExpression[[fptr], C::Pointer.new(proto)].with_misc(:di_addr => di_addr)
403
412
  args = []
404
413
  end
405
- ret = C::Return.new(C::CExpression[fptr, :funcall, args])
406
- class << ret ; attr_accessor :from_instr end
407
- ret.from_instr = di
414
+ ret = C::Return.new(C::CExpression[fptr, :funcall, args].with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
408
415
  stmts << ret
409
416
  to = []
410
417
  end
@@ -418,7 +425,7 @@ class Ia32
418
425
  end
419
426
  # need a way to transform arg => :frameptr+12
420
427
  arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
421
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
428
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
422
429
  when 'lidt'
423
430
  if not dcmp.c_parser.toplevel.struct['interrupt_descriptor']
424
431
  dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
@@ -428,29 +435,29 @@ class Ia32
428
435
  dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
429
436
  end
430
437
  arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
431
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
438
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
432
439
  when 'ltr', 'lldt'
433
440
  if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
434
441
  dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
435
442
  end
436
443
  arg = di.backtrace_binding.keys.first
437
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void))
444
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
438
445
  when 'out'
439
446
  sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
440
447
  if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
441
448
  dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
442
449
  end
443
- port = di.instruction.args.grep(Expression).first || :edx
444
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void))
450
+ port = di.instruction.args.grep(Expression).first || edx
451
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[eax]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
445
452
  when 'in'
446
453
  sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
447
454
  if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
448
455
  dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
449
456
  end
450
- port = di.instruction.args.grep(Expression).first || :edx
457
+ port = di.instruction.args.grep(Expression).first || edx
451
458
  f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
452
- binding.delete :eax
453
- stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type)
459
+ binding.delete eax
460
+ stmts << C::CExpression.new(ce[eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type).with_misc(:di_addr => di_addr)
454
461
  when 'sti', 'cli'
455
462
  stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
456
463
  when /^(mov|sto|lod)s([bwdq])/
@@ -462,15 +469,15 @@ class Ia32
462
469
  blk = C::Block.new(scope)
463
470
  case op
464
471
  when 'mov'
465
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]]
466
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
467
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
472
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
473
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
474
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
468
475
  when 'sto'
469
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]]
470
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
476
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', ceb[eax]].with_misc(:di_addr => di_addr)
477
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
471
478
  when 'lod'
472
- blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]]
473
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
479
+ blk.statements << C::CExpression[ceb[eax], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
480
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
474
481
  #when 'sca'
475
482
  #when 'cmp'
476
483
  end
@@ -479,8 +486,8 @@ class Ia32
479
486
  when nil
480
487
  stmts.concat blk.statements
481
488
  when 'rep'
482
- blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]]
483
- stmts << C::While.new(C::CExpression[ceb[:ecx]], blk)
489
+ blk.statements << C::CExpression[ceb[ecx], :'=', [ceb[ecx], :-, [1]]].with_misc(:di_addr => di_addr)
490
+ stmts << C::While.new(C::CExpression[ceb[ecx]], blk).with_misc(:di_addr => di_addr)
484
491
  #when 'repz' # sca/cmp only
485
492
  #when 'repnz'
486
493
  end
@@ -489,7 +496,7 @@ class Ia32
489
496
  bd = get_fwdemu_binding(di)
490
497
  if di.backtrace_binding[:incomplete_binding]
491
498
  commit[]
492
- stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
499
+ stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di_addr)
493
500
  else
494
501
  update = {}
495
502
  bd.each { |k, v|
@@ -504,6 +511,7 @@ class Ia32
504
511
  binding.update update
505
512
  end
506
513
  end
514
+ di_addr = nil
507
515
  }
508
516
  commit[]
509
517