metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,193 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+
9
module Metasm
  # Debugger-side helpers for the Ia32 CPU: register/flag naming, software
  # (int3) and hardware (dr0..dr3) breakpoints, single-stepping, stack walking
  # and access to function arguments / return values.
  #
  # Every method taking +dbg+ expects a debugger object answering the calls
  # used below (get_reg_value / set_reg_value, memory, [] / []=, pc,
  # breakpoint, breakpoint_thread, ...).
  class Ia32
    # name of the program counter register
    def dbg_register_pc
      @dbg_register_pc ||= :eip
    end

    # name of the stack pointer register (entry 7 of dbg_register_list)
    def dbg_register_sp
      @dbg_register_sp ||= dbg_register_list[7]
    end

    # name of the flags register
    def dbg_register_flags
      @dbg_register_flags ||= :eflags
    end

    # registers exposed to the debugger; several methods below index this
    # list directly (0 = return value, 6 = frame pointer, 7 = stack pointer)
    def dbg_register_list
      @dbg_register_list ||= [:eax, :ebx, :ecx, :edx, :esi, :edi, :ebp, :esp, :eip]
    end

    # register name => size in bits; 32 unless listed (segment registers are 16)
    def dbg_register_size
      @dbg_register_size ||= Hash.new(32).update(:cs => 16, :ds => 16, :es => 16, :fs => 16, :gs => 16)
    end

    # flags exposed to the debugger
    def dbg_flag_list
      @dbg_flag_list ||= [:c, :p, :a, :z, :s, :i, :d, :o]
    end

    # flag name => bit position in the flags register
    DBG_FLAGS = { :c => 0, :p => 2, :a => 4, :z => 6, :s => 7, :t => 8, :i => 9, :d => 10, :o => 11 }

    # returns the current value (0 or 1) of flag +f+
    def dbg_get_flag(dbg, f)
      (dbg.get_reg_value(dbg_register_flags) >> DBG_FLAGS[f]) & 1
    end

    # sets flag +f+ to 1
    def dbg_set_flag(dbg, f)
      fl = dbg.get_reg_value(dbg_register_flags)
      fl |= 1 << DBG_FLAGS[f]
      dbg.set_reg_value(dbg_register_flags, fl)
    end

    # clears flag +f+
    def dbg_unset_flag(dbg, f)
      fl = dbg.get_reg_value(dbg_register_flags)
      fl &= ~(1 << DBG_FLAGS[f])
      dbg.set_reg_value(dbg_register_flags, fl)
    end

    # single-stepping is driven by the :t flag
    def dbg_enable_singlestep(dbg)
      dbg_set_flag(dbg, :t)
    end

    def dbg_disable_singlestep(dbg)
      dbg_unset_flag(dbg, :t)
    end

    # activates +bp+: :bpx is a software breakpoint, any other type is
    # handled as a hardware breakpoint
    def dbg_enable_bp(dbg, bp)
      case bp.type
      when :bpx; dbg_enable_bpx(dbg, bp)
      else dbg_enable_bphw(dbg, bp)
      end
    end

    # deactivates +bp+ (same dispatch as dbg_enable_bp)
    def dbg_disable_bp(dbg, bp)
      case bp.type
      when :bpx; dbg_disable_bpx(dbg, bp)
      else dbg_disable_bphw(dbg, bp)
      end
    end

    # software breakpoint: remember the original byte (only the first time),
    # then overwrite it with 0xcc
    def dbg_enable_bpx(dbg, bp)
      bp.internal[:previous] ||= dbg.memory[bp.address, 1]
      dbg.memory[bp.address, 1] = "\xcc"
    end

    # software breakpoint: put the saved byte back
    def dbg_disable_bpx(dbg, bp)
      dbg.memory[bp.address, 1] = bp.internal[:previous]
    end

    # allocate a debug register for a hwbp by checking the list of hwbp existing in dbg
    # also fills in the default type (:x) and length (1)
    # returns the debug register index (0..3), raises if all four are taken
    def dbg_alloc_bphw(dbg, bp)
      if not bp.internal[:dr]
        may = [0, 1, 2, 3]
        dbg.breakpoint_thread.values.each { |bb| may.delete bb.internal[:dr] }
        raise 'alloc_bphw: no free debugregister' if may.empty?
        bp.internal[:dr] = may.first
      end
      bp.internal[:type] ||= :x
      bp.internal[:len] ||= 1
      bp.internal[:dr]
    end

    # programs drN with the breakpoint address and its len/rw/enable bits in dr7
    def dbg_enable_bphw(dbg, bp)
      nr = dbg_alloc_bphw(dbg, bp)
      dr7 = dbg[:dr7]
      l = { 1 => 0, 2 => 1, 4 => 3, 8 => 2 }[bp.internal[:len]]
      rw = { :x => 0, :w => 1, :r => 3 }[bp.internal[:type]]
      raise "enable_bphw: invalid breakpoint #{bp.inspect}" if not l or not rw
      dr7 &= ~((15 << (16+4*nr)) | (3 << (2*nr)))	# clear
      dr7 |= ((l << 2) | rw) << (16+4*nr)	# set drN len/rw
      dr7 |= 3 << (2*nr)	# enable global/local drN

      dbg["dr#{nr}"] = bp.address
      dbg[:dr7] = dr7
    end

    # clears the global/local enable bits of the breakpoint's debug register
    def dbg_disable_bphw(dbg, bp)
      nr = bp.internal[:dr]
      # no debug register was ever allocated for this bp: nothing to clear
      return if not nr
      dr7 = dbg[:dr7]
      dr7 &= ~(3 << (2*nr))
      dbg[:dr7] = dr7
    end

    # called before resuming the target: resets dr6 so that the next event
    # can be identified from it
    def dbg_check_pre_run(dbg)
      if dbg[:dr6] == 0 and dbg[:dr7] == 0
        dbg[:dr7] = 0x10000	# some OS (eg Windows) only return dr6 if dr7 != 0
      end
      dbg[:dr6] = 0
    end

    # after an int3 hit, pc points past the 0xcc byte: move it back onto the
    # breakpoint address
    def dbg_evt_bpx(dbg, b)
      if b.address == dbg.pc-1
        dbg.pc -= 1
      end
    end

    # returns the software breakpoint matching the current pc, if any
    # returns nil when dr6 has bit 14 set (0x4000, the single-step status bit)
    def dbg_find_bpx(dbg)
      return if dbg[:dr6] & 0x4000 != 0
      pc = dbg.pc
      dbg.breakpoint[pc-1] || dbg.breakpoint[pc]
    end

    # returns the hardware breakpoint whose debug register is flagged in the
    # low 4 bits of dr6, or nil if none is flagged
    def dbg_find_hwbp(dbg)
      dr6 = dbg[:dr6]
      return if dr6 & 0xf == 0
      # the explicit != 0 is required: an Integer 0 is truthy in Ruby, so
      # testing the masked value directly would always select dr0
      dn = (0..3).find { |n| (dr6 & (1 << n)) != 0 }
      dbg.breakpoint_thread.values.find { |b| b.internal[:dr] == dn }
    end

    # true if stepping over +di+ should not single-step into it:
    # rep-prefixed instructions and opcodes flagged :saveip
    def dbg_need_stepover(dbg, addr, di)
      di and ((di.instruction.prefix and di.instruction.prefix[:rep]) or di.opcode.props[:saveip])
    end

    # true if +di+ ends a step-out (a 'ret')
    def dbg_end_stepout(dbg, addr, di)
      di and di.opcode.name == 'ret'
    end

    # return (yield) a list of [addr, symbolic name]
    # walks the saved frame pointer chain, at most +rec+ frames deep
    def dbg_stacktrace(dbg, rec=500)
      ret = []
      s = dbg.addrname!(dbg.pc)
      yield(dbg.pc, s) if block_given?
      ret << [dbg.pc, s]
      fp = dbg.get_reg_value(dbg_register_list[6])
      stack = dbg.get_reg_value(dbg_register_list[7]) - 8
      # stop as soon as the frame pointer leaves a 0x10000 window above the
      # previous frame
      while fp > stack and fp <= stack+0x10000 and rec != 0
        rec -= 1
        ra = dbg.resolve_expr Indirection[fp+4, 4]
        s = dbg.addrname!(ra)
        yield(ra, s) if block_given?
        ret << [ra, s]
        stack = fp	# ensure we walk the stack upwards
        fp = dbg.resolve_expr Indirection[fp, 4]
      end
      ret
    end

    # retrieve the current function return value
    # only valid at function exit
    def dbg_func_retval(dbg)
      dbg.get_reg_value(dbg_register_list[0])
    end

    def dbg_func_retval_set(dbg, val)
      dbg.set_reg_value(dbg_register_list[0], val)
    end

    # retrieve the current function return address
    # to be called only on entry of the subfunction
    def dbg_func_retaddr(dbg)
      dbg.memory_read_int(dbg_register_list[7])
    end

    def dbg_func_retaddr_set(dbg, ret)
      dbg.memory_write_int(dbg_register_list[7], ret)
    end

    # retrieve the current function arguments
    # only valid at function entry (eg right after the call)
    def dbg_func_arg(dbg, argnr)
      dbg.memory_read_int(Expression[:esp, :+, 4*(argnr+1)])
    end

    def dbg_func_arg_set(dbg, argnr, arg)
      dbg.memory_write_int(Expression[:esp, :+, 4*(argnr+1)], arg)
    end
  end
end
@@ -0,0 +1,1167 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class Ia32
12
class ModRM
  # Decodes a modrm operand.
  # edata:: byte stream; the sib byte and the displacement are read from it
  # byte:: the modrm byte (mod = bits 6-7, rm = bits 0-2)
  # adsz / opsz:: address size and operand size, in bits
  # seg:: optional segment override, passed through to the constructor
  # regclass:: class instantiated for a register-direct operand (mod == 3)
  # Returns a regclass instance for mod == 3, a new ModRM otherwise.
  def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg)
    mod = (byte >> 6) & 3
    rm = byte & 7

    # register-direct form: no memory reference at all
    return regclass.new(rm, opsz) if mod == 3

    scale = index = base = disp = nil
    Sum[adsz][mod][rm].each { |part|
      case part
      when Integer
        # first register of the sum is the base, a second one is an unscaled index
        if base
          scale = 1
          index = Reg.new(part, adsz)
        else
          base = Reg.new(part, adsz)
        end

      when :sib
        sib = edata.get_byte.to_i

        # index field == 4 means 'no index'
        index_nr = (sib >> 3) & 7
        if index_nr != 4
          scale = 1 << ((sib >> 6) & 3)
          index = Reg.new(index_nr, adsz)
        end

        # base field == 5 with mod == 0 means 'displacement instead of base'
        base_nr = sib & 7
        if base_nr == 5 and mod == 0
          disp = Expression[edata.decode_imm("i#{adsz}".to_sym, endianness)]
        else
          base = Reg.new(base_nr, adsz)
        end

      when :i8, :i16, :i32
        disp = Expression[edata.decode_imm(part, endianness)]
      end
    }

    if disp
      value = disp.reduce
      if value.kind_of? Integer and value < -0x10_0000
        # probably a base address -> unsigned
        disp = Expression[value & ((1 << (adsz || 32)) - 1)]
      end
    end

    new adsz, opsz, scale, index, base, disp, seg
  end
end
63
+
64
class Farptr
  # Decodes a far pointer from edata: first the offset (adsz bits, unsigned),
  # then the 16-bit segment selector.
  def self.decode(edata, endianness, adsz)
    offset_type = "u#{adsz}".to_sym
    offset = Expression[edata.decode_imm(offset_type, endianness)]
    selector = Expression[edata.decode_imm(:u16, endianness)]
    new selector, offset
  end
end
71
+
72
# Fills op.bin_mask, one byte per byte of op.bin:
# a bit is 1 when it is fixed by the opcode, 0 when a field value may change it
def build_opcode_bin_mask(op)
  op.bin_mask = Array.new(op.bin.length, 0)
  # first pass: collect the bits covered by each field
  op.fields.each { |name, (octet, shift)|
    op.bin_mask[octet] |= (@fields_mask[name] << shift)
  }
  # second pass: invert, so that set bits are the fixed ones
  op.bin_mask.map! { |variable_bits| variable_bits ^ 255 }
end
80
+
81
# Builds the first-byte lookup table: an array of 256 lists, where entry N
# holds every opcode whose first byte may be N.
# opcode.bin_mask is computed here as a side effect.
def build_bin_lookaside
  table = Array.new(256) { [] }
  opcode_list.each { |op|
    build_opcode_bin_mask op

    first_byte = op.bin[0]
    fixed_bits = op.bin_mask[0]
    wanted = first_byte & fixed_bits

    # scan up to the value with every variable bit set, keeping the
    # candidates that agree with the opcode on its fixed bits
    (first_byte..(first_byte | (255 ^ fixed_bits))).each { |candidate|
      table[candidate] << op if candidate & fixed_bits == wanted
    }
  }
  table
end
99
+
100
# Records +byte+ as a prefix of +instr+.
# The byte is always appended to instr.prefix[:list]; the matching prefix
# entry (:opsz, :adsz, :lock, :rep, :seg + :jmphint) is set when the byte is
# a known prefix.
# Returns true if the byte was a prefix, false if it is not one or if the
# prefix list has grown to 15 entries.
def decode_prefix(instr, byte)
  pfx = (instr.prefix ||= {})
  seen = (pfx[:list] ||= [])
  seen << byte

  # XXX actual limit = 15-instr.length
  return false if seen.length >= 15

  case byte
  when 0x66; pfx[:opsz] = true
  when 0x67; pfx[:adsz] = true
  when 0xF0; pfx[:lock] = true
  when 0xF2; pfx[:rep] = :nz
  when 0xF3; pfx[:rep] = :z	# postprocessed by decode_instr
  when 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65
    # 0x26..0x3E carry the segment number in bits 3-4, 0x64/0x65 in bits 0-2
    segnr = (byte & 0x40 == 0) ? ((byte >> 3) & 3) : (byte & 7)
    pfx[:seg] = SegReg.new(segnr)

    pfx[:jmphint] = ((byte & 0x10) == 0x10)
  else
    return false
  end
  true
end
127
+
128
# tries to find the opcode encoded at edata.ptr
# if no match, tries to match a prefix (update di.instruction.prefix)
# on match, edata.ptr points to the first byte of the opcode (after prefixes)
# returns the DecodedInstruction with its opcode set, or nil if nothing
# matches / the data runs out
def decode_findopcode(edata)
  di = DecodedInstruction.new self
  while edata.ptr < edata.data.length
    pfx = di.instruction.prefix || {}
    byte = edata.data[edata.ptr]
    byte = byte.unpack('C').first if byte.kind_of? ::String	# 1.9

    found = @bin_lookaside[byte].find { |op|
      # fetch the relevant bytes from edata
      bseq = edata.data[edata.ptr, op.bin.length].unpack('C*')
      di.opcode = op if op.props[:opsz]	# needed by opsz(di)

      # check against full opcode mask (fails too if edata is too short)
      fits = op.bin.zip(bseq, op.bin_mask).all? { |want, got, mask| got and ((want & mask) == (got & mask)) }
      next false if not fits

      # check special cases: reject the candidate if any of those is true
      fld = op.fields[:seg2A]
      next false if fld and ((bseq[fld[0]] >> fld[1]) & @fields_mask[:seg2A]) == 1
      fld = op.fields[:seg3A]
      next false if fld and ((bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3A]) < 4
      fld = op.fields[:seg3A] || op.fields[:seg3]
      next false if fld and ((bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3]) > 5
      fld = op.fields[:modrmA]
      next false if fld and ((bseq[fld[0]] >> fld[1]) & 0xC0) == 0xC0
      sz = op.props[:opsz]
      next false if sz and opsz(di) != sz
      ndpfx = op.props[:needpfx]
      next false if ndpfx and not pfx[:list].to_a.include? ndpfx
      # return non-ambiguous opcode (eg push.i16 in 32bit mode) / sync with addop_post in opcode.rb
      if pfx[:opsz] and (op.args == [:i] or op.args == [:farptr] or op.name[0, 3] == 'ret')
        next false if not op.props[:opsz]
      end
      next false if pfx[:adsz] and op.props[:adsz] and op.props[:adsz] == @size

      true
    }
    di.opcode = found
    return di if found

    # no opcode here: consume the byte as a prefix, or give up
    break if not decode_prefix(di.instruction, edata.get_byte)
    di.bin_length += 1
  end
  nil
end
163
+
164
# Decodes the opcode bytes and the arguments of di.opcode from edata
# (edata.ptr must be on the first opcode byte, after the prefixes).
# Fills di.instruction.opname / args, adds the consumed bytes to
# di.bin_length and post-processes the prefix hash (:seg is dropped, :rep is
# turned into its printable name for string opcodes).
# Returns di.
def decode_instr_op(edata, di)
  start_ptr = edata.ptr
  op = di.opcode
  di.instruction.opname = op.name
  bseq = edata.read(op.bin.length).unpack('C*')	# decode_findopcode ensures that data >= op.length
  pfx = di.instruction.prefix || {}

  # a prefix byte required by the opcode is part of its encoding, not a modifier
  absorbed = { 0x66 => :opsz, 0x67 => :adsz, 0xF2 => :rep, 0xF3 => :rep }[op.props[:needpfx]]
  pfx.delete absorbed if absorbed

  # value of field f inside the opcode bytes, nil if the opcode has no such field
  field_val = lambda { |f|
    if fld = op.fields[f]
      (bseq[fld[0]] >> fld[1]) & @fields_mask[f]
    end
  }

  opsize = opsz(di)
  adsz = (pfx[:adsz] ? 48 - @size : @size)
  mmxsz = ((op.props[:xmmx] && pfx[:opsz]) ? 128 : 64)

  op.args.each { |a|
    di.instruction.args << case a
    when :reg; Reg.new field_val[a], opsize
    when :eeec; CtrlReg.new field_val[a]
    when :eeed; DbgReg.new field_val[a]
    when :seg2, :seg2A, :seg3, :seg3A; SegReg.new field_val[a]
    when :regfp; FpReg.new field_val[a]
    when :regmmx; SimdReg.new field_val[a], mmxsz
    when :regxmm; SimdReg.new field_val[a], 128

    when :farptr; Farptr.decode edata, @endianness, opsize
    when :i8, :u8, :u16; Expression[edata.decode_imm(a, @endianness)]
    when :i; Expression[edata.decode_imm("#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsize}".to_sym, @endianness)]

    when :mrm_imm; ModRM.decode edata, (adsz == 16 ? 6 : 5), @endianness, adsz, opsize, pfx[:seg]
    when :modrm, :modrmA; ModRM.decode edata, field_val[a], @endianness, adsz, opsize, pfx[:seg]
    when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx[:seg], SimdReg
    when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx[:seg], SimdReg

    when :imm_val1; Expression[1]
    when :imm_val3; Expression[3]
    when :reg_cl; Reg.new 1, 8
    when :reg_eax; Reg.new 0, opsize
    when :reg_dx; Reg.new 2, 16
    when :regfp0; FpReg.new nil
    else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}"
    end
  }

  di.bin_length += edata.ptr - start_ptr

  # movsx/movzx: source and destination have different sizes
  if op.name == 'movsx' or op.name == 'movzx'
    di.instruction.args[1].sz = (di.opcode.props[:argsz] == 8 ? 8 : 16)
    di.instruction.args[0].sz = (pfx[:opsz] ? 48 - @size : @size)
  end

  pfx.delete :seg
  rep = pfx.delete(:rep)
  if rep == :nz or rep == :z
    # the rep prefix is only kept on string opcodes
    if di.opcode.props[:strop]
      pfx[:rep] = 'rep'
    elsif di.opcode.props[:stropz]
      pfx[:rep] = (rep == :nz ? 'repnz' : 'repz')
    end
  end

  di
end
254
+
255
# converts relative jump/call offsets to absolute addresses
# the last argument is replaced by addr + di.bin_length + offset
# (+addr+ may be an Expression); this applies only to opcodes flagged :setip
# whose last argument is an Expression, 'ret*' excluded
# do not call twice on the same di !
def decode_instr_interpret(di, addr)
  return di if not di.opcode.props[:setip]
  offset = di.instruction.args.last
  return di if not offset.kind_of? Expression
  return di if di.instruction.opname[0, 3] == 'ret'

  delta = offset.reduce
  target = Expression[[addr, :+, di.bin_length], :+, delta].reduce
  di.instruction.args[-1] = Expression[target]

  di
end
267
+
268
# general purpose registers as symbols, in the order used by pushad
# (so esp is at index 4)
REG_SYMS = [
  :eax, :ecx, :edx, :ebx,
  :esp, :ebp, :esi, :edi
]

# return the list of registers as symbols in the order used by pushad
# for use in backtrace and stuff, for compatibility with x64
def register_symbols
  REG_SYMS
end
275
+
276
# interprets a condition code (in an opcode name) as an expression involving backtracked eflags
# eflag_p is never computed, and this returns Expression::Unknown for this flag
# ex: 'z' => Expression[:eflag_z]
# 'ecxz' / 'cxz' test the counter register (register_symbols[1]) instead of a flag
# returns nil for a string that is not a condition code
def decode_cc_to_expr(cc)
  case cc
  when 'o'; Expression[:eflag_o]
  when 'no'; Expression[:'!', :eflag_o]
  when 'b', 'nae', 'c'; Expression[:eflag_c]
  when 'nb', 'ae', 'nc'; Expression[:'!', :eflag_c]
  when 'z', 'e'; Expression[:eflag_z]
  when 'nz', 'ne'; Expression[:'!', :eflag_z]
  when 'be', 'na'; Expression[:eflag_c, :|, :eflag_z]
  when 'nbe', 'a'; Expression[:'!', [:eflag_c, :|, :eflag_z]]
  when 's'; Expression[:eflag_s]
  when 'ns'; Expression[:'!', :eflag_s]
  when 'p', 'pe'; Expression::Unknown
  when 'np', 'po'; Expression::Unknown
  when 'l', 'nge'; Expression[:eflag_s, :'!=', :eflag_o]
  when 'nl', 'ge'; Expression[:eflag_s, :==, :eflag_o]
  when 'le', 'ng'; Expression[[:eflag_s, :'!=', :eflag_o], :|, :eflag_z]
  # 'greater' is the exact complement of 'le': s == o AND z clear
  when 'nle', 'g'; Expression[[:eflag_s, :==, :eflag_o], :&, [:'!', :eflag_z]]
  when 'ecxz'; Expression[:'!', register_symbols[1]]
  when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]]
  end
end
301
+
302
# hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
# built by init_backtrace_binding the first time it is requested
def backtrace_binding
  @backtrace_binding = init_backtrace_binding if not @backtrace_binding
  @backtrace_binding
end

# replaces the whole binding hash
def backtrace_binding=(b)
  @backtrace_binding = b
end
307
+
308
# Operand size, in bits, for the decoded instruction +di+:
# - the cpu default (@size) when di is nil
# - the opcode's :argsz property when it has one (prefixes are then ignored)
# - otherwise @size, toggled to 48 - @size by an opsz prefix
def opsz(di)
  return @size if not di

  forced = di.opcode.props[:argsz]
  return forced if forced

  pfx = di.instruction.prefix
  (pfx and pfx[:opsz]) ? 48 - @size : @size
end
314
+
315
+ # populate the @backtrace_binding hash with default values
316
+ def init_backtrace_binding
317
+ @backtrace_binding ||= {}
318
+
319
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
320
+
321
+ mask = lambda { |di| (1 << opsz(di))-1 } # 32bits => 0xffff_ffff
322
+ sign = lambda { |v, di| Expression[[[v, :&, mask[di]], :>>, opsz(di)-1], :'!=', 0] }
323
+
324
+ opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op|
325
+ binding = case op
326
+ when 'mov', 'movsx', 'movzx', 'movsxd', 'movd', 'movq'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
327
+ when 'lea'; lambda { |di, a0, a1| { a0 => a1.target } }
328
+ when 'xchg'; lambda { |di, a0, a1| { a0 => Expression[a1], a1 => Expression[a0] } }
329
+ when 'add', 'sub', 'or', 'xor', 'and', 'pxor', 'adc', 'sbb'
330
+ lambda { |di, a0, a1|
331
+ e_op = { 'add' => :+, 'sub' => :-, 'or' => :|, 'and' => :&, 'xor' => :^, 'pxor' => :^, 'adc' => :+, 'sbb' => :- }[op]
332
+ ret = Expression[a0, e_op, a1]
333
+ ret = Expression[ret, e_op, :eflag_c] if op == 'adc' or op == 'sbb'
334
+ # optimises eax ^ eax => 0
335
+ # avoid hiding memory accesses (to not hide possible fault)
336
+ ret = Expression[ret.reduce] if not a0.kind_of? Indirection
337
+ { a0 => ret }
338
+ }
339
+ when 'xadd'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1], a1 => Expression[a0] } }
340
+ when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } }
341
+ when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } }
342
+ when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask[di]] } }
343
+ when 'neg'; lambda { |di, a0| { a0 => Expression[:-, a0] } }
344
+ when 'rol', 'ror'
345
+ lambda { |di, a0, a1|
346
+ e_op = (op[2] == ?r ? :>> : :<<)
347
+ inv_op = {:<< => :>>, :>> => :<< }[e_op]
348
+ sz = [a1, :%, opsz(di)]
349
+ isz = [[opsz(di), :-, a1], :%, opsz(di)]
350
+ # ror a, b => (a >> b) | (a << (32-b))
351
+ { a0 => Expression[[[a0, e_op, sz], :|, [a0, inv_op, isz]], :&, mask[di]] }
352
+ }
353
+ when 'sar', 'shl', 'sal'; lambda { |di, a0, a1| { a0 => Expression[a0, (op[-1] == ?r ? :>> : :<<), [a1, :%, [opsz(di), 32].max]] } }
354
+ when 'shr'; lambda { |di, a0, a1| { a0 => Expression[[a0, :&, mask[di]], :>>, [a1, :%, opsz(di)]] } }
355
+ when 'cwd', 'cdq', 'cqo'; lambda { |di| { Expression[edx, :&, mask[di]] => Expression[mask[di], :*, sign[eax, di]] } }
356
+ when 'cbw', 'cwde', 'cdqe'; lambda { |di|
357
+ o2 = opsz(di)/2 ; m2 = (1 << o2) - 1
358
+ { Expression[eax, :&, mask[di]] => Expression[[eax, :&, m2], :|, [m2 << o2, :*, [[eax, :>>, o2-1], :&, 1]]] } }
359
+ when 'push'
360
+ lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8],
361
+ Indirection[esp, opsz(di)/8, di.address] => Expression[a0] } }
362
+ when 'pop'
363
+ lambda { |di, a0| { esp => Expression[esp, :+, opsz(di)/8],
364
+ a0 => Indirection[esp, opsz(di)/8, di.address] } }
365
+ when 'pushfd'
366
+ # TODO Unknown per bit
367
+ lambda { |di|
368
+ efl = Expression[0x202]
369
+ bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] }
370
+ bts[0, :eflag_c]
371
+ bts[6, :eflag_z]
372
+ bts[7, :eflag_s]
373
+ bts[11, :eflag_o]
374
+ { esp => Expression[esp, :-, opsz(di)/8], Indirection[esp, opsz(di)/8, di.address] => efl }
375
+ }
376
+ when 'popfd'
377
+ lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] }
378
+ { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } }
379
+ when 'sahf'
380
+ lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] }
381
+ { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } }
382
+ when 'lahf'
383
+ lambda { |di|
384
+ efl = Expression[2]
385
+ bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] }
386
+ bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a]
387
+ bts[6, :eflag_z]
388
+ bts[7, :eflag_s]
389
+ { eax => efl }
390
+ }
391
+ when 'pushad'
392
+ lambda { |di|
393
+ ret = {}
394
+ st_off = 0
395
+ register_symbols.reverse_each { |r|
396
+ ret[Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]] = Expression[r]
397
+ st_off += opsz(di)/8
398
+ }
399
+ ret[esp] = Expression[esp, :-, st_off]
400
+ ret
401
+ }
402
+ when 'popad'
403
+ lambda { |di|
404
+ ret = {}
405
+ st_off = 0
406
+ register_symbols.reverse_each { |r|
407
+ ret[r] = Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]
408
+ st_off += opsz(di)/8
409
+ }
410
+ ret[esp] = Expression[esp, :+, st_off] # esp is not popped
411
+ ret
412
+ }
413
+ when 'call'
414
+ lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8],
415
+ Indirection[esp, opsz(di)/8, di.address] => Expression[di.next_addr] } }
416
+ when 'ret'; lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/8, :+, a[0] || 0]] } }
417
+ when 'loop', 'loopz', 'loopnz'; lambda { |di, a0| { ecx => Expression[ecx, :-, 1] } }
418
+ when 'enter'
419
+ lambda { |di, a0, a1|
420
+ sz = opsz(di)/8
421
+ depth = a1.reduce % 32
422
+ b = { Indirection[ebp, sz, di.address] => Expression[ebp],
423
+ Indirection[[esp, :+, a0.reduce+sz*depth], sz, di.address] => Expression[ebp],
424
+ ebp => Expression[esp, :-, sz],
425
+ esp => Expression[esp, :-, a0.reduce+sz*depth+sz] }
426
+ (1..depth).each { |i|
427
+ b[Indirection[[esp, :+, a0.reduce+i*sz], sz, di.address]] =
428
+ b[Indirection[[ebp, :-, i*sz], sz, di.address]] =
429
+ Expression::Unknown # TODO Indirection[[ebp, :-, i*sz], sz, di.address]
430
+ }
431
+ b
432
+ }
433
+ when 'leave'; lambda { |di| { ebp => Indirection[[ebp], opsz(di)/8, di.address], esp => Expression[ebp, :+, opsz(di)/8] } }
434
+ when 'aaa'; lambda { |di| { eax => Expression::Unknown, :incomplete_binding => Expression[1] } }
435
+ when 'imul'
436
+ lambda { |di, *a|
437
+ # 1 operand form == same as 'mul' (ax:dx stuff)
438
+ next { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } if not a[1]
439
+
440
+ if a[2]; e = Expression[a[1], :*, a[2]]
441
+ else e = Expression[[a[0], :*, a[1]], :&, (1 << (di.instruction.args.first.sz || opsz(di))) - 1]
442
+ end
443
+ { a[0] => e }
444
+ }
445
+ when 'mul', 'div', 'idiv'; lambda { |di, *a| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
446
+ when 'rdtsc'; lambda { |di| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
447
+ when /^(stos|movs|lods|scas|cmps)[bwd]$/
448
+ lambda { |di|
449
+ op =~ /^(stos|movs|lods|scas|cmps)([bwd])$/
450
+ e_op = $1
451
+ sz = { 'b' => 1, 'w' => 2, 'd' => 4 }[$2]
452
+ eax_ = Reg.new(0, 8*sz).symbolic
453
+ dir = :+
454
+ if di.block and (di.block.list.find { |ddi| ddi.opcode.name == 'std' } rescue nil)
455
+ dir = :-
456
+ end
457
+ pesi = Indirection[esi, sz, di.address]
458
+ pedi = Indirection[edi, sz, di.address]
459
+ pfx = di.instruction.prefix || {}
460
+ bd =
461
+ case e_op
462
+ when 'movs'
463
+ case pfx[:rep]
464
+ when nil; { pedi => pesi, esi => Expression[esi, dir, sz], edi => Expression[edi, dir, sz] }
465
+ else { pedi => pesi, esi => Expression[esi, dir, [sz ,:*, ecx]], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 }
466
+ end
467
+ when 'stos'
468
+ case pfx[:rep]
469
+ when nil; { pedi => Expression[eax_], edi => Expression[edi, dir, sz] }
470
+ else { pedi => Expression[eax_], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 }
471
+ end
472
+ when 'lods'
473
+ case pfx[:rep]
474
+ when nil; { eax_ => pesi, esi => Expression[esi, dir, sz] }
475
+ else { eax_ => Indirection[[esi, dir, [sz, :*, [ecx, :-, 1]]], sz, di.address], esi => Expression[esi, dir, [sz, :*, ecx]], ecx => 0 }
476
+ end
477
+ when 'scas'
478
+ case pfx[:rep]
479
+ when nil; { edi => Expression[edi, dir, sz] }
480
+ else { edi => Expression::Unknown, ecx => Expression::Unknown }
481
+ end
482
+ when 'cmps'
483
+ case pfx[:rep]
484
+ when nil; { edi => Expression[edi, dir, sz], esi => Expression[esi, dir, sz] }
485
+ else { edi => Expression::Unknown, esi => Expression::Unknown, ecx => Expression::Unknown }
486
+ end
487
+ end
488
+ bd[:incomplete_binding] = Expression[1] if pfx[:rep]
489
+ bd
490
+ }
491
+ when 'clc'; lambda { |di| { :eflag_c => Expression[0] } }
492
+ when 'stc'; lambda { |di| { :eflag_c => Expression[1] } }
493
+ when 'cmc'; lambda { |di| { :eflag_c => Expression[:'!', :eflag_c] } }
494
+ when 'cld'; lambda { |di| { :eflag_d => Expression[0] } }
495
+ when 'std'; lambda { |di| { :eflag_d => Expression[1] } }
496
+ when 'setalc'; lambda { |di| { Reg.new(0, 8).symbolic => Expression[:eflag_c, :*, 0xff] } }
497
+ when /^set/; lambda { |di, *a| { a[0] => Expression[decode_cc_to_expr(op[/^set(.*)/, 1])] } }
498
+ when /^cmov/; lambda { |di, *a| fl = decode_cc_to_expr(op[/^cmov(.*)/, 1]) ; { a[0] => Expression[[fl, :*, a[1]], :|, [[1, :-, fl], :*, a[0]]] } }
499
+ when /^j/
500
+ lambda { |di, a0|
501
+ ret = { 'dummy_metasm_0' => Expression[a0] } # mark modr/m as read
502
+ if fl = decode_cc_to_expr(op[/^j(.*)/, 1]) and fl != Expression::Unknown
503
+ ret['dummy_metasm_1'] = fl # mark eflags as read
504
+ end
505
+ ret
506
+ }
507
+ when 'fstenv', 'fnstenv'
508
+ lambda { |di, a0|
509
+ # stores the address of the last non-control fpu instr run
510
+ lastfpuinstr = di.block.list[0...di.block.list.index(di)].reverse.find { |pdi|
511
+ case pdi.opcode.name
512
+ when /fn?init|fn?clex|fldcw|fn?st[cs]w|fn?stenv|fldenv|fn?save|frstor|f?wait/
513
+ when /^f/; true
514
+ end
515
+ } if di.block
516
+ lastfpuinstr = lastfpuinstr.address if lastfpuinstr
517
+ ret = {}
518
+ save_at = lambda { |off, val| ret[Indirection[a0.target + off, 4, di.address]] = val }
519
+ save_at[0, Expression::Unknown]
520
+ save_at[4, Expression::Unknown]
521
+ save_at[8, Expression::Unknown]
522
+ save_at[12, lastfpuinstr || Expression::Unknown]
523
+ save_at[16, Expression::Unknown]
524
+ save_at[20, Expression::Unknown]
525
+ save_at[24, Expression::Unknown]
526
+ ret
527
+ }
528
+ when 'bt'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1] } }
529
+ when 'bts'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
530
+ a0 => Expression[a0, :|, [1, :<<, [a1, :%, opsz(di)]]] } }
531
+ when 'btr'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
532
+ a0 => Expression[a0, :&, [[1, :<<, [a1, :%, opsz(di)]], :^, mask[di]]] } }
533
+ when 'btc'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
534
+ a0 => Expression[a0, :^, [1, :<<, [a1, :%, opsz(di)]]] } }
535
+ when 'bswap'
536
+ lambda { |di, a0|
537
+ if opsz(di) == 64
538
+ { a0 => Expression[
539
+ [[[[a0, :&, 0xff000000_00000000], :>>, 56], :|,
540
+ [[a0, :&, 0x00ff0000_00000000], :>>, 40]], :|,
541
+ [[[a0, :&, 0x0000ff00_00000000], :>>, 24], :|,
542
+ [[a0, :&, 0x000000ff_00000000], :>>, 8]]], :|,
543
+ [[[[a0, :&, 0x00000000_ff000000], :<<, 8], :|,
544
+ [[a0, :&, 0x00000000_00ff0000], :<<, 24]], :|,
545
+ [[[a0, :&, 0x00000000_0000ff00], :<<, 40], :|,
546
+ [[a0, :&, 0x00000000_000000ff], :<<, 56]]]] }
547
+ else # XXX opsz != 32 => undef
548
+ { a0 => Expression[
549
+ [[[a0, :&, 0xff000000], :>>, 24], :|,
550
+ [[a0, :&, 0x00ff0000], :>>, 8]], :|,
551
+ [[[a0, :&, 0x0000ff00], :<<, 8], :|,
552
+ [[a0, :&, 0x000000ff], :<<, 24]]] }
553
+ end
554
+ }
555
+ when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} }
556
+ end
557
+
558
+ # add eflags side-effects
559
+
560
+ full_binding = case op
561
+ when 'adc', 'add', 'and', 'cmp', 'or', 'sbb', 'sub', 'xor', 'test', 'xadd'
562
+ lambda { |di, a0, a1|
563
+ e_op = { 'adc' => :+, 'add' => :+, 'xadd' => :+, 'and' => :&, 'cmp' => :-, 'or' => :|, 'sbb' => :-, 'sub' => :-, 'xor' => :^, 'test' => :& }[op]
564
+ res = Expression[[a0, :&, mask[di]], e_op, [a1, :&, mask[di]]]
565
+ res = Expression[res, e_op, :eflag_c] if op == 'adc' or op == 'sbb'
566
+
567
+ ret = (binding ? binding[di, a0, a1] : {})
568
+ ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0]
569
+ ret[:eflag_s] = sign[res, di]
570
+ ret[:eflag_c] = case e_op
571
+ when :+; Expression[res, :>, mask[di]]
572
+ when :-; Expression[[a0, :&, mask[di]], :<, [a1, :&, mask[di]]]
573
+ else Expression[0]
574
+ end
575
+ ret[:eflag_o] = case e_op
576
+ when :+; Expression[[sign[a0, di], :==, sign[a1, di]], :'&&', [sign[a0, di], :'!=', sign[res, di]]]
577
+ when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]]
578
+ else Expression[0]
579
+ end
580
+ ret
581
+ }
582
+ when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
583
+ lambda { |di, a0, *a|
584
+ ret = (binding ? binding[di, a0, *a] : {})
585
+ res = ret[a0] || Expression::Unknown
586
+ ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0]
587
+ ret[:eflag_s] = sign[res, di]
588
+ case op
589
+ when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0]
590
+ when 'inc', 'dec' # don't touch carry flag
591
+ else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ?
592
+ end
593
+ ret[:eflag_o] = case op
594
+ when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1]
595
+ when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1]
596
+ when 'neg'; Expression[[a0, :&, mask[di]], :==, (mask[di]+1) >> 1]
597
+ else Expression::Unknown
598
+ end
599
+ ret
600
+ }
601
+ when 'imul', 'mul', 'idiv', 'div', /^(scas|cmps)[bwdq]$/
602
+ lambda { |di, *a|
603
+ ret = (binding ? binding[di, *a] : {})
604
+ ret[:eflag_z] = ret[:eflag_s] = ret[:eflag_c] = ret[:eflag_o] = Expression::Unknown # :incomplete_binding ?
605
+ ret
606
+ }
607
+ end
608
+
609
+ @backtrace_binding[op] ||= full_binding || binding if full_binding || binding
610
+ }
611
+ @backtrace_binding
612
+ end
613
+
614
+ # returns the condition (bool Expression) under which a conditional jump is taken
615
+ # returns nil if not a conditional jump
616
+ # backtrace for the condition must include the jump itself (eg loop -> ecx--)
617
# Returns the condition (a boolean Expression) under which the jump +di+ is taken.
# - opcodes starting with 'j': the remainder of the name is handed to
#   decode_cc_to_expr and its result is returned as is
# - 'loop': the counter register (register_symbols[1]) is non-zero
# - 'loop<cc>' (loope, loopne, ...): the counter is non-zero AND <cc> holds
# - anything else: nil
# The counter test refers to the value after the instruction's own decrement,
# so a backtrace of the condition must include the jump itself.
def get_jump_condition(di)
  ecx = register_symbols[1]
  case di.opcode.name
  when /^j(.*)/
    decode_cc_to_expr($1)
  when /^loop(.+)?/
    cc = $1
    counter_nonzero = Expression[ecx, :'!=', 0]
    # loopcc branches only when both tests pass: combining them with '||'
    # would report the branch as taken once the counter is exhausted
    cc ? Expression[counter_nonzero, :'&&', decode_cc_to_expr(cc)] : counter_nonzero
  end
end
628
+
629
# Returns the backtrace binding of the decoded instruction +di+: a hash mapping
# each modified target to the expression of its new value.
# ModRM, Reg and SimdReg arguments are handed to the opcode handler in symbolic
# form, other arguments unchanged.
# Without a handler for the opcode basename, only the first argument is assumed
# to be modified and the result carries :incomplete_binding.
def get_backtrace_binding(di)
  args = di.instruction.args.map { |arg|
    case arg
    when ModRM, Reg, SimdReg then arg.symbolic(di)
    else arg
    end
  }

  handler = backtrace_binding[di.opcode.basename]
  if not handler
    puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
    # assume nothing except the 1st arg is modified
    fallback =
      case args[0]
      when Indirection, Symbol then { args[0] => Expression::Unknown }
      when Expression
        ext = args[0].externals.first
        ext ? { ext => Expression::Unknown } : {}
      else {}
      end
    return fallback.update(:incomplete_binding => Expression[1])
  end

  bd = handler[di, *args]
  full = (@size == 64 ? 0xffff_ffff_ffff_ffff : 0xffff_ffff)

  # handle modifications to al/ah etc: rewrite them as a write to the whole register
  bd.keys.grep(Expression).each { |part|
    # must be in the form (x & mask), with x either :reg or (:reg >> shift) eg ah == ((eax >> 8) & 0xff)
    next unless part.op == :&
    mask = part.rexpr
    next unless mask.kind_of?(Integer)

    reg = part.lexpr
    shift = nil
    if reg.kind_of?(Expression) and reg.op == :>>
      shift = reg.rexpr
      reg = reg.lexpr if shift.kind_of?(Integer)
    end
    next unless reg.kind_of?(Symbol)

    if bd.has_key?(reg)
      # xchg ah, al ; pop sp..
      puts "backtrace: conflict for #{di}: #{part} vs #{reg}" if $VERBOSE
      bd[reg] = Expression::Unknown
      next
    end

    val = bd.delete(part)
    mask <<= shift if shift
    invmask = mask ^ full
    bd[reg] =
      if invmask == 0xffff_ffff_0000_0000 and not di.opcode.props[:op32no64]
        Expression[val, :&, 0xffff_ffff]
      elsif invmask == 0
        val
      else
        moved = shift ? Expression[val, :<<, shift] : val
        Expression[[reg, :&, invmask], :|, [moved, :&, mask]]
      end
  }
  bd
end
676
+
677
# Returns the list of expressions +di+ may transfer execution to.
# Empty unless the opcode has the :setip property.
# 'ret' yields the indirection through register_symbols[4]; a jmp/call through
# a scaled ModRM with an immediate base that lies in a known section is
# delegated to get_xrefs_x_jmptable.
def get_xrefs_x(dasm, di)
  return [] unless di.opcode.props[:setip]

  sz = opsz(di)
  name = di.opcode.basename
  return [Indirection[register_symbols[4], sz/8, di.address]] if name == 'ret'

  target = di.instruction.args.first
  if %w[jmp call].include?(name) and dasm and target.kind_of?(ModRM) and target.imm and
      target.s == sz/8 and not target.b and dasm.get_section_at(target.imm)
    return get_xrefs_x_jmptable(dasm, di, target, sz)
  end

  case target
  when ModRM
    target.sz ||= sz
    [Expression[target.symbolic(di)]]
  when Reg
    [Expression[target.symbolic(di)]]
  when Expression, ::Integer
    [Expression[target]]
  when Farptr
    if target.seg.reduce < 0x30
      [target.addr]
    else
      [Expression[[target.seg, :*, 0x10], :+, target.addr]]
    end
  else
    puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG
    []
  end
end
702
+
703
+ # we detected a jmp table (jmp [base+4*idx])
704
+ # try to return an accurate dest list
705
# Jump-table handling for 'jmp/call [base + scale*idx]'.
# Returns the symbolic destination followed by one Indirection per table entry.
# When the single predecessor block ends with 'cmp idx, <int> ; j[n]b[e]' the
# entry count comes from that bound; otherwise entries are guessed by reading
# the table until a value stops looking like a plausible destination.
def get_xrefs_x_jmptable(dasm, di, mrm, sz)
  # include the symbolic dest for backtrack stuff
  ret = [Expression[mrm.symbolic(di)]]

  # index operand; when the block is 'mov <idx>, <src> ; jmp', use <src> instead
  idx = mrm.i
  insns = di.block.list
  if insns.length == 2 and insns[0].opcode.name =~ /^mov/
    dst = insns[0].instruction.args[0]
    if dst and dst.respond_to?(:symbolic) and dst.symbolic == idx.symbolic
      idx = insns[0].instruction.args[1]
    end
  end

  # look for the bound check in the unique predecessor
  preds = di.block.from_normal.to_a
  pdi = (preds.length == 1 ? dasm.decoded[preds[0]] : nil)
  lim = nil
  if pdi and pdi.opcode.name =~ /^jn?be?/
    ppdi = pdi.block.list[-2]
    if ppdi and ppdi.opcode.name == 'cmp' and ppdi.instruction.args[0].symbolic == idx.symbolic
      lim = Expression[ppdi.instruction.args[1]].reduce
    end
  end

  if lim.kind_of?(Integer)
    # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax]
    s = dasm.get_section_at(mrm.imm)
    lim += 1 if pdi.opcode.name[-1] == ?e
    lim.times { |v|
      dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8))
      ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
      s[0].read(sz/8)
    }
    label = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref')
    replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[label])
    return ret
  end

  puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE
  di.add_comment 'wildguess'

  # start 3 entries before the base when that address is mapped
  v = -3
  s = dasm.get_section_at(mrm.imm - 3*sz/8)
  if not s
    s = dasm.get_section_at(mrm.imm)
    v = 0
  end

  loop do
    ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness)
    diff = Expression[ptr, :-, di.address].reduce
    near = (diff.kind_of?(::Integer) and diff.abs < 4096)
    if near or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr))
      dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8))
      ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
    elsif v > 0
      break
    end
    v += 1
  end
  ret
end
750
+
751
+ # checks if expr is a valid return expression matching the :saveip instruction
752
# True when +expr+, once reduced, is an Indirection of @size/8 bytes whose
# target is exactly register_symbols[4]. +di+ is accepted but not used.
def backtrace_is_function_return(expr, di=nil)
  reduced = Expression[expr].reduce_rec
  return false unless reduced.kind_of?(Indirection)
  return false unless reduced.len == @size/8
  reduced.target == Expression[register_symbols[4]]
end
756
+
757
+ # updates the function backtrace_binding
758
+ # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand)
759
+ # XXX assume retaddrlist is either a list of addr of ret or a list with a single entry which is an external function name (thunk)
760
# Fills f.backtrace_binding by backtracing registers from the return addresses
# back to +faddr+, then returns that binding.
# With +wantregs+ only those are computed; otherwise every register_symbols
# entry for functions of fewer than 20 blocks, else only ebp and esp.
# A register whose backtrace does not give exactly one value is set to Unknown.
# XXX assume retaddrlist is either a list of addr of ret or a list with a single entry which is an external function name (thunk)
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  b = f.backtrace_binding

  esp, ebp = register_symbols[4, 2]

  thunklast = nil
  # XXX handle retaddrlist for multiple/mixed thunks
  if retaddrlist and not dasm.decoded[retaddrlist.first] and cur = dasm.decoded[faddr]
    # no return instruction, must be a thunk : find the last instruction (to backtrace from it)
    seen = []
    loop do
      nxt = dasm.decoded[cur.block.to_subfuncret.to_a.first] || dasm.decoded[cur.block.to_normal.to_a.first]
      break unless nxt and nxt.kind_of?(DecodedInstruction) and not seen.include?(nxt.address)
      seen << nxt.address
      cur = nxt
    end
    if not cur.block.to_subfuncret.to_a.first and cur.block.to_normal and cur.block.to_normal.length > 1
      thunklast = cur.block.list.last.address
    end
  end

  # computes b[r]; also passed as the block of the _check method below
  bt_val = lambda { |r|
    next if not retaddrlist
    b[r] = Expression::Unknown # TODO :pending or something ? (for recursive lazy functions)
    found = retaddrlist.inject([]) { |acc, retaddr|
      acc | dasm.backtrace(Expression[r], (thunklast || retaddr),
        :include_start => true, :snapshot_addr => faddr, :origin => retaddr, :from_subfuncret => thunklast)
    }
    b[r] = (found.length == 1 ? found.first : Expression::Unknown)
  }

  regs =
    if not wantregs.empty?
      wantregs
    elsif dasm.function_blocks(faddr, true).length < 20
      register_symbols
    else
      [ebp, esp]
    end
  regs.each(&bt_val)

  backtrace_update_function_binding_check(dasm, faddr, f, b, &bt_val)

  b
end
807
+
808
# Post-processes the function binding +b+ of the function at +faddr+:
# - when ebp is changed, yields a few stack slots to the block so they get
#   computed, and drops those not holding one of ebx/edx/esi/edi/ebp
# - with dasm.funcs_stdabi, installs the dynamic esp callback on +f+ when esp
#   is Unknown and presumes ebp/ebx/esi/edi preserved when Unknown
# - labels the function when its binding matches a few known helper shapes
def backtrace_update_function_binding_check(dasm, faddr, f, b)
  sz = @size/8

  if b[:ebp] and b[:ebp] != Expression[:ebp]
    # may be a custom 'enter' function (eg recent Visual Studio)
    # TODO put all memory writes in the binding ?
    plain_regs = [:ebx, :edx, :esi, :edi, :ebp]
    slots = [[:ebp]] + (1..3).map { |n| [:esp, :+, n*sz] }
    slots.each { |ptr|
      ind = Indirection[ptr, sz, faddr]
      yield(ind)
      held = b[ind]
      b.delete(ind) if held and not plain_regs.include?(held.reduce_rec)
    }
  end

  if dasm.funcs_stdabi
    if b[:esp] and b[:esp] == Expression::Unknown and not f.btbind_callback
      puts "update_func_bind: #{Expression[faddr]} has esp -> unknown, use dynamic callback" if $DEBUG
      f.btbind_callback = disassembler_default_btbind_callback
    end
    [:ebp, :ebx, :esi, :edi].each { |reg|
      next unless b[reg] and b[reg] == Expression::Unknown
      puts "update_func_bind: #{Expression[faddr]} has #{reg} -> unknown, presume it is preserved" if $DEBUG
      b[reg] = Expression[reg]
    }
  elsif b[:esp] and not Expression[b[:esp], :-, :esp].reduce.kind_of?(::Integer)
    puts "update_func_bind: #{Expression[faddr]} has esp -> #{b[:esp]}" if $DEBUG
  end

  # rename some functions
  # TODO database and real signatures
  # evaluated lazily: does an instruction of the first block bind [segment_base_fs] ?
  uses_fs = lambda {
    dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] }
  }
  rename =
    if b[:eax] and Expression[b[:eax], :-, faddr].reduce == 0
      'geteip' # metasm pic linker
    elsif b[:eax] and b[:ebx] and Expression[b[:eax], :-, :eax].reduce == 0 and
        Expression[b[:ebx], :-, Indirection[:esp, sz, nil]].reduce == 0
      'get_pc_thunk_ebx' # elf pic convention
    elsif b[:esp] and Expression[b[:esp], :-, [:esp, :-, Indirection[[:esp, :+, 2*sz], sz]]].reduce.kind_of?(::Integer) and
        uses_fs.call
      '__SEH_prolog'
    elsif b[:esp] == Expression[:ebp, :+, sz] and uses_fs.call
      '__SEH_epilog'
    end
  dasm.auto_label_at(faddr, rename, 'loc', 'sub') if rename
end
852
+
853
+ # returns true if the expression is an address on the stack
854
# True when register_symbols[4] is one of the externals of +expr+.
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  externals.include?(register_symbols[4])
end
857
+
858
+ # updates an instruction's argument replacing an expression with another (eg label renamed)
859
# Rewrites, in place, the arguments of instruction +i+ so that +old+ becomes
# +new+ (eg label renamed): Expression arguments are replaced, ModRM arguments
# have their imm replaced; other argument kinds are left untouched.
def replace_instr_arg_immediate(i, old, new)
  # an expression equal to old becomes new itself, otherwise old is rebound inside it
  swap = lambda { |e| e == old ? new : Expression[e.bind(old => new).reduce] }

  i.args.map! { |arg|
    case arg
    when Expression
      swap[arg]
    when ModRM
      arg.imm = swap[arg.imm] if arg.imm
      arg
    else
      arg
    end
  }
end
870
+
871
+ # returns a DecodedFunction from a parsed C function prototype
872
+ # TODO rebacktrace already decoded functions (load a header file after dasm finished)
873
+ # TODO walk structs args
874
# Builds a DecodedFunction from the C prototype +sym+ known to the parser +cp+.
# +sym+ may be a String, in which case it is looked up in cp.toplevel.symbol.
# +orig+ defaults to Expression[sym.name].
# The result marks eax/ecx/edx as Unknown, binds esp according to the calling
# convention attribute (fastcall / stdcall / none) and registers a backtrace
# for the return address and for every pointer argument.
def decode_c_function_prototype(cp, sym, orig=nil)
  sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String)
  df = DecodedFunction.new
  orig ||= Expression[sym.name]

  # rlen nil => execution (:x) trace, else a read (:r) of rlen bytes
  new_bt = lambda { |expr, rlen|
    df.backtracked_for << BacktraceTrace.new(expr, orig, expr, rlen ? :r : :x, rlen)
  }

  # return instr emulation
  if sym.has_attribute('noreturn') or sym.has_attribute('__noreturn__')
    df.noreturn = true
  else
    new_bt[Indirection[:esp, @size/8, orig], nil]
  end

  # register dirty (XXX assume standard ABI)
  [:eax, :ecx, :edx].each { |r|
    df.backtrace_binding.update r => Expression::Unknown
  }

  al = cp.typesize[:ptr]
  # stack room taken by one argument, rounded up to a multiple of al
  slot = lambda { |arg| (cp.sizeof(arg) + al - 1) / al * al }

  # emulate ret <n>
  popped =
    if sym.has_attribute('fastcall')
      sym.type.args.to_a[2..-1].to_a
    elsif sym.has_attribute('stdcall')
      sym.type.args.to_a
    else
      []
    end
  df.backtrace_binding[:esp] = Expression[:esp, :+, popped.inject(al) { |sum, arg| sum + slot[arg] }]

  # scan args for function pointers
  # TODO walk structs/unions..
  offset = al
  sym.type.args.to_a.each { |arg|
    argptr = Indirection[[:esp, :+, offset], al, orig]
    offset += slot[arg]
    next unless arg.type.untypedef.kind_of?(C::Pointer)

    case pointee = arg.type.untypedef.type.untypedef
    when C::Function
      new_bt[argptr, nil]
      df.backtracked_for.last.detached = true
    when C::Struct
      new_bt[argptr, al]
    else
      new_bt[argptr, cp.sizeof(nil, pointee)]
    end
  }

  df
end
926
+
927
+ # the lambda for the :default backtrace_binding callback of the disassembler
928
+ # tries to determine the stack offset of unprototyped functions
929
+ # working:
930
+ # checks that origin is a ret, that expr is an indirection from esp and that expr.origin is the ret
931
+ # bt_walk from calladdr until we find a call into us, and assumes it is the current function start
932
+ # TODO handle foo: call bar ; bar: pop eax ; call <withourcallback> ; ret -> bar is not the function start (foo is)
933
+ # then backtrace expr from calladdr to funcstart (snapshot), using esp -> esp+<stackoffvariable>
934
+ # from the result, compute stackoffvariable (only if trivial)
935
+ # will not work if the current function calls any other unknown function (unless all are __cdecl)
936
+ # will not work if the current function is framed (ebp leave ret): in this case the function will return, but its esp will be unknown
937
+ # if the stack offset is found and funcaddr is a string, fixup the static binding and remove the dynamic binding
938
+ # TODO dynamise thunks bt_for & bt_cb
939
# Returns a lambda taking (dasm, bind, funcaddr, calladdr, expr, origin, maxdepth).
# The lambda returns a binding hash: +bind+ unchanged, +bind+ with esp rebound
# to esp+<offset>, or the backtrace_binding of the current function.
# A `break <value>` written directly in the lambda body ends the lambda call
# with that value; the two bare `break`s inside the backtrace_walk block only
# stop that walk.
+ def disassembler_default_btbind_callback
939
+ esp = register_symbols[4]
940
+
941
+ lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
942
# offsets already found, keyed by [dasm, calladdr]
+ @dasm_func_default_off ||= {}
943
+ if off = @dasm_func_default_off[[dasm, calladdr]]
944
+ bind = bind.merge(esp => Expression[esp, :+, off])
945
+ break bind
946
+ end
947
# only a backtrace started from a decoded 'ret' is handled
+ break bind if not odi = dasm.decoded[origin] or odi.opcode.basename != 'ret'
948
+ expr = expr.reduce_rec if expr.kind_of? Expression
949
+ break bind unless expr.kind_of? Indirection and expr.origin == origin
950
# apart from autostackoffset_* placeholders, expr may depend on esp only
+ break bind unless expr.externals.reject { |e| e =~ /^autostackoffset_/ } == [esp]
951
+
952
+ curfunc = dasm.function[funcaddr]
953
# a :thunk entry naming a known function redirects the work to that function
+ if curfunc.backtrace_binding and tk = curfunc.backtrace_binding[:thunk] and dasm.function[tk]
954
+ curfunc = dasm.function[tk]
955
+ end
956
+
957
+ # scan from calladdr for the probable parent function start
958
+ func_start = nil
959
+ dasm.backtrace_walk(true, calladdr, false, false, nil, maxdepth) { |ev, foo, h|
960
+ if ev == :up and h[:sfret] != :subfuncret and di = dasm.decoded[h[:to]] and di.opcode.basename == 'call'
961
+ func_start = h[:from]
962
+ break
963
+ elsif ev == :end
964
+ # entrypoints are functions too
965
+ func_start = h[:addr]
966
+ break
967
+ end
968
+ }
969
+ break bind if not func_start
970
+ puts "automagic #{Expression[funcaddr]}: found func start for #{dasm.decoded[origin]} at #{Expression[func_start]}" if dasm.debug_backtrace
971
# name of the placeholder standing for the unknown stack offset of this call
+ s_off = "autostackoffset_#{Expression[funcaddr]}_#{Expression[calladdr]}"
972
+ list = dasm.backtrace(expr.bind(esp => Expression[esp, :+, s_off]), calladdr, :include_start => true, :snapshot_addr => func_start, :maxdepth => maxdepth, :origin => origin)
973
+ # check if this backtrace made us find our binding
974
+ if off = @dasm_func_default_off[[dasm, calladdr]]
975
+ bind = bind.merge(esp => Expression[esp, :+, off])
976
+ break bind
977
+ elsif not curfunc.btbind_callback
978
+ break curfunc.backtrace_binding
979
+ end
980
# prefer a result whose offset is a multiple of @size/8 within [@size/8, 10*@size/8); else take the first result
+ e_expr = list.find { |e_expr_|
981
+ # TODO cleanup this
982
+ e_expr_ = Expression[e_expr_].reduce_rec
983
+ next if not e_expr_.kind_of? Indirection
984
+ off = Expression[[esp, :+, s_off], :-, e_expr_.target].reduce
985
+ off.kind_of? Integer and off >= @size/8 and off < 10*@size/8 and (off % (@size/8)) == 0
986
+ } || list.first
987
+
988
# unwrap an Expression of the form (nil + x)
+ e_expr = e_expr.rexpr if e_expr.kind_of? Expression and e_expr.op == :+ and not e_expr.lexpr
989
+ break bind unless e_expr.kind_of? Indirection
990
+
991
+ off = Expression[[esp, :+, s_off], :-, e_expr.target].reduce
992
# offset still symbolic: try again with the other autostackoffset_* placeholders replaced
+ if off.kind_of? Expression
993
# first attempt: every placeholder but ours is worth @size/8
+ bd = off.externals.grep(/^autostackoffset_/).inject({}) { |bd_, xt| bd_.update xt => @size/8 }
994
+ bd.delete s_off
995
+ if off.bind(bd).reduce == @size/8
996
+ # all __cdecl
997
+ off = @size/8
998
+ else
999
+ # check if all calls are to the same extern func
1000
+ bd.delete_if { |k, v| k !~ /^autostackoffset_#{Expression[funcaddr]}_/ }
1001
+ bd.each_key { |k| bd[k] = 0 }
1002
+ if off.bind(bd).reduce.kind_of? Integer
1003
+ off = off.bind(bd).reduce / (bd.length + 1)
1004
+ end
1005
+ end
1006
+ end
1007
# sanity check: keep only multiples of @size/8 within [@size/8, 20*@size/8]
+ if off.kind_of? Integer
1008
+ if off < @size/8 or off > 20*@size/8 or (off % (@size/8)) != 0
1009
+ puts "autostackoffset: ignoring off #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE
1010
+ off = :unknown
1011
+ end
1012
+ end
1013
+
1014
+ bind = bind.merge esp => Expression[esp, :+, off] if off != :unknown
1015
# named function: an Integer offset becomes its static binding and the dynamic callback is removed
+ if funcaddr != :default
1016
+ if not off.kind_of? ::Integer
1017
+ #XXX we allow the current function to return, so we should handle the func backtracking its esp
1018
+ #(and other register that are saved and restored in epilog)
1019
+ puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace
1020
+ else
1021
+ puts "autostackoffset: found #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE
1022
+ curfunc.btbind_callback = nil
1023
+ curfunc.backtrace_binding = bind
1024
+
1025
+ # rebacktrace the return address, so that other unknown funcs that depend on us are solved
1026
+ dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin)
1027
+ end
1028
# :default function: the offset is remembered per call site (cache entry + 'stackoff=' comment)
+ else
1029
+ if off.kind_of? ::Integer and dasm.decoded[calladdr]
1030
+ puts "autostackoffset: found #{off-@size/8} for #{dasm.decoded[calladdr]}" if $VERBOSE
1031
+ di = dasm.decoded[calladdr]
1032
+ di.comment.delete_if { |c| c =~ /^stackoff=/ } if di.comment
1033
+ di.add_comment "stackoff=#{off-@size/8}"
1034
+ @dasm_func_default_off[[dasm, calladdr]] = off
1035
+
1036
+ dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin)
1037
+ elsif cachedoff = @dasm_func_default_off[[dasm, calladdr]]
1038
+ bind[esp] = Expression[esp, :+, cachedoff]
1039
# NOTE(review): reached only when dasm.decoded[calladdr] is falsy, yet it is dereferenced here; confirm this branch is dead
+ elsif off.kind_of? ::Integer
1040
+ dasm.decoded[calladdr].add_comment "stackoff=#{off-@size/8}"
1041
+ end
1042
+
1043
+ puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace
1044
+ end
1045
+
1046
+ bind
1047
+ }
1048
+ end
1050
+
1051
+ # the :default backtracked_for callback
1052
+ # returns empty unless funcaddr is not default or calladdr is a call or a jmp
1053
# Returns the lambda used as the :default backtracked_for callback.
# The lambda gives back +btfor+ when funcaddr is not :default, or when the
# instruction decoded at calladdr is named 'call' or 'jmp'; otherwise [].
def disassembler_default_btfor_callback
  lambda { |dasm, btfor, funcaddr, calladdr|
    next btfor if funcaddr != :default
    di = dasm.decoded[calladdr]
    (di and %w[call jmp].include?(di.opcode.name)) ? btfor : []
  }
end
1061
+
1062
+ # returns a DecodedFunction suitable for :default
1063
+ # uses disassembler_default_bt{for/bind}_callback
1064
# Builds the DecodedFunction used for :default: the prototype
# 'void stdfunc(void);' with esp bound to esp + :unknown and both default
# callbacks (btbind / btfor) installed.
def disassembler_default_func
  stack_ptr = register_symbols[4]

  parser = new_cparser
  parser.parse 'void stdfunc(void);'

  func = decode_c_function_prototype(parser, 'stdfunc', :default)
  func.backtrace_binding[stack_ptr] = Expression[stack_ptr, :+, :unknown]
  func.btbind_callback = disassembler_default_btbind_callback
  func.btfor_callback = disassembler_default_btfor_callback
  func
end
1074
+
1075
+ # returns a hash { :retval => r, :changed => [] }
1076
# Returns { :retval => first register symbol, :changed => first three register symbols }.
def abi_funcall
  regs = register_symbols
  { :retval => regs[0], :changed => regs[0, 3] }
end
1079
+
1080
+
1081
+ # computes the binding of the sequence of code starting at entry included
1082
+ # the binding is a hash showing the value of modified elements at the
1083
+ # end of the code sequence, relative to their value at entry
1084
+ # the elements are all the registers and the memory written to
1085
+ # if finish is nil, the binding will include :ip, which is the address
1086
+ # to be executed next (if it exists)
1087
+ # the binding will not include memory access from subfunctions
1088
+ # entry should be an entrypoint of the disassembler if finish is nil
1089
+ # the code sequence must have only one end, with no to_normal
1090
# Walks the decoded blocks reachable from +entry+ (stopping at +finish+ when given)
# and returns a hash: Indirection => value for each memory write found,
# register => value for each changed register, plus :ip when the sequence end is
# found by the walk and does not stop execution without a target.
# Raises RuntimeError on a second sequence end, on an unsupported ending
# instruction, or when no end is found and +finish+ is nil.
+ def code_binding(dasm, entry, finish=nil)
1090
+ entry = dasm.normalize(entry)
1091
+ finish = dasm.normalize(finish) if finish
1092
+ lastdi = nil
1093
+ binding = {}
1094
# backtraces expr from +from+ with a snapshot at entry; anything but exactly one result gives Unknown
+ bt = lambda { |from, expr, inc_start|
1095
+ ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start)
1096
+ ret.length == 1 ? ret.first : Expression::Unknown
1097
+ }
1098
+
1099
+ # walk blocks, search for finish, scan memory writes
1100
+ todo = [entry]
1101
# seeded with Unknown so that an Unknown address is skipped by the walk
+ done = [Expression::Unknown]
1102
+ while addr = todo.pop
1103
+ addr = dasm.normalize(addr)
1104
+ next if done.include? addr or addr == finish or not dasm.decoded[addr].kind_of? DecodedInstruction
1105
+ done << addr
1106
+ b = dasm.decoded[addr].block
1107
+
1108
# find is truthy only when an instruction at +finish+ is met; the successors of that block are then not followed
+ next if b.list.find { |di|
1109
+ a = di.address
1110
+ if a == finish
1111
# NOTE(review): if finish were the first instruction of b.list, index -1 would select the block's last instruction; confirm this cannot happen
+ lastdi = b.list[b.list.index(di) - 1]
1112
+ true
1113
+ else
1114
+ # check writes from the instruction
1115
+ get_xrefs_w(dasm, di).each { |waddr, len|
1116
+ # we want the ptr expressed with reg values at entry
1117
+ ptr = bt[a, waddr, false]
1118
+ binding[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true]
1119
+ }
1120
+ false
1121
+ end
1122
+ }
1123
+
1124
+ hasnext = false
1125
+ b.each_to_samefunc(dasm) { |t|
1126
+ hasnext = true
1127
+ if t == finish
1128
+ lastdi = b.list.last
1129
+ else
1130
+ todo << t
1131
+ end
1132
+ }
1133
+
1134
+ # check end of sequence
1135
+ if not hasnext
1136
+ raise "two-ended code_binding #{lastdi} & #{b.list.last}" if lastdi
1137
+ lastdi = b.list.last
1138
+ if lastdi.opcode.props[:setip]
1139
+ e = get_xrefs_x(dasm, lastdi)
1140
+ raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec]
1141
+ binding[:ip] = bt[lastdi.address, e.first, false]
1142
+ elsif not lastdi.opcode.props[:stopexec]
1143
+ binding[:ip] = lastdi.next_addr
1144
+ end
1145
+ end
1146
+ end
1147
# drop entries whose value equals their own key (location left unchanged)
+ binding.delete_if { |k, v| Expression[k] == Expression[v] }
1148
+
1149
+ # add register binding
1150
+ raise "no code_binding end" if not lastdi and not finish
1151
+ register_symbols.each { |reg|
1152
# with a last instruction, include its effects; otherwise backtrace from finish, excluded
+ val =
1153
+ if lastdi; bt[lastdi.address, reg, true]
1154
+ else bt[finish, reg, false]
1155
+ end
1156
+ next if val == Expression[reg]
1157
+ mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride)
1158
+ mask = 0xffff_ffff_ffff_ffff if @size == 64
1159
+ val = Expression[val, :&, mask].reduce
1160
+ binding[reg] = Expression[val]
1161
+ }
1162
+
1163
+ binding
1164
+ end
1166
+ end
1167
+ end