metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,193 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+
9
+ module Metasm
10
class Ia32
	# Debugger support for the Ia32 cpu.
	# Every method taking +dbg+ only uses it through the calls visible here:
	# get_reg_value/set_reg_value, dbg[reg] / dbg[reg]=, memory[], pc,
	# breakpoint, breakpoint_thread, addrname!, resolve_expr, memory_read_int/memory_write_int.

	# name of the program counter register
	def dbg_register_pc
		@dbg_register_pc ||= :eip
	end
	# name of the stack pointer register (entry 7 of dbg_register_list)
	def dbg_register_sp
		@dbg_register_sp ||= dbg_register_list[7]
	end
	# name of the flags register
	def dbg_register_flags
		@dbg_register_flags ||= :eflags
	end

	# list of register names; other methods rely on the positions:
	# [0] holds the return value, [6] is the frame pointer, [7] is the stack pointer
	def dbg_register_list
		@dbg_register_list ||= [:eax, :ebx, :ecx, :edx, :esi, :edi, :ebp, :esp, :eip]
	end

	# hash register name => size in bits; defaults to 32, segment registers are 16
	def dbg_register_size
		@dbg_register_size ||= Hash.new(32).update(:cs => 16, :ds => 16, :es => 16, :fs => 16, :gs => 16)
	end

	# list of flag names exposed to the debugger (:t is handled through DBG_FLAGS only)
	def dbg_flag_list
		@dbg_flag_list ||= [:c, :p, :a, :z, :s, :i, :d, :o]
	end

	# flag name => bit index in the flags register
	DBG_FLAGS = { :c => 0, :p => 2, :a => 4, :z => 6, :s => 7, :t => 8, :i => 9, :d => 10, :o => 11 }
	# returns the value (0 or 1) of flag +f+
	def dbg_get_flag(dbg, f)
		(dbg.get_reg_value(dbg_register_flags) >> DBG_FLAGS[f]) & 1
	end
	# sets flag +f+ to 1 (read-modify-write of the flags register)
	def dbg_set_flag(dbg, f)
		fl = dbg.get_reg_value(dbg_register_flags)
		fl |= 1 << DBG_FLAGS[f]
		dbg.set_reg_value(dbg_register_flags, fl)
	end
	# sets flag +f+ to 0 (read-modify-write of the flags register)
	def dbg_unset_flag(dbg, f)
		fl = dbg.get_reg_value(dbg_register_flags)
		fl &= ~(1 << DBG_FLAGS[f])
		dbg.set_reg_value(dbg_register_flags, fl)
	end

	# singlestepping is driven by the :t flag
	def dbg_enable_singlestep(dbg)
		dbg_set_flag(dbg, :t)
	end
	def dbg_disable_singlestep(dbg)
		dbg_unset_flag(dbg, :t)
	end

	# activate a breakpoint: :bpx is a software bp, any other type is handled as a hardware bp
	def dbg_enable_bp(dbg, bp)
		case bp.type
		when :bpx; dbg_enable_bpx( dbg, bp)
		else dbg_enable_bphw(dbg, bp)
		end
	end

	# deactivate a breakpoint, same dispatch as dbg_enable_bp
	def dbg_disable_bp(dbg, bp)
		case bp.type
		when :bpx; dbg_disable_bpx( dbg, bp)
		else dbg_disable_bphw(dbg, bp)
		end
	end

	# software bp: save the original byte (only the first time) and overwrite it with 0xcc
	def dbg_enable_bpx(dbg, bp)
		bp.internal[:previous] ||= dbg.memory[bp.address, 1]
		dbg.memory[bp.address, 1] = "\xcc"
	end

	# software bp: write back the byte saved by dbg_enable_bpx
	def dbg_disable_bpx(dbg, bp)
		dbg.memory[bp.address, 1] = bp.internal[:previous]
	end

	# allocate a debug register for a hwbp by checking the list of hwbp existing in dbg
	# also fills the default type (:x) and len (1) of the bp
	# returns the debug register index (0..3), raises if all four are in use
	def dbg_alloc_bphw(dbg, bp)
		if not bp.internal[:dr]
			may = [0, 1, 2, 3]
			dbg.breakpoint_thread.values.each { |bb| may.delete bb.internal[:dr] }
			raise 'alloc_bphw: no free debugregister' if may.empty?
			bp.internal[:dr] = may.first
		end
		bp.internal[:type] ||= :x
		bp.internal[:len] ||= 1
		bp.internal[:dr]
	end

	# program drN with the bp address and update dr7 accordingly
	# raises if the bp len or type has no dr7 encoding
	def dbg_enable_bphw(dbg, bp)
		nr = dbg_alloc_bphw(dbg, bp)
		dr7 = dbg[:dr7]
		l = { 1 => 0, 2 => 1, 4 => 3, 8 => 2 }[bp.internal[:len]]
		rw = { :x => 0, :w => 1, :r => 3 }[bp.internal[:type]]
		raise "enable_bphw: invalid breakpoint #{bp.inspect}" if not l or not rw
		dr7 &= ~((15 << (16+4*nr)) | (3 << (2*nr)))	# clear
		dr7 |= ((l << 2) | rw) << (16+4*nr)	# set drN len/rw
		dr7 |= 3 << (2*nr)	# enable global/local drN

		dbg["dr#{nr}"] = bp.address
		dbg[:dr7] = dr7
	end

	# clear the enable bits of the bp debug register in dr7
	# does nothing if the bp was never assigned a debug register
	def dbg_disable_bphw(dbg, bp)
		nr = bp.internal[:dr]
		return if not nr	# 2*nil would raise a TypeError
		dr7 = dbg[:dr7]
		dr7 &= ~(3 << (2*nr))
		dbg[:dr7] = dr7
	end

	# called before resuming the target: reset dr6 so that the next event can be identified
	def dbg_check_pre_run(dbg)
		if dbg[:dr6] == 0 and dbg[:dr7] == 0
			dbg[:dr7] = 0x10000	# some OS (eg Windows) only return dr6 if dr7 != 0
		end
		dbg[:dr6] = 0
	end

	# software bp hit: move pc back onto the bp address if it sits just after it
	def dbg_evt_bpx(dbg, b)
		if b.address == dbg.pc-1
			dbg.pc -= 1
		end
	end

	# find the software bp matching the current pc (looked up at pc-1, then pc)
	# returns nil when bit 0x4000 of dr6 is set (presumably the singlestep status bit - TODO confirm)
	def dbg_find_bpx(dbg)
		return if dbg[:dr6] & 0x4000 != 0
		pc = dbg.pc
		dbg.breakpoint[pc-1] || dbg.breakpoint[pc]
	end

	# find the hardware bp that triggered, from the low 4 bits of dr6
	# returns nil if none of these bits is set or if no bp owns the debug register
	def dbg_find_hwbp(dbg)
		dr6 = dbg[:dr6]
		return if dr6 & 0xf == 0
		# an Integer is always true in ruby (0 included): test the bit explicitly,
		# otherwise find would always answer 0
		dn = (0..3).find { |n| dr6 & (1 << n) != 0 }
		dbg.breakpoint_thread.values.find { |b| b.internal[:dr] == dn }
	end

	# true if a singlestep should step over +di+: instruction with a rep prefix, or opcode with the :saveip property
	def dbg_need_stepover(dbg, addr, di)
		di and ((di.instruction.prefix and di.instruction.prefix[:rep]) or di.opcode.props[:saveip])
	end

	# true if +di+ ends a stepout, ie its opcode is named 'ret'
	def dbg_end_stepout(dbg, addr, di)
		di and di.opcode.name == 'ret'
	end

	# return (yield) a list of [addr, symbolic name]
	# the first entry is the current pc, the following are the return addresses
	# found by following the chain of saved frame pointers ([fp] = previous fp, [fp+4] = return address)
	# +rec+ limits the number of frames walked
	def dbg_stacktrace(dbg, rec=500)
		ret = []
		s = dbg.addrname!(dbg.pc)
		yield(dbg.pc, s) if block_given?
		ret << [dbg.pc, s]
		fp = dbg.get_reg_value(dbg_register_list[6])
		stack = dbg.get_reg_value(dbg_register_list[7]) - 8
		# stop as soon as fp leaves the 0x10000 bytes window above the previous frame
		while fp > stack and fp <= stack+0x10000 and rec != 0
			rec -= 1
			ra = dbg.resolve_expr Indirection[fp+4, 4]
			s = dbg.addrname!(ra)
			yield(ra, s) if block_given?
			ret << [ra, s]
			stack = fp	# ensure we walk the stack upwards
			fp = dbg.resolve_expr Indirection[fp, 4]
		end
		ret
	end

	# retrieve the current function return value
	# only valid at function exit
	def dbg_func_retval(dbg)
		dbg.get_reg_value(dbg_register_list[0])
	end
	def dbg_func_retval_set(dbg, val)
		dbg.set_reg_value(dbg_register_list[0], val)
	end

	# retrieve the current function return address
	# to be called only on entry of the subfunction
	def dbg_func_retaddr(dbg)
		dbg.memory_read_int(dbg_register_list[7])
	end
	def dbg_func_retaddr_set(dbg, ret)
		dbg.memory_write_int(dbg_register_list[7], ret)
	end

	# retrieve the current function arguments
	# only valid at function entry (eg right after the call)
	# argument +argnr+ (0-based) is read at esp + 4*(argnr+1)
	def dbg_func_arg(dbg, argnr)
		dbg.memory_read_int(Expression[:esp, :+, 4*(argnr+1)])
	end
	def dbg_func_arg_set(dbg, argnr, arg)
		dbg.memory_write_int(Expression[:esp, :+, 4*(argnr+1)], arg)
	end
end
193
+ end
@@ -0,0 +1,1167 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/opcodes'
8
+ require 'metasm/decode'
9
+
10
+ module Metasm
11
+ class Ia32
12
class ModRM
	# Decodes the operand described by the modrm byte +byte+.
	# When the two high bits of +byte+ are both set, the operand is a register:
	# returns regclass.new(low 3 bits, opsz).
	# Otherwise walks the Sum[adsz][mod][rm] description to collect a base register,
	# a scaled index and/or an immediate (read from +edata+), and returns
	# ModRM.new(adsz, opsz, scale, index, base, imm, seg).
	def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg)
		mod = (byte >> 6) & 3
		rm = byte & 7
		return regclass.new(rm, opsz) if mod == 3

		scale = index = base = disp = nil
		Sum[adsz][mod][rm].each { |part|
			case part
			when Integer
				# first register seen is the base, a second one is an index with scale 1
				reg = Reg.new(part, adsz)
				if base
					scale = 1
					index = reg
				else
					base = reg
				end

			when :sib
				sib = edata.get_byte.to_i
				idx_nr = (sib >> 3) & 7
				base_nr = sib & 7

				# index number 4 means 'no index'
				if idx_nr != 4
					scale = 1 << ((sib >> 6) & 3)
					index = Reg.new(idx_nr, adsz)
				end

				# base number 5 with mod 0 means 'immediate instead of base register'
				if base_nr == 5 and mod == 0
					disp = Expression[edata.decode_imm("i#{adsz}".to_sym, endianness)]
				else
					base = Reg.new(base_nr, adsz)
				end

			when :i8, :i16, :i32
				disp = Expression[edata.decode_imm(part, endianness)]
			end
		}

		if disp
			val = disp.reduce
			if val.kind_of? Integer and val < -0x10_0000
				# probably a base address -> unsigned
				disp = Expression[val & ((1 << (adsz || 32)) - 1)]
			end
		end

		new adsz, opsz, scale, index, base, disp, seg
	end
end
63
+
64
class Farptr
	# Reads a far pointer from +edata+: first an unsigned offset of +adsz+ bits,
	# then an unsigned 16-bit segment. Returns Farptr.new(segment, offset).
	def self.decode(edata, endianness, adsz)
		offset_type = "u#{adsz}".to_sym
		# the offset comes first in the stream, the segment follows
		offset = Expression[edata.decode_imm(offset_type, endianness)]
		segment = Expression[edata.decode_imm(:u16, endianness)]
		new(segment, offset)
	end
end
71
+
72
def build_opcode_bin_mask(op)
	# op.bin_mask gets one entry per byte of op.bin:
	# bit = 0 if it can be changed by a field value, 1 if it is fixed by the opcode
	op.bin_mask = Array.new(op.bin.length, 0)
	# first accumulate the bits covered by each field (field => [byte index, bit offset])
	op.fields.each { |name, (idx, shift)|
		op.bin_mask[idx] |= (@fields_mask[name] << shift)
	}
	# then invert every byte in place: what is not a field is fixed
	op.bin_mask.map! { |field_bits| field_bits ^ 255 }
end
80
+
81
def build_bin_lookaside
	# returns an array indexed by byte value => list of opcodes whose first byte may match
	# opcode.bin_mask is built here
	table = Array.new(256) { [] }
	opcode_list.each { |op|
		build_opcode_bin_mask op

		first = op.bin[0]
		fixed = op.bin_mask[0]

		# scan from the opcode byte up to the same byte with all variable bits set,
		# keeping the values that agree with the opcode on every fixed bit
		(first..(first | (255^fixed))).each { |byte|
			table[byte] << op if byte & fixed == first & fixed
		}
	}
	table
end
99
+
100
# records +byte+ in instr.prefix[:list] and, if it is a known prefix, sets the matching
# instr.prefix entry (:opsz, :adsz, :lock, :rep, or :seg + :jmphint)
# returns true if the byte is a prefix, false otherwise or when the list reaches 15 bytes
def decode_prefix(instr, byte)
	instr.prefix ||= {}
	pfx = instr.prefix
	pfx[:list] ||= []
	pfx[:list] << byte

	# XXX actual limit = 15-instr.length
	return false if pfx[:list].length >= 15

	case byte
	when 0x66
		pfx[:opsz] = true
	when 0x67
		pfx[:adsz] = true
	when 0xF0
		pfx[:lock] = true
	when 0xF2
		pfx[:rep] = :nz
	when 0xF3
		pfx[:rep] = :z	# postprocessed by decode_instr
	when 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65
		# segment number: bits 3-4 of the byte when bit 0x40 is clear, bits 0-2 otherwise
		segnr = (byte & 0x40 == 0) ? ((byte >> 3) & 3) : (byte & 7)
		pfx[:seg] = SegReg.new(segnr)

		pfx[:jmphint] = (byte & 0x10 != 0)
	else
		return false
	end
	true
end
127
+
128
+ # tries to find the opcode encoded at edata.ptr
129
+ # if no match, tries to match a prefix (update di.instruction.prefix)
130
+ # on match, edata.ptr points to the first byte of the opcode (after prefixes)
131
# returns a new DecodedInstruction with its opcode set when one matches,
# nil when the data is exhausted or when a byte is neither an opcode nor a prefix
def decode_findopcode(edata)
	di = DecodedInstruction.new self
	while edata.ptr < edata.data.length
		# prefixes decoded by the previous iterations (if any)
		pfx = di.instruction.prefix || {}
		byte = edata.data[edata.ptr]
		byte = byte.unpack('C').first if byte.kind_of? ::String	# 1.9
		# candidates come from the table indexed by first byte; the first one passing all checks wins
		return di if di.opcode = @bin_lookaside[byte].find { |op|
			# fetch the relevant bytes from edata
			bseq = edata.data[edata.ptr, op.bin.length].unpack('C*')
			di.opcode = op if op.props[:opsz]	# needed by opsz(di)

			# check against full opcode mask
			# (b2 is nil when edata holds fewer bytes than the opcode: no match)
			op.bin.zip(bseq, op.bin_mask).all? { |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } and
			# check special cases
			!(
			  # fail if any of those is true
			  (fld = op.fields[:seg2A]  and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg2A] == 1) or
			  (fld = op.fields[:seg3A]  and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3A] < 4) or
			  (fld = op.fields[:seg3A] || op.fields[:seg3] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3] > 5) or
			  (fld = op.fields[:modrmA] and (bseq[fld[0]] >> fld[1]) & 0xC0 == 0xC0) or
			  (sz  = op.props[:opsz]    and opsz(di) != sz) or
			  (ndpfx = op.props[:needpfx] and not pfx[:list].to_a.include? ndpfx) or
			  # return non-ambiguous opcode (eg push.i16 in 32bit mode) / sync with addop_post in opcode.rb
			  (pfx[:opsz] and (op.args == [:i] or op.args == [:farptr] or op.name[0, 3] == 'ret') and not op.props[:opsz]) or
			  (pfx[:adsz] and op.props[:adsz] and op.props[:adsz] == @size)
			 )
		}

		# no opcode here: consume the byte as a prefix, give up if it is not one
		break if not decode_prefix(di.instruction, edata.get_byte)
		di.bin_length += 1
	end
end
163
+
164
# decodes the arguments of the instruction whose opcode is di.opcode
# reads the opcode bytes and the following operand bytes from +edata+,
# fills di.instruction (opname, args), updates di.bin_length and the prefix hash
# returns +di+
def decode_instr_op(edata, di)
	before_ptr = edata.ptr
	op = di.opcode
	di.instruction.opname = op.name
	bseq = edata.read(op.bin.length).unpack('C*')		# decode_findopcode ensures that data >= op.length
	pfx = di.instruction.prefix || {}

	# a prefix required by the opcode is part of the opcode: do not interpret it again
	case op.props[:needpfx]
	when 0x66; pfx.delete :opsz
	when 0x67; pfx.delete :adsz
	when 0xF2, 0xF3; pfx.delete :rep
	end

	# value of the opcode field +f+ extracted from the opcode bytes, nil if the opcode has no such field
	field_val = lambda { |f|
		if fld = op.fields[f]
			(bseq[fld[0]] >> fld[1]) & @fields_mask[f]
		end
	}

	opsz = opsz(di)

	# 48 - size swaps 16 and 32
	if pfx[:adsz]
		adsz = 48 - @size
	else
		adsz = @size
	end

	mmxsz = ((op.props[:xmmx] && pfx[:opsz]) ? 128 : 64)
	# one decoded argument per entry of op.args, in order (some of them read from edata)
	op.args.each { |a|
		di.instruction.args << case a
		when :reg; Reg.new field_val[a], opsz
		when :eeec; CtrlReg.new field_val[a]
		when :eeed; DbgReg.new field_val[a]
		when :seg2, :seg2A, :seg3, :seg3A; SegReg.new field_val[a]
		when :regfp; FpReg.new field_val[a]
		when :regmmx; SimdReg.new field_val[a], mmxsz
		when :regxmm; SimdReg.new field_val[a], 128

		when :farptr; Farptr.decode edata, @endianness, opsz
		when :i8, :u8, :u16; Expression[edata.decode_imm(a, @endianness)]
		when :i; Expression[edata.decode_imm("#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym, @endianness)]

		# no modrm byte in the opcode: a fixed one is passed instead (6 for 16bit addressing, 5 otherwise)
		when :mrm_imm; ModRM.decode edata, (adsz == 16 ? 6 : 5), @endianness, adsz, opsz, pfx[:seg]
		when :modrm, :modrmA; ModRM.decode edata, field_val[a], @endianness, adsz, opsz, pfx[:seg]
		when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx[:seg], SimdReg
		when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx[:seg], SimdReg

		# implicit arguments, nothing is read
		when :imm_val1; Expression[1]
		when :imm_val3; Expression[3]
		when :reg_cl; Reg.new 1, 8
		when :reg_eax; Reg.new 0, opsz
		when :reg_dx; Reg.new 2, 16
		when :regfp0; FpReg.new nil
		else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}"
		end
	}

	di.bin_length += edata.ptr - before_ptr

	# movsx/movzx: source and destination have different sizes
	# source is 8 or 16 bits (from props[:argsz]), destination follows the opsz prefix
	if op.name == 'movsx' or op.name == 'movzx'
		if di.opcode.props[:argsz] == 8
			di.instruction.args[1].sz = 8
		else
			di.instruction.args[1].sz = 16
		end
		if pfx[:opsz]
			di.instruction.args[0].sz = 48-@size
		else
			di.instruction.args[0].sz = @size
		end
	end

	# the segment prefix was handed to ModRM.decode above
	pfx.delete :seg
	# replace the raw :nz/:z marker set by decode_prefix with the prefix name for this opcode
	# the :rep entry stays removed if the opcode has neither the :strop nor the :stropz property
	case pfx.delete(:rep)
	when :nz
		if di.opcode.props[:strop]
			pfx[:rep] = 'rep'
		elsif di.opcode.props[:stropz]
			pfx[:rep] = 'repnz'
		end
	when :z
		if di.opcode.props[:strop]
			pfx[:rep] = 'rep'
		elsif di.opcode.props[:stropz]
			pfx[:rep] = 'repz'
		end
	end

	di
end
254
+
255
+ # converts relative jump/call offsets to absolute addresses
256
+ # adds the eip delta to the offset +off+ of the instruction (may be an Expression) + its bin_length
257
+ # do not call twice on the same di !
258
# only opcodes with the :setip property, an Expression as last argument
# and a name not starting with 'ret' are rewritten; always returns +di+
def decode_instr_interpret(di, addr)
	return di if not di.opcode.props[:setip]
	return di if not di.instruction.args.last.kind_of? Expression
	return di if di.instruction.opname[0, 3] == 'ret'

	# target = address of the next instruction + encoded offset
	delta = di.instruction.args.last.reduce
	target = Expression[[addr, :+, di.bin_length], :+, delta].reduce
	di.instruction.args[-1] = Expression[target]

	di
end
267
+
268
+ # return the list of registers as symbols in the order used by pushad
269
+ # for use in backtrace and stuff, for compatibility with x64
270
+ # esp is [4]
271
REG_SYMS = [
	:eax, :ecx, :edx, :ebx,
	:esp, :ebp, :esi, :edi
]
# accessor for the REG_SYMS list (the constant itself is returned, not a copy)
def register_symbols
	return REG_SYMS
end
275
+
276
+ # interprets a condition code (in an opcode name) as an expression involving backtracked eflags
277
+ # eflag_p is never computed, and this returns Expression::Unknown for this flag
278
+ # ex: 'z' => Expression[:eflag_z]
279
# returns nil for an unknown condition code
# the ecxz/cxz conditions test the register at index 1 of register_symbols instead of a flag
def decode_cc_to_expr(cc)
	case cc
	when 'o'; Expression[:eflag_o]
	when 'no'; Expression[:'!', :eflag_o]
	when 'b', 'nae', 'c'; Expression[:eflag_c]
	when 'nb', 'ae', 'nc'; Expression[:'!', :eflag_c]
	when 'z', 'e'; Expression[:eflag_z]
	when 'nz', 'ne'; Expression[:'!', :eflag_z]
	when 'be', 'na'; Expression[:eflag_c, :|, :eflag_z]
	when 'nbe', 'a'; Expression[:'!', [:eflag_c, :|, :eflag_z]]
	when 's'; Expression[:eflag_s]
	when 'ns'; Expression[:'!', :eflag_s]
	when 'p', 'pe'; Expression::Unknown
	when 'np', 'po'; Expression::Unknown
	when 'l', 'nge'; Expression[:eflag_s, :'!=', :eflag_o]
	when 'nl', 'ge'; Expression[:eflag_s, :==, :eflag_o]
	when 'le', 'ng'; Expression[[:eflag_s, :'!=', :eflag_o], :|, :eflag_z]
	# 'greater' is the negation of 'le': sign == overflow AND zero flag clear
	when 'nle', 'g'; Expression[[:eflag_s, :==, :eflag_o], :&, [:'!', :eflag_z]]
	when 'ecxz'; Expression[:'!', register_symbols[1]]
	when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]]
	end
end
301
+
302
+ # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
303
# the hash is built by init_backtrace_binding on first access, then cached in @backtrace_binding
def backtrace_binding
	return @backtrace_binding if @backtrace_binding
	@backtrace_binding = init_backtrace_binding
end
306
# replaces the cached binding hash
def backtrace_binding=(b)
	@backtrace_binding = b
end
307
+
308
# operand size in bits for +di+ (which may be nil):
# the opcode :argsz property if set, otherwise @size,
# swapped (48 - @size) when the instruction has the :opsz prefix
def opsz(di)
	return @size if not di

	forced = di.opcode.props[:argsz]
	return forced if forced

	pfx = di.instruction.prefix
	if pfx and pfx[:opsz]
		48 - @size
	else
		@size
	end
end
314
+
315
+ # populate the @backtrace_binding hash with default values
316
+ def init_backtrace_binding
317
+ @backtrace_binding ||= {}
318
+
319
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
320
+
321
+ mask = lambda { |di| (1 << opsz(di))-1 } # 32bits => 0xffff_ffff
322
+ sign = lambda { |v, di| Expression[[[v, :&, mask[di]], :>>, opsz(di)-1], :'!=', 0] }
323
+
324
+ opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op|
325
+ binding = case op
326
+ when 'mov', 'movsx', 'movzx', 'movsxd', 'movd', 'movq'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
327
+ when 'lea'; lambda { |di, a0, a1| { a0 => a1.target } }
328
+ when 'xchg'; lambda { |di, a0, a1| { a0 => Expression[a1], a1 => Expression[a0] } }
329
+ when 'add', 'sub', 'or', 'xor', 'and', 'pxor', 'adc', 'sbb'
330
+ lambda { |di, a0, a1|
331
+ e_op = { 'add' => :+, 'sub' => :-, 'or' => :|, 'and' => :&, 'xor' => :^, 'pxor' => :^, 'adc' => :+, 'sbb' => :- }[op]
332
+ ret = Expression[a0, e_op, a1]
333
+ ret = Expression[ret, e_op, :eflag_c] if op == 'adc' or op == 'sbb'
334
+ # optimises eax ^ eax => 0
335
+ # avoid hiding memory accesses (to not hide possible fault)
336
+ ret = Expression[ret.reduce] if not a0.kind_of? Indirection
337
+ { a0 => ret }
338
+ }
339
+ when 'xadd'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1], a1 => Expression[a0] } }
340
+ when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } }
341
+ when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } }
342
+ when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask[di]] } }
343
+ when 'neg'; lambda { |di, a0| { a0 => Expression[:-, a0] } }
344
+ when 'rol', 'ror'
345
+ lambda { |di, a0, a1|
346
+ e_op = (op[2] == ?r ? :>> : :<<)
347
+ inv_op = {:<< => :>>, :>> => :<< }[e_op]
348
+ sz = [a1, :%, opsz(di)]
349
+ isz = [[opsz(di), :-, a1], :%, opsz(di)]
350
+ # ror a, b => (a >> b) | (a << (32-b))
351
+ { a0 => Expression[[[a0, e_op, sz], :|, [a0, inv_op, isz]], :&, mask[di]] }
352
+ }
353
+ when 'sar', 'shl', 'sal'; lambda { |di, a0, a1| { a0 => Expression[a0, (op[-1] == ?r ? :>> : :<<), [a1, :%, [opsz(di), 32].max]] } }
354
+ when 'shr'; lambda { |di, a0, a1| { a0 => Expression[[a0, :&, mask[di]], :>>, [a1, :%, opsz(di)]] } }
355
+ when 'cwd', 'cdq', 'cqo'; lambda { |di| { Expression[edx, :&, mask[di]] => Expression[mask[di], :*, sign[eax, di]] } }
356
+ when 'cbw', 'cwde', 'cdqe'; lambda { |di|
357
+ o2 = opsz(di)/2 ; m2 = (1 << o2) - 1
358
+ { Expression[eax, :&, mask[di]] => Expression[[eax, :&, m2], :|, [m2 << o2, :*, [[eax, :>>, o2-1], :&, 1]]] } }
359
+ when 'push'
360
+ lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8],
361
+ Indirection[esp, opsz(di)/8, di.address] => Expression[a0] } }
362
+ when 'pop'
363
+ lambda { |di, a0| { esp => Expression[esp, :+, opsz(di)/8],
364
+ a0 => Indirection[esp, opsz(di)/8, di.address] } }
365
+ when 'pushfd'
366
+ # TODO Unknown per bit
367
+ lambda { |di|
368
+ efl = Expression[0x202]
369
+ bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] }
370
+ bts[0, :eflag_c]
371
+ bts[6, :eflag_z]
372
+ bts[7, :eflag_s]
373
+ bts[11, :eflag_o]
374
+ { esp => Expression[esp, :-, opsz(di)/8], Indirection[esp, opsz(di)/8, di.address] => efl }
375
+ }
376
+ when 'popfd'
377
+ lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] }
378
+ { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } }
379
+ when 'sahf'
380
+ lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] }
381
+ { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } }
382
+ when 'lahf'
383
+ lambda { |di|
384
+ efl = Expression[2]
385
+ bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] }
386
+ bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a]
387
+ bts[6, :eflag_z]
388
+ bts[7, :eflag_s]
389
+ { eax => efl }
390
+ }
391
+ when 'pushad'
392
+ lambda { |di|
393
+ ret = {}
394
+ st_off = 0
395
+ register_symbols.reverse_each { |r|
396
+ ret[Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]] = Expression[r]
397
+ st_off += opsz(di)/8
398
+ }
399
+ ret[esp] = Expression[esp, :-, st_off]
400
+ ret
401
+ }
402
+ when 'popad'
403
+ lambda { |di|
404
+ ret = {}
405
+ st_off = 0
406
+ register_symbols.reverse_each { |r|
407
+ ret[r] = Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]
408
+ st_off += opsz(di)/8
409
+ }
410
+ ret[esp] = Expression[esp, :+, st_off] # esp is not popped
411
+ ret
412
+ }
413
+ when 'call'
414
+ lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8],
415
+ Indirection[esp, opsz(di)/8, di.address] => Expression[di.next_addr] } }
416
+ when 'ret'; lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/8, :+, a[0] || 0]] } }
417
+ when 'loop', 'loopz', 'loopnz'; lambda { |di, a0| { ecx => Expression[ecx, :-, 1] } }
418
+ when 'enter'
419
+ lambda { |di, a0, a1|
420
+ sz = opsz(di)/8
421
+ depth = a1.reduce % 32
422
+ b = { Indirection[ebp, sz, di.address] => Expression[ebp],
423
+ Indirection[[esp, :+, a0.reduce+sz*depth], sz, di.address] => Expression[ebp],
424
+ ebp => Expression[esp, :-, sz],
425
+ esp => Expression[esp, :-, a0.reduce+sz*depth+sz] }
426
+ (1..depth).each { |i|
427
+ b[Indirection[[esp, :+, a0.reduce+i*sz], sz, di.address]] =
428
+ b[Indirection[[ebp, :-, i*sz], sz, di.address]] =
429
+ Expression::Unknown # TODO Indirection[[ebp, :-, i*sz], sz, di.address]
430
+ }
431
+ b
432
+ }
433
+ when 'leave'; lambda { |di| { ebp => Indirection[[ebp], opsz(di)/8, di.address], esp => Expression[ebp, :+, opsz(di)/8] } }
434
+ when 'aaa'; lambda { |di| { eax => Expression::Unknown, :incomplete_binding => Expression[1] } }
435
+ when 'imul'
436
+ lambda { |di, *a|
437
+ # 1 operand form == same as 'mul' (ax:dx stuff)
438
+ next { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } if not a[1]
439
+
440
+ if a[2]; e = Expression[a[1], :*, a[2]]
441
+ else e = Expression[[a[0], :*, a[1]], :&, (1 << (di.instruction.args.first.sz || opsz(di))) - 1]
442
+ end
443
+ { a[0] => e }
444
+ }
445
+ when 'mul', 'div', 'idiv'; lambda { |di, *a| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
446
+ when 'rdtsc'; lambda { |di| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } }
447
+ when /^(stos|movs|lods|scas|cmps)[bwd]$/
448
+ lambda { |di|
449
+ op =~ /^(stos|movs|lods|scas|cmps)([bwd])$/
450
+ e_op = $1
451
+ sz = { 'b' => 1, 'w' => 2, 'd' => 4 }[$2]
452
+ eax_ = Reg.new(0, 8*sz).symbolic
453
+ dir = :+
454
+ if di.block and (di.block.list.find { |ddi| ddi.opcode.name == 'std' } rescue nil)
455
+ dir = :-
456
+ end
457
+ pesi = Indirection[esi, sz, di.address]
458
+ pedi = Indirection[edi, sz, di.address]
459
+ pfx = di.instruction.prefix || {}
460
+ bd =
461
+ case e_op
462
+ when 'movs'
463
+ case pfx[:rep]
464
+ when nil; { pedi => pesi, esi => Expression[esi, dir, sz], edi => Expression[edi, dir, sz] }
465
+ else { pedi => pesi, esi => Expression[esi, dir, [sz ,:*, ecx]], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 }
466
+ end
467
+ when 'stos'
468
+ case pfx[:rep]
469
+ when nil; { pedi => Expression[eax_], edi => Expression[edi, dir, sz] }
470
+ else { pedi => Expression[eax_], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 }
471
+ end
472
+ when 'lods'
473
+ case pfx[:rep]
474
+ when nil; { eax_ => pesi, esi => Expression[esi, dir, sz] }
475
+ else { eax_ => Indirection[[esi, dir, [sz, :*, [ecx, :-, 1]]], sz, di.address], esi => Expression[esi, dir, [sz, :*, ecx]], ecx => 0 }
476
+ end
477
+ when 'scas'
478
+ case pfx[:rep]
479
+ when nil; { edi => Expression[edi, dir, sz] }
480
+ else { edi => Expression::Unknown, ecx => Expression::Unknown }
481
+ end
482
+ when 'cmps'
483
+ case pfx[:rep]
484
+ when nil; { edi => Expression[edi, dir, sz], esi => Expression[esi, dir, sz] }
485
+ else { edi => Expression::Unknown, esi => Expression::Unknown, ecx => Expression::Unknown }
486
+ end
487
+ end
488
+ bd[:incomplete_binding] = Expression[1] if pfx[:rep]
489
+ bd
490
+ }
491
+ when 'clc'; lambda { |di| { :eflag_c => Expression[0] } }
492
+ when 'stc'; lambda { |di| { :eflag_c => Expression[1] } }
493
+ when 'cmc'; lambda { |di| { :eflag_c => Expression[:'!', :eflag_c] } }
494
+ when 'cld'; lambda { |di| { :eflag_d => Expression[0] } }
495
+ when 'std'; lambda { |di| { :eflag_d => Expression[1] } }
496
+ when 'setalc'; lambda { |di| { Reg.new(0, 8).symbolic => Expression[:eflag_c, :*, 0xff] } }
497
+ when /^set/; lambda { |di, *a| { a[0] => Expression[decode_cc_to_expr(op[/^set(.*)/, 1])] } }
498
+ when /^cmov/; lambda { |di, *a| fl = decode_cc_to_expr(op[/^cmov(.*)/, 1]) ; { a[0] => Expression[[fl, :*, a[1]], :|, [[1, :-, fl], :*, a[0]]] } }
499
+ when /^j/
500
+ lambda { |di, a0|
501
+ ret = { 'dummy_metasm_0' => Expression[a0] } # mark modr/m as read
502
+ if fl = decode_cc_to_expr(op[/^j(.*)/, 1]) and fl != Expression::Unknown
503
+ ret['dummy_metasm_1'] = fl # mark eflags as read
504
+ end
505
+ ret
506
+ }
507
+ when 'fstenv', 'fnstenv'
508
+ lambda { |di, a0|
509
+ # stores the address of the last non-control fpu instr run
510
+ lastfpuinstr = di.block.list[0...di.block.list.index(di)].reverse.find { |pdi|
511
+ case pdi.opcode.name
512
+ when /fn?init|fn?clex|fldcw|fn?st[cs]w|fn?stenv|fldenv|fn?save|frstor|f?wait/
513
+ when /^f/; true
514
+ end
515
+ } if di.block
516
+ lastfpuinstr = lastfpuinstr.address if lastfpuinstr
517
+ ret = {}
518
+ save_at = lambda { |off, val| ret[Indirection[a0.target + off, 4, di.address]] = val }
519
+ save_at[0, Expression::Unknown]
520
+ save_at[4, Expression::Unknown]
521
+ save_at[8, Expression::Unknown]
522
+ save_at[12, lastfpuinstr || Expression::Unknown]
523
+ save_at[16, Expression::Unknown]
524
+ save_at[20, Expression::Unknown]
525
+ save_at[24, Expression::Unknown]
526
+ ret
527
+ }
528
+ when 'bt'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1] } }
529
+ when 'bts'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
530
+ a0 => Expression[a0, :|, [1, :<<, [a1, :%, opsz(di)]]] } }
531
+ when 'btr'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
532
+ a0 => Expression[a0, :&, [[1, :<<, [a1, :%, opsz(di)]], :^, mask[di]]] } }
533
+ when 'btc'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1],
534
+ a0 => Expression[a0, :^, [1, :<<, [a1, :%, opsz(di)]]] } }
535
+ when 'bswap'
536
+ lambda { |di, a0|
537
+ if opsz(di) == 64
538
+ { a0 => Expression[
539
+ [[[[a0, :&, 0xff000000_00000000], :>>, 56], :|,
540
+ [[a0, :&, 0x00ff0000_00000000], :>>, 40]], :|,
541
+ [[[a0, :&, 0x0000ff00_00000000], :>>, 24], :|,
542
+ [[a0, :&, 0x000000ff_00000000], :>>, 8]]], :|,
543
+ [[[[a0, :&, 0x00000000_ff000000], :<<, 8], :|,
544
+ [[a0, :&, 0x00000000_00ff0000], :<<, 24]], :|,
545
+ [[[a0, :&, 0x00000000_0000ff00], :<<, 40], :|,
546
+ [[a0, :&, 0x00000000_000000ff], :<<, 56]]]] }
547
+ else # XXX opsz != 32 => undef
548
+ { a0 => Expression[
549
+ [[[a0, :&, 0xff000000], :>>, 24], :|,
550
+ [[a0, :&, 0x00ff0000], :>>, 8]], :|,
551
+ [[[a0, :&, 0x0000ff00], :<<, 8], :|,
552
+ [[a0, :&, 0x000000ff], :<<, 24]]] }
553
+ end
554
+ }
555
+ when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} }
556
+ end
557
+
558
+ # add eflags side-effects
559
+
560
+ full_binding = case op
561
+ when 'adc', 'add', 'and', 'cmp', 'or', 'sbb', 'sub', 'xor', 'test', 'xadd'
562
+ lambda { |di, a0, a1|
563
+ e_op = { 'adc' => :+, 'add' => :+, 'xadd' => :+, 'and' => :&, 'cmp' => :-, 'or' => :|, 'sbb' => :-, 'sub' => :-, 'xor' => :^, 'test' => :& }[op]
564
+ res = Expression[[a0, :&, mask[di]], e_op, [a1, :&, mask[di]]]
565
+ res = Expression[res, e_op, :eflag_c] if op == 'adc' or op == 'sbb'
566
+
567
+ ret = (binding ? binding[di, a0, a1] : {})
568
+ ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0]
569
+ ret[:eflag_s] = sign[res, di]
570
+ ret[:eflag_c] = case e_op
571
+ when :+; Expression[res, :>, mask[di]]
572
+ when :-; Expression[[a0, :&, mask[di]], :<, [a1, :&, mask[di]]]
573
+ else Expression[0]
574
+ end
575
+ ret[:eflag_o] = case e_op
576
+ when :+; Expression[[sign[a0, di], :==, sign[a1, di]], :'&&', [sign[a0, di], :'!=', sign[res, di]]]
577
+ when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]]
578
+ else Expression[0]
579
+ end
580
+ ret
581
+ }
582
+ when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
583
+ lambda { |di, a0, *a|
584
+ ret = (binding ? binding[di, a0, *a] : {})
585
+ res = ret[a0] || Expression::Unknown
586
+ ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0]
587
+ ret[:eflag_s] = sign[res, di]
588
+ case op
589
+ when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0]
590
+ when 'inc', 'dec' # don't touch carry flag
591
+ else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ?
592
+ end
593
+ ret[:eflag_o] = case op
594
+ when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1]
595
+ when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1]
596
+ when 'neg'; Expression[[a0, :&, mask[di]], :==, (mask[di]+1) >> 1]
597
+ else Expression::Unknown
598
+ end
599
+ ret
600
+ }
601
+ when 'imul', 'mul', 'idiv', 'div', /^(scas|cmps)[bwdq]$/
602
+ lambda { |di, *a|
603
+ ret = (binding ? binding[di, *a] : {})
604
+ ret[:eflag_z] = ret[:eflag_s] = ret[:eflag_c] = ret[:eflag_o] = Expression::Unknown # :incomplete_binding ?
605
+ ret
606
+ }
607
+ end
608
+
609
+ @backtrace_binding[op] ||= full_binding || binding if full_binding || binding
610
+ }
611
+ @backtrace_binding
612
+ end
613
+
614
+ # returns the condition (bool Expression) under which a conditional jump is taken
615
+ # returns nil if not a conditional jump
616
+ # backtrace for the condition must include the jump itself (eg loop -> ecx--)
617
# di:: decoded instruction; only di.opcode.name is inspected
# Returns a boolean Expression for opcodes named j* or loop*, nil otherwise.
# The loop* expressions test the counter register against 0, so they are
# meant to be evaluated with the instruction's own decrement applied.
def get_jump_condition(di)
  ecx = register_symbols[1]
  case di.opcode.name
  when /^j(.*)/
    decode_cc_to_expr($1)
  when /^loop(.+)?/
    cc = $1
    counter_left = Expression[ecx, :'!=', 0]
    # plain 'loop' only depends on the counter
    return counter_left unless cc
    # loopCC branches when the counter is non-zero AND the flag test holds
    Expression[counter_left, :'&&', decode_cc_to_expr(cc)]
  end
end
628
+
629
# Computes the binding (hash: modified element => new value) of one decoded
# instruction, using the per-opcode handler found in backtrace_binding.
# Keys of the form (reg & mask) or ((reg >> shift) & mask) are folded into
# a binding on the whole register.
# When no handler exists for the opcode, only the first argument is presumed
# modified and the result is tagged with :incomplete_binding.
def get_backtrace_binding(di)
  args = di.instruction.args.map { |arg|
    case arg
    when ModRM, Reg, SimdReg then arg.symbolic(di)
    else arg
    end
  }

  handler = backtrace_binding[di.opcode.basename]
  if not handler
    puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
    # fallback guess: nothing but the 1st argument changes
    target = args[0]
    guess =
      case target
      when Indirection, Symbol then { target => Expression::Unknown }
      when Expression
        ext = target.externals.first
        ext ? { ext => Expression::Unknown } : {}
      else {}
      end
    return guess.update(:incomplete_binding => Expression[1])
  end

  bd = handler[di, *args]
  # rewrite partial register writes (al, ah, ...) as writes to the full register
  bd.keys.grep(Expression).each { |key|
    next unless key.op == :&
    mask = key.rexpr
    next unless mask.kind_of? Integer

    # accepted shapes: (:reg & mask) and ((:reg >> shift) & mask)
    reg = key.lexpr
    shift = nil
    if reg.kind_of?(Expression) && reg.op == :>>
      shift = reg.rexpr
      reg = reg.lexpr if shift.kind_of? Integer
    end
    next unless reg.kind_of? Symbol

    if bd.has_key? reg
      # the full register is bound too (xchg ah, al ; pop sp ...)
      puts "backtrace: conflict for #{di}: #{key} vs #{reg}" if $VERBOSE
      bd[reg] = Expression::Unknown
      next
    end

    val = bd.delete key
    mask <<= shift if shift
    invmask = mask ^ (@size == 64 ? 0xffff_ffff_ffff_ffff : 0xffff_ffff)
    bd[reg] =
      if invmask == 0xffff_ffff_0000_0000 && !di.opcode.props[:op32no64]
        Expression[val, :&, 0xffff_ffff]
      elsif invmask == 0
        val
      else
        val = Expression[val, :<<, shift] if shift
        Expression[[reg, :&, invmask], :|, [val, :&, mask]]
      end
  }
  bd
end
676
+
677
# Lists the execution targets of an instruction whose opcode has the :setip
# property.
# dasm:: disassembler; when nil the jump table lookup is skipped
# di::   decoded instruction
# Returns an array of target expressions; empty when the opcode lacks :setip
# or when the first argument is of an unhandled kind.
def get_xrefs_x(dasm, di)
  return [] unless di.opcode.props[:setip]

  bits = opsz(di)
  case di.opcode.basename
  when 'ret'
    return [Indirection[register_symbols[4], bits/8, di.address]]
  when 'jmp', 'call'
    arg = di.instruction.args.first
    # jmp/call [imm + idx*ptrsize] with imm inside a known section: jump table
    table_like = dasm && arg.kind_of?(ModRM) && arg.imm && arg.s == bits/8 &&
                 !arg.b && dasm.get_section_at(arg.imm)
    return get_xrefs_x_jmptable(dasm, di, arg, bits) if table_like
  end

  target = di.instruction.args.first
  case target
  when ModRM
    target.sz ||= bits
    [Expression[target.symbolic(di)]]
  when Reg
    [Expression[target.symbolic(di)]]
  when Expression, ::Integer
    [Expression[target]]
  when Farptr
    if target.seg.reduce < 0x30
      [target.addr]
    else
      [Expression[[target.seg, :*, 0x10], :+, target.addr]]
    end
  else
    puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG
    []
  end
end
702
+
703
+ # we detected a jmp table (jmp [base+4*idx])
704
+ # try to return an accurate dest list
705
# dasm:: disassembler
# di::   the jmp/call instruction
# mrm::  its ModRM argument (mrm.imm is the table base, mrm.i the index)
# sz::   operand size in bits
# Returns the symbolic destination followed by one Indirection per table
# slot retained. Two strategies:
# * a single predecessor ending in `cmp idx, N ; jb/jbe/jnb/jnbe` gives the slot count
# * otherwise slots are read and kept while they look like code pointers
def get_xrefs_x_jmptable(dasm, di, mrm, sz)
  # the symbolic destination comes first, for the backtracker
  targets = [Expression[mrm.symbolic(di)]]

  # block made of `mov idx, src ; jmp [base+4*idx]`: track src instead of idx
  idx = mrm.i
  insns = di.block.list
  if insns.length == 2 and insns[0].opcode.name =~ /^mov/
    dst = insns[0].instruction.args[0]
    if dst and dst.respond_to?(:symbolic) and dst.symbolic == idx.symbolic
      idx = insns[0].instruction.args[1]
    end
  end

  # look for: cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax]
  limit = nil
  preds = di.block.from_normal.to_a
  if preds.length == 1 and jcc = dasm.decoded[preds[0]] and jcc.opcode.name =~ /^jn?be?/
    cmp = jcc.block.list[-2]
    if cmp and cmp.opcode.name == 'cmp' and cmp.instruction.args[0].symbolic == idx.symbolic
      bound = Expression[cmp.instruction.args[1]].reduce
      limit = bound if bound and bound.kind_of?(Integer)
    end
  end

  if limit
    section = dasm.get_section_at(mrm.imm)
    limit += 1 if jcc.opcode.name[-1] == ?e
    limit.times { |n|
      dasm.add_xref(section[1]+section[0].ptr, Xref.new(:r, di.address, sz/8))
      targets << Indirection[[mrm.imm, :+, n*sz/8], sz/8, di.address]
      section[0].read(sz/8)
    }
    label = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref')
    replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[label])
    return targets
  end

  puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE
  di.add_comment 'wildguess'
  # start 3 slots before the base when that address is mapped
  section = dasm.get_section_at(mrm.imm - 3*sz/8)
  if section
    slot = -3
  else
    section = dasm.get_section_at(mrm.imm)
    slot = 0
  end
  imm_type = "u#{sz}".to_sym
  loop do
    ptr = dasm.normalize section[0].decode_imm(imm_type, @endianness)
    delta = Expression[ptr, :-, di.address].reduce
    near = delta.kind_of?(::Integer) && delta.abs < 4096
    if near or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr))
      dasm.add_xref(section[1]+section[0].ptr-sz/8, Xref.new(:r, di.address, sz/8))
      targets << Indirection[[mrm.imm, :+, slot*sz/8], sz/8, di.address]
    elsif slot > 0
      # first implausible slot past the base ends the table
      break
    end
    slot += 1
  end
  targets
end
750
+
751
+ # checks if expr is a valid return expression matching the :saveip instruction
752
# expr:: expression to test (reduced with reduce_rec first)
# di::   unused here
# True when expr is an Indirection of @size/8 bytes whose target equals the
# stack pointer symbol (register_symbols[4]).
def backtrace_is_function_return(expr, di=nil)
  reduced = Expression[expr].reduce_rec
  return false unless reduced.kind_of? Indirection
  return false unless reduced.len == @size/8
  reduced.target == Expression[register_symbols[4]]
end
756
+
757
+ # updates the function backtrace_binding
758
+ # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand)
759
+ # XXX assume retaddrlist is either a list of addr of ret or a list with a single entry which is an external function name (thunk)
760
# dasm::        disassembler
# faddr::       address of the function
# f::           function object whose backtrace_binding hash is updated in place
# retaddrlist:: addresses to backtrace from; when nil no register is resolved
# wantregs::    registers to resolve; when empty, every register symbol is
#               resolved if the function has fewer than 20 blocks, else only
#               the frame and stack pointers
# Returns the binding hash.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  binding_hash = f.backtrace_binding
  esp, ebp = register_symbols[4, 2]
  thunklast = nil

  # XXX handle retaddrlist for multiple/mixed thunks
  if retaddrlist and not dasm.decoded[retaddrlist.first] and (di = dasm.decoded[faddr])
    # nothing decoded at the return address: treat as a thunk and follow the
    # flow down to its last instruction, to backtrace from there
    successor = lambda { |cur|
      dasm.decoded[cur.block.to_subfuncret.to_a.first] || dasm.decoded[cur.block.to_normal.to_a.first]
    }
    visited = []
    while (nxt = successor[di]) && nxt.kind_of?(DecodedInstruction) && !visited.include?(nxt.address)
      visited << nxt.address
      di = nxt
    end
    blk = di.block
    if not blk.to_subfuncret.to_a.first and blk.to_normal and blk.to_normal.length > 1
      thunklast = blk.list.last.address
    end
  end

  # resolves one register; the binding stays Unknown unless the backtraces
  # agree on exactly one value
  bt_val = lambda { |reg|
    next unless retaddrlist
    binding_hash[reg] = Expression::Unknown	# TODO :pending or something ? (for recursive lazy functions)
    found = []
    retaddrlist.each { |retaddr|
      start = thunklast || retaddr
      found |= dasm.backtrace(Expression[reg], start,
        :include_start => true, :snapshot_addr => faddr, :origin => retaddr, :from_subfuncret => thunklast)
    }
    binding_hash[reg] = (found.length == 1 ? found.first : Expression::Unknown)
  }

  regs =
    if !wantregs.empty?
      wantregs
    elsif dasm.function_blocks(faddr, true).length < 20
      register_symbols
    else
      [ebp, esp]
    end
  regs.each(&bt_val)

  backtrace_update_function_binding_check(dasm, faddr, f, binding_hash, &bt_val)

  binding_hash
end
807
+
808
# Post-processes the binding +b+ of the function at +faddr+:
# * when ebp is rebound, yields a few stack Indirections to the caller's
#   block and drops those not bound to one of ebx/edx/esi/edi/ebp
# * with dasm.funcs_stdabi, installs the dynamic callback when esp is
#   Unknown and presumes ebp/ebx/esi/edi preserved when Unknown
# * labels a few recognizable helper functions
# A block is required whenever the first case applies.
def backtrace_update_function_binding_check(dasm, faddr, f, b)
  ptrsz = @size/8

  if b[:ebp] and b[:ebp] != Expression[:ebp]
    # may be a custom 'enter' function (eg recent Visual Studio)
    # TODO put all memory writes in the binding ?
    kept_regs = [:ebx, :edx, :esi, :edi, :ebp]
    slots = [[:ebp]] + (1..3).map { |n| [:esp, :+, n*ptrsz] }
    slots.each { |ptr|
      ind = Indirection[ptr, ptrsz, faddr]
      yield(ind)
      b.delete(ind) if b[ind] and not kept_regs.include?(b[ind].reduce_rec)
    }
  end

  if dasm.funcs_stdabi
    if b[:esp] and b[:esp] == Expression::Unknown and not f.btbind_callback
      puts "update_func_bind: #{Expression[faddr]} has esp -> unknown, use dynamic callback" if $DEBUG
      f.btbind_callback = disassembler_default_btbind_callback
    end
    [:ebp, :ebx, :esi, :edi].each { |reg|
      next unless b[reg] and b[reg] == Expression::Unknown
      puts "update_func_bind: #{Expression[faddr]} has #{reg} -> unknown, presume it is preserved" if $DEBUG
      b[reg] = Expression[reg]
    }
  elsif b[:esp] and not Expression[b[:esp], :-, :esp].reduce.kind_of?(::Integer)
    puts "update_func_bind: #{Expression[faddr]} has esp -> #{b[:esp]}" if $DEBUG
  end

  # rename some functions
  # TODO database and real signatures
  zero_diff = lambda { |lhs, rhs| Expression[lhs, :-, rhs].reduce == 0 }
  uses_fs_base = lambda {
    dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', ptrsz]] }
  }
  rename =
    if b[:eax] and zero_diff[b[:eax], faddr]
      'geteip'	# metasm pic linker
    elsif b[:eax] and b[:ebx] and zero_diff[b[:eax], :eax] and zero_diff[b[:ebx], Indirection[:esp, ptrsz, nil]]
      'get_pc_thunk_ebx'	# elf pic convention
    elsif b[:esp] and
          Expression[b[:esp], :-, [:esp, :-, Indirection[[:esp, :+, 2*ptrsz], ptrsz]]].reduce.kind_of?(::Integer) and
          uses_fs_base[]
      '__SEH_prolog'
    elsif b[:esp] == Expression[:ebp, :+, ptrsz] and uses_fs_base[]
      '__SEH_epilog'
    end
  dasm.auto_label_at(faddr, rename, 'loc', 'sub') if rename
end
852
+
853
+ # returns true if the expression is an address on the stack
854
# expr:: anything accepted by Expression[]
# True when the stack pointer symbol (register_symbols[4]) is listed in the
# expr_externals of the expression.
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  stack_ptr = register_symbols[4]
  externals.include?(stack_ptr)
end
857
+
858
+ # updates an instruction's argument replacing an expression with another (eg label renamed)
859
# i::   instruction whose args array is rewritten in place
# old:: expression to look for
# new:: its replacement
# Expression arguments and the imm field of ModRM arguments are substituted;
# any other argument is kept as is. Returns the result of args.map!.
def replace_instr_arg_immediate(i, old, new)
  # exact match => replacement itself, otherwise rebind inside and reduce
  swap = lambda { |e| e == old ? new : Expression[e.bind(old => new).reduce] }
  i.args.map! { |arg|
    if arg.kind_of? Expression
      swap[arg]
    else
      if arg.kind_of? ModRM
        imm = arg.imm
        arg.imm = swap[imm] if imm
      end
      arg
    end
  }
end
870
+
871
+ # returns a DecodedFunction from a parsed C function prototype
872
+ # TODO rebacktrace already decoded functions (load a header file after dasm finished)
873
+ # TODO walk structs args
874
# cp::   C parser holding the prototype
# sym::  the function symbol, or its name (looked up in cp.toplevel.symbol)
# orig:: origin recorded in the generated traces, defaults to Expression[sym.name]
# Builds a DecodedFunction with:
# * a return-address trace, unless the symbol is marked noreturn
# * eax/ecx/edx bound to Unknown
# * esp bound to esp + the size popped on return (arguments are counted
#   for stdcall, and from the 3rd one for fastcall)
# * one trace per pointer argument found on the stack
def decode_c_function_prototype(cp, sym, orig=nil)
  sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String)
  func = DecodedFunction.new
  orig ||= Expression[sym.name]

  # records a trace: read of rlen bytes when rlen is given, execution otherwise
  track = lambda { |expr, rlen|
    kind = (rlen ? :r : :x)
    func.backtracked_for << BacktraceTrace.new(expr, orig, expr, kind, rlen)
  }

  # return instr emulation
  if sym.has_attribute('noreturn') || sym.has_attribute('__noreturn__')
    func.noreturn = true
  else
    track[Indirection[:esp, @size/8, orig], nil]
  end

  # register dirty (XXX assume standard ABI)
  func.backtrace_binding.update(:eax => Expression::Unknown,
                                :ecx => Expression::Unknown,
                                :edx => Expression::Unknown)

  al = cp.typesize[:ptr]
  # stack room taken by one argument, rounded up to a multiple of al
  slot_size = lambda { |arg| (cp.sizeof(arg) + al - 1) / al * al }

  # emulate ret <n>
  popped =
    if sym.has_attribute('fastcall')
      sym.type.args.to_a[2..-1].to_a.inject(al) { |sum, arg| sum + slot_size[arg] }
    elsif sym.has_attribute('stdcall')
      sym.type.args.to_a.inject(al) { |sum, arg| sum + slot_size[arg] }
    else
      al
    end
  func.backtrace_binding[:esp] = Expression[:esp, :+, popped]

  # scan args for pointers
  # TODO walk structs/unions..
  offset = al
  sym.type.args.to_a.each { |arg|
    slot = Indirection[[:esp, :+, offset], al, orig]
    offset += slot_size[arg]
    base = arg.type.untypedef
    next unless base.kind_of? C::Pointer

    pointee = base.type.untypedef
    if pointee.kind_of? C::Function
      track[slot, nil]
      func.backtracked_for.last.detached = true
    elsif pointee.kind_of? C::Struct
      track[slot, al]
    else
      track[slot, cp.sizeof(nil, pointee)]
    end
  }

  func
end
926
+
927
+ # the lambda for the :default backtrace_binding callback of the disassembler
928
+ # tries to determine the stack offset of unprototyped functions
929
+ # working:
930
+ # checks that origin is a ret, that expr is an indirection from esp and that expr.origin is the ret
931
+ # bt_walk from calladdr until we find a call into us, and assume it is the current function start
932
+ # TODO handle foo: call bar ; bar: pop eax ; call <withourcallback> ; ret -> bar is not the function start (foo is)
933
+ # then backtrace expr from calladdr to funcstart (snapshot), using esp -> esp+<stackoffvariable>
934
+ # from the result, compute stackoffvariable (only if trivial)
935
+ # will not work if the current function calls any other unknown function (unless all are __cdecl)
936
+ # will not work if the current function is framed (ebp leave ret): in this case the function will return, but its esp will be unknown
937
+ # if the stack offset is found and funcaddr is a string, fixup the static binding and remove the dynamic binding
938
+ # TODO dynamise thunks bt_for & bt_cb
939
# Builds the lambda installed as btbind_callback.
# The lambda takes (dasm, bind, funcaddr, calladdr, expr, origin, maxdepth)
# and returns a binding hash; whenever a stack offset is derived, the stack
# pointer is rebound to esp+<offset>.
# Offsets derived for the :default function are cached per [dasm, calladdr]
# in @dasm_func_default_off.
# `break` inside the lambda body returns from the lambda.
def disassembler_default_btbind_callback
  esp = register_symbols[4]

  lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
    @dasm_func_default_off ||= {}
    # offset already known for this call site
    if off = @dasm_func_default_off[[dasm, calladdr]]
      bind = bind.merge(esp => Expression[esp, :+, off])
      break bind
    end
    # only handle an esp-based indirection originating from a 'ret'
    break bind if not odi = dasm.decoded[origin] or odi.opcode.basename != 'ret'
    expr = expr.reduce_rec if expr.kind_of? Expression
    break bind unless expr.kind_of? Indirection and expr.origin == origin
    break bind unless expr.externals.reject { |e| e =~ /^autostackoffset_/ } == [esp]

    curfunc = dasm.function[funcaddr]
    if curfunc.backtrace_binding and tk = curfunc.backtrace_binding[:thunk] and dasm.function[tk]
      curfunc = dasm.function[tk]
    end

    # scan from calladdr for the probable parent function start
    func_start = nil
    dasm.backtrace_walk(true, calladdr, false, false, nil, maxdepth) { |ev, foo, h|
      if ev == :up and h[:sfret] != :subfuncret and di = dasm.decoded[h[:to]] and di.opcode.basename == 'call'
        func_start = h[:from]
        break
      elsif ev == :end
        # entrypoints are functions too
        func_start = h[:addr]
        break
      end
    }
    break bind if not func_start
    puts "automagic #{Expression[funcaddr]}: found func start for #{dasm.decoded[origin]} at #{Expression[func_start]}" if dasm.debug_backtrace
    # backtrace with esp replaced by esp+<symbolic offset of this call>
    s_off = "autostackoffset_#{Expression[funcaddr]}_#{Expression[calladdr]}"
    list = dasm.backtrace(expr.bind(esp => Expression[esp, :+, s_off]), calladdr, :include_start => true, :snapshot_addr => func_start, :maxdepth => maxdepth, :origin => origin)
    # check if this backtrace made us find our binding
    if off = @dasm_func_default_off[[dasm, calladdr]]
      bind = bind.merge(esp => Expression[esp, :+, off])
      break bind
    elsif not curfunc.btbind_callback
      break curfunc.backtrace_binding
    end
    # prefer a result giving a small pointer-aligned offset, else take the first one
    e_expr = list.find { |e_expr_|
      # TODO cleanup this
      e_expr_ = Expression[e_expr_].reduce_rec
      next if not e_expr_.kind_of? Indirection
      off = Expression[[esp, :+, s_off], :-, e_expr_.target].reduce
      off.kind_of? Integer and off >= @size/8 and off < 10*@size/8 and (off % (@size/8)) == 0
    } || list.first

    e_expr = e_expr.rexpr if e_expr.kind_of? Expression and e_expr.op == :+ and not e_expr.lexpr
    break bind unless e_expr.kind_of? Indirection

    off = Expression[[esp, :+, s_off], :-, e_expr.target].reduce
    if off.kind_of? Expression
      # other symbolic offsets remain: try pointer-size for each of them
      bd = off.externals.grep(/^autostackoffset_/).inject({}) { |bd_, xt| bd_.update xt => @size/8 }
      bd.delete s_off
      if off.bind(bd).reduce == @size/8
        # all __cdecl
        off = @size/8
      else
        # check if all calls are to the same extern func
        bd.delete_if { |k, v| k !~ /^autostackoffset_#{Expression[funcaddr]}_/ }
        bd.each_key { |k| bd[k] = 0 }
        if off.bind(bd).reduce.kind_of? Integer
          off = off.bind(bd).reduce / (bd.length + 1)
        end
      end
    end
    if off.kind_of? Integer
      # sanity check: at least one pointer, at most 20, pointer-aligned
      if off < @size/8 or off > 20*@size/8 or (off % (@size/8)) != 0
        puts "autostackoffset: ignoring off #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE
        off = :unknown
      end
    end

    bind = bind.merge esp => Expression[esp, :+, off] if off != :unknown
    if funcaddr != :default
      if not off.kind_of? ::Integer
        #XXX we allow the current function to return, so we should handle the func backtracking its esp
        #(and other register that are saved and restored in epilog)
        puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace
      else
        puts "autostackoffset: found #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE
        # offset solved: switch the function to a static binding
        curfunc.btbind_callback = nil
        curfunc.backtrace_binding = bind

        # rebacktrace the return address, so that other unknown funcs that depend on us are solved
        dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin)
      end
    else
      cdi = dasm.decoded[calladdr]
      if off.kind_of? ::Integer and cdi
        puts "autostackoffset: found #{off-@size/8} for #{cdi}" if $VERBOSE
        cdi.comment.delete_if { |c| c =~ /^stackoff=/ } if cdi.comment
        cdi.add_comment "stackoff=#{off-@size/8}"
        @dasm_func_default_off[[dasm, calladdr]] = off

        dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin)
      elsif cachedoff = @dasm_func_default_off[[dasm, calladdr]]
        bind[esp] = Expression[esp, :+, cachedoff]
      end
      # when off is an Integer but nothing is decoded at calladdr there is
      # no instruction to annotate, so nothing more is recorded

      puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace
    end

    bind
  }
end
1050
+
1051
+ # the :default backtracked_for callback
1052
+ # returns empty unless funcaddr is not default or calladdr is a call or a jmp
1053
# Builds the lambda (dasm, btfor, funcaddr, calladdr) -> array.
# btfor is handed back unchanged when funcaddr is not :default, or when the
# instruction decoded at calladdr is named 'call' or 'jmp'; an empty array
# is returned in every other case.
def disassembler_default_btfor_callback
  lambda { |dasm, btfor, funcaddr, calladdr|
    next btfor if funcaddr != :default
    insn = dasm.decoded[calladdr]
    next [] unless insn
    (insn.opcode.name == 'call' || insn.opcode.name == 'jmp') ? btfor : []
  }
end
1061
+
1062
+ # returns a DecodedFunction suitable for :default
1063
+ # uses disassembler_default_bt{for/bind}_callback
1064
# Builds the function object for :default from the prototype
# 'void stdfunc(void);', binds the stack pointer to esp + :unknown and
# installs both default callbacks on it.
def disassembler_default_func
  stack_ptr = register_symbols[4]
  parser = new_cparser
  parser.parse('void stdfunc(void);')
  func = decode_c_function_prototype(parser, 'stdfunc', :default)
  # the stack offset is left symbolic
  func.backtrace_binding[stack_ptr] = Expression[stack_ptr, :+, :unknown]
  func.btbind_callback = disassembler_default_btbind_callback
  func.btfor_callback = disassembler_default_btfor_callback
  func
end
1074
+
1075
+ # returns a hash { :retval => r, :changed => [] }
1076
# :retval is the first register symbol, :changed the first three.
def abi_funcall
  {
    :retval  => register_symbols.first,
    :changed => register_symbols.first(3)
  }
end
1079
+
1080
+
1081
+ # computes the binding of the sequence of code starting at entry included
1082
+ # the binding is a hash showing the value of modified elements at the
1083
+ # end of the code sequence, relative to their value at entry
1084
+ # the elements are all the registers and the memory written to
1085
+ # if finish is nil, the binding will include :ip, which is the address
1086
+ # to be executed next (if it exists)
1087
+ # the binding will not include memory access from subfunctions
1088
+ # entry should be an entrypoint of the disassembler if finish is nil
1089
+ # the code sequence must have only one end, with no to_normal
1090
# dasm::   disassembler
# entry::  start of the code sequence (normalized through dasm)
# finish:: optional address where the sequence stops
# Returns a hash: memory writes (as Indirections) and changed registers,
# each mapped to its value expressed relative to the state at entry, plus
# :ip when the sequence runs to a block without successor.
# Raises when two distinct ends are found, when the final :setip
# instruction does not have exactly one target or lacks :stopexec, or when
# no end is found and finish is nil.
def code_binding(dasm, entry, finish=nil)
  entry = dasm.normalize(entry)
  finish = dasm.normalize(finish) if finish
  last_insn = nil
  result = {}
  # backtrace of expr from +from+ up to entry; Unknown unless exactly one value comes back
  trace = lambda { |from, expr, inc_start|
    values = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start)
    values.length == 1 ? values.first : Expression::Unknown
  }

  # walk blocks, search for finish, scan memory writes
  pending = [entry]
  seen = [Expression::Unknown]
  while addr = pending.pop
    addr = dasm.normalize(addr)
    next if seen.include?(addr) || addr == finish
    start = dasm.decoded[addr]
    next unless start.kind_of? DecodedInstruction
    seen << addr
    blk = start.block

    hit_finish = blk.list.find { |di|
      here = di.address
      if here == finish
        last_insn = blk.list[blk.list.index(di) - 1]
        true
      else
        # record every memory write of the instruction
        get_xrefs_w(dasm, di).each { |waddr, len|
          # the pointer is expressed with register values at entry
          ptr = trace[here, waddr, false]
          result[Indirection[ptr, len, here]] = trace[here, Indirection[waddr, len, here], true]
        }
        false
      end
    }
    next if hit_finish

    has_successor = false
    blk.each_to_samefunc(dasm) { |succ|
      has_successor = true
      if succ == finish
        last_insn = blk.list.last
      else
        pending << succ
      end
    }
    next if has_successor

    # no successor: this block ends the sequence
    raise "two-ended code_binding #{last_insn} & #{blk.list.last}" if last_insn
    last_insn = blk.list.last
    props = last_insn.opcode.props
    if props[:setip]
      dests = get_xrefs_x(dasm, last_insn)
      raise 'bad code_binding ending' if dests.to_a.length != 1 or not props[:stopexec]
      result[:ip] = trace[last_insn.address, dests.first, false]
    elsif not props[:stopexec]
      result[:ip] = last_insn.next_addr
    end
  end
  # drop entries bound to themselves
  result.delete_if { |k, v| Expression[k] == Expression[v] }

  # add register binding
  raise "no code_binding end" if not last_insn and not finish
  # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride)
  full_mask = (@size == 64 ? 0xffff_ffff_ffff_ffff : 0xffff_ffff)
  register_symbols.each { |reg|
    val = (last_insn ? trace[last_insn.address, reg, true] : trace[finish, reg, false])
    next if val == Expression[reg]
    result[reg] = Expression[Expression[val, :&, full_mask].reduce]
  }

  result
end
1166
+ end
1167
+ end