metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,564 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/main'
8
+
9
+ module Metasm
10
+ class Ia32
11
# Temporarily populates dasm.address_binding so that backtracking
# stack-related registers resolves to expressions relative to :frameptr
# (the stack pointer value at function start).
#
# dasm::      disassembler whose address_binding hash is patched
# funcstart:: address of the function entry; bound to { :esp => :frameptr }
# blocks::    list of objects responding to #address; each one is yielded
#
# Every binding installed here is removed when the method exits (normally
# or through an exception); a binding that already existed at funcstart is
# put back afterwards.
def decompile_makestackvars(dasm, funcstart, blocks)
  saved_entry_binding = dasm.address_binding[funcstart]
  # this alone would suffice, the per-block pretrace below is an optimisation
  dasm.address_binding[funcstart] = { :esp => :frameptr }

  patched = [funcstart]	# addresses to clean up in the ensure clause
  ebp_frame = true

  # backtraces +reg+ at +addr+; returns the traced expression when it is
  # exactly :frameptr or :frameptr + <integer>, a false value otherwise
  frame_relative = lambda { |reg, addr|
    traced = dasm.backtrace(reg, addr, :snapshot_addr => funcstart)
    next unless traced.length == 1
    ee = traced.first
    next unless ee and ee.kind_of?(Expression)
    next ee if ee == Expression[:frameptr]
    ee if ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer)
  }

  # pretrace esp and ebp for each function block (cleared later)
  # TODO with more than 1 unknown __stdcall ext func per path, esp -> unknown, which makes very ugly C (*esp-- = 12...); add heuristics ?
  blocks.each do |block|
    start = block.address
    unless dasm.address_binding[start]
      patched << start
      dasm.address_binding[start] = {}

      if (esp_expr = frame_relative[:esp, start])
        dasm.address_binding[start][:esp] = esp_expr
      end

      if ebp_frame
        if (ebp_expr = frame_relative[:ebp, start])
          dasm.address_binding[start][:ebp] = ebp_expr
        else
          # func does not use ebp as frame ptr, no need to backtrace it for later blocks
          ebp_frame = false
        end
      end
    end

    yield block
  end

ensure
  patched.each { |addr| dasm.address_binding.delete addr }
  dasm.address_binding[funcstart] = saved_entry_binding if saved_entry_binding
end
50
+
51
# Lists the register dependencies of each block and drops useless writes.
#
# dcmp::   decompiler; this method uses dcmp.dasm, dcmp.c_parser and
#          dcmp.backtrace_target
# blocks:: array of [block address, [addresses of following blocks]]
# func::   C variable of the function being decompiled; registers found to be
#          read before being written are appended to func.type.args as
#          unsigned int variables carrying a "register(<reg>)" attribute, and
#          func.type.type is set to void when :eax is only "read" by a ret
#
# Returns { blockaddr => [registers written by the block that are needed by a
# following block] }.
def decompile_func_finddeps(dcmp, blocks, func)
  deps_r = {} ; deps_w = {} ; deps_to = {}
  deps_subfunc = {}	# things read/written by subfuncs

  # symbols referenced by a list of expressions, :unknown excluded
  regs_of = lambda { |exprs|
    exprs.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
  }

  # returns [registers read, registers written] for one decoded instruction
  insn_rw = lambda { |di|
    reads = di.backtrace_binding.values
    writes = []
    di.backtrace_binding.keys.each { |k|
      case k
      when ::Symbol; writes |= [k]
      else reads |= Expression[k].externals	# if dword [eax] <- 42, eax is read
      end
    }
    # standard ABI: ret "reads" the return value
    # NOTE(review): this tests func.type (not func.type.type) against
    # C::BaseType - confirm this is intended; kept as in the original
    reads << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void)
    [regs_of[reads], regs_of[writes]]
  }

  # find read/writes by each block
  blocks.each { |b, to|
    deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to
    deps_subfunc[b] = []

    blk = dcmp.dasm.decoded[b].block
    blk.list.each { |di|
      reads, writes = insn_rw[di]
      # a register only counts as read if the block did not write it before
      deps_r[b] |= reads - deps_w[b]
      deps_w[b] |= writes
    }

    stackoff = nil
    blk.each_to_normal { |t|
      t = dcmp.backtrace_target(t, blk.list.last.address)
      next if not t = dcmp.c_parser.toplevel.symbol[t]
      t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function	# XXX this may seem a bit extreme, and yes, it is.
      stackoff ||= Expression[dcmp.dasm.backtrace(:esp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :esp].reduce

      # things that are needed by the subfunction
      if t.has_attribute('fastcall')
        a = t.type.args.to_a
        dep = [:ecx, :edx]
        dep.shift if not a[0] or a[0].has_attribute('unused')
        dep.pop if not a[1] or a[1].has_attribute('unused')
        deps_subfunc[b] |= dep
      end
      t.type.args.to_a.each { |arg|
        # register arguments are tagged "register(<reg>)" (see the regargs
        # loop below), so the name must be read with has_attribute_var;
        # the plain has_attribute predicate cannot yield it
        if reg = arg.has_attribute_var('register')
          deps_subfunc[b] |= [reg.to_sym]
        end
      }
    }

    if stackoff	# last block instr == subfunction call
      deps_r[b] |= deps_subfunc[b] - deps_w[b]
      deps_w[b] |= [:eax, :ecx, :edx]	# standard ABI
    end
  }

  bt = blocks.transpose
  roots = bt[0] - bt[1].flatten	# XXX jmp 1stblock ?

  # find regs read and never written (must have been set by caller and are part of the func ABI)
  uninitialized = lambda { |b, r, done|
    if not deps_r[b]
    elsif deps_r[b].include?(r)
      blk = dcmp.dasm.decoded[b].block
      bw = []
      # first instruction of the block reading r before the block writes it
      rdi = blk.list.find { |di|
        reads, writes = insn_rw[di]
        next true if (reads - bw).include? r
        bw |= writes
        false
      }
      if r == :eax and (rdi || blk.list.last).opcode.name == 'ret'
        # eax is only read by the ret itself: the function returns nothing
        func.type.type = C::BaseType.new(:void)
        false
      elsif rdi and rdi.backtrace_binding[r]
        false	# mov al, 42 ; ret  -> don't regarg eax
      else
        true
      end
    elsif deps_w[b].include?(r)
    else
      done << b
      (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] }
    end
  }

  regargs = register_symbols.select { |r|
    roots.find { |root| uninitialized[root, r, []] }
  }

  # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
  regargs.sort_by { |r| r.to_s }.each { |r|
    a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
    a.add_attribute("register(#{r})")
    func.type.args << a
  }

  # remove writes from a block if no following block read the value
  dw = {}
  deps_w.each { |b, deps|
    dw[b] = deps.reject { |dep|
      ret = true
      done = []
      todo = deps_to[b].dup
      while a = todo.pop
        next if done.include? a
        done << a
        if not deps_r[a] or deps_r[a].include? dep
          # unknown block or an actual reader: the write must be kept
          ret = false
          break
        elsif not deps_w[a].include? dep
          # value flows through this block untouched, keep following the paths
          todo.concat deps_to[a]
        end
      end
      ret
    }
  }

  dw
end
181
+
182
# Translates the instructions of a list of blocks into C statements, which
# are appended to func.initializer.statements.
#
# dcmp::     decompiler (uses dcmp.dasm, dcmp.c_parser, dcmp.decompile_cexpr,
#            dcmp.backtrace_target, dcmp.sizeof, dcmp.recurse)
# myblocks:: array of [block address, [addresses of following blocks]];
#            NOTE: this array is consumed (emptied with #shift)
# deps::     { block address => [registers whose writes must be emitted] }
# func::     C variable of the function; its arguments are registered in the
#            scope of func.initializer
# nextaddr:: address expected to follow the last block; when the single
#            successor of the last block differs from it, a goto is emitted
#
# Returns a copy of the original myblocks list.
#
# The cached backtrace_binding of every instruction of the handled blocks is
# reset on exit, including when an exception is raised half-way through, so
# that the bindings computed during decompilation never leak to the dasm.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  # snapshot taken first so that the ensure clause always knows what to clean
  blocks_toclean = myblocks.dup
  scope = func.initializer
  func.type.args.each { |a| scope.symbol[a.name] = a }
  stmts = scope.statements
  func_entry = myblocks.first[0]
  until myblocks.empty?
    b, to = myblocks.shift
    if l = dcmp.dasm.get_label_at(b)
      stmts << C::Label.new(l)
    end

    # list of assignments [[dest reg, expr assigned]]
    ops = []
    # reg binding (reg => value, values.externals = regs at block start)
    binding = {}
    # Expr => CExpr
    ce  = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
    # Expr => Expr.bind(binding) => CExpr
    ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }

    # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
    commit = lambda {
      deps[b].map { |k|
        [k, ops.rindex(ops.reverse.find { |r, v| r == k })]
      }.sort_by { |k, i| i.to_i }.each { |k, i|
        next if not i or not binding[k]
        e = k
        final = []
        ops[0..i].reverse_each { |r, v|
          final << r if not v
          e = Expression[e].bind(r => v).reduce if not final.include? r
        }
        ops[i][1] = nil
        binding.delete k
        stmts << ce[k, :'=', e] if k != e
      }
    }

    # returns an array to use as funcall arguments
    get_func_args = lambda { |di, f|
      # XXX see remarks in #finddeps
      bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
      # any failure to compute the offset leaves stackoff nil (stack args then become 0)
      stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil
      args_todo = f.type.args.to_a.dup
      args = []
      if f.has_attribute('fastcall')	# XXX DRY
        if a = args_todo.shift
          mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1
          mask = 0 if a.has_attribute('unused')
          args << Expression[:ecx, :&, mask]
        end
        if a = args_todo.shift
          mask = (1 << (8*dcmp.c_parser.sizeof(a))) - 1	# char => dl
          mask = 0 if a.has_attribute('unused')
          args << Expression[:edx, :&, mask]
        end
      end
      args_todo.each { |a_|
        if r = a_.has_attribute_var('register')
          args << Expression[r.to_sym]
        elsif stackoff.kind_of? Integer
          args << Indirection[[:frameptr, :+, stackoff], @size/8]
          stackoff += [dcmp.sizeof(a_), @size/8].max
        else
          args << Expression[0]
        end
      }

      if f.type.varargs and f.type.args.last.type.pointer? and stackoff.kind_of? Integer
        # check if last arg is a fmtstring
        bt = dcmp.dasm.backtrace(args.last, di.address, :snapshot_addr => func_entry, :include_start => true)
        if bt.length == 1 and s = dcmp.dasm.get_section_at(bt.first)
          fmt = s[0].read(512)
          fmt = fmt.unpack('v*').pack('C*') if dcmp.sizeof(f.type.args.last.type.untypedef.type) == 2
          if fmt.index(?\0)
            fmt = fmt[0...fmt.index(?\0)]
            # one more stack argument per '%' left once '%%' is removed
            fmt.gsub('%%', '').count('%').times {	# XXX %.*s etc..
              args << Indirection[[:frameptr, :+, stackoff], @size/8]
              stackoff += @size/8
            }
          end
        end
      end

      args.map { |e| ceb[e] }
    }

    # go !
    dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
      a = di.instruction.args
      if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
        # conditional jump
        commit[]
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        if di.opcode.name =~ /^loop(.+)?/
          cx = C::CExpression[:'--', ceb[:ecx]]
          cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx
        else
          cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
        end
        # XXX switch/indirect/multiple jmp
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
        to.delete dcmp.dasm.normalize(n)
        next
      end

      if di.opcode.name == 'mov'
        # mov cr0 etc
        a1, a2 = di.instruction.args
        case a1
        when Ia32::CtrlReg, Ia32::DbgReg, Ia32::SegReg
          sz = a1.kind_of?(Ia32::SegReg) ? 16 : 32
          if not dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
            dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});")
          end
          f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
          a2 = a2.symbolic(di)
          a2 = [a2, :&, 0xffff] if sz == 16
          stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type)
          next
        end
        case a2
        when Ia32::CtrlReg, Ia32::DbgReg, Ia32::SegReg
          if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
            sz = a2.kind_of?(Ia32::SegReg) ? 16 : 32
            dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);")
          end
          f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
          t = f.type.type
          binding.delete a1.symbolic(di)
          stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t)
          next
        end
      end

      case di.opcode.name
      when 'ret'
        commit[]
        ret = nil
        ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
        stmts << C::Return.new(ret)
      when 'call'	# :saveip
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        args = []
        if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
          args = get_func_args[di, f]
        elsif defined? @dasm_func_default_off and o = @dasm_func_default_off[[dcmp.dasm, di.address]] and o.kind_of? Integer and o > @size/8
          # no known prototype: build one with as many int args as the default stack offset allows
          f = C::Variable.new
          f.type = C::Function.new(C::BaseType.new(:int), [])
          ((o/(@size/8))-1).times { f.type.args << C::Variable.new(nil,C::BaseType.new(:int)) }
          args = get_func_args[di, f]
        end
        commit[]
        #next if not di.block.to_subfuncret

        if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function)
          # indirect funcall
          fptr = ceb[n]
          binding.delete n
          proto = C::Function.new(C::BaseType.new(:int))
          proto = f.type if f and f.type.kind_of? C::Function
          f = C::CExpression[[fptr], C::Pointer.new(proto)]
        elsif not f
          # internal functions are predeclared, so this one is extern
          f = C::Variable.new
          f.name = n
          f.type = C::Function.new(C::BaseType.new(:int))
          if dcmp.recurse > 0
            dcmp.c_parser.toplevel.symbol[n] = f
            dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
          end
        end
        commit[]
        binding.delete :eax
        e = C::CExpression[f, :funcall, args]
        e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void)
        stmts << e
      when 'jmp'
        #if di.comment.to_a.include? 'switch'
        #	n = di.instruction.args.first.symbolic(di)
        #	fptr = ceb[n]
        #	binding.delete n
        #	commit[]
        #	sw = C::Switch.new(fptr, C::Block.new(scope))
        #	di.block.to_normal.to_a.each { |addr|
        #		addr = dcmp.dasm.normalize addr
        #		to.delete addr
        #		next if not l = dcmp.dasm.get_label_at(addr)
        #		sw.body.statements << C::Goto.new(l)
        #	}
        #	stmts << sw
        a = di.instruction.args.first
        if a.kind_of? Expression
          # direct jump: nothing emitted here, the goto logic after the
          # instruction loop deals with the block successors
        elsif not a.respond_to? :symbolic
          stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
        else
          # indirect jump, rendered as "return (*target)(args)"
          n = di.instruction.args.first.symbolic(di)
          fptr = ceb[n]
          binding.delete n
          commit[]
          if fptr.kind_of? C::CExpression and fptr.type.pointer? and fptr.type.untypedef.type.kind_of? C::Function
            proto = fptr.type.untypedef.type
            args = get_func_args[di, fptr.type]
          else
            proto = C::Function.new(C::BaseType.new(:void))
            fptr = C::CExpression[[fptr], C::Pointer.new(proto)]
            args = []
          end
          ret = C::Return.new(C::CExpression[fptr, :funcall, args])
          class << ret ; attr_accessor :from_instr end
          ret.from_instr = di
          stmts << ret
          to = []
        end
      when 'lgdt'
        if not dcmp.c_parser.toplevel.struct['segment_descriptor']
          dcmp.c_parser.parse('struct segment_descriptor { __int16 limit; __int16 base0_16; __int8 base16_24; __int8 flags1; __int8 flags2_limit_16_20; __int8 base24_32; };')
          dcmp.c_parser.parse('struct segment_table { __int16 size; struct segment_descriptor *table; } __attribute__((pack(2)));')
        end
        if not dcmp.c_parser.toplevel.symbol['intrinsic_lgdt']
          dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);')
        end
        # need a way to transform arg => :frameptr+12
        arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
      when 'lidt'
        if not dcmp.c_parser.toplevel.struct['interrupt_descriptor']
          dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
          dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));')
        end
        if not dcmp.c_parser.toplevel.symbol['intrinsic_lidt']
          dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
        end
        arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
      when 'ltr', 'lldt'
        if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
          dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
        end
        arg = di.backtrace_binding.keys.first
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void))
      when 'out'
        # operand size is taken from the register argument whose val is 0
        sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
        if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
          dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
        end
        port = di.instruction.args.grep(Expression).first || :edx
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void))
      when 'in'
        sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
        if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
          dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
        end
        port = di.instruction.args.grep(Expression).first || :edx
        f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
        binding.delete :eax
        stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type)
      when 'sti', 'cli'
        stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
      when /^(mov|sto|lod)s([bwdq])/
        op, sz = $1, $2
        commit[]
        sz = { 'b' => 1, 'w' => 2, 'd' => 4, 'q' => 8 }[sz]
        pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym))

        blk = C::Block.new(scope)
        case op
        when 'mov'
          blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]]
          blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
          blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
        when 'sto'
          blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]]
          blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
        when 'lod'
          blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]]
          blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
        #when 'sca'
        #when 'cmp'
        end

        case (di.instruction.prefix || {})[:rep]
        when nil
          stmts.concat blk.statements
        when 'rep'
          blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]]
          stmts << C::While.new(C::CExpression[ceb[:ecx]], blk)
        #when 'repz'	# sca/cmp only
        #when 'repnz'
        end
        next
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          commit[]
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
        else
          update = {}
          bd.each { |k, v|
            if k.kind_of? ::Symbol and not deps[b].include? k
              # register not needed outside the block: just remember its value
              ops << [k, v]
              update[k] = Expression[Expression[v].bind(binding).reduce]
            else
              stmts << ceb[k, :'=', v]
              stmts.pop if stmts.last.kind_of? C::Variable	# [:eflag_s, :=, :unknown].reduce
            end
          }
          binding.update update
        end
      end
    }
    commit[]

    case to.length
    when 0
      if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
        puts " block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts " block #{Expression[b]} with multiple to"
    end
  end

  blocks_toclean
ensure
  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  # done in ensure so that a failure above cannot leave them behind
  if blocks_toclean
    blocks_toclean.each { |b_, to_|
      dcmp.dasm.decoded[b_].block.list.each { |di|
        di.backtrace_binding = nil
      }
    }
  end
end
517
+
518
# Guesses calling-convention attributes for a decompiled function.
#
# dcmp::  decompiler (uses dcmp.dasm, dcmp.c_parser, dcmp.sizeof and
#         dcmp.stackoff_to_varname)
# entry:: address of the function entry point
# func::  C variable of the function; its argument list and attributes are
#         updated in place
#
# Steps, in order:
# * register arguments also marked 'unused' are dropped
# * if the arguments are exactly [ecx], or include exactly ecx and edx as
#   register arguments, the function is tagged 'fastcall' and those two are
#   moved to the front
# * when the disassembler knows return addresses for the function, the stack
#   pointer adjustment seen there is compared with the size of the stack
#   arguments: missing slots are padded with 'unused' int arguments, then
#   'stdcall' or 'stackoff:<bytes>' is added; an adjustment that is not a
#   single integer yields 'breakstack:<backtrace result>'
def decompile_check_abi(dcmp, entry, func)
  arglist = func.type.args || []
  arglist.delete_if { |arg| arg.has_attribute_var('register') and arg.has_attribute('unused') }
  regnames = arglist.map { |arg| arg.has_attribute_var('register') }.compact

  if (arglist.length == 1 and regnames == ['ecx']) or (arglist.length >= 2 and regnames.sort == ['ecx', 'edx'])
    func.add_attribute 'fastcall'
    # reorder args: ecx first, then edx
    by_reg = lambda { |name| arglist.find { |arg| arg.has_attribute_var('register') == name } }
    ecx = by_reg['ecx']
    edx = by_reg['edx']
    arglist.insert(0, arglist.delete(ecx))
    arglist.insert(1, arglist.delete(edx)) if edx
  end

  f = dcmp.dasm.function[entry]
  # unknown function or no known return address: nothing more can be told
  #func.add_attribute 'noreturn'
  return if not f or not f.return_address

  adj = f.return_address.map { |ra_|
    dcmp.dasm.backtrace(:esp, ra_, :include_start => true, :stopaddr => entry)
  }.flatten.uniq

  so = Expression[adj.first, :-, :esp].reduce if adj.length == 1
  return func.add_attribute("breakstack:#{adj.inspect}") unless so and so.kind_of?(::Integer)

  # number of pointer-sized stack slots covered by the known stack arguments
  slots = arglist.map { |fa|
    next if not fa.stackoff
    (fa.stackoff + [dcmp.sizeof(fa), dcmp.c_parser.typesize[:ptr]].max - 1) / dcmp.c_parser.typesize[:ptr]
  }
  argsz = slots.compact.max.to_i

  # stack adjustment in slots, minus one slot
  so /= dcmp.dasm.cpu.size/8
  so -= 1

  if so > argsz
    # more slots than known arguments: pad with unused ints
    aso = arglist.empty? ? 0 : arglist.last.stackoff.to_i + dcmp.c_parser.typesize[:ptr]
    (so - argsz).times {
      pad = C::Variable.new(dcmp.stackoff_to_varname(aso), C::BaseType.new(:int))
      arglist << pad
      pad.add_attribute('unused')
      aso += dcmp.sizeof(pad)
    }
    argsz = so
  end

  return if so == 0
  if so == argsz
    func.add_attribute 'stdcall' if not func.has_attribute('fastcall')
  else
    func.add_attribute "stackoff:#{so*dcmp.dasm.cpu.size/8}"
  end
end
563
+ end
564
+ end