metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,564 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ia32/main'
8
+
9
+ module Metasm
10
+ class Ia32
11
# Yields each element of +blocks+ in order, while dasm.address_binding is
# temporarily populated so that backtracking stack-related offsets resolves
# to :frameptr-relative expressions (:frameptr standing for esp at funcstart).
#
# dasm::      the disassembler; its address_binding hash and backtrace method are used
# funcstart:: address of the function entry point
# blocks::    objects responding to #address, one per block of the function
#
# Every binding installed here is removed again in the ensure clause, and the
# binding funcstart had before the call (if any) is put back.
def decompile_makestackvars(dasm, funcstart, blocks)
  oldfuncbd = dasm.address_binding[funcstart]
  # this alone would suffice, the per-block bindings below are only an optimisation
  dasm.address_binding[funcstart] = { :esp => :frameptr }

  patched_binding = [funcstart] # addresses whose binding is dropped on exit
  ebp_frame = true

  # backtraces reg at addr; returns the result only if it is unique and
  # is either :frameptr or :frameptr + <integer>, nil otherwise
  frame_relative = lambda { |reg, addr|
    found = dasm.backtrace(reg, addr, :snapshot_addr => funcstart)
    next unless found.length == 1
    expr = found.first
    next unless expr and expr.kind_of?(Expression)
    next expr if expr == Expression[:frameptr]
    expr if expr.lexpr == :frameptr and expr.op == :+ and expr.rexpr.kind_of?(::Integer)
  }

  # pretrace esp and ebp for each function block (cleared later)
  # TODO with more than 1 unknown __stdcall ext func per path, esp -> unknown,
  # which makes very ugly C (*esp-- = 12...); add heuristics ?
  blocks.each do |block|
    blockstart = block.address
    unless dasm.address_binding[blockstart]
      patched_binding << blockstart
      dasm.address_binding[blockstart] = {}

      if sp = frame_relative[:esp, blockstart]
        dasm.address_binding[blockstart][:esp] = sp
      end

      if ebp_frame
        if bp = frame_relative[:ebp, blockstart]
          dasm.address_binding[blockstart][:ebp] = bp
        else
          # func does not use ebp as frame ptr, no need to backtrace it for later blocks
          ebp_frame = false
        end
      end
    end

    yield block
  end
ensure
  patched_binding.each { |addr| dasm.address_binding.delete addr }
  dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd
end
50
+
51
# list variable dependency for each block, remove useless writes
# returns { blockaddr => [list of vars that are needed by a following block] }
#
# dcmp::   the decompiler; dcmp.dasm, dcmp.c_parser and dcmp.backtrace_target are used
# blocks:: array of [block address, [addresses of the following blocks]]
# func::   C variable of the function being analysed
#
# Side effects on func: one 'unsigned int' argument carrying a
# "register(<reg>)" attribute is appended to func.type.args for every register
# found to be read before being written, and func.type.type is set to void
# when eax is only "read" by a ret.
# Callee symbols whose type is not a C::Function are retyped as int().
def decompile_func_finddeps(dcmp, blocks, func)
  deps_r = {} ; deps_w = {} ; deps_to = {}
  deps_subfunc = {} # things read/written by subfuncs

  # register symbols read / written by one instruction, as [reads, writes]
  di_rw = lambda { |di|
    a = di.backtrace_binding.values
    w = []
    di.backtrace_binding.keys.each { |k|
      case k
      when ::Symbol; w |= [k]
      else a |= Expression[k].externals # if dword [eax] <- 42, eax is read
      end
    }
    # standard ABI: ret reads the return value, unless the function returns void.
    # The test is made on the return type (func.type.type), as decompile_blocks
    # does when it emits the return statement.
    returns_void = (func.type.type.kind_of?(C::BaseType) && func.type.type.name == :void)
    a << :eax if di.opcode.name == 'ret' and not returns_void

    [a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown],
     w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]]
  }

  # find read/writes by each block
  blocks.each { |b, to|
    deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to
    deps_subfunc[b] = []

    blk = dcmp.dasm.decoded[b].block
    blk.list.each { |di|
      reads, writes = di_rw[di]
      # a register written earlier in the block is not a dependency of the block
      deps_r[b] |= reads - deps_w[b]
      deps_w[b] |= writes
    }
    stackoff = nil
    blk.each_to_normal { |t|
      t = dcmp.backtrace_target(t, blk.list.last.address)
      next if not t = dcmp.c_parser.toplevel.symbol[t]
      t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function # XXX this may seem a bit extreme, and yes, it is.
      stackoff ||= Expression[dcmp.dasm.backtrace(:esp, blk.list.last.address, :snapshot_addr => blocks.first[0]).first, :-, :esp].reduce

      # things that are needed by the subfunction
      if t.has_attribute('fastcall')
        a = t.type.args.to_a
        dep = [:ecx, :edx]
        dep.shift if not a[0] or a[0].has_attribute('unused')
        dep.pop if not a[1] or a[1].has_attribute('unused')
        deps_subfunc[b] |= dep
      end
      t.type.args.to_a.each { |arg|
        # register arguments carry a "register(<reg>)" attribute (see the
        # add_attribute call below): the register name is its parameter
        if reg = arg.has_attribute_var('register')
          deps_subfunc[b] |= [reg.to_sym]
        end
      }
    }
    if stackoff # last block instr == subfunction call
      deps_r[b] |= deps_subfunc[b] - deps_w[b]
      deps_w[b] |= [:eax, :ecx, :edx] # standard ABI
    end
  }

  bt = blocks.transpose
  roots = bt[0] - bt[1].flatten # XXX jmp 1stblock ?

  # find regs read and never written (must have been set by caller and are part of the func ABI)
  uninitialized = lambda { |b, r, done|
    if not deps_r[b]
      # b is not one of the scanned blocks
    elsif deps_r[b].include?(r)
      blk = dcmp.dasm.decoded[b].block
      bw = []
      # first instruction of the block reading r before the block writes it
      rdi = blk.list.find { |di|
        reads, writes = di_rw[di]
        next true if (reads - bw).include? r
        bw |= writes
        false
      }
      if r == :eax and (rdi || blk.list.last).opcode.name == 'ret'
        func.type.type = C::BaseType.new(:void)
        false
      elsif rdi and rdi.backtrace_binding[r]
        false # mov al, 42 ; ret -> don't regarg eax
      else
        true
      end
    elsif deps_w[b].include?(r)
      # overwritten before any read on this path
    else
      done << b
      (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] }
    end
  }

  regargs = []
  register_symbols.each { |r|
    regargs << r if roots.find { |root| uninitialized[root, r, []] }
  }

  # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
  regargs.sort_by { |r| r.to_s }.each { |r|
    a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
    a.add_attribute("register(#{r})")
    func.type.args << a
  }

  # remove writes from a block if no following block read the value
  dw = {}
  deps_w.each { |b, deps|
    dw[b] = deps.reject { |dep|
      ret = true
      done = []
      todo = deps_to[b].dup
      while a = todo.pop
        next if done.include? a
        done << a
        if not deps_r[a] or deps_r[a].include? dep
          # read by a following block, or control leaves the scanned blocks: keep the write
          ret = false
          break
        elsif not deps_w[a].include? dep
          todo.concat deps_to[a]
        end
      end
      ret
    }
  }

  dw
end
181
+
182
# Turns the instructions of each block of a function into C statements,
# appended to func.initializer.statements.
#
# dcmp::     the decompiler (dasm, c_parser, decompile_cexpr, backtrace_target, sizeof, recurse are used)
# myblocks:: array of [block address, [addresses of following blocks]], in emission order;
#            the array is emptied by this method
# deps::     { block address => [registers] }; within a block, a write to a listed
#            register is emitted as a statement, the other register writes are kept
#            pending and only substituted into later expressions
#            (presumably the hash returned by decompile_func_finddeps -- confirm against the caller)
# func::     C variable of the function; its arguments are registered in the scope
# nextaddr:: address following the last block; no goto is emitted when the last
#            block simply continues there
#
# On exit, the backtrace_binding of every instruction of the blocks is reset to nil.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  scope = func.initializer
  func.type.args.each { |arg| scope.symbol[arg.name] = arg }
  stmts = scope.statements
  blocks_toclean = myblocks.dup
  func_entry = myblocks.first[0]

  until myblocks.empty?
    b, to = myblocks.shift
    label = dcmp.dasm.get_label_at(b)
    stmts << C::Label.new(label) if label

    # list of pending assignments [[dest reg, expr assigned]]
    ops = []
    # reg binding (reg => value, values.externals = regs at block start)
    regvals = {}
    # Expr => CExpr
    ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
    # Expr => Expr.bind(regvals) => CExpr
    ceb = lambda { |*e| ce[Expression[*e].bind(regvals)] }

    # dumps the CExprs implementing the assignments to the regs of deps[b]
    # (uses ops[], patches the committed op => [reg, nil])
    commit = lambda {
      last_writes = deps[b].map { |reg|
        [reg, ops.rindex(ops.reverse.find { |r, v| r == reg })]
      }
      last_writes.sort_by { |reg, idx| idx.to_i }.each { |reg, idx|
        next if not idx or not regvals[reg]
        value = reg
        final = []
        # replay the pending ops backwards; an op already committed (nil value)
        # stops further substitution of its register
        ops[0..idx].reverse_each { |r, v|
          final << r if not v
          value = Expression[value].bind(r => v).reduce if not final.include? r
        }
        ops[idx][1] = nil
        regvals.delete reg
        stmts << ce[reg, :'=', value] if reg != value
      }
    }

    # returns an array to use as funcall arguments
    get_func_args = lambda { |di, f|
      # XXX see remarks in #finddeps
      bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
      stackoff = begin
        Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce
      rescue
        nil
      end
      args_todo = f.type.args.to_a.dup
      args = []
      if f.has_attribute('fastcall') # XXX DRY
        # the first two arguments go in ecx and edx, masked to the argument size (char => dl)
        [:ecx, :edx].each { |reg|
          next unless farg = args_todo.shift
          mask = (1 << (8*dcmp.c_parser.sizeof(farg))) - 1
          mask = 0 if farg.has_attribute('unused')
          args << Expression[reg, :&, mask]
        }
      end
      args_todo.each { |a_|
        if r = a_.has_attribute_var('register')
          args << Expression[r.to_sym]
        elsif stackoff.kind_of? Integer
          args << Indirection[[:frameptr, :+, stackoff], @size/8]
          stackoff += [dcmp.sizeof(a_), @size/8].max
        else
          args << Expression[0]
        end
      }

      if f.type.varargs and f.type.args.last.type.pointer? and stackoff.kind_of? Integer
        # check if last arg is a fmtstring
        bt = dcmp.dasm.backtrace(args.last, di.address, :snapshot_addr => func_entry, :include_start => true)
        if bt.length == 1 and s = dcmp.dasm.get_section_at(bt.first)
          fmt = s[0].read(512)
          fmt = fmt.unpack('v*').pack('C*') if dcmp.sizeof(f.type.args.last.type.untypedef.type) == 2
          if fmt.index(?\0)
            fmt = fmt[0...fmt.index(?\0)]
            # one more stack argument per conversion in the format string
            fmt.gsub('%%', '').count('%').times { # XXX %.*s etc..
              args << Indirection[[:frameptr, :+, stackoff], @size/8]
              stackoff += @size/8
            }
          end
        end
      end

      args.map { |e| ceb[e] }
    }

    # go !
    dcmp.dasm.decoded[b].block.list.each do |di|
      if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
        # conditional jump
        commit[]
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        if di.opcode.name =~ /^loop(.+)?/
          loop_cc = $1
          cx = C::CExpression[:'--', ceb[:ecx]]
          cc = loop_cc ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr(loop_cc)]] : cx
        else
          cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
        end
        # XXX switch/indirect/multiple jmp
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
        to.delete dcmp.dasm.normalize(n)
        next
      end

      if di.opcode.name == 'mov'
        # mov cr0 etc
        a1, a2 = di.instruction.args
        case a1
        when Ia32::CtrlReg, Ia32::DbgReg, Ia32::SegReg
          sz = a1.kind_of?(Ia32::SegReg) ? 16 : 32
          unless dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
            dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});")
          end
          f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
          a2 = a2.symbolic(di)
          a2 = [a2, :&, 0xffff] if sz == 16
          stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type)
          next
        end
        case a2
        when Ia32::CtrlReg, Ia32::DbgReg, Ia32::SegReg
          unless dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
            sz = a2.kind_of?(Ia32::SegReg) ? 16 : 32
            dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);")
          end
          f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
          t = f.type.type
          regvals.delete a1.symbolic(di)
          stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t)
          next
        end
      end

      case di.opcode.name
      when 'ret'
        commit[]
        returns_void = (func.type.type.kind_of?(C::BaseType) && func.type.type.name == :void)
        retval = returns_void ? nil : C::CExpression[ceb[:eax]]
        stmts << C::Return.new(retval)
      when 'call' # :saveip
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        args = []
        if (f = dcmp.c_parser.toplevel.symbol[n]) && f.type.kind_of?(C::Function) && f.type.args
          args = get_func_args[di, f]
        elsif defined?(@dasm_func_default_off) && (o = @dasm_func_default_off[[dcmp.dasm, di.address]]) && o.kind_of?(Integer) && o > @size/8
          # no usable prototype: build one with one int per stack slot of the default offset
          f = C::Variable.new
          f.type = C::Function.new(C::BaseType.new(:int), [])
          ((o/(@size/8))-1).times { f.type.args << C::Variable.new(nil, C::BaseType.new(:int)) }
          args = get_func_args[di, f]
        end
        commit[]
        #next if not di.block.to_subfuncret

        if !n.kind_of?(::String) || (f && !f.type.kind_of?(C::Function))
          # indirect funcall
          fptr = ceb[n]
          regvals.delete n
          proto = C::Function.new(C::BaseType.new(:int))
          proto = f.type if f && f.type.kind_of?(C::Function)
          f = C::CExpression[[fptr], C::Pointer.new(proto)]
        elsif !f
          # internal functions are predeclared, so this one is extern
          f = C::Variable.new
          f.name = n
          f.type = C::Function.new(C::BaseType.new(:int))
          if dcmp.recurse > 0
            dcmp.c_parser.toplevel.symbol[n] = f
            dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
          end
        end
        commit[]
        regvals.delete :eax
        e = C::CExpression[f, :funcall, args]
        if deps[b].include?(:eax) && f.type.type != C::BaseType.new(:void)
          e = C::CExpression[ce[:eax], :'=', e, f.type.type]
        end
        stmts << e
      when 'jmp'
        #if di.comment.to_a.include? 'switch'
        #  n = di.instruction.args.first.symbolic(di)
        #  fptr = ceb[n]
        #  binding.delete n
        #  commit[]
        #  sw = C::Switch.new(fptr, C::Block.new(scope))
        #  di.block.to_normal.to_a.each { |addr|
        #    addr = dcmp.dasm.normalize addr
        #    to.delete addr
        #    next if not l = dcmp.dasm.get_label_at(addr)
        #    sw.body.statements << C::Goto.new(l)
        #  }
        #  stmts << sw
        target = di.instruction.args.first
        if target.kind_of? Expression
          # nothing is emitted here when the argument is an Expression
        elsif not target.respond_to? :symbolic
          stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
        else
          # jump through a register/memory operand: rendered as 'return (*fptr)(args)'
          n = target.symbolic(di)
          fptr = ceb[n]
          regvals.delete n
          commit[]
          if fptr.kind_of? C::CExpression and fptr.type.pointer? and fptr.type.untypedef.type.kind_of? C::Function
            proto = fptr.type.untypedef.type
            args = get_func_args[di, fptr.type]
          else
            proto = C::Function.new(C::BaseType.new(:void))
            fptr = C::CExpression[[fptr], C::Pointer.new(proto)]
            args = []
          end
          ret = C::Return.new(C::CExpression[fptr, :funcall, args])
          # remember which instruction this return statement comes from
          class << ret ; attr_accessor :from_instr end
          ret.from_instr = di
          stmts << ret
          to = []
        end
      when 'lgdt'
        unless dcmp.c_parser.toplevel.struct['segment_descriptor']
          dcmp.c_parser.parse('struct segment_descriptor { __int16 limit; __int16 base0_16; __int8 base16_24; __int8 flags1; __int8 flags2_limit_16_20; __int8 base24_32; };')
          dcmp.c_parser.parse('struct segment_table { __int16 size; struct segment_descriptor *table; } __attribute__((pack(2)));')
        end
        unless dcmp.c_parser.toplevel.symbol['intrinsic_lgdt']
          dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);')
        end
        # need a way to transform arg => :frameptr+12
        arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
      when 'lidt'
        unless dcmp.c_parser.toplevel.struct['interrupt_descriptor']
          dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
          dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));')
        end
        unless dcmp.c_parser.toplevel.symbol['intrinsic_lidt']
          dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
        end
        arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
      when 'ltr', 'lldt'
        unless dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
          dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
        end
        arg = di.backtrace_binding.keys.first
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void))
      when 'out'
        # operand size taken from the register argument whose val is 0
        sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
        unless dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
          dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
        end
        port = di.instruction.args.grep(Expression).first || :edx
        stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void))
      when 'in'
        sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
        unless dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
          dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
        end
        port = di.instruction.args.grep(Expression).first || :edx
        f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
        regvals.delete :eax
        stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type)
      when 'sti', 'cli'
        stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
      when /^(mov|sto|lod)s([bwdq])/
        op, sz = $1, $2
        commit[]
        sz = { 'b' => 1, 'w' => 2, 'd' => 4, 'q' => 8 }[sz]
        pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym))

        # statements implementing one iteration of the string instruction
        blk = C::Block.new(scope)
        case op
        when 'mov'
          blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]]
          blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
          blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
        when 'sto'
          blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]]
          blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
        when 'lod'
          blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]]
          blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
        #when 'sca'
        #when 'cmp'
        end

        case (di.instruction.prefix || {})[:rep]
        when nil
          stmts.concat blk.statements
        when 'rep'
          # while (ecx) { <one iteration> ; ecx = ecx - 1; }
          blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]]
          stmts << C::While.new(C::CExpression[ceb[:ecx]], blk)
        #when 'repz'	# sca/cmp only
        #when 'repnz'
        end
        next
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          commit[]
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
        else
          update = {}
          bd.each { |k, v|
            if k.kind_of? ::Symbol and not deps[b].include? k
              # register write not needed as a statement here: keep it pending
              ops << [k, v]
              update[k] = Expression[Expression[v].bind(regvals).reduce]
            else
              stmts << ceb[k, :'=', v]
              stmts.pop if stmts.last.kind_of? C::Variable # [:eflag_s, :=, :unknown].reduce
            end
          }
          # applied after the loop so that all values of one instruction are bound to the previous state
          regvals.update update
        end
      end
    end
    commit[]

    case to.length
    when 0
      if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
        puts " block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      # a goto is only needed when the successor is not what gets emitted next
      following = myblocks.empty? ? nextaddr : myblocks.first.first
      if following != to[0]
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts " block #{Expression[b]} with multiple to"
    end
  end

  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  blocks_toclean.each { |b_, to_|
    dcmp.dasm.decoded[b_].block.list.each { |di|
      di.backtrace_binding = nil
    }
  }
end
517
+
518
# Adds calling-convention attributes to func, from its argument list and from
# the stack pointer adjustment observed at the function's return addresses.
#
# dcmp::  the decompiler (dasm, c_parser, sizeof, stackoff_to_varname are used)
# entry:: address of the function entry point
# func::  C variable of the function; func.type.args is modified in place
#
# * register arguments marked 'unused' are removed from the argument list
# * 'fastcall' is added when the register arguments are exactly ecx
#   (single argument) or ecx and edx; those are then moved to the front
# * when the function pops more stack slots on return than it has stack
#   arguments, 'unused' int arguments are appended to make up the difference
# * 'stdcall', "stackoff:<bytes>" or "breakstack:<backtrace>" is added
#   depending on the stack adjustment found
def decompile_check_abi(dcmp, entry, func)
  a = func.type.args || []
  a.delete_if { |arg| arg.has_attribute_var('register') and arg.has_attribute('unused') }
  ra = a.map { |arg| arg.has_attribute_var('register') }.compact

  only_ecx = (a.length == 1 && ra == ['ecx'])
  if only_ecx || (a.length >= 2 && ra.sort == ['ecx', 'edx'])
    func.add_attribute 'fastcall'
    # reorder args
    ecx = a.find { |arg| arg.has_attribute_var('register') == 'ecx' }
    edx = a.find { |arg| arg.has_attribute_var('register') == 'edx' }
    a.insert(0, a.delete(ecx))
    a.insert(1, a.delete(edx)) if edx
  end

  f = dcmp.dasm.function[entry]
  # without known return addresses nothing more can be checked
  #func.add_attribute 'noreturn'
  return unless f && f.return_address

  # value of esp at each return address, backtraced up to the function entry
  adj = f.return_address.map { |ra_|
    dcmp.dasm.backtrace(:esp, ra_, :include_start => true, :stopaddr => entry)
  }.flatten.uniq
  so = Expression[adj.first, :-, :esp].reduce if adj.length == 1

  if so.kind_of?(::Integer)
    # number of pointer-sized stack slots covered by the stack arguments
    argsz = a.map { |fa|
      next if not fa.stackoff
      (fa.stackoff + [dcmp.sizeof(fa), dcmp.c_parser.typesize[:ptr]].max-1) / dcmp.c_parser.typesize[:ptr]
    }.compact.max.to_i

    # stack adjustment in slots, not counting the return address
    so /= dcmp.dasm.cpu.size/8
    so -= 1

    if so > argsz
      aso = a.empty? ? 0 : a.last.stackoff.to_i + dcmp.c_parser.typesize[:ptr]
      (so - argsz).times do
        pad = C::Variable.new(dcmp.stackoff_to_varname(aso), C::BaseType.new(:int))
        a << pad
        pad.add_attribute('unused')
        aso += dcmp.sizeof(pad)
      end
      argsz = so
    end

    if so == 0
      # nothing popped on return: no attribute needed
    elsif so == argsz
      func.add_attribute 'stdcall' unless func.has_attribute('fastcall')
    else
      func.add_attribute "stackoff:#{so*dcmp.dasm.cpu.size/8}"
    end
  else
    func.add_attribute "breakstack:#{adj.inspect}"
  end
end
563
+ end
564
+ end