metasm 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -0
  3. data.tar.gz.sig +0 -0
  4. data/Gemfile +3 -2
  5. data/metasm.gemspec +3 -2
  6. data/metasm.rb +4 -1
  7. data/metasm/compile_c.rb +2 -2
  8. data/metasm/cpu/arc/decode.rb +0 -21
  9. data/metasm/cpu/arc/main.rb +4 -4
  10. data/metasm/cpu/arm/decode.rb +1 -5
  11. data/metasm/cpu/arm/main.rb +3 -3
  12. data/metasm/cpu/arm64/decode.rb +2 -6
  13. data/metasm/cpu/arm64/main.rb +5 -5
  14. data/metasm/cpu/bpf/decode.rb +3 -35
  15. data/metasm/cpu/bpf/main.rb +5 -5
  16. data/metasm/cpu/bpf/render.rb +1 -12
  17. data/metasm/cpu/cy16/decode.rb +0 -6
  18. data/metasm/cpu/cy16/main.rb +3 -3
  19. data/metasm/cpu/cy16/render.rb +0 -11
  20. data/metasm/cpu/dalvik/decode.rb +4 -26
  21. data/metasm/cpu/dalvik/main.rb +20 -2
  22. data/metasm/cpu/dalvik/opcodes.rb +3 -2
  23. data/metasm/cpu/{mips/compile_c.rb → ebpf.rb} +5 -2
  24. data/metasm/cpu/ebpf/debug.rb +61 -0
  25. data/metasm/cpu/ebpf/decode.rb +142 -0
  26. data/metasm/cpu/ebpf/main.rb +58 -0
  27. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  28. data/metasm/cpu/ebpf/render.rb +36 -0
  29. data/metasm/cpu/ia32/debug.rb +39 -1
  30. data/metasm/cpu/ia32/decode.rb +111 -90
  31. data/metasm/cpu/ia32/decompile.rb +45 -37
  32. data/metasm/cpu/ia32/main.rb +10 -0
  33. data/metasm/cpu/ia32/parse.rb +6 -0
  34. data/metasm/cpu/mcs51/decode.rb +1 -1
  35. data/metasm/cpu/mcs51/main.rb +11 -0
  36. data/metasm/cpu/mips/decode.rb +8 -18
  37. data/metasm/cpu/mips/main.rb +3 -3
  38. data/metasm/cpu/mips/opcodes.rb +1 -1
  39. data/metasm/cpu/msp430/decode.rb +2 -6
  40. data/metasm/cpu/msp430/main.rb +3 -3
  41. data/metasm/cpu/openrisc.rb +11 -0
  42. data/metasm/cpu/openrisc/debug.rb +106 -0
  43. data/metasm/cpu/openrisc/decode.rb +182 -0
  44. data/metasm/cpu/openrisc/decompile.rb +350 -0
  45. data/metasm/cpu/openrisc/main.rb +70 -0
  46. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  47. data/metasm/cpu/openrisc/render.rb +37 -0
  48. data/metasm/cpu/ppc/decode.rb +0 -25
  49. data/metasm/cpu/ppc/main.rb +6 -6
  50. data/metasm/cpu/ppc/opcodes.rb +3 -4
  51. data/metasm/cpu/python/decode.rb +0 -20
  52. data/metasm/cpu/python/main.rb +1 -1
  53. data/metasm/cpu/sh4/decode.rb +2 -6
  54. data/metasm/cpu/sh4/main.rb +25 -23
  55. data/metasm/cpu/st20/decode.rb +0 -7
  56. data/metasm/cpu/webasm.rb +11 -0
  57. data/metasm/cpu/webasm/debug.rb +31 -0
  58. data/metasm/cpu/webasm/decode.rb +321 -0
  59. data/metasm/cpu/webasm/decompile.rb +386 -0
  60. data/metasm/cpu/webasm/encode.rb +104 -0
  61. data/metasm/cpu/webasm/main.rb +81 -0
  62. data/metasm/cpu/webasm/opcodes.rb +214 -0
  63. data/metasm/cpu/x86_64/compile_c.rb +13 -9
  64. data/metasm/cpu/x86_64/parse.rb +1 -1
  65. data/metasm/cpu/z80/decode.rb +0 -27
  66. data/metasm/cpu/z80/main.rb +3 -3
  67. data/metasm/cpu/z80/render.rb +0 -11
  68. data/metasm/debug.rb +43 -8
  69. data/metasm/decode.rb +62 -14
  70. data/metasm/decompile.rb +793 -466
  71. data/metasm/disassemble.rb +188 -131
  72. data/metasm/disassemble_api.rb +30 -17
  73. data/metasm/dynldr.rb +2 -2
  74. data/metasm/encode.rb +8 -2
  75. data/metasm/exe_format/autoexe.rb +2 -0
  76. data/metasm/exe_format/coff.rb +21 -3
  77. data/metasm/exe_format/coff_decode.rb +12 -0
  78. data/metasm/exe_format/coff_encode.rb +6 -3
  79. data/metasm/exe_format/dex.rb +13 -3
  80. data/metasm/exe_format/elf.rb +12 -2
  81. data/metasm/exe_format/elf_decode.rb +59 -1
  82. data/metasm/exe_format/main.rb +2 -0
  83. data/metasm/exe_format/mz.rb +1 -0
  84. data/metasm/exe_format/pe.rb +25 -3
  85. data/metasm/exe_format/wasm.rb +402 -0
  86. data/metasm/gui/dasm_decomp.rb +171 -95
  87. data/metasm/gui/dasm_graph.rb +61 -2
  88. data/metasm/gui/dasm_hex.rb +2 -2
  89. data/metasm/gui/dasm_main.rb +45 -19
  90. data/metasm/gui/debug.rb +13 -4
  91. data/metasm/gui/gtk.rb +12 -4
  92. data/metasm/main.rb +108 -103
  93. data/metasm/os/emulator.rb +175 -0
  94. data/metasm/os/main.rb +11 -6
  95. data/metasm/parse.rb +23 -12
  96. data/metasm/parse_c.rb +189 -135
  97. data/metasm/preprocessor.rb +16 -1
  98. data/misc/openrisc-parser.rb +79 -0
  99. data/samples/dasm-plugins/scanxrefs.rb +6 -4
  100. data/samples/dasm-plugins/selfmodify.rb +8 -8
  101. data/samples/dbg-plugins/trace_func.rb +1 -1
  102. data/samples/disassemble-gui.rb +14 -3
  103. data/samples/emubios.rb +251 -0
  104. data/samples/emudbg.rb +127 -0
  105. data/samples/lindebug.rb +79 -78
  106. data/samples/metasm-shell.rb +8 -8
  107. data/tests/all.rb +1 -1
  108. data/tests/expression.rb +2 -0
  109. data/tests/graph_layout.rb +1 -1
  110. data/tests/ia32.rb +1 -0
  111. data/tests/mips.rb +1 -1
  112. data/tests/preprocessor.rb +18 -0
  113. metadata +124 -6
  114. metadata.gz.sig +0 -0
@@ -0,0 +1,321 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2010 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/cpu/webasm/opcodes'
7
+ require 'metasm/decode'
8
+
9
+ module Metasm
10
+ class WebAsm
11
# Builds a per-byte opcode lookup table.
# Returns a Hash with one entry for every byte value 0..0xff; each value is
# the list (possibly empty) of opcodes from opcode_list whose +bin+ is that byte,
# kept in opcode_list order.
def build_bin_lookaside
  table = {}
  256.times { |byte| table[byte] = [] }
  opcode_list.each { |op| table[op.bin] << op }
  table
end
18
+
19
# Reads one LEB128-encoded integer from +ed+, one byte at a time (ed.read(1)).
# Each byte contributes its low 7 bits, least significant group first; decoding
# stops after a byte whose high bit is clear, or once 10 groups (70 bits) were read.
# When +signed+ is true, the value is sign-extended from the number of bits
# actually consumed through Expression.make_signed.
def decode_uleb(ed, signed=false)
  value = 0
  shift = 0
  loop do
    break if shift >= 10*7
    byte = ed.read(1).unpack('C').first.to_i
    value |= (byte & 0x7f) << shift
    shift += 7
    break if (byte & 0x80).zero?
  end
  signed ? Expression.make_signed(value, shift) : value
end
30
+
31
# When starting disassembly at +addr+, pre-decode every instruction up to the
# 'end' that closes the outermost scope, and fix up the branch targets of the
# structured control-flow instructions (if/else/block/loop/br*).
#
# Decoded instructions are stored in dasm.misc[:cpu_context][:di_cache], keyed by
# address. Each one gets:
#   di.misc[:code_start]  address this pre-decoding pass started from
#   di.misc[:x]           list of branch targets (for if/else/br/br_if/br_table)
#   di.misc[:end_of]      on 'else' and non-final 'end': the matching if/loop/block
# The opcode of every 'end' is replaced by the opcode_list variant with or without
# the :stopexec property, depending on whether it closes the outermost scope.
#
# Returns dasm.misc[:cpu_context] (immediately if +addr+ is already cached).
# Raises RuntimeError on an 'else' without matching 'if' or a branch depth
# exceeding the number of open scopes.
def disassemble_init_context(dasm, addr)
  dasm.misc ||= {}
  dasm.misc[:cpu_context] ||= {}
  cache = dasm.misc[:cpu_context][:di_cache] ||= {}
  addr = dasm.normalize(addr)
  return dasm.misc[:cpu_context] if cache[addr]

  code_start = addr
  # one array per open scope: [opener (if/else/loop/block), pending forward branches...]
  # the bottom array has no opener: it is the scope closed by the final 'end'
  stack = [[]]
  set_misc_x = lambda { |di, tg| di.misc[:x] ||= [] ; di.misc[:x] |= [tg] }
  while di = dasm.disassemble_instruction(addr)
    cache[addr] = di
    di.misc ||= {}
    di.misc[:code_start] = code_start
    case di.opcode.name
    when 'if', 'loop', 'block'
      stack << [di]
    when 'else'
      # the opener is always the first entry of the scope; entries after it are
      # branches out of the 'if' arm still waiting for the closing 'end'
      opener = stack.last.first
      raise "bad #{di} #{stack.last.inspect}" if not opener or opener.opcode.name != 'if'
      # only the 'if' itself jumps past the 'else' (into the else arm);
      # pending branches keep targeting the end of the whole construct
      set_misc_x[opener, di.next_addr]
      di.misc[:end_of] = opener     # store matching 'if'
      stack.last[0] = di            # 'else' replace 'if'
    when 'br', 'br_if', 'br_table'
      if di.opcode.name == 'br_table'
        depths = di.instruction.args.first.ary.uniq | [di.instruction.args.first.default]
      else
        depths = [di.instruction.args.first.reduce]
      end
      depths.each { |depth|
        tg = stack[-depth-1]    # XXX skip if/else in the stack ?
        raise "bad #{di} (#{stack.length})" if not tg
        if tg.first and tg.first.opcode.name == 'loop'
          # branching to a loop goes back to the loop start
          set_misc_x[di, tg.first.address]
        else
          # forward branch: resolved when the scope's 'end' is reached
          tg << di
        end
      }
    when 'end'
      dis = stack.pop
      dis.each { |ddi| set_misc_x[ddi, di.next_addr] if ddi.opcode.name != 'loop' and ddi.opcode.name != 'block' }
      if stack.empty?
        # stack empty: end of func
        di.opcode = @opcode_list.find { |op| op.name == 'end' and op.props[:stopexec] }
        break
      else
        if dis.first
          di.misc[:end_of] = dis.first    # store matching loop/block/if
          if dis.first.opcode.name == 'else'
            di.misc[:end_of] = dis.first.misc[:end_of]    # else patched stack.last, recover original 'if'
          end
        end
        di.opcode = @opcode_list.find { |op| op.name == 'end' and not op.props[:stopexec] }
      end
    end
    addr = di.next_addr
  end

  dasm.misc[:cpu_context]
end
91
+
92
# Returns the pre-decoded instruction for +di_addr+ from the context cache
# (ctx[:di_cache]) instead of decoding +edata+ again.
# A missing +ctx+ is created through disassemble_init_context; on a cache miss the
# address is normalized, a new pre-decoding pass is started from it, and the
# lookup is retried with the normalized address.
def decode_instruction_context(dasm, edata, di_addr, ctx)
  ctx ||= disassemble_init_context(dasm, di_addr)
  cached = ctx[:di_cache][di_addr]
  return cached if cached

  normalized = dasm.normalize(di_addr)
  disassemble_init_context(dasm, normalized)
  ctx[:di_cache][normalized]
end
101
+
102
# Reads one unsigned byte from +edata+ and looks up the first opcode registered
# for it in bin_lookaside.
# Returns a new DecodedInstruction with its opcode set, or nil when no opcode
# is registered for that byte.
def decode_findopcode(edata)
  di = DecodedInstruction.new(self)
  byte = edata.decode_imm(:u8, @endianness)
  opcode = bin_lookaside[byte].first
  di.opcode = opcode
  opcode ? di : nil
end
107
+
108
# Decodes the immediate arguments of di.opcode from +edata+ and fills in
# di.instruction (opname and args) and di.bin_length.
# bin_length is 1 plus the number of bytes consumed here
# (the extra byte presumably being the opcode byte read by decode_findopcode).
# Raises SyntaxError for an argument type this decoder does not know.
def decode_instr_op(edata, di)
  args_start = edata.ptr
  opcode = di.opcode
  di.instruction.opname = opcode.name

  opcode.args.each { |argtype|
    decoded = case argtype
              when :uleb then Expression[decode_uleb(edata)]
              when :sleb then Expression[decode_uleb(edata, true)]
              when :memoff then Memref.new(decode_uleb(edata))
              when :blocksig then BlockSignature.new(decode_uleb(edata, true))
              when :br_table then decode_br_table(edata)
              # floats are kept as their raw 32/64-bit integer encoding
              when :f32 then Expression[edata.decode_imm(:u32, @endianness)]
              when :f64 then Expression[edata.decode_imm(:u64, @endianness)]
              else raise SyntaxError, "Internal error: invalid argument #{argtype} in #{opcode.name}"
              end
    di.instruction.args << decoded
  }

  di.bin_length = 1 + edata.ptr - args_start
  di
end
129
+
130
# Post-processes call instructions; every other instruction is returned untouched.
# 'call': remembers the callee index in di.misc[:tg_func_nr]; when @wasm_file
#   knows that function, the first argument is replaced by the call target
#   (its :init_offset, or the string "module_field" when it has none) and
#   di.misc[:x] is set to that target. Unknown functions get [:default].
# 'call_indirect': di.misc[:x] is always [:default].
# Returns +di+.
def decode_instr_interpret(di, addr)
  opname = di.opcode.name
  if opname == 'call'
    func_nr = di.instruction.args.first.reduce
    di.misc ||= {}
    di.misc[:tg_func_nr] = func_nr
    func = @wasm_file.get_function_nr(func_nr)
    if func
      target = func[:init_offset] || "#{func[:module]}_#{func[:field]}"
      di.instruction.args[0] = Expression[target]
      di.misc[:x] = [target]
    else
      di.misc[:x] = [:default]
    end
  elsif opname == 'call_indirect'
    di.misc ||= {}
    di.misc[:x] = [:default]
  end
  di
end
149
+
150
# Decodes the argument of a br_table instruction: a LEB128 count, that many
# LEB128 entries, then one more LEB128 value used as the default entry.
# Returns a BrTable built from the entry list and the default.
def decode_br_table(edata)
  count = decode_uleb(edata)
  targets = (1..count).map { decode_uleb(edata) }
  BrTable.new(targets, decode_uleb(edata))
end
157
+
158
# Fills @backtrace_binding with one lambda per opcode name (existing entries are
# kept). Each lambda takes a decoded instruction and returns the symbolic effect
# of that instruction as a Hash { target => Expression }.
#
# The model used by the bindings:
#   :opstack     pointer to the operand stack; every slot is 8 bytes wide, a push
#                decrements the pointer by 8, a pop increments it
#   :callstack   pointer to the return-address stack (8 bytes per entry)
#   :local_base  base of the function locals, 8 bytes per local
#   :mem         base of the linear memory
# Opcodes matched by no rule get a nil entry (see get_backtrace_binding).
# Bindings that only track the stack depth carry :incomplete_binding => 1.
#
# Returns @backtrace_binding.
def init_backtrace_binding
  @backtrace_binding ||= {}

  # byte size of a wasm value type; anything but i32/f32 is 8
  typesz = Hash.new(8).update 'i32' => 4, 'f32' => 4
  # operand stack slot at byte offset +off+ from the stack pointer
  opstack = lambda { |off, sz| Indirection[Expression[:opstack, :+, off].reduce, sz] }
  # binding that moves the stack pointer by +delta+ bytes, merged with +hash+
  add_opstack = lambda { |delta, hash| { :opstack => Expression[:opstack, :+, delta].reduce }.update hash }
  globsz = lambda { |di|
    glob_nr = Expression[di.instruction.args.first].reduce
    g = @wasm_file.get_global_nr(glob_nr)
    g ? typesz[g[:type]] : 8
  }
  global = lambda { |di|
    glob_nr = Expression[di.instruction.args.first].reduce
    g = @wasm_file.get_global_nr(glob_nr)
    n = g && g[:module] ? "#{g[:module]}_#{g[:field]}" : "global_#{glob_nr}"
    Indirection[n, globsz[di]]
  }
  locsz = lambda { |di|
    loc_nr = Expression[di.instruction.args.first].reduce
    ci = @wasm_file.code_info[di.misc[:code_start]]
    # locals are numbered params first, then declared local variables
    next typesz[ci[:params][loc_nr]] if loc_nr < ci[:params].length
    loc_nr -= ci[:params].length
    next typesz[ci[:local_var][loc_nr]] if ci[:local_var][loc_nr]
    8
  }
  local = lambda { |di|
    loc_nr = Expression[di.instruction.args.first].reduce
    Indirection[[:local_base, :+, loc_nr*8], locsz[di]]
  }

  opcode_list.map { |ol| ol.name }.uniq.each { |opname|
    # operand size from the type prefix: 'i32.xxx' / 'f32.xxx' => 4, otherwise 8
    sz = (opname[1, 2] == '32' ? 4 : 8)
    @backtrace_binding[opname] ||= case opname
    when 'call', 'call_indirect'
      lambda { |di|
        stack_off = 0
        if opname == 'call'
          f = @wasm_file.get_function_nr(di.misc[:tg_func_nr])
          proto = f ? f[:type] : {}
          # TODO use local_base
          h = { :callstack => Expression[:callstack, :+, 8], Indirection[:callstack, 8] => Expression[di.next_addr] }
          proto_params_offset = 0
        else
          proto = @wasm_file.type[di.instruction.args.first.reduce]
          h = { :callstack => Expression[:callstack, :+, 8], Indirection[:callstack, 8] => Expression[di.next_addr], 'func_idx' => Expression[opstack[0, 4]] }
          # the function index sits on top of the arguments and is popped too
          stack_off += 8
          proto_params_offset = 1
        end
        stack_off -= 8*proto[:ret].to_a.length
        stack_off += 8*proto[:params].to_a.length
        h.update :opstack => Expression[:opstack, :+, stack_off]
        proto[:ret].to_a.each_with_index { |rt, i| h.update opstack[8*i, typesz[rt]] => Expression["ret_#{i}"] }
        proto[:params].to_a.each_with_index { |pt, i| h.update "param_#{i}" => Expression[opstack[8*(proto[:params].length-i-1+proto_params_offset), typesz[pt]]] }
        h
      }
    when 'if', 'br_if'; lambda { |di| add_opstack[ 8, :flag => Expression[opstack[0, 8]]] }
    when 'block', 'loop', 'br', 'nop', 'else'; lambda { |di| {} }
    when 'end', 'return'; lambda { |di| di.opcode.props[:stopexec] ? { :callstack => Expression[:callstack, :-, 8] } : {} }
    when 'drop'; lambda { |di| add_opstack[8, {}] }
    when 'select'; lambda { |di| add_opstack[16, opstack[0, 8] => Expression[[opstack[8, 8], :*, [1, :-, opstack[0, 8]]], :|, [opstack[16, 8], :*, opstack[0, 8]]]] }
    when 'get_local'; lambda { |di| add_opstack[-8, opstack[0, locsz[di]] => Expression[local[di]]] }
    when 'set_local'; lambda { |di| add_opstack[ 8, local[di] => Expression[opstack[0, locsz[di]]]] }
    when 'tee_local'; lambda { |di| add_opstack[ 0, local[di] => Expression[opstack[0, locsz[di]]]] }
    when 'get_global'; lambda { |di| add_opstack[-8, opstack[0, globsz[di]] => Expression[global[di]]] }
    when 'set_global'; lambda { |di| add_opstack[ 8, global[di] => Expression[opstack[0, globsz[di]]]] }
    when /\.load(.*)/
      mode = $1; memsz = (mode.include?('32') ? 4 : mode.include?('16') ? 2 : mode.include?('8') ? 1 : sz)
      lambda { |di| add_opstack[ 0, opstack[0, sz] => Expression[Indirection[[opstack[0, 4], :+, [:mem, :+, di.instruction.args[1].off]], memsz]]] }
    when /\.store(.*)/
      mode = $1; memsz = (mode.include?('32') ? 4 : mode.include?('16') ? 2 : mode.include?('8') ? 1 : sz)
      lambda { |di| add_opstack[ 16, Indirection[[opstack[8, 4], :+, [:mem, :+, di.instruction.args[1].off]], memsz] => Expression[opstack[0, sz], :&, (1 << (8*memsz)) - 1]] }
    when /\.const/; lambda { |di| add_opstack[-8, opstack[0, sz] => Expression[di.instruction.args.first.reduce]] }
    when /\.eqz/; lambda { |di| add_opstack[ 0, opstack[0, 8] => Expression[opstack[0, sz], :==, 0]] }
    when /\.eq/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :==, opstack[0, sz]]] }
    # anchored: an unanchored /\.ne/ would also capture the unary float ops
    # 'fNN.neg' and 'fNN.nearest', which are handled further down
    when /\.ne\z/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :!=, opstack[0, sz]]] }
    when /\.lt/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :<, opstack[0, sz]]] }
    when /\.gt/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :>, opstack[0, sz]]] }
    when /\.le/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :<=, opstack[0, sz]]] }
    when /\.ge/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :>=, opstack[0, sz]]] }

    when /\.(clz|ctz|popcnt)/; lambda { |di| add_opstack[ 0, :bits => Expression[opstack[0, sz]]] }
    when /\.add/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :+, opstack[0, sz]]] }
    when /\.sub/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :-, opstack[0, sz]]] }
    when /\.mul/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :*, opstack[0, sz]]] }
    when /\.div/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :/, opstack[0, sz]]] }
    when /\.rem/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :%, opstack[0, sz]]] }
    when /\.and/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :&, opstack[0, sz]]] }
    when /\.or/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :|, opstack[0, sz]]] }
    when /\.xor/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :^, opstack[0, sz]]] }
    when /\.shl/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :<<, opstack[0, sz]]] }
    when /\.shr/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :>>, opstack[0, sz]]] }
    when /\.rotl/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[[opstack[8, sz], :<<, opstack[0, sz]], :|, [opstack[8, sz], :>>, [8*sz, :-, opstack[0, sz]]]]] }
    when /\.rotr/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[[opstack[8, sz], :>>, opstack[0, sz]], :|, [opstack[8, sz], :<<, [8*sz, :-, opstack[0, sz]]]]] }
    when /f.*\.(abs|neg|ceil|floor|trunc|nearest|sqrt|copysign)/; lambda { |di| add_opstack[0, :incomplete_binding => 1] }
    when /f.*\.(min|max)/; lambda { |di| add_opstack[8, :incomplete_binding => 1] }
    when /i32.wrap/; lambda { |di| add_opstack[ 0, opstack[0, 4] => Expression[opstack[0, 8]]] }
    when /i64.extend/; lambda { |di| add_opstack[ 0, opstack[0, 8] => Expression[opstack[0, 4]]] }
    when /trunc|convert|promote|demote|reinterpret/; lambda { |di| add_opstack[0, :incomplete_binding => 1] }
    end
  }

  @backtrace_binding
end
261
+
262
# Returns the symbolic effect of +di+ by calling the handler registered for its
# opcode name in backtrace_binding (an empty Hash when the handler returns nil/false).
# Without a handler, the result is flagged :incomplete_binding and, when $VERBOSE
# is set, a diagnostic line is printed.
def get_backtrace_binding(di)
  handler = backtrace_binding[di.opcode.name]
  return(handler[di] || {}) if handler

  puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
  {:incomplete_binding => Expression[1]}
end
270
+
271
# Returns a copy of the binding +fbd+ where every Indirection key whose
# target.lexpr is not itself an Indirection is re-expressed with :opstack
# replaced by the binding's own new :opstack value (k.bind(...).reduce_rec).
# All other keys (including store8-style nested indirections) are copied as is.
# +fbd+ itself is not modified.
def fix_fwdemu_binding(di, fbd)
  fbd.each_with_object({}) { |(k, v), fixed|
    rebase = (k.kind_of?(Indirection) and not k.target.lexpr.kind_of?(Indirection))
    key = rebase ? k.bind(:opstack => fbd[:opstack]).reduce_rec : k
    fixed[key] = v
  }
end
284
+
285
# Returns the list of execution targets of +di+:
# - 'return' / 'end' carrying the :stopexec property: the return address slot
#   Indirection[:callstack, 8]
# - instructions without the :setip property: none
# - otherwise: the flattened content of di.misc[:x] (empty when di.misc is unset)
def get_xrefs_x(dasm, di)
  props = di.opcode.props
  if props[:stopexec] and %w[return end].include?(di.opcode.name)
    return [Indirection[:callstack, 8]]
  end
  return [] unless props[:setip]
  return [] unless di.misc

  [di.misc[:x]].flatten
end
296
+
297
# Tells whether +expr+ is the function return address, i.e. equals
# Indirection[:callstack, 8] once both are wrapped in an Expression.
# A nil/false +expr+ is returned unchanged. +di+ is not used.
def backtrace_is_function_return(expr, di=nil)
  return expr unless expr

  Expression[expr] == Expression[Indirection[:callstack, 8]]
end
300
+
301
# Builds the DecodedFunction used for functions nothing else is known about:
# it is recorded as transferring execution (:x) to the address stored at
# Indirection[:callstack, 8], and its binding moves :callstack back by 8.
def disassembler_default_func
  retaddr = Indirection[:callstack, 8]
  func = DecodedFunction.new
  func.backtracked_for << BacktraceTrace.new(retaddr, :default, retaddr, :x, nil)
  func.backtrace_binding = { :callstack => Expression[:callstack, :-, 8] }
  func
end
308
+
309
# Sets the binding of function +f+ to a fixed effect: :callstack moves back by 8.
# The other arguments (dasm, faddr, retaddrlist, wantregs) are not used.
# Returns the binding Hash that was stored.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  func_binding = { :callstack => Expression[:callstack, :-, 8] }
  f.backtrace_binding = func_binding
  func_binding
end
312
+
313
# Tells whether +expr+ refers to one of the stack-like areas: returns
# :local_base or :opstack (checked in that order) when it appears among the
# externals of the expression, nil otherwise.
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  [:local_base, :opstack].find { |base| externals.include?(base) }
end
316
+
317
# The C prototype is ignored (+cp+, +sym+ and +orig+ are unused): every function
# is described by a fresh disassembler_default_func.
def decode_c_function_prototype(cp, sym, orig=nil)
  default_func = disassembler_default_func
  default_func
end
320
+ end
321
+ end
@@ -0,0 +1,386 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/webasm/main'
8
+
9
+ module Metasm
10
+ class WebAsm
11
# Converts a wasm type description to a C type object.
#   'i32' / 'i64' / 'f32' / 'f64'  => C::BaseType int / longlong / float / double
#   'anyfunc'                      => C::Function returning void
#   Hash { :ret => [...], :params => [...] } (function signature)
#                                  => C::Function; only the first return type is
#                                     used (void when there is none), parameters
#                                     become unnamed C::Variable
# Returns nil for anything else.
def wasm_type_to_type(t)
  if t.kind_of?(Hash)
    first_ret = t[:ret].first
    ret = first_ret ? wasm_type_to_type(first_ret) : C::BaseType.new(:void)
    args = t[:params].map { |p| C::Variable.new(nil, wasm_type_to_type(p)) }
    return C::Function.new(ret, args)
  end

  basetype = { 'i32' => :int, 'i64' => :longlong, 'f32' => :float, 'f64' => :double }[t]
  if basetype
    C::BaseType.new(basetype)
  elsif 'anyfunc' == t
    C::Function.new(C::BaseType.new(:void))
  end
end
24
+
25
# Declares, in the decompiler C toplevel, the file-level objects of the wasm module:
# - 'mem': static char pointer
# - one extern declaration per imported global (1-element array named
#   "module_field") and per imported function (named "module_field")
# - one static 1-element array 'global_N' per module global; N keeps counting
#   after the imported globals. The code labelled 'global_N_init' is disassembled
#   and decompiled; when it decompiles to a body starting with a return statement,
#   the returned value becomes the variable initializer and the helper function
#   is removed from the toplevel
# - 'indirect_calltable' for the first table only: array of function pointers,
#   all slots initialized to 0 then filled from the element sections, whose
#   placement offset is obtained by decompiling 'element_N_init_addr' the same way
#
# Slots falling outside of the table and unknown function indexes are reported
# with a 'W:' line on stdout and skipped.
def decompile_init(dcmp)
  mem = dcmp.c_parser.toplevel.symbol['mem'] = C::Variable.new('mem', C::Pointer.new(C::BaseType.new(:char)))
  mem.storage = :static
  dcmp.c_parser.toplevel.statements << C::Declaration.new(mem)

  global_idx = 0
  @wasm_file.import.to_a.each { |i|
    case i[:kind]
    when 'global'
      # imported globals take the first global indexes
      global_idx += 1
      var = C::Variable.new
      var.name = '%s_%s' % [i[:module], i[:field]]
      var.type = C::Array.new(wasm_type_to_type(i[:type]), 1)
      var.storage = :extern
      dcmp.c_parser.toplevel.symbol[var.name] = var
      dcmp.c_parser.toplevel.statements << C::Declaration.new(var)
    when 'function'
      var = C::Variable.new
      var.name = '%s_%s' % [i[:module], i[:field]]
      var.type = wasm_type_to_type(i[:type])
      var.storage = :extern
      dcmp.c_parser.toplevel.symbol[var.name] = var
      dcmp.c_parser.toplevel.statements << C::Declaration.new(var)
    end
  }

  @wasm_file.global.to_a.each_with_index { |g, idx|
    g_name = 'global_%d' % global_idx
    global_idx += 1
    var = C::Variable.new
    var.name = g_name
    var.type = C::Array.new(wasm_type_to_type(g[:type]), 1)
    var.storage = :static
    dcmp.c_parser.toplevel.symbol[var.name] = var
    dcmp.c_parser.toplevel.statements << C::Declaration.new(var)

    # decompile initializers
    g_init_name = g_name + '_init'
    dcmp.dasm.disassemble(g_init_name)
    dcmp.decompile_func(g_init_name)
    if init = dcmp.c_parser.toplevel.symbol[g_init_name] and init.initializer.kind_of?(C::Block) and
        init.initializer.statements.first.kind_of?(C::Return)
      dcmp.c_parser.toplevel.symbol[g_name].initializer = [ init.initializer.statements.first.value ]
      dcmp.c_parser.toplevel.symbol.delete(g_init_name)
      dcmp.c_parser.toplevel.statements.delete_if { |st| st.kind_of?(C::Declaration) and st.var.name == g_init_name }
    end
  }

  @wasm_file.table.to_a.each_with_index { |t, idx|
    break if idx > 0
    t_name = 'indirect_calltable'
    var = C::Variable.new
    var.name = t_name
    sz = t[:limits][:initial_size]
    var.type = C::Array.new(C::Pointer.new(wasm_type_to_type(t[:type])), sz)
    var.storage = :static
    dcmp.c_parser.toplevel.symbol[var.name] = var
    dcmp.c_parser.toplevel.statements << C::Declaration.new(var)
    var.initializer = [C::CExpression[0]] * sz

    # initializer
    @wasm_file.element.to_a.each_with_index { |e, eidx|
      next if e[:table_index] != idx
      # address of the code that evals the index at which to place the elements inside the table
      e_init_name = "element_#{eidx}_init_addr"
      dcmp.dasm.disassemble(e_init_name)
      dcmp.decompile_func(e_init_name)
      if init = dcmp.c_parser.toplevel.symbol[e_init_name] and init.initializer.kind_of?(C::Block) and
          init.initializer.statements.first.kind_of?(C::Return)
        eoff = init.initializer.statements.first.value.reduce(dcmp.c_parser)
        dcmp.c_parser.toplevel.symbol.delete(e_init_name)
        dcmp.c_parser.toplevel.statements.delete_if { |st| st.kind_of?(C::Declaration) and st.var.name == e_init_name }
        e[:elems].each_with_index { |ev, vidx|
          # table 0 is the only table in a wasm file and contains a list of function indexes used with the call_indirect asm instruction
          # e_init_name gives the index at which we should put e[:elems], and we convert the func indexes into C names
          vidx += eoff
          if vidx >= sz or vidx < 0
            puts "W: initializing indirect_calltable, would put #{ev} beyond end of table (#{vidx} > #{sz})"
            next
          end
          if not tg_func = @wasm_file.get_function_nr(ev)
            puts "W: initializing indirect_calltable, bad func index #{ev}"
            next
          end
          if tg_func[:init_offset]
            funcname = dcmp.dasm.get_label_at(tg_func[:init_offset]) || "func_at_#{'%x' % tg_func[:init_offset]}"
          else
            # function without code in this file (import): use the same
            # "module_field" name as its extern declaration above
            funcname = "#{tg_func[:module]}_#{tg_func[:field]}"
          end
          # XXX should decompile funcname now ?
          var.initializer[vidx] = C::CExpression[:&, C::Variable.new(funcname)]
        }
      end
    }
  }
end
117
+
118
# Calling-convention description, created on first use and cached in
# @abi_funcall: a Hash whose :changed list is empty.
def abi_funcall
  @abi_funcall = { :changed => [] } unless @abi_funcall
  @abi_funcall
end
121
+
122
# Yields every block of +blocks+ with dasm.address_binding temporarily patched so
# that :opstack is expressed relative to :frameptr (its value at +funcstart+).
#
# After a block was yielded, when its last instruction is an 'end', 'return' or
# 'else' and the operand stack pointer backtracks to a single frameptr+Integer
# value, that integer is stored in di.misc[:dcmp_stackoff].
#
# The previous address bindings are put back before returning, also when the
# yielded code raises. Each address is saved only once, so that a block starting
# at +funcstart+ does not record the temporary function-entry binding as the
# value to restore.
#
# Returns the Hash of saved bindings (address => previous binding or nil).
def decompile_makestackvars(dasm, funcstart, blocks)
  @decomp_mkstackvars_terminals = [:frameptr, :local_base, :mem]
  oldbd = {}
  save_binding = lambda { |a| oldbd[a] = dasm.address_binding[a] if not oldbd.has_key?(a) }

  save_binding[funcstart]
  dasm.address_binding[funcstart] = { :opstack => Expression[:frameptr] }
  begin
    blocks.each { |block|
      save_binding[block.address]
      stkoff = dasm.backtrace(:opstack, block.address, :snapshot_addr => funcstart)
      dasm.address_binding[block.address] = { :opstack => Expression[:frameptr, :+, stkoff[0]-:frameptr] }
      yield block
      # store frameptr offset at each 'end' 'return' or 'else' instruction
      if di = block.list.last and %w[end return else].include?(di.opcode.name)
        stkoff = dasm.backtrace(:opstack, di.address, :snapshot_addr => funcstart)
        if stkoff.length == 1 and (stkoff[0] - :frameptr).kind_of?(::Integer)
          di.misc[:dcmp_stackoff] = stkoff[0] - :frameptr
        end
      end
    }
  ensure
    oldbd.each { |a, b| b ? dasm.address_binding[a] = b : dasm.address_binding.delete(a) }
  end
  oldbd
end
142
+
143
# No per-instruction dependency is computed for this CPU: does nothing, returns nil.
def decompile_func_finddeps_di(dcmp, func, di, a, w)
  nil
end
145
+
146
# No dependency is computed for this CPU: always returns a new empty Hash.
def decompile_func_finddeps(dcmp, blocks, func)
  Hash.new
end
149
+
150
# Converts the disassembled blocks of one function into C statements appended to
# func.initializer.
#
# dcmp::     the decompiler (gives access to dasm, c_parser, decompile_cexpr...)
# myblocks:: list of [block_address, [successor addresses]]; consumed (emptied) here
# deps::     unused
# func::     the C function variable being filled
# nextaddr:: address of the code following the last block, if any
#
# The signature of the code is looked up by its entry address: a function body,
# a global initializer (returns the global's type) or an element/data offset
# initializer (returns i32). Raises RuntimeError when the entry matches none.
#
# Function parameters become arg_N, declared locals var_N (initialized to 0), and
# every operand stack slot written by an instruction gets a fresh loc_N variable.
# Instructions with an incomplete binding are emitted as inline asm statements.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  func_entry = myblocks.first[0]
  # sections absent from the file may be nil, hence the to_a
  if w_func = @wasm_file.function_body.to_a.find { |fb| fb[:init_offset] == func_entry }
  elsif g = @wasm_file.global.to_a.find { |gg| gg[:init_offset] == func_entry }
    w_func = { :local_var => [], :type => { :params => [], :ret => [g[:type]] } }
  elsif (@wasm_file.element.to_a + @wasm_file.data.to_a).find { |gg| gg[:init_offset] == func_entry }
    w_func = { :local_var => [], :type => { :params => [], :ret => ['i32'] } }
  else
    raise "no wasm function or initializer at #{Expression[func_entry]}"
  end
  scope = func.initializer
  func.type.args.each { |a| scope.symbol[a.name] = a }
  stmts = scope.statements

  # wasm locals, indexed by local number: params first, then local variables
  local = []
  w_func[:type][:params].each { |t|
    local << C::Variable.new("arg_#{local.length}", wasm_type_to_type(t))
    scope.symbol[local.last.name] = local.last
    func.type.args << local.last
  }
  w_func[:local_var].each { |t|
    local << C::Variable.new("var_#{local.length}", wasm_type_to_type(t))
    scope.symbol[local.last.name] = local.last
    local.last.initializer = C::CExpression[0]
    stmts << C::Declaration.new(local.last)
  }

  # operand stack slot number => C variable currently holding it
  opstack = {}

  # *(_int32*)(local_base+16) => 16
  ce_ptr_offset = lambda { |ee, base|
    if ee.kind_of?(C::CExpression) and ee.op == :* and not ee.lexpr and ee.rexpr.kind_of?(C::CExpression) and
        not ee.rexpr.op and ee.rexpr.rexpr.kind_of?(C::CExpression)
      if not ee.rexpr.rexpr.op and ee.rexpr.rexpr.rexpr.kind_of?(C::Variable) and ee.rexpr.rexpr.rexpr.name == base
        0
      elsif ee.rexpr.rexpr.lexpr.kind_of?(C::Variable) and ee.rexpr.rexpr.lexpr.name == base and
          ee.rexpr.rexpr.rexpr.kind_of?(C::CExpression) and not ee.rexpr.rexpr.rexpr.op and ee.rexpr.rexpr.rexpr.rexpr.kind_of?(::Integer)
        if ee.rexpr.rexpr.op == :+
          ee.rexpr.rexpr.rexpr.rexpr
        elsif ee.rexpr.rexpr.op == :-
          -ee.rexpr.rexpr.rexpr.rexpr
        end
      end
    end
  }
  opstack_idx = -1
  ce_local_offset = lambda { |ee| ce_ptr_offset[ee, 'local_base'] }
  ce_opstack_offset = lambda { |ee| ce_ptr_offset[ee, 'frameptr'] }

  di_addr = nil

  # Expr => CExpr
  ce = lambda { |*e|
    c_expr = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
    dcmp.walk_ce(c_expr, true) { |ee|
      if ee.rexpr.kind_of?(::Array)
        # funcall arglist
        ee.rexpr.map! { |eee|
          if loff = ce_local_offset[eee]
            C::CExpression[local[loff/8]]
          elsif soff = ce_opstack_offset[eee]
            C::CExpression[opstack[-soff/8]]
          else
            eee
          end
        }
      end
      if loff = ce_local_offset[ee.lexpr]
        ee.lexpr = local[loff/8]
      end
      if loff = ce_local_offset[ee.rexpr]
        ee.rexpr = local[loff/8]
        ee.rexpr = C::CExpression[ee.rexpr] if not ee.op and ee.type.pointer?
      end
      if soff = ce_opstack_offset[ee.rexpr]
        # must do soff.rexpr before lexpr in case of reaffectation !
        ee.rexpr = opstack[-soff/8]
        ee.rexpr = C::CExpression[ee.rexpr] if not ee.op and ee.type.pointer?
      end
      if soff = ce_opstack_offset[ee.lexpr]
        if ee.op == :'='
          # affectation: create a new variable
          varname = "loc_#{opstack_idx += 1}"
          ne = C::Variable.new(varname, wasm_type_to_type("i#{8*dcmp.sizeof(ee.lexpr)}"))
          scope.symbol[varname] = ne
          stmts << C::Declaration.new(ne)
          opstack[-soff/8] = ne
        end
        ee.lexpr = opstack[-soff/8]
      end
    }
    ret = if loff = ce_local_offset[c_expr]
      C::CExpression[local[loff/8]]
    elsif soff = ce_opstack_offset[c_expr]
      C::CExpression[opstack[-soff/8]]
    else
      c_expr
    end
    dcmp.walk_ce(ret) { |ee| ee.with_misc :di_addr => di_addr if di_addr }
    ret
  }


  blocks_toclean = myblocks.dup
  until myblocks.empty?
    b, to = myblocks.shift
    if l = dcmp.dasm.get_label_at(b)
      stmts << C::Label.new(l)
    end

    # go !
    di_list = dcmp.dasm.decoded[b].block.list.dup
    di_list.each { |di|
      di_addr = di.address
      if di.opcode.name == 'if' or di.opcode.name == 'br_if'
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        bd = get_fwdemu_binding(di)
        if di.opcode.name == 'if'
          # 'if' jumps (past the block) when the flag is false
          cc = ce[:!, bd[:flag]]
        else
          cc = ce[bd[:flag]]
        end
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di.address)).with_misc(:di_addr => di.address)
        to.delete dcmp.dasm.normalize(n)
      elsif (di.opcode.name == 'end' or di.opcode.name == 'return') and di.opcode.props[:stopexec]
        fsig = w_func[:type]
        rettype = wasm_type_to_type(fsig[:ret].first) if fsig[:ret] and fsig[:ret].first
        if not fsig[:ret].empty?
          off = di.misc[:dcmp_stackoff] || -8
          ret = C::CExpression[ce[Indirection[[:frameptr, :+, off], dcmp.sizeof(rettype)]]]
        end
        stmts << C::Return.new(ret).with_misc(:di_addr => di.address)
      elsif (di.opcode.name == 'end' or di.opcode.name == 'else') and di.misc[:dcmp_stackoff] and di.misc[:end_of]
        # end of block returning a value: store the value in a real variable instead of the autogenerated local
        # so that if { } else {} both update the same var
        start = di.misc[:end_of]
        start_rettype = start.instruction.args.first.to_s
        if start_rettype != 'none'
          retsz = dcmp.sizeof(wasm_type_to_type(start_rettype))
          off = di.misc[:dcmp_stackoff]
          if not start.misc[:dcmp_retval] or not scope.symbol[start.misc[:dcmp_retval]]
            stmts << C::CExpression[ce[Indirection[[:frameptr, :+, off], retsz], :'=', Indirection[[:frameptr, :+, off], retsz]]]
            start.misc[:dcmp_retval] = stmts.last.lexpr.name
          else
            stmts << C::CExpression[ce[scope.symbol[start.misc[:dcmp_retval]], :'=', Indirection[[:frameptr, :+, off], retsz]]]
          end
        end
      elsif di.opcode.name == 'call'
        tg = di.misc[:x].first
        raise "no call target for #{di}" if not tg
        tg = dcmp.dasm.auto_label_at(tg, 'sub') if dcmp.dasm.get_section_at(tg)
        f = dcmp.c_parser.toplevel.symbol[tg]
        raise "no global function #{tg} for #{di}" if not f

        args = []
        bd = get_fwdemu_binding(di)
        i = 0
        while bd_arg = bd["param_#{i}"]
          args << ce[bd_arg]
          i += 1
        end
        e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di.address)
        if bd_ret = bd.index(Expression["ret_0"])
          e = ce[bd_ret, :'=', e]
        end
        stmts << e
      elsif di.opcode.name == 'call_indirect'
        args = []
        bd = get_fwdemu_binding(di)
        wt = @wasm_file.type[di.instruction.args.first.reduce]
        fptr = C::CExpression[[dcmp.c_parser.toplevel.symbol['indirect_calltable'], :[], ce[bd['func_idx']]], wasm_type_to_type(wt)]
        i = 0
        while bd_arg = bd["param_#{i}"]
          args << ce[bd_arg]
          i += 1
        end
        e = C::CExpression[fptr, :funcall, args].with_misc(:di_addr => di.address)
        if bd_ret = bd.index(Expression["ret_0"])
          e = ce[bd_ret, :'=', e]
        end
        stmts << e
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di.address)
        else
          bd.each { |k, v|
            next if k == :opstack
            e = ce[k, :'=', v]
            stmts << e if not e.kind_of?(C::Variable)    # [:eflag_s, :=, :unknown].reduce
          }
        end
      end
      di_addr = nil
    }

    case to.length
    when 0
      if not myblocks.empty? and not stmts.last.kind_of?(C::Return)
        puts "  block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts "  block #{Expression[b]} with multiple to"
    end
  end

  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  blocks_toclean.each { |b_, to_|
    dcmp.dasm.decoded[b_].block.list.each { |di|
      di.backtrace_binding = nil
    }
  }
end
365
+
366
# Aligns the C prototype of +func+ with the wasm signature of the function body
# whose :init_offset is +entry+: the return type is taken from the wasm type, and
# C arguments in excess of the wasm parameter count are removed from the
# prototype. A removed argument carrying the 'unused' attribute is dropped from
# the scope; the others become local variables declared (initialized to 0) at the
# top of the function body.
def decompile_check_abi(dcmp, entry, func)
  scope = func.initializer
  @wasm_file.function_body.to_a.each { |fb|
    next if fb[:init_offset] != entry

    proto = wasm_type_to_type(fb[:type])
    func.type.type = proto.type
    next if func.type.args.length <= proto.args.length

    # detected an argument that is actually a local variable, move into func scope
    while extra = func.type.args.delete_at(proto.args.length)
      if extra.has_attribute('unused')
        scope.symbol.delete extra.name
      else
        extra.initializer = C::CExpression[0]
        scope.statements.unshift(C::Declaration.new(extra))
      end
    end
  }
end
385
+ end
386
+ end