metasm 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +3 -0
  3. data.tar.gz.sig +0 -0
  4. data/Gemfile +3 -2
  5. data/metasm.gemspec +3 -2
  6. data/metasm.rb +4 -1
  7. data/metasm/compile_c.rb +2 -2
  8. data/metasm/cpu/arc/decode.rb +0 -21
  9. data/metasm/cpu/arc/main.rb +4 -4
  10. data/metasm/cpu/arm/decode.rb +1 -5
  11. data/metasm/cpu/arm/main.rb +3 -3
  12. data/metasm/cpu/arm64/decode.rb +2 -6
  13. data/metasm/cpu/arm64/main.rb +5 -5
  14. data/metasm/cpu/bpf/decode.rb +3 -35
  15. data/metasm/cpu/bpf/main.rb +5 -5
  16. data/metasm/cpu/bpf/render.rb +1 -12
  17. data/metasm/cpu/cy16/decode.rb +0 -6
  18. data/metasm/cpu/cy16/main.rb +3 -3
  19. data/metasm/cpu/cy16/render.rb +0 -11
  20. data/metasm/cpu/dalvik/decode.rb +4 -26
  21. data/metasm/cpu/dalvik/main.rb +20 -2
  22. data/metasm/cpu/dalvik/opcodes.rb +3 -2
  23. data/metasm/cpu/{mips/compile_c.rb → ebpf.rb} +5 -2
  24. data/metasm/cpu/ebpf/debug.rb +61 -0
  25. data/metasm/cpu/ebpf/decode.rb +142 -0
  26. data/metasm/cpu/ebpf/main.rb +58 -0
  27. data/metasm/cpu/ebpf/opcodes.rb +97 -0
  28. data/metasm/cpu/ebpf/render.rb +36 -0
  29. data/metasm/cpu/ia32/debug.rb +39 -1
  30. data/metasm/cpu/ia32/decode.rb +111 -90
  31. data/metasm/cpu/ia32/decompile.rb +45 -37
  32. data/metasm/cpu/ia32/main.rb +10 -0
  33. data/metasm/cpu/ia32/parse.rb +6 -0
  34. data/metasm/cpu/mcs51/decode.rb +1 -1
  35. data/metasm/cpu/mcs51/main.rb +11 -0
  36. data/metasm/cpu/mips/decode.rb +8 -18
  37. data/metasm/cpu/mips/main.rb +3 -3
  38. data/metasm/cpu/mips/opcodes.rb +1 -1
  39. data/metasm/cpu/msp430/decode.rb +2 -6
  40. data/metasm/cpu/msp430/main.rb +3 -3
  41. data/metasm/cpu/openrisc.rb +11 -0
  42. data/metasm/cpu/openrisc/debug.rb +106 -0
  43. data/metasm/cpu/openrisc/decode.rb +182 -0
  44. data/metasm/cpu/openrisc/decompile.rb +350 -0
  45. data/metasm/cpu/openrisc/main.rb +70 -0
  46. data/metasm/cpu/openrisc/opcodes.rb +109 -0
  47. data/metasm/cpu/openrisc/render.rb +37 -0
  48. data/metasm/cpu/ppc/decode.rb +0 -25
  49. data/metasm/cpu/ppc/main.rb +6 -6
  50. data/metasm/cpu/ppc/opcodes.rb +3 -4
  51. data/metasm/cpu/python/decode.rb +0 -20
  52. data/metasm/cpu/python/main.rb +1 -1
  53. data/metasm/cpu/sh4/decode.rb +2 -6
  54. data/metasm/cpu/sh4/main.rb +25 -23
  55. data/metasm/cpu/st20/decode.rb +0 -7
  56. data/metasm/cpu/webasm.rb +11 -0
  57. data/metasm/cpu/webasm/debug.rb +31 -0
  58. data/metasm/cpu/webasm/decode.rb +321 -0
  59. data/metasm/cpu/webasm/decompile.rb +386 -0
  60. data/metasm/cpu/webasm/encode.rb +104 -0
  61. data/metasm/cpu/webasm/main.rb +81 -0
  62. data/metasm/cpu/webasm/opcodes.rb +214 -0
  63. data/metasm/cpu/x86_64/compile_c.rb +13 -9
  64. data/metasm/cpu/x86_64/parse.rb +1 -1
  65. data/metasm/cpu/z80/decode.rb +0 -27
  66. data/metasm/cpu/z80/main.rb +3 -3
  67. data/metasm/cpu/z80/render.rb +0 -11
  68. data/metasm/debug.rb +43 -8
  69. data/metasm/decode.rb +62 -14
  70. data/metasm/decompile.rb +793 -466
  71. data/metasm/disassemble.rb +188 -131
  72. data/metasm/disassemble_api.rb +30 -17
  73. data/metasm/dynldr.rb +2 -2
  74. data/metasm/encode.rb +8 -2
  75. data/metasm/exe_format/autoexe.rb +2 -0
  76. data/metasm/exe_format/coff.rb +21 -3
  77. data/metasm/exe_format/coff_decode.rb +12 -0
  78. data/metasm/exe_format/coff_encode.rb +6 -3
  79. data/metasm/exe_format/dex.rb +13 -3
  80. data/metasm/exe_format/elf.rb +12 -2
  81. data/metasm/exe_format/elf_decode.rb +59 -1
  82. data/metasm/exe_format/main.rb +2 -0
  83. data/metasm/exe_format/mz.rb +1 -0
  84. data/metasm/exe_format/pe.rb +25 -3
  85. data/metasm/exe_format/wasm.rb +402 -0
  86. data/metasm/gui/dasm_decomp.rb +171 -95
  87. data/metasm/gui/dasm_graph.rb +61 -2
  88. data/metasm/gui/dasm_hex.rb +2 -2
  89. data/metasm/gui/dasm_main.rb +45 -19
  90. data/metasm/gui/debug.rb +13 -4
  91. data/metasm/gui/gtk.rb +12 -4
  92. data/metasm/main.rb +108 -103
  93. data/metasm/os/emulator.rb +175 -0
  94. data/metasm/os/main.rb +11 -6
  95. data/metasm/parse.rb +23 -12
  96. data/metasm/parse_c.rb +189 -135
  97. data/metasm/preprocessor.rb +16 -1
  98. data/misc/openrisc-parser.rb +79 -0
  99. data/samples/dasm-plugins/scanxrefs.rb +6 -4
  100. data/samples/dasm-plugins/selfmodify.rb +8 -8
  101. data/samples/dbg-plugins/trace_func.rb +1 -1
  102. data/samples/disassemble-gui.rb +14 -3
  103. data/samples/emubios.rb +251 -0
  104. data/samples/emudbg.rb +127 -0
  105. data/samples/lindebug.rb +79 -78
  106. data/samples/metasm-shell.rb +8 -8
  107. data/tests/all.rb +1 -1
  108. data/tests/expression.rb +2 -0
  109. data/tests/graph_layout.rb +1 -1
  110. data/tests/ia32.rb +1 -0
  111. data/tests/mips.rb +1 -1
  112. data/tests/preprocessor.rb +18 -0
  113. metadata +124 -6
  114. metadata.gz.sig +0 -0
@@ -0,0 +1,321 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2010 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ require 'metasm/cpu/webasm/opcodes'
7
+ require 'metasm/decode'
8
+
9
+ module Metasm
10
+ class WebAsm
11
# Build the first-byte dispatch table used by the decoder:
# a Hash mapping every byte value 0..255 to the list of opcodes whose +bin+ is that byte
# (bytes with no opcode map to an empty list).
def build_bin_lookaside
  table = {}
  256.times { |byte| table[byte] = [] }
  opcode_list.each { |opcode| table[opcode.bin] << opcode }
  table
end
18
+
19
# Decode one LEB128 integer from +ed+ (anything answering read(1) with a 1-byte string).
# At most 10 bytes are consumed; each contributes its low 7 bits, least significant group first,
# and a byte with the high bit clear ends the number.
# When +signed+ is true the result is sign-extended from the number of bits actually read.
def decode_uleb(ed, signed=false)
  value = 0
  shift = 0
  10.times do
    byte = ed.read(1).unpack('C').first.to_i
    value |= (byte & 0x7f) << shift
    shift += 7
    break if (byte & 0x80) == 0
  end
  signed ? Expression.make_signed(value, shift) : value
end
30
+
31
# When starting disassembly, pre-decode every instruction from +addr+ up to the function's final 'end'
# and fix up the control-flow xrefs of the structured instructions (if/else/block/loop/br*).
#
# dasm:: the disassembler; decoded instructions are stored in dasm.misc[:cpu_context][:di_cache] (address => di)
# addr:: address of the first instruction of the code sequence
#
# Each decoded instruction gets misc[:code_start]; branch targets are accumulated in misc[:x];
# 'else' and non-final 'end' get misc[:end_of] pointing to the matching opening instruction.
# Returns dasm.misc[:cpu_context]. Raises RuntimeError on an 'else' that does not close an 'if'
# or on a branch depth larger than the current nesting.
def disassemble_init_context(dasm, addr)
  dasm.misc ||= {}
  dasm.misc[:cpu_context] ||= {}
  cache = dasm.misc[:cpu_context][:di_cache] ||= {}
  addr = dasm.normalize(addr)
  return dasm.misc[:cpu_context] if cache[addr]

  code_start = addr
  # one frame per open construct. For if/loop/block frames, frame[0] is the opening instruction
  # (later replaced by its 'else'); the remaining entries are forward branches waiting for the matching 'end'
  stack = [[]]
  set_misc_x = lambda { |di, tg| di.misc[:x] ||= [] ; di.misc[:x] |= [tg] }
  while di = dasm.disassemble_instruction(addr)
    cache[addr] = di
    di.misc ||= {}
    di.misc[:code_start] = code_start
    case di.opcode.name
    when 'if', 'loop', 'block'
      stack << [di]
    when 'else'
      # the frame opener must be the 'if'; branches queued behind it (eg 'br 0' in the 'then' part)
      # are legitimate and must stay queued until the 'end'
      opener = stack.last.first
      raise "bad #{di} #{stack.last.inspect}" if not opener or opener.opcode.name != 'if'
      set_misc_x[opener, di.next_addr]	# 'if' points past here
      di.misc[:end_of] = opener		# store matching 'if'
      stack.last[0] = di			# 'else' replace 'if'
    when 'br', 'br_if', 'br_table'
      if di.opcode.name == 'br_table'
        depths = di.instruction.args.first.ary.uniq | [di.instruction.args.first.default]
      else
        depths = [di.instruction.args.first.reduce]
      end
      depths.each { |depth|
        tg = stack[-depth-1]	# XXX skip if/else in the stack ?
        raise "bad #{di} (#{stack.length})" if not tg
        if tg.first and tg.first.opcode.name == 'loop'
          # branching to a loop jumps back to its start, which is already known
          set_misc_x[di, tg.first.address]
        else
          # forward branch: resolved when the frame's 'end' is decoded
          tg << di
        end
      }
    when 'end'
      dis = stack.pop
      dis.each { |ddi| set_misc_x[ddi, di.next_addr] if ddi.opcode.name != 'loop' and ddi.opcode.name != 'block' }
      if stack.empty?
        # stack empty: end of func
        di.opcode = @opcode_list.find { |op| op.name == 'end' and op.props[:stopexec] }
        break
      else
        if dis.first
          di.misc[:end_of] = dis.first	# store matching loop/block/if
          if dis.first.opcode.name == 'else'
            di.misc[:end_of] = dis.first.misc[:end_of]	# else patched stack.last, recover original 'if'
          end
        end
        di.opcode = @opcode_list.find { |op| op.name == 'end' and not op.props[:stopexec] }
      end
    end
    addr = di.next_addr
  end

  dasm.misc[:cpu_context]
end
91
+
92
# Return the instruction at +di_addr+ from the pre-decoded instruction cache.
# +ctx+ is the cpu context holding :di_cache; when nil it is built by disassemble_init_context.
# On a cache miss the address is normalized, the code starting there is pre-decoded,
# and the lookup is retried with the normalized address.
def decode_instruction_context(dasm, edata, di_addr, ctx)
  ctx = disassemble_init_context(dasm, di_addr) unless ctx
  cached = ctx[:di_cache][di_addr]
  return cached if cached

  di_addr = dasm.normalize(di_addr)
  disassemble_init_context(dasm, di_addr)
  ctx[:di_cache][di_addr]
end
101
+
102
# Read one opcode byte from +edata+ and look it up in bin_lookaside.
# Returns a new DecodedInstruction with its opcode set to the first candidate,
# or nil when no opcode is registered for that byte.
def decode_findopcode(edata)
  di = DecodedInstruction.new(self)
  byte = edata.decode_imm(:u8, @endianness)
  candidate = bin_lookaside[byte].first
  di.opcode = candidate
  candidate ? di : nil
end
107
+
108
# Decode the immediate operands of +di+ (whose opcode is already set) from +edata+.
# Fills di.instruction.opname and di.instruction.args, sets di.bin_length to the operand
# bytes consumed plus 1 for the opcode byte, and returns +di+.
# Raises SyntaxError when the opcode declares an argument kind unknown to the decoder.
def decode_instr_op(edata, di)
  start_ptr = edata.ptr
  opcode = di.opcode
  di.instruction.opname = opcode.name

  opcode.args.each { |argtype|
    arg = case argtype
          when :f32 then Expression[edata.decode_imm(:u32, @endianness)]
          when :f64 then Expression[edata.decode_imm(:u64, @endianness)]
          when :memoff then Memref.new(decode_uleb(edata))
          when :uleb then Expression[decode_uleb(edata)]
          when :sleb then Expression[decode_uleb(edata, true)]
          when :blocksig then BlockSignature.new(decode_uleb(edata, true))
          when :br_table then decode_br_table(edata)
          else raise SyntaxError, "Internal error: invalid argument #{argtype} in #{opcode.name}"
          end
    di.instruction.args << arg
  }

  di.bin_length = 1 + edata.ptr - start_ptr
  di
end
129
+
130
# Post-process a decoded call instruction.
# 'call': records the callee index in di.misc[:tg_func_nr]; when the wasm file knows the function,
# the first argument and di.misc[:x] become its code offset, or "module_field" when it has no
# :init_offset; otherwise the target is :default.
# 'call_indirect': the target is always :default.
# Returns +di+.
def decode_instr_interpret(di, addr)
  name = di.opcode.name
  if name == 'call'
    func_nr = di.instruction.args.first.reduce
    di.misc ||= {}
    di.misc[:tg_func_nr] = func_nr
    func = @wasm_file.get_function_nr(func_nr)
    if func
      target = func[:init_offset] || "#{func[:module]}_#{func[:field]}"
      di.instruction.args[0] = Expression[target]
      di.misc[:x] = [target]
    else
      di.misc[:x] = [:default]
    end
  elsif name == 'call_indirect'
    di.misc ||= {}
    di.misc[:x] = [:default]
  end
  di
end
149
+
150
# Decode the operand of a br_table instruction: a count, that many branch depths,
# then the default depth (all read with decode_uleb). Returns a BrTable.
def decode_br_table(edata)
  nr_targets = decode_uleb(edata)
  targets = (1..nr_targets).map { decode_uleb(edata) }
  fallback = decode_uleb(edata)
  BrTable.new(targets, fallback)
end
157
+
158
# Build (once) the table mapping each opcode name to a lambda { |di| binding } describing the
# symbolic effect of the instruction. Bindings are Hashes: target (symbol / Indirection / name) => Expression.
#
# Model used by the bindings below:
# * :opstack is the operand stack pointer; every slot is 8 bytes and pushing decreases :opstack
#   (a push is add_opstack[-8, ...], a pop add_opstack[8, ...])
# * locals are 8-byte slots relative to :local_base, linear memory is relative to :mem
# * :callstack holds the return addresses
# * instructions that are not modeled precisely yield :incomplete_binding
#
# Opcode names without an entry here (the case yields nil) are reported as incomplete by get_backtrace_binding.
# Returns @backtrace_binding.
def init_backtrace_binding
  @backtrace_binding ||= {}

  typesz = Hash.new(8).update 'i32' => 4, 'f32' => 4
  # operand stack slot at byte offset off, accessed with size sz
  opstack = lambda { |off, sz| Indirection[Expression[:opstack, :+, off].reduce, sz] }
  # binding moving the operand stack pointer by delta, merged with the other effects
  add_opstack = lambda { |delta, hash| { :opstack => Expression[:opstack, :+, delta].reduce }.update hash }
  globsz = lambda { |di|
    glob_nr = Expression[di.instruction.args.first].reduce
    g = @wasm_file.get_global_nr(glob_nr)
    g ? typesz[g[:type]] : 8
  }
  global = lambda { |di|
    glob_nr = Expression[di.instruction.args.first].reduce
    g = @wasm_file.get_global_nr(glob_nr)
    n = g && g[:module] ? "#{g[:module]}_#{g[:field]}" : "global_#{glob_nr}"
    Indirection[n, globsz[di]]
  }
  locsz = lambda { |di|
    loc_nr = Expression[di.instruction.args.first].reduce
    ci = @wasm_file.code_info[di.misc[:code_start]]
    # local indexes cover the function parameters first, then the declared local variables
    next typesz[ci[:params][loc_nr]] if loc_nr < ci[:params].length
    loc_nr -= ci[:params].length
    next typesz[ci[:local_var][loc_nr]] if ci[:local_var][loc_nr]
    8
  }
  local = lambda { |di|
    loc_nr = Expression[di.instruction.args.first].reduce
    Indirection[[:local_base, :+, loc_nr*8], locsz[di]]
  }

  opcode_list.map { |ol| ol.name }.uniq.each { |opname|
    # operand size from the type prefix (i32/f32 => 4, anything else => 8)
    sz = (opname[1, 2] == '32' ? 4 : 8)
    @backtrace_binding[opname] ||= case opname
    when 'call', 'call_indirect'
      lambda { |di|
        stack_off = 0
        if opname == 'call'
          f = @wasm_file.get_function_nr(di.misc[:tg_func_nr])
          proto = f ? f[:type] : {}
          # TODO use local_base
          h = { :callstack => Expression[:callstack, :+, 8], Indirection[:callstack, 8] => Expression[di.next_addr] }
          proto_params_offset = 0
        else
          proto = @wasm_file.type[di.instruction.args.first.reduce]
          h = { :callstack => Expression[:callstack, :+, 8], Indirection[:callstack, 8] => Expression[di.next_addr], 'func_idx' => Expression[opstack[0, 4]] }
          # the table index sits on top of the arguments
          stack_off += 8
          proto_params_offset = 1
        end
        stack_off -= 8*proto[:ret].to_a.length
        stack_off += 8*proto[:params].to_a.length
        h.update :opstack => Expression[:opstack, :+, stack_off]
        proto[:ret].to_a.each_with_index { |rt, i| h.update opstack[8*i, typesz[rt]] => Expression["ret_#{i}"] }
        proto[:params].to_a.each_with_index { |pt, i| h.update "param_#{i}" => Expression[opstack[8*(proto[:params].length-i-1+proto_params_offset), typesz[pt]]] }
        h
      }
    when 'if', 'br_if'; lambda { |di| add_opstack[ 8, :flag => Expression[opstack[0, 8]]] }
    when 'block', 'loop', 'br', 'nop', 'else'; lambda { |di| {} }
    when 'end', 'return'; lambda { |di| di.opcode.props[:stopexec] ? { :callstack => Expression[:callstack, :-, 8] } : {} }
    when 'drop'; lambda { |di| add_opstack[8, {}] }
    when 'select'; lambda { |di| add_opstack[16, opstack[0, 8] => Expression[[opstack[8, 8], :*, [1, :-, opstack[0, 8]]], :|, [opstack[16, 8], :*, opstack[0, 8]]]] }
    when 'get_local'; lambda { |di| add_opstack[-8, opstack[0, locsz[di]] => Expression[local[di]]] }
    when 'set_local'; lambda { |di| add_opstack[ 8, local[di] => Expression[opstack[0, locsz[di]]]] }
    when 'tee_local'; lambda { |di| add_opstack[ 0, local[di] => Expression[opstack[0, locsz[di]]]] }
    when 'get_global'; lambda { |di| add_opstack[-8, opstack[0, globsz[di]] => Expression[global[di]]] }
    when 'set_global'; lambda { |di| add_opstack[ 8, global[di] => Expression[opstack[0, globsz[di]]]] }
    when /\.load(.*)/
      mode = $1; memsz = (mode.include?('32') ? 4 : mode.include?('16') ? 2 : mode.include?('8') ? 1 : sz)
      lambda { |di| add_opstack[ 0, opstack[0, sz] => Expression[Indirection[[opstack[0, 4], :+, [:mem, :+, di.instruction.args[1].off]], memsz]]] }
    when /\.store(.*)/
      mode = $1; memsz = (mode.include?('32') ? 4 : mode.include?('16') ? 2 : mode.include?('8') ? 1 : sz)
      lambda { |di| add_opstack[ 16, Indirection[[opstack[8, 4], :+, [:mem, :+, di.instruction.args[1].off]], memsz] => Expression[opstack[0, sz], :&, (1 << (8*memsz)) - 1]] }
    when /\.const/; lambda { |di| add_opstack[-8, opstack[0, sz] => Expression[di.instruction.args.first.reduce]] }

    # float-only operations: must be tested before the comparison patterns,
    # as /\.ne/ would otherwise also catch f32.neg / f32.nearest
    when /f.*\.(abs|neg|ceil|floor|trunc|nearest|sqrt)/; lambda { |di| add_opstack[0, :incomplete_binding => 1] }
    # two operands, one result: pop one slot
    when /f.*\.(min|max|copysign)/; lambda { |di| add_opstack[8, :incomplete_binding => 1] }

    when /\.eqz/; lambda { |di| add_opstack[ 0, opstack[0, 8] => Expression[opstack[0, sz], :==, 0]] }
    when /\.eq/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :==, opstack[0, sz]]] }
    when /\.ne/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :!=, opstack[0, sz]]] }
    when /\.lt/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :<, opstack[0, sz]]] }
    when /\.gt/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :>, opstack[0, sz]]] }
    when /\.le/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :<=, opstack[0, sz]]] }
    when /\.ge/; lambda { |di| add_opstack[ 8, opstack[0, 8] => Expression[opstack[8, sz], :>=, opstack[0, sz]]] }

    when /\.(clz|ctz|popcnt)/; lambda { |di| add_opstack[ 0, :bits => Expression[opstack[0, sz]]] }
    when /\.add/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :+, opstack[0, sz]]] }
    when /\.sub/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :-, opstack[0, sz]]] }
    when /\.mul/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :*, opstack[0, sz]]] }
    when /\.div/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :/, opstack[0, sz]]] }
    when /\.rem/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :%, opstack[0, sz]]] }
    when /\.and/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :&, opstack[0, sz]]] }
    when /\.or/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :|, opstack[0, sz]]] }
    when /\.xor/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :^, opstack[0, sz]]] }
    when /\.shl/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :<<, opstack[0, sz]]] }
    when /\.shr/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[opstack[8, sz], :>>, opstack[0, sz]]] }
    when /\.rotl/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[[opstack[8, sz], :<<, opstack[0, sz]], :|, [opstack[8, sz], :>>, [8*sz, :-, opstack[0, sz]]]]] }
    when /\.rotr/; lambda { |di| add_opstack[ 8, opstack[0, sz] => Expression[[opstack[8, sz], :>>, opstack[0, sz]], :|, [opstack[8, sz], :<<, [8*sz, :-, opstack[0, sz]]]]] }
    when /i32.wrap/; lambda { |di| add_opstack[ 0, opstack[0, 4] => Expression[opstack[0, 8]]] }
    when /i64.extend/; lambda { |di| add_opstack[ 0, opstack[0, 8] => Expression[opstack[0, 4]]] }
    when /trunc|convert|promote|demote|reinterpret/; lambda { |di| add_opstack[0, :incomplete_binding => 1] }
    end
  }

  @backtrace_binding
end
261
+
262
# Return the symbolic binding of +di+: the result of the handler registered for its opcode name
# (an empty Hash when the handler yields nil). Opcodes without a handler are reported when
# $VERBOSE is set and produce an :incomplete_binding marker.
def get_backtrace_binding(di)
  handler = backtrace_binding[di.opcode.name]
  return handler[di] || {} if handler

  puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
  { :incomplete_binding => Expression[1] }
end
270
+
271
# Adapt a backtrace binding +fbd+ for forward emulation: returns a new Hash where every
# Indirection key has :opstack replaced by the new :opstack value of the binding, then reduced.
# Indirections whose pointer starts with another Indirection (store8 etc) and
# non-Indirection keys are copied unchanged.
def fix_fwdemu_binding(di, fbd)
  fixed = {}
  fbd.each { |key, val|
    if key.kind_of?(Indirection) and not key.target.lexpr.kind_of?(Indirection)
      key = key.bind(:opstack => fbd[:opstack]).reduce_rec
    end
    fixed[key] = val
  }
  fixed
end
284
+
285
# List the execution targets of +di+:
# * a function-ending 'return'/'end' (stopexec) goes to the return address stored at [callstack]
# * instructions that do not set the instruction pointer have no target
# * otherwise the targets pre-computed in di.misc[:x], flattened ([] when di has no misc)
def get_xrefs_x(dasm, di)
  props = di.opcode.props
  if props[:stopexec] and %w[return end].include?(di.opcode.name)
    return [Indirection[:callstack, 8]]
  end
  return [] unless props[:setip]

  di.misc ? [di.misc[:x]].flatten : []
end
296
+
297
# True when +expr+ is the return address slot, ie the 8-byte value at [callstack].
# A nil/false +expr+ is returned as is.
def backtrace_is_function_return(expr, di=nil)
  return expr unless expr

  retaddr = Indirection[:callstack, 8]
  Expression[expr] == Expression[retaddr]
end
300
+
301
# Generic model of an unknown function: it returns to the address stored at [callstack]
# and pops that return address from the call stack.
def disassembler_default_func
  func = DecodedFunction.new
  retaddr = Indirection[:callstack, 8]
  func.backtracked_for << BacktraceTrace.new(retaddr, :default, retaddr, :x, nil)
  func.backtrace_binding = { :callstack => Expression[:callstack, :-, 8] }
  func
end
308
+
309
# Set the binding of the decoded function +f+: whatever its body does, a function is modeled
# as only popping its return address from the call stack. Returns the binding.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  callee_effect = { :callstack => Expression[:callstack, :-, 8] }
  f.backtrace_binding = callee_effect
end
312
+
313
# Tell whether +expr+ depends on one of the stack-like bases.
# Returns :local_base or :opstack (the former wins when both appear), nil otherwise.
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  [:local_base, :opstack].find { |base| externals.include?(base) }
end
316
+
317
# The C prototype arguments are not used: every function gets the generic
# model built by disassembler_default_func.
def decode_c_function_prototype(cp, sym, orig=nil)
  default_model = disassembler_default_func
  default_model
end
320
+ end
321
+ end
@@ -0,0 +1,386 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/cpu/webasm/main'
8
+
9
+ module Metasm
10
+ class WebAsm
11
# Convert a wasm type to a C type:
# * 'i32'/'i64'/'f32'/'f64' => the int/longlong/float/double base type
# * 'anyfunc' => a function returning void
# * a signature Hash (:params, :ret) => a C function with unnamed arguments,
#   returning the first :ret type (void when there is none)
# Anything else yields nil.
def wasm_type_to_type(t)
  scalar = { 'i32' => :int, 'i64' => :longlong, 'f32' => :float, 'f64' => :double }[t]
  return C::BaseType.new(scalar) if scalar
  return C::Function.new(C::BaseType.new(:void)) if t == 'anyfunc'
  return nil unless t.kind_of?(Hash)

  rettype = t[:ret].first ? wasm_type_to_type(t[:ret].first) : C::BaseType.new(:void)
  params = t[:params].map { |ptype| C::Variable.new(nil, wasm_type_to_type(ptype)) }
  C::Function.new(rettype, params)
end
24
+
25
# Populate the decompiler's C toplevel with the file-level objects of the wasm module:
# * 'mem', a static char pointer standing for the linear memory
# * one extern declaration per imported global / function, named "module_field"
# * one static 1-element array per module-defined global, named "global_<index>" (indexes continue
#   after the imported globals); its initializer code ("global_<index>_init") is decompiled and,
#   when it is a plain 'return <expr>', folded into the variable initializer
# * 'indirect_calltable' for the first table, filled from the element segments with the
#   address of the referenced functions
#
# dcmp:: the decompiler (provides c_parser, dasm, decompile_func)
def decompile_init(dcmp)
  mem = dcmp.c_parser.toplevel.symbol['mem'] = C::Variable.new('mem', C::Pointer.new(C::BaseType.new(:char)))
  mem.storage = :static
  dcmp.c_parser.toplevel.statements << C::Declaration.new(mem)

  global_idx = 0
  @wasm_file.import.to_a.each { |i|
    case i[:kind]
    when 'global'
      # imported globals come first in the global index space
      global_idx += 1
      var = C::Variable.new
      var.name = '%s_%s' % [i[:module], i[:field]]
      var.type = C::Array.new(wasm_type_to_type(i[:type]), 1)
      var.storage = :extern
      dcmp.c_parser.toplevel.symbol[var.name] = var
      dcmp.c_parser.toplevel.statements << C::Declaration.new(var)
    when 'function'
      var = C::Variable.new
      var.name = '%s_%s' % [i[:module], i[:field]]
      var.type = wasm_type_to_type(i[:type])
      var.storage = :extern
      dcmp.c_parser.toplevel.symbol[var.name] = var
      dcmp.c_parser.toplevel.statements << C::Declaration.new(var)
    end
  }

  @wasm_file.global.to_a.each { |g|
    g_name = 'global_%d' % global_idx
    global_idx += 1
    var = C::Variable.new
    var.name = g_name
    var.type = C::Array.new(wasm_type_to_type(g[:type]), 1)
    var.storage = :static
    dcmp.c_parser.toplevel.symbol[var.name] = var
    dcmp.c_parser.toplevel.statements << C::Declaration.new(var)

    # decompile initializers
    g_init_name = g_name + '_init'
    dcmp.dasm.disassemble(g_init_name)
    dcmp.decompile_func(g_init_name)
    if init = dcmp.c_parser.toplevel.symbol[g_init_name] and init.initializer.kind_of?(C::Block) and
        init.initializer.statements.first.kind_of?(C::Return)
      # the initializer is a single 'return expr': use expr directly and drop the helper function
      dcmp.c_parser.toplevel.symbol[g_name].initializer = [ init.initializer.statements.first.value ]
      dcmp.c_parser.toplevel.symbol.delete(g_init_name)
      dcmp.c_parser.toplevel.statements.delete_if { |st| st.kind_of?(C::Declaration) and st.var.name == g_init_name }
    end
  }

  @wasm_file.table.to_a.each_with_index { |t, idx|
    # only the first table is handled
    break if idx > 0
    t_name = 'indirect_calltable'
    var = C::Variable.new
    var.name = t_name
    sz = t[:limits][:initial_size]
    var.type = C::Array.new(C::Pointer.new(wasm_type_to_type(t[:type])), sz)
    var.storage = :static
    dcmp.c_parser.toplevel.symbol[var.name] = var
    dcmp.c_parser.toplevel.statements << C::Declaration.new(var)
    var.initializer = [C::CExpression[0]] * sz

    # initializer
    @wasm_file.element.to_a.each_with_index { |e, eidx|
      next if e[:table_index] != idx
      # address of the code that evals the index at which to place the elements inside the table
      e_init_name = "element_#{eidx}_init_addr"
      dcmp.dasm.disassemble(e_init_name)
      dcmp.decompile_func(e_init_name)
      if init = dcmp.c_parser.toplevel.symbol[e_init_name] and init.initializer.kind_of?(C::Block) and
          init.initializer.statements.first.kind_of?(C::Return)
        eoff = init.initializer.statements.first.value.reduce(dcmp.c_parser)
        dcmp.c_parser.toplevel.symbol.delete(e_init_name)
        dcmp.c_parser.toplevel.statements.delete_if { |st| st.kind_of?(C::Declaration) and st.var.name == e_init_name }
        e[:elems].each_with_index { |ev, vidx|
          # table 0 is the only table in a wasm file and contains a list of function indexes used with the call_indirect asm instruction
          # e_init_name gives the index at which we should put e[:elems], and we convert the func indexes into C names
          vidx += eoff
          if vidx >= sz or vidx < 0
            puts "W: initializing indirect_calltable, would put #{ev} beyond end of table (#{vidx} > #{sz})"
            next
          end
          if not tg_func = @wasm_file.get_function_nr(ev)
            puts "W: initializing indirect_calltable, bad func index #{ev}"
            next
          end
          if tg_func[:init_offset]
            funcname = dcmp.dasm.get_label_at(tg_func[:init_offset]) || "func_at_#{'%x' % tg_func[:init_offset]}"
          else
            # imported function: it has no code offset, refer to it by the name of its extern declaration
            funcname = "#{tg_func[:module]}_#{tg_func[:field]}"
          end
          # XXX should decompile funcname now ?
          var.initializer[vidx] = C::CExpression[:&, C::Variable.new(funcname)]
        }
      end
    }
  }
end
117
+
118
# Calling convention description, built on first use and then shared:
# a function call changes no register (:changed is empty).
def abi_funcall
  return @abi_funcall if @abi_funcall

  @abi_funcall = { :changed => [] }
end
121
+
122
# Express the operand stack pointer of every block relative to :frameptr (its value at function
# entry), yielding each block while that temporary binding is installed in dasm.address_binding.
# After each block ending with 'end', 'return' or 'else', the constant stack offset reached at that
# instruction (when it is a single Integer) is stored in di.misc[:dcmp_stackoff].
# All the address bindings touched are restored to their previous state before returning.
#
# dasm:: the disassembler
# funcstart:: address of the function entry
# blocks:: the instruction blocks of the function
def decompile_makestackvars(dasm, funcstart, blocks)
  @decomp_mkstackvars_terminals = [:frameptr, :local_base, :mem]
  oldbd = {}
  oldbd[funcstart] = dasm.address_binding[funcstart]
  dasm.address_binding[funcstart] = { :opstack => Expression[:frameptr] }
  blocks.each { |block|
    # save the pre-existing binding only the first time an address is seen: the entry block usually
    # starts at funcstart, whose binding was already replaced above and must not be saved as the original
    oldbd[block.address] = dasm.address_binding[block.address] if not oldbd.has_key?(block.address)
    stkoff = dasm.backtrace(:opstack, block.address, :snapshot_addr => funcstart)
    dasm.address_binding[block.address] = { :opstack => Expression[:frameptr, :+, stkoff[0]-:frameptr] }
    yield block
    # store frameptr offset at each 'end' 'return' or 'else' instruction
    if di = block.list.last and %w[end return else].include?(di.opcode.name)
      stkoff = dasm.backtrace(:opstack, di.address, :snapshot_addr => funcstart)
      if stkoff.length == 1 and (stkoff[0] - :frameptr).kind_of?(::Integer)
        di.misc[:dcmp_stackoff] = stkoff[0] - :frameptr
      end
    end
  }
  # restore: put back what was there, or remove the entries we created
  oldbd.each { |a, b| b ? dasm.address_binding[a] = b : dasm.address_binding.delete(a) }
end
142
+
143
# Per-instruction dependency hook: intentionally does nothing and returns nil.
def decompile_func_finddeps_di(dcmp, func, di, a, w)
  nil
end
145
+
146
# Block dependency hook: no dependency is computed, a new empty Hash is returned on each call.
def decompile_func_finddeps(dcmp, blocks, func)
  Hash.new
end
149
+
150
# Convert the disassembled blocks of one function into C statements appended to the function body.
#
# dcmp:: the decompiler
# myblocks:: list of [block_address, [successor addresses]] pairs; the list is consumed (emptied) here
# deps:: unused
# func:: the C function variable being built (func.initializer is its body scope, func.type.args its arguments)
# nextaddr:: address of the code following the last block, used to avoid a useless trailing goto
#
# The code sequence may be a real function body, or the initializer expression of a global,
# element or data segment (modeled as a function without arguments returning one value).
# Operand stack slots and locals appearing in the instruction bindings (as memory accesses
# relative to 'frameptr' / 'local_base') are replaced by C variables.
# Raises RuntimeError when the entry address matches nothing known in the wasm file.
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  func_entry = myblocks.first[0]
  # the sections may be absent from the file: to_a makes the lookups nil-safe
  w_func = @wasm_file.function_body.to_a.find { |fb| fb[:init_offset] == func_entry }
  if not w_func
    if g = @wasm_file.global.to_a.find { |gg| gg[:init_offset] == func_entry }
      w_func = { :local_var => [], :type => { :params => [], :ret => [g[:type]] } }
    elsif (@wasm_file.element.to_a + @wasm_file.data.to_a).find { |gg| gg[:init_offset] == func_entry }
      w_func = { :local_var => [], :type => { :params => [], :ret => ['i32'] } }
    end
  end
  raise "no wasm function or initializer found at #{func_entry.inspect}" if not w_func
  scope = func.initializer
  func.type.args.each { |a| scope.symbol[a.name] = a }
  stmts = scope.statements

  # C variables for the wasm locals: function parameters first, then the declared local variables
  local = []
  w_func[:type][:params].each { |t|
    local << C::Variable.new("arg_#{local.length}", wasm_type_to_type(t))
    scope.symbol[local.last.name] = local.last
    func.type.args << local.last
  }
  w_func[:local_var].each { |t|
    local << C::Variable.new("var_#{local.length}", wasm_type_to_type(t))
    scope.symbol[local.last.name] = local.last
    local.last.initializer = C::CExpression[0]
    stmts << C::Declaration.new(local.last)
  }

  # operand stack slot index => C variable currently holding its value
  opstack = {}

  # *(_int32*)(local_base+16) => 16
  ce_ptr_offset = lambda { |ee, base|
    if ee.kind_of?(C::CExpression) and ee.op == :* and not ee.lexpr and ee.rexpr.kind_of?(C::CExpression) and
        not ee.rexpr.op and ee.rexpr.rexpr.kind_of?(C::CExpression)
      if not ee.rexpr.rexpr.op and ee.rexpr.rexpr.rexpr.kind_of?(C::Variable) and ee.rexpr.rexpr.rexpr.name == base
        0
      elsif ee.rexpr.rexpr.lexpr.kind_of?(C::Variable) and ee.rexpr.rexpr.lexpr.name == base and
          ee.rexpr.rexpr.rexpr.kind_of?(C::CExpression) and not ee.rexpr.rexpr.rexpr.op and ee.rexpr.rexpr.rexpr.rexpr.kind_of?(::Integer)
        if ee.rexpr.rexpr.op == :+
          ee.rexpr.rexpr.rexpr.rexpr
        elsif ee.rexpr.rexpr.op == :-
          -ee.rexpr.rexpr.rexpr.rexpr
        end
      end
    end
  }
  opstack_idx = -1
  ce_local_offset = lambda { |ee| ce_ptr_offset[ee, 'local_base'] }
  ce_opstack_offset = lambda { |ee| ce_ptr_offset[ee, 'frameptr'] }

  # address of the instruction being converted, attached to the generated expressions
  di_addr = nil

  # Expr => CExpr
  ce = lambda { |*e|
    c_expr = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
    dcmp.walk_ce(c_expr, true) { |ee|
      if ee.rexpr.kind_of?(::Array)
        # funcall arglist
        ee.rexpr.map! { |eee|
          if loff = ce_local_offset[eee]
            C::CExpression[local[loff/8]]
          elsif soff = ce_opstack_offset[eee]
            C::CExpression[opstack[-soff/8]]
          else
            eee
          end
        }
      end
      if loff = ce_local_offset[ee.lexpr]
        ee.lexpr = local[loff/8]
      end
      if loff = ce_local_offset[ee.rexpr]
        ee.rexpr = local[loff/8]
        ee.rexpr = C::CExpression[ee.rexpr] if not ee.op and ee.type.pointer?
      end
      if soff = ce_opstack_offset[ee.rexpr]
        # must do soff.rexpr before lexpr in case of reaffectation !
        ee.rexpr = opstack[-soff/8]
        ee.rexpr = C::CExpression[ee.rexpr] if not ee.op and ee.type.pointer?
      end
      if soff = ce_opstack_offset[ee.lexpr]
        if ee.op == :'='
          # affectation: create a new variable
          varname = "loc_#{opstack_idx += 1}"
          ne = C::Variable.new(varname, wasm_type_to_type("i#{8*dcmp.sizeof(ee.lexpr)}"))
          scope.symbol[varname] = ne
          stmts << C::Declaration.new(ne)
          opstack[-soff/8] = ne
        end
        ee.lexpr = opstack[-soff/8]
      end
    }
    ret = if loff = ce_local_offset[c_expr]
      C::CExpression[local[loff/8]]
    elsif soff = ce_opstack_offset[c_expr]
      C::CExpression[opstack[-soff/8]]
    else
      c_expr
    end
    dcmp.walk_ce(ret) { |ee| ee.with_misc :di_addr => di_addr if di_addr }
    ret
  }


  blocks_toclean = myblocks.dup
  until myblocks.empty?
    b, to = myblocks.shift
    if l = dcmp.dasm.get_label_at(b)
      stmts << C::Label.new(l)
    end

    # go !
    di_list = dcmp.dasm.decoded[b].block.list.dup
    di_list.each { |di|
      di_addr = di.address
      if di.opcode.name == 'if' or di.opcode.name == 'br_if'
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        bd = get_fwdemu_binding(di)
        if di.opcode.name == 'if'
          # the recorded target of 'if' is the code run when the condition is false
          cc = ce[:!, bd[:flag]]
        else
          cc = ce[bd[:flag]]
        end
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di.address)).with_misc(:di_addr => di.address)
        to.delete dcmp.dasm.normalize(n)
      elsif (di.opcode.name == 'end' or di.opcode.name == 'return') and di.opcode.props[:stopexec]
        fsig = w_func[:type]
        rettype = wasm_type_to_type(fsig[:ret].first) if fsig[:ret] and fsig[:ret].first
        if not fsig[:ret].empty?
          off = di.misc[:dcmp_stackoff] || -8
          ret = C::CExpression[ce[Indirection[[:frameptr, :+, off], dcmp.sizeof(rettype)]]]
        end
        stmts << C::Return.new(ret).with_misc(:di_addr => di.address)
      elsif (di.opcode.name == 'end' or di.opcode.name == 'else') and di.misc[:dcmp_stackoff] and di.misc[:end_of]
        # end of block returning a value: store the value in a real variable instead of the autogenerated local
        # so that if { } else {} both update the same var
        start = di.misc[:end_of]
        start_rettype = start.instruction.args.first.to_s
        if start_rettype != 'none'
          retsz = dcmp.sizeof(wasm_type_to_type(start_rettype))
          off = di.misc[:dcmp_stackoff]
          if not start.misc[:dcmp_retval] or not scope.symbol[start.misc[:dcmp_retval]]
            stmts << C::CExpression[ce[Indirection[[:frameptr, :+, off], retsz], :'=', Indirection[[:frameptr, :+, off], retsz]]]
            start.misc[:dcmp_retval] = stmts.last.lexpr.name
          else
            stmts << C::CExpression[ce[scope.symbol[start.misc[:dcmp_retval]], :'=', Indirection[[:frameptr, :+, off], retsz]]]
          end
        end
      elsif di.opcode.name == 'call'
        tg = di.misc[:x].first
        raise "no call target for #{di}" if not tg
        tg = dcmp.dasm.auto_label_at(tg, 'sub') if dcmp.dasm.get_section_at(tg)
        f = dcmp.c_parser.toplevel.symbol[tg]
        raise "no global function #{tg} for #{di}" if not f

        args = []
        bd = get_fwdemu_binding(di)
        i = 0
        while bd_arg = bd["param_#{i}"]
          args << ce[bd_arg]
          i += 1
        end
        e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di.address)
        if bd_ret = bd.index(Expression["ret_0"])
          e = ce[bd_ret, :'=', e]
        end
        stmts << e
      elsif di.opcode.name == 'call_indirect'
        args = []
        bd = get_fwdemu_binding(di)
        wt = @wasm_file.type[di.instruction.args.first.reduce]
        fptr = C::CExpression[[dcmp.c_parser.toplevel.symbol['indirect_calltable'], :[], ce[bd['func_idx']]], wasm_type_to_type(wt)]
        i = 0
        while bd_arg = bd["param_#{i}"]
          args << ce[bd_arg]
          i += 1
        end
        e = C::CExpression[fptr, :funcall, args].with_misc(:di_addr => di.address)
        if bd_ret = bd.index(Expression["ret_0"])
          e = ce[bd_ret, :'=', e]
        end
        stmts << e
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          # effect not modeled: keep the instruction as inline asm
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di.address)
        else
          bd.each { |k, v|
            next if k == :opstack
            e = ce[k, :'=', v]
            stmts << e if not e.kind_of?(C::Variable)	# [:eflag_s, :=, :unknown].reduce
          }
        end
      end
      di_addr = nil
    }

    case to.length
    when 0
      if not myblocks.empty? and not stmts.last.kind_of?(C::Return)
        puts "  block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      # explicit goto only when the successor is not the block laid out next
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts "  block #{Expression[b]} with multiple to"
    end
  end

  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  blocks_toclean.each { |b_, to_|
    dcmp.dasm.decoded[b_].block.list.each { |di|
      di.backtrace_binding = nil
    }
  }
end
365
+
366
# Align the C prototype of +func+ with the wasm signature of the function body starting at +entry+:
# the return type is taken from the wasm type, and arguments in excess of the wasm parameters
# are removed from the prototype; the ones flagged 'unused' are dropped from the scope, the others
# become zero-initialized local variables declared at the top of the body.
def decompile_check_abi(dcmp, entry, func)
  scope = func.initializer
  @wasm_file.function_body.to_a.each { |body|
    next unless body[:init_offset] == entry

    proto = wasm_type_to_type(body[:type])
    func.type.type = proto.type
    next unless func.type.args.length > proto.args.length

    # detected an argument that is actually a local variable, move into func scope
    while extra = func.type.args.delete_at(proto.args.length)
      if extra.has_attribute('unused')
        scope.symbol.delete extra.name
      else
        extra.initializer = C::CExpression[0]
        scope.statements[0, 0] = [C::Declaration.new(extra)]
      end
    end
  }
end
385
+ end
386
+ end