metasm 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -0
- data.tar.gz.sig +0 -0
- data/Gemfile +3 -2
- data/metasm.gemspec +3 -2
- data/metasm.rb +4 -1
- data/metasm/compile_c.rb +2 -2
- data/metasm/cpu/arc/decode.rb +0 -21
- data/metasm/cpu/arc/main.rb +4 -4
- data/metasm/cpu/arm/decode.rb +1 -5
- data/metasm/cpu/arm/main.rb +3 -3
- data/metasm/cpu/arm64/decode.rb +2 -6
- data/metasm/cpu/arm64/main.rb +5 -5
- data/metasm/cpu/bpf/decode.rb +3 -35
- data/metasm/cpu/bpf/main.rb +5 -5
- data/metasm/cpu/bpf/render.rb +1 -12
- data/metasm/cpu/cy16/decode.rb +0 -6
- data/metasm/cpu/cy16/main.rb +3 -3
- data/metasm/cpu/cy16/render.rb +0 -11
- data/metasm/cpu/dalvik/decode.rb +4 -26
- data/metasm/cpu/dalvik/main.rb +20 -2
- data/metasm/cpu/dalvik/opcodes.rb +3 -2
- data/metasm/cpu/{mips/compile_c.rb → ebpf.rb} +5 -2
- data/metasm/cpu/ebpf/debug.rb +61 -0
- data/metasm/cpu/ebpf/decode.rb +142 -0
- data/metasm/cpu/ebpf/main.rb +58 -0
- data/metasm/cpu/ebpf/opcodes.rb +97 -0
- data/metasm/cpu/ebpf/render.rb +36 -0
- data/metasm/cpu/ia32/debug.rb +39 -1
- data/metasm/cpu/ia32/decode.rb +111 -90
- data/metasm/cpu/ia32/decompile.rb +45 -37
- data/metasm/cpu/ia32/main.rb +10 -0
- data/metasm/cpu/ia32/parse.rb +6 -0
- data/metasm/cpu/mcs51/decode.rb +1 -1
- data/metasm/cpu/mcs51/main.rb +11 -0
- data/metasm/cpu/mips/decode.rb +8 -18
- data/metasm/cpu/mips/main.rb +3 -3
- data/metasm/cpu/mips/opcodes.rb +1 -1
- data/metasm/cpu/msp430/decode.rb +2 -6
- data/metasm/cpu/msp430/main.rb +3 -3
- data/metasm/cpu/openrisc.rb +11 -0
- data/metasm/cpu/openrisc/debug.rb +106 -0
- data/metasm/cpu/openrisc/decode.rb +182 -0
- data/metasm/cpu/openrisc/decompile.rb +350 -0
- data/metasm/cpu/openrisc/main.rb +70 -0
- data/metasm/cpu/openrisc/opcodes.rb +109 -0
- data/metasm/cpu/openrisc/render.rb +37 -0
- data/metasm/cpu/ppc/decode.rb +0 -25
- data/metasm/cpu/ppc/main.rb +6 -6
- data/metasm/cpu/ppc/opcodes.rb +3 -4
- data/metasm/cpu/python/decode.rb +0 -20
- data/metasm/cpu/python/main.rb +1 -1
- data/metasm/cpu/sh4/decode.rb +2 -6
- data/metasm/cpu/sh4/main.rb +25 -23
- data/metasm/cpu/st20/decode.rb +0 -7
- data/metasm/cpu/webasm.rb +11 -0
- data/metasm/cpu/webasm/debug.rb +31 -0
- data/metasm/cpu/webasm/decode.rb +321 -0
- data/metasm/cpu/webasm/decompile.rb +386 -0
- data/metasm/cpu/webasm/encode.rb +104 -0
- data/metasm/cpu/webasm/main.rb +81 -0
- data/metasm/cpu/webasm/opcodes.rb +214 -0
- data/metasm/cpu/x86_64/compile_c.rb +13 -9
- data/metasm/cpu/x86_64/parse.rb +1 -1
- data/metasm/cpu/z80/decode.rb +0 -27
- data/metasm/cpu/z80/main.rb +3 -3
- data/metasm/cpu/z80/render.rb +0 -11
- data/metasm/debug.rb +43 -8
- data/metasm/decode.rb +62 -14
- data/metasm/decompile.rb +793 -466
- data/metasm/disassemble.rb +188 -131
- data/metasm/disassemble_api.rb +30 -17
- data/metasm/dynldr.rb +2 -2
- data/metasm/encode.rb +8 -2
- data/metasm/exe_format/autoexe.rb +2 -0
- data/metasm/exe_format/coff.rb +21 -3
- data/metasm/exe_format/coff_decode.rb +12 -0
- data/metasm/exe_format/coff_encode.rb +6 -3
- data/metasm/exe_format/dex.rb +13 -3
- data/metasm/exe_format/elf.rb +12 -2
- data/metasm/exe_format/elf_decode.rb +59 -1
- data/metasm/exe_format/main.rb +2 -0
- data/metasm/exe_format/mz.rb +1 -0
- data/metasm/exe_format/pe.rb +25 -3
- data/metasm/exe_format/wasm.rb +402 -0
- data/metasm/gui/dasm_decomp.rb +171 -95
- data/metasm/gui/dasm_graph.rb +61 -2
- data/metasm/gui/dasm_hex.rb +2 -2
- data/metasm/gui/dasm_main.rb +45 -19
- data/metasm/gui/debug.rb +13 -4
- data/metasm/gui/gtk.rb +12 -4
- data/metasm/main.rb +108 -103
- data/metasm/os/emulator.rb +175 -0
- data/metasm/os/main.rb +11 -6
- data/metasm/parse.rb +23 -12
- data/metasm/parse_c.rb +189 -135
- data/metasm/preprocessor.rb +16 -1
- data/misc/openrisc-parser.rb +79 -0
- data/samples/dasm-plugins/scanxrefs.rb +6 -4
- data/samples/dasm-plugins/selfmodify.rb +8 -8
- data/samples/dbg-plugins/trace_func.rb +1 -1
- data/samples/disassemble-gui.rb +14 -3
- data/samples/emubios.rb +251 -0
- data/samples/emudbg.rb +127 -0
- data/samples/lindebug.rb +79 -78
- data/samples/metasm-shell.rb +8 -8
- data/tests/all.rb +1 -1
- data/tests/expression.rb +2 -0
- data/tests/graph_layout.rb +1 -1
- data/tests/ia32.rb +1 -0
- data/tests/mips.rb +1 -1
- data/tests/preprocessor.rb +18 -0
- metadata +124 -6
- metadata.gz.sig +0 -0
data/metasm/cpu/x86_64/parse.rb
CHANGED
@@ -49,7 +49,7 @@ class X86_64
|
|
49
49
|
|
50
50
|
# check if the argument matches the opcode's argument spec
|
51
51
|
def parse_arg_valid?(o, spec, arg)
|
52
|
-
return if arg.kind_of? ModRM and ((arg.b and arg.b.val == 16 and arg.i) or (arg.i and arg.i.val == 16 and (arg.b or arg.s != 1)))
|
52
|
+
return if arg.kind_of? ModRM and ((arg.b and arg.b.val == 16 and arg.i) or (arg.i and arg.i.val == 16 and (arg.b or arg.s != 1))) # cannot encode [rip+base], only [rip+imm]
|
53
53
|
return if arg.kind_of? Reg and arg.sz >= 32 and arg.val == 16 # eip/rip only in modrm
|
54
54
|
return if o.props[:auto64] and arg.respond_to? :sz and arg.sz == 32
|
55
55
|
# vex c4/c5
|
data/metasm/cpu/z80/decode.rb
CHANGED
@@ -151,12 +151,6 @@ class Z80
|
|
151
151
|
di
|
152
152
|
end
|
153
153
|
|
154
|
-
# hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
|
155
|
-
def backtrace_binding
|
156
|
-
@backtrace_binding ||= init_backtrace_binding
|
157
|
-
end
|
158
|
-
def backtrace_binding=(b) @backtrace_binding = b end
|
159
|
-
|
160
154
|
# populate the @backtrace_binding hash with default values
|
161
155
|
def init_backtrace_binding
|
162
156
|
@backtrace_binding ||= {}
|
@@ -206,27 +200,6 @@ class Z80
|
|
206
200
|
@backtrace_binding
|
207
201
|
end
|
208
202
|
|
209
|
-
def get_backtrace_binding(di)
|
210
|
-
a = di.instruction.args.map { |arg|
|
211
|
-
case arg
|
212
|
-
when Memref, Reg; arg.symbolic(di)
|
213
|
-
else arg
|
214
|
-
end
|
215
|
-
}
|
216
|
-
|
217
|
-
if binding = backtrace_binding[di.opcode.basename]
|
218
|
-
binding[di, *a]
|
219
|
-
else
|
220
|
-
puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
|
221
|
-
# assume nothing except the 1st arg is modified
|
222
|
-
case a[0]
|
223
|
-
when Indirection, Symbol; { a[0] => Expression::Unknown }
|
224
|
-
when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {}
|
225
|
-
else {}
|
226
|
-
end.update(:incomplete_binding => Expression[1])
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
203
|
# patch a forward binding from the backtrace binding
|
231
204
|
def fix_fwdemu_binding(di, fbd)
|
232
205
|
case di.opcode.name
|
data/metasm/cpu/z80/main.rb
CHANGED
@@ -26,7 +26,7 @@ class Z80 < CPU
|
|
26
26
|
@i = i
|
27
27
|
end
|
28
28
|
|
29
|
-
def symbolic(
|
29
|
+
def symbolic(di=nil) ; to_s.to_sym ; end
|
30
30
|
|
31
31
|
def self.from_str(s)
|
32
32
|
raise "Bad name #{s.inspect}" if not x = @s_to_i[s]
|
@@ -43,11 +43,11 @@ class Z80 < CPU
|
|
43
43
|
@sz = sz
|
44
44
|
end
|
45
45
|
|
46
|
-
def symbolic(
|
46
|
+
def symbolic(di=nil)
|
47
47
|
p = nil
|
48
48
|
p = Expression[p, :+, @base.symbolic] if base
|
49
49
|
p = Expression[p, :+, @offset] if offset
|
50
|
-
Indirection[p.reduce, @sz,
|
50
|
+
Indirection[p.reduce, @sz, (di.address if di)]
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
data/metasm/cpu/z80/render.rb
CHANGED
data/metasm/debug.rb
CHANGED
@@ -16,7 +16,7 @@ class Debugger
|
|
16
16
|
:state,
|
17
17
|
# type: type of breakpoint (:bpx = soft, :hwbp = hard, :bpm = memory)
|
18
18
|
:type,
|
19
|
-
# Expression if this is a
|
19
|
+
# Expression if this is a conditional bp
|
20
20
|
# may be a Proc, String or Expression, evaluated every time the breakpoint hits
|
21
21
|
# if it returns 0 or false, the breakpoint is ignored
|
22
22
|
:condition,
|
@@ -460,7 +460,7 @@ class Debugger
|
|
460
460
|
# TODO make it so this doesn't interfere with other 'real' disassembler later commands, eg disassemble() or disassemble_fast_deep()
|
461
461
|
# (right now, when they see the block already present they stop all processing)
|
462
462
|
def init_bpx_disassemble(addr)
|
463
|
-
@disassembler.
|
463
|
+
@disassembler.disassemble_fast(addr)
|
464
464
|
@disassembler.di_at(addr)
|
465
465
|
end
|
466
466
|
|
@@ -612,10 +612,10 @@ class Debugger
|
|
612
612
|
# due to a side-effect of the debugger (bpx with wrong condition etc)
|
613
613
|
# returns nil if the execution should be avoided (just deleted the dead thread/process)
|
614
614
|
def check_pre_run(run_m, *run_a)
|
615
|
-
if @dead_process
|
615
|
+
if @dead_process ||= nil
|
616
616
|
del_pid
|
617
617
|
return
|
618
|
-
elsif @dead_thread
|
618
|
+
elsif @dead_thread ||= nil
|
619
619
|
del_tid
|
620
620
|
return
|
621
621
|
elsif @state == :running
|
@@ -894,7 +894,7 @@ class Debugger
|
|
894
894
|
|
895
895
|
# checks if @breakpoint_cause is valid, or was obsoleted by the user changing pc
|
896
896
|
def check_breakpoint_cause
|
897
|
-
if bp =
|
897
|
+
if bp = breakpoint_cause and
|
898
898
|
(bp.type == :bpx or (bp.type == :hwbp and bp.internal[:type] == :x)) and
|
899
899
|
pc != bp.address
|
900
900
|
bp = @breakpoint_cause = nil
|
@@ -1274,7 +1274,7 @@ class Debugger
|
|
1274
1274
|
if i.kind_of? Indirection and p = i.pointer.reduce and p.kind_of? ::Integer
|
1275
1275
|
i.len ||= @cpu.size/8
|
1276
1276
|
p &= (1 << @cpu.size) - 1 if p < 0
|
1277
|
-
|
1277
|
+
@memory.decode_imm(p, i.len, @cpu)
|
1278
1278
|
end
|
1279
1279
|
}
|
1280
1280
|
end
|
@@ -1290,11 +1290,11 @@ class Debugger
|
|
1290
1290
|
if arg1
|
1291
1291
|
arg0 = resolve_expr(arg0) if not arg0.kind_of? ::Integer
|
1292
1292
|
arg1 = resolve_expr(arg1) if not arg1.kind_of? ::Integer
|
1293
|
-
@memory[arg0, arg1].to_str
|
1293
|
+
(@memory[arg0, arg1] || '').to_str
|
1294
1294
|
elsif arg0.kind_of? ::Range
|
1295
1295
|
arg0.begin = resolve_expr(arg0.begin) if not arg0.begin.kind_of? ::Integer # cannot happen, invalid ruby Range
|
1296
1296
|
arg0.end = resolve_expr(arg0.end) if not arg0.end.kind_of? ::Integer
|
1297
|
-
@memory[arg0].to_str
|
1297
|
+
(@memory[arg0] || '').to_str
|
1298
1298
|
else
|
1299
1299
|
get_reg_value(arg0)
|
1300
1300
|
end
|
@@ -1442,4 +1442,39 @@ class Debugger
|
|
1442
1442
|
s.length == len ? s : (s = @memory[addr, len] ? s.to_str : nil)
|
1443
1443
|
end
|
1444
1444
|
end
|
1445
|
+
|
1446
|
+
class CPU
|
1447
|
+
# return the CPU register used to store the current instruction pointer
|
1448
|
+
def dbg_register_pc
|
1449
|
+
@dbg_register_pc ||= :pc
|
1450
|
+
end
|
1451
|
+
|
1452
|
+
# return the list of CPU registers
|
1453
|
+
def dbg_register_list
|
1454
|
+
@dbg_register_list ||= [dbg_register_pc]
|
1455
|
+
end
|
1456
|
+
|
1457
|
+
# return the list of flags for the CPU
|
1458
|
+
def dbg_flag_list
|
1459
|
+
@dbg_flag_list ||= []
|
1460
|
+
end
|
1461
|
+
|
1462
|
+
# return a hash with register name => register size in bits
|
1463
|
+
def dbg_register_size
|
1464
|
+
@dbg_register_size ||= Hash.new(@size)
|
1465
|
+
end
|
1466
|
+
|
1467
|
+
# returns true if stepover is different from stepinto for this instruction
|
1468
|
+
def dbg_need_stepover(dbg, addr, di)
|
1469
|
+
di and di.opcode.props[:saveip]
|
1470
|
+
end
|
1471
|
+
|
1472
|
+
# activate a software breakpoint
|
1473
|
+
def dbg_enable_bp(dbg, bp)
|
1474
|
+
end
|
1475
|
+
|
1476
|
+
# deactivate a software breakpoint
|
1477
|
+
def dbg_disable_bp(dbg, bp)
|
1478
|
+
end
|
1479
|
+
end
|
1445
1480
|
end
|
data/metasm/decode.rb
CHANGED
@@ -17,16 +17,22 @@ class Indirection < ExpressionType
|
|
17
17
|
alias pointer target
|
18
18
|
alias pointer= target=
|
19
19
|
# length in bytes of data referenced
|
20
|
-
|
20
|
+
attr_reader :len
|
21
21
|
# address of the instruction who generated the indirection
|
22
22
|
attr_accessor :origin
|
23
23
|
|
24
24
|
def initialize(target, len, origin)
|
25
|
-
@target, @
|
25
|
+
@target, @origin = target, origin
|
26
|
+
self.len = len
|
26
27
|
end
|
27
28
|
|
28
|
-
def
|
29
|
-
|
29
|
+
def len=(len)
|
30
|
+
@len = len
|
31
|
+
@max_bits_mask ||= (1 << (len*8)) - 1 if len.kind_of?(::Integer)
|
32
|
+
end
|
33
|
+
|
34
|
+
def reduce_rec(cb=nil)
|
35
|
+
ptr = Expression[@target.reduce(&cb)]
|
30
36
|
(ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin)
|
31
37
|
end
|
32
38
|
|
@@ -180,10 +186,14 @@ class Expression
|
|
180
186
|
end
|
181
187
|
|
182
188
|
class CPU
|
189
|
+
def bin_lookaside
|
190
|
+
@bin_lookaside ||= build_bin_lookaside
|
191
|
+
end
|
192
|
+
|
183
193
|
# decodes the instruction at edata.ptr, mapped at virtual address off
|
184
194
|
# returns a DecodedInstruction or nil
|
185
195
|
def decode_instruction(edata, addr)
|
186
|
-
|
196
|
+
bin_lookaside
|
187
197
|
di = decode_findopcode edata if edata.ptr <= edata.length
|
188
198
|
di.address = addr if di
|
189
199
|
di = decode_instr_op(edata, di) if di
|
@@ -207,6 +217,18 @@ class CPU
|
|
207
217
|
di
|
208
218
|
end
|
209
219
|
|
220
|
+
# return a symbolic representation of an instruction argument (eg Reg[0] => :eax)
|
221
|
+
def symbolic(arg, di=nil)
|
222
|
+
case arg
|
223
|
+
when ExpressionType
|
224
|
+
arg
|
225
|
+
when Integer
|
226
|
+
Expression[arg]
|
227
|
+
else
|
228
|
+
arg.symbolic(di)
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
210
232
|
# number of instructions following a jump that are still executed
|
211
233
|
def delay_slot(di=nil)
|
212
234
|
0
|
@@ -216,24 +238,50 @@ class CPU
|
|
216
238
|
DecodedFunction.new
|
217
239
|
end
|
218
240
|
|
241
|
+
# hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
|
242
|
+
def backtrace_binding
|
243
|
+
@backtrace_binding ||= init_backtrace_binding
|
244
|
+
end
|
245
|
+
def backtrace_binding=(b) @backtrace_binding = b end
|
246
|
+
|
247
|
+
# return the backtrace binding for a specific di
|
248
|
+
def get_backtrace_binding(di)
|
249
|
+
a = di.instruction.args.map { |arg| symbolic(arg, di) }
|
250
|
+
|
251
|
+
if binding = backtrace_binding[di.opcode.name]
|
252
|
+
binding[di, *a]
|
253
|
+
else
|
254
|
+
puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
|
255
|
+
{:incomplete_binding => Expression[1]}
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
219
259
|
# return something like backtrace_binding in the forward direction
|
220
260
|
# set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
|
221
|
-
|
222
|
-
|
223
|
-
|
261
|
+
# pass a debugger to allow reading the context and actually resolve the next pc in case of conditional jumps
|
262
|
+
def get_fwdemu_binding(di, pc_reg=nil, dbg_ctx=nil)
|
263
|
+
fbd = di.backtrace_binding ||= get_backtrace_binding(di)
|
264
|
+
fbd = fix_fwdemu_binding(di, fbd)
|
224
265
|
if pc_reg
|
266
|
+
n_a = Expression[pc_reg, :+, di.bin_length]
|
225
267
|
if di.opcode.props[:setip]
|
226
|
-
xr = get_xrefs_x(nil, di)
|
227
|
-
|
228
|
-
|
268
|
+
xr = get_xrefs_x(nil, di).to_a
|
269
|
+
xr |= [n_a] if not di.opcode.props[:stopexec]
|
270
|
+
if xr.length == 1
|
271
|
+
fbd[pc_reg] = xr[0]
|
229
272
|
else
|
230
|
-
|
273
|
+
dbg_resolve_pc(di, fbd, pc_reg, dbg_ctx)
|
231
274
|
end
|
232
275
|
else
|
233
|
-
|
276
|
+
fbd[pc_reg] = Expression[pc_reg, :+, di.bin_length]
|
234
277
|
end
|
235
278
|
end
|
236
|
-
|
279
|
+
fbd
|
280
|
+
end
|
281
|
+
|
282
|
+
# resolve the program counter following a conditional jump using a debugging context
|
283
|
+
def dbg_resolve_pc(di, fbd, pc_reg, dbg_ctx)
|
284
|
+
fbd[:incomplete_binding] = Expression[1]
|
237
285
|
end
|
238
286
|
|
239
287
|
# patch a forward binding from the backtrace binding
|
data/metasm/decompile.rb
CHANGED
@@ -13,24 +13,19 @@ class C::Variable; attr_accessor :stackoff; end
|
|
13
13
|
class C::Block; attr_accessor :decompdata; end
|
14
14
|
class DecodedFunction; attr_accessor :decompdata; end
|
15
15
|
|
16
|
-
class CPU
|
17
|
-
def decompile_check_abi(dcmp, entry, func)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
16
|
class Decompiler
|
22
|
-
# TODO add methods to C::CExpr
|
23
|
-
AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--']
|
24
|
-
|
25
17
|
attr_accessor :dasm, :c_parser
|
26
|
-
attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels
|
18
|
+
attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels, :forbid_all_optimizations
|
27
19
|
# recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done
|
28
20
|
attr_accessor :recurse
|
29
21
|
|
22
|
+
def disassembler ; dasm ; end
|
23
|
+
|
30
24
|
def initialize(dasm, cp = dasm.c_parser)
|
31
25
|
@dasm = dasm
|
32
26
|
@recurse = 1/0.0 # Infinity
|
33
27
|
@c_parser = cp || @dasm.cpu.new_cparser
|
28
|
+
@dasm.cpu.decompile_init(self) if @dasm.cpu.respond_to?(:decompile_init)
|
34
29
|
end
|
35
30
|
|
36
31
|
# decompile recursively function from an entrypoint, then perform global optimisation (static vars, ...)
|
@@ -103,6 +98,7 @@ class Decompiler
|
|
103
98
|
myblocks = listblocks_func(entry)
|
104
99
|
|
105
100
|
# [esp+8] => [:frameptr-12]
|
101
|
+
# TODO slow
|
106
102
|
makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block }
|
107
103
|
|
108
104
|
# find registry dependencies between blocks
|
@@ -110,24 +106,31 @@ class Decompiler
|
|
110
106
|
|
111
107
|
scope = func.initializer = C::Block.new(@c_parser.toplevel)
|
112
108
|
if df = @dasm.function[entry]
|
113
|
-
scope.decompdata = df.decompdata ||= {:
|
109
|
+
scope.decompdata = df.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
|
114
110
|
else
|
115
|
-
scope.decompdata ||= {:
|
111
|
+
scope.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
|
116
112
|
end
|
117
113
|
|
118
114
|
# di blocks => raw c statements, declare variables
|
119
115
|
@dasm.cpu.decompile_blocks(self, myblocks, deps, func)
|
116
|
+
puts "dcmp debug #{func.name} {", scope, '}' if $DEBUG
|
117
|
+
|
118
|
+
return if forbid_all_optimizations
|
120
119
|
|
121
120
|
simplify_goto(scope)
|
122
121
|
namestackvars(scope)
|
123
122
|
unalias_vars(scope, func)
|
124
123
|
decompile_c_types(scope)
|
125
|
-
|
124
|
+
optimize_code(scope)
|
125
|
+
optimize_vars(scope)
|
126
|
+
optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i)
|
126
127
|
remove_unreferenced_vars(scope)
|
128
|
+
decompile_c_types_again(scope)
|
127
129
|
cleanup_var_decl(scope, func)
|
128
130
|
if @recurse > 0
|
129
131
|
decompile_controlseq(scope)
|
130
132
|
optimize_vars(scope)
|
133
|
+
optimize_code(scope)
|
131
134
|
optimize_ctrl(scope)
|
132
135
|
optimize_vars(scope)
|
133
136
|
remove_unreferenced_vars(scope)
|
@@ -143,7 +146,7 @@ class Decompiler
|
|
143
146
|
scope.statements.pop
|
144
147
|
else
|
145
148
|
v = ret.value
|
146
|
-
v = v.rexpr if v.kind_of?
|
149
|
+
v = v.rexpr if v.kind_of?(C::CExpression) and not v.op and v.rexpr.kind_of?(C::Typed)
|
147
150
|
func.type.type = v.type
|
148
151
|
end
|
149
152
|
end
|
@@ -156,21 +159,21 @@ class Decompiler
|
|
156
159
|
|
157
160
|
# redecompile a function, redecompiles functions calling it if its prototype changed
|
158
161
|
def redecompile(name)
|
159
|
-
@c_parser.toplevel.statements.delete_if { |st| st.kind_of?
|
162
|
+
@c_parser.toplevel.statements.delete_if { |st| st.kind_of?(C::Declaration) and st.var.name == name }
|
160
163
|
oldvar = @c_parser.toplevel.symbol.delete name
|
161
164
|
|
162
165
|
decompile_func(name)
|
163
166
|
|
164
|
-
if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of?
|
167
|
+
if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of?(C::Function) and newvar.type.kind_of?(C::Function)
|
165
168
|
o, n = oldvar.type, newvar.type
|
166
169
|
if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type }
|
167
170
|
# XXX a may depend on b and c, and b may depend on c -> redecompile c twice
|
168
171
|
# XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs..
|
169
172
|
@c_parser.toplevel.statements.dup.each { |st|
|
170
|
-
next if not st.kind_of?
|
173
|
+
next if not st.kind_of?(C::Declaration)
|
171
174
|
next if not st.var.initializer
|
172
175
|
next if st.var.name == name
|
173
|
-
next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of?
|
176
|
+
next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == name }
|
174
177
|
redecompile(st.var.name)
|
175
178
|
}
|
176
179
|
end
|
@@ -181,38 +184,39 @@ class Decompiler
|
|
181
184
|
addr = @dasm.normalize(addr)
|
182
185
|
|
183
186
|
# (almost) NULL ptr
|
184
|
-
return if addr.kind_of?
|
187
|
+
return if addr.kind_of?(Integer) and addr >= 0 and addr < 32
|
185
188
|
|
186
189
|
# check preceding structure we're hitting
|
187
190
|
# TODO check what we step over when defining a new static struct
|
188
191
|
0x100.times { |i_|
|
189
192
|
next if not n = @dasm.get_label_at(addr-i_)
|
190
193
|
next if not v = @c_parser.toplevel.symbol[n]
|
191
|
-
next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of?
|
194
|
+
next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of?(C::Union)
|
192
195
|
break if i_ == 0 # XXX it crashes later if we dont break here
|
193
196
|
next if sizeof(v.type.pointed) <= i_
|
194
197
|
return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil)
|
195
198
|
}
|
196
199
|
|
197
200
|
ptype = type.pointed.untypedef if type.pointer?
|
198
|
-
if ptype.kind_of?
|
201
|
+
if ptype.kind_of?(C::Function)
|
199
202
|
name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk')
|
200
203
|
if @dasm.get_section_at(addr) and @recurse > 0
|
201
204
|
puts "found function pointer to #{name}" if $VERBOSE
|
202
205
|
@dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ?
|
203
206
|
f = @dasm.function[addr] ||= DecodedFunction.new
|
204
207
|
# TODO detect thunks (__noreturn)
|
205
|
-
f.decompdata ||= { :
|
206
|
-
if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of?
|
208
|
+
f.decompdata ||= { :unalias_type => {}, :unalias_name => {} }
|
209
|
+
if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of?(C::Function)
|
207
210
|
os = @c_parser.toplevel.symbol.delete name
|
208
|
-
@c_parser.toplevel.statements.delete_if { |ts| ts.kind_of?
|
209
|
-
aoff = 1
|
210
|
-
ptype.args.to_a.each { |a|
|
211
|
-
|
212
|
-
|
213
|
-
f.decompdata[:
|
214
|
-
aoff
|
215
|
-
|
211
|
+
@c_parser.toplevel.statements.delete_if { |ts| ts.kind_of?(C::Declaration) and ts.var.name == name }
|
212
|
+
#aoff = 1
|
213
|
+
#ptype.args.to_a.each { |a|
|
214
|
+
# TODO
|
215
|
+
#aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
|
216
|
+
#f.decompdata[:unalias_type][aoff] ||= a.type
|
217
|
+
#f.decompdata[:unalias_name][aoff] ||= a.name if a.name
|
218
|
+
#aoff += sizeof(a) # ary ?
|
219
|
+
#}
|
216
220
|
decompile_func_rec(addr)
|
217
221
|
s = @c_parser.toplevel.symbol[name]
|
218
222
|
walk_ce([@c_parser.toplevel, scope]) { |ce|
|
@@ -230,7 +234,7 @@ class Decompiler
|
|
230
234
|
when 4; 'dword'
|
231
235
|
else 'unk'
|
232
236
|
end
|
233
|
-
name = 'stru' if ptype.kind_of?
|
237
|
+
name = 'stru' if ptype.kind_of?(C::Union)
|
234
238
|
name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru')
|
235
239
|
|
236
240
|
if not var = @c_parser.toplevel.symbol[name]
|
@@ -240,28 +244,28 @@ class Decompiler
|
|
240
244
|
@c_parser.toplevel.symbol[var.name] = var
|
241
245
|
@c_parser.toplevel.statements << C::Declaration.new(var)
|
242
246
|
end
|
243
|
-
if ptype.kind_of?
|
247
|
+
if ptype.kind_of?(C::Union) and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length
|
244
248
|
# TODO struct init, array, fptrs..
|
245
|
-
elsif type.pointer? and not type.pointed.untypedef.kind_of?
|
246
|
-
[1, 2, 4].include?
|
249
|
+
elsif type.pointer? and not type.pointed.untypedef.kind_of?(C::Function) and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and
|
250
|
+
[1, 2, 4].include?(tsz) and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer)
|
247
251
|
# TODO do not overlap other statics (but labels may refer to elements of the array...)
|
248
252
|
data = (0..256).map {
|
249
253
|
v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness)
|
250
|
-
v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of?
|
254
|
+
v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of?(Expression) # relocation
|
251
255
|
v
|
252
256
|
}
|
253
257
|
var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty?
|
254
|
-
if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str
|
258
|
+
if (tsz == 1 or tsz == 2) and eos = data.index(0) and ((0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } or ptype.to_s == '(char)') # printable str
|
255
259
|
# XXX 0x80 with ruby1.9...
|
256
260
|
var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil
|
257
261
|
end
|
258
|
-
if var.initializer.kind_of?
|
259
|
-
i.rexpr.type.kind_of?
|
262
|
+
if var.initializer.kind_of?(::Array) and i = var.initializer.first and i.kind_of?(C::CExpression) and not i.op and i.rexpr.kind_of?(C::Variable) and
|
263
|
+
i.rexpr.type.kind_of?(C::Function) and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc
|
260
264
|
i.type = i.rexpr.type
|
261
265
|
type = var.type = C::Array.new(C::Pointer.new(i.type))
|
262
266
|
var.initializer = [i]
|
263
267
|
end
|
264
|
-
var.initializer = nil if var.initializer.kind_of?
|
268
|
+
var.initializer = nil if var.initializer.kind_of?(::Array) and not type.untypedef.kind_of?(C::Array)
|
265
269
|
end
|
266
270
|
|
267
271
|
# TODO patch existing references to addr ? (or would they have already triggered new_global_var?)
|
@@ -285,7 +289,7 @@ class Decompiler
|
|
285
289
|
next if type == :indirect
|
286
290
|
ta = dasm.normalize ta
|
287
291
|
if type != :subfuncret and not @dasm.function[ta] and
|
288
|
-
(not @dasm.function[entry] or @autofuncs.include?
|
292
|
+
(not @dasm.function[entry] or @autofuncs.include?(entry)) and
|
289
293
|
di.block.list.last.opcode.props[:saveip]
|
290
294
|
# possible noreturn function
|
291
295
|
# XXX call $+5; pop eax
|
@@ -317,7 +321,7 @@ class Decompiler
|
|
317
321
|
return expr if n == Expression::Unknown
|
318
322
|
n = Expression[n].reduce_rec
|
319
323
|
n = @dasm.get_label_at(n) || n
|
320
|
-
n = $1 if n.kind_of?
|
324
|
+
n = $1 if n.kind_of?(::String) and n =~ /^thunk_(.*)/
|
321
325
|
n
|
322
326
|
else
|
323
327
|
expr
|
@@ -329,27 +333,29 @@ class Decompiler
|
|
329
333
|
blockstart = nil
|
330
334
|
cache_di = nil
|
331
335
|
cache = {} # [i_s, e, type] => backtrace
|
332
|
-
|
336
|
+
@decomp_mkstackvars_terminals ||= [:frameptr]
|
337
|
+
tovar = lambda { |di, e, i_s_c|
|
338
|
+
i_s = (i_s_c > 0)
|
333
339
|
case e
|
334
|
-
when Expression; Expression[tovar[di, e.lexpr,
|
335
|
-
when Indirection; Indirection[tovar[di, e.target,
|
340
|
+
when Expression; Expression[tovar[di, e.lexpr, i_s_c], e.op, tovar[di, e.rexpr, i_s_c]].reduce
|
341
|
+
when Indirection; Indirection[tovar[di, e.target, i_s_c-1], e.len, e.origin]
|
336
342
|
when :frameptr; e
|
337
343
|
when ::Symbol
|
338
344
|
cache.clear if cache_di != di ; cache_di = di
|
339
345
|
vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart,
|
340
|
-
:include_start => i_s, :no_check => true, :terminals =>
|
346
|
+
:include_start => i_s, :no_check => true, :terminals => @decomp_mkstackvars_terminals)
|
341
347
|
# backtrace only to blockstart first
|
342
|
-
if vals.length == 1 and ee = vals.first and ee.kind_of?
|
343
|
-
(ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?
|
344
|
-
(not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of?
|
345
|
-
(eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of?
|
348
|
+
if vals.length == 1 and ee = vals.first and ee.kind_of?(Expression) and (ee == Expression[:frameptr] or
|
349
|
+
(ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer)) or
|
350
|
+
(not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of?(Indirection) and eep = ee.rexpr.pointer and
|
351
|
+
(eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of?(::Integer)))))
|
346
352
|
ee
|
347
353
|
else
|
348
354
|
# fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding..
|
349
355
|
vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart,
|
350
|
-
:include_start => i_s, :no_check => true, :terminals =>
|
351
|
-
if vals.length == 1 and ee = vals.first and (ee.kind_of?
|
352
|
-
(ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?
|
356
|
+
:include_start => i_s, :no_check => true, :terminals => @decomp_mkstackvars_terminals)
|
357
|
+
if vals.length == 1 and ee = vals.first and (ee.kind_of?(Expression) and (ee == Expression[:frameptr] or
|
358
|
+
(ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer))))
|
353
359
|
ee
|
354
360
|
else e
|
355
361
|
end
|
@@ -366,9 +372,9 @@ class Decompiler
|
|
366
372
|
bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di)
|
367
373
|
newbd = repl_bind[di] = {}
|
368
374
|
bd.each { |k, v|
|
369
|
-
k = tovar[di, k,
|
370
|
-
next if k == Expression[:frameptr] or (k.kind_of?
|
371
|
-
newbd[k] = tovar[di, v,
|
375
|
+
k = tovar[di, k, 2] if k.kind_of?(Indirection)
|
376
|
+
next if k == Expression[:frameptr] or (k.kind_of?(Expression) and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of?( ::Integer))
|
377
|
+
newbd[k] = tovar[di, v, 0]
|
372
378
|
}
|
373
379
|
}
|
374
380
|
}
|
@@ -391,9 +397,9 @@ class Decompiler
|
|
391
397
|
def decompile_cexpr(e, scope, itype=nil)
|
392
398
|
case e
|
393
399
|
when Expression
|
394
|
-
if e.op == :'=' and e.lexpr.kind_of?
|
400
|
+
if e.op == :'=' and e.lexpr.kind_of?(::String) and e.lexpr =~ /^dummy_metasm_/
|
395
401
|
decompile_cexpr(e.rexpr, scope, itype)
|
396
|
-
elsif e.op == :+ and e.rexpr.kind_of?
|
402
|
+
elsif e.op == :+ and e.rexpr.kind_of?(::Integer) and e.rexpr < 0
|
397
403
|
decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype)
|
398
404
|
elsif e.lexpr
|
399
405
|
a = decompile_cexpr(e.lexpr, scope, itype)
|
@@ -414,7 +420,7 @@ class Decompiler
|
|
414
420
|
end
|
415
421
|
itype = C::Pointer.new(bt)
|
416
422
|
p = decompile_cexpr(e.target, scope, itype)
|
417
|
-
p = C::CExpression[[p], itype]
|
423
|
+
p = C::CExpression[[p], itype]
|
418
424
|
C::CExpression[:*, p]
|
419
425
|
when ::Integer
|
420
426
|
C::CExpression[e]
|
@@ -442,17 +448,17 @@ class Decompiler
|
|
442
448
|
|
443
449
|
# simplify goto -> goto / goto -> return
|
444
450
|
def simplify_goto(scope, keepret = false)
|
445
|
-
if not keepret and scope.statements[-1].kind_of?
|
451
|
+
if not keepret and scope.statements[-1].kind_of?(C::Return) and not scope.statements[-2].kind_of?(C::Label)
|
446
452
|
scope.statements.insert(-2, C::Label.new("ret_label"))
|
447
453
|
end
|
448
454
|
|
449
455
|
jumpto = {}
|
450
456
|
walk(scope) { |s|
|
451
|
-
next if not s.kind_of?
|
457
|
+
next if not s.kind_of?(C::Block)
|
452
458
|
s.statements.each_with_index { |ss, i|
|
453
459
|
case ss
|
454
460
|
when C::Goto, C::Return
|
455
|
-
while l = s.statements[i -= 1] and l.kind_of?
|
461
|
+
while l = s.statements[i -= 1] and l.kind_of?(C::Label)
|
456
462
|
jumpto[l.name] = ss
|
457
463
|
end
|
458
464
|
end
|
@@ -464,11 +470,11 @@ class Decompiler
|
|
464
470
|
when C::Goto
|
465
471
|
if jumpto[s.target]
|
466
472
|
r = jumpto[s.target].dup
|
467
|
-
r.value = r.value.deep_dup if r.kind_of?
|
473
|
+
r.value = r.value.deep_dup if r.kind_of?(C::Return) and r.value.kind_of?(C::CExpression)
|
468
474
|
r
|
469
475
|
end
|
470
476
|
when C::Return
|
471
|
-
if not keepret and scope.statements[-1].kind_of?
|
477
|
+
if not keepret and scope.statements[-1].kind_of?(C::Return) and s.value == scope.statements[-1].value and s != scope.statements[-1]
|
472
478
|
C::Goto.new(scope.statements[-2].name)
|
473
479
|
end
|
474
480
|
end
|
@@ -493,7 +499,7 @@ class Decompiler
|
|
493
499
|
remove_labels(scope)
|
494
500
|
|
495
501
|
walk(scope) { |s|
|
496
|
-
next if not s.kind_of?
|
502
|
+
next if not s.kind_of?(C::Block)
|
497
503
|
del = false
|
498
504
|
# remove dead code goto a; goto b; if (0) { z: bla; } => rm goto b
|
499
505
|
s.statements.delete_if { |st|
|
@@ -508,10 +514,10 @@ class Decompiler
|
|
508
514
|
}
|
509
515
|
# if () { goto x; } x:
|
510
516
|
s.statements.each_with_index { |ss, i|
|
511
|
-
if ss.kind_of?
|
517
|
+
if ss.kind_of?(C::If)
|
512
518
|
t = ss.bthen
|
513
|
-
t = t.statements.first if t.kind_of?
|
514
|
-
if t.kind_of?
|
519
|
+
t = t.statements.first if t.kind_of?(C::Block)
|
520
|
+
if t.kind_of?(C::Goto) and s.statements[i+1].kind_of?(C::Label) and s.statements[i+1].name == t.target
|
515
521
|
ss.bthen = C::Block.new(scope)
|
516
522
|
end
|
517
523
|
end
|
@@ -548,8 +554,8 @@ class Decompiler
|
|
548
554
|
e.each_with_index { |st, i|
|
549
555
|
case st
|
550
556
|
when C::While, C::DoWhile
|
551
|
-
l1 = (e[i+1].name if e[i+1].kind_of?
|
552
|
-
l2 = (e[i-1].name if e[i-1].kind_of?
|
557
|
+
l1 = (e[i+1].name if e[i+1].kind_of?(C::Label))
|
558
|
+
l2 = (e[i-1].name if e[i-1].kind_of?(C::Label))
|
553
559
|
e[i].body = walk[st.body, l1, l2]
|
554
560
|
else
|
555
561
|
e[i] = walk[st, brk, cnt]
|
@@ -578,20 +584,20 @@ class Decompiler
|
|
578
584
|
remove_labels(scope)
|
579
585
|
|
580
586
|
# while (1) { a; if(b) { c; return; }; d; } => while (1) { a; if (b) break; d; } c;
|
581
|
-
while st = scope.statements.last and st.kind_of?
|
582
|
-
not st.test.op and st.test.rexpr == 1 and st.body.kind_of?
|
587
|
+
while st = scope.statements.last and st.kind_of?(C::While) and st.test.kind_of?(C::CExpression) and
|
588
|
+
not st.test.op and st.test.rexpr == 1 and st.body.kind_of?(C::Block)
|
583
589
|
break if not i = st.body.statements.find { |ist|
|
584
|
-
ist.kind_of?
|
590
|
+
ist.kind_of?(C::If) and not ist.belse and ist.bthen.kind_of?(C::Block) and ist.bthen.statements.last.kind_of?(C::Return)
|
585
591
|
}
|
586
|
-
walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of?
|
592
|
+
walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of?(C::Block) and sst.outer == i.bthen }
|
587
593
|
scope.statements.concat i.bthen.statements
|
588
594
|
i.bthen = C::Break.new
|
589
595
|
end
|
590
596
|
|
591
597
|
patch_test = lambda { |ce|
|
592
|
-
ce = ce.rexpr if ce.kind_of?
|
598
|
+
ce = ce.rexpr if ce.kind_of?(C::CExpression) and ce.op == :'!'
|
593
599
|
# if (a+1) => if (a != -1)
|
594
|
-
if ce.kind_of?
|
600
|
+
if ce.kind_of?(C::CExpression) and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and ce.lexpr
|
595
601
|
ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+
|
596
602
|
ce.op = :'!='
|
597
603
|
end
|
@@ -601,10 +607,10 @@ class Decompiler
|
|
601
607
|
case ce
|
602
608
|
when C::If
|
603
609
|
patch_test[ce.test]
|
604
|
-
if ce.bthen.kind_of?
|
610
|
+
if ce.bthen.kind_of?(C::Block)
|
605
611
|
case ce.bthen.statements.length
|
606
612
|
when 1
|
607
|
-
walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of?
|
613
|
+
walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of?(C::Block) and sst.outer == ce.bthen }
|
608
614
|
ce.bthen = ce.bthen.statements.first
|
609
615
|
when 0
|
610
616
|
if not ce.belse and i = ce.bthen.outer.statements.index(ce)
|
@@ -612,19 +618,19 @@ class Decompiler
|
|
612
618
|
end
|
613
619
|
end
|
614
620
|
end
|
615
|
-
if ce.belse.kind_of?
|
616
|
-
walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of?
|
621
|
+
if ce.belse.kind_of?(C::Block) and ce.belse.statements.length == 1
|
622
|
+
walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of?(C::Block) and sst.outer == ce.belse }
|
617
623
|
ce.belse = ce.belse.statements.first
|
618
624
|
end
|
619
625
|
when C::While, C::DoWhile
|
620
626
|
patch_test[ce.test]
|
621
|
-
if ce.body.kind_of?
|
627
|
+
if ce.body.kind_of?(C::Block)
|
622
628
|
case ce.body.statements.length
|
623
629
|
when 1
|
624
|
-
walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of?
|
630
|
+
walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of?(C::Block) and sst.outer == ce.body }
|
625
631
|
ce.body = ce.body.statements.first
|
626
632
|
when 0
|
627
|
-
if ce.kind_of?
|
633
|
+
if ce.kind_of?(C::DoWhile) and i = ce.body.outer.statements.index(ce)
|
628
634
|
ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body)
|
629
635
|
end
|
630
636
|
ce.body = nil
|
@@ -632,14 +638,33 @@ class Decompiler
|
|
632
638
|
end
|
633
639
|
end
|
634
640
|
}
|
641
|
+
|
642
|
+
walk(scope, false, true) { |ce|
|
643
|
+
# while (1) { a; if (b) break; } => do { a } while (!b);
|
644
|
+
if ce.kind_of?(C::While) and ce.test.kind_of?(C::CExpression) and not ce.test.op and ce.test.rexpr == 1 and ce.body.kind_of?(C::Block)
|
645
|
+
i = ce.body.statements.last
|
646
|
+
if i.kind_of?(C::If) and not i.belse and i.bthen.kind_of?(C::Break)
|
647
|
+
ce.body.statements.pop
|
648
|
+
next C::DoWhile.new(i.test.negate, ce.body)
|
649
|
+
end
|
650
|
+
end
|
651
|
+
|
652
|
+
# if (a) b = 1; else b = 2; => b = a ? 1 : 2
|
653
|
+
if ce.kind_of?(C::If) and ce.belse.kind_of?(C::CExpression) and ce.belse.op == :'=' and ce.belse.lexpr.kind_of?(C::Variable) and ce.bthen.kind_of?(C::CExpression) and ce.bthen.op == :'=' and ce.bthen.lexpr == ce.belse.lexpr
|
654
|
+
next C::CExpression[ce.bthen.lexpr, :'=', [ce.test, :'?:', [ce.bthen.rexpr, ce.belse.rexpr]]]
|
655
|
+
end
|
656
|
+
}
|
657
|
+
|
658
|
+
# TODO for (;;) {}
|
659
|
+
|
635
660
|
walk(scope) { |ce|
|
636
|
-
next if not ce.kind_of?
|
661
|
+
next if not ce.kind_of?(C::Block)
|
637
662
|
st = ce.statements
|
638
663
|
st.length.times { |n|
|
639
|
-
while st[n].kind_of?
|
640
|
-
(st[n].bthen.kind_of?
|
641
|
-
(st[n].bthen.kind_of?
|
642
|
-
(st[n].bthen.kind_of?
|
664
|
+
while st[n].kind_of?(C::If) and st[n+1].kind_of?(C::If) and not st[n].belse and not st[n+1].belse and (
|
665
|
+
(st[n].bthen.kind_of?(C::Return) and st[n+1].bthen.kind_of?(C::Return) and st[n].bthen.value == st[n+1].bthen.value) or
|
666
|
+
(st[n].bthen.kind_of?(C::Break) and st[n+1].bthen.kind_of?(C::Break)) or
|
667
|
+
(st[n].bthen.kind_of?(C::Continue) and st[n+1].bthen.kind_of?(C::Continue)))
|
643
668
|
# if (a) return x; if (b) return x; => if (a || b) return x;
|
644
669
|
st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test]
|
645
670
|
st.delete_at(n+1)
|
@@ -659,19 +684,19 @@ class Decompiler
|
|
659
684
|
inner_labels = ary.grep(C::Label).map { |l| l.name }
|
660
685
|
while s = ary.shift
|
661
686
|
# recurse if it's not the first run
|
662
|
-
if s.kind_of?
|
663
|
-
s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of?
|
664
|
-
s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of?
|
687
|
+
if s.kind_of?(C::If)
|
688
|
+
s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of?(C::Block)
|
689
|
+
s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of?(C::Block)
|
665
690
|
end
|
666
691
|
|
667
692
|
# if (a) goto x; if (b) goto x; => if (a || b) goto x;
|
668
|
-
while s.kind_of?
|
693
|
+
while s.kind_of?(C::If) and s.bthen.kind_of?(C::Goto) and not s.belse and ary.first.kind_of?(C::If) and ary.first.bthen.kind_of?(C::Goto) and
|
669
694
|
not ary.first.belse and s.bthen.target == ary.first.bthen.target
|
670
695
|
s.test = C::CExpression[s.test, :'||', ary.shift.test]
|
671
696
|
end
|
672
697
|
|
673
698
|
# if (a) goto x; b; x: => if (!a) { b; }
|
674
|
-
if s.kind_of?
|
699
|
+
if s.kind_of?(C::If) and s.bthen.kind_of?(C::Goto) and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target }
|
675
700
|
# if {goto l;} a; l: => if (!) {a;}
|
676
701
|
s.test = C::CExpression.negate s.test
|
677
702
|
s.bthen = C::Block.new(scope)
|
@@ -680,20 +705,20 @@ class Decompiler
|
|
680
705
|
ary[0...ary.index(l)] = []
|
681
706
|
end
|
682
707
|
|
683
|
-
if s.kind_of?
|
684
|
-
s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of?
|
708
|
+
if s.kind_of?(C::If) and (s.bthen.kind_of?(C::Block) or s.bthen.kind_of?(C::Goto))
|
709
|
+
s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of?(C::Goto)
|
685
710
|
|
686
711
|
bts = s.bthen.statements
|
687
712
|
|
688
713
|
# if (a) if (b) { c; } => if (a && b) { c; }
|
689
|
-
if bts.length == 1 and bts.first.kind_of?
|
714
|
+
if bts.length == 1 and bts.first.kind_of?(C::If) and not bts.first.belse
|
690
715
|
s.test = C::CExpression[s.test, :'&&', bts.first.test]
|
691
716
|
bts = bts.first.bthen
|
692
717
|
bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts]
|
693
718
|
end
|
694
719
|
|
695
720
|
# if (a) { if (b) goto c; d; } c: => if (a && !b) { d; }
|
696
|
-
if bts.first.kind_of?
|
721
|
+
if bts.first.kind_of?(C::If) and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of?(C::Goto) and ary[0].kind_of?(C::Label) and l.target == ary[0].name
|
697
722
|
s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)]
|
698
723
|
if e = bts.shift.belse
|
699
724
|
bts.unshift e
|
@@ -701,18 +726,18 @@ class Decompiler
|
|
701
726
|
end
|
702
727
|
|
703
728
|
# if () { goto a; } a:
|
704
|
-
if bts.last.kind_of?
|
729
|
+
if bts.last.kind_of?(C::Goto) and ary[0].kind_of?(C::Label) and bts.last.target == ary[0].name
|
705
730
|
bts.pop
|
706
731
|
end
|
707
732
|
|
708
733
|
# if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer;
|
709
|
-
if bts.last.kind_of?
|
734
|
+
if bts.last.kind_of?(C::Goto) and not inner_labels.include?(bts.last.target) and g = ary.find { |ss| ss.kind_of?(C::Goto) or ss.kind_of?(C::Return) } and g.kind_of?(C::Return)
|
710
735
|
s.test = C::CExpression.negate s.test
|
711
736
|
ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)]
|
712
737
|
end
|
713
738
|
|
714
739
|
# if { a; goto l; } b; l: => if {a;} else {b;}
|
715
|
-
if bts.last.kind_of?
|
740
|
+
if bts.last.kind_of?(C::Goto) and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target }
|
716
741
|
s.belse = C::Block.new(scope)
|
717
742
|
s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse)
|
718
743
|
ary[0...ary.index(l)] = []
|
@@ -720,7 +745,7 @@ class Decompiler
|
|
720
745
|
end
|
721
746
|
|
722
747
|
# if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any;
|
723
|
-
if not s.belse and (bts.last.kind_of?
|
748
|
+
if not s.belse and (bts.last.kind_of?(C::Goto) or bts.last.kind_of?(C::Return)) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target }
|
724
749
|
s.belse = C::Block.new(scope)
|
725
750
|
s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse)
|
726
751
|
ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], []
|
@@ -730,8 +755,8 @@ class Decompiler
|
|
730
755
|
if s.belse
|
731
756
|
bes = s.belse.statements
|
732
757
|
while not bts.empty?
|
733
|
-
if bts.last.kind_of?
|
734
|
-
elsif bes.last.kind_of?
|
758
|
+
if bts.last.kind_of?(C::Label); ary.unshift bts.pop
|
759
|
+
elsif bes.last.kind_of?(C::Label); ary.unshift bes.pop
|
735
760
|
elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop
|
736
761
|
else break
|
737
762
|
end
|
@@ -754,24 +779,24 @@ class Decompiler
|
|
754
779
|
end
|
755
780
|
|
756
781
|
# l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1;
|
757
|
-
if s.kind_of?
|
782
|
+
if s.kind_of?(C::If)
|
758
783
|
ls = s.bthen
|
759
|
-
ls = ls.statements.last if ls.kind_of?
|
760
|
-
if ls.kind_of?
|
784
|
+
ls = ls.statements.last if ls.kind_of?(C::Block)
|
785
|
+
if ls.kind_of?(C::Goto)
|
761
786
|
if li = inner_labels.index(ls.target)
|
762
787
|
table = inner_labels
|
763
788
|
else
|
764
|
-
table = ary.map { |st| st.name if st.kind_of?
|
789
|
+
table = ary.map { |st| st.name if st.kind_of?(C::Label) }.compact.reverse
|
765
790
|
li = table.index(ls.target) || table.length
|
766
791
|
end
|
767
792
|
g = ary.find { |ss|
|
768
|
-
break if ss.kind_of?
|
769
|
-
next if not ss.kind_of?
|
793
|
+
break if ss.kind_of?(C::Return)
|
794
|
+
next if not ss.kind_of?(C::Goto)
|
770
795
|
table.index(ss.target).to_i > li
|
771
796
|
}
|
772
797
|
if g
|
773
798
|
s.test = C::CExpression.negate s.test
|
774
|
-
if not s.bthen.kind_of?
|
799
|
+
if not s.bthen.kind_of?(C::Block)
|
775
800
|
ls = C::Block.new(scope)
|
776
801
|
ls.statements << s.bthen
|
777
802
|
s.bthen = ls
|
@@ -790,81 +815,96 @@ class Decompiler
|
|
790
815
|
return if forbid_decompile_ifwhile
|
791
816
|
|
792
817
|
# find the next instruction that is not a label
|
793
|
-
ni = lambda { |
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
ary.
|
818
|
+
ni = lambda { |li| (li..ary.length).find { |ni_| not ary[ni_].kind_of?(C::Label) } }
|
819
|
+
|
820
|
+
finished = false; while not finished; finished = true # ruby1.9 does not support 'retry'
|
821
|
+
i = 0
|
822
|
+
while i < ary.length
|
823
|
+
si = i
|
824
|
+
s = ary[si]
|
825
|
+
i += 1
|
798
826
|
case s
|
799
827
|
when C::Label
|
800
|
-
if
|
801
|
-
if ss.bthen.statements.last.kind_of?
|
828
|
+
if ssi = ni[si] and ss = ary[ssi] and ss.kind_of?(C::If) and not ss.belse and ss.bthen.kind_of?(C::Block)
|
829
|
+
if ss.bthen.statements.last.kind_of?(C::Goto) and ss.bthen.statements.last.target == s.name
|
830
|
+
# l: if (a) { b; goto l; } => while(a) { b; }
|
802
831
|
ss.bthen.statements.pop
|
803
|
-
if l = ary[
|
804
|
-
ss.bthen.statements.grep(C::If).each { |
|
805
|
-
|
832
|
+
if l = ary[ssi+1] and l.kind_of?(C::Label)
|
833
|
+
ss.bthen.statements.grep(C::If).each { |it|
|
834
|
+
it.bthen = C::Break.new if it.bthen.kind_of?(C::Goto) and it.bthen.target == l.name
|
806
835
|
}
|
807
836
|
end
|
808
|
-
ary[
|
809
|
-
elsif ss.bthen.statements.last.kind_of?
|
837
|
+
ary[ssi] = C::While.new(ss.test, ss.bthen)
|
838
|
+
elsif ss.bthen.statements.last.kind_of?(C::Return) and gi = ((si+1)..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name }
|
839
|
+
# l: if (a) { b; return; } c; goto l; => while (!a) { c; } b; return;
|
810
840
|
wb = C::Block.new(scope)
|
811
|
-
wb.statements = decompile_cseq_while(ary[
|
841
|
+
wb.statements = decompile_cseq_while(ary[ssi+1...gi], wb)
|
812
842
|
w = C::While.new(C::CExpression.negate(ss.test), wb)
|
813
|
-
ary[
|
843
|
+
ary[ssi..gi] = [w, *ss.bthen.statements]
|
814
844
|
finished = false ; break #retry
|
815
845
|
end
|
816
846
|
end
|
817
|
-
if
|
847
|
+
if gi = (si..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name }
|
848
|
+
# l: a; goto l; => while(1) { a; }
|
818
849
|
wb = C::Block.new(scope)
|
819
|
-
wb.statements = decompile_cseq_while(ary[
|
850
|
+
wb.statements = decompile_cseq_while(ary[si...gi], wb)
|
820
851
|
w = C::While.new(C::CExpression[1], wb)
|
821
|
-
ary[
|
852
|
+
ary[si..gi] = [w]
|
822
853
|
finished = false ; break #retry
|
823
854
|
end
|
824
|
-
if
|
825
|
-
(gt = gt.kind_of?(C::Block)
|
855
|
+
if gi = (si..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::If) and not ary[_si].belse and gt = ary[_si].bthen and
|
856
|
+
(gt = gt.kind_of?(C::Block) ? gt.statements.last : gt) and gt.kind_of?(C::Goto) and gt.target == s.name }
|
857
|
+
# l: a; if (b) goto l; => do { a; } while (b);
|
858
|
+
# l: a; if (b) { c; goto l; } => do { a; if (!b) break; c; } while(1);
|
826
859
|
wb = C::Block.new(scope)
|
827
|
-
|
828
|
-
|
829
|
-
|
860
|
+
g = ary[gi]
|
861
|
+
if g.bthen.kind_of?(C::Block) and g.bthen.statements.length > 1
|
862
|
+
nary = ary[si...gi] + [C::If.new(C::CExpression.negate(g.test), C::Break.new)] + g.bthen.statements[0...-1]
|
863
|
+
wb.statements = decompile_cseq_while(nary, wb)
|
864
|
+
w = C::DoWhile.new(C::CExpression[1], wb)
|
865
|
+
else
|
866
|
+
wb.statements = decompile_cseq_while(ary[si...gi], wb)
|
867
|
+
w = C::DoWhile.new(g.test, wb)
|
868
|
+
end
|
869
|
+
ary[si..gi] = [w]
|
830
870
|
finished = false ; break #retry
|
831
871
|
end
|
832
872
|
when C::If
|
833
|
-
decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of?
|
834
|
-
decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of?
|
873
|
+
decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of?(C::Block)
|
874
|
+
decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of?(C::Block)
|
835
875
|
when C::While, C::DoWhile
|
836
|
-
decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of?
|
876
|
+
decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of?(C::Block)
|
837
877
|
end
|
838
|
-
}
|
839
878
|
end
|
879
|
+
end # while finished
|
840
880
|
ary
|
841
881
|
end
|
842
882
|
|
843
883
|
# TODO
|
844
884
|
def decompile_cseq_switch(scope)
|
845
|
-
uncast = lambda { |e| e = e.rexpr while e.kind_of?
|
885
|
+
uncast = lambda { |e| e = e.rexpr while e.kind_of?(C::CExpression) and not e.op ; e }
|
846
886
|
walk(scope) { |s|
|
847
887
|
# XXX pfff...
|
848
|
-
next if not s.kind_of?
|
888
|
+
next if not s.kind_of?(C::If)
|
849
889
|
# if (v < 12) return ((void(*)())(tableaddr+4*v))();
|
850
890
|
t = s.bthen
|
851
|
-
t = t.statements.first if t.kind_of?
|
852
|
-
next if not t.kind_of?
|
853
|
-
next if t.from_instr.comment.to_a.include?
|
854
|
-
next if not t.value.kind_of?
|
891
|
+
t = t.statements.first if t.kind_of?(C::Block) and t.statements.length == 1
|
892
|
+
next if not t.kind_of?(C::Return) or not t.respond_to?(:from_instr)
|
893
|
+
next if t.from_instr.comment.to_a.include?('switch')
|
894
|
+
next if not t.value.kind_of?(C::CExpression) or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of?(C::CExpression) or t.value.lexpr.op
|
855
895
|
p = uncast[t.value.lexpr.rexpr]
|
856
|
-
next if not p.kind_of?
|
896
|
+
next if not p.kind_of?(C::CExpression) or p.op != :* or p.lexpr
|
857
897
|
p = uncast[p.rexpr]
|
858
|
-
next if not p.kind_of?
|
898
|
+
next if not p.kind_of?(C::CExpression) or p.op != :+
|
859
899
|
r, l = uncast[p.rexpr], uncast[p.lexpr]
|
860
|
-
r, l = l, r if r.kind_of?
|
861
|
-
next if not r.kind_of?
|
900
|
+
r, l = l, r if r.kind_of?(C::CExpression)
|
901
|
+
next if not r.kind_of?(::Integer) or not l.kind_of?(C::CExpression) or l.op != :* or not l.lexpr
|
862
902
|
lr, ll = uncast[l.rexpr], uncast[l.lexpr]
|
863
|
-
lr, ll = ll, lr if not ll.kind_of?
|
903
|
+
lr, ll = ll, lr if not ll.kind_of?(::Integer)
|
864
904
|
next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void)))
|
865
905
|
base, index = r, lr
|
866
|
-
if s.test.kind_of?
|
867
|
-
s.test.rexpr.kind_of?
|
906
|
+
if s.test.kind_of?(C::CExpression) and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and
|
907
|
+
s.test.rexpr.kind_of?(C::CExpression) and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of?(::Integer)
|
868
908
|
t.from_instr.add_comment 'switch'
|
869
909
|
sup = s.test.rexpr.rexpr
|
870
910
|
rng = ((s.test.op == :<) ? (0...sup) : (0..sup))
|
@@ -883,19 +923,19 @@ class Decompiler
|
|
883
923
|
|
884
924
|
used = []
|
885
925
|
walk(scope) { |ss|
|
886
|
-
used |= [ss.target] if ss.kind_of?
|
926
|
+
used |= [ss.target] if ss.kind_of?(C::Goto)
|
887
927
|
}
|
888
928
|
walk(scope) { |s|
|
889
|
-
next if not s.kind_of?
|
929
|
+
next if not s.kind_of?(C::Block)
|
890
930
|
s.statements.delete_if { |l|
|
891
|
-
l.kind_of?
|
931
|
+
l.kind_of?(C::Label) and not used.include?(l.name)
|
892
932
|
}
|
893
933
|
}
|
894
934
|
|
895
935
|
# remove implicit continue; at end of loop
|
896
936
|
walk(scope) { |s|
|
897
|
-
next if not s.kind_of?
|
898
|
-
if s.body.kind_of?
|
937
|
+
next if not s.kind_of?(C::While)
|
938
|
+
if s.body.kind_of?(C::Block) and s.body.statements.last.kind_of?(C::Continue)
|
899
939
|
s.body.statements.pop
|
900
940
|
end
|
901
941
|
}
|
@@ -903,11 +943,11 @@ class Decompiler
|
|
903
943
|
|
904
944
|
# checks if expr is a var (var or *&var)
|
905
945
|
def isvar(ce, var)
|
906
|
-
if var.stackoff and ce.kind_of?
|
946
|
+
if var.stackoff and ce.kind_of?(C::CExpression)
|
907
947
|
return unless ce.op == :* and not ce.lexpr
|
908
948
|
ce = ce.rexpr
|
909
|
-
ce = ce.rexpr while ce.kind_of?
|
910
|
-
return unless ce.kind_of?
|
949
|
+
ce = ce.rexpr while ce.kind_of?(C::CExpression) and not ce.op
|
950
|
+
return unless ce.kind_of?(C::CExpression) and ce.op == :& and not ce.lexpr
|
911
951
|
ce = ce.rexpr
|
912
952
|
end
|
913
953
|
ce == var
|
@@ -929,7 +969,7 @@ class Decompiler
|
|
929
969
|
# checks if expr writes var
|
930
970
|
def ce_write(ce_, var)
|
931
971
|
walk_ce(ce_) { |ce|
|
932
|
-
break true if AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or
|
972
|
+
break true if C::CExpression::AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or
|
933
973
|
(((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var))))
|
934
974
|
}
|
935
975
|
end
|
@@ -971,6 +1011,8 @@ class Decompiler
|
|
971
1011
|
write = {}
|
972
1012
|
ro = {}
|
973
1013
|
wo = {}
|
1014
|
+
g_exprs = {}
|
1015
|
+
g.exprs_var[var.name].to_h.each { |k, v| g_exprs[k] = v.map { |i| g.exprs[k][i] } }
|
974
1016
|
|
975
1017
|
# list of [l, i] for which domain is not known
|
976
1018
|
unchecked = []
|
@@ -978,7 +1020,7 @@ class Decompiler
|
|
978
1020
|
# mark all exprs of the graph
|
979
1021
|
# TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write
|
980
1022
|
r = var.has_attribute_var('register')
|
981
|
-
|
1023
|
+
g_exprs.each { |label, exprs|
|
982
1024
|
exprs.each_with_index { |ce, i|
|
983
1025
|
if ce_read(ce, var)
|
984
1026
|
if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
|
@@ -1006,28 +1048,29 @@ class Decompiler
|
|
1006
1048
|
todo_w = [[l, i-1]]
|
1007
1049
|
done_w = []
|
1008
1050
|
while o = todo_w.pop
|
1009
|
-
next if done_w.include?
|
1051
|
+
next if done_w.include?(o)
|
1010
1052
|
done_w << o
|
1011
1053
|
l, i = o
|
1012
1054
|
loop do
|
1013
|
-
if read[l].to_a.include?
|
1055
|
+
if read[l].to_a.include?(i)
|
1014
1056
|
# XXX not optimal (should mark only the uppest read)
|
1015
|
-
todo_down |= [[l, i]] if not dom.include?
|
1057
|
+
todo_down |= [[l, i]] if not dom.include?([l, i])
|
1016
1058
|
dom |= [[l, i]]
|
1017
|
-
elsif write[l].to_a.include?
|
1018
|
-
todo_down |= [[l, i]] if not dom.include?
|
1059
|
+
elsif write[l].to_a.include?(i)
|
1060
|
+
todo_down |= [[l, i]] if not dom.include?([l, i])
|
1019
1061
|
dom |= [[l, i]]
|
1020
1062
|
break
|
1021
|
-
elsif wo[l].to_a.include?
|
1022
|
-
todo_down |= [[l, i]] if not dom_wo.include?
|
1063
|
+
elsif wo[l].to_a.include?(i)
|
1064
|
+
todo_down |= [[l, i]] if not dom_wo.include?([l, i, :down])
|
1023
1065
|
dom_wo |= [[l, i, :down]]
|
1024
1066
|
break
|
1025
1067
|
end
|
1026
1068
|
i -= 1
|
1027
1069
|
if i < 0
|
1028
1070
|
g.from_optim[l].to_a.each { |ll|
|
1029
|
-
todo_w << [ll,
|
1071
|
+
todo_w << [ll, g_exprs[ll].to_a.length-1]
|
1030
1072
|
}
|
1073
|
+
# read unitialized
|
1031
1074
|
func_top = true if g.from_optim[l].to_a.empty?
|
1032
1075
|
break
|
1033
1076
|
end
|
@@ -1036,27 +1079,27 @@ class Decompiler
|
|
1036
1079
|
}
|
1037
1080
|
|
1038
1081
|
# flood by walking the graph down from [l, i] (excluded)
|
1039
|
-
#
|
1082
|
+
# marks stuff to walk up
|
1040
1083
|
walk_down = lambda { |l, i|
|
1041
1084
|
todo_w = [[l, i+1]]
|
1042
1085
|
done_w = []
|
1043
1086
|
while o = todo_w.pop
|
1044
|
-
next if done_w.include?
|
1087
|
+
next if done_w.include?(o)
|
1045
1088
|
done_w << o
|
1046
1089
|
l, i = o
|
1047
1090
|
loop do
|
1048
|
-
if read[l].to_a.include?
|
1049
|
-
todo_up |= [[l, i]] if not dom.include?
|
1091
|
+
if read[l].to_a.include?(i)
|
1092
|
+
todo_up |= [[l, i]] if not dom.include?([l, i])
|
1050
1093
|
dom |= [[l, i]]
|
1051
|
-
elsif write[l].to_a.include?
|
1094
|
+
elsif write[l].to_a.include?(i)
|
1052
1095
|
break
|
1053
|
-
elsif ro[l].to_a.include?
|
1054
|
-
todo_up |= [[l, i]] if not dom_ro.include?
|
1096
|
+
elsif ro[l].to_a.include?(i)
|
1097
|
+
todo_up |= [[l, i]] if not dom_ro.include?([l, i, :up])
|
1055
1098
|
dom_ro |= [[l, i, :up]]
|
1056
1099
|
break
|
1057
1100
|
end
|
1058
1101
|
i += 1
|
1059
|
-
if i >=
|
1102
|
+
if i >= g_exprs[l].to_a.length
|
1060
1103
|
g.to_optim[l].to_a.each { |ll|
|
1061
1104
|
todo_w << [ll, 0]
|
1062
1105
|
}
|
@@ -1077,11 +1120,11 @@ class Decompiler
|
|
1077
1120
|
todo_down = []
|
1078
1121
|
|
1079
1122
|
# init
|
1080
|
-
if read[o[0]].to_a.include?
|
1123
|
+
if read[o[0]].to_a.include?(o[1])
|
1081
1124
|
todo_up << o
|
1082
1125
|
todo_down << o
|
1083
1126
|
dom << o
|
1084
|
-
elsif write[o[0]].to_a.include?
|
1127
|
+
elsif write[o[0]].to_a.include?(o[1])
|
1085
1128
|
todo_down << o
|
1086
1129
|
dom << o
|
1087
1130
|
elsif o[2] == :up
|
@@ -1111,25 +1154,29 @@ class Decompiler
|
|
1111
1154
|
n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"]
|
1112
1155
|
|
1113
1156
|
nv = var.dup
|
1157
|
+
nv.misc = var.misc ? var.misc.dup : {}
|
1114
1158
|
nv.storage = :register if nv.has_attribute_var('register')
|
1115
1159
|
nv.attributes = nv.attributes.dup if nv.attributes
|
1116
1160
|
nv.name = newvarname
|
1161
|
+
nv.misc[:unalias_name] = newvarname
|
1117
1162
|
scope.statements << C::Declaration.new(nv)
|
1118
1163
|
scope.symbol[nv.name] = nv
|
1119
1164
|
|
1120
|
-
dom.each { |oo| ce_patch(
|
1165
|
+
dom.each { |oo| ce_patch(g_exprs[oo[0]][oo[1]], var, nv) }
|
1121
1166
|
dom_ro.each { |oo|
|
1122
|
-
ce =
|
1123
|
-
if ce.op == :funcall
|
1167
|
+
ce = g_exprs[oo[0]][oo[1]]
|
1168
|
+
if ce.op == :funcall
|
1169
|
+
ce_patch(ce, var, nv)
|
1170
|
+
elsif ce.rexpr.kind_of?(C::CExpression)
|
1124
1171
|
ce_patch(ce.rexpr, var, nv)
|
1125
1172
|
else
|
1126
1173
|
ce.rexpr = nv
|
1127
1174
|
end
|
1128
1175
|
}
|
1129
1176
|
dom_wo.each { |oo|
|
1130
|
-
ce =
|
1177
|
+
ce = g_exprs[oo[0]][oo[1]]
|
1131
1178
|
if ce.op == :funcall
|
1132
|
-
elsif ce.lexpr.kind_of?
|
1179
|
+
elsif ce.lexpr.kind_of?(C::CExpression)
|
1133
1180
|
ce_patch(ce.lexpr, var, nv)
|
1134
1181
|
else
|
1135
1182
|
ce.lexpr = nv
|
@@ -1174,13 +1221,10 @@ class Decompiler
|
|
1174
1221
|
v
|
1175
1222
|
}
|
1176
1223
|
|
1177
|
-
scope.decompdata[:stackoff_name].each { |o, n| newvar[o, n] }
|
1178
|
-
scope.decompdata[:stackoff_type].each { |o, t| newvar[o, stackoff_to_varname(o)] }
|
1179
|
-
|
1180
1224
|
walk_ce(scope) { |e|
|
1181
1225
|
next if e.op != :+ and e.op != :-
|
1182
|
-
next if not e.lexpr.kind_of?
|
1183
|
-
next if not e.rexpr.kind_of?
|
1226
|
+
next if not e.lexpr.kind_of?(C::Variable) or e.lexpr.name != 'frameptr'
|
1227
|
+
next if not e.rexpr.kind_of?(C::CExpression) or e.rexpr.op or not e.rexpr.rexpr.kind_of?(::Integer)
|
1184
1228
|
off = e.rexpr.rexpr
|
1185
1229
|
off = -off if e.op == :-
|
1186
1230
|
v = newvar[off, stackoff_to_varname(off)]
|
@@ -1200,24 +1244,24 @@ class Decompiler
|
|
1200
1244
|
types = {}
|
1201
1245
|
|
1202
1246
|
pscopevar = lambda { |e|
|
1203
|
-
e = e.rexpr while e.kind_of?
|
1204
|
-
if e.kind_of?
|
1247
|
+
e = e.rexpr while e.kind_of?(C::CExpression) and not e.op and e.rexpr.kind_of?(C::CExpression)
|
1248
|
+
if e.kind_of?(C::CExpression) and e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable)
|
1205
1249
|
e.rexpr.name if scope.symbol[e.rexpr.name]
|
1206
1250
|
end
|
1207
1251
|
}
|
1208
1252
|
scopevar = lambda { |e|
|
1209
|
-
e = e.rexpr if e.kind_of?
|
1210
|
-
if e.kind_of?
|
1253
|
+
e = e.rexpr if e.kind_of?(C::CExpression) and not e.op
|
1254
|
+
if e.kind_of?(C::Variable) and scope.symbol[e.name]
|
1211
1255
|
e.name
|
1212
|
-
elsif e.kind_of?
|
1256
|
+
elsif e.kind_of?(C::CExpression) and e.op == :* and not e.lexpr
|
1213
1257
|
pscopevar[e.rexpr]
|
1214
1258
|
end
|
1215
1259
|
}
|
1216
1260
|
globalvar = lambda { |e|
|
1217
|
-
e = e.rexpr if e.kind_of?
|
1218
|
-
if e.kind_of?
|
1261
|
+
e = e.rexpr if e.kind_of?(C::CExpression) and not e.op
|
1262
|
+
if e.kind_of?(::Integer) and e > 0x10000 and @dasm.get_section_at(e)
|
1219
1263
|
e
|
1220
|
-
elsif e.kind_of?
|
1264
|
+
elsif e.kind_of?(C::Variable) and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name)
|
1221
1265
|
e.name
|
1222
1266
|
end
|
1223
1267
|
}
|
@@ -1225,8 +1269,8 @@ class Decompiler
|
|
1225
1269
|
# check if a newly found type for o is better than current type
|
1226
1270
|
# order: foo* > void* > foo
|
1227
1271
|
better_type = lambda { |t0, t1|
|
1228
|
-
t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of?
|
1229
|
-
(t0.kind_of?
|
1272
|
+
t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of?(C::BaseType)) or t0.untypedef.kind_of?(C::Union) or
|
1273
|
+
(t0.kind_of?(C::BaseType) and t1.kind_of?(C::BaseType) and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or
|
1230
1274
|
(t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed])
|
1231
1275
|
}
|
1232
1276
|
|
@@ -1234,11 +1278,11 @@ class Decompiler
|
|
1234
1278
|
if ne = new_global_var(e, t, scope)
|
1235
1279
|
ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne
|
1236
1280
|
# TODO rename (dword_xx -> byte_xx etc)
|
1237
|
-
e = scope.symbol_ancestors[e] || e if e.kind_of?
|
1281
|
+
e = scope.symbol_ancestors[e] || e if e.kind_of?(String) # exe reloc
|
1238
1282
|
walk_ce(scope) { |ce|
|
1239
1283
|
ce.lexpr = ne if ce.lexpr == e
|
1240
1284
|
ce.rexpr = ne if ce.rexpr == e
|
1241
|
-
if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of?
|
1285
|
+
if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of?(C::Union)
|
1242
1286
|
# *struct -> struct->bla
|
1243
1287
|
ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type))
|
1244
1288
|
elsif ce.lexpr == ne or ce.rexpr == ne
|
@@ -1255,10 +1299,10 @@ class Decompiler
|
|
1255
1299
|
# check if need to change the type of a var
|
1256
1300
|
# propagate_type if type is updated
|
1257
1301
|
update_type = lambda { |n, t|
|
1258
|
-
next if propagating.include?
|
1302
|
+
next if propagating.include?(n)
|
1259
1303
|
o = scope.symbol[n].stackoff
|
1260
|
-
next if not o and t.untypedef.kind_of?
|
1261
|
-
next if
|
1304
|
+
next if not o and t.untypedef.kind_of?(C::Union)
|
1305
|
+
next if scope.decompdata[:unalias_type][n] and t != scope.decompdata[:unalias_type][n]
|
1262
1306
|
next if t0 = types[n] and not better_type[t, t0]
|
1263
1307
|
next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned
|
1264
1308
|
types[n] = t
|
@@ -1268,7 +1312,7 @@ class Decompiler
|
|
1268
1312
|
propagating.delete n
|
1269
1313
|
next if not o
|
1270
1314
|
t = t.untypedef
|
1271
|
-
if t.kind_of?
|
1315
|
+
if t.kind_of?(C::Struct)
|
1272
1316
|
t.members.to_a.each { |m|
|
1273
1317
|
mo = t.offsetof(@c_parser, m.name)
|
1274
1318
|
next if mo == 0
|
@@ -1282,23 +1326,23 @@ class Decompiler
|
|
1282
1326
|
# try to update the type of a var from knowing the type of an expr (through dereferences etc)
|
1283
1327
|
known_type = lambda { |e, t|
|
1284
1328
|
loop do
|
1285
|
-
e = e.rexpr while e.kind_of?
|
1329
|
+
e = e.rexpr while e.kind_of?(C::CExpression) and not e.op and e.type == t
|
1286
1330
|
if o = scopevar[e]
|
1287
1331
|
update_type[o, t]
|
1288
1332
|
elsif o = globalvar[e]
|
1289
1333
|
update_global_type[o, t]
|
1290
|
-
elsif not e.kind_of?
|
1334
|
+
elsif not e.kind_of?(C::CExpression)
|
1291
1335
|
elsif o = pscopevar[e] and t.pointer?
|
1292
1336
|
update_type[o, t.pointed]
|
1293
1337
|
elsif e.op == :* and not e.lexpr
|
1294
1338
|
e = e.rexpr
|
1295
1339
|
t = C::Pointer.new(t)
|
1296
1340
|
next
|
1297
|
-
elsif t.pointer? and e.op == :+ and e.lexpr.kind_of?
|
1341
|
+
elsif t.pointer? and e.op == :+ and e.lexpr.kind_of?(C::CExpression) and e.lexpr.type.integral? and e.rexpr.kind_of?(C::Variable)
|
1298
1342
|
e.lexpr, e.rexpr = e.rexpr, e.lexpr
|
1299
1343
|
next
|
1300
|
-
elsif e.op == :+ and e.lexpr and e.rexpr.kind_of?
|
1301
|
-
if not e.rexpr.op and e.rexpr.rexpr.kind_of?
|
1344
|
+
elsif e.op == :+ and e.lexpr and e.rexpr.kind_of?(C::CExpression)
|
1345
|
+
if not e.rexpr.op and e.rexpr.rexpr.kind_of?(::Integer)
|
1302
1346
|
if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0..
|
1303
1347
|
e = e.lexpr # (int)*(x+2) === (int) *x
|
1304
1348
|
next
|
@@ -1307,13 +1351,13 @@ class Decompiler
|
|
1307
1351
|
e = e.rexpr
|
1308
1352
|
next
|
1309
1353
|
end
|
1310
|
-
elsif t.pointer? and (e.lexpr.kind_of?
|
1354
|
+
elsif t.pointer? and (e.lexpr.kind_of?(C::CExpression) and e.lexpr.lexpr and [:<<, :>>, :*, :&].include?(e.lexpr.op)) or
|
1311
1355
|
(o = scopevar[e.lexpr] and types[o] and types[o].integral? and
|
1312
1356
|
!(o = scopevar[e.rexpr] and types[o] and types[o].integral?))
|
1313
1357
|
e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap
|
1314
1358
|
e = e.lexpr
|
1315
1359
|
next
|
1316
|
-
elsif t.pointer? and ((e.rexpr.kind_of?
|
1360
|
+
elsif t.pointer? and ((e.rexpr.kind_of?(C::CExpression) and e.rexpr.lexpr and [:<<, :>>, :*, :&].include?(e.rexpr.op)) or
|
1317
1361
|
(o = scopevar[e.rexpr] and types[o] and types[o].integral? and
|
1318
1362
|
!(o = scopevar[e.lexpr] and types[o] and types[o].integral?)))
|
1319
1363
|
e = e.lexpr
|
@@ -1329,11 +1373,11 @@ class Decompiler
|
|
1329
1373
|
walk_ce(scope) { |ce|
|
1330
1374
|
next if ce.op != :'='
|
1331
1375
|
|
1332
|
-
if ce.lexpr.kind_of?
|
1376
|
+
if ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == var
|
1333
1377
|
known_type[ce.rexpr, type]
|
1334
1378
|
next
|
1335
1379
|
end
|
1336
|
-
if ce.rexpr.kind_of?
|
1380
|
+
if ce.rexpr.kind_of?(C::Variable) and ce.rexpr.name == var
|
1337
1381
|
known_type[ce.lexpr, type]
|
1338
1382
|
next
|
1339
1383
|
end
|
@@ -1341,7 +1385,7 @@ class Decompiler
|
|
1341
1385
|
# int **x; y = **x => int y
|
1342
1386
|
t = type
|
1343
1387
|
l = ce.lexpr
|
1344
|
-
while l.kind_of?
|
1388
|
+
while l.kind_of?(C::CExpression) and l.op == :* and not l.lexpr
|
1345
1389
|
if var == pscopevar[l.rexpr]
|
1346
1390
|
known_type[ce.rexpr, t]
|
1347
1391
|
break
|
@@ -1355,7 +1399,7 @@ class Decompiler
|
|
1355
1399
|
# int **x; **x = y => int y
|
1356
1400
|
t = type
|
1357
1401
|
r = ce.rexpr
|
1358
|
-
while r.kind_of?
|
1402
|
+
while r.kind_of?(C::CExpression) and r.op == :* and not r.lexpr
|
1359
1403
|
if var == pscopevar[r.rexpr]
|
1360
1404
|
known_type[ce.lexpr, t]
|
1361
1405
|
break
|
@@ -1373,32 +1417,32 @@ class Decompiler
|
|
1373
1417
|
# put all those macros in use
|
1374
1418
|
# use user-defined types first
|
1375
1419
|
scope.symbol.each_value { |v|
|
1376
|
-
next if not v.kind_of?
|
1420
|
+
next if not v.kind_of?(C::Variable) or not t = scope.decompdata[:unalias_type][v.name]
|
1377
1421
|
known_type[v, t]
|
1378
1422
|
}
|
1379
1423
|
|
1380
1424
|
# try to infer types from C semantics
|
1381
1425
|
later = []
|
1382
1426
|
walk_ce(scope) { |ce|
|
1383
|
-
if ce.op == :'=' and ce.rexpr.kind_of?
|
1384
|
-
ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of?
|
1427
|
+
if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of?(::Integer) and
|
1428
|
+
ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of?(C::CExpression) or ce.lexpr.op != :'*' or ce.lexpr.lexpr)))
|
1385
1429
|
# var = int
|
1386
1430
|
known_type[ce.lexpr, ce.rexpr.type]
|
1387
1431
|
elsif ce.op == :funcall
|
1388
1432
|
f = ce.lexpr.type
|
1389
1433
|
f = f.pointed if f.pointer?
|
1390
|
-
next if not f.kind_of?
|
1434
|
+
next if not f.kind_of?(C::Function)
|
1391
1435
|
# cast func args to arg prototypes
|
1392
1436
|
f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] }
|
1393
1437
|
elsif ce.op == :* and not ce.lexpr
|
1394
|
-
if e = ce.rexpr and e.kind_of?
|
1395
|
-
e.op == :& and not e.lexpr and e.rexpr.kind_of?
|
1438
|
+
if e = ce.rexpr and e.kind_of?(C::CExpression) and not e.op and e = e.rexpr and e.kind_of?(C::CExpression) and
|
1439
|
+
e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable) and e.rexpr.stackoff
|
1396
1440
|
# skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything
|
1397
1441
|
later << [ce.rexpr, C::Pointer.new(ce.type)]
|
1398
1442
|
next
|
1399
1443
|
end
|
1400
1444
|
known_type[ce.rexpr, C::Pointer.new(ce.type)]
|
1401
|
-
elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of?
|
1445
|
+
elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of?(C::Function)
|
1402
1446
|
# cast to fptr: must be a fptr
|
1403
1447
|
known_type[ce.rexpr, ce.type]
|
1404
1448
|
end
|
@@ -1423,7 +1467,7 @@ class Decompiler
|
|
1423
1467
|
v = scope.symbol[n]
|
1424
1468
|
next if not o = v.stackoff
|
1425
1469
|
t = t.untypedef
|
1426
|
-
if t.kind_of?
|
1470
|
+
if t.kind_of?(C::Struct)
|
1427
1471
|
t.members.to_a.each { |tm|
|
1428
1472
|
moff = t.offsetof(@c_parser, tm.name)
|
1429
1473
|
next if moff == 0
|
@@ -1469,7 +1513,7 @@ class Decompiler
|
|
1469
1513
|
}
|
1470
1514
|
when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr]
|
1471
1515
|
when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr]
|
1472
|
-
ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of?
|
1516
|
+
ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of?(C::Variable)
|
1473
1517
|
when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr]
|
1474
1518
|
when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr]
|
1475
1519
|
when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]]
|
@@ -1485,14 +1529,14 @@ class Decompiler
|
|
1485
1529
|
varandff = Hash.new(0)
|
1486
1530
|
varandffff = Hash.new(0)
|
1487
1531
|
walk_ce(scope) { |ce|
|
1488
|
-
if ce.op == :& and ce.lexpr.kind_of?
|
1532
|
+
if ce.op == :& and ce.lexpr.kind_of?(C::Variable) and ce.lexpr.type.integral? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
|
1489
1533
|
case ce.rexpr.rexpr
|
1490
1534
|
when 0xff; varandff[ce.lexpr.name] += 1
|
1491
1535
|
when 0xffff; varandffff[ce.lexpr.name] += 1
|
1492
1536
|
end
|
1493
1537
|
end
|
1494
|
-
varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of?
|
1495
|
-
varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of?
|
1538
|
+
varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
|
1539
|
+
varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
|
1496
1540
|
}
|
1497
1541
|
varandff.each { |k, v|
|
1498
1542
|
scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v
|
@@ -1505,28 +1549,96 @@ class Decompiler
|
|
1505
1549
|
walk_ce(scope, true) { |ce|
|
1506
1550
|
if ce.op
|
1507
1551
|
ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next
|
1508
|
-
if ce.op == :'=' and ce.rexpr.kind_of?
|
1509
|
-
known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of?
|
1552
|
+
if ce.op == :'=' and ce.rexpr.kind_of?(C::Typed) and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?)
|
1553
|
+
known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of?(C::Function) # localvar = &struct with fptr
|
1510
1554
|
ce.rexpr = C::CExpression[[ce.rexpr], ce.type]
|
1511
1555
|
end
|
1512
|
-
elsif ce.type.pointer? and ce.rexpr.kind_of?
|
1556
|
+
elsif ce.type.pointer? and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed)
|
1513
1557
|
ce.type = ce.rexpr.type
|
1514
1558
|
end
|
1515
1559
|
}
|
1516
1560
|
end
|
1517
1561
|
|
1562
|
+
# use casts to determine variable types after code optimization
|
1563
|
+
# if all uses of var_42 are through *(int*)(&var_42), set type to int
|
1564
|
+
def decompile_c_types_again(scope)
|
1565
|
+
return if forbid_decompile_types
|
1566
|
+
|
1567
|
+
update_type = lambda { |n, t|
|
1568
|
+
o = scope.symbol[n].stackoff
|
1569
|
+
next if not o and t.untypedef.kind_of?(C::Union)
|
1570
|
+
next if scope.decompdata[:unalias_type][n] and t != scope.decompdata[:unalias_type][n]
|
1571
|
+
scope.symbol[n].type = t
|
1572
|
+
}
|
1573
|
+
|
1574
|
+
# true if e is a cast of a var address
|
1575
|
+
is_cast = lambda { |ce|
|
1576
|
+
true if not ce.op and not ce.lexpr and ce.rexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr
|
1577
|
+
}
|
1578
|
+
|
1579
|
+
# scan code for casts
|
1580
|
+
uses = {}
|
1581
|
+
count_refs = Hash.new(0)
|
1582
|
+
walk_ce(scope) { |ce|
|
1583
|
+
count_refs[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
|
1584
|
+
count_refs[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
|
1585
|
+
if is_cast[ce] and ce.rexpr.rexpr.kind_of?(C::Variable)
|
1586
|
+
(uses[ce.rexpr.rexpr.name] ||= []) << ce.type.pointed
|
1587
|
+
end
|
1588
|
+
}
|
1589
|
+
|
1590
|
+
# given a list of types, return a type compatible with all
|
1591
|
+
summary_type = lambda { |type_list|
|
1592
|
+
t = type_list.first
|
1593
|
+
type_list.each { |tt|
|
1594
|
+
if sizeof(t) != sizeof(tt) or t.integral? != tt.integral? or t.pointer? != tt.pointer?
|
1595
|
+
t = nil
|
1596
|
+
break
|
1597
|
+
elsif t != tt
|
1598
|
+
t = tt if t.to_s =~ /__/
|
1599
|
+
end
|
1600
|
+
}
|
1601
|
+
t
|
1602
|
+
}
|
1603
|
+
|
1604
|
+
updated = {}
|
1605
|
+
uses.each { |n, tl|
|
1606
|
+
if tl.length == count_refs[n] and t = summary_type[tl] and update_type[n, t]
|
1607
|
+
updated[n] = true
|
1608
|
+
end
|
1609
|
+
}
|
1610
|
+
return if updated.empty?
|
1611
|
+
|
1612
|
+
walk_ce(scope, true) { |ce|
|
1613
|
+
if is_cast[ce] and updated[ce.rexpr.rexpr.name]
|
1614
|
+
ce.op = :&
|
1615
|
+
ce.rexpr = ce.rexpr.rexpr
|
1616
|
+
elsif ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::Variable) and updated[ce.rexpr.rexpr.name]
|
1617
|
+
ce.op = nil
|
1618
|
+
ce.rexpr = ce.rexpr.rexpr
|
1619
|
+
elsif ce.op
|
1620
|
+
if ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::Variable) and updated[ce.rexpr.rexpr.name]
|
1621
|
+
ce.rexpr = ce.rexpr.rexpr
|
1622
|
+
end
|
1623
|
+
if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(C::Variable) and updated[ce.lexpr.rexpr.name]
|
1624
|
+
ce.lexpr = ce.lexpr.rexpr
|
1625
|
+
end
|
1626
|
+
end
|
1627
|
+
}
|
1628
|
+
end
|
1629
|
+
|
1518
1630
|
# struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l
|
1519
1631
|
# st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored)
|
1520
1632
|
def structoffset(st, ptr, off, msz)
|
1521
1633
|
tabidx = off / sizeof(st)
|
1522
1634
|
off -= tabidx * sizeof(st)
|
1523
|
-
ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of?
|
1635
|
+
ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of?(C::Array)
|
1524
1636
|
return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list
|
1525
|
-
(ptr.kind_of?
|
1526
|
-
not s.type.untypedef.kind_of?
|
1637
|
+
(ptr.kind_of?(C::CExpression) and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and
|
1638
|
+
not s.type.untypedef.kind_of?(C::Union)))
|
1527
1639
|
|
1528
1640
|
m_ptr = lambda { |m|
|
1529
|
-
if ptr.kind_of?
|
1641
|
+
if ptr.kind_of?(C::CExpression) and ptr.op == :& and not ptr.lexpr
|
1530
1642
|
C::CExpression[ptr.rexpr, :'.', m.name]
|
1531
1643
|
else
|
1532
1644
|
C::CExpression[ptr, :'->', m.name]
|
@@ -1544,7 +1656,7 @@ class Decompiler
|
|
1544
1656
|
sst = sm.type.untypedef
|
1545
1657
|
#return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st # TODO fix infinite recursion on mutually recursive ptrs
|
1546
1658
|
ptr = C::CExpression[:&, m_ptr[sm]]
|
1547
|
-
if sst.kind_of?
|
1659
|
+
if sst.kind_of?(C::Union)
|
1548
1660
|
return structoffset(sst, ptr, off, msz)
|
1549
1661
|
end
|
1550
1662
|
end
|
@@ -1561,15 +1673,15 @@ class Decompiler
|
|
1561
1673
|
# must be run only once, right after type setting
|
1562
1674
|
def fix_pointer_arithmetic(scope)
|
1563
1675
|
walk_ce(scope, true) { |ce|
|
1564
|
-
if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include?
|
1676
|
+
if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include?(ce.op)
|
1565
1677
|
ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)]
|
1566
1678
|
end
|
1567
1679
|
|
1568
|
-
if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?
|
1680
|
+
if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?(C::Union)))
|
1569
1681
|
ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
|
1570
1682
|
end
|
1571
1683
|
|
1572
|
-
if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?
|
1684
|
+
if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?(C::Struct)
|
1573
1685
|
s = ce.rexpr.type.pointed.untypedef
|
1574
1686
|
m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
|
1575
1687
|
if sizeof(m) != sizeof(ce)
|
@@ -1582,7 +1694,7 @@ class Decompiler
|
|
1582
1694
|
ce.rexpr = m.name
|
1583
1695
|
ce.type = m.type
|
1584
1696
|
next
|
1585
|
-
elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of?
|
1697
|
+
elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of?(C::Struct)
|
1586
1698
|
s = ce.lexpr.type.untypedef
|
1587
1699
|
m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
|
1588
1700
|
ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type)
|
@@ -1594,18 +1706,18 @@ class Decompiler
|
|
1594
1706
|
ce.type = ce.lexpr.type
|
1595
1707
|
end
|
1596
1708
|
|
1597
|
-
if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?
|
1709
|
+
if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :* and not ce.rexpr.lexpr
|
1598
1710
|
ce.replace C::CExpression[ce.rexpr.rexpr]
|
1599
1711
|
end
|
1600
1712
|
|
1601
1713
|
next if not ce.lexpr or not ce.lexpr.type.pointer?
|
1602
|
-
if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of?
|
1603
|
-
ce.rexpr.rexpr.kind_of?
|
1714
|
+
if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of?(C::Union) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and
|
1715
|
+
ce.rexpr.rexpr.kind_of?(::Integer) and o = ce.rexpr.rexpr
|
1604
1716
|
# structptr + 4 => &structptr->member
|
1605
1717
|
ce.replace structoffset(s, ce.lexpr, o, nil)
|
1606
|
-
elsif [:+, :-, :'+=', :'-='].include?
|
1607
|
-
(ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of?
|
1608
|
-
i.kind_of?
|
1718
|
+
elsif [:+, :-, :'+=', :'-='].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or
|
1719
|
+
(ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of?(C::CExpression) and not i.op and i = i.rexpr) or true))) and
|
1720
|
+
i.kind_of?(::Integer) and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0
|
1609
1721
|
# ptr += 4 => ptr += 1
|
1610
1722
|
if not ce.rexpr.op
|
1611
1723
|
ce.rexpr.rexpr /= psz
|
@@ -1646,9 +1758,10 @@ class Decompiler
|
|
1646
1758
|
# XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;)
|
1647
1759
|
# TODO should check stuff with aliasing domains
|
1648
1760
|
next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1)
|
1761
|
+
next if o1 == o2 and l1 != l2
|
1649
1762
|
# v1 => *(&v2+delta)
|
1650
1763
|
p = C::CExpression[:&, v2]
|
1651
|
-
p = C::CExpression[p, :+, [o1-o2]]
|
1764
|
+
p = C::CExpression[p, :+, [o1-o2]] if o1 != o2
|
1652
1765
|
p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type
|
1653
1766
|
p = C::CExpression[:*, p]
|
1654
1767
|
walk_ce(scope) { |ce|
|
@@ -1659,16 +1772,6 @@ class Decompiler
|
|
1659
1772
|
}
|
1660
1773
|
end
|
1661
1774
|
|
1662
|
-
# to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types
|
1663
|
-
# will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..')
|
1664
|
-
# remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
|
1665
|
-
# remove useless casts ('(int)i' with 'int i;' => 'i')
|
1666
|
-
def optimize(scope)
|
1667
|
-
optimize_code(scope)
|
1668
|
-
optimize_vars(scope)
|
1669
|
-
optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i)
|
1670
|
-
end
|
1671
|
-
|
1672
1775
|
# simplify cexpressions (char & 255, redundant casts, etc)
|
1673
1776
|
def optimize_code(scope)
|
1674
1777
|
return if forbid_optimize_code
|
@@ -1676,12 +1779,12 @@ class Decompiler
|
|
1676
1779
|
sametype = lambda { |t1, t2|
|
1677
1780
|
t1 = t1.untypedef
|
1678
1781
|
t2 = t2.untypedef
|
1679
|
-
t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of?
|
1680
|
-
t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of?
|
1782
|
+
t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of?(C::Function)
|
1783
|
+
t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of?(C::Function)
|
1681
1784
|
t1 == t2 or
|
1682
|
-
(t1.kind_of?
|
1785
|
+
(t1.kind_of?(C::Function) and t2.kind_of?(C::Function) and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
|
1683
1786
|
t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
|
1684
|
-
(t1.kind_of?
|
1787
|
+
(t1.kind_of?(C::BaseType) and t1.integral? and t2.kind_of?(C::BaseType) and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
|
1685
1788
|
(t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
|
1686
1789
|
}
|
1687
1790
|
|
@@ -1689,56 +1792,92 @@ class Decompiler
|
|
1689
1792
|
future_array = []
|
1690
1793
|
walk_ce(scope, true) { |ce|
|
1691
1794
|
# (whatever)0 => 0
|
1692
|
-
if not ce.op and ce.rexpr.kind_of?
|
1795
|
+
if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0
|
1693
1796
|
ce.replace ce.rexpr
|
1694
1797
|
end
|
1695
1798
|
|
1696
1799
|
# *&bla => bla if types ok
|
1697
|
-
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?
|
1800
|
+
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type]
|
1698
1801
|
ce.replace C::CExpression[ce.rexpr.rexpr]
|
1699
1802
|
end
|
1700
1803
|
|
1701
|
-
# int x + 0xffffffff
|
1702
|
-
if ce.lexpr and ce.rexpr.kind_of?
|
1804
|
+
# int x + 0xffffffff => x-1
|
1805
|
+
if ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include?(ce.op) and
|
1703
1806
|
ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1
|
1704
1807
|
ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op]
|
1705
1808
|
ce.rexpr.rexpr = 1
|
1706
1809
|
end
|
1707
1810
|
|
1811
|
+
# i + ptr => ptr + i
|
1812
|
+
if ce.op == :+ and ce.lexpr and ce.rexpr.type.pointer? and ce.lexpr.type.integral?
|
1813
|
+
ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
|
1814
|
+
end
|
1815
|
+
|
1816
|
+
# i + v => v + i
|
1817
|
+
if ce.op == :+ and ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer)
|
1818
|
+
# avoid infinite swapping
|
1819
|
+
if not ce.rexpr.kind_of?(C::CExpression) or ce.rexpr.op or not ce.rexpr.rexpr.kind_of?(::Integer)
|
1820
|
+
ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
|
1821
|
+
end
|
1822
|
+
end
|
1823
|
+
|
1824
|
+
# (a + b) + c => a + (b + c)
|
1825
|
+
if ce.op == :+ and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :+ and ce.lexpr.lexpr
|
1826
|
+
ce.lexpr, ce.rexpr = ce.lexpr.lexpr, C::CExpression[ce.lexpr.rexpr, :+, ce.rexpr]
|
1827
|
+
optimize_code(ce)
|
1828
|
+
end
|
1829
|
+
|
1830
|
+
# 1 + 2 => 3
|
1831
|
+
if (ce.op == :+ or ce.op == :- or ce.op == :*) and ce.lexpr.kind_of?(C::CExpression) and ce.type.integral? and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
|
1832
|
+
ce.lexpr, ce.op, ce.rexpr = nil, nil, ce.lexpr.rexpr.send(ce.op, ce.rexpr.rexpr)
|
1833
|
+
end
|
1834
|
+
|
1835
|
+
# 4 * (a + 1) => 4*a + 4
|
1836
|
+
if ce.op == :* and ce.lexpr.kind_of?(C::CExpression) and ce.type.integral? and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and ce.rexpr.kind_of?(C::CExpression) and (ce.rexpr.op == :+ or ce.rexpr.op == :-) and ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and ce.rexpr.rexpr.rexpr.kind_of?(::Integer)
|
1837
|
+
ce.replace C::CExpression[[ce.lexpr, ce.op, ce.rexpr.lexpr], ce.rexpr.op, [ce.lexpr.rexpr * ce.rexpr.rexpr.rexpr]]
|
1838
|
+
optimize_code(ce)
|
1839
|
+
end
|
1840
|
+
|
1708
1841
|
# int *ptr; *(ptr + 4) => ptr[4]
|
1709
|
-
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?
|
1842
|
+
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of?(C::Variable) and var.type.pointer?
|
1710
1843
|
ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
|
1711
1844
|
future_array << var.name
|
1712
1845
|
end
|
1713
1846
|
|
1847
|
+
# ptr + (i << 3) => ptr + 8*i
|
1848
|
+
if (ce.op == :+ or ce.op == :[]) and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :<< and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and ce.rexpr.rexpr.rexpr.kind_of?(::Integer)
|
1849
|
+
ce.rexpr.rexpr.rexpr = 1 << ce.rexpr.rexpr.rexpr
|
1850
|
+
ce.rexpr.lexpr, ce.rexpr.op, ce.rexpr.rexpr = ce.rexpr.rexpr, :*, ce.rexpr.lexpr
|
1851
|
+
end
|
1852
|
+
|
1714
1853
|
# char x; x & 255 => x
|
1715
|
-
if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of?
|
1716
|
-
not ce.rexpr.op and ce.rexpr.rexpr.kind_of?
|
1854
|
+
if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of?(C::CExpression) and
|
1855
|
+
not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and m = (1 << (8*sizeof(ce.lexpr))) - 1 and
|
1717
1856
|
ce.rexpr.rexpr & m == m
|
1718
1857
|
ce.replace C::CExpression[ce.lexpr]
|
1719
1858
|
end
|
1720
1859
|
|
1721
1860
|
# a + -b => a - b
|
1722
|
-
if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of?
|
1861
|
+
if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :- and not ce.rexpr.lexpr
|
1723
1862
|
ce.op, ce.rexpr = :-, ce.rexpr.rexpr
|
1724
1863
|
end
|
1725
1864
|
|
1726
1865
|
# (((int) i >> 31) & 1) => i < 0
|
1727
|
-
if ce.op == :& and ce.rexpr.kind_of?
|
1728
|
-
ce.lexpr.kind_of?
|
1866
|
+
if ce.op == :& and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 1 and
|
1867
|
+
ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of?(C::CExpression) and
|
1729
1868
|
not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1
|
1730
1869
|
ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]]
|
1731
1870
|
end
|
1732
1871
|
|
1733
1872
|
# a-b == 0 => a == b
|
1734
|
-
if ce.rexpr.kind_of?
|
1735
|
-
ce.lexpr.kind_of?
|
1873
|
+
if ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include?(ce.op) and
|
1874
|
+
ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :- and ce.lexpr.lexpr
|
1736
1875
|
ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
|
1737
1876
|
end
|
1738
1877
|
|
1739
1878
|
# (a > 0) != 0
|
1740
|
-
if ce.op == :'!=' and ce.rexpr.kind_of?
|
1741
|
-
[:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?
|
1879
|
+
if ce.op == :'!=' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of?(C::CExpression) and
|
1880
|
+
[:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?(ce.lexpr.op)
|
1742
1881
|
ce.replace ce.lexpr
|
1743
1882
|
end
|
1744
1883
|
|
@@ -1747,21 +1886,21 @@ class Decompiler
|
|
1747
1886
|
# a>=b => true if r => a<0 == b>=0 and a<0 => a<0 and b>=0
|
1748
1887
|
|
1749
1888
|
# x != (a && (b != x)) => [x && (!a || b)] || [!x && !(!a || b)]
|
1750
|
-
if ce.op == :'!=' and ce.lexpr.kind_of?
|
1751
|
-
ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of?
|
1889
|
+
if ce.op == :'!=' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :< and ce.rexpr.kind_of?(C::CExpression) and
|
1890
|
+
ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.op == :'!=' and
|
1752
1891
|
ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
|
1753
1892
|
x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr
|
1754
1893
|
ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ]
|
1755
1894
|
optimize_code(ce)
|
1756
1895
|
end
|
1757
1896
|
# (a != b) || a => a || b
|
1758
|
-
if ce.op == :'||' and ce.lexpr.kind_of?
|
1897
|
+
if ce.op == :'||' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
|
1759
1898
|
ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr
|
1760
1899
|
optimize_code(ce)
|
1761
1900
|
end
|
1762
1901
|
# (a<b) && !(a>=0 && b<0) || (a>=b) && (a>=0 && b<0) => (signed)a < (signed)b
|
1763
|
-
if ce.op == :'||' and ce.lexpr.kind_of?
|
1764
|
-
ce.lexpr.lexpr.kind_of?
|
1902
|
+
if ce.op == :'||' and ce.lexpr.kind_of?(C::CExpression) and ce.rexpr.kind_of?(C::CExpression) and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and
|
1903
|
+
ce.lexpr.lexpr.kind_of?(C::CExpression) and ce.lexpr.lexpr.op == :<
|
1765
1904
|
a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr
|
1766
1905
|
if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and
|
1767
1906
|
ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate
|
@@ -1769,7 +1908,7 @@ class Decompiler
|
|
1769
1908
|
end
|
1770
1909
|
end
|
1771
1910
|
# a && 1
|
1772
|
-
if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of?
|
1911
|
+
if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
|
1773
1912
|
if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0))
|
1774
1913
|
ce.replace C::CExpression[ce.lexpr]
|
1775
1914
|
elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall } # cannot wipe if sideeffect
|
@@ -1782,82 +1921,76 @@ class Decompiler
|
|
1782
1921
|
end
|
1783
1922
|
|
1784
1923
|
# (a < b) | (a == b) => a <= b
|
1785
|
-
if ce.op == :| and ce.rexpr.kind_of?
|
1924
|
+
if ce.op == :| and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :== and ce.lexpr.kind_of?(C::CExpression) and
|
1786
1925
|
(ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr
|
1787
1926
|
ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op]
|
1788
1927
|
ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
|
1789
1928
|
end
|
1790
1929
|
|
1791
1930
|
# a == 0 => !a
|
1792
|
-
if ce.op == :== and ce.rexpr.kind_of?
|
1931
|
+
if ce.op == :== and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0
|
1793
1932
|
ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr
|
1794
1933
|
end
|
1795
1934
|
|
1796
|
-
if ce.op == :'!' and ce.rexpr.kind_of?
|
1935
|
+
if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
|
1797
1936
|
ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]]
|
1798
1937
|
end
|
1799
1938
|
|
1800
1939
|
# !(bool) => bool
|
1801
|
-
if ce.op == :'!' and ce.rexpr.kind_of?
|
1940
|
+
if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include?(ce.rexpr.op)
|
1802
1941
|
ce.replace ce.rexpr.negate
|
1803
1942
|
end
|
1804
1943
|
|
1805
1944
|
# (foo)(bar)x => (foo)x
|
1806
|
-
if not ce.op and ce.rexpr.kind_of?
|
1945
|
+
if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::CExpression)
|
1807
1946
|
ce.rexpr = ce.rexpr.rexpr
|
1808
1947
|
end
|
1809
1948
|
|
1810
1949
|
# &struct.1stmember => &struct
|
1811
|
-
if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?
|
1812
|
-
s.kind_of?
|
1950
|
+
if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and
|
1951
|
+
s.kind_of?(C::Union) and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0
|
1813
1952
|
ce.rexpr = ce.rexpr.lexpr
|
1814
1953
|
ce.type = C::Pointer.new(ce.rexpr.type)
|
1815
1954
|
end
|
1816
1955
|
|
1817
1956
|
# (1stmember*)structptr => &structptr->1stmember
|
1818
|
-
if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of?
|
1819
|
-
s = ce.rexpr.type.pointed.untypedef and s.kind_of?
|
1957
|
+
if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of?(C::Typed) and ce.rexpr.type.pointer? and
|
1958
|
+
s = ce.rexpr.type.pointed.untypedef and s.kind_of?(C::Union) and ce.type.pointed.untypedef != s
|
1820
1959
|
ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))]
|
1821
|
-
#ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of?
|
1822
|
-
#ce.rexpr.type.pointed.untypedef.kind_of?
|
1960
|
+
#ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of?(C::Function) or (ce.rexpr.type.pointer? and
|
1961
|
+
#ce.rexpr.type.pointed.untypedef.kind_of?(C::Function)) # XXX ugly
|
1823
1962
|
# int32* v1 = (int32*)pstruct;
|
1824
1963
|
# z = v1+4 if v1 is not cast, the + is invalid (sizeof pointed changes)
|
1825
1964
|
# TODO when finding type of pstruct, set type of v1 accordingly
|
1826
1965
|
end
|
1827
1966
|
|
1828
1967
|
# (&foo)->bar => foo.bar
|
1829
|
-
if ce.op == :'->' and ce.lexpr.kind_of?
|
1968
|
+
if ce.op == :'->' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :& and not ce.lexpr.lexpr
|
1830
1969
|
ce.lexpr = ce.lexpr.rexpr
|
1831
1970
|
ce.op = :'.'
|
1832
1971
|
end
|
1833
1972
|
|
1834
1973
|
# (foo)bla => bla if bla of type foo
|
1835
|
-
if not ce.op and ce.rexpr.kind_of?
|
1974
|
+
if not ce.op and ce.rexpr.kind_of?(C::Typed) and sametype[ce.type, ce.rexpr.type]
|
1836
1975
|
ce.replace C::CExpression[ce.rexpr]
|
1837
1976
|
end
|
1838
|
-
if ce.lexpr.kind_of?
|
1977
|
+
if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(C::Variable) and ce.lexpr.type == ce.lexpr.rexpr.type
|
1839
1978
|
ce.lexpr = ce.lexpr.rexpr
|
1840
1979
|
end
|
1841
|
-
|
1842
|
-
if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and
|
1843
|
-
not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type
|
1844
|
-
ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type)
|
1845
|
-
optimize_code(ce.lexpr)
|
1846
|
-
end
|
1847
1980
|
}
|
1848
1981
|
|
1849
1982
|
# if there is a ptr[4], change all *ptr to ptr[0] for consistency
|
1850
1983
|
# do this after the first pass, which may change &*ptr to ptr
|
1851
1984
|
walk_ce(scope) { |ce|
|
1852
|
-
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?
|
1985
|
+
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::Variable) and future_array.include?(ce.rexpr.name)
|
1853
1986
|
ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0]
|
1854
1987
|
end
|
1855
1988
|
} if not future_array.empty?
|
1856
1989
|
|
1857
1990
|
# if (x != 0) => if (x)
|
1858
1991
|
walk(scope) { |st|
|
1859
|
-
if st.kind_of?
|
1860
|
-
st.test.rexpr.kind_of?
|
1992
|
+
if st.kind_of?(C::If) and st.test.kind_of?(C::CExpression) and st.test.op == :'!=' and
|
1993
|
+
st.test.rexpr.kind_of?(C::CExpression) and not st.test.rexpr.op and st.test.rexpr.rexpr == 0
|
1861
1994
|
st.test = C::CExpression[st.test.lexpr]
|
1862
1995
|
end
|
1863
1996
|
}
|
@@ -1868,8 +2001,8 @@ class Decompiler
|
|
1868
2001
|
case exp
|
1869
2002
|
when nil, ::Numeric, ::String; false
|
1870
2003
|
when ::Array; exp.any? { |_e| sideeffect _e, scope }
|
1871
|
-
when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include?
|
1872
|
-
when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or
|
2004
|
+
when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include?(:volatile)
|
2005
|
+
when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or C::CExpression::AssignOp.include?(exp.op) or
|
1873
2006
|
sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
|
1874
2007
|
else true # failsafe
|
1875
2008
|
end
|
@@ -1878,97 +2011,153 @@ class Decompiler
|
|
1878
2011
|
# converts C code to a graph of cexprs (nodes = cexprs, edges = codepaths)
|
1879
2012
|
# returns a CGraph
|
1880
2013
|
class CGraph
|
1881
|
-
# exprs: label => [exprs], to: label => [labels], block: label => are exprs
|
2014
|
+
# exprs: label => [exprs], to: label => [labels], block: label => are exprs in a block (vs If#test), start: 1st label
|
1882
2015
|
attr_accessor :exprs, :to, :block, :start, :to_optim, :from_optim
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
|
1887
|
-
|
1888
|
-
|
1889
|
-
|
2016
|
+
|
2017
|
+
def initialize
|
2018
|
+
@exprs = {} # label => [exprs]
|
2019
|
+
@to = {} # label => [labels]
|
2020
|
+
@block = {} # label => is label in a block? (vs If#test)
|
2021
|
+
@anon_label = 0 # when no label is there, use anon_label++
|
2022
|
+
@exprs_var = nil # similar to @exprs, indexed by var name, lazy initialization
|
2023
|
+
end
|
2024
|
+
|
2025
|
+
def build(stmt)
|
2026
|
+
@start = @anon_label
|
2027
|
+
to_graph(stmt, @start, nil, nil, nil)
|
2028
|
+
optimize
|
2029
|
+
self
|
2030
|
+
end
|
2031
|
+
|
1890
2032
|
# converts C code to a graph of codepath of cexprs
|
1891
|
-
to_graph
|
2033
|
+
def to_graph(stmt, l_cur, l_after, l_cont, l_break)
|
1892
2034
|
case stmt
|
1893
|
-
when C::Label;
|
1894
|
-
when C::Goto;
|
1895
|
-
when C::Continue;
|
1896
|
-
when C::Break;
|
2035
|
+
when C::Label; @to[l_cur] = [stmt.name] ; @to[stmt.name] = [l_after]
|
2036
|
+
when C::Goto; @to[l_cur] = [stmt.target]
|
2037
|
+
when C::Continue; @to[l_cur] = [l_cont]
|
2038
|
+
when C::Break; @to[l_cur] = [l_break]
|
1897
2039
|
when C::CExpression
|
1898
|
-
|
1899
|
-
|
2040
|
+
@exprs[l_cur] = [stmt]
|
2041
|
+
@to[l_cur] = [l_after]
|
1900
2042
|
when C::Return
|
1901
|
-
|
1902
|
-
|
2043
|
+
@exprs[l_cur] = [stmt.value] if stmt.value
|
2044
|
+
@to[l_cur] = []
|
1903
2045
|
when C::Block
|
1904
|
-
to_graph
|
2046
|
+
to_graph(stmt.statements, l_cur, l_after, l_cont, l_break)
|
1905
2047
|
when ::Array
|
1906
|
-
|
1907
|
-
|
2048
|
+
@exprs[l_cur] = []
|
2049
|
+
@block[l_cur] = true
|
1908
2050
|
stmt.each_with_index { |s, i|
|
1909
2051
|
case s
|
1910
2052
|
when C::Declaration
|
1911
2053
|
when C::CExpression
|
1912
|
-
|
2054
|
+
@exprs[l_cur] << s
|
1913
2055
|
else
|
1914
|
-
l = anon_label += 1
|
1915
|
-
ll = anon_label += 1
|
1916
|
-
|
1917
|
-
|
1918
|
-
to_graph
|
2056
|
+
l = @anon_label += 1
|
2057
|
+
ll = @anon_label += 1
|
2058
|
+
@to[l_cur] = [l]
|
2059
|
+
@block[l_cur] = true
|
2060
|
+
to_graph(stmt[i], l, ll, l_cont, l_break)
|
1919
2061
|
l_cur = ll
|
1920
|
-
|
2062
|
+
@exprs[l_cur] = []
|
1921
2063
|
end
|
1922
2064
|
}
|
1923
|
-
|
2065
|
+
@to[l_cur] = [l_after].compact
|
1924
2066
|
when C::If
|
1925
|
-
|
1926
|
-
lt = anon_label += 1
|
1927
|
-
to_graph
|
1928
|
-
le = anon_label += 1
|
1929
|
-
to_graph
|
1930
|
-
|
2067
|
+
@exprs[l_cur] = [stmt.test]
|
2068
|
+
lt = @anon_label += 1
|
2069
|
+
to_graph(stmt.bthen, lt, l_after, l_cont, l_break)
|
2070
|
+
le = @anon_label += 1
|
2071
|
+
to_graph(stmt.belse, le, l_after, l_cont, l_break)
|
2072
|
+
@to[l_cur] = [lt, le]
|
1931
2073
|
when C::While, C::DoWhile
|
1932
|
-
la = anon_label += 1
|
1933
|
-
if stmt.kind_of?
|
2074
|
+
la = @anon_label += 1
|
2075
|
+
if stmt.kind_of?(C::DoWhile)
|
1934
2076
|
lt, lb = la, l_cur
|
1935
2077
|
else
|
1936
2078
|
lt, lb = l_cur, la
|
1937
2079
|
end
|
1938
|
-
|
1939
|
-
|
1940
|
-
to_graph
|
1941
|
-
when C::Asm, nil;
|
2080
|
+
@exprs[lt] = [stmt.test]
|
2081
|
+
@to[lt] = [lb, l_after]
|
2082
|
+
to_graph(stmt.body, lb, lt, lt, l_after)
|
2083
|
+
when C::Asm, nil; @to[l_cur] = [l_after]
|
1942
2084
|
else puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE
|
1943
2085
|
end
|
1944
|
-
|
1945
|
-
|
1946
|
-
g.start = anon_label
|
1947
|
-
to_graph[st, g.start, nil, nil, nil]
|
2086
|
+
end
|
1948
2087
|
|
1949
2088
|
# optimize graph
|
1950
|
-
|
1951
|
-
|
1952
|
-
|
1953
|
-
|
1954
|
-
|
1955
|
-
|
1956
|
-
|
1957
|
-
|
1958
|
-
|
1959
|
-
|
2089
|
+
def optimize
|
2090
|
+
@to_optim = {}
|
2091
|
+
@to.each { |k, v| @to_optim[k] = v.uniq }
|
2092
|
+
@exprs.delete_if { |k, v| v == [] }
|
2093
|
+
@to_optim.delete_if { |k, v|
|
2094
|
+
if v.length == 1 and not @exprs[k] and v != [k]
|
2095
|
+
@to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end }
|
2096
|
+
true
|
2097
|
+
elsif v.length == 0 and not @exprs[k]
|
2098
|
+
@to_optim.each_value { |t| t.delete k }
|
2099
|
+
true
|
2100
|
+
end
|
2101
|
+
}
|
2102
|
+
|
2103
|
+
@from_optim = {}
|
2104
|
+
@to_optim.each { |k, v| v.each { |t| (@from_optim[t] ||= []) << k } }
|
2105
|
+
end
|
2106
|
+
|
2107
|
+
# varname => { label => [list of indices of @exprs[label] referencing varname] }
|
2108
|
+
def exprs_var
|
2109
|
+
@exprs_var ||= init_exprs_var
|
2110
|
+
end
|
2111
|
+
|
2112
|
+
# returns the list of variable names referenced by a CExpr
|
2113
|
+
def get_expr_vars(e)
|
2114
|
+
case e
|
2115
|
+
when C::CExpression; get_expr_vars(e.lexpr) + get_expr_vars(e.rexpr)
|
2116
|
+
when ::Array; e.inject([]) { |a, ee| a.concat get_expr_vars(ee) }
|
2117
|
+
when C::Variable; [e.name]
|
2118
|
+
else; []
|
1960
2119
|
end
|
1961
|
-
|
2120
|
+
end
|
1962
2121
|
|
1963
|
-
|
1964
|
-
|
2122
|
+
# initialize @exprs_var
|
2123
|
+
def init_exprs_var
|
2124
|
+
@exprs_var = {}
|
2125
|
+
@exprs.each_key { |label| update_exprs_var(label) }
|
2126
|
+
@exprs_var
|
2127
|
+
end
|
1965
2128
|
|
1966
|
-
|
2129
|
+
# populate one label of @exprs_var
|
2130
|
+
def update_exprs_var(label)
|
2131
|
+
@exprs[label].each_with_index { |e, idx|
|
2132
|
+
get_expr_vars(e).uniq.each { |varname|
|
2133
|
+
@exprs_var[varname] ||= {}
|
2134
|
+
@exprs_var[varname][label] ||= []
|
2135
|
+
@exprs_var[varname][label] << idx
|
2136
|
+
}
|
2137
|
+
}
|
2138
|
+
end
|
2139
|
+
|
2140
|
+
# invalidates one label (eg exprs were deleted)
|
2141
|
+
# rebuilds @exprs_var if necessary
|
2142
|
+
def invalidate(label=nil)
|
2143
|
+
if @exprs_var
|
2144
|
+
if label
|
2145
|
+
@exprs_var.each { |v, h| h.delete(label) }
|
2146
|
+
update_exprs_var(label)
|
2147
|
+
else
|
2148
|
+
@exprs_var = nil
|
2149
|
+
end
|
2150
|
+
end
|
2151
|
+
end
|
2152
|
+
end
|
2153
|
+
def c_to_graph(stmt)
|
2154
|
+
CGraph.new.build(stmt)
|
1967
2155
|
end
|
1968
2156
|
|
1969
2157
|
# dataflow optimization
|
1970
2158
|
# condenses expressions (++x; if (x) => if (++x))
|
1971
2159
|
# remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc)
|
2160
|
+
# XXX omg
|
1972
2161
|
def optimize_vars(scope)
|
1973
2162
|
return if forbid_optimize_dataflow
|
1974
2163
|
|
@@ -1996,36 +2185,40 @@ class Decompiler
|
|
1996
2185
|
|
1997
2186
|
# badlabels is a list of labels that may be reached without passing through the first invocation block
|
1998
2187
|
find_next_read_rec = lambda { |label, idx, var, done, badlabels|
|
1999
|
-
next if done.include?
|
2188
|
+
next if done.include?(label)
|
2000
2189
|
done << label if idx == 0
|
2190
|
+
list = g.exprs_var[var.name][label].to_a.find_all { |i| i >= idx }
|
2191
|
+
idx = list.shift
|
2001
2192
|
|
2002
|
-
idx
|
2193
|
+
idx = list.shift while idx and not ret = find_next_read_ce[g.exprs[label][idx], var]
|
2003
2194
|
next ret if ret
|
2004
2195
|
|
2005
2196
|
to = g.to_optim[label].to_a.map { |t|
|
2006
|
-
break [:split] if badlabels.include?
|
2197
|
+
break [:split] if badlabels.include?(t)
|
2007
2198
|
find_next_read_rec[t, 0, var, done, badlabels]
|
2008
2199
|
}.compact
|
2009
2200
|
|
2010
2201
|
tw = to - [:write]
|
2011
|
-
if to.include?
|
2202
|
+
if to.include?(:split) or tw.length > 1
|
2012
2203
|
:split
|
2013
2204
|
elsif tw.length == 1
|
2014
2205
|
tw.first
|
2015
|
-
elsif to.include?
|
2206
|
+
elsif to.include?(:write)
|
2016
2207
|
:write
|
2017
2208
|
end
|
2018
2209
|
}
|
2019
2210
|
# return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason
|
2020
2211
|
find_next_read = nil
|
2021
2212
|
find_prev_read_rec = lambda { |label, idx, var, done|
|
2022
|
-
next if done.include?
|
2213
|
+
next if done.include?(label)
|
2023
2214
|
done << label if idx == g.exprs[label].length-1
|
2215
|
+
list = g.exprs_var[var.name][label].to_a.find_all { |i| i <= idx }
|
2216
|
+
idx = list.pop
|
2024
2217
|
|
2025
|
-
idx
|
2026
|
-
if ret.kind_of?
|
2218
|
+
idx = list.pop while idx and not ret = find_next_read_ce[g.exprs[label][idx], var]
|
2219
|
+
if ret.kind_of?(C::CExpression)
|
2027
2220
|
fwchk = find_next_read[label, idx+1, var]
|
2028
|
-
ret = fwchk if not fwchk.kind_of?
|
2221
|
+
ret = fwchk if not fwchk.kind_of?(C::CExpression)
|
2029
2222
|
end
|
2030
2223
|
next ret if ret
|
2031
2224
|
|
@@ -2033,25 +2226,25 @@ class Decompiler
|
|
2033
2226
|
find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done]
|
2034
2227
|
}.compact
|
2035
2228
|
|
2036
|
-
next :split if from.include?
|
2229
|
+
next :split if from.include?(:split)
|
2037
2230
|
fw = from - [:write]
|
2038
2231
|
if fw.length == 1
|
2039
2232
|
fw.first
|
2040
2233
|
elsif fw.length > 1
|
2041
2234
|
:split
|
2042
|
-
elsif from.include?
|
2235
|
+
elsif from.include?(:write)
|
2043
2236
|
:write
|
2044
2237
|
end
|
2045
2238
|
}
|
2046
2239
|
|
2047
|
-
# list of labels reachable without
|
2240
|
+
# list of labels reachable without passing through label
|
2048
2241
|
badlab = {}
|
2049
2242
|
build_badlabel = lambda { |label|
|
2050
2243
|
next if badlab[label]
|
2051
2244
|
badlab[label] = []
|
2052
2245
|
todo = [g.start]
|
2053
2246
|
while l = todo.pop
|
2054
|
-
next if l == label or badlab[label].include?
|
2247
|
+
next if l == label or badlab[label].include?(l)
|
2055
2248
|
badlab[label] << l
|
2056
2249
|
todo.concat g.to_optim[l].to_a
|
2057
2250
|
end
|
@@ -2085,10 +2278,10 @@ class Decompiler
|
|
2085
2278
|
|
2086
2279
|
# TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code)
|
2087
2280
|
# needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?)
|
2088
|
-
if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of?
|
2089
|
-
scope.symbol[v.name] and not v.type.qualifier.to_a.include?
|
2090
|
-
next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of?
|
2091
|
-
!((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of?
|
2281
|
+
if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of?(C::Variable) and
|
2282
|
+
scope.symbol[v.name] and not v.type.qualifier.to_a.include?(:volatile)
|
2283
|
+
next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of?(C::CExpression)) and
|
2284
|
+
!((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of?(C::CExpression))
|
2092
2285
|
next if oe.op == :& and not oe.lexpr # no &(++eax)
|
2093
2286
|
|
2094
2287
|
# merge pre/postincrement into next/prev var usage
|
@@ -2121,7 +2314,7 @@ class Decompiler
|
|
2121
2314
|
when :'+=', :'-='
|
2122
2315
|
# TODO i++; i += 4 => i += 5
|
2123
2316
|
next
|
2124
|
-
when *AssignOp
|
2317
|
+
when *C::CExpression::AssignOp
|
2125
2318
|
next # ++i; i |= 4 => ignore
|
2126
2319
|
else
|
2127
2320
|
if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v]
|
@@ -2134,16 +2327,27 @@ class Decompiler
|
|
2134
2327
|
|
2135
2328
|
i -= 1
|
2136
2329
|
exprs.delete_at(i)
|
2330
|
+
g.invalidate(label)
|
2137
2331
|
e.lexpr = e.op = e.rexpr = nil
|
2138
2332
|
|
2139
2333
|
|
2140
|
-
elsif e.op == :'=' and v = e.lexpr and v.kind_of?
|
2141
|
-
not v.type.qualifier.to_a.include?
|
2334
|
+
elsif e.op == :'=' and v = e.lexpr and v.kind_of?(C::Variable) and scope.symbol[v.name] and
|
2335
|
+
not v.type.qualifier.to_a.include?(:volatile) and not find_next_read_ce[e.rexpr, v]
|
2142
2336
|
|
2143
2337
|
# reduce trivial static assignments
|
2144
|
-
|
2145
|
-
|
2146
|
-
|
2338
|
+
# b = a + 1 ; a = b => a = a + 1 ; b = a
|
2339
|
+
if ne = g.exprs[label][i] and ne.op == :'=' and ne.rexpr == e.lexpr and ne.lexpr.kind_of?(C::Variable) and find_next_read_ce[e.rexpr, ne.lexpr]
|
2340
|
+
e.lexpr, ne.lexpr, ne.rexpr = ne.lexpr, ne.rexpr, ne.lexpr
|
2341
|
+
optimize_code(e)
|
2342
|
+
i -= 1
|
2343
|
+
g.invalidate(label)
|
2344
|
+
next
|
2345
|
+
end
|
2346
|
+
|
2347
|
+
# i = 4 ; f(i) => f(4)
|
2348
|
+
if (e.rexpr.kind_of?(C::CExpression) and iv = e.rexpr.reduce(@c_parser) and iv.kind_of?(::Integer)) or
|
2349
|
+
(e.rexpr.kind_of?(C::CExpression) and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of?(C::Variable)) or
|
2350
|
+
(e.rexpr.kind_of?(C::Variable) and e.rexpr.type.kind_of?(C::Array))
|
2147
2351
|
rewritten = false
|
2148
2352
|
readers = []
|
2149
2353
|
discard = [e]
|
@@ -2173,7 +2377,7 @@ class Decompiler
|
|
2173
2377
|
end
|
2174
2378
|
end
|
2175
2379
|
|
2176
|
-
case
|
2380
|
+
case find_next_read[label, i, v]
|
2177
2381
|
when C::CExpression
|
2178
2382
|
# read in one place only, try to patch rexpr in there
|
2179
2383
|
r = e.rexpr
|
@@ -2183,12 +2387,12 @@ class Decompiler
|
|
2183
2387
|
isfunc = false
|
2184
2388
|
depend_vars = []
|
2185
2389
|
walk_ce(C::CExpression[r]) { |ce|
|
2186
|
-
isfunc = true if ce.op == :func and (not ce.lexpr.kind_of?
|
2390
|
+
isfunc = true if ce.op == :func and (not ce.lexpr.kind_of?(C::Variable) or
|
2187
2391
|
not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ?
|
2188
|
-
depend_vars << ce.lexpr if ce.lexpr.kind_of?
|
2189
|
-
depend_vars << ce.rexpr if ce.rexpr.kind_of?
|
2392
|
+
depend_vars << ce.lexpr if ce.lexpr.kind_of?(C::Variable)
|
2393
|
+
depend_vars << ce.rexpr if ce.rexpr.kind_of?(C::Variable) and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v)
|
2190
2394
|
depend_vars << ce if ce.lvalue?
|
2191
|
-
depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of?
|
2395
|
+
depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of?(::Array)
|
2192
2396
|
}
|
2193
2397
|
depend_vars.uniq!
|
2194
2398
|
|
@@ -2209,11 +2413,11 @@ class Decompiler
|
|
2209
2413
|
ce.rexpr.each { |a| cnt += 1 if a == v }
|
2210
2414
|
cnt += 1 if ce.lexpr == v
|
2211
2415
|
when :'='
|
2212
|
-
bad = true if depend_vars.include?
|
2416
|
+
bad = true if depend_vars.include?(ce.lexpr)
|
2213
2417
|
cnt += 1 if ce.rexpr == v
|
2214
2418
|
else
|
2215
|
-
bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include?
|
2216
|
-
bad = true if AssignOp.include?
|
2419
|
+
bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include?(ce.rexpr)
|
2420
|
+
bad = true if C::CExpression::AssignOp.include?(ce.op) and depend_vars.include?(ce.lexpr)
|
2217
2421
|
cnt += 1 if ce.lexpr == v
|
2218
2422
|
cnt += 1 if ce.rexpr == v
|
2219
2423
|
end
|
@@ -2226,7 +2430,7 @@ class Decompiler
|
|
2226
2430
|
break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable
|
2227
2431
|
# x = 1; y = x; z = x; => cannot suppress x
|
2228
2432
|
nr = find_next_read[l_l, n_i+1, v]
|
2229
|
-
break if (nr.kind_of?
|
2433
|
+
break if (nr.kind_of?(C::CExpression) or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v }
|
2230
2434
|
else break # a = 1; b = a + a => fail
|
2231
2435
|
end
|
2232
2436
|
|
@@ -2246,12 +2450,12 @@ class Decompiler
|
|
2246
2450
|
elsif isfunc
|
2247
2451
|
break :fail
|
2248
2452
|
end
|
2249
|
-
when *AssignOp
|
2250
|
-
break :fail if not ce.lexpr and depend_vars.include?
|
2453
|
+
when *C::CExpression::AssignOp
|
2454
|
+
break :fail if not ce.lexpr and depend_vars.include?(ce.rexpr) # ++depend
|
2251
2455
|
if ce.rexpr == v
|
2252
2456
|
ce.rexpr = r
|
2253
2457
|
break :done
|
2254
|
-
elsif ce.lexpr == v or depend_vars.include?
|
2458
|
+
elsif ce.lexpr == v or depend_vars.include?(ce.lexpr)
|
2255
2459
|
break :fail
|
2256
2460
|
end
|
2257
2461
|
else
|
@@ -2270,6 +2474,8 @@ class Decompiler
|
|
2270
2474
|
when :done
|
2271
2475
|
i -= 1
|
2272
2476
|
exprs.delete_at(i)
|
2477
|
+
g.invalidate(label)
|
2478
|
+
g.invalidate(l_l) if l_l != label
|
2273
2479
|
e.lexpr = e.op = e.rexpr = nil
|
2274
2480
|
break
|
2275
2481
|
when :fail
|
@@ -2277,8 +2483,9 @@ class Decompiler
|
|
2277
2483
|
end
|
2278
2484
|
}
|
2279
2485
|
# ignore branches that will never reuse v
|
2280
|
-
may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of?
|
2281
|
-
if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l]
|
2486
|
+
may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of?(C::CExpression) }
|
2487
|
+
if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] and
|
2488
|
+
not sideeffect(e.rexpr) # dont do cross-block var optimization, eg 'a = f() ; if() return a' =!> 'if () return f()'
|
2282
2489
|
l_i = 0
|
2283
2490
|
l_l = to
|
2284
2491
|
else break
|
@@ -2292,10 +2499,10 @@ class Decompiler
|
|
2292
2499
|
# remove sideeffectless subexprs
|
2293
2500
|
loop do
|
2294
2501
|
case e.op
|
2295
|
-
when :funcall, *AssignOp
|
2502
|
+
when :funcall, *C::CExpression::AssignOp
|
2296
2503
|
else
|
2297
|
-
l = (e.lexpr.kind_of?
|
2298
|
-
r = (e.rexpr.kind_of?
|
2504
|
+
l = (e.lexpr.kind_of?(C::CExpression) and sideeffect(e.lexpr))
|
2505
|
+
r = (e.rexpr.kind_of?(C::CExpression) and sideeffect(e.rexpr))
|
2299
2506
|
if l and r # could split...
|
2300
2507
|
elsif l
|
2301
2508
|
e.replace(e.lexpr)
|
@@ -2311,87 +2518,183 @@ class Decompiler
|
|
2311
2518
|
end
|
2312
2519
|
break
|
2313
2520
|
end
|
2521
|
+
g.invalidate(label)
|
2314
2522
|
end
|
2315
2523
|
end
|
2316
2524
|
end
|
2317
2525
|
}
|
2318
2526
|
|
2527
|
+
|
2528
|
+
# var propagation
|
2529
|
+
# find vars that are written only once, and replace all their uses with their assigned value
|
2530
|
+
# XXX this may supersede some of the ugly stuff just before
|
2531
|
+
loop do
|
2532
|
+
g.invalidate
|
2533
|
+
writtenonce = {} # var => [label, offset] of assignment
|
2534
|
+
neverwritten = {} # var => true (eg args)
|
2535
|
+
g.exprs_var.each { |varname, h1|
|
2536
|
+
next if not var = scope.symbol[varname]
|
2537
|
+
neverwritten[varname] = true
|
2538
|
+
h1.each { |label, idx_list|
|
2539
|
+
idx_list.each { |expr_idx|
|
2540
|
+
e = g.exprs[label][expr_idx]
|
2541
|
+
if ce_write(e, var)
|
2542
|
+
neverwritten.delete varname
|
2543
|
+
if writtenonce[varname]
|
2544
|
+
# written twice, abort
|
2545
|
+
writtenonce.delete varname
|
2546
|
+
break
|
2547
|
+
elsif e.op == :'=' and e.lexpr == var and not ce_write(e.rexpr, var) and not ce_read(e.rexpr, var)
|
2548
|
+
# good !
|
2549
|
+
writtenonce[varname] = [label, expr_idx]
|
2550
|
+
else
|
2551
|
+
# unhandled write, abort
|
2552
|
+
break
|
2553
|
+
end
|
2554
|
+
end
|
2555
|
+
}
|
2556
|
+
}
|
2557
|
+
}
|
2558
|
+
# XXX check cycles ?
|
2559
|
+
|
2560
|
+
is_trivial_assign = lambda { |e, rec_max|
|
2561
|
+
case e
|
2562
|
+
when C::Variable; writtenonce[e.name] or neverwritten[e.name]
|
2563
|
+
when ::Integer, nil; true
|
2564
|
+
when C::CExpression
|
2565
|
+
rec_max > 0 and
|
2566
|
+
not sideeffect(e) and
|
2567
|
+
(e.op == :+ or e.op == :- or e.op == nil) and
|
2568
|
+
is_trivial_assign[e.lexpr, rec_max-1] and
|
2569
|
+
is_trivial_assign[e.rexpr, rec_max-1]
|
2570
|
+
end
|
2571
|
+
}
|
2572
|
+
break if not trivial_var = writtenonce.keys.find { |var|
|
2573
|
+
l, i = writtenonce[var]
|
2574
|
+
e = g.exprs[l][i].rexpr
|
2575
|
+
is_trivial_assign[e, 3]
|
2576
|
+
}
|
2577
|
+
label, idx = writtenonce[trivial_var]
|
2578
|
+
assign_expr = g.exprs[label][idx]
|
2579
|
+
var = assign_expr.lexpr
|
2580
|
+
value = assign_expr.rexpr
|
2581
|
+
g.exprs_var[trivial_var].each { |l, list|
|
2582
|
+
list.each { |i|
|
2583
|
+
e = g.exprs[l][i]
|
2584
|
+
if l == label and i == idx
|
2585
|
+
e.lexpr = e.op = e.rexpr = nil
|
2586
|
+
else
|
2587
|
+
ce_patch(e, var, value)
|
2588
|
+
optimize_code(e)
|
2589
|
+
end
|
2590
|
+
}
|
2591
|
+
}
|
2592
|
+
end
|
2593
|
+
|
2594
|
+
|
2319
2595
|
# wipe cexprs marked in the previous step
|
2320
2596
|
walk(scope) { |st|
|
2321
|
-
next if not st.kind_of?
|
2322
|
-
st.statements.delete_if { |e| e.kind_of?
|
2597
|
+
next if not st.kind_of?(C::Block)
|
2598
|
+
st.statements.delete_if { |e| e.kind_of?(C::CExpression) and not e.lexpr and not e.op and not e.rexpr }
|
2323
2599
|
}
|
2600
|
+
g.invalidate
|
2324
2601
|
|
2325
2602
|
# reoptimize cexprs
|
2326
2603
|
walk_ce(scope, true) { |ce|
|
2327
2604
|
# redo some simplification that may become available after variable propagation
|
2328
2605
|
# int8 & 255 => int8
|
2329
|
-
if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of?
|
2606
|
+
if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1
|
2330
2607
|
ce.replace C::CExpression[ce.lexpr]
|
2331
2608
|
end
|
2332
2609
|
|
2333
2610
|
# int *ptr; *(ptr + 4) => ptr[4]
|
2334
|
-
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?
|
2611
|
+
if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of?(C::Variable) and var.type.pointer?
|
2335
2612
|
ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
|
2336
2613
|
end
|
2337
2614
|
|
2338
2615
|
# useless casts
|
2339
|
-
if not ce.op and ce.rexpr.kind_of?
|
2340
|
-
(ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of?
|
2616
|
+
if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of?(C::CExpression) or
|
2617
|
+
(ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of?(C::Union))) # keep ((struct*)0)->memb
|
2341
2618
|
ce.rexpr = ce.rexpr.rexpr
|
2342
2619
|
end
|
2343
|
-
if not ce.op and ce.rexpr.kind_of?
|
2620
|
+
if not ce.op and ce.rexpr.kind_of?(C::CExpression) and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?))
|
2344
2621
|
ce.replace ce.rexpr
|
2345
2622
|
end
|
2346
2623
|
# useless casts (type)*((oeua)Ptype)
|
2347
|
-
if not ce.op and ce.rexpr.kind_of?
|
2348
|
-
p = ce.rexpr.rexpr.rexpr and p.kind_of?
|
2624
|
+
if not ce.op and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and
|
2625
|
+
p = ce.rexpr.rexpr.rexpr and p.kind_of?(C::Typed) and p.type.pointer? and ce.type == p.type.pointed
|
2349
2626
|
ce.op = ce.rexpr.op
|
2350
2627
|
ce.rexpr = ce.rexpr.rexpr.rexpr
|
2351
2628
|
end
|
2629
|
+
|
2630
|
+
# (char *)42 => new global var
|
2631
|
+
if not ce.op and ce.type.pointer? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
|
2632
|
+
ce.rexpr = new_global_var(ce.rexpr.rexpr, ce.type, scope) || ce.rexpr
|
2633
|
+
end
|
2634
|
+
|
2352
2635
|
# (a > 0) != 0
|
2353
|
-
if ce.op == :'!=' and ce.rexpr.kind_of?
|
2354
|
-
[:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?
|
2636
|
+
if ce.op == :'!=' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of?(C::CExpression) and
|
2637
|
+
[:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?(ce.lexpr.op)
|
2355
2638
|
ce.replace ce.lexpr
|
2356
2639
|
end
|
2357
2640
|
# a == 0 => !a
|
2358
|
-
if ce.op == :== and ce.rexpr.kind_of?
|
2641
|
+
if ce.op == :== and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0
|
2359
2642
|
ce.replace C::CExpression[:'!', ce.lexpr]
|
2360
2643
|
end
|
2361
2644
|
# !(int)a => !a
|
2362
|
-
if ce.op == :'!' and ce.rexpr.kind_of?
|
2645
|
+
if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::CExpression)
|
2363
2646
|
ce.rexpr = ce.rexpr.rexpr
|
2364
2647
|
end
|
2365
2648
|
# (int)a < (int)b => a < b TODO uint <-> int
|
2366
|
-
if [:<, :<=, :>, :>=].include?
|
2367
|
-
ce.rexpr.rexpr.kind_of?
|
2649
|
+
if [:<, :<=, :>, :>=].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ce.lexpr.kind_of?(C::CExpression) and not ce.rexpr.op and not ce.lexpr.op and
|
2650
|
+
ce.rexpr.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of?(C::CExpression) and ce.lexpr.rexpr.type.pointer?
|
2368
2651
|
ce.rexpr = ce.rexpr.rexpr
|
2369
2652
|
ce.lexpr = ce.lexpr.rexpr
|
2370
2653
|
end
|
2371
2654
|
|
2372
2655
|
# a & 3 & 1
|
2373
|
-
while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of?
|
2374
|
-
ce.lexpr.kind_of?
|
2375
|
-
ce.lexpr.rexpr.kind_of?
|
2656
|
+
while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and
|
2657
|
+
ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == ce.op and ce.lexpr.lexpr and
|
2658
|
+
ce.lexpr.rexpr.kind_of?(C::CExpression) and ce.lexpr.rexpr.rexpr.kind_of?(::Integer)
|
2376
2659
|
ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr)
|
2377
2660
|
end
|
2378
2661
|
|
2379
2662
|
# x = x | 4 => x |= 4
|
2380
|
-
if ce.op == :'=' and ce.rexpr.kind_of?
|
2663
|
+
if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include?(ce.rexpr.op) and ce.rexpr.lexpr == ce.lexpr
|
2381
2664
|
ce.op = (ce.rexpr.op.to_s + '=').to_sym
|
2382
2665
|
ce.rexpr = ce.rexpr.rexpr
|
2383
2666
|
end
|
2384
2667
|
|
2668
|
+
# x = 4 | x => x |= 4
|
2669
|
+
if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and [:+, :*, :|, :&, :^].include?(ce.rexpr.op) and ce.rexpr.rexpr == ce.lexpr
|
2670
|
+
ce.op = (ce.rexpr.op.to_s + '=').to_sym
|
2671
|
+
ce.rexpr = ce.rexpr.lexpr
|
2672
|
+
end
|
2673
|
+
|
2385
2674
|
# x += 1 => ++x
|
2386
|
-
if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of?
|
2387
|
-
|
2675
|
+
if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and (ce.rexpr.rexpr == 1 or ce.rexpr.rexpr == -1)
|
2676
|
+
if ce.rexpr.rexpr == 1
|
2677
|
+
ce.op = {:'+=' => :'++', :'-=' => :'--'}[ce.op]
|
2678
|
+
else
|
2679
|
+
ce.op = {:'+=' => :'--', :'-=' => :'++'}[ce.op]
|
2680
|
+
end
|
2681
|
+
ce.rexpr = ce.lexpr
|
2682
|
+
ce.lexpr = nil
|
2388
2683
|
end
|
2389
2684
|
|
2390
2685
|
# --x+1 => x--
|
2391
|
-
if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of?
|
2392
|
-
ce.lexpr.rexpr and ce.rexpr.kind_of?
|
2686
|
+
if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and
|
2687
|
+
ce.lexpr.rexpr and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 1
|
2393
2688
|
ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil
|
2394
2689
|
end
|
2690
|
+
|
2691
|
+
# 1+2 => 3
|
2692
|
+
if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and
|
2693
|
+
ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and
|
2694
|
+
[:+, :-, :*, :/, :|, :&, :^, :>, :'==', :'!=', :<, :>=, :<=].include?(ce.op)
|
2695
|
+
ce.rexpr = ce.reduce(@c_parser)
|
2696
|
+
ce.lexpr = ce.op = nil
|
2697
|
+
end
|
2395
2698
|
}
|
2396
2699
|
end
|
2397
2700
|
|
@@ -2399,14 +2702,14 @@ class Decompiler
|
|
2399
2702
|
used = {}
|
2400
2703
|
walk_ce(scope) { |ce|
|
2401
2704
|
# remove unreferenced local vars
|
2402
|
-
used[ce.rexpr.name] = true if ce.rexpr.kind_of?
|
2403
|
-
used[ce.lexpr.name] = true if ce.lexpr.kind_of?
|
2404
|
-
ce.rexpr.each { |v| used[v.name] = true if v.kind_of?
|
2705
|
+
used[ce.rexpr.name] = true if ce.rexpr.kind_of?(C::Variable)
|
2706
|
+
used[ce.lexpr.name] = true if ce.lexpr.kind_of?(C::Variable)
|
2707
|
+
ce.rexpr.each { |v| used[v.name] = true if v.kind_of?(C::Variable) } if ce.rexpr.kind_of?(::Array)
|
2405
2708
|
}
|
2406
2709
|
unused = scope.symbol.keys.find_all { |n| not used[n] }
|
2407
2710
|
unused.each { |v| scope.symbol[v].add_attribute 'unused' } # fastcall args need it
|
2408
|
-
scope.statements.delete_if { |sm| sm.kind_of?
|
2409
|
-
scope.symbol.delete_if { |n, v| unused.include?
|
2711
|
+
scope.statements.delete_if { |sm| sm.kind_of?(C::Declaration) and unused.include?(sm.var.name) }
|
2712
|
+
scope.symbol.delete_if { |n, v| unused.include?(n) }
|
2410
2713
|
end
|
2411
2714
|
|
2412
2715
|
def finalize
|
@@ -2417,17 +2720,17 @@ class Decompiler
|
|
2417
2720
|
def optimize_global
|
2418
2721
|
# check all global vars (pointers to global data)
|
2419
2722
|
tl = @c_parser.toplevel
|
2420
|
-
vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of?
|
2723
|
+
vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of?(C::Variable) and not tl.symbol[k].type.kind_of?(C::Function) and not tl.symbol[k].storage == :extern and not tl.symbol[k].storage == :static }
|
2421
2724
|
countref = Hash.new(0)
|
2422
2725
|
|
2423
2726
|
walk_ce(tl) { |ce|
|
2424
2727
|
# XXX int foo; void bar() { int foo; } => false negative
|
2425
|
-
countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?
|
2426
|
-
countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?
|
2728
|
+
countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
|
2729
|
+
countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
|
2427
2730
|
}
|
2428
2731
|
|
2429
2732
|
vars.delete_if { |v| countref[v] == 0 }
|
2430
|
-
countref.delete_if { |k, v| not vars.include?
|
2733
|
+
countref.delete_if { |k, v| not vars.include?(k) }
|
2431
2734
|
|
2432
2735
|
# by default globals are C::Arrays
|
2433
2736
|
# if all references are *foo, dereference the var type
|
@@ -2441,7 +2744,7 @@ class Decompiler
|
|
2441
2744
|
else next
|
2442
2745
|
end
|
2443
2746
|
# compare type.type cause var is an Array and the cast is a Pointer
|
2444
|
-
countderef[r.rexpr.name] += 1 if r.kind_of?
|
2747
|
+
countderef[r.rexpr.name] += 1 if r.kind_of?(C::CExpression) and not r.op and r.rexpr.kind_of?(C::Variable) and
|
2445
2748
|
sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
|
2446
2749
|
}
|
2447
2750
|
vars.each { |n|
|
@@ -2449,7 +2752,7 @@ class Decompiler
|
|
2449
2752
|
v = tl.symbol[n]
|
2450
2753
|
target = C::CExpression[:*, [v]]
|
2451
2754
|
v.type = v.type.type
|
2452
|
-
v.initializer = v.initializer.first if v.initializer.kind_of?
|
2755
|
+
v.initializer = v.initializer.first if v.initializer.kind_of?(::Array)
|
2453
2756
|
walk_ce(tl) { |ce|
|
2454
2757
|
if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
|
2455
2758
|
ce.op = :'.'
|
@@ -2464,28 +2767,28 @@ class Decompiler
|
|
2464
2767
|
|
2465
2768
|
# if a global var appears only in one function, make it a static variable
|
2466
2769
|
tl.statements.each { |st|
|
2467
|
-
next if not st.kind_of?
|
2770
|
+
next if not st.kind_of?(C::Declaration) or not st.var.type.kind_of?(C::Function) or not scope = st.var.initializer
|
2468
2771
|
localcountref = Hash.new(0)
|
2469
2772
|
walk_ce(scope) { |ce|
|
2470
|
-
localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?
|
2471
|
-
localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?
|
2773
|
+
localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
|
2774
|
+
localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
|
2472
2775
|
}
|
2473
2776
|
|
2474
2777
|
vars.delete_if { |n|
|
2475
2778
|
next if scope.symbol[n]
|
2476
2779
|
next if localcountref[n] != countref[n]
|
2477
2780
|
v = tl.symbol.delete(n)
|
2478
|
-
tl.statements.delete_if { |d| d.kind_of?
|
2781
|
+
tl.statements.delete_if { |d| d.kind_of?(C::Declaration) and d.var.name == n }
|
2479
2782
|
|
2480
|
-
if countref[n] == 1 and v.initializer.kind_of?
|
2783
|
+
if countref[n] == 1 and v.initializer.kind_of?(C::CExpression) and v.initializer.rexpr.kind_of?(String)
|
2481
2784
|
walk_ce(scope) { |ce|
|
2482
|
-
if ce.rexpr.kind_of?
|
2785
|
+
if ce.rexpr.kind_of?(C::Variable) and ce.rexpr.name == n
|
2483
2786
|
if not ce.op
|
2484
2787
|
ce.replace v.initializer
|
2485
2788
|
else
|
2486
2789
|
ce.rexpr = v.initializer
|
2487
2790
|
end
|
2488
|
-
elsif ce.lexpr.kind_of?
|
2791
|
+
elsif ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == n
|
2489
2792
|
ce.lexpr = v.initializer
|
2490
2793
|
end
|
2491
2794
|
}
|
@@ -2507,7 +2810,7 @@ class Decompiler
|
|
2507
2810
|
args = func.type.args
|
2508
2811
|
decl = []
|
2509
2812
|
scope.statements.delete_if { |sm|
|
2510
|
-
next if not sm.kind_of?
|
2813
|
+
next if not sm.kind_of?(C::Declaration)
|
2511
2814
|
if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/ # aliased vars: use 1st domain only
|
2512
2815
|
args << sm.var
|
2513
2816
|
else
|
@@ -2520,8 +2823,8 @@ class Decompiler
|
|
2520
2823
|
# XXX a = 1 ; b = a ; a = 2
|
2521
2824
|
go = true # break from delete_if does not delete..
|
2522
2825
|
scope.statements.delete_if { |st|
|
2523
|
-
if go and st.kind_of?
|
2524
|
-
st.rexpr.rexpr.kind_of?
|
2826
|
+
if go and st.kind_of?(C::CExpression) and st.op == :'=' and st.rexpr.kind_of?(C::CExpression) and not st.rexpr.op and
|
2827
|
+
st.rexpr.rexpr.kind_of?(::Integer) and st.lexpr.kind_of?(C::Variable) and scope.symbol[st.lexpr.name]
|
2525
2828
|
st.lexpr.initializer = st.rexpr
|
2526
2829
|
else
|
2527
2830
|
go = false
|
@@ -2542,7 +2845,7 @@ class Decompiler
|
|
2542
2845
|
end
|
2543
2846
|
while curoff > argoff
|
2544
2847
|
wantarg = C::Variable.new
|
2545
|
-
wantarg.name =
|
2848
|
+
wantarg.name = stackoff_to_varname(argoff)
|
2546
2849
|
wantarg.type = C::BaseType.new(:int)
|
2547
2850
|
wantarg.attributes = ['unused']
|
2548
2851
|
func.type.args << wantarg
|
@@ -2552,6 +2855,18 @@ class Decompiler
|
|
2552
2855
|
func.type.args << a
|
2553
2856
|
argoff += @c_parser.typesize[:ptr]
|
2554
2857
|
}
|
2858
|
+
|
2859
|
+
# use user-supplied names
|
2860
|
+
scope.symbol.keys.each { |s|
|
2861
|
+
v = scope.symbol[s]
|
2862
|
+
next if not v.kind_of?(C::Variable)
|
2863
|
+
v.misc ||= {}
|
2864
|
+
uan = v.misc[:unalias_name] ||= s
|
2865
|
+
if newname = scope.decompdata[:unalias_name][uan] and newname != s
|
2866
|
+
v.name = newname
|
2867
|
+
scope.symbol[newname] = scope.symbol.delete(s)
|
2868
|
+
end
|
2869
|
+
}
|
2555
2870
|
end
|
2556
2871
|
|
2557
2872
|
# rename local variables from subfunc arg names
|
@@ -2563,12 +2878,12 @@ class Decompiler
|
|
2563
2878
|
walk_ce(scope) { |ce|
|
2564
2879
|
funcs << ce if ce.op == :funcall
|
2565
2880
|
cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++'
|
2566
|
-
cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include?
|
2881
|
+
cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.kind_of?(::Integer)
|
2567
2882
|
}
|
2568
2883
|
|
2569
2884
|
rename = lambda { |var, name|
|
2570
|
-
var = var.rexpr if var.kind_of?
|
2571
|
-
next if not var.kind_of?
|
2885
|
+
var = var.rexpr if var.kind_of?(C::CExpression) and not var.op
|
2886
|
+
next if not var.kind_of?(C::Variable) or not scope.symbol[var.name] or not name
|
2572
2887
|
next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/
|
2573
2888
|
s = scope.symbol_ancestors
|
2574
2889
|
n = name
|
@@ -2579,13 +2894,13 @@ class Decompiler
|
|
2579
2894
|
}
|
2580
2895
|
|
2581
2896
|
funcs.each { |ce|
|
2582
|
-
next if not ce.lexpr.kind_of?
|
2897
|
+
next if not ce.lexpr.kind_of?(C::Variable) or not ce.lexpr.type.kind_of?(C::Function)
|
2583
2898
|
ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa }
|
2584
2899
|
}
|
2585
2900
|
funcs.each { |ce|
|
2586
|
-
next if not ce.lexpr.kind_of?
|
2901
|
+
next if not ce.lexpr.kind_of?(C::Variable) or not ce.lexpr.type.kind_of?(C::Function)
|
2587
2902
|
ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa|
|
2588
|
-
next if not a.kind_of?
|
2903
|
+
next if not a.kind_of?(C::CExpression) or a.op != :& or a.lexpr
|
2589
2904
|
next if not fa or not fa.name
|
2590
2905
|
rename[a.rexpr, fa.name.sub(/^l?p/, '')]
|
2591
2906
|
}
|
@@ -2624,34 +2939,46 @@ class Decompiler
|
|
2624
2939
|
end
|
2625
2940
|
|
2626
2941
|
# yields each statement (recursive)
|
2627
|
-
|
2942
|
+
# replace the element by the block return value if patch is true
|
2943
|
+
def walk(scope, post=false, patch=false, &b)
|
2628
2944
|
case scope
|
2629
|
-
when ::Array
|
2945
|
+
when ::Array
|
2946
|
+
scope.each_with_index { |s, i|
|
2947
|
+
v = walk(s, post, patch, &b)
|
2948
|
+
scope[i] = v if patch and v
|
2949
|
+
}
|
2950
|
+
nil
|
2630
2951
|
when C::Statement
|
2631
|
-
yield scope if not post
|
2952
|
+
v = yield scope if not post
|
2632
2953
|
case scope
|
2633
|
-
when C::Block
|
2954
|
+
when C::Block
|
2955
|
+
walk(scope.statements, post, patch, &b)
|
2634
2956
|
when C::If
|
2635
|
-
yield scope.test
|
2636
|
-
|
2637
|
-
walk(scope.
|
2957
|
+
vv = yield scope.test
|
2958
|
+
scope.test = vv if patch and vv
|
2959
|
+
walk(scope.bthen, post, patch, &b)
|
2960
|
+
walk(scope.belse, post, patch, &b) if scope.belse
|
2638
2961
|
when C::While, C::DoWhile
|
2639
|
-
yield scope.test
|
2640
|
-
|
2962
|
+
vv = yield scope.test
|
2963
|
+
scope.test = vv if patch and vv
|
2964
|
+
walk(scope.body, post, patch, &b)
|
2641
2965
|
when C::Return
|
2642
|
-
yield scope.value
|
2966
|
+
vv = yield scope.value
|
2967
|
+
scope.value = vv if patch and vv
|
2643
2968
|
end
|
2644
|
-
yield scope if post
|
2969
|
+
v = yield scope if post
|
2970
|
+
v
|
2645
2971
|
when C::Declaration
|
2646
|
-
walk(scope.var.initializer, post, &b) if scope.var.initializer
|
2972
|
+
walk(scope.var.initializer, post, patch, &b) if scope.var.initializer
|
2973
|
+
nil
|
2647
2974
|
end
|
2648
2975
|
end
|
2649
2976
|
|
2650
2977
|
# forwards to @c_parser, handles cast to Array (these should not happen btw...)
|
2651
2978
|
def sizeof(var, type=nil)
|
2652
|
-
var, type = nil, var if var.kind_of?
|
2979
|
+
var, type = nil, var if var.kind_of?(C::Type) and not type
|
2653
2980
|
type ||= var.type
|
2654
|
-
return @c_parser.typesize[:ptr] if type.kind_of?
|
2981
|
+
return @c_parser.typesize[:ptr] if type.kind_of?(C::Array) and not var.kind_of?(C::Variable)
|
2655
2982
|
@c_parser.sizeof(var, type) rescue -1
|
2656
2983
|
end
|
2657
2984
|
end
|