metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,213 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/render'
9
+
10
+
11
+ module Metasm
12
+ # symbolic pointer dereference
13
+ # API similar to Expression
14
+ class Indirection < ExpressionType
15
+ # Expression (the pointer)
16
+ attr_accessor :target
17
+ alias pointer target
18
+ alias pointer= target=
19
+ # length in bytes of data referenced
20
+ attr_accessor :len
21
+ # address of the instruction who generated the indirection
22
+ attr_accessor :origin
23
+
24
+ def initialize(target, len, origin)
25
+ @target, @len, @origin = target, len, origin
26
+ end
27
+
28
+ def reduce_rec
29
+ ptr = Expression[@target.reduce]
30
+ (ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin)
31
+ end
32
+
33
+ def bind(h)
34
+ h[self] || Indirection.new(@target.bind(h), @len, @origin)
35
+ end
36
+
37
+ def hash ; @target.hash^@len.to_i end
38
+ def eql?(o) o.class == self.class and [o.target, o.len] == [@target, @len] end
39
+ alias == eql?
40
+
41
+ include Renderable
42
+ def render
43
+ ret = []
44
+ qual = {1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword'}[len] || "_#{len*8}bits" if len
45
+ ret << "#{qual} ptr " if qual
46
+ ret << '[' << @target << ']'
47
+ end
48
+
49
+ # returns the complexity of the expression (number of externals +1 per indirection)
50
+ def complexity
51
+ 1+@target.complexity
52
+ end
53
+
54
+ def self.[](t, l, o=nil)
55
+ new(Expression[*t], l, o)
56
+ end
57
+
58
+ def inspect
59
+ "Indirection[#{@target.inspect.sub(/^Expression/, '')}, #{@len.inspect}#{', '+@origin.inspect if @origin}]"
60
+ end
61
+
62
+ def externals
63
+ @target.externals
64
+ end
65
+
66
+ def match_rec(target, vars)
67
+ return false if not target.kind_of? Indirection
68
+ t = target.target
69
+ if vars[t]
70
+ return false if @target != vars[t]
71
+ elsif vars.has_key? t
72
+ vars[t] = @target
73
+ elsif t.kind_of? ExpressionType
74
+ return false if not @target.match_rec(t, vars)
75
+ else
76
+ return false if targ != @target
77
+ end
78
+ if vars[target.len]
79
+ return false if @len != vars[target.len]
80
+ elsif vars.has_key? target.len
81
+ vars[target.len] = @len
82
+ else
83
+ return false if target.len != @len
84
+ end
85
+ vars
86
+ end
87
+ end
88
+
89
+ class Expression
90
+ # returns the complexity of the expression (number of externals +1 per indirection)
91
+ def complexity
92
+ case @lexpr
93
+ when ExpressionType; @lexpr.complexity
94
+ when nil, ::Numeric; 0 # absent or numeric operands add nothing
95
+ else 1 # any other operand counts for 1
96
+ end + # left operand's score, added to the right operand's score below
97
+ case @rexpr
98
+ when ExpressionType; @rexpr.complexity
99
+ when nil, ::Numeric; 0
100
+ else 1
101
+ end
102
+ end
103
+ # returns the array of Indirections found in lexpr and rexpr, recursing into other ExpressionTypes (an Indirection's own pointer is not searched)
104
+ def expr_indirections
105
+ ret = case @lexpr
106
+ when Indirection; [@lexpr]
107
+ when ExpressionType; @lexpr.expr_indirections
108
+ else []
109
+ end
110
+ case @rexpr # the value of this case is the return value
111
+ when Indirection; ret << @rexpr
112
+ when ExpressionType; ret.concat @rexpr.expr_indirections
113
+ else ret
114
+ end
115
+ end
116
+ end
117
+
118
+ class EncodedData
119
+ # returns an ::Integer from self.ptr, advances ptr
120
+ # bytes from rawsize to virtsize = 0
121
+ # ignores self.relocations
122
+ def get_byte
123
+ @ptr += 1 # ptr is advanced even when it ends up beyond virtsize
124
+ if @ptr <= @data.length
125
+ b = @data[ptr-1]
126
+ b = b.unpack('C').first if b.kind_of? ::String # 1.9: indexing yields a String, convert it to an Integer
127
+ b
128
+ elsif @ptr <= @virtsize
129
+ 0
130
+ end # no branch taken beyond virtsize: the method returns nil
131
+ end
132
+
133
+ # reads len bytes from self.data, advances ptr
134
+ # bytes from rawsize to virtsize are returned as zeroes
135
+ # ignores self.relocations
136
+ def read(len=@virtsize-@ptr)
137
+ len = @virtsize-@ptr if len > @virtsize-@ptr # clamp the request to what remains before virtsize
138
+ str = (@ptr < @data.length) ? @data[@ptr, len] : ''
139
+ str = str.to_str.ljust(len, "\0") if str.length < len # pad with zero bytes up to len
140
+ @ptr += len
141
+ str
142
+ end
143
+
144
+ # decodes an immediate value from self.ptr, advances ptr
145
+ # returns an Expression on relocation, or an ::Integer
146
+ # if ptr has a relocation but the type/endianness does not match, the reloc is ignored and a warning is issued
147
+ # TODO arg type => sign+len
148
+ def decode_imm(type, endianness)
149
+ raise "invalid imm type #{type.inspect}" if not isz = Expression::INT_SIZE[type]
150
+ if rel = @reloc[@ptr]
151
+ if Expression::INT_SIZE[rel.type] == isz and rel.endianness == endianness
152
+ @ptr += rel.length # skip the relocated bytes
153
+ return rel.target
154
+ end
155
+ puts "W: Immediate type/endianness mismatch, ignoring relocation #{rel.target.inspect} (wanted #{type.inspect})" if $DEBUG
156
+ end
157
+ Expression.decode_imm(read(isz/8), type, endianness) # no usable relocation: decode the raw bytes (isz is in bits)
158
+ end
159
+ alias decode_immediate decode_imm
160
+ end
161
+
162
+ class Expression
163
+ # decodes an immediate from a raw binary string
164
+ # type may be a length in bytes, interpreted as unsigned, or an expression type (eg :u32)
165
+ # endianness is either an endianness or an object that responds to endianness
166
+ def self.decode_imm(str, type, endianness, off=0) # off: offset in str where the immediate starts
167
+ type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer
168
+ endianness = endianness.endianness if not endianness.kind_of? ::Symbol
169
+ str = str[off, INT_SIZE[type]/8].to_s
170
+ str = str.reverse if endianness == :little # bytes are accumulated most significant first below
171
+ val = str.unpack('C*').inject(0) { |val_, b| (val_ << 8) | b }
172
+ val = make_signed(val, INT_SIZE[type]) if type.to_s[0] == ?i # only types whose name starts with 'i' are sign-converted
173
+ val
174
+ end
175
+ class << self
176
+ alias decode_immediate decode_imm
177
+ end
178
+ end
179
+
180
+ class CPU
181
+ # decodes the instruction at edata.ptr, mapped at virtual address addr
182
+ # returns a DecodedInstruction or nil
183
+ def decode_instruction(edata, addr)
184
+ @bin_lookaside ||= build_bin_lookaside # built on first use, then reused
185
+ di = decode_findopcode edata
186
+ di.address = addr if di
187
+ di = decode_instr_op(edata, di) if di
188
+ decode_instr_interpret(di, addr) if di # each step may return nil, which ends the chain
189
+ end
190
+
191
+ # matches the binary opcode at edata.ptr
192
+ # returns di or nil
193
+ def decode_findopcode(edata)
194
+ DecodedInstruction.new self # this base version does not read edata
195
+ end
196
+
197
+ # decodes di.instruction
198
+ # returns di or nil
199
+ def decode_instr_op(edata, di) # this base version has an empty body and returns nil
200
+ end
201
+
202
+ # may modify di.instruction.args for eg jump offset => absolute address
203
+ # returns di or nil
204
+ def decode_instr_interpret(di, addr)
205
+ di # this base version returns di unchanged
206
+ end
207
+
208
+ # number of instructions following a jump that are still executed
209
+ def delay_slot(di=nil)
210
+ 0
211
+ end
212
+ end
213
+ end
@@ -0,0 +1,2659 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/decode'
9
+ require 'metasm/parse_c'
10
+
11
+ module Metasm
12
+ class C::Variable; attr_accessor :stackoff; end # adds a stackoff attribute to C variables
13
+ class C::Block; attr_accessor :decompdata; end # adds a decompdata attribute to C blocks (set by the Decompiler on function scopes)
14
+ class DecodedFunction; attr_accessor :decompdata; end # same attribute on decoded functions (a Hash with :stackoff_type / :stackoff_name keys in this file)
15
+
16
+ class CPU
17
+ def decompile_check_abi(dcmp, entry, func) # empty default; Decompiler#do_decompile_func calls it with (self, entry, func) after its optimisation passes
18
+ end
19
+ end
20
+
21
+ class Decompiler
22
+ # TODO add methods to C::CExpr
23
+ AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--'] # assignment and increment/decrement operators
24
+ # dasm: the disassembler given to initialize, c_parser: the C parser whose toplevel receives the decompiled functions
25
+ attr_accessor :dasm, :c_parser
26
+ attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels # NOTE(review): presumably switches disabling the named passes; not read in this part of the file
27
+ # recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done
28
+ attr_accessor :recurse
29
+
30
+ def initialize(dasm, cp = dasm.c_parser) # cp defaults to the disassembler's own c_parser
31
+ @dasm = dasm
32
+ @recurse = 1/0.0 # Infinity
33
+ @c_parser = cp || @dasm.cpu.new_cparser # no parser available: ask the cpu for a new one
34
+ end
35
+
36
+ # decompile recursively function from an entrypoint, then perform global optimisation (static vars, ...)
37
+ # should be called once after everything is decompiled (global optimizations may bring bad results otherwise)
38
+ # use decompile_func for incremental decompilation
39
+ # returns the c_parser
40
+ def decompile(*entry)
41
+ entry.each { |f| decompile_func(f) }
42
+ finalize
43
+ @c_parser
44
+ end
45
+
46
+ # decompile a function, decompiling subfunctions as needed
47
+ # may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath was found, which may change the dependencies between blocks etc)
48
+ def decompile_func(entry)
49
+ return if @recurse < 0
50
+ entry = @dasm.normalize entry
51
+ return if not @dasm.decoded[entry]
52
+
53
+ # create a new toplevel function to hold our code
54
+ func = C::Variable.new
55
+ func.name = @dasm.auto_label_at(entry, 'func')
56
+ if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type]
57
+ rettype = f.decompdata[:return_type]
58
+ else
59
+ rettype = C::BaseType.new(:int)
60
+ end
61
+ func.type = C::Function.new rettype, []
62
+ if @c_parser.toplevel.symbol[func.name]
63
+ return if @recurse == 0
64
+ if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
65
+ # recursive dependency: declare prototype
66
+ puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE
67
+ pre_recurse = @recurse
68
+ @recurse = 0
69
+ @c_parser.toplevel.symbol.delete func.name
70
+ decompile_func(entry)
71
+ @recurse = pre_recurse
72
+ if not dcl = @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
73
+ @c_parser.toplevel.statements << C::Declaration.new(func)
74
+ end
75
+ end
76
+ return
77
+ end
78
+ @c_parser.toplevel.symbol[func.name] = func
79
+ puts "decompiling #{func.name}" if $VERBOSE
80
+
81
+ while catch(:restart) { do_decompile_func(entry, func) } == :restart
82
+ retval = :restart
83
+ end
84
+
85
+ @c_parser.toplevel.symbol[func.name] = func # recursive func prototype could have overwritten us
86
+ @c_parser.toplevel.statements << C::Declaration.new(func)
87
+
88
+ puts " decompiled #{func.name}" if $VERBOSE
89
+
90
+ retval
91
+ end
92
+
93
+ # calls decompile_func with recurse -= 1 (internal use)
94
+ def decompile_func_rec(entry)
95
+ @recurse -= 1
96
+ decompile_func(entry)
97
+ ensure
98
+ @recurse += 1
99
+ end
100
+
101
+ def do_decompile_func(entry, func)
102
+ # find decodedinstruction graph of the function, decompile subfuncs
103
+ myblocks = listblocks_func(entry)
104
+
105
+ # [esp+8] => [:frameptr-12]
106
+ makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block }
107
+
108
+ # find registry dependencies between blocks
109
+ deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func)
110
+
111
+ scope = func.initializer = C::Block.new(@c_parser.toplevel)
112
+ if df = @dasm.function[entry]
113
+ scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}}
114
+ else
115
+ scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}}
116
+ end
117
+
118
+ # di blocks => raw c statements, declare variables
119
+ @dasm.cpu.decompile_blocks(self, myblocks, deps, func)
120
+
121
+ simplify_goto(scope)
122
+ namestackvars(scope)
123
+ unalias_vars(scope, func)
124
+ decompile_c_types(scope)
125
+ optimize(scope)
126
+ remove_unreferenced_vars(scope)
127
+ cleanup_var_decl(scope, func)
128
+ if @recurse > 0
129
+ decompile_controlseq(scope)
130
+ optimize_vars(scope)
131
+ optimize_ctrl(scope)
132
+ optimize_vars(scope)
133
+ remove_unreferenced_vars(scope)
134
+ simplify_varname_noalias(scope)
135
+ rename_variables(scope)
136
+ end
137
+ @dasm.cpu.decompile_check_abi(self, entry, func)
138
+
139
+ case ret = scope.statements.last
140
+ when C::CExpression; puts "no return at end of func" if $VERBOSE
141
+ when C::Return
142
+ if not ret.value
143
+ scope.statements.pop
144
+ else
145
+ v = ret.value
146
+ v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed
147
+ func.type.type = v.type
148
+ end
149
+ end
150
+
151
+ if @recurse == 0
152
+ # we need only the prototype
153
+ func.initializer = nil
154
+ end
155
+ end
156
+
157
+ # redecompile a function, redecompiles functions calling it if its prototype changed
158
+ def redecompile(name)
159
+ @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name }
160
+ oldvar = @c_parser.toplevel.symbol.delete name
161
+
162
+ decompile_func(name)
163
+
164
+ if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function
165
+ o, n = oldvar.type, newvar.type
166
+ if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type }
167
+ # XXX a may depend on b and c, and b may depend on c -> redecompile c twice
168
+ # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs..
169
+ @c_parser.toplevel.statements.dup.each { |st|
170
+ next if not st.kind_of? C::Declaration
171
+ next if not st.var.initializer
172
+ next if st.var.name == name
173
+ next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name }
174
+ redecompile(st.var.name)
175
+ }
176
+ end
177
+ end
178
+ end
179
+
180
+ def new_global_var(addr, type, scope=nil)
181
+ addr = @dasm.normalize(addr)
182
+
183
+ # (almost) NULL ptr
184
+ return if addr.kind_of? Fixnum and addr >= 0 and addr < 32
185
+
186
+ # check preceding structure we're hitting
187
+ # TODO check what we step over when defining a new static struct
188
+ 0x100.times { |i_|
189
+ next if not n = @dasm.get_label_at(addr-i_)
190
+ next if not v = @c_parser.toplevel.symbol[n]
191
+ next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union
192
+ break if i_ == 0 # XXX it crashes later if we dont break here
193
+ next if sizeof(v.type.pointed) <= i_
194
+ return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil)
195
+ }
196
+
197
+ ptype = type.pointed.untypedef if type.pointer?
198
+ if ptype.kind_of? C::Function
199
+ name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk')
200
+ if @dasm.get_section_at(addr) and @recurse > 0
201
+ puts "found function pointer to #{name}" if $VERBOSE
202
+ @dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ?
203
+ f = @dasm.function[addr] ||= DecodedFunction.new
204
+ # TODO detect thunks (__noreturn)
205
+ f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} }
206
+ if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function
207
+ os = @c_parser.toplevel.symbol.delete name
208
+ @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name }
209
+ aoff = 1
210
+ ptype.args.to_a.each { |a|
211
+ aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
212
+ f.decompdata[:stackoff_type][aoff] ||= a.type
213
+ f.decompdata[:stackoff_name][aoff] ||= a.name if a.name
214
+ aoff += sizeof(a) # ary ?
215
+ }
216
+ decompile_func_rec(addr)
217
+ s = @c_parser.toplevel.symbol[name]
218
+ walk_ce([@c_parser.toplevel, scope]) { |ce|
219
+ ce.lexpr = s if ce.lexpr == os
220
+ ce.rexpr = s if ce.rexpr == os
221
+ } if os and s # update existing references to old instance
222
+ # else redecompile with new prototye ?
223
+ end
224
+ end
225
+ end
226
+
227
+ name = case (type.pointer? && tsz = sizeof(nil, ptype))
228
+ when 1; 'byte'
229
+ when 2; 'word'
230
+ when 4; 'dword'
231
+ else 'unk'
232
+ end
233
+ name = 'stru' if ptype.kind_of? C::Union
234
+ name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru')
235
+
236
+ if not var = @c_parser.toplevel.symbol[name]
237
+ var = C::Variable.new
238
+ var.name = name
239
+ var.type = type.pointer? ? C::Array.new(ptype) : type
240
+ @c_parser.toplevel.symbol[var.name] = var
241
+ @c_parser.toplevel.statements << C::Declaration.new(var)
242
+ end
243
+ if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length
244
+ # TODO struct init, array, fptrs..
245
+ elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and
246
+ [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer)
247
+ # TODO do not overlap other statics (but labels may refer to elements of the array...)
248
+ data = (0..256).map {
249
+ v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness)
250
+ v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation
251
+ v
252
+ }
253
+ var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty?
254
+ if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str
255
+ # XXX 0x80 with ruby1.9...
256
+ var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil
257
+ end
258
+ if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and
259
+ i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc
260
+ i.type = i.rexpr.type
261
+ type = var.type = C::Array.new(C::Pointer.new(i.type))
262
+ var.initializer = [i]
263
+ end
264
+ var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? C::Array
265
+ end
266
+
267
+ # TODO patch existing references to addr ? (or would they have already triggered new_global_var?)
268
+
269
+ # return the object to use to replace the raw addr
270
+ var
271
+ end
272
+
273
+ # returns an array of [address of block start, list of addresses of the blocks it jumps to]
274
+ # decompile subfunctions
275
+ def listblocks_func(entry)
276
+ @autofuncs ||= []
277
+ blocks = []
278
+ entry = dasm.normalize entry
279
+ todo = [entry]
280
+ while a = todo.pop
281
+ next if blocks.find { |aa, at| aa == a }
282
+ next if not di = @dasm.di_at(a)
283
+ blocks << [a, []]
284
+ di.block.each_to { |ta, type|
285
+ next if type == :indirect
286
+ ta = dasm.normalize ta
287
+ if type != :subfuncret and not @dasm.function[ta] and
288
+ (not @dasm.function[entry] or @autofuncs.include? entry) and
289
+ di.block.list.last.opcode.props[:saveip]
290
+ # possible noreturn function
291
+ # XXX call $+5; pop eax
292
+ @autofuncs << ta
293
+ @dasm.function[ta] = DecodedFunction.new
294
+ puts "autofunc #{Expression[ta]}" if $VERBOSE
295
+ end
296
+
297
+ if @dasm.function[ta] and type != :subfuncret
298
+ f = dasm.auto_label_at(ta, 'func')
299
+ ta = dasm.normalize($1) if f =~ /^thunk_(.*)/
300
+ ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret)
301
+ throw :restart, :restart if ret == :restart
302
+ else
303
+ @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta }
304
+ blocks.last[1] |= [ta]
305
+ todo << ta
306
+ end
307
+ }
308
+ end
309
+ blocks
310
+ end
311
+
312
+ # backtraces an expression from addr
313
+ # returns an integer, a label name, or an Expression
314
+ # XXX '(GetProcAddr("foo"))()' should not decompile to 'foo()'
315
+ def backtrace_target(expr, addr)
316
+ if n = @dasm.backtrace(expr, addr).first
317
+ return expr if n == Expression::Unknown
318
+ n = Expression[n].reduce_rec
319
+ n = @dasm.get_label_at(n) || n
320
+ n = $1 if n.kind_of? ::String and n =~ /^thunk_(.*)/
321
+ n
322
+ else
323
+ expr
324
+ end
325
+ end
326
+
327
+ # patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off
328
+ def makestackvars(funcstart, blocks)
329
+ blockstart = nil
330
+ cache_di = nil
331
+ cache = {} # [i_s, e, type] => backtrace
332
+ tovar = lambda { |di, e, i_s|
333
+ case e
334
+ when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce
335
+ when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin]
336
+ when :frameptr; e
337
+ when ::Symbol
338
+ cache.clear if cache_di != di ; cache_di = di
339
+ vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart,
340
+ :include_start => i_s, :no_check => true, :terminals => [:frameptr])
341
+ # backtrace only to blockstart first
342
+ if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or
343
+ (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or
344
+ (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and
345
+ (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer))))
346
+ ee
347
+ else
348
+ # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding..
349
+ vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart,
350
+ :include_start => i_s, :no_check => true, :terminals => [:frameptr])
351
+ if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or
352
+ (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)))
353
+ ee
354
+ else e
355
+ end
356
+ end
357
+ else e
358
+ end
359
+ }
360
+
361
+ # must not change bt_bindings until everything is backtracked
362
+ repl_bind = {} # di => bt_bd
363
+
364
+ @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block|
365
+ block.list.each { |di|
366
+ bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di)
367
+ newbd = repl_bind[di] = {}
368
+ bd.each { |k, v|
369
+ k = tovar[di, k, true] if k.kind_of? Indirection
370
+ next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer)
371
+ newbd[k] = tovar[di, v, false]
372
+ }
373
+ }
374
+ }
375
+
376
+ repl_bind.each { |di, bd| di.backtrace_binding = bd }
377
+ end
378
+
379
+ # give a name to a stackoffset (relative to start of func)
380
+ # 4 => 'arg_0', -8 => 'var_4' etc (with 4-byte pointers; the name is returned as a String)
381
+ def stackoff_to_varname(off)
382
+ if off >= @c_parser.typesize[:ptr]; 'arg_%X' % ( off-@c_parser.typesize[:ptr]) # 4 => arg_0, 8 => arg_4..
383
+ elsif off > 0; 'arg_0%X' % off
384
+ elsif off == 0; 'retaddr'
385
+ elsif off <= -@dasm.cpu.size/8; 'var_%X' % (-off-@dasm.cpu.size/8) # -4 => var_0, -8 => var_4..
386
+ else 'var_0%X' % -off
387
+ end
388
+ end
389
+
390
+ # turns an Expression to a CExpression, create+declares needed variables in scope
391
+ def decompile_cexpr(e, scope, itype=nil)
392
+ case e
393
+ when Expression
394
+ if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/
395
+ decompile_cexpr(e.rexpr, scope, itype)
396
+ elsif e.op == :+ and e.rexpr.kind_of? ::Integer and e.rexpr < 0
397
+ decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype)
398
+ elsif e.lexpr
399
+ a = decompile_cexpr(e.lexpr, scope, itype)
400
+ C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)]
401
+ elsif e.op == :+
402
+ decompile_cexpr(e.rexpr, scope, itype)
403
+ else
404
+ a = decompile_cexpr(e.rexpr, scope, itype)
405
+ C::CExpression[e.op, a]
406
+ end
407
+ when Indirection
408
+ case e.len
409
+ when 1, 2, 4, 8
410
+ bt = C::BaseType.new("__int#{e.len*8}".to_sym)
411
+ else
412
+ bt = C::Struct.new
413
+ bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))]
414
+ end
415
+ itype = C::Pointer.new(bt)
416
+ p = decompile_cexpr(e.target, scope, itype)
417
+ p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer
418
+ C::CExpression[:*, p]
419
+ when ::Integer
420
+ C::CExpression[e]
421
+ when C::CExpression
422
+ e
423
+ else
424
+ name = e.to_s
425
+ if not s = scope.symbol_ancestors[name]
426
+ s = C::Variable.new
427
+ s.type = C::BaseType.new(:__int32)
428
+ case e
429
+ when ::String # edata relocation (rel.length = size of pointer)
430
+ return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope)
431
+ when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})")
432
+ else s.type.qualifier = [:volatile]
433
+ puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE
434
+ end
435
+ s.name = name
436
+ scope.symbol[s.name] = s
437
+ scope.statements << C::Declaration.new(s)
438
+ end
439
+ s
440
+ end
441
+ end
442
+
443
# simplify goto -> goto / goto -> return
# Works in place on +scope+ (a C::Block) and its nested statements:
#  * a goto whose target label is immediately followed by another goto/return
#    is replaced by a copy of that goto/return
#  * unless +keepret+ is set, a label 'ret_label' is inserted before the final
#    return of +scope+, and other returns of the same value become gotos to
#    the label standing before that final return
#  * a goto/return directly following another goto/return is removed
#  * if () { goto x; } x:  gets an empty 'then' block
# Unreferenced labels are removed through remove_labels.
def simplify_goto(scope, keepret = false)
  if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label
    scope.statements.insert(-2, C::Label.new("ret_label"))
  end

  # label name => the goto/return statement found right after the label(s)
  jumpto = {}
  walk(scope) { |s|
    next if not s.kind_of? C::Block
    s.statements.each_with_index { |ss, i|
      case ss
      when C::Goto, C::Return
        # scan backwards over the labels directly preceding ss.
        # stop at index 0: a negative index would wrap around to the end of
        # the block and wrongly bind the labels found there to ss
        while i > 0 and l = s.statements[i -= 1] and l.kind_of? C::Label
          jumpto[l.name] = ss
        end
      end
    }
  }

  # returns the statement that should replace s, or nil to keep s
  simpler = lambda { |s|
    case s
    when C::Goto
      if jumpto[s.target]
        r = jumpto[s.target].dup
        # the copied return must not share its expression with the original
        r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression
        r
      end
    when C::Return
      if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1]
        C::Goto.new(scope.statements[-2].name)
      end
    end
  }

  walk(scope) { |s|
    case s
    when C::Block
      s.statements.each_with_index { |ss, i|
        if sp = simpler[ss]
          s.statements[i] = sp
        end
      }
    when C::If
      if sp = simpler[s.bthen]
        s.bthen = sp
      end
    end
  }

  # remove unreferenced labels
  remove_labels(scope)

  walk(scope) { |s|
    next if not s.kind_of? C::Block
    del = false
    # remove dead code  goto a; goto b; if (0) { z: bla; }  => rm goto b
    s.statements.delete_if { |st|
      case st
      when C::Goto, C::Return
        olddel = del
        del = true
        olddel
      else
        del = false
      end
    }
    # if () { goto x; } x:
    s.statements.each_with_index { |ss, i|
      if ss.kind_of? C::If
        t = ss.bthen
        t = t.statements.first if t.kind_of? C::Block
        if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target
          ss.bthen = C::Block.new(scope)
        end
      end
    }
  }

  remove_labels(scope)
end
523
+
524
# Rebuilds structured control flow (if/else, while, switch) from the
# if-goto / goto statements of +scope+.
# The if pass is run twice, removing unused labels after each run.
def decompile_controlseq(scope)
  # TODO replace all this crap by a method using the graph representation
  2.times {
    scope.statements = decompile_cseq_if(scope.statements, scope)
    remove_labels(scope)
  }
  # TODO harmonize _if/_while api (if returns a replacement, while patches)
  decompile_cseq_while(scope.statements, scope)
  decompile_cseq_switch(scope)
end
535
+
536
# optimize if() { a; } to if() a;
# Final cleanup of the control structures of +scope+, done in place:
#  * runs simplify_goto (keeping the returns)
#  * turns gotos nested in a loop into break/continue when they target the
#    label standing right after / right before that loop
#  * while (1) { a; if (b) { c; return; } d; } as last statement of +scope+
#    => while (1) { a; if (b) break; d; } c; return;
#  * rewrites tests of the form (a + 1) / (a - 1) as (a != -1) / (a != 1)
#  * unwraps single-statement blocks of if/else/while/do-while, replaces an
#    else-less if with an empty body by its test, an empty do-while by a while
#  * merges consecutive  if (a) X; if (b) X;  (X = same return/break/continue)
#    into  if (a || b) X;
def optimize_ctrl(scope)
  simplify_goto(scope, true)

  # break/continue
  # XXX if (foo) while (bar) goto bla; bla: should => break
  # brk/cnt: names of the labels a goto must target to become a
  # break/continue for the innermost enclosing loop (nil when there is none).
  # returns the (possibly replaced) statement
  loopjmp = lambda { |e, brk, cnt|
    case e
    when C::Block
      loopjmp[e.statements, brk, cnt]
      e
    when ::Array
      e.each_with_index { |st, i|
        case st
        when C::While, C::DoWhile
          l1 = (e[i+1].name if e[i+1].kind_of? C::Label)
          # i > 0: e[-1] is the *last* statement, a label there does not precede the loop
          l2 = (e[i-1].name if i > 0 and e[i-1].kind_of? C::Label)
          e[i].body = loopjmp[st.body, l1, l2]
        else
          e[i] = loopjmp[st, brk, cnt]
        end
      }
      e
    when C::If
      e.bthen = loopjmp[e.bthen, brk, cnt] if e.bthen
      e.belse = loopjmp[e.belse, brk, cnt] if e.belse
      e
    when C::While, C::DoWhile
      # loop used directly as a statement body: no neighbouring labels known
      e.body = loopjmp[e.body, nil, nil]
      e
    when C::Goto
      if e.target == brk
        C::Break.new
      elsif e.target == cnt
        C::Continue.new
      else e
      end
    else e
    end
  }
  loopjmp[scope, nil, nil]

  remove_labels(scope)

  # while (1) { a; if(b) { c; return; }; d; }  =>  while (1) { a; if (b) break; d; } c;
  while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and
      not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block
    break if not i = st.body.statements.find { |ist|
      ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return
    }
    # the moved statements now hang from the block that contained the 'then' block
    walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? C::Block and sst.outer == i.bthen }
    scope.statements.concat i.bthen.statements
    i.bthen = C::Break.new
  end

  patch_test = lambda { |ce|
    ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!'
    # if (a+1)  =>  if (a != -1)
    if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr
      ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+
      ce.op = :'!='
    end
  }

  walk(scope) { |ce|
    case ce
    when C::If
      patch_test[ce.test]
      if ce.bthen.kind_of? C::Block
        case ce.bthen.statements.length
        when 1
          walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen }
          ce.bthen = ce.bthen.statements.first
        when 0
          if not ce.belse and i = ce.bthen.outer.statements.index(ce)
            ce.bthen.outer.statements[i] = ce.test	# TODO remove sideeffectless parts
          end
        end
      end
      if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1
        walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of? C::Block and sst.outer == ce.belse }
        ce.belse = ce.belse.statements.first
      end
    when C::While, C::DoWhile
      patch_test[ce.test]
      if ce.body.kind_of? C::Block
        case ce.body.statements.length
        when 1
          walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of? C::Block and sst.outer == ce.body }
          ce.body = ce.body.statements.first
        when 0
          # do {} while (x);  =>  while (x);
          if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce)
            ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body)
          end
          ce.body = nil
        end
      end
    end
  }
  walk(scope) { |ce|
    next if not ce.kind_of? C::Block
    st = ce.statements
    st.length.times { |n|
      while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and (
          (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or
          (st[n].bthen.kind_of? C::Break and st[n+1].bthen.kind_of? C::Break) or
          (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue))
        # if (a) return x; if (b) return x; => if (a || b) return x;
        st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test]
        st.delete_at(n+1)
      end
    }
  }
end
650
+
651
+ # ifgoto => ifthen
652
+ # ary is an array of statements where we try to find if () {} [else {}]
653
+ # recurses to then/else content
# ary is consumed: statements are shifted out of it one at a time, and the
# rewrite rules below cut/paste ranges of what remains in it.
# scope is passed to C::Block.new for every block created here.
# Returns the new array of statements (ary itself, untouched, when
# forbid_decompile_ifwhile is set).
# The rules are applied in the order written and each one works on the
# result of the previous ones, so their order matters.
654
+ def decompile_cseq_if(ary, scope)
655
+ return ary if forbid_decompile_ifwhile
656
+ # the array of decompiled statements to use as replacement
657
+ ret = []
658
+ # list of labels appearing in ary
659
+ inner_labels = ary.grep(C::Label).map { |l| l.name }
660
+ while s = ary.shift
661
+ # recurse if it's not the first run
662
+ if s.kind_of? C::If
663
+ s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block
664
+ s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block
665
+ end
666
+
667
+ # if (a) goto x; if (b) goto x; => if (a || b) goto x;
668
+ while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and
669
+ not ary.first.belse and s.bthen.target == ary.first.bthen.target
670
+ s.test = C::CExpression[s.test, :'||', ary.shift.test]
671
+ end
672
+
673
+ # if (a) goto x; b; x: => if (!a) { b; }
674
+ if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target }
675
+ # if {goto l;} a; l: => if (!) {a;}
676
+ s.test = C::CExpression.negate s.test
677
+ s.bthen = C::Block.new(scope)
678
+ s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen)
679
+ s.bthen.statements.pop # remove l: from bthen, it is in ary (was needed in bthen for inner ifs)
680
+ ary[0...ary.index(l)] = []
681
+ end
682
+
683
+ if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto)
684
+ s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? C::Goto
685
+
# bts aliases the statement array of the 'then' block: the rules below edit it in place
686
+ bts = s.bthen.statements
687
+
688
+ # if (a) if (b) { c; } => if (a && b) { c; }
689
+ if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse
690
+ s.test = C::CExpression[s.test, :'&&', bts.first.test]
691
+ bts = bts.first.bthen
692
+ bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts]
693
+ end
694
+
695
+ # if (a) { if (b) goto c; d; } c: => if (a && !b) { d; }
696
+ if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name
697
+ s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)]
698
+ if e = bts.shift.belse
699
+ bts.unshift e
700
+ end
701
+ end
702
+
703
+ # if () { goto a; } a:
704
+ if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name
705
+ bts.pop
706
+ end
707
+
708
+ # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer;
709
+ if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return
710
+ s.test = C::CExpression.negate s.test
711
+ ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)]
712
+ end
713
+
714
+ # if { a; goto l; } b; l: => if {a;} else {b;}
715
+ if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target }
716
+ s.belse = C::Block.new(scope)
717
+ s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse)
718
+ ary[0...ary.index(l)] = []
719
+ bts.pop
720
+ end
721
+
722
+ # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any;
723
+ if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target }
724
+ s.belse = C::Block.new(scope)
725
+ s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse)
726
+ ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], []
727
+ end
728
+
729
+ # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c;
730
+ if s.belse
731
+ bes = s.belse.statements
# statements are compared through their to_s rendering
732
+ while not bts.empty?
733
+ if bts.last.kind_of? C::Label; ary.unshift bts.pop
734
+ elsif bes.last.kind_of? C::Label; ary.unshift bes.pop
735
+ elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop
736
+ else break
737
+ end
738
+ end
739
+
740
+ # if () { a; } else { b; } => if () { a; } else b;
741
+ # if () { a; } else {} => if () { a; }
742
+ case bes.length
743
+ when 0; s.belse = nil
744
+ #when 1; s.belse = bes.first
745
+ end
746
+ end
747
+
748
+ # if () {} else { a; } => if (!) { a; }
749
+ # if () { a; } => if () a;
750
+ case bts.length
751
+ when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse
752
+ #when 1; s.bthen = bts.first # later (allows simpler handling in _while)
753
+ end
754
+ end
755
+
756
+ # l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1;
757
+ if s.kind_of? C::If
758
+ ls = s.bthen
759
+ ls = ls.statements.last if ls.kind_of? C::Block
760
+ if ls.kind_of? C::Goto
# table: ordered label names used to rank jump targets; li: rank of the target of ls
761
+ if li = inner_labels.index(ls.target)
762
+ table = inner_labels
763
+ else
764
+ table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse
765
+ li = table.index(ls.target) || table.length
766
+ end
# g: first goto left in ary (searching no further than a return) whose target ranks after li
767
+ g = ary.find { |ss|
768
+ break if ss.kind_of? C::Return
769
+ next if not ss.kind_of? C::Goto
770
+ table.index(ss.target).to_i > li
771
+ }
772
+ if g
773
+ s.test = C::CExpression.negate s.test
774
+ if not s.bthen.kind_of? C::Block
775
+ ls = C::Block.new(scope)
776
+ ls.statements << s.bthen
777
+ s.bthen = ls
778
+ end
779
+ ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope)
780
+ end
781
+ end
782
+ end
783
+
784
+ ret << s
785
+ end
786
+ ret
787
+ end
788
+
789
# goto => while / do-while
# ary is an array of statements, patched in place; scope is passed to
# C::Block.new for the loop bodies created here.
# For each label of ary, tries in turn:
#   l: if (a) { b; goto l; }                =>  l: while (a) { b; }
#   l: if (a) { b; return; } c; goto l;     =>  l: while (!a) { c; } b; return;
#   l: a; goto l;                           =>  while (1) { l: a; }
#   l: a; if (b) goto l;                    =>  do { l: a; } while (b);
# then recurses into the bodies of if/while/do-while statements.
# Returns ary (nil when forbid_decompile_ifwhile is set).
789
+ def decompile_cseq_while(ary, scope)
790
+ return if forbid_decompile_ifwhile
791
+
792
+ # find the next instruction that is not a label
793
+ ni = lambda { |l| ary[ary.index(l)..-1].find { |s| not s.kind_of? C::Label } }
794
+
795
+ # TODO XXX get rid of #index
# every rewrite that replaces a range of ary breaks out of the each and restarts the scan
796
+ finished = false ; while not finished ; finished = true # 1.9 does not support 'retry'
797
+ ary.each { |s|
798
+ case s
799
+ when C::Label
800
+ if ss = ni[s] and ss.kind_of? C::If and not ss.belse and ss.bthen.kind_of? C::Block
801
+ if ss.bthen.statements.last.kind_of? C::Goto and ss.bthen.statements.last.target == s.name
802
+ ss.bthen.statements.pop
# a label right after the if: direct 'if (x) goto label' in the body become 'if (x) break'
803
+ if l = ary[ary.index(ss)+1] and l.kind_of? C::Label
804
+ ss.bthen.statements.grep(C::If).each { |i|
805
+ i.bthen = C::Break.new if i.bthen.kind_of? C::Goto and i.bthen.target == l.name
806
+ }
807
+ end
808
+ ary[ary.index(ss)] = C::While.new(ss.test, ss.bthen)
809
+ elsif ss.bthen.statements.last.kind_of? C::Return and g = ary[ary.index(s)+1..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name }
810
+ wb = C::Block.new(scope)
811
+ wb.statements = decompile_cseq_while(ary[ary.index(ss)+1...ary.index(g)], wb)
812
+ w = C::While.new(C::CExpression.negate(ss.test), wb)
813
+ ary[ary.index(ss)..ary.index(g)] = [w, *ss.bthen.statements]
814
+ finished = false ; break #retry
815
+ end
816
+ end
# last 'goto s' found after the label: everything in between becomes the body of a while (1)
817
+ if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name }
818
+ wb = C::Block.new(scope)
819
+ wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb)
820
+ w = C::While.new(C::CExpression[1], wb)
821
+ ary[ary.index(s)..ary.index(g)] = [w]
822
+ finished = false ; break #retry
823
+ end
# last else-less 'if (x) goto s' (bare goto or single-statement block) after the label: do { } while (x)
824
+ if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::If and not _s.belse and gt = _s.bthen and
825
+ (gt = gt.kind_of?(C::Block) && gt.statements.length == 1 ? gt.statements.first : gt) and gt.kind_of? C::Goto and gt.target == s.name }
826
+ wb = C::Block.new(scope)
827
+ wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb)
828
+ w = C::DoWhile.new(g.test, wb)
829
+ ary[ary.index(s)..ary.index(g)] = [w]
830
+ finished = false ; break #retry
831
+ end
832
+ when C::If
833
+ decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block
834
+ decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block
835
+ when C::While, C::DoWhile
836
+ decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block
837
+ end
838
+ }
839
+ end
840
+ ary
841
+ end
842
+
843
+ # TODO
# Looks in scope for a bounded jump-table dispatch of the form
#   if (index < N) return ((void(*)())(base + ptrsize*index))();
# where the return statement responds to from_instr and that instruction is
# not yet commented 'switch'.
# On a match: tags the instruction with a 'switch' comment, backtraces every
# table slot for index in 0...N (0..N when the test uses <=) as an execution
# target, runs the disassembler and throws :restart.
# An if whose return matches the dispatch shape but whose test does not is
# only reported (when $VERBOSE).
844
+ def decompile_cseq_switch(scope)
# strips op-less wrapper CExpressions to reach the wrapped value
845
+ uncast = lambda { |e| e = e.rexpr while e.kind_of? C::CExpression and not e.op ; e }
846
+ walk(scope) { |s|
847
+ # XXX pfff...
848
+ next if not s.kind_of? C::If
849
+ # if (v < 12) return ((void(*)())(tableaddr+4*v))();
850
+ t = s.bthen
851
+ t = t.statements.first if t.kind_of? C::Block and t.statements.length == 1
852
+ next if not t.kind_of? C::Return or not t.respond_to? :from_instr
853
+ next if t.from_instr.comment.to_a.include? 'switch'
854
+ next if not t.value.kind_of? C::CExpression or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of? C::CExpression or t.value.lexpr.op
855
+ p = uncast[t.value.lexpr.rexpr]
856
+ next if not p.kind_of? C::CExpression or p.op != :* or p.lexpr
857
+ p = uncast[p.rexpr]
858
+ next if not p.kind_of? C::CExpression or p.op != :+
# r: the Integer operand of the sum (table base), l: the index*size product
859
+ r, l = uncast[p.rexpr], uncast[p.lexpr]
860
+ r, l = l, r if r.kind_of? C::CExpression
861
+ next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr
# ll: the Integer factor (must equal the size of a pointer), lr: the index expression
862
+ lr, ll = uncast[l.rexpr], uncast[l.lexpr]
863
+ lr, ll = ll, lr if not ll.kind_of? ::Integer
864
+ next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void)))
865
+ base, index = r, lr
866
+ if s.test.kind_of? C::CExpression and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and
867
+ s.test.rexpr.kind_of? C::CExpression and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of? ::Integer
868
+ t.from_instr.add_comment 'switch'
869
+ sup = s.test.rexpr.rexpr
870
+ rng = ((s.test.op == :<) ? (0...sup) : (0..sup))
871
+ from = t.from_instr.address
872
+ rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) }
873
+ @dasm.disassemble
874
+ throw :restart, :restart
875
+ end
876
+ puts "unhandled switch() at #{t.from_instr}" if $VERBOSE
877
+ }
878
+ end
879
+
880
# Deletes from every block under +scope+ the labels that no goto targets,
# then drops a 'continue' found as last statement of a while body.
# Does nothing when forbid_optimize_labels is set.
def remove_labels(scope)
  return if forbid_optimize_labels

  # names of all the labels referenced by a goto
  referenced = []
  walk(scope) { |st| referenced |= [st.target] if st.kind_of? C::Goto }

  walk(scope) { |blk|
    next if not blk.kind_of? C::Block
    blk.statements.delete_if { |st| st.kind_of? C::Label and not referenced.include? st.name }
  }

  # remove implicit continue; at end of loop
  walk(scope) { |lp|
    next if not lp.kind_of? C::While
    body = lp.body
    body.statements.pop if body.kind_of? C::Block and body.statements.last.kind_of? C::Continue
  }
end
903
+
904
# Tells whether +ce+ designates +var+.
# For a var without stack offset, or when +ce+ is not a C::CExpression, this
# is a plain ce == var.
# For a stack var, a C::CExpression matches only when it has the shape
# *&var, op-less wrappers being allowed between the * and the &;
# returns nil when the shape does not match.
def isvar(ce, var)
  return ce == var if not var.stackoff or not ce.kind_of? C::CExpression

  # outermost node must be a unary dereference
  return if ce.op != :* or ce.lexpr
  inner = ce.rexpr
  # skip the op-less wrappers
  inner = inner.rexpr while inner.kind_of? C::CExpression and not inner.op
  # what remains must be a unary address-of
  return if not inner.kind_of? C::CExpression or inner.op != :& or inner.lexpr
  inner.rexpr == var
end
915
+
916
# Tells whether the expression +ce_+ reads +var+.
# +ce_+ itself may be the var; otherwise every sub-expression is checked:
#  * funcall: the var is the callee or one of the arguments
#  * assignment: the var is the assigned value; when the var is the
#    destination, the answer is whether the assigned value reads it
#  * anything else: the var is one of the two operands
def ce_read(ce_, var)
  direct = isvar(ce_, var)
  return direct if direct

  walk_ce(ce_) { |ce|
    if ce.op == :funcall
      break true if isvar(ce.lexpr, var) or ce.rexpr.find { |arg| isvar(arg, var) }
    elsif ce.op == :'='
      break true if isvar(ce.rexpr, var)
      # *&var = 2
      break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var)
    elsif isvar(ce.lexpr, var) or isvar(ce.rexpr, var)
      break true
    end
  }
end
928
+
929
# Tells whether the expression +ce_+ writes +var+: some sub-expression uses
# an operator listed in AssignOp with the var as left operand, or is a
# ++ / -- whose right operand is the var.
def ce_write(ce_, var)
  walk_ce(ce_) { |ce|
    next if not AssignOp.include?(ce.op)
    break true if isvar(ce.lexpr, var)
    break true if (ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var)
  }
end
936
+
937
# Replaces, in place, every operand equal to +oldce+ by +newce+ in all the
# expressions walked from +exprs+. For a funcall the callee and each entry
# of the argument array are checked; otherwise both operands are.
def ce_patch(exprs, oldce, newce)
  walk_ce(exprs) { |ce|
    ce.lexpr = newce if ce.lexpr == oldce
    if ce.op == :funcall
      # rexpr holds the argument list
      ce.rexpr.each_with_index { |arg, idx| ce.rexpr[idx] = newce if arg == oldce }
    else
      ce.rexpr = newce if ce.rexpr == oldce
    end
  }
end
950
+
951
+
952
# Splits the variables of +scope+ so that each independent value domain gets
# its own variable:
#  eax = 1; foo(eax); eax = 2; bar(eax);  =>  eax = 1; foo(eax); eax_1 = 2; bar(eax_1);
#  eax = 1; if (bla) eax = 2; foo(eax);   =>  no change
# +func+ is not used here.
def unalias_vars(scope, func)
  graph = c_to_graph(scope)

  # unalias func args first, they may include __attr__((out)) needed by the others
  calls = []
  walk_ce(scope) { |ce| calls << ce if ce.op == :funcall }
  rank = lambda { |v| walk_ce(calls) { |ce| break true if ce.rexpr == v } ? 0 : 1 }

  # find the domains of var aliases
  scope.symbol.values.sort_by { |v| rank[v] }.each { |var| unalias_var(var, scope, graph) }
end
966
+
967
+ # duplicates a var per domain value
# var: the variable to split; scope: where the new variables are declared;
# g: the graph of scope (g.exprs maps a label to its list of expressions,
# g.from_optim / g.to_optim give the predecessor / successor labels).
# Every expression referencing var is located by [label, index]. Starting
# from each reference not yet assigned to a domain, the graph is flooded up
# and down to collect all the references sharing the same value (a domain).
# Each domain gets a fresh variable named "<var.name>_a<N>" and its
# expressions are patched to use it, except the domains reaching the top of
# the function (a label without predecessor), which keep the original var.
968
+ def unalias_var(var, scope, g = c_to_graph(scope))
969
+ # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1))
970
+ read = {}
971
+ write = {}
972
+ ro = {}
973
+ wo = {}
974
+
975
+ # list of [l, i] for which domain is not known
976
+ unchecked = []
977
+
978
+ # mark all exprs of the graph
979
+ # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write
980
+ r = var.has_attribute_var('register')
981
+ g.exprs.each { |label, exprs|
982
+ exprs.each_with_index { |ce, i|
983
+ if ce_read(ce, var)
# an expression that both ends a domain (reads the old value) and starts a new one
# (var = f(var), or a funcall when var is a register listed in abi_funcall[:changed])
# is recorded in ro and wo, and queued once per direction
984
+ if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
985
+ (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
986
+ (ro[label] ||= []) << i
987
+ (wo[label] ||= []) << i
988
+ unchecked << [label, i, :up] << [label, i, :down]
989
+ else
990
+ (read[label] ||= []) << i
991
+ unchecked << [label, i]
992
+ end
993
+ elsif ce_write(ce, var)
994
+ (write[label] ||= []) << i
995
+ unchecked << [label, i]
996
+ end
997
+ }
998
+ }
999
+
1000
+ # stuff when filling the domain (flood algorithm)
# these locals are shared with the two lambdas below and reset for every domain
1001
+ dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil
1002
+
1003
+ # flood by walking the graph up from [l, i] (excluded)
1004
+ # marks stuff to walk down
1005
+ walk_up = lambda { |l, i|
1006
+ todo_w = [[l, i-1]]
1007
+ done_w = []
1008
+ while o = todo_w.pop
1009
+ next if done_w.include? o
1010
+ done_w << o
1011
+ l, i = o
1012
+ loop do
1013
+ if read[l].to_a.include? i
1014
+ # XXX not optimal (should mark only the uppest read)
1015
+ todo_down |= [[l, i]] if not dom.include? [l, i]
1016
+ dom |= [[l, i]]
1017
+ elsif write[l].to_a.include? i
1018
+ todo_down |= [[l, i]] if not dom.include? [l, i]
1019
+ dom |= [[l, i]]
1020
+ break
1021
+ elsif wo[l].to_a.include? i
1022
+ todo_down |= [[l, i]] if not dom_wo.include? [l, i, :down]
1023
+ dom_wo |= [[l, i, :down]]
1024
+ break
1025
+ end
1026
+ i -= 1
1027
+ if i < 0
# start of the label reached: continue from the end of each predecessor
1028
+ g.from_optim[l].to_a.each { |ll|
1029
+ todo_w << [ll, g.exprs[ll].to_a.length-1]
1030
+ }
1031
+ func_top = true if g.from_optim[l].to_a.empty?
1032
+ break
1033
+ end
1034
+ end
1035
+ end
1036
+ }
1037
+
1038
+ # flood by walking the graph down from [l, i] (excluded)
1039
+ # marks stuff to walk up
1040
+ walk_down = lambda { |l, i|
1041
+ todo_w = [[l, i+1]]
1042
+ done_w = []
1043
+ while o = todo_w.pop
1044
+ next if done_w.include? o
1045
+ done_w << o
1046
+ l, i = o
1047
+ loop do
1048
+ if read[l].to_a.include? i
1049
+ todo_up |= [[l, i]] if not dom.include? [l, i]
1050
+ dom |= [[l, i]]
1051
+ elsif write[l].to_a.include? i
1052
+ break
1053
+ elsif ro[l].to_a.include? i
1054
+ todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up]
1055
+ dom_ro |= [[l, i, :up]]
1056
+ break
1057
+ end
1058
+ i += 1
1059
+ if i >= g.exprs[l].to_a.length
# end of the label reached: continue from the start of each successor
1060
+ g.to_optim[l].to_a.each { |ll|
1061
+ todo_w << [ll, 0]
1062
+ }
1063
+ break
1064
+ end
1065
+ end
1066
+ end
1067
+ }
1068
+
1069
+ # check it out
1070
+ while o = unchecked.shift
1071
+ dom = []
1072
+ dom_ro = []
1073
+ dom_wo = []
1074
+ func_top = false
1075
+
1076
+ todo_up = []
1077
+ todo_down = []
1078
+
1079
+ # init
1080
+ if read[o[0]].to_a.include? o[1]
1081
+ todo_up << o
1082
+ todo_down << o
1083
+ dom << o
1084
+ elsif write[o[0]].to_a.include? o[1]
1085
+ todo_down << o
1086
+ dom << o
1087
+ elsif o[2] == :up
1088
+ todo_up << o
1089
+ dom_ro << o
1090
+ elsif o[2] == :down
1091
+ todo_down << o
1092
+ dom_wo << o
1093
+ else raise
1094
+ end
1095
+
1096
+ # loop
1097
+ while todo_up.first or todo_down.first
1098
+ todo_up.each { |oo| walk_up[oo[0], oo[1]] }
1099
+ todo_up.clear
1100
+
1101
+ todo_down.each { |oo| walk_down[oo[0], oo[1]] }
1102
+ todo_down.clear
1103
+ end
1104
+
1105
+ unchecked -= dom + dom_wo + dom_ro
1106
+
# a domain reaching the top of the function keeps the original variable
1107
+ next if func_top
1108
+
1109
+ # patch
1110
+ n_i = 0
1111
+ n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"]
1112
+
1113
+ nv = var.dup
1114
+ nv.storage = :register if nv.has_attribute_var('register')
1115
+ nv.attributes = nv.attributes.dup if nv.attributes
1116
+ nv.name = newvarname
1117
+ scope.statements << C::Declaration.new(nv)
1118
+ scope.symbol[nv.name] = nv
1119
+
1120
+ dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) }
# read-only side of a domain boundary: patch only the right operand (or the funcall arguments)
1121
+ dom_ro.each { |oo|
1122
+ ce = g.exprs[oo[0]][oo[1]]
1123
+ if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression
1124
+ ce_patch(ce.rexpr, var, nv)
1125
+ else
1126
+ ce.rexpr = nv
1127
+ end
1128
+ }
# write-only side of a domain boundary: patch only the left operand; a funcall is left untouched
1129
+ dom_wo.each { |oo|
1130
+ ce = g.exprs[oo[0]][oo[1]]
1131
+ if ce.op == :funcall
1132
+ elsif ce.lexpr.kind_of? C::CExpression
1133
+ ce_patch(ce.lexpr, var, nv)
1134
+ else
1135
+ ce.lexpr = nv
1136
+ end
1137
+ }
1138
+
1139
+ # check if the var is only used as an __out__ parameter
# NOTE(review): this branch is disabled by the leading 'false'; 'arg' is not defined in this method
1140
+ if false and dom_ro.empty? and dom_wo.empty? and dom.length == 2 and # TODO
1141
+ arg.has_attribute('out') and not arg.has_attribute('in')
1142
+ # *(int32*)&var_10 = &var_4;
1143
+ # set_pointed_value(*(int32*)&var_10); => writeonly var_4, may start a new domain
1144
+ nv.add_attribute('out')
1145
+ end
1146
+ end
1147
+ end
1148
+
1149
+ # revert the unaliasing namechange of vars where no alias subsists
# A symbol named "<base>_a<N>" is renamed back to "<base>" when no other
# name of the list is "<base>" or "<base>_a<M>". Symbols whose stackoff is
# positive are left alone. Both the key in scope.symbol and the name of the
# variable are updated.
1150
+ def simplify_varname_noalias(scope)
# names is a separate array of the keys, so updating scope.symbol below does not disturb the iteration
1151
+ names = scope.symbol.keys
1152
+ names.delete_if { |k|
1153
+ next if not b = k[/^(.*)_a\d+$/, 1]
1154
+ next if scope.symbol[k].stackoff.to_i > 0
# NOTE(review): this find scans names while delete_if is modifying it; what it sees for
# already-processed entries depends on the Array#delete_if implementation -- confirm this is harmless
1155
+ if not names.find { |n| n != k and (n == b or n[/^(.*)_a\d+$/, 1] == b) }
1156
+ scope.symbol[b] = scope.symbol.delete(k)
# the value of this assignment is the block result: a renamed k is removed from names
1157
+ scope.symbol[b].name = b
1158
+ end
1159
+ }
1160
+ end
1161
+
1162
# Replaces the frameptr-relative address computations of +scope+ by the
# address of a named stack variable: frameptr - 8  =>  &var_4.
# One void-typed variable is created and declared in +scope+ per distinct
# stack offset: first for the offsets listed in scope.decompdata
# (:stackoff_name gives the name to use, :stackoff_type only the offset),
# then for every offset met in an expression.
def namestackvars(scope)
  by_offset = {}
  # returns the variable standing at stack offset off, creating it as name if needed
  declare = lambda { |off, name|
    next by_offset[off] if by_offset[off]
    var = C::Variable.new
    var.type = C::BaseType.new(:void)
    var.name = name
    var.stackoff = off
    by_offset[off] = var
    scope.symbol[var.name] = var
    scope.statements << C::Declaration.new(var)
    var
  }

  scope.decompdata[:stackoff_name].each { |off, name| declare[off, name] }
  scope.decompdata[:stackoff_type].each { |off, _type| declare[off, stackoff_to_varname(off)] }

  walk_ce(scope) { |e|
    next if not [:+, :-].include? e.op
    base, disp = e.lexpr, e.rexpr
    next if not base.kind_of? C::Variable or base.name != 'frameptr'
    # the displacement must be a bare integer constant
    next if not disp.kind_of? C::CExpression or disp.op or not disp.rexpr.kind_of? ::Integer
    off = (e.op == :- ? -disp.rexpr : disp.rexpr)
    e.replace C::CExpression[:&, declare[off, stackoff_to_varname(off)]]
  }
end
1190
+
1191
+ # assign type to vars (regs, stack & global)
1192
+ # types are found by subfunction argument types & indirections, and propagated through assignments etc
1193
+ # TODO when updating the type of a var, update the type of all cexprs where it appears
1194
+ def decompile_c_types(scope)
1195
+ return if forbid_decompile_types
1196
+
1197
+ # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct
1198
+
1199
+ # name => type
1200
+ types = {}
1201
+
1202
+ pscopevar = lambda { |e|
1203
+ e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression
1204
+ if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable
1205
+ e.rexpr.name if scope.symbol[e.rexpr.name]
1206
+ end
1207
+ }
1208
+ scopevar = lambda { |e|
1209
+ e = e.rexpr if e.kind_of? C::CExpression and not e.op
1210
+ if e.kind_of? C::Variable and scope.symbol[e.name]
1211
+ e.name
1212
+ elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr
1213
+ pscopevar[e.rexpr]
1214
+ end
1215
+ }
1216
+ globalvar = lambda { |e|
1217
+ e = e.rexpr if e.kind_of? C::CExpression and not e.op
1218
+ if e.kind_of? ::Integer and @dasm.get_section_at(e)
1219
+ e
1220
+ elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name)
1221
+ e.name
1222
+ end
1223
+ }
1224
+
1225
+ # check if a newly found type for o is better than current type
1226
+ # order: foo* > void* > foo
1227
+ better_type = lambda { |t0, t1|
1228
+ t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or
1229
+ (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or
1230
+ (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed])
1231
+ }
1232
+
1233
+ update_global_type = lambda { |e, t|
1234
+ if ne = new_global_var(e, t, scope)
1235
+ ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne
1236
+ # TODO rename (dword_xx -> byte_xx etc)
1237
+ e = scope.symbol_ancestors[e] || e if e.kind_of? String # exe reloc
1238
+ walk_ce(scope) { |ce|
1239
+ ce.lexpr = ne if ce.lexpr == e
1240
+ ce.rexpr = ne if ce.rexpr == e
1241
+ if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? C::Union
1242
+ # *struct -> struct->bla
1243
+ ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type))
1244
+ elsif ce.lexpr == ne or ce.rexpr == ne
1245
+ # set ce type according to l/r
1246
+ # TODO set ce.parent type etc
1247
+ ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type
1248
+ end
1249
+ }
1250
+ end
1251
+ }
1252
+
1253
+ propagate_type = nil # fwd declaration
1254
+ propagating = [] # recursion guard (x = &x)
1255
+ # check if need to change the type of a var
1256
+ # propagate_type if type is updated
1257
+ update_type = lambda { |n, t|
1258
+ next if propagating.include? n
1259
+ o = scope.symbol[n].stackoff
1260
+ next if not o and t.untypedef.kind_of? C::Union
1261
+ next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o]
1262
+ next if t0 = types[n] and not better_type[t, t0]
1263
+ next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned
1264
+ types[n] = t
1265
+ next if t == t0
1266
+ propagating << n
1267
+ propagate_type[n, t]
1268
+ propagating.delete n
1269
+ next if not o
1270
+ t = t.untypedef
1271
+ if t.kind_of? C::Struct
1272
+ t.members.to_a.each { |m|
1273
+ mo = t.offsetof(@c_parser, m.name)
1274
+ next if mo == 0
1275
+ scope.symbol.each { |vn, vv|
1276
+ update_type[vn, m.type] if vv.stackoff == o+mo
1277
+ }
1278
+ }
1279
+ end
1280
+ }
1281
+
1282
+ # try to update the type of a var from knowing the type of an expr (through dereferences etc)
1283
+ known_type = lambda { |e, t|
1284
+ loop do
1285
+ e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t
1286
+ if o = scopevar[e]
1287
+ update_type[o, t]
1288
+ elsif o = globalvar[e]
1289
+ update_global_type[o, t]
1290
+ elsif not e.kind_of? C::CExpression
1291
+ elsif o = pscopevar[e] and t.pointer?
1292
+ update_type[o, t.pointed]
1293
+ elsif e.op == :* and not e.lexpr
1294
+ e = e.rexpr
1295
+ t = C::Pointer.new(t)
1296
+ next
1297
+ elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable
1298
+ e.lexpr, e.rexpr = e.rexpr, e.lexpr
1299
+ next
1300
+ elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression
1301
+ if not e.rexpr.op and e.rexpr.rexpr.kind_of? ::Integer
1302
+ if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0..
1303
+ e = e.lexpr # (int)*(x+2) === (int) *x
1304
+ next
1305
+ elsif globalvar[e.rexpr.rexpr]
1306
+ known_type[e.lexpr, C::BaseType.new(:int)]
1307
+ e = e.rexpr
1308
+ next
1309
+ end
1310
+ elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or
1311
+ (o = scopevar[e.lexpr] and types[o] and types[o].integral? and
1312
+ !(o = scopevar[e.rexpr] and types[o] and types[o].integral?))
1313
+ e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap
1314
+ e = e.lexpr
1315
+ next
1316
+ elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or
1317
+ (o = scopevar[e.rexpr] and types[o] and types[o].integral? and
1318
+ !(o = scopevar[e.lexpr] and types[o] and types[o].integral?)))
1319
+ e = e.lexpr
1320
+ next
1321
+ end
1322
+ end
1323
+ break
1324
+ end
1325
+ }
1326
+
1327
+ # we found a type for a var, propagate it through affectations
1328
+ propagate_type = lambda { |var, type|
1329
+ walk_ce(scope) { |ce|
1330
+ next if ce.op != :'='
1331
+
1332
+ if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var
1333
+ known_type[ce.rexpr, type]
1334
+ next
1335
+ end
1336
+ if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var
1337
+ known_type[ce.lexpr, type]
1338
+ next
1339
+ end
1340
+
1341
+ # int **x; y = **x => int y
1342
+ t = type
1343
+ l = ce.lexpr
1344
+ while l.kind_of? C::CExpression and l.op == :* and not l.lexpr
1345
+ if var == pscopevar[l.rexpr]
1346
+ known_type[ce.rexpr, t]
1347
+ break
1348
+ elsif t.pointer?
1349
+ l = l.rexpr
1350
+ t = t.pointed
1351
+ else break
1352
+ end
1353
+ end
1354
+
1355
+ # int **x; **x = y => int y
1356
+ t = type
1357
+ r = ce.rexpr
1358
+ while r.kind_of? C::CExpression and r.op == :* and not r.lexpr
1359
+ if var == pscopevar[r.rexpr]
1360
+ known_type[ce.lexpr, t]
1361
+ break
1362
+ elsif t.pointer?
1363
+ r = r.rexpr
1364
+ t = t.pointed
1365
+ else break
1366
+ end
1367
+ end
1368
+
1369
+ # TODO int *x; *x = *y; ?
1370
+ }
1371
+ }
1372
+
1373
+ # put all those macros in use
1374
+ # use user-defined types first
1375
+ scope.symbol.each_value { |v|
1376
+ next if not v.kind_of? C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff]
1377
+ known_type[v, t]
1378
+ }
1379
+
1380
+ # try to infer types from C semantics
1381
+ later = []
1382
+ walk_ce(scope) { |ce|
1383
+ if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and
1384
+ ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr)))
1385
+ # var = int
1386
+ known_type[ce.lexpr, ce.rexpr.type]
1387
+ elsif ce.op == :funcall
1388
+ f = ce.lexpr.type
1389
+ f = f.pointed if f.pointer?
1390
+ next if not f.kind_of? C::Function
1391
+ # cast func args to arg prototypes
1392
+ f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] }
1393
+ elsif ce.op == :* and not ce.lexpr
1394
+ if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and
1395
+ e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff
1396
+ # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything
1397
+ later << [ce.rexpr, C::Pointer.new(ce.type)]
1398
+ next
1399
+ end
1400
+ known_type[ce.rexpr, C::Pointer.new(ce.type)]
1401
+ elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function
1402
+ # cast to fptr: must be a fptr
1403
+ known_type[ce.rexpr, ce.type]
1404
+ end
1405
+ }
1406
+
1407
+ later.each { |ce, t| known_type[ce, t] }
1408
+
1409
+ # offsets have types now
1410
+ types.each { |v, t|
1411
+ # keep var type qualifiers
1412
+ q = scope.symbol[v].type.qualifier
1413
+ scope.symbol[v].type = t
1414
+ t.qualifier = q if q
1415
+ }
1416
+
1417
+
1418
+ # remove offsets to struct members
1419
+ # XXX this defeats antialiasing
1420
+ # off => [structoff, membername, membertype]
1421
+ memb = {}
1422
+ types.dup.each { |n, t|
1423
+ v = scope.symbol[n]
1424
+ next if not o = v.stackoff
1425
+ t = t.untypedef
1426
+ if t.kind_of? C::Struct
1427
+ t.members.to_a.each { |tm|
1428
+ moff = t.offsetof(@c_parser, tm.name)
1429
+ next if moff == 0
1430
+ types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff }
1431
+ memb[o+moff] = [v, tm.name, tm.type]
1432
+ }
1433
+ end
1434
+ }
1435
+
1436
+ # patch local variables into the CExprs, incl unknown offsets
1437
+ varat = lambda { |n|
1438
+ v = scope.symbol[n]
1439
+ if s = memb[v.stackoff]
1440
+ v = C::CExpression[s[0], :'.', s[1], s[2]]
1441
+ else
1442
+ v.type = types[n] || C::BaseType.new(:int)
1443
+ end
1444
+ v
1445
+ }
1446
+
1447
+ maycast = lambda { |v, e|
1448
+ if sizeof(v) != sizeof(e)
1449
+ v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]]
1450
+ end
1451
+ v
1452
+ }
1453
+ maycast_p = lambda { |v, e|
1454
+ if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed)
1455
+ C::CExpression[[:&, v], e.type]
1456
+ else
1457
+ C::CExpression[:&, v]
1458
+ end
1459
+ }
1460
+
1461
+ walk_ce(scope, true) { |ce|
1462
+ case
1463
+ when ce.op == :funcall
1464
+ ce.rexpr.map! { |re|
1465
+ if o = scopevar[re]; C::CExpression[maycast[varat[o], re]]
1466
+ elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]]
1467
+ else re
1468
+ end
1469
+ }
1470
+ when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr]
1471
+ when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr]
1472
+ ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable
1473
+ when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr]
1474
+ when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr]
1475
+ when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]]
1476
+ when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]]
1477
+ end
1478
+ }
1479
+
1480
+ fix_type_overlap(scope)
1481
+ fix_pointer_arithmetic(scope)
1482
+
1483
+ # if int32 var_4 is always var_4 & 255, change type to int8
1484
+ varuse = Hash.new(0)
1485
+ varandff = Hash.new(0)
1486
+ varandffff = Hash.new(0)
1487
+ walk_ce(scope) { |ce|
1488
+ if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
1489
+ case ce.rexpr.rexpr
1490
+ when 0xff; varandff[ce.lexpr.name] += 1
1491
+ when 0xffff; varandffff[ce.lexpr.name] += 1
1492
+ end
1493
+ end
1494
+ varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
1495
+ varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
1496
+ }
1497
+ varandff.each { |k, v|
1498
+ scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v
1499
+ }
1500
+ varandffff.each { |k, v|
1501
+ scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v
1502
+ }
1503
+
1504
+ # propagate types to cexprs
1505
+ walk_ce(scope, true) { |ce|
1506
+ if ce.op
1507
+ ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next
1508
+ if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?)
1509
+ known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function # localvar = &struct with fptr
1510
+ ce.rexpr = C::CExpression[[ce.rexpr], ce.type]
1511
+ end
1512
+ elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed)
1513
+ ce.type = ce.rexpr.type
1514
+ end
1515
+ }
1516
+ end
1517
+
1518
# turns a byte offset from a struct pointer into a member access
# eg struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l
# st: the struct, ptr: an expr pointing to a struct, off: numeric offset from ptr,
# msz: size of the pointed member (nil ignored)
# returns an expression for the resulting address; an offset that no member
# accounts for is added through an (__int8*) cast
def structoffset(st, ptr, off, msz)
  stsize = sizeof(st)
  tabidx = off / stsize
  off -= tabidx * stsize
  if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array
    # index ptr as a table of st (always done when ptr is an array)
    ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]]
  end

  if off == 0
    return ptr if not msz
    # avoid infinite recursion with eg chained list: stop when ptr is already
    # &something or something.member whose type is not a C::Union
    if ptr.kind_of? C::CExpression
      target = nil
      if ptr.op == :& and not ptr.lexpr
        target = ptr.rexpr
      elsif ptr.op == :'.'
        target = ptr
      end
      return ptr if target and not target.type.untypedef.kind_of? C::Union
    end
  end

  # recursive proc to list all named members, including in anonymous substructs
  named = lambda { |memb|
    if memb.name
      memb
    elsif memb.type.kind_of?(C::Union)
      memb.type.members.to_a.map { |sub| named[sub] }
    end
  }
  members = st.members.to_a.map { |memb| named[memb] }.flatten.compact

  # member => offset of the member in st
  offsets = {}
  members.each { |memb| offsets[memb] = st.offsetof(@c_parser, memb.name) }

  # exact match first (same offset, and same size when msz is given),
  # otherwise the first member whose extent contains off
  hit = members.find { |memb| offsets[memb] == off and (not msz or sizeof(memb) == msz) } ||
        members.find { |memb| offsets[memb] <= off and offsets[memb] + sizeof(memb) > off }
  if hit
    off -= offsets[hit]
    hit_type = hit.type.untypedef
    #return ptr if offsets[hit] == 0 and hit_type.pointer? and hit_type.type.untypedef == st	# TODO fix infinite recursion on mutually recursive ptrs
    access = if ptr.kind_of? C::CExpression and ptr.op == :& and not ptr.lexpr
               # ptr is &x: write x.member rather than (&x)->member
               C::CExpression[ptr.rexpr, :'.', hit.name]
             else
               C::CExpression[ptr, :'->', hit.name]
             end
    ptr = C::CExpression[:&, access]
    # the member is itself a struct/union: resolve the remaining offset inside it
    return structoffset(hit_type, ptr, off, msz) if hit_type.kind_of? C::Union
  end

  return ptr if off == 0
  C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]]
end
1558
+
1559
# fix pointer arithmetic (eg int foo += 4  =>  int* foo += 1)
# use struct member access (eg *(structptr+8)  =>  structptr->bla)
# must be run only once, right after type setting
def fix_pointer_arithmetic(scope)
  # member of a struct found at offset 0 (nil if none)
  member_at_0 = lambda { |struct|
    struct.members.to_a.find { |memb| struct.offsetof(@c_parser, memb.name) == 0 }
  }

  walk_ce(scope, true) { |ce|
    # ptr & x, ptr >> x, ptr << x: cast the pointer operand to int
    if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op
      ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)]
    end

    # int + ptr, or anything + structptr: move the pointer to the left
    if ce.op == :+ and ce.lexpr
      ltype = ce.lexpr.type
      rtype = ce.rexpr.type
      if rtype.pointer? and (ltype.integral? or rtype.pointed.untypedef.kind_of? C::Union)
        ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
      end
    end

    if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Struct
      struct = ce.rexpr.type.pointed.untypedef
      first = member_at_0[struct]
      if sizeof(first) != sizeof(ce)
        # size mismatch with the 1st member: keep the dereference, through explicit casts
        ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(struct)], C::Pointer.new(ce.type)]
      else
        # *structptr => structptr->member
        ce.lexpr = ce.rexpr
        ce.op = :'->'
        ce.rexpr = first.name
        ce.type = first.type
      end
      next
    elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct
      # struct = x  =>  struct.member = x, with the member at offset 0
      struct = ce.lexpr.type.untypedef
      first = member_at_0[struct]
      ce.lexpr = C::CExpression.new(ce.lexpr, :'.', first.name, first.type)
      ce.type = first.type
      next
    end

    # ptr + x has the type of ptr
    ce.type = ce.lexpr.type if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer?

    # &*x => x
    if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr
      ce.replace C::CExpression[ce.rexpr.rexpr]
    end

    # what follows only applies to expressions with a pointer on the left
    next unless (ce.lexpr and ce.lexpr.type.pointer?)

    if ce.op == :+ and (struct = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and
        ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and
        ce.rexpr.rexpr.kind_of? ::Integer and delta = ce.rexpr.rexpr
      # structptr + 4 => &structptr->member
      ce.replace structoffset(struct, ce.lexpr, delta, nil)
    elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and
        ((not ce.rexpr.op and step = ce.rexpr.rexpr) or
         (ce.rexpr.op == :* and step = ce.rexpr.lexpr and
          ((step.kind_of? C::CExpression and not step.op and step = step.rexpr) or true))) and
        step.kind_of? ::Integer and elem_size = sizeof(nil, ce.lexpr.type.pointed) and step % elem_size == 0
      # ptr += 4 => ptr += 1
      # step is either the constant itself, or the constant factor of (const * x)
      if ce.rexpr.op
        ce.rexpr.lexpr.rexpr /= elem_size
        # (1 * x) => x
        ce.rexpr = ce.rexpr.rexpr if ce.rexpr.lexpr.rexpr == 1
      else
        ce.rexpr.rexpr /= elem_size
      end
      ce.type = ce.lexpr.type

    elsif [:+, :-].include? ce.op and sizeof(nil, ce.lexpr.type.pointed) != 1
      # ptr+x => (ptrtype*)(((__int8*)ptr)+x)
      # XXX create struct ?
      ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] unless ce.rexpr.type.integral?
      if sizeof(nil, ce.lexpr.type.pointed) != 1
        ptype = ce.lexpr.type
        byteptr = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))]
        ce.replace C::CExpression[[byteptr, ce.op, ce.rexpr, byteptr.type], ptype]
      end
    end
  }
end
1632
+
1633
# handling of var overlapping: an integral stack variable lying inside another one
# is replaced everywhere by a dereference into that other variable
# (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1))
# must be done before fix_pointer_arithmetic
def fix_type_overlap(scope)
  # stack variable => [stack offset, size]
  layout = {}
  scope.symbol.each_value { |sym|
    stackoff = sym.stackoff
    next if not stackoff
    layout[sym] = [stackoff, sizeof(sym)]
  }

  layout.each { |inner, (ioff, ilen)|
    next if not inner.type.integral?
    layout.each { |outer, (ooff, olen)|
      # XXX inner may overlap outer AND another (int32 v_10; int32 v_E; int32 v_C;)
      # TODO should check stuff with aliasing domains
      next if inner.name == outer.name
      # the two ranges do not intersect
      next if ioff >= ooff+olen or ioff+ilen <= ooff
      # inner is larger than outer
      next if ilen > olen
      # same size: only the variable at the higher offset gets rewritten
      next if olen == ilen and ooff >= ioff

      # inner => *(&outer+delta)
      deref = C::CExpression[:&, outer]
      deref = C::CExpression[deref, :+, [ioff-ooff]]
      deref = C::CExpression[deref, C::Pointer.new(inner.type)] if inner.type != deref.type.type
      deref = C::CExpression[:*, deref]
      walk_ce(scope) { |ce|
        ce.lexpr = deref if ce.lexpr == inner
        ce.rexpr = deref if ce.rexpr == inner
      }
    }
  }
end
1662
+
1663
# to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types
# will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..')
# remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
# remove useless casts ('(int)i' with 'int i;' => 'i')
# returns the value of the last optimize_vars pass
def optimize(scope)
  optimize_code scope
  # two passes: the 1st may transform i = i+1 into i++, which the 2nd may coalesce into if(i)
  2.times.map { optimize_vars scope }.last
end
1672
+
1673
# simplify cexpressions (char & 255, redundant casts, etc)
# scope is anything walk_ce/walk accept; every CExpression visited by walk_ce(scope, true)
# is rewritten in place by a list of local rules, each one introduced by a comment below
# does nothing if forbid_optimize_code is set
def optimize_code(scope)
  return if forbid_optimize_code

  # are the two types equivalent as far as casts are concerned ?
  # a pointer to function is compared as the function itself; functions match on return type
  # and argument types, integral base types on size, pointers on their target
  sametype = lambda { |t1, t2|
    t1 = t1.untypedef
    t2 = t2.untypedef
    t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function
    t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function
    t1 == t2 or
    (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
      t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
    (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
    (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
  }

  # truthy if walk_ce meets a function call inside expr: such an expr cannot be wiped or duplicated
  has_funcall = lambda { |expr| walk_ce(expr) { |sub| break true if sub.op == :funcall } }

  # x + (-1) => x - 1: additive operator to use once the constant is negated
  negated_additive = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}

  # most of this is a CExpr#reduce
  future_array = []
  walk_ce(scope, true) { |ce|
    # (whatever)0 => 0
    if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
      ce.replace ce.rexpr
    end

    # *&bla => bla if types ok
    if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type]
      ce.replace C::CExpression[ce.rexpr.rexpr]
    end

    # int x + 0xffffffff -> x-1
    # only additive operators are rewritten: a comparison has no negated counterpart, and
    # looking it up in the table used to yield nil, which erased the operator of the expression
    if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and negated_additive[ce.op] and
        ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1
      ce.op = negated_additive[ce.op]
      ce.rexpr.rexpr = 1
    end

    # int *ptr; *(ptr + 4) => ptr[4]
    if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer?
      ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
      future_array << var.name
    end

    # char x; x & 255 => x
    if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and
        not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and
        ce.rexpr.rexpr & m == m
      ce.replace C::CExpression[ce.lexpr]
    end

    # a + -b => a - b
    if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr
      ce.op, ce.rexpr = :-, ce.rexpr.rexpr
    end

    # (((int) i >> 31) & 1) => i < 0
    if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and
        ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and
        not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1
      ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]]
    end

    # a-b == 0 => a == b
    if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and
        ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr
      ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
    end

    # (a > 0) != 0
    if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and
        [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op
      ce.replace ce.lexpr
    end

    # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] )  =>  jl
    # a<b  =>  true if !r  =>  a<0 == b<0  or a>=0  =>  a>=0 or b>=0
    # a>=b =>  true if  r  =>  a<0 == b>=0 and a<0  =>  a<0 and b>=0

    # x != (a && (b != x))  =>  [x && (!a || b)] || [!x && !(!a || b)]
    if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and
        ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and
        ce.rexpr.rexpr.rexpr == ce.lexpr and not has_funcall[ce]
      x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr
      ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ]
      optimize_code(ce)
    end
    # (a != b) || a  =>  a || b
    if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not has_funcall[ce]
      ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr
      optimize_code(ce)
    end
    # (a<b) && !(a>=0 && b<0)  ||  (a>=b) && (a>=0 && b<0)  =>  (signed)a < (signed)b
    if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and
        ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :<
      a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr
      if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and
          ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate
        ce.replace C::CExpression[a, :'<', b]
      end
    end
    # a && 1
    if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
      if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0))
        # the constant is neutral: only a remains
        ce.replace C::CExpression[ce.lexpr]
      elsif not has_funcall[ce]	# cannot wipe if sideeffect
        # the constant decides the result (a || 1 => 1, a && 0 => 0)
        # the check must look at every subexpression of ce: testing the operator of ce itself
        # (always || or && here) would never find a call
        ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]]
      end
    end
    # (b < c || b >= c)
    if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr]
      ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]]
    end

    # (a < b) | (a == b)  =>  a <= b
    if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and
        (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr
      ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op]
      ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
    end

    # a == 0  =>  !a
    if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
      ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr
    end

    # !(constant) => 0 or 1
    if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
      ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]]
    end

    # !(bool) => bool
    if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op
      ce.replace ce.rexpr.negate
    end

    # (foo)(bar)x => (foo)x
    if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression
      ce.rexpr = ce.rexpr.rexpr
    end

    # &struct.1stmember => &struct
    if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and
        s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0
      ce.rexpr = ce.rexpr.lexpr
      ce.type = C::Pointer.new(ce.rexpr.type)
    end

    # (1stmember*)structptr => &structptr->1stmember
    if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and
        s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s
      ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))]
      #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and
      #ce.rexpr.type.pointed.untypedef.kind_of? C::Function)	# XXX ugly
      # int32* v1 = (int32*)pstruct;
      # z = v1+4  if v1 is not cast, the + is invalid (sizeof pointed changes)
      # TODO when finding type of pstruct, set type of v1 accordingly
    end

    # (&foo)->bar => foo.bar
    if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr
      ce.lexpr = ce.lexpr.rexpr
      ce.op = :'.'
    end

    # (foo)bla => bla if bla of type foo
    if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type]
      ce.replace C::CExpression[ce.rexpr]
    end
    # same on the left operand, for a variable cast to its own type
    if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type
      ce.lexpr = ce.lexpr.rexpr
    end

    # *(foo*)x = y: make the cast point to the type of y, and simplify the left side again
    if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and
        not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type
      ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type)
      optimize_code(ce.lexpr)
    end
  }

  # if there is a ptr[4], change all *ptr to ptr[0] for consistency
  # do this after the first pass, which may change &*ptr to ptr
  walk_ce(scope) { |ce|
    if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name
      ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0]
    end
  } if not future_array.empty?

  # if (x != 0) => if (x)
  walk(scope) { |st|
    if st.kind_of? C::If and st.test.kind_of? C::CExpression and st.test.op == :'!=' and
        st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0
      st.test = C::CExpression[st.test.lexpr]
    end
  }
end
1866
+
1867
# checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified)
# exp may be nil, a number, a string, an array of exprs, a C::Variable or a C::CExpression
# anything else is reported as having a sideeffect
def sideeffect(exp, scope=nil)
  return false if exp.nil? or exp.kind_of?(::Numeric) or exp.kind_of?(::String)
  return exp.any? { |sub| sideeffect(sub, scope) } if exp.kind_of? ::Array

  if exp.kind_of? C::Variable
    # variable unknown to scope, or volatile
    return true if scope and not scope.symbol[exp.name]
    exp.type.qualifier.to_a.include? :volatile
  elsif exp.kind_of? C::CExpression
    deref = (exp.op == :* and not exp.lexpr)
    deref or exp.op == :funcall or AssignOp.include?(exp.op) or
      sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
  else
    true	# failsafe
  end
end
1878
+
1879
# graph of cexprs (nodes = cexprs, edges = codepaths), as built by c_to_graph
class CGraph
  # exprs: label => [exprs]
  # to: label => [labels]
  # block: label => are exprs standalone (vs If#test)
  # start: 1st label
  # to_optim: same as to, without duplicate targets nor labels holding no expr
  # from_optim: reverse of to_optim (label => [labels leading to it])
  attr_accessor :exprs, :to, :block, :start, :to_optim, :from_optim

  # a new graph is empty; start stays nil until set
  def initialize
    @exprs = {}
    @to = {}
    @block = {}
    @to_optim = {}
    @from_optim = {}
  end
end
1885
# converts C code to a graph of codepaths of cexprs
# st is the statement (or list of statements) to convert
# returns a CGraph; labels are the names of the C labels, plus integers for anonymous nodes
def c_to_graph(st)
  graph = CGraph.new
  graph.exprs = {}	# label => [exprs]
  graph.to = {}	# label => [labels]
  graph.block = {}	# label => is label in a block? (vs If#test)
  label_seq = 0	# when no label is there, use label_seq++
  fresh = lambda { label_seq += 1 }

  # cur: label of the node receiving stmt, after: label following stmt,
  # cont / brk: targets of a continue / break found in stmt
  build = lambda { |stmt, cur, after, cont, brk|
    case stmt
    when C::Label
      graph.to[cur] = [stmt.name]
      graph.to[stmt.name] = [after]
    when C::Goto
      graph.to[cur] = [stmt.target]
    when C::Continue
      graph.to[cur] = [cont]
    when C::Break
      graph.to[cur] = [brk]
    when C::CExpression
      graph.exprs[cur] = [stmt]
      graph.to[cur] = [after]
    when C::Return
      graph.exprs[cur] = [stmt.value] if stmt.value
      graph.to[cur] = []
    when C::Block
      build[stmt.statements, cur, after, cont, brk]
    when ::Array
      graph.exprs[cur] = []
      graph.block[cur] = true
      stmt.each { |sub|
        case sub
        when C::Declaration
        when C::CExpression
          graph.exprs[cur] << sub
        else
          # any other statement gets its own node, the following exprs go to a new one
          inner = fresh[]
          resume = fresh[]
          graph.to[cur] = [inner]
          graph.block[cur] = true
          build[sub, inner, resume, cont, brk]
          cur = resume
          graph.exprs[cur] = []
        end
      }
      graph.to[cur] = [after].compact
    when C::If
      graph.exprs[cur] = [stmt.test]
      then_label = fresh[]
      build[stmt.bthen, then_label, after, cont, brk]
      else_label = fresh[]
      build[stmt.belse, else_label, after, cont, brk]
      graph.to[cur] = [then_label, else_label]
    when C::While, C::DoWhile
      extra = fresh[]
      # do..while: the body comes first, under the current label
      test_label, body_label = stmt.kind_of?(C::DoWhile) ? [extra, cur] : [cur, extra]
      graph.exprs[test_label] = [stmt.test]
      graph.to[test_label] = [body_label, after]
      build[stmt.body, body_label, test_label, test_label, after]
    when C::Asm, nil
      graph.to[cur] = [after]
    else
      puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE
    end
  }

  graph.start = label_seq
  build[st, graph.start, nil, nil, nil]

  # optimize graph
  graph.to_optim = {}
  graph.to.each { |label, succ| graph.to_optim[label] = succ.uniq }
  graph.exprs.delete_if { |_label, list| list.empty? }
  graph.to_optim.delete_if { |label, succ|
    # only labels holding no expr may be removed
    next if graph.exprs[label]
    if succ.length == 1 and succ != [label]
      # plain relay: redirect its users to its only target
      graph.to_optim.each_value { |targets|
        idx = targets.index(label)
        targets[idx] = succ.first if idx
      }
      true
    elsif succ.length == 0
      # dead end: forget it
      graph.to_optim.each_value { |targets| targets.delete label }
      true
    end
  }

  graph.from_optim = {}
  graph.to_optim.each { |label, succ|
    succ.each { |target| (graph.from_optim[target] ||= []) << label }
  }

  graph
end
1969
+
1970
+ # dataflow optimization
1971
+ # condenses expressions (++x; if (x) => if (++x))
1972
+ # remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc)
1973
+ def optimize_vars(scope)
1974
+ return if forbid_optimize_dataflow
1975
+
1976
+ g = c_to_graph(scope)
1977
+
1978
+ # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..)
1979
+ # returns the first subexpr to read var in ce
1980
+ # returns :write if var is rewritten
1981
+ # returns nil if var not read
1982
+ # may return a cexpr var += 2
1983
+ find_next_read_ce = lambda { |ce_, var|
1984
+ walk_ce(ce_, true) { |ce|
1985
+ case ce.op
1986
+ when :funcall
1987
+ break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var }
1988
+ when :'='
1989
+ # a=a / a=a+1 => yield a, not :write
1990
+ break ce if ce.rexpr == var
1991
+ break :write if ce.lexpr == var
1992
+ else
1993
+ break ce if ce.lexpr == var or ce.rexpr == var
1994
+ end
1995
+ }
1996
+ }
1997
+
1998
+ # badlabels is a list of labels that may be reached without passing through the first invocation block
1999
+ find_next_read_rec = lambda { |label, idx, var, done, badlabels|
2000
+ next if done.include? label
2001
+ done << label if idx == 0
2002
+
2003
+ idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var]
2004
+ next ret if ret
2005
+
2006
+ to = g.to_optim[label].to_a.map { |t|
2007
+ break [:split] if badlabels.include? t
2008
+ find_next_read_rec[t, 0, var, done, badlabels]
2009
+ }.compact
2010
+
2011
+ tw = to - [:write]
2012
+ if to.include? :split or tw.length > 1
2013
+ :split
2014
+ elsif tw.length == 1
2015
+ tw.first
2016
+ elsif to.include? :write
2017
+ :write
2018
+ end
2019
+ }
2020
+ # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason
2021
+ find_next_read = nil
2022
+ find_prev_read_rec = lambda { |label, idx, var, done|
2023
+ next if done.include? label
2024
+ done << label if idx == g.exprs[label].length-1
2025
+
2026
+ idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var]
2027
+ if ret.kind_of? C::CExpression
2028
+ fwchk = find_next_read[label, idx+1, var]
2029
+ ret = fwchk if not fwchk.kind_of? C::CExpression
2030
+ end
2031
+ next ret if ret
2032
+
2033
+ from = g.from_optim[label].to_a.map { |f|
2034
+ find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done]
2035
+ }.compact
2036
+
2037
+ next :split if from.include? :split
2038
+ fw = from - [:write]
2039
+ if fw.length == 1
2040
+ fw.first
2041
+ elsif fw.length > 1
2042
+ :split
2043
+ elsif from.include? :write
2044
+ :write
2045
+ end
2046
+ }
2047
+
2048
+ # list of labels reachable without using a label
2049
+ badlab = {}
2050
+ build_badlabel = lambda { |label|
2051
+ next if badlab[label]
2052
+ badlab[label] = []
2053
+ todo = [g.start]
2054
+ while l = todo.pop
2055
+ next if l == label or badlab[label].include? l
2056
+ badlab[label] << l
2057
+ todo.concat g.to_optim[l].to_a
2058
+ end
2059
+ }
2060
+
2061
+ # returns the next subexpr where var is read
2062
+ # returns :write if var is written before being read
2063
+ # returns :split if the codepath splits with both subpath reading or codepath merges with another
2064
+ # returns nil if var is never read
2065
+ # idx is the index of the first cexpr at g.exprs[label] to look at
2066
+ find_next_read = lambda { |label, idx, var|
2067
+ find_next_read_rec[label, idx, var, [], []]
2068
+ }
2069
+ find_prev_read = lambda { |label, idx, var|
2070
+ find_prev_read_rec[label, idx, var, []]
2071
+ }
2072
+ # same as find_next_read, but returns :split if there exist a path from g.start to the read without passing through label
2073
+ find_next_read_bl = lambda { |label, idx, var|
2074
+ build_badlabel[label]
2075
+ find_next_read_rec[label, idx, var, [], badlab[label]]
2076
+ }
2077
+
2078
+ # walk each node, optimize data accesses there
2079
+ # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later.
2080
+ g.exprs.each { |label, exprs|
2081
+ next if not g.block[label]
2082
+ i = 0
2083
+ while i < exprs.length
2084
+ e = exprs[i]
2085
+ i += 1
2086
+
2087
+ # TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code)
2088
+ # needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?)
2089
+ if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and
2090
+ scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile
2091
+ next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and
2092
+ !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
2093
+ next if oe.op == :& and not oe.lexpr # no &(++eax)
2094
+
2095
+ # merge pre/postincrement into next/prev var usage
2096
+ # find_prev_read must fwd check when it finds something, to avoid
2097
+ # while(x) x++; return x; to be converted to while(x++); return x; (return wrong value)
2098
+ case oe.op
2099
+ when e.op
2100
+ # bla(i--); --i bla(--i); --i ++i; bla(i++) => ignore
2101
+ next if pos == :pre or oe.lexpr
2102
+ # ++i; bla(++i) => bla(i += 2)
2103
+ oe.lexpr = oe.rexpr
2104
+ oe.op = ((oe.op == :'++') ? :'+=' : :'-=')
2105
+ oe.rexpr = C::CExpression[2]
2106
+
2107
+ when :'++', :'--' # opposite of e.op
2108
+ if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr)
2109
+ # ++i; bla(--i) => bla(i)
2110
+ # bla(i--); ++i => bla(i)
2111
+ oe.op = nil
2112
+ elsif pos == :post
2113
+ # ++i; bla(i--) => bla(i+1)
2114
+ oe.op = ((oe.op == :'++') ? :- : :+)
2115
+ oe.rexpr = C::CExpression[1]
2116
+ elsif pos == :pre
2117
+ # bla(--i); ++i => bla(i-1)
2118
+ oe.lexpr = oe.rexpr
2119
+ oe.op = ((oe.op == :'++') ? :+ : :-)
2120
+ oe.rexpr = C::CExpression[1]
2121
+ end
2122
+ when :'+=', :'-='
2123
+ # TODO i++; i += 4 => i += 5
2124
+ next
2125
+ when *AssignOp
2126
+ next # ++i; i |= 4 => ignore
2127
+ else
2128
+ if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v]
2129
+ elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v]
2130
+ elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op]
2131
+ elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op]
2132
+ else raise 'foobar' # find_dir_read failed
2133
+ end
2134
+ end
2135
+
2136
+ i -= 1
2137
+ exprs.delete_at(i)
2138
+ e.lexpr = e.op = e.rexpr = nil
2139
+
2140
+
2141
+ elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and
2142
+ not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v]
2143
+
2144
+ # reduce trivial static assignments
2145
+ if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or
2146
+ (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or
2147
+ (e.rexpr.kind_of? C::Variable and e.rexpr.type.kind_of? C::Array)
2148
+ rewritten = false
2149
+ readers = []
2150
+ discard = [e]
2151
+ g.exprs.each { |l, el|
2152
+ el.each_with_index { |ce, ci|
2153
+ if ce_write(ce, v) and [label, i-1] != [l, ci]
2154
+ if ce == e
2155
+ discard << ce
2156
+ else
2157
+ rewritten = true
2158
+ break
2159
+ end
2160
+ elsif ce_read(ce, v)
2161
+ if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v }
2162
+ # i = 2 ; j = &i =!> j = &2
2163
+ rewritten = true
2164
+ break
2165
+ end
2166
+ readers << ce
2167
+ end
2168
+ } if not rewritten
2169
+ }
2170
+ if not rewritten
2171
+ ce_patch(readers, v, C::CExpression[iv || e.rexpr])
2172
+ discard.each { |d| d.lexpr = d.op = d.rexpr = nil }
2173
+ next
2174
+ end
2175
+ end
2176
+
2177
+ case nr = find_next_read[label, i, v]
2178
+ when C::CExpression
2179
+ # read in one place only, try to patch rexpr in there
2180
+ r = e.rexpr
2181
+
2182
+ # must check for conflicts (x = y; y += 1; foo(x) =!> foo(y))
2183
+ # XXX x = a[1]; *(a+1) = 28; foo(x)...
2184
+ isfunc = false
2185
+ depend_vars = []
2186
+ walk_ce(C::CExpression[r]) { |ce|
2187
+ isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or
2188
+ not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ?
2189
+ depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable
2190
+ depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v)
2191
+ depend_vars << ce if ce.lvalue?
2192
+ depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array
2193
+ }
2194
+ depend_vars.uniq!
2195
+
2196
+ # XXX x = 1; if () { x = 2; } foo(x) =!> foo(1) (find_next_read will return this)
2197
+ # we'll just redo a find_next_read like
2198
+ # XXX b = &a; a = 1; *b = 2; foo(a) unhandled & generate bad C
2199
+ l_l = label
2200
+ l_i = i
2201
+ while g.exprs[l_l].to_a.each_with_index { |ce_, n_i|
2202
+ next if n_i < l_i
2203
+ # count occurences of read v in ce_
2204
+ cnt = 0
2205
+ bad = false
2206
+ walk_ce(ce_) { |ce|
2207
+ case ce.op
2208
+ when :funcall
2209
+ bad = true if isfunc
2210
+ ce.rexpr.each { |a| cnt += 1 if a == v }
2211
+ cnt += 1 if ce.lexpr == v
2212
+ when :'='
2213
+ bad = true if depend_vars.include? ce.lexpr
2214
+ cnt += 1 if ce.rexpr == v
2215
+ else
2216
+ bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr
2217
+ bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr
2218
+ cnt += 1 if ce.lexpr == v
2219
+ cnt += 1 if ce.rexpr == v
2220
+ end
2221
+ }
2222
+ case cnt
2223
+ when 0
2224
+ break if bad
2225
+ next
2226
+ when 1 # good
2227
+ break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable
2228
+ # x = 1; y = x; z = x; => cannot suppress x
2229
+ nr = find_next_read[l_l, n_i+1, v]
2230
+ break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v }
2231
+ else break # a = 1; b = a + a => fail
2232
+ end
2233
+
2234
+ # TODO XXX x = 1; y = x; z = x;
2235
+ res = walk_ce(ce_, true) { |ce|
2236
+ case ce.op
2237
+ when :funcall
2238
+ if ce.rexpr.to_a.each_with_index { |a,i_|
2239
+ next if a != v
2240
+ ce.rexpr[i_] = r
2241
+ break :done
2242
+ } == :done
2243
+ break :done
2244
+ elsif ce.lexpr == v
2245
+ ce.lexpr = r
2246
+ break :done
2247
+ elsif isfunc
2248
+ break :fail
2249
+ end
2250
+ when *AssignOp
2251
+ break :fail if not ce.lexpr and depend_vars.include? ce.rexpr # ++depend
2252
+ if ce.rexpr == v
2253
+ ce.rexpr = r
2254
+ break :done
2255
+ elsif ce.lexpr == v or depend_vars.include? ce.lexpr
2256
+ break :fail
2257
+ end
2258
+ else
2259
+ break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v
2260
+ if ce.lexpr == v
2261
+ ce.lexpr = r
2262
+ break :done
2263
+ elsif ce.rexpr == v
2264
+ ce_.type = r.type if not ce_.op and ce_.rexpr == v # return (int32)eax
2265
+ ce.rexpr = r
2266
+ break :done
2267
+ end
2268
+ end
2269
+ }
2270
+ case res
2271
+ when :done
2272
+ i -= 1
2273
+ exprs.delete_at(i)
2274
+ e.lexpr = e.op = e.rexpr = nil
2275
+ break
2276
+ when :fail
2277
+ break
2278
+ end
2279
+ }
2280
+ # ignore branches that will never reuse v
2281
+ may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression }
2282
+ if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l]
2283
+ l_i = 0
2284
+ l_l = to
2285
+ else break
2286
+ end
2287
+ end
2288
+
2289
+ when nil, :write
2290
+ # useless assignment (value never read later)
2291
+ # XXX foo = &bar; bar = 12; baz(*foo)
2292
+ e.replace(C::CExpression[e.rexpr])
2293
+ # remove sideeffectless subexprs
2294
+ loop do
2295
+ case e.op
2296
+ when :funcall, *AssignOp
2297
+ else
2298
+ l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr))
2299
+ r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr))
2300
+ if l and r # could split...
2301
+ elsif l
2302
+ e.replace(e.lexpr)
2303
+ next
2304
+ elsif r
2305
+ e.replace(e.rexpr)
2306
+ next
2307
+ else # remove the assignment altogether
2308
+ i -= 1
2309
+ exprs.delete_at(i)
2310
+ e.lexpr = e.op = e.rexpr = nil
2311
+ end
2312
+ end
2313
+ break
2314
+ end
2315
+ end
2316
+ end
2317
+ end
2318
+ }
2319
+
2320
+ # wipe cexprs marked in the previous step
2321
+ walk(scope) { |st|
2322
+ next if not st.kind_of? C::Block
2323
+ st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr }
2324
+ }
2325
+
2326
+ # reoptimize cexprs
2327
+ walk_ce(scope, true) { |ce|
2328
+ # redo some simplification that may become available after variable propagation
2329
+ # int8 & 255 => int8
2330
+ if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1
2331
+ ce.replace C::CExpression[ce.lexpr]
2332
+ end
2333
+
2334
+ # int *ptr; *(ptr + 4) => ptr[4]
2335
+ if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer?
2336
+ ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
2337
+ end
2338
+
2339
+ # useless casts
2340
+ if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or
2341
+ (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union)) # keep ((struct*)0)->memb
2342
+ ce.rexpr = ce.rexpr.rexpr
2343
+ end
2344
+ if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?))
2345
+ ce.replace ce.rexpr
2346
+ end
2347
+ # useless casts (type)*((oeua)Ptype)
2348
+ if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and
2349
+ p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed
2350
+ ce.op = ce.rexpr.op
2351
+ ce.rexpr = ce.rexpr.rexpr.rexpr
2352
+ end
2353
+ # (a > 0) != 0
2354
+ if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and
2355
+ [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op
2356
+ ce.replace ce.lexpr
2357
+ end
2358
+ # a == 0 => !a
2359
+ if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
2360
+ ce.replace C::CExpression[:'!', ce.lexpr]
2361
+ end
2362
+ # !(int)a => !a
2363
+ if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression
2364
+ ce.rexpr = ce.rexpr.rexpr
2365
+ end
2366
+ # (int)a < (int)b => a < b TODO uint <-> int
2367
+ if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and
2368
+ ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer?
2369
+ ce.rexpr = ce.rexpr.rexpr
2370
+ ce.lexpr = ce.lexpr.rexpr
2371
+ end
2372
+
2373
+ # a & 3 & 1
2374
+ while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and
2375
+ ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == ce.op and ce.lexpr.lexpr and
2376
+ ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.rexpr.kind_of? ::Integer
2377
+ ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr)
2378
+ end
2379
+
2380
+ # x = x | 4 => x |= 4
2381
+ if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include? ce.rexpr.op and ce.rexpr.lexpr == ce.lexpr
2382
+ ce.op = (ce.rexpr.op.to_s + '=').to_sym
2383
+ ce.rexpr = ce.rexpr.rexpr
2384
+ end
2385
+
2386
+ # x += 1 => ++x
2387
+ if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1
2388
+ ce.lexpr, ce.op, ce.rexpr = nil, {:'+=' => :'++', :'-=' => :'--'}[ce.op], ce.lexpr
2389
+ end
2390
+
2391
+ # --x+1 => x--
2392
+ if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and
2393
+ ce.lexpr.rexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1
2394
+ ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil
2395
+ end
2396
+ }
2397
+ end
2398
+
2399
# Drops the local symbols of +scope+ that no expression refers to.
#
# A symbol counts as referenced when a C::Variable carrying its name shows up
# as the lexpr or rexpr of any CExpression reached by walk_ce, or as an
# element of an Array rexpr.
# Every other symbol is tagged with the 'unused' attribute, its C::Declaration
# is removed from scope.statements and its entry is removed from scope.symbol.
def remove_unreferenced_vars(scope)
  referenced = {}
  mark = lambda { |operand| referenced[operand.name] = true if operand.kind_of? C::Variable }

  walk_ce(scope) do |expr|
    mark[expr.rexpr]
    mark[expr.lexpr]
    # an Array rexpr may hold bare variables that walk_ce never yields
    expr.rexpr.each(&mark) if expr.rexpr.kind_of?(::Array)
  end

  dead = scope.symbol.keys.reject { |name| referenced[name] }

  # the variable object may outlive the symbol table entry: fastcall args need it
  dead.each { |name| scope.symbol[name].add_attribute 'unused' }

  scope.statements.delete_if { |stmt| stmt.kind_of?(C::Declaration) && dead.include?(stmt.var.name) }
  scope.symbol.delete_if { |name, _var| dead.include?(name) }
end
2412
+
2413
# Final pass entry point: runs optimize_global and then reports success.
# Always returns true; the return value of optimize_global is discarded.
def finalize
  optimize_global
  return true
end
2417
+
2418
# Post-processes the global variables declared in @c_parser.toplevel.
#
# Three steps, each working on the toplevel variables that are not functions
# and that are referenced at least once as a direct lexpr/rexpr operand:
# 1. count the references to each global (countref)
# 2. when every reference to a global is a dereference (*foo or foo->x),
#    replace the variable type by its pointed/element type and rewrite the
#    dereferences into direct accesses
# 3. when every reference to a global lives in a single function, move it into
#    that function: either inline its String initializer at the sole use, or
#    turn it into a static local of the function
def optimize_global
  # check all global vars (pointers to global data)
  tl = @c_parser.toplevel
  vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of?(C::Variable) and not tl.symbol[k].type.kind_of?(C::Function) }
  countref = Hash.new(0)

  walk_ce(tl) { |ce|
    # XXX int foo; void bar() { int foo; } => false negative
    countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
    countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
  }

  # keep only the globals that are actually referenced
  vars.delete_if { |v| countref[v] == 0 }
  countref.delete_if { |k, _cnt| not vars.include? k }

  # by default globals are C::Arrays
  # if all references are *foo, dereference the var type
  # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc)
  countderef = Hash.new(0)
  walk_ce(tl) { |ce|
    if ce.op == :* and not ce.lexpr
      r = ce.rexpr
    elsif ce.op == :'->'
      r = C::CExpression[ce.lexpr]
    else next
    end
    # compare type.type cause var is an Array and the cast is a Pointer
    # best effort: any error raised while inspecting the types just means
    # this expression is not counted as a dereference
    begin
      countderef[r.rexpr.name] += 1 if r.kind_of?(C::CExpression) and not r.op and r.rexpr.kind_of?(C::Variable) and
        sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type)
    rescue StandardError
      nil
    end
  }

  vars.each { |n|
    next if countref[n] != countderef[n]

    v = tl.symbol[n]
    # build the pattern before changing the type of v
    target = C::CExpression[:*, [v]]
    v.type = v.type.type
    v.initializer = v.initializer.first if v.initializer.kind_of? ::Array
    walk_ce(tl) { |ce|
      if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
        ce.op = :'.'
      elsif ce.lexpr == target
        ce.lexpr = v
      end
      ce.rexpr = v if ce.rexpr == target
      ce.lexpr, ce.op, ce.rexpr = nil, nil, v if ce == target
    }
  }

  # if a global var appears only in one function, make it a static variable
  # Iterate over a snapshot: the body below removes declarations from
  # tl.statements, and deleting from an Array while #each walks it makes the
  # iteration skip the statement following the removed one.
  tl.statements.dup.each { |st|
    next if not st.kind_of? C::Declaration or not st.var.type.kind_of? C::Function or not scope = st.var.initializer
    localcountref = Hash.new(0)
    walk_ce(scope) { |ce|
      localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
      localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
    }

    vars.delete_if { |n|
      # a local of the same name shadows the global in this function
      next if scope.symbol[n]
      # referenced from somewhere else too
      next if localcountref[n] != countref[n]
      v = tl.symbol.delete(n)
      tl.statements.delete_if { |d| d.kind_of? C::Declaration and d.var.name == n }

      if countref[n] == 1 and v.initializer.kind_of? C::CExpression and v.initializer.rexpr.kind_of? String
        # single use of a string: put the initializer in place of the variable
        walk_ce(scope) { |ce|
          if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == n
            if not ce.op
              ce.replace v.initializer
            else
              ce.rexpr = v.initializer
            end
          elsif ce.lexpr.kind_of? C::Variable and ce.lexpr.name == n
            ce.lexpr = v.initializer
          end
        }
      else
        v.storage = :static
        scope.symbol[v.name] = v
        scope.statements.unshift C::Declaration.new(v)
      end

      # handled: remove it from the list of remaining globals
      true
    }
  }
end
2503
+
2504
+ # reorder statements to put decl first, move assignments to decl, move args to func prototype
2505
# scope: the function body block, func: the function variable (func.type.args is rebuilt)
#
# - void-typed symbols are retyped as int
# - declarations are pulled out of the statement list: variables with a
#   positive stack offset whose name does not end in _a<digits> become
#   function arguments, all the others are re-inserted at the top of the body
# - the leading run of 'var = <integer>' statements is turned into initializers
# - the argument list is rebuilt ordered by stack offset; missing pointer-sized
#   slots are filled with int arguments flagged 'unused'
def cleanup_var_decl(scope, func)
  scope.symbol.each_value do |sym|
    sym.type = C::BaseType.new(:int) if sym.type.void?
  end

  params = func.type.args
  locals = []
  scope.statements.delete_if do |stmt|
    next unless stmt.kind_of? C::Declaration
    if stmt.var.stackoff.to_i <= 0 or stmt.var.name =~ /_a(\d+)$/ # aliased vars: use 1st domain only
      locals << stmt
    else
      params << stmt.var
    end
    true
  end

  # move trivial affectations to initialiser
  # XXX a = 1 ; b = a ; a = 2
  hoistable = lambda { |stmt|
    stmt.kind_of?(C::CExpression) and stmt.op == :'=' and
      stmt.rexpr.kind_of?(C::CExpression) and not stmt.rexpr.op and
      stmt.rexpr.rexpr.kind_of?(::Integer) and
      stmt.lexpr.kind_of?(C::Variable) and scope.symbol[stmt.lexpr.name]
  }
  # only the assignments found before the first non-matching statement qualify
  leading = scope.statements.take_while(&hoistable)
  leading.each { |stmt| stmt.lexpr.initializer = stmt.rexpr }
  scope.statements.shift(leading.length)

  # reorder declarations
  ordered = locals.sort_by { |stmt| [-stmt.var.stackoff.to_i, stmt.var.name] }
  scope.statements.insert(0, *ordered)

  # ensure arglist has no hole (create&add unreferenced args)
  func.type.args = []
  slot = @c_parser.typesize[:ptr]
  expected = slot
  params.sort_by { |arg| arg.stackoff.to_i }.each do |arg|
    # XXX misalignment ?
    offset = arg.stackoff
    unless offset
      func.type.args << arg # __fastcall
      next
    end

    while offset > expected
      filler = C::Variable.new
      filler.name = scope.decompdata[:stackoff_name][expected] || stackoff_to_varname(expected)
      filler.type = C::BaseType.new(:int)
      filler.attributes = ['unused']
      func.type.args << filler
      scope.symbol[filler.name] = filler
      expected += slot
    end

    func.type.args << arg
    expected += slot
  end
end
2557
+
2558
+ # rename local variables from subfunc arg names
2559
# Gives auto-named locals of +scope+ a more meaningful name:
# - a local passed as argument to a function whose prototype is known takes
#   the name of the matching parameter
# - a local whose address is passed takes the parameter name minus a leading
#   'p' / 'lp' prefix
# - a local that is both incremented (++) and compared to an integer is
#   named 'cntr'
# Only variables registered in scope.symbol and either named var_* / arg_* or
# having :register storage are renamed. A numeric suffix is appended while
# the wanted name already exists in scope.symbol_ancestors.
def rename_variables(scope)
  funcs = []
  cntrs = []
  cmpi = []
  cmp_ops = [:<, :>, :<=, :>=, :==, :'!=']

  walk_ce(scope) { |ce|
    funcs << ce if ce.op == :funcall
    cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++'
    cmpi << ce.lexpr if cmp_ops.include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.kind_of?(::Integer)
  }

  rename = lambda { |var, name|
    # look through a plain (cast-like, op-less) wrapper expression
    var = var.rexpr if var.kind_of?(C::CExpression) and not var.op
    next if not var.kind_of?(C::Variable) or not scope.symbol[var.name]
    # an empty name is produced when the 'p'/'lp' prefix is stripped from a
    # parameter called exactly 'p' or 'lp': never rename a variable to ''
    next if not name or name.to_s.empty?
    next if var.name !~ /^(var|arg)_/ and var.storage != :register
    next if name =~ /^(var|arg)_/

    taken = scope.symbol_ancestors
    uniq = name
    i = 0
    uniq = name + "#{i += 1}" while taken[uniq]
    scope.symbol[uniq] = scope.symbol.delete(var.name)
    var.name = uniq
  }

  # calls whose target is a variable with a function type
  typed_calls = funcs.find_all { |ce| ce.lexpr.kind_of?(C::Variable) and ce.lexpr.type.kind_of?(C::Function) }

  # direct arguments first, so that they get the unsuffixed names
  typed_calls.each { |ce|
    ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa }
  }

  # then locals passed by address: &var
  typed_calls.each { |ce|
    ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa|
      next if not a.kind_of?(C::CExpression) or a.op != :& or a.lexpr
      next if not fa or not fa.name
      rename[a.rexpr, fa.name.sub(/^l?p/, '')]
    }
  }

  (cntrs & cmpi).each { |v| rename[v, 'cntr'] }
end
2596
+
2597
+ # yield each CExpr member (recursive, allows arrays, order: self(!post), lexpr, rexpr, self(post))
2598
+ # if given a non-CExpr, walks it until it finds a CExpr to yield
2599
# ce:   a CExpression, an Array, a Statement or a Declaration
# post: when true a CExpression is yielded after its operands, otherwise before
# Anything else (including nil operands) is silently ignored.
# Always returns nil, unless the block breaks out of the walk.
def walk_ce(ce, post=false, &b)
  if ce.kind_of? C::CExpression
    yield ce unless post
    walk_ce(ce.lexpr, post, &b)
    walk_ce(ce.rexpr, post, &b)
    yield ce if post
  elsif ce.kind_of? ::Array
    ce.each { |item| walk_ce(item, post, &b) }
  elsif ce.kind_of? C::Statement
    # only these statement kinds are descended into
    if ce.kind_of? C::Block
      walk_ce(ce.statements, post, &b)
    elsif ce.kind_of? C::If
      walk_ce(ce.test, post, &b)
      walk_ce(ce.bthen, post, &b)
      walk_ce(ce.belse, post, &b) if ce.belse
    elsif ce.kind_of?(C::While) || ce.kind_of?(C::DoWhile)
      walk_ce(ce.test, post, &b)
      walk_ce(ce.body, post, &b)
    elsif ce.kind_of? C::Return
      walk_ce(ce.value, post, &b) if ce.value
    end
  elsif ce.kind_of? C::Declaration
    walk_ce(ce.var.initializer, post, &b) if ce.var.initializer
  end
  nil
end
2626
+
2627
+ # yields each statement (recursive)
2628
# scope: an Array, a Statement or a Declaration
# post:  when true a statement is yielded after its sub-statements, otherwise before
# The test of an If / While / DoWhile and the value of a Return are yielded as
# they are (a Return without value yields nil); they are not walked further.
# For a Declaration, only the initializer (when there is one) is walked.
def walk(scope, post=false, &b)
  if scope.kind_of? ::Array
    scope.each { |item| walk(item, post, &b) }
  elsif scope.kind_of? C::Statement
    yield scope unless post
    if scope.kind_of? C::Block
      walk(scope.statements, post, &b)
    elsif scope.kind_of? C::If
      yield scope.test
      walk(scope.bthen, post, &b)
      walk(scope.belse, post, &b) if scope.belse
    elsif scope.kind_of?(C::While) || scope.kind_of?(C::DoWhile)
      yield scope.test
      walk(scope.body, post, &b)
    elsif scope.kind_of? C::Return
      yield scope.value
    end
    yield scope if post
  elsif scope.kind_of? C::Declaration
    walk(scope.var.initializer, post, &b) if scope.var.initializer
  end
end
2650
+
2651
+ # forwards to @c_parser, handles cast to Array (these should not happen btw...)
2652
# Size lookup through @c_parser.sizeof.
# var:  a variable/expression, or directly a C::Type when +type+ is omitted
# type: the type to measure, defaults to var.type
# An Array type that is not attached to a C::Variable is given the size of a
# pointer (@c_parser.typesize[:ptr]).
# Returns -1 when @c_parser.sizeof raises.
def sizeof(var, type=nil)
  unless type
    if var.kind_of? C::Type
      # called as sizeof(type)
      type = var
      var = nil
    else
      type = var.type
    end
  end

  if type.kind_of?(C::Array) and not var.kind_of?(C::Variable)
    @c_parser.typesize[:ptr]
  else
    begin
      @c_parser.sizeof(var, type)
    rescue StandardError
      -1
    end
  end
end
2658
+ end
2659
+ end