metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,213 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/render'
9
+
10
+
11
+ module Metasm
# Symbolic pointer dereference: "the +len+ bytes stored at address +target+".
# API similar to Expression.
class Indirection < ExpressionType
  # Expression (the pointer)
  attr_accessor :target
  alias pointer target
  alias pointer= target=
  # length in bytes of data referenced
  attr_accessor :len
  # address of the instruction who generated the indirection
  attr_accessor :origin

  def initialize(target, len, origin)
    @target, @len, @origin = target, len, origin
  end

  # reduces the pointer; yields Expression::Unknown if the pointer itself is unknown,
  # otherwise a new Indirection on the reduced pointer
  def reduce_rec
    ptr = Expression[@target.reduce]
    (ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin)
  end

  # returns h[self] when the whole indirection is bound, otherwise a new
  # Indirection whose pointer has been bound through h
  def bind(h)
    h[self] || Indirection.new(@target.bind(h), @len, @origin)
  end

  # hash/equality only consider target and len: origin is ignored
  def hash ; @target.hash^@len.to_i end
  def eql?(o) o.class == self.class and [o.target, o.len] == [@target, @len] end
  alias == eql?

  include Renderable
  # returns the list of renderable elements: optional size qualifier, then '[' target ']'
  def render
    ret = []
    qual = {1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword'}[len] || "_#{len*8}bits" if len
    ret << "#{qual} ptr " if qual
    ret << '[' << @target << ']'
  end

  # returns the complexity of the expression (number of externals +1 per indirection)
  def complexity
    1+@target.complexity
  end

  # shortcut constructor: the target is wrapped through Expression[], origin is optional
  def self.[](t, l, o=nil)
    new(Expression[*t], l, o)
  end

  def inspect
    "Indirection[#{@target.inspect.sub(/^Expression/, '')}, #{@len.inspect}#{', '+@origin.inspect if @origin}]"
  end

  def externals
    @target.externals
  end

  # pattern-matches self against the Indirection pattern +target+
  # vars is a hash whose keys are the pattern variables: a key holding a falsy
  # value gets bound to the matching part of self, a key already holding a value
  # must be equal to the matching part of self
  # returns vars on success, false on mismatch
  def match_rec(target, vars)
    return false if not target.kind_of? Indirection
    t = target.target
    if vars[t]
      return false if @target != vars[t]
    elsif vars.has_key? t
      vars[t] = @target
    elsif t.kind_of? ExpressionType
      return false if not @target.match_rec(t, vars)
    else
      # literal pattern pointer: must be equal to ours
      # (this used to reference an undefined local 'targ' and raised NameError)
      return false if t != @target
    end
    if vars[target.len]
      return false if @len != vars[target.len]
    elsif vars.has_key? target.len
      vars[target.len] = @len
    else
      return false if target.len != @len
    end
    vars
  end
end
88
+
class Expression
  # returns the complexity of the expression (number of externals +1 per indirection)
  # each operand contributes its own complexity when it is an ExpressionType,
  # 0 when it is nil or numeric, and 1 for anything else
  def complexity
    [@lexpr, @rexpr].inject(0) { |total, operand|
      total + case operand
              when ExpressionType then operand.complexity
              when nil, ::Numeric then 0
              else 1
              end
    }
  end

  # returns the list of Indirections found in the operands:
  # a direct Indirection operand is listed as is, any other ExpressionType
  # operand is asked for its own expr_indirections
  def expr_indirections
    found = if @lexpr.kind_of? Indirection
      [@lexpr]
    elsif @lexpr.kind_of? ExpressionType
      @lexpr.expr_indirections
    else
      []
    end

    if @rexpr.kind_of? Indirection
      found << @rexpr
    elsif @rexpr.kind_of? ExpressionType
      found.concat @rexpr.expr_indirections
    end

    found
  end
end
117
+
class EncodedData
  # returns an ::Integer from self.ptr, advances ptr
  # bytes from rawsize to virtsize = 0
  # returns nil once ptr is past virtsize (ptr is still advanced)
  # ignores self.relocations
  def get_byte
    @ptr += 1
    if @ptr <= @data.length
      byte = @data[ptr - 1]
      # ruby 1.9+ String#[] yields a 1-char String instead of an Integer
      byte.kind_of?(::String) ? byte.unpack('C').first : byte
    elsif @ptr <= @virtsize
      0
    end
  end

  # reads len bytes from self.data, advances ptr
  # len is clamped to what remains before virtsize
  # bytes from rawsize to virtsize are returned as zeroes
  # ignores self.relocations
  def read(len=@virtsize-@ptr)
    remaining = @virtsize - @ptr
    len = remaining if len > remaining
    chunk = (@ptr < @data.length) ? @data[@ptr, len] : ''
    if chunk.length < len
      chunk = chunk.to_str.ljust(len, "\0")
    end
    @ptr += len
    chunk
  end

  # decodes an immediate value from self.ptr, advances ptr
  # returns an Expression on relocation, or an ::Integer
  # if ptr has a relocation but the type/endianness does not match, the reloc is ignored and a warning is issued
  # TODO arg type => sign+len
  def decode_imm(type, endianness)
    isz = Expression::INT_SIZE[type]
    raise "invalid imm type #{type.inspect}" if not isz

    rel = @reloc[@ptr]
    if rel
      if Expression::INT_SIZE[rel.type] == isz and rel.endianness == endianness
        @ptr += rel.length
        return rel.target
      end
      puts "W: Immediate type/endianness mismatch, ignoring relocation #{rel.target.inspect} (wanted #{type.inspect})" if $DEBUG
    end

    Expression.decode_imm(read(isz/8), type, endianness)
  end
  alias decode_immediate decode_imm
end
161
+
class Expression
  # decodes an immediate from a raw binary string
  # type may be a length in bytes, interpreted as unsigned, or an expression type (eg :u32)
  # endianness is either an endianness or an object than responds to endianness
  # off is the offset in str of the first byte to decode
  def self.decode_imm(str, type, endianness, off=0)
    if type.kind_of? ::Integer
      # byte count => matching 'a'-prefixed type of the same bit width
      bits = 8*type
      type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == bits }
    end
    endianness = endianness.endianness if not endianness.kind_of? ::Symbol

    raw = str[off, INT_SIZE[type]/8].to_s
    # always accumulate most significant byte first
    raw = raw.reverse if endianness == :little
    val = 0
    raw.unpack('C*').each { |byte| val = (val << 8) | byte }

    if type.to_s[0] == ?i
      make_signed(val, INT_SIZE[type])
    else
      val
    end
  end
  class << self
    alias decode_immediate decode_imm
  end
end
179
+
class CPU
  # decodes the instruction at edata.ptr, mapped at virtual address addr
  # chains decode_findopcode, decode_instr_op and decode_instr_interpret,
  # stopping as soon as one of them yields nothing
  # returns a DecodedInstruction or nil
  def decode_instruction(edata, addr)
    @bin_lookaside ||= build_bin_lookaside

    di = decode_findopcode(edata)
    return if not di
    di.address = addr

    di = decode_instr_op(edata, di)
    return if not di

    decode_instr_interpret(di, addr)
  end

  # matches the binary opcode at edata.ptr
  # returns di or nil
  # default implementation: a blank DecodedInstruction bound to this cpu
  def decode_findopcode(edata)
    DecodedInstruction.new(self)
  end

  # decodes di.instruction
  # returns di or nil
  # default implementation: decodes nothing
  def decode_instr_op(edata, di)
    nil
  end

  # may modify di.instruction.args for eg jump offset => absolute address
  # returns di or nil
  # default implementation: di is returned untouched
  def decode_instr_interpret(di, addr)
    di
  end

  # number of instructions following a jump that are still executed
  def delay_slot(di=nil)
    0
  end
end
213
+ end
@@ -0,0 +1,2659 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/decode'
9
+ require 'metasm/parse_c'
10
+
11
+ module Metasm
12
+ class C::Variable; attr_accessor :stackoff; end
13
+ class C::Block; attr_accessor :decompdata; end
14
+ class DecodedFunction; attr_accessor :decompdata; end
15
+
class CPU
  # hook called by the Decompiler at the end of a function decompilation,
  # with the decompiler, the function entrypoint and the C function variable
  # default implementation: does nothing and returns nil
  def decompile_check_abi(dcmp, entry, func)
    nil
  end
end
20
+
21
+ class Decompiler
22
+ # TODO add methods to C::CExpr
23
+ AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--']
24
+
25
+ attr_accessor :dasm, :c_parser
26
+ attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels
27
+ # recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done
28
+ attr_accessor :recurse
29
+
# dasm is the disassembler to work from
# cp is the C parser receiving the decompiled code: defaults to the dasm's one,
# and a fresh parser is requested from the cpu when none is available
def initialize(dasm, cp = dasm.c_parser)
  infinity = 1/0.0
  @dasm = dasm
  # no recursion limit by default
  @recurse = infinity
  @c_parser = cp || @dasm.cpu.new_cparser
end
35
+
# decompiles recursively every function starting at the given entrypoints,
# then runs the global finalization step (static vars, ...)
# should be called once after everything is decompiled (global optimizations may bring bad results otherwise)
# use decompile_func for incremental decompilation
# returns the c_parser
def decompile(*entry)
  entry.each do |entrypoint|
    decompile_func(entrypoint)
  end
  finalize
  @c_parser
end
45
+
# decompile a function, decompiling subfunctions as needed
# may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath is found which may changes dependency in blocks etc)
# returns nil when nothing had to be restarted, or when the function is skipped
# (recursion budget exhausted, entry not disassembled, symbol already known)
def decompile_func(entry)
  return if @recurse < 0
  entry = @dasm.normalize entry
  return if not @dasm.decoded[entry]

  # create a new toplevel function to hold our code
  func = C::Variable.new
  func.name = @dasm.auto_label_at(entry, 'func')
  # reuse a previously recorded return type if any, default to int
  if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type]
    rettype = f.decompdata[:return_type]
  else
    rettype = C::BaseType.new(:int)
  end
  func.type = C::Function.new rettype, []
  if @c_parser.toplevel.symbol[func.name]
    # the symbol is already registered: either fully decompiled, or we are being
    # called recursively while it is still in progress
    return if @recurse == 0
    if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
      # recursive dependency: declare prototype
      puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE
      # @recurse == 0 makes do_decompile_func keep only the prototype
      pre_recurse = @recurse
      @recurse = 0
      @c_parser.toplevel.symbol.delete func.name
      decompile_func(entry)
      @recurse = pre_recurse
      # fall back on the default prototype if the nested run declared nothing
      # (dcl itself is not used afterwards)
      if not dcl = @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
        @c_parser.toplevel.statements << C::Declaration.new(func)
      end
    end
    return
  end
  @c_parser.toplevel.symbol[func.name] = func
  puts "decompiling #{func.name}" if $VERBOSE

  # listblocks_func throws :restart when a subfunction asked for it: start over until stable
  # retval stays nil unless at least one restart happened
  while catch(:restart) { do_decompile_func(entry, func) } == :restart
    retval = :restart
  end

  @c_parser.toplevel.symbol[func.name] = func # recursive func prototype could have overwritten us
  @c_parser.toplevel.statements << C::Declaration.new(func)

  puts " decompiled #{func.name}" if $VERBOSE

  retval
end
92
+
# calls decompile_func with one less level of recursion allowed (internal use)
# the recursion budget is restored afterwards, even if the decompilation raises or throws
# returns whatever decompile_func returned
def decompile_func_rec(entry)
  begin
    @recurse -= 1
    decompile_func(entry)
  ensure
    @recurse += 1
  end
end
100
+
# does the actual decompilation of the function at entry into func (a C::Variable
# holding a C::Function type): builds the body as func.initializer, updates the
# return type from the trailing return statement
# when @recurse is 0 only the prototype is kept (the body is dropped at the end)
# may throw :restart (through listblocks_func)
def do_decompile_func(entry, func)
  # find decodedinstruction graph of the function, decompile subfuncs
  myblocks = listblocks_func(entry)

  # [esp+8] => [:frameptr-12]
  makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block }

  # find registry dependencies between blocks
  deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func)

  scope = func.initializer = C::Block.new(@c_parser.toplevel)
  # share the decompdata hash with the DecodedFunction when there is one
  if df = @dasm.function[entry]
    scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}}
  else
    scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}}
  end

  # di blocks => raw c statements, declare variables
  @dasm.cpu.decompile_blocks(self, myblocks, deps, func)

  # passes needed even for a prototype-only decompilation
  simplify_goto(scope)
  namestackvars(scope)
  unalias_vars(scope, func)
  decompile_c_types(scope)
  optimize(scope)
  remove_unreferenced_vars(scope)
  cleanup_var_decl(scope, func)
  # passes only run when the body will be kept
  if @recurse > 0
    decompile_controlseq(scope)
    optimize_vars(scope)
    optimize_ctrl(scope)
    optimize_vars(scope)
    remove_unreferenced_vars(scope)
    simplify_varname_noalias(scope)
    rename_variables(scope)
  end
  @dasm.cpu.decompile_check_abi(self, entry, func)

  case ret = scope.statements.last
  when C::CExpression; puts "no return at end of func" if $VERBOSE
  when C::Return
    if not ret.value
      # drop a trailing valueless return
      scope.statements.pop
    else
      # the function return type is the type of the returned value
      v = ret.value
      # look through an operator-less CExpression wrapping a typed object
      v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed
      func.type.type = v.type
    end
  end

  if @recurse == 0
    # we need only the prototype
    func.initializer = nil
  end
end
156
+
# redecompile a function, redecompiles functions calling it if its prototype changed
# name is the name of the toplevel symbol of the function
def redecompile(name)
  # forget the previous decompilation, keep the old symbol to compare prototypes
  @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name }
  oldvar = @c_parser.toplevel.symbol.delete name

  decompile_func(name)

  if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function
    o, n = oldvar.type, newvar.type
    # prototype changed: different return type, argument count, or any argument type
    if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type }
      # XXX a may depend on b and c, and b may depend on c -> redecompile c twice
      # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs..
      # iterate on a copy: the nested redecompile calls modify the statement list
      @c_parser.toplevel.statements.dup.each { |st|
        next if not st.kind_of? C::Declaration
        next if not st.var.initializer
        next if st.var.name == name
        # only consider declarations containing a call to the redecompiled function
        next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name }
        redecompile(st.var.name)
      }
    end
  end
end
179
+
# finds or creates the toplevel C variable standing for the static data at addr,
# to be used in place of the raw address
# addr is an address or a label name, type is the C type of the expression referring to it,
# scope is the C::Block whose references should be patched when a function pointer
# target gets (re)decompiled
# returns nil for (almost) NULL pointers, a struct member access expression when addr
# falls inside an already known static struct, the C::Variable otherwise
def new_global_var(addr, type, scope=nil)
  addr = @dasm.normalize(addr)

  # (almost) NULL ptr
  # ::Integer instead of Fixnum: the Fixnum constant no longer exists in ruby >= 3.2,
  # and a value in 0...32 was always a Fixnum on older rubies
  return if addr.kind_of? ::Integer and addr >= 0 and addr < 32

  # check preceding structure we're hitting
  # TODO check what we step over when defining a new static struct
  0x100.times { |i_|
    next if not n = @dasm.get_label_at(addr-i_)
    next if not v = @c_parser.toplevel.symbol[n]
    next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union
    break if i_ == 0 # XXX it crashes later if we dont break here
    next if sizeof(v.type.pointed) <= i_
    return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil)
  }

  ptype = type.pointed.untypedef if type.pointer?
  if ptype.kind_of? C::Function
    # pointer to function: make sure the target is disassembled and decompiled
    name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk')
    if @dasm.get_section_at(addr) and @recurse > 0
      puts "found function pointer to #{name}" if $VERBOSE
      @dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ?
      f = @dasm.function[addr] ||= DecodedFunction.new
      # TODO detect thunks (__noreturn)
      f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} }
      if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function
        os = @c_parser.toplevel.symbol.delete name
        @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name }
        # seed the stack variable types/names from the pointed prototype,
        # each argument being aligned on the pointer size
        aoff = 1
        ptype.args.to_a.each { |a|
          aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
          f.decompdata[:stackoff_type][aoff] ||= a.type
          f.decompdata[:stackoff_name][aoff] ||= a.name if a.name
          aoff += sizeof(a) # ary ?
        }
        decompile_func_rec(addr)
        s = @c_parser.toplevel.symbol[name]
        walk_ce([@c_parser.toplevel, scope]) { |ce|
          ce.lexpr = s if ce.lexpr == os
          ce.rexpr = s if ce.rexpr == os
        } if os and s # update existing references to old instance
        # else redecompile with new prototye ?
      end
    end
  end

  # label prefix from the size of the pointed type
  name = case (type.pointer? && tsz = sizeof(nil, ptype))
         when 1; 'byte'
         when 2; 'word'
         when 4; 'dword'
         else 'unk'
         end
  name = 'stru' if ptype.kind_of? C::Union
  name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru')

  if not var = @c_parser.toplevel.symbol[name]
    var = C::Variable.new
    var.name = name
    var.type = type.pointer? ? C::Array.new(ptype) : type
    @c_parser.toplevel.symbol[var.name] = var
    @c_parser.toplevel.statements << C::Declaration.new(var)
  end
  if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length
    # TODO struct init, array, fptrs..
  elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and
      [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer)
    # initialized data: read the raw values to build an initializer
    # TODO do not overlap other statics (but labels may refer to elements of the array...)
    data = (0..256).map {
      v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness)
      v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation
      v
    }
    var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty?
    if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str
      # XXX 0x80 with ruby1.9...
      var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil
    end
    if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and
        i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc
      i.type = i.rexpr.type
      type = var.type = C::Array.new(C::Pointer.new(i.type))
      var.initializer = [i]
    end
    var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? C::Array
  end

  # TODO patch existing references to addr ? (or would they have already triggered new_global_var?)

  # return the object to use to replace the raw addr
  var
end
272
+
# return an array of [address of block start, list of block to]]
# decompile subfunctions
# walks the instruction blocks reachable from entry, without following indirect
# links nor entering subfunctions
# throws :restart when the decompilation of a subfunction requests it
def listblocks_func(entry)
  # addresses marked as functions by this method itself (see below)
  @autofuncs ||= []
  blocks = []
  entry = dasm.normalize entry
  todo = [entry]
  while a = todo.pop
    # skip blocks already listed and addresses holding no instruction
    next if blocks.find { |aa, at| aa == a }
    next if not di = @dasm.di_at(a)
    blocks << [a, []]
    di.block.each_to { |ta, type|
      next if type == :indirect
      ta = dasm.normalize ta
      # target of a saveip instruction that is not a known function, while the
      # current entry is itself unknown or auto-detected: mark it as a function
      if type != :subfuncret and not @dasm.function[ta] and
          (not @dasm.function[entry] or @autofuncs.include? entry) and
          di.block.list.last.opcode.props[:saveip]
        # possible noreturn function
        # XXX call $+5; pop eax
        @autofuncs << ta
        @dasm.function[ta] = DecodedFunction.new
        puts "autofunc #{Expression[ta]}" if $VERBOSE
      end

      if @dasm.function[ta] and type != :subfuncret
        # subfunction: decompile it (its blocks are not part of this function)
        f = dasm.auto_label_at(ta, 'func')
        # for a thunk, decompile the thunk target instead
        ta = dasm.normalize($1) if f =~ /^thunk_(.*)/
        ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret)
        throw :restart, :restart if ret == :restart
      else
        # same function: name the target if it is already listed (backward link), queue it
        @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta }
        blocks.last[1] |= [ta]
        todo << ta
      end
    }
  end
  blocks
end
311
+
# backtraces an expression from addr
# returns an integer, a label name, or an Expression
# expr is returned untouched when the backtrace yields nothing or is unknown
# a thunk label is replaced by the name of the thunk target
# XXX '(GetProcAddr("foo"))()' should not decompile to 'foo()'
def backtrace_target(expr, addr)
  found = @dasm.backtrace(expr, addr).first
  return expr if not found
  return expr if found == Expression::Unknown

  resolved = Expression[found].reduce_rec
  # prefer the label name when the address has one
  resolved = @dasm.get_label_at(resolved) || resolved
  if resolved.kind_of? ::String and resolved =~ /^thunk_(.*)/
    $1
  else
    resolved
  end
end
326
+
# patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off
# funcstart is the function entrypoint, blocks the list of its instruction blocks
# (handed to the cpu's decompile_makestackvars, which yields the blocks to process)
def makestackvars(funcstart, blocks)
  blockstart = nil
  cache_di = nil
  cache = {} # [i_s, e, type] => backtrace
  # tovar[di, e, i_s]: rewrites expression e as seen from instruction di, replacing
  # symbols that backtrace to :frameptr / :frameptr+int by that value
  # i_s is forwarded as the backtrace :include_start option
  tovar = lambda { |di, e, i_s|
    case e
    when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce
    when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin]
    when :frameptr; e
    when ::Symbol
      # the cache is only valid for one instruction
      cache.clear if cache_di != di ; cache_di = di
      vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart,
          :include_start => i_s, :no_check => true, :terminals => [:frameptr])
      # backtrace only to blockstart first
      # accepted results: frameptr, frameptr+int, or a dereference of one of those
      if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or
          (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or
          (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and
          (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer))))
        ee
      else
        # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding..
        vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart,
            :include_start => i_s, :no_check => true, :terminals => [:frameptr])
        if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or
            (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)))
          ee
        else e
        end
      end
    else e
    end
  }

  # must not change bt_bindings until everything is backtracked
  repl_bind = {} # di => bt_bd

  @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block|
    block.list.each { |di|
      bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di)
      newbd = repl_bind[di] = {}
      bd.each { |k, v|
        # only memory destinations are rewritten on the key side
        k = tovar[di, k, true] if k.kind_of? Indirection
        # drop bindings whose destination resolves to the frame pointer itself (frameptr or frameptr+int)
        next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer)
        newbd[k] = tovar[di, v, false]
      }
    }
  }

  # everything is backtracked: install the rewritten bindings
  repl_bind.each { |di, bd| di.backtrace_binding = bd }
end
378
+
# give a name to a stackoffset (relative to start of func)
# 4 => :arg_0, -8 => :var_4 etc
# offset 0 is the return address; offsets smaller than one slot get a
# zero-prefixed name showing the raw offset (arg_0N / var_0N)
def stackoff_to_varname(off)
  ptr_size = @c_parser.typesize[:ptr]
  # 4 => arg_0, 8 => arg_4..
  return 'arg_%X' % (off - ptr_size) if off >= ptr_size
  return 'arg_0%X' % off if off > 0
  return 'retaddr' if off == 0

  if off <= -@dasm.cpu.size/8
    # -4 => var_0, -8 => var_4..
    'var_%X' % (-off-@dasm.cpu.size/8)
  else
    'var_0%X' % -off
  end
end
389
+
# turns an Expression to a CExpression, create+declares needed variables in scope
# e is the object to convert (Expression, Indirection, Integer, CExpression, or a
# symbol/string naming a variable), scope the C::Block receiving new declarations
# itype is the pointer type expected for e when it is used as a dereferenced address
def decompile_cexpr(e, scope, itype=nil)
  case e
  when Expression
    if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/
      # assignment to a dummy_metasm_* name: only the value matters
      decompile_cexpr(e.rexpr, scope, itype)
    elsif e.op == :+ and e.rexpr.kind_of? ::Integer and e.rexpr < 0
      # a + -4 => a - 4
      decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype)
    elsif e.lexpr
      a = decompile_cexpr(e.lexpr, scope, itype)
      C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)]
    elsif e.op == :+
      # unary +: no-op
      decompile_cexpr(e.rexpr, scope, itype)
    else
      a = decompile_cexpr(e.rexpr, scope, itype)
      C::CExpression[e.op, a]
    end
  when Indirection
    # pointed type: a sized integer, or a struct wrapping a byte array for other lengths
    case e.len
    when 1, 2, 4, 8
      bt = C::BaseType.new("__int#{e.len*8}".to_sym)
    else
      bt = C::Struct.new
      bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))]
    end
    itype = C::Pointer.new(bt)
    p = decompile_cexpr(e.target, scope, itype)
    # cast the address to the pointer type if needed, then dereference
    p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer
    C::CExpression[:*, p]
  when ::Integer
    C::CExpression[e]
  when C::CExpression
    e
  else
    # anything else is a variable name: reuse the variable visible from scope, or declare it
    name = e.to_s
    if not s = scope.symbol_ancestors[name]
      s = C::Variable.new
      s.type = C::BaseType.new(:__int32)
      case e
      when ::String # edata relocation (rel.length = size of pointer)
        return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope)
      when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})")
      else s.type.qualifier = [:volatile]
        puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE
      end
      s.name = name
      scope.symbol[s.name] = s
      scope.statements << C::Declaration.new(s)
    end
    s
  end
end
442
+
443
# simplify goto -> goto / goto -> return
#
# Works in place on +scope+ (a C::Block) and its sub-blocks:
# * a goto whose target label is immediately followed by a goto/return is
#   replaced by a copy of that goto/return
# * unless +keepret+ is set, a return carrying the same value as the final
#   return of +scope+ becomes a goto to the label placed before that final
#   return ('ret_label' is inserted there if no label is present)
# * a goto/return directly following another goto/return is dropped
# * 'if (x) { goto l; } l:' gets an empty then-block
# remove_labels is called after the rewrite and once more at the end.
def simplify_goto(scope, keepret = false)
  if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label
    scope.statements.insert(-2, C::Label.new("ret_label"))
  end

  # label name => the goto/return statement the label leads to
  jumpto = {}
  walk(scope) { |s|
    next if not s.kind_of? C::Block
    s.statements.each_with_index { |ss, i|
      next unless ss.kind_of?(C::Goto) or ss.kind_of?(C::Return)
      # walk back over the labels directly preceding ss.
      # The scan must stop at index 0: a negative index would wrap around
      # to the end of the array and bind unrelated trailing labels to ss
      idx = i - 1
      while idx >= 0 and (l = s.statements[idx]).kind_of? C::Label
        jumpto[l.name] = ss
        idx -= 1
      end
    }
  }

  # returns the replacement for statement s, or nil to keep it
  simpler = lambda { |s|
    case s
    when C::Goto
      if jumpto[s.target]
        r = jumpto[s.target].dup
        # the duplicated return must not share its value with the original
        r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression
        r
      end
    when C::Return
      if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1]
        C::Goto.new(scope.statements[-2].name)
      end
    end
  }

  walk(scope) { |s|
    case s
    when C::Block
      s.statements.each_with_index { |ss, i|
        if sp = simpler[ss]
          s.statements[i] = sp
        end
      }
    when C::If
      if sp = simpler[s.bthen]
        s.bthen = sp
      end
    end
  }

  # remove unreferenced labels
  remove_labels(scope)

  walk(scope) { |s|
    next if not s.kind_of? C::Block
    # remove dead code  goto a; goto b; if (0) { z: bla; }  => rm goto b
    # del is true while the previous statement was a goto/return
    del = false
    s.statements.delete_if { |st|
      case st
      when C::Goto, C::Return
        olddel = del
        del = true
        olddel
      else
        del = false
      end
    }
    # if () { goto x; } x:
    s.statements.each_with_index { |ss, i|
      if ss.kind_of? C::If
        t = ss.bthen
        t = t.statements.first if t.kind_of? C::Block
        if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target
          ss.bthen = C::Block.new(scope)
        end
      end
    }
  }

  remove_labels(scope)
end
523
+
524
# Turns the ifgoto/goto sequences of +scope+ into while/ifelse statements:
# two rounds of if reconstruction (each followed by a label cleanup), then
# the while pass, then the switch pass.
# Returns the value of decompile_cseq_switch.
def decompile_controlseq(scope)
  # TODO replace all this crap by a method using the graph representation
  2.times {
    scope.statements = decompile_cseq_if(scope.statements, scope)
    remove_labels(scope)
  }
  # TODO harmonize _if/_while api (if returns a replacement, while patches)
  decompile_cseq_while(scope.statements, scope)
  decompile_cseq_switch(scope)
end
535
+
536
# Cosmetic cleanup of the control statements of +scope+, in place:
# * runs simplify_goto (keeping the returns)
# * inside a loop, a goto to the label right after the loop becomes a break,
#   a goto to the label right before the loop becomes a continue
# * while (1) { a; if(b) { c; return; }; d; } => while (1) { a; if (b) break; d; } c;
# * if (a+1) => if (a != -1)
# * optimize if() { a; } to if() a; (same for else and loop bodies), an
#   if with an empty then and no else is replaced by its test expression,
#   an empty loop body is set to nil (do {} while(x) becomes while(x))
# * if (a) return x; if (b) return x; => if (a || b) return x;
def optimize_ctrl(scope)
  simplify_goto(scope, true)

  # break/continue
  # XXX if (foo) while (bar) goto bla; bla:  should => break
  # brk/cnt are the label names a goto must target to become break/continue
  # (the lambda is not named 'walk' so that it does not shadow the walk method)
  loopjumps = lambda { |e, brk, cnt|
    case e
    when C::Block
      loopjumps[e.statements, brk, cnt]
      e
    when ::Array
      e.each_with_index { |st, i|
        case st
        when C::While, C::DoWhile
          l1 = (e[i+1].name if e[i+1].kind_of? C::Label)
          # i > 0: e[-1] would wrap to the last statement and wrongly make
          # a trailing label the continue target of a loop starting the list
          l2 = (e[i-1].name if i > 0 and e[i-1].kind_of? C::Label)
          e[i].body = loopjumps[st.body, l1, l2]
        else
          e[i] = loopjumps[st, brk, cnt]
        end
      }
      e
    when C::If
      e.bthen = loopjumps[e.bthen, brk, cnt] if e.bthen
      e.belse = loopjumps[e.belse, brk, cnt] if e.belse
      e
    when C::While, C::DoWhile
      # loop with no known surrounding labels
      e.body = loopjumps[e.body, nil, nil]
      e
    when C::Goto
      if e.target == brk
        C::Break.new
      elsif e.target == cnt
        C::Continue.new
      else e
      end
    else e
    end
  }
  loopjumps[scope, nil, nil]

  remove_labels(scope)

  # while (1) { a; if(b) { c; return; }; d; }  =>  while (1) { a; if (b) break; d; } c;
  while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and
      not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block
    break if not i = st.body.statements.find { |ist|
      ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return
    }
    walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? C::Block and sst.outer == i.bthen }
    scope.statements.concat i.bthen.statements
    i.bthen = C::Break.new
  end

  patch_test = lambda { |ce|
    ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!'
    # if (a+1)  =>  if (a != -1)
    if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr
      ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+
      ce.op = :'!='
    end
  }

  # reattaches the sub-blocks of the single-statement block blk to blk's
  # parent, and returns that single statement
  unwrap = lambda { |blk|
    walk(blk.statements) { |sst| sst.outer = blk.outer if sst.kind_of? C::Block and sst.outer == blk }
    blk.statements.first
  }

  walk(scope) { |ce|
    case ce
    when C::If
      patch_test[ce.test]
      if ce.bthen.kind_of? C::Block
        case ce.bthen.statements.length
        when 1
          ce.bthen = unwrap[ce.bthen]
        when 0
          if not ce.belse and i = ce.bthen.outer.statements.index(ce)
            ce.bthen.outer.statements[i] = ce.test	# TODO remove sideeffectless parts
          end
        end
      end
      if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1
        ce.belse = unwrap[ce.belse]
      end
    when C::While, C::DoWhile
      patch_test[ce.test]
      if ce.body.kind_of? C::Block
        case ce.body.statements.length
        when 1
          ce.body = unwrap[ce.body]
        when 0
          # do {} while (x);  =>  while (x);
          if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce)
            ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body)
          end
          ce.body = nil
        end
      end
    end
  }

  walk(scope) { |ce|
    next if not ce.kind_of? C::Block
    st = ce.statements
    st.length.times { |n|
      while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and (
          (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or
          (st[n].bthen.kind_of? C::Break and st[n+1].bthen.kind_of? C::Break) or
          (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue))
        # if (a) return x; if (b) return x; => if (a || b) return x;
        st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test]
        st.delete_at(n+1)
      end
    }
  }
end
650
+
651
+ # ifgoto => ifthen: rewrites 'if (x) goto l;' sequences into structured if () {} [else {}] statements
652
+ # ary is an array of statements where we try to find if () {} [else {}]; it is consumed (emptied) by this method, the result is a new array
653
+ # recurses to then/else content; scope is used as parent of the blocks created here. Returns ary untouched when forbid_decompile_ifwhile is set
654
+ def decompile_cseq_if(ary, scope)
655
+ return ary if forbid_decompile_ifwhile
656
+ # the array of decompiled statements to use as replacement
657
+ ret = []
658
+ # list of labels appearing in ary
659
+ inner_labels = ary.grep(C::Label).map { |l| l.name }
660
+ while s = ary.shift # s is the statement being rewritten, ary holds what follows it
661
+ # recurse if it's not the first run
662
+ if s.kind_of? C::If
663
+ s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block
664
+ s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block
665
+ end
666
+
667
+ # if (a) goto x; if (b) goto x; => if (a || b) goto x;
668
+ while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and
669
+ not ary.first.belse and s.bthen.target == ary.first.bthen.target
670
+ s.test = C::CExpression[s.test, :'||', ary.shift.test]
671
+ end
672
+
673
+ # if (a) goto x; b; x: => if (!a) { b; }
674
+ if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target }
675
+ # if {goto l;} a; l: => if (!) {a;}
676
+ s.test = C::CExpression.negate s.test
677
+ s.bthen = C::Block.new(scope)
678
+ s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen)
679
+ s.bthen.statements.pop # remove l: from bthen, it is in ary (was needed in bthen for inner ifs)
680
+ ary[0...ary.index(l)] = [] # drop the statements moved into bthen, l: itself stays in ary
681
+ end
682
+
683
+ if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto)
684
+ s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? C::Goto
685
+
686
+ bts = s.bthen.statements # the statements of the then-block, patched in place below
687
+
688
+ # if (a) if (b) { c; } => if (a && b) { c; }
689
+ if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse
690
+ s.test = C::CExpression[s.test, :'&&', bts.first.test]
691
+ bts = bts.first.bthen
692
+ bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts]
693
+ end
694
+
695
+ # if (a) { if (b) goto c; d; } c: => if (a && !b) { d; }
696
+ if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name
697
+ s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)]
698
+ if e = bts.shift.belse # the inner if is removed, its else part (if any) is kept
699
+ bts.unshift e
700
+ end
701
+ end
702
+
703
+ # if () { goto a; } a:
704
+ if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name
705
+ bts.pop
706
+ end
707
+
708
+ # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer;
709
+ if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return
710
+ s.test = C::CExpression.negate s.test
711
+ ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)] # swap the then-block content with the statements up to the return
712
+ end
713
+
714
+ # if { a; goto l; } b; l: => if {a;} else {b;}
715
+ if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target }
716
+ s.belse = C::Block.new(scope)
717
+ s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse)
718
+ ary[0...ary.index(l)] = []
719
+ bts.pop
720
+ end
721
+
722
+ # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any;
723
+ if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target }
724
+ s.belse = C::Block.new(scope)
725
+ s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse)
726
+ ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], []
727
+ end
728
+
729
+ # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c;
730
+ if s.belse
731
+ bes = s.belse.statements
732
+ while not bts.empty?
733
+ if bts.last.kind_of? C::Label; ary.unshift bts.pop
734
+ elsif bes.last.kind_of? C::Label; ary.unshift bes.pop
735
+ elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop # statements are compared through their to_s rendering
736
+ else break
737
+ end
738
+ end
739
+
740
+ # if () { a; } else { b; } => if () { a; } else b;
741
+ # if () { a; } else {} => if () { a; }
742
+ case bes.length
743
+ when 0; s.belse = nil
744
+ #when 1; s.belse = bes.first
745
+ end
746
+ end
747
+
748
+ # if () {} else { a; } => if (!) { a; }
749
+ # if () { a; } => if () a;
750
+ case bts.length
751
+ when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse
752
+ #when 1; s.bthen = bts.first # later (allows simpler handling in _while)
753
+ end
754
+ end
755
+
756
+ # l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1;
757
+ if s.kind_of? C::If
758
+ ls = s.bthen
759
+ ls = ls.statements.last if ls.kind_of? C::Block
760
+ if ls.kind_of? C::Goto
761
+ if li = inner_labels.index(ls.target)
762
+ table = inner_labels
763
+ else
764
+ table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse
765
+ li = table.index(ls.target) || table.length
766
+ end
767
+ g = ary.find { |ss| # first goto before any return whose target ranks after li in table
768
+ break if ss.kind_of? C::Return
769
+ next if not ss.kind_of? C::Goto
770
+ table.index(ss.target).to_i > li
771
+ }
772
+ if g
773
+ s.test = C::CExpression.negate s.test
774
+ if not s.bthen.kind_of? C::Block
775
+ ls = C::Block.new(scope)
776
+ ls.statements << s.bthen
777
+ s.bthen = ls
778
+ end
779
+ ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope)
780
+ end
781
+ end
782
+ end
783
+
784
+ ret << s
785
+ end
786
+ ret
787
+ end
788
+
789
# Rebuilds loops from label/goto patterns, patching +ary+ in place:
#   l: if (c) { a; goto l; }            =>  while (c) { a; }
#   l: if (c) { b; return; } a; goto l; =>  while (!c) { a; } b; return;
#   l: a; goto l;                       =>  while (1) { l: a; }
#   l: a; if (c) goto l;                =>  do { l: a; } while (c);
# Recurses into the if/else blocks and the loop bodies.
# +scope+ is the parent given to the blocks created here.
# Returns +ary+, or nil when forbid_decompile_ifwhile is set.
def decompile_cseq_while(ary, scope)
  return if forbid_decompile_ifwhile

  # find the next instruction that is not a label
  next_instr = lambda { |lbl| ary[ary.index(lbl)..-1].find { |st| not st.kind_of? C::Label } }
  # last 'goto name' of a statement list
  last_goto = lambda { |list, name| list.reverse.find { |st| st.kind_of? C::Goto and st.target == name } }

  # TODO XXX get rid of #index
  # ary is rescanned from the start each time a loop was created (1.9 does not support 'retry')
  restart = true
  while restart
    restart = false
    ary.each { |s|
      case s
      when C::Label
        cond = next_instr[s]
        if cond and cond.kind_of? C::If and not cond.belse and cond.bthen.kind_of? C::Block
          tail = cond.bthen.statements.last
          if tail.kind_of? C::Goto and tail.target == s.name
            cond.bthen.statements.pop
            follow = ary[ary.index(cond)+1]
            if follow and follow.kind_of? C::Label
              # a goto to the label following the if leaves the loop
              cond.bthen.statements.grep(C::If).each { |inner|
                inner.bthen = C::Break.new if inner.bthen.kind_of? C::Goto and inner.bthen.target == follow.name
              }
            end
            ary[ary.index(cond)] = C::While.new(cond.test, cond.bthen)
          elsif tail.kind_of? C::Return and g = last_goto[ary[ary.index(s)+1..-1], s.name]
            body = C::Block.new(scope)
            body.statements = decompile_cseq_while(ary[ary.index(cond)+1...ary.index(g)], body)
            w = C::While.new(C::CExpression.negate(cond.test), body)
            ary[ary.index(cond)..ary.index(g)] = [w, *cond.bthen.statements]
            restart = true ; break	#retry
          end
        end

        if g = last_goto[ary[ary.index(s)..-1], s.name]
          body = C::Block.new(scope)
          body.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], body)
          w = C::While.new(C::CExpression[1], body)
          ary[ary.index(s)..ary.index(g)] = [w]
          restart = true ; break	#retry
        end

        # last else-less if whose then part is just 'goto s'
        back_if = ary[ary.index(s)..-1].reverse.find { |st|
          next false unless st.kind_of? C::If and not st.belse
          jump = st.bthen
          jump = jump.statements.first if jump.kind_of?(C::Block) && jump.statements.length == 1
          jump.kind_of? C::Goto and jump.target == s.name
        }
        if back_if
          body = C::Block.new(scope)
          body.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(back_if)], body)
          w = C::DoWhile.new(back_if.test, body)
          ary[ary.index(s)..ary.index(back_if)] = [w]
          restart = true ; break	#retry
        end
      when C::If
        decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block
        decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block
      when C::While, C::DoWhile
        decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block
      end
    }
  end
  ary
end
842
+
843
# TODO
# Looks in +scope+ for guarded jumptable calls:
#   if (v < 12) return ((void(*)())(tableaddr+4*v))();
# When one is found, its instruction is tagged with a 'switch' comment,
# every table entry is backtraced as an execution target (:type => :x),
# the disassembler is run and :restart is thrown.
# Candidates with an unrecognized test are reported when $VERBOSE is set.
def decompile_cseq_switch(scope)
  # strips the op-less CExpression wrappers
  uncast = lambda { |e|
    e = e.rexpr while e.kind_of? C::CExpression and not e.op
    e
  }
  walk(scope) { |s|
    # XXX pfff...
    next unless s.kind_of? C::If
    ret = s.bthen
    ret = ret.statements.first if ret.kind_of? C::Block and ret.statements.length == 1
    next if not ret.kind_of? C::Return or not ret.respond_to? :from_instr
    # already handled
    next if ret.from_instr.comment.to_a.include? 'switch'

    # the returned value must be a call with no argument
    call = ret.value
    next if not call.kind_of? C::CExpression or call.op != :funcall or call.rexpr != [] or not call.lexpr.kind_of? C::CExpression or call.lexpr.op

    # ... to a dereferenced pointer
    ptr = uncast[call.lexpr.rexpr]
    next if not ptr.kind_of? C::CExpression or ptr.op != :* or ptr.lexpr

    # ... computed as an addition
    sum = uncast[ptr.rexpr]
    next if not sum.kind_of? C::CExpression or sum.op != :+

    # ... of an Integer and a product
    r, l = uncast[sum.rexpr], uncast[sum.lexpr]
    r, l = l, r if r.kind_of? C::CExpression
    next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr

    # ... the product having a factor equal to the size of a pointer
    lr, ll = uncast[l.rexpr], uncast[l.lexpr]
    lr, ll = ll, lr if not ll.kind_of? ::Integer
    next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void)))

    base, index = r, lr
    test = s.test
    if test.kind_of? C::CExpression and (test.op == :<= or test.op == :<) and test.lexpr == index and
        test.rexpr.kind_of? C::CExpression and not test.rexpr.op and test.rexpr.rexpr.kind_of? ::Integer
      ret.from_instr.add_comment 'switch'
      sup = test.rexpr.rexpr
      rng = ((test.op == :<) ? (0...sup) : (0..sup))
      from = ret.from_instr.address
      rng.each { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) }
      @dasm.disassemble
      throw :restart, :restart
    end
    puts "unhandled switch() at #{ret.from_instr}" if $VERBOSE
  }
end
879
+
880
# Deletes from every block of +scope+ the labels that no goto targets, then
# drops the continue found as last statement of a while body.
# Does nothing when forbid_optimize_labels is set.
def remove_labels(scope)
  return if forbid_optimize_labels

  # names of the labels targeted by a goto
  targets = []
  walk(scope) { |st|
    next unless st.kind_of? C::Goto
    targets << st.target unless targets.include? st.target
  }

  walk(scope) { |blk|
    next unless blk.kind_of? C::Block
    blk.statements.delete_if { |st| st.kind_of? C::Label and not targets.include? st.name }
  }

  # remove implicit continue; at end of loop
  walk(scope) { |lp|
    next unless lp.kind_of? C::While
    body = lp.body
    body.statements.pop if body.kind_of? C::Block and body.statements.last.kind_of? C::Continue
  }
end
903
+
904
# Tells if the expression +ce+ designates the variable +var+.
# For a var having a stackoff, a C::CExpression matches only under the form
# *&var (op-less wrappers allowed between the * and the &); nil is returned
# for any other expression shape. In all the other cases this is ce == var.
def isvar(ce, var)
  return ce == var unless var.stackoff and ce.kind_of? C::CExpression

  # unary *
  return unless ce.op == :* and not ce.lexpr
  inner = ce.rexpr
  # skip the op-less wrappers
  inner = inner.rexpr while inner.kind_of? C::CExpression and not inner.op
  # unary &
  return unless inner.kind_of? C::CExpression and inner.op == :& and not inner.lexpr
  inner.rexpr == var
end
915
+
916
# Tells if the expression +ce_+ reads the variable +var+.
# True when ce_ is var itself, or when walk_ce reaches a sub-expression
# having var as function, call argument or operand. An assignment to var
# counts only through its right-hand side.
def ce_read(ce_, var)
  direct = isvar(ce_, var)
  return direct if direct

  walk_ce(ce_) { |ce|
    op = ce.op
    if op == :funcall
      break true if isvar(ce.lexpr, var) or ce.rexpr.find { |a| isvar(a, var) }
    elsif op == :'='
      break true if isvar(ce.rexpr, var)
      # *&var = 2: the written side does not count, check the assigned value
      break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var)
    else
      break true if isvar(ce.lexpr, var) or isvar(ce.rexpr, var)
    end
  }
end
928
+
929
# Tells if the expression +ce_+ writes the variable +var+: true when walk_ce
# reaches a sub-expression whose operator belongs to AssignOp and that has
# var as left operand, or as right operand of a ++/--.
def ce_write(ce_, var)
  walk_ce(ce_) { |ce|
    next unless AssignOp.include?(ce.op)
    incdec = (ce.op == :'++' or ce.op == :'--')
    break true if isvar(ce.lexpr, var) or (incdec and isvar(ce.rexpr, var))
  }
end
936
+
937
# Replaces in place the occurrences of +oldce+ by +newce+ in the expressions
# walk_ce yields for +exprs+: the left operand, and either the right operand
# or, for a function call, each of the arguments.
def ce_patch(exprs, oldce, newce)
  walk_ce(exprs) { |ce|
    ce.lexpr = newce if ce.lexpr == oldce
    if ce.op == :funcall
      args = ce.rexpr
      args.each_index { |i| args[i] = newce if args[i] == oldce }
    else
      ce.rexpr = newce if ce.rexpr == oldce
    end
  }
end
950
+
951
+
952
# duplicate vars per domain value
# eg eax = 1; foo(eax); eax = 2; bar(eax);  =>  eax = 1; foo(eax) eax_1 = 2; bar(eax_1);
#    eax = 1; if (bla) eax = 2; foo(eax);   =>  no change
#
# Runs unalias_var on every symbol of +scope+, sharing one graph between
# all the calls. +func+ is not used here.
def unalias_vars(scope, func)
  graph = c_to_graph(scope)

  # unalias func args first, they may include __attr__((out)) needed by the others
  calls = []
  walk_ce(scope) { |ce| calls << ce if ce.op == :funcall }
  # 0 for the vars matched in a call expression, 1 for the others
  rank = lambda { |v|
    hit = walk_ce(calls) { |ce| break true if ce.rexpr == v }
    hit ? 0 : 1
  }

  # find the domains of var aliases
  scope.symbol.values.sort_by(&rank).each { |var| unalias_var(var, scope, graph) }
end
966
+
967
+ # duplicates a var per domain value: each group of connected reads/writes of var that is not reachable from the function start is moved to a new variable named <var>_a<n>, declared in scope. g is the graph of scope (g.exprs, g.from_optim and g.to_optim are used)
968
+ def unalias_var(var, scope, g = c_to_graph(scope))
969
+ # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1))
970
+ read = {} # each of these 4 hashes maps a label to a list of indexes in g.exprs[label]
971
+ write = {}
972
+ ro = {}
973
+ wo = {}
974
+
975
+ # list of [l, i] for which domain is not known
976
+ unchecked = []
977
+
978
+ # mark all exprs of the graph
979
+ # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write
980
+ r = var.has_attribute_var('register')
981
+ g.exprs.each { |label, exprs|
982
+ exprs.each_with_index { |ce, i|
983
+ if ce_read(ce, var)
984
+ if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
985
+ (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
986
+ (ro[label] ||= []) << i # the expr both reads and overwrites var: recorded in ro and wo, and queued once per direction
987
+ (wo[label] ||= []) << i
988
+ unchecked << [label, i, :up] << [label, i, :down]
989
+ else
990
+ (read[label] ||= []) << i
991
+ unchecked << [label, i]
992
+ end
993
+ elsif ce_write(ce, var)
994
+ (write[label] ||= []) << i
995
+ unchecked << [label, i]
996
+ end
997
+ }
998
+ }
999
+
1000
+ # stuff when filling the domain (flood algorithm)
1001
+ dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil # shared between the two lambdas below and the main loop, reset for each domain
1002
+
1003
+ # flood by walking the graph up from [l, i] (excluded)
1004
+ # marks stuff to walk down
1005
+ walk_up = lambda { |l, i|
1006
+ todo_w = [[l, i-1]]
1007
+ done_w = []
1008
+ while o = todo_w.pop
1009
+ next if done_w.include? o
1010
+ done_w << o
1011
+ l, i = o
1012
+ loop do
1013
+ if read[l].to_a.include? i
1014
+ # XXX not optimal (should mark only the topmost read)
1015
+ todo_down |= [[l, i]] if not dom.include? [l, i]
1016
+ dom |= [[l, i]]
1017
+ elsif write[l].to_a.include? i
1018
+ todo_down |= [[l, i]] if not dom.include? [l, i]
1019
+ dom |= [[l, i]]
1020
+ break # a write ends the upward walk on this path
1021
+ elsif wo[l].to_a.include? i
1022
+ todo_down |= [[l, i]] if not dom_wo.include? [l, i, :down]
1023
+ dom_wo |= [[l, i, :down]]
1024
+ break
1025
+ end
1026
+ i -= 1
1027
+ if i < 0 # start of this label's exprs: continue from the end of each predecessor
1028
+ g.from_optim[l].to_a.each { |ll|
1029
+ todo_w << [ll, g.exprs[ll].to_a.length-1]
1030
+ }
1031
+ func_top = true if g.from_optim[l].to_a.empty? # no predecessor: the walk reached a label with nothing above it
1032
+ break
1033
+ end
1034
+ end
1035
+ end
1036
+ }
1037
+
1038
+ # flood by walking the graph down from [l, i] (excluded)
1039
+ # marks stuff to walk up
1040
+ walk_down = lambda { |l, i|
1041
+ todo_w = [[l, i+1]]
1042
+ done_w = []
1043
+ while o = todo_w.pop
1044
+ next if done_w.include? o
1045
+ done_w << o
1046
+ l, i = o
1047
+ loop do
1048
+ if read[l].to_a.include? i
1049
+ todo_up |= [[l, i]] if not dom.include? [l, i]
1050
+ dom |= [[l, i]]
1051
+ elsif write[l].to_a.include? i
1052
+ break # a write ends the downward walk on this path, and is not added to the domain
1053
+ elsif ro[l].to_a.include? i
1054
+ todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up]
1055
+ dom_ro |= [[l, i, :up]]
1056
+ break
1057
+ end
1058
+ i += 1
1059
+ if i >= g.exprs[l].to_a.length # end of this label's exprs: continue at the start of each successor
1060
+ g.to_optim[l].to_a.each { |ll|
1061
+ todo_w << [ll, 0]
1062
+ }
1063
+ break
1064
+ end
1065
+ end
1066
+ end
1067
+ }
1068
+
1069
+ # check it out
1070
+ while o = unchecked.shift # each iteration builds the whole domain containing o
1071
+ dom = []
1072
+ dom_ro = []
1073
+ dom_wo = []
1074
+ func_top = false
1075
+
1076
+ todo_up = []
1077
+ todo_down = []
1078
+
1079
+ # init
1080
+ if read[o[0]].to_a.include? o[1]
1081
+ todo_up << o
1082
+ todo_down << o
1083
+ dom << o
1084
+ elsif write[o[0]].to_a.include? o[1]
1085
+ todo_down << o
1086
+ dom << o
1087
+ elsif o[2] == :up
1088
+ todo_up << o
1089
+ dom_ro << o
1090
+ elsif o[2] == :down
1091
+ todo_down << o
1092
+ dom_wo << o
1093
+ else raise
1094
+ end
1095
+
1096
+ # loop
1097
+ while todo_up.first or todo_down.first # flood until neither direction has pending work
1098
+ todo_up.each { |oo| walk_up[oo[0], oo[1]] }
1099
+ todo_up.clear
1100
+
1101
+ todo_down.each { |oo| walk_down[oo[0], oo[1]] }
1102
+ todo_down.clear
1103
+ end
1104
+
1105
+ unchecked -= dom + dom_wo + dom_ro
1106
+
1107
+ next if func_top # the domain is left under the original name
1108
+
1109
+ # patch
1110
+ n_i = 0
1111
+ n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"]
1112
+
1113
+ nv = var.dup
1114
+ nv.storage = :register if nv.has_attribute_var('register')
1115
+ nv.attributes = nv.attributes.dup if nv.attributes
1116
+ nv.name = newvarname
1117
+ scope.statements << C::Declaration.new(nv)
1118
+ scope.symbol[nv.name] = nv
1119
+
1120
+ dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) }
1121
+ dom_ro.each { |oo| # only the read side (rexpr) of these exprs is moved to nv
1122
+ ce = g.exprs[oo[0]][oo[1]]
1123
+ if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression
1124
+ ce_patch(ce.rexpr, var, nv)
1125
+ else
1126
+ ce.rexpr = nv
1127
+ end
1128
+ }
1129
+ dom_wo.each { |oo| # only the written side (lexpr) of these exprs is moved to nv, function calls are left as is
1130
+ ce = g.exprs[oo[0]][oo[1]]
1131
+ if ce.op == :funcall
1132
+ elsif ce.lexpr.kind_of? C::CExpression
1133
+ ce_patch(ce.lexpr, var, nv)
1134
+ else
1135
+ ce.lexpr = nv
1136
+ end
1137
+ }
1138
+
1139
+ # check if the var is only used as an __out__ parameter
1140
+ if false and dom_ro.empty? and dom_wo.empty? and dom.length == 2 and # TODO (disabled by the leading 'false')
1141
+ arg.has_attribute('out') and not arg.has_attribute('in') # NOTE(review): 'arg' is not assigned anywhere in this method - to be defined before enabling this branch
1142
+ # *(int32*)&var_10 = &var_4;
1143
+ # set_pointed_value(*(int32*)&var_10); => writeonly var_4, may start a new domain
1144
+ nv.add_attribute('out')
1145
+ end
1146
+ end
1147
+ end
1148
+
1149
# revert the unaliasing namechange of vars where no alias subsists
#
# A symbol of +scope+ named <base>_a<n> is renamed back to <base> when no
# other symbol is named <base> or <base>_a<m>. Symbols having a positive
# stackoff are left alone.
def simplify_varname_noalias(scope)
  # 'foo_a3' => 'foo', nil when the name has no alias suffix
  base_of = lambda { |n| n[/^(.*)_a\d+$/, 1] }

  names = scope.symbol.keys
  names.delete_if { |k|
    base = base_of[k]
    next unless base
    next if scope.symbol[k].stackoff.to_i > 0
    # another var still uses the base name
    next if names.find { |n| n != k and (n == base or base_of[n] == base) }
    scope.symbol[base] = scope.symbol.delete(k)
    scope.symbol[base].name = base
  }
end
1161
+
1162
# patch scope to transform :frameoff-x into &var_x
#
# A void-typed variable is created and declared in +scope+ for each stack
# offset listed in scope.decompdata[:stackoff_name] (using the given name)
# and scope.decompdata[:stackoff_type] (using the default name), and for
# each offset found in a 'frameptr +/- integer' expression; those
# expressions are replaced by the address of the variable.
def namestackvars(scope)
  by_offset = {}
  # returns the variable of the stack offset off, creating it with that name if needed
  stackvar = lambda { |off, name|
    by_offset[off] ||= begin
      v = C::Variable.new
      v.type = C::BaseType.new(:void)
      v.name = name
      v.stackoff = off
      scope.symbol[v.name] = v
      scope.statements << C::Declaration.new(v)
      v
    end
  }

  scope.decompdata[:stackoff_name].each { |off, name| stackvar[off, name] }
  scope.decompdata[:stackoff_type].each { |off, _type| stackvar[off, stackoff_to_varname(off)] }

  walk_ce(scope) { |e|
    next unless e.op == :+ or e.op == :-
    next if not e.lexpr.kind_of? C::Variable or e.lexpr.name != 'frameptr'
    next if not e.rexpr.kind_of? C::CExpression or e.rexpr.op or not e.rexpr.rexpr.kind_of? ::Integer
    off = e.rexpr.rexpr
    off = -off if e.op == :-
    e.replace C::CExpression[:&, stackvar[off, stackoff_to_varname(off)]]
  }
end
1190
+
1191
+ # assign type to vars (regs, stack & global)
1192
+ # types are found by subfunction argument types & indirections, and propagated through assignments etc
1193
+ # TODO when updating the type of a var, update the type of all cexprs where it appears
1194
+ def decompile_c_types(scope)
1195
+ return if forbid_decompile_types
1196
+
1197
+ # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct
1198
+
1199
+ # name => type
1200
+ types = {}
1201
+
1202
+ pscopevar = lambda { |e|
1203
+ e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression
1204
+ if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable
1205
+ e.rexpr.name if scope.symbol[e.rexpr.name]
1206
+ end
1207
+ }
1208
+ scopevar = lambda { |e|
1209
+ e = e.rexpr if e.kind_of? C::CExpression and not e.op
1210
+ if e.kind_of? C::Variable and scope.symbol[e.name]
1211
+ e.name
1212
+ elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr
1213
+ pscopevar[e.rexpr]
1214
+ end
1215
+ }
1216
+ globalvar = lambda { |e|
1217
+ e = e.rexpr if e.kind_of? C::CExpression and not e.op
1218
+ if e.kind_of? ::Integer and @dasm.get_section_at(e)
1219
+ e
1220
+ elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name)
1221
+ e.name
1222
+ end
1223
+ }
1224
+
1225
+ # check if a newly found type for o is better than current type
1226
+ # order: foo* > void* > foo
1227
+ better_type = lambda { |t0, t1|
1228
+ t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or
1229
+ (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or
1230
+ (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed])
1231
+ }
1232
+
1233
+ update_global_type = lambda { |e, t|
1234
+ if ne = new_global_var(e, t, scope)
1235
+ ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne
1236
+ # TODO rename (dword_xx -> byte_xx etc)
1237
+ e = scope.symbol_ancestors[e] || e if e.kind_of? String # exe reloc
1238
+ walk_ce(scope) { |ce|
1239
+ ce.lexpr = ne if ce.lexpr == e
1240
+ ce.rexpr = ne if ce.rexpr == e
1241
+ if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? C::Union
1242
+ # *struct -> struct->bla
1243
+ ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type))
1244
+ elsif ce.lexpr == ne or ce.rexpr == ne
1245
+ # set ce type according to l/r
1246
+ # TODO set ce.parent type etc
1247
+ ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type
1248
+ end
1249
+ }
1250
+ end
1251
+ }
1252
+
1253
+ propagate_type = nil # fwd declaration
1254
+ propagating = [] # recursion guard (x = &x)
1255
+ # check if need to change the type of a var
1256
+ # propagate_type if type is updated
1257
+ update_type = lambda { |n, t|
1258
+ next if propagating.include? n
1259
+ o = scope.symbol[n].stackoff
1260
+ next if not o and t.untypedef.kind_of? C::Union
1261
+ next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o]
1262
+ next if t0 = types[n] and not better_type[t, t0]
1263
+ next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned
1264
+ types[n] = t
1265
+ next if t == t0
1266
+ propagating << n
1267
+ propagate_type[n, t]
1268
+ propagating.delete n
1269
+ next if not o
1270
+ t = t.untypedef
1271
+ if t.kind_of? C::Struct
1272
+ t.members.to_a.each { |m|
1273
+ mo = t.offsetof(@c_parser, m.name)
1274
+ next if mo == 0
1275
+ scope.symbol.each { |vn, vv|
1276
+ update_type[vn, m.type] if vv.stackoff == o+mo
1277
+ }
1278
+ }
1279
+ end
1280
+ }
1281
+
1282
+ # try to update the type of a var from knowing the type of an expr (through dereferences etc)
1283
+ known_type = lambda { |e, t|
1284
+ loop do
1285
+ e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t
1286
+ if o = scopevar[e]
1287
+ update_type[o, t]
1288
+ elsif o = globalvar[e]
1289
+ update_global_type[o, t]
1290
+ elsif not e.kind_of? C::CExpression
1291
+ elsif o = pscopevar[e] and t.pointer?
1292
+ update_type[o, t.pointed]
1293
+ elsif e.op == :* and not e.lexpr
1294
+ e = e.rexpr
1295
+ t = C::Pointer.new(t)
1296
+ next
1297
+ elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable
1298
+ e.lexpr, e.rexpr = e.rexpr, e.lexpr
1299
+ next
1300
+ elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression
1301
+ if not e.rexpr.op and e.rexpr.rexpr.kind_of? ::Integer
1302
+ if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0..
1303
+ e = e.lexpr # (int)*(x+2) === (int) *x
1304
+ next
1305
+ elsif globalvar[e.rexpr.rexpr]
1306
+ known_type[e.lexpr, C::BaseType.new(:int)]
1307
+ e = e.rexpr
1308
+ next
1309
+ end
1310
+ elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or
1311
+ (o = scopevar[e.lexpr] and types[o] and types[o].integral? and
1312
+ !(o = scopevar[e.rexpr] and types[o] and types[o].integral?))
1313
+ e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap
1314
+ e = e.lexpr
1315
+ next
1316
+ elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or
1317
+ (o = scopevar[e.rexpr] and types[o] and types[o].integral? and
1318
+ !(o = scopevar[e.lexpr] and types[o] and types[o].integral?)))
1319
+ e = e.lexpr
1320
+ next
1321
+ end
1322
+ end
1323
+ break
1324
+ end
1325
+ }
1326
+
1327
+ # we found a type for a var, propagate it through affectations
1328
+ propagate_type = lambda { |var, type|
1329
+ walk_ce(scope) { |ce|
1330
+ next if ce.op != :'='
1331
+
1332
+ if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var
1333
+ known_type[ce.rexpr, type]
1334
+ next
1335
+ end
1336
+ if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var
1337
+ known_type[ce.lexpr, type]
1338
+ next
1339
+ end
1340
+
1341
+ # int **x; y = **x => int y
1342
+ t = type
1343
+ l = ce.lexpr
1344
+ while l.kind_of? C::CExpression and l.op == :* and not l.lexpr
1345
+ if var == pscopevar[l.rexpr]
1346
+ known_type[ce.rexpr, t]
1347
+ break
1348
+ elsif t.pointer?
1349
+ l = l.rexpr
1350
+ t = t.pointed
1351
+ else break
1352
+ end
1353
+ end
1354
+
1355
+ # int **x; **x = y => int y
1356
+ t = type
1357
+ r = ce.rexpr
1358
+ while r.kind_of? C::CExpression and r.op == :* and not r.lexpr
1359
+ if var == pscopevar[r.rexpr]
1360
+ known_type[ce.lexpr, t]
1361
+ break
1362
+ elsif t.pointer?
1363
+ r = r.rexpr
1364
+ t = t.pointed
1365
+ else break
1366
+ end
1367
+ end
1368
+
1369
+ # TODO int *x; *x = *y; ?
1370
+ }
1371
+ }
1372
+
1373
+ # put all those macros in use
1374
+ # use user-defined types first
1375
+ scope.symbol.each_value { |v|
1376
+ next if not v.kind_of? C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff]
1377
+ known_type[v, t]
1378
+ }
1379
+
1380
+ # try to infer types from C semantics
1381
+ later = []
1382
+ walk_ce(scope) { |ce|
1383
+ if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and
1384
+ ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr)))
1385
+ # var = int
1386
+ known_type[ce.lexpr, ce.rexpr.type]
1387
+ elsif ce.op == :funcall
1388
+ f = ce.lexpr.type
1389
+ f = f.pointed if f.pointer?
1390
+ next if not f.kind_of? C::Function
1391
+ # cast func args to arg prototypes
1392
+ f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] }
1393
+ elsif ce.op == :* and not ce.lexpr
1394
+ if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and
1395
+ e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff
1396
+ # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything
1397
+ later << [ce.rexpr, C::Pointer.new(ce.type)]
1398
+ next
1399
+ end
1400
+ known_type[ce.rexpr, C::Pointer.new(ce.type)]
1401
+ elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function
1402
+ # cast to fptr: must be a fptr
1403
+ known_type[ce.rexpr, ce.type]
1404
+ end
1405
+ }
1406
+
1407
+ later.each { |ce, t| known_type[ce, t] }
1408
+
1409
+ # offsets have types now
1410
+ types.each { |v, t|
1411
+ # keep var type qualifiers
1412
+ q = scope.symbol[v].type.qualifier
1413
+ scope.symbol[v].type = t
1414
+ t.qualifier = q if q
1415
+ }
1416
+
1417
+
1418
+ # remove offsets to struct members
1419
+ # XXX this defeats antialiasing
1420
+ # off => [structoff, membername, membertype]
1421
+ memb = {}
1422
+ types.dup.each { |n, t|
1423
+ v = scope.symbol[n]
1424
+ next if not o = v.stackoff
1425
+ t = t.untypedef
1426
+ if t.kind_of? C::Struct
1427
+ t.members.to_a.each { |tm|
1428
+ moff = t.offsetof(@c_parser, tm.name)
1429
+ next if moff == 0
1430
+ types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff }
1431
+ memb[o+moff] = [v, tm.name, tm.type]
1432
+ }
1433
+ end
1434
+ }
1435
+
1436
+ # patch local variables into the CExprs, incl unknown offsets
1437
+ varat = lambda { |n|
1438
+ v = scope.symbol[n]
1439
+ if s = memb[v.stackoff]
1440
+ v = C::CExpression[s[0], :'.', s[1], s[2]]
1441
+ else
1442
+ v.type = types[n] || C::BaseType.new(:int)
1443
+ end
1444
+ v
1445
+ }
1446
+
1447
+ maycast = lambda { |v, e|
1448
+ if sizeof(v) != sizeof(e)
1449
+ v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]]
1450
+ end
1451
+ v
1452
+ }
1453
+ maycast_p = lambda { |v, e|
1454
+ if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed)
1455
+ C::CExpression[[:&, v], e.type]
1456
+ else
1457
+ C::CExpression[:&, v]
1458
+ end
1459
+ }
1460
+
1461
+ walk_ce(scope, true) { |ce|
1462
+ case
1463
+ when ce.op == :funcall
1464
+ ce.rexpr.map! { |re|
1465
+ if o = scopevar[re]; C::CExpression[maycast[varat[o], re]]
1466
+ elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]]
1467
+ else re
1468
+ end
1469
+ }
1470
+ when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr]
1471
+ when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr]
1472
+ ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable
1473
+ when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr]
1474
+ when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr]
1475
+ when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]]
1476
+ when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]]
1477
+ end
1478
+ }
1479
+
1480
+ fix_type_overlap(scope)
1481
+ fix_pointer_arithmetic(scope)
1482
+
1483
+ # if int32 var_4 is always var_4 & 255, change type to int8
1484
+ varuse = Hash.new(0)
1485
+ varandff = Hash.new(0)
1486
+ varandffff = Hash.new(0)
1487
+ walk_ce(scope) { |ce|
1488
+ if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
1489
+ case ce.rexpr.rexpr
1490
+ when 0xff; varandff[ce.lexpr.name] += 1
1491
+ when 0xffff; varandffff[ce.lexpr.name] += 1
1492
+ end
1493
+ end
1494
+ varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
1495
+ varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
1496
+ }
1497
+ varandff.each { |k, v|
1498
+ scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v
1499
+ }
1500
+ varandffff.each { |k, v|
1501
+ scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v
1502
+ }
1503
+
1504
+ # propagate types to cexprs
1505
+ walk_ce(scope, true) { |ce|
1506
+ if ce.op
1507
+ ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next
1508
+ if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?)
1509
+ known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function # localvar = &struct with fptr
1510
+ ce.rexpr = C::CExpression[[ce.rexpr], ce.type]
1511
+ end
1512
+ elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed)
1513
+ ce.type = ce.rexpr.type
1514
+ end
1515
+ }
1516
+ end
1517
+
1518
# Rewrites "pointer to struct + byte offset" as a member access, eg with
#  struct foo { int i; int j; struct { int k; int l; } m; };  bla+12 => &bla->m.l
# st is the struct/union type, ptr an expression pointing to it, off a numeric
# byte offset from ptr, msz the size of the pointed member (nil: ignored), used
# to choose between members starting at the same offset.
# Offsets past the end of st are turned into an array index (&ptr[idx]).
# Any remaining offset that cannot be mapped to a member start is added through
# an (__int8*) cast.
def structoffset(st, ptr, off, msz)
  st_size = sizeof(st)
  index = off / st_size
  off -= index * st_size
  if index != 0 or ptr.type.untypedef.kind_of? C::Array
    ptr = C::CExpression[:&, [ptr, :'[]', [index]]]
  end

  if off == 0
    return ptr if not msz
    # avoid infinite recursion with eg chained list
    if ptr.kind_of? C::CExpression
      target = if ptr.op == :& and not ptr.lexpr
        ptr.rexpr
      elsif ptr.op == :'.'
        ptr
      end
      return ptr if target and not target.type.untypedef.kind_of? C::Union
    end
  end

  # recursive proc to list all named members, including in anonymous substructs
  named = lambda { |memb|
    if memb.name
      memb
    elsif memb.type.kind_of?(C::Union)
      memb.type.members.to_a.map { |sub| named[sub] }
    end
  }
  members = st.members.to_a.map { |m| named[m] }.flatten.compact

  # member => offset of the member from the start of st
  offsets = {}
  members.each { |m| offsets[m] = st.offsetof(@c_parser, m.name) }

  # prefer a member starting exactly at off (with the requested size, if any),
  # fall back to the member whose storage contains off
  hit = members.find { |m| offsets[m] == off and (not msz or sizeof(m) == msz) }
  hit ||= members.find { |m| offsets[m] <= off and offsets[m] + sizeof(m) > off }
  if hit
    off -= offsets[hit]
    inner = hit.type.untypedef
    # TODO fix infinite recursion on mutually recursive ptrs
    # (&foo)->bar is written foo.bar directly
    access = if ptr.kind_of? C::CExpression and ptr.op == :& and not ptr.lexpr
      C::CExpression[ptr.rexpr, :'.', hit.name]
    else
      C::CExpression[ptr, :'->', hit.name]
    end
    ptr = C::CExpression[:&, access]
    return structoffset(inner, ptr, off, msz) if inner.kind_of? C::Union
  end

  return ptr if off == 0
  C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]]
end
1558
+
1559
# fix pointer arithmetic (eg int foo += 4  =>  int* foo += 1)
# use struct member access (eg *(structptr+8)  =>  structptr->bla)
# must be run only once, right after type setting
#
# Walks every CExpression of scope and rewrites it in place:
# - pointers used with & >> << are cast to int
# - int + ptr is swapped to ptr + int
# - *structptr / structvar = x are redirected to the member found at offset 0
# - structptr + constant becomes a member address (see structoffset)
# - byte offsets added to a typed pointer are scaled by the pointed size, or
#   the pointer is cast to __int8* when the offset is not a multiple of it
def fix_pointer_arithmetic(scope)
  walk_ce(scope, true) { |ce|
    if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op
      ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)]
    end

    if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Union))
      ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
    end

    if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Struct
      s = ce.rexpr.type.pointed.untypedef
      m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
      # no member at offset 0 (eg struct without members) or size mismatch:
      # keep the dereference, through an explicit pointer cast
      if not m or sizeof(m) != sizeof(ce)
        ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)]
        next
      end
      # *structptr => structptr->member
      ce.lexpr = ce.rexpr
      ce.op = :'->'
      ce.rexpr = m.name
      ce.type = m.type
      next
    elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct
      s = ce.lexpr.type.untypedef
      m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
      # struct = x => struct.member = x ; left untouched when there is no
      # member at offset 0 to redirect the assignment to
      if m
        ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type)
        ce.type = m.type
      end
      next
    end

    if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer?
      ce.type = ce.lexpr.type
    end

    # &*x => x
    if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr
      ce.replace C::CExpression[ce.rexpr.rexpr]
    end

    next if not ce.lexpr or not ce.lexpr.type.pointer?
    if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and
        ce.rexpr.rexpr.kind_of? ::Integer and o = ce.rexpr.rexpr
      # structptr + 4 => &structptr->member
      ce.replace structoffset(s, ce.lexpr, o, nil)
    elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or
        (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of? C::CExpression and not i.op and i = i.rexpr) or true))) and
        i.kind_of? ::Integer and psz = sizeof(nil, ce.lexpr.type.pointed) and psz > 0 and i % psz == 0
      # ptr += 4 => ptr += 1
      # (psz > 0: a null or negative pointed size cannot be used as a divisor,
      # such pointers are handled by the byte-pointer cast below)
      if not ce.rexpr.op
        ce.rexpr.rexpr /= psz
      else
        # ptr + 4*x => ptr + x
        ce.rexpr.lexpr.rexpr /= psz
        if ce.rexpr.lexpr.rexpr == 1
          ce.rexpr = ce.rexpr.rexpr
        end
      end
      ce.type = ce.lexpr.type

    elsif (ce.op == :+ or ce.op == :-) and sizeof(nil, ce.lexpr.type.pointed) != 1
      # ptr+x => (ptrtype*)(((__int8*)ptr)+x)
      # XXX create struct ?
      ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] if not ce.rexpr.type.integral?
      ptype = ce.lexpr.type
      p = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))]
      ce.replace C::CExpression[[p, ce.op, ce.rexpr, p.type], ptype]
    end
  }
end
1632
+
1633
# handling of var overlapping (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1))
# must be done before fix_pointer_arithmetic
#
# Every integral stack variable whose storage lies inside another stack
# variable is replaced, in all expressions of scope, by a dereference of the
# address of the enclosing variable plus the offset delta.
def fix_type_overlap(scope)
  # stack variable => [stack offset, size]
  layout = {}
  scope.symbol.each_value { |sym|
    next unless pos = sym.stackoff
    layout[sym] = [pos, sizeof(sym)]
  }

  layout.each { |inner, (i_off, i_len)|
    next unless inner.type.integral?
    layout.each { |outer, (o_off, o_len)|
      # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;)
      # TODO should check stuff with aliasing domains
      next if inner.name == outer.name
      # no byte in common
      next if i_off >= o_off + o_len or i_off + i_len <= o_off
      # only the smaller var is rewritten; on equal sizes, the one at the higher offset
      next if i_len > o_len
      next if o_len == i_len and o_off >= i_off

      # inner => *(&outer+delta)
      alias_expr = C::CExpression[:&, outer]
      alias_expr = C::CExpression[alias_expr, :+, [i_off - o_off]]
      if inner.type != alias_expr.type.type
        alias_expr = C::CExpression[alias_expr, C::Pointer.new(inner.type)]
      end
      alias_expr = C::CExpression[:*, alias_expr]

      walk_ce(scope) { |ce|
        ce.lexpr = alias_expr if ce.lexpr == inner
        ce.rexpr = alias_expr if ce.rexpr == inner
      }
    }
  }
end
1662
+
1663
# to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types
# will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..')
# remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;')
# remove useless casts ('(int)i' with 'int i;' => 'i')
#
# Runs optimize_code once, then optimize_vars twice; returns the value of the
# last optimize_vars call.
def optimize(scope)
  optimize_code(scope)

  # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i)
  result = nil
  2.times { result = optimize_vars(scope) }
  result
end
1672
+
1673
# simplify cexpressions (char & 255, redundant casts, etc)
#
# Does nothing (returns nil) when forbid_optimize_code is set.
# Otherwise makes three passes over scope:
# 1. every CExpression is rewritten in place through a list of local
#    simplification rules (each rule is commented below)
# 2. if some pointer was rewritten as ptr[n], every *ptr on the same variable
#    becomes ptr[0]
# 3. 'if (x != 0)' statements become 'if (x)'
# Some rules call optimize_code recursively on the expression they just built.
def optimize_code(scope)
  return if forbid_optimize_code

  # true if the two types are interchangeable for our purpose: same type,
  # functions (or pointers to functions) with matching prototype, integral
  # base types of the same size, or pointers to such types
  sametype = lambda { |t1, t2|
    t1 = t1.untypedef
    t2 = t2.untypedef
    t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function
    t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function
    t1 == t2 or
    (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
      t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
    (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
    (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
  }

  # operators for which adding the all-ones constant can be turned into the
  # opposite operation with 1
  flip_op = { :+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+=' }

  # most of this is a CExpr#reduce
  future_array = []
  walk_ce(scope, true) { |ce|
    # (whatever)0 => 0
    if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
      ce.replace ce.rexpr
    end

    # *&bla => bla if types ok
    if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type]
      ce.replace C::CExpression[ce.rexpr.rexpr]
    end

    # int x + 0xffffffff -> x-1
    # only for the operators listed in flip_op: any other operator (eg a
    # comparison) has no opposite there and is left untouched, so that ce.op
    # is never overwritten with nil
    if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and flip_op[ce.op] and
        ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1
      ce.op = flip_op[ce.op]
      ce.rexpr.rexpr = 1
    end

    # int *ptr; *(ptr + 4) => ptr[4]
    if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer?
      ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
      future_array << var.name
    end

    # char x; x & 255 => x
    if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and
        not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and
        ce.rexpr.rexpr & m == m
      ce.replace C::CExpression[ce.lexpr]
    end

    # a + -b  =>  a - b
    if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr
      ce.op, ce.rexpr = :-, ce.rexpr.rexpr
    end

    # (((int) i >> 31) & 1)  =>  i < 0
    if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and
        ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and
        not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1
      ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]]
    end

    # a-b == 0  =>  a == b
    if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and
        ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr
      ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
    end

    # (a > 0) != 0
    if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and
        [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op
      ce.replace ce.lexpr
    end

    # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] )  =>  jl
    # a<b  =>  true if !r  =>  a<0 == b<0  or a>=0  =>  a>=0 or b>=0
    # a>=b =>  true if  r  =>  a<0 == b>=0 and a<0  =>  a<0 and b>=0

    # x != (a && (b != x))  =>  [x && (!a || b)] || [!x && !(!a || b)]
    if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and
        ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and
        ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
      x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr
      ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ]
      optimize_code(ce)
    end
    # (a != b) || a  =>  a || b
    if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
      ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr
      optimize_code(ce)
    end
    # (a<b) && !(a>=0 && b<0)  ||  (a>=b) && (a>=0 && b<0)  =>  (signed)a < (signed)b
    if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and
        ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :<
      a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr
      if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and
          ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate
        ce.replace C::CExpression[a, :'<', b]
      end
    end
    # a && 1
    if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
      if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0))
        # the constant is neutral: keep the left side only
        ce.replace C::CExpression[ce.lexpr]
      elsif not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }	# cannot wipe if sideeffect
        # the constant decides the result: a || 1 => 1, a && 0 => 0
        ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]]
      end
    end
    # (b < c || b >= c)
    if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr]
      ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]]
    end

    # (a < b) | (a == b)  =>  a <= b
    if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and
        (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr
      ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op]
      ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
    end

    # a == 0  =>  !a
    if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
      ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr
    end

    # !constant  =>  0 or 1
    if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
      ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]]
    end

    # !(bool) => bool
    if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op
      ce.replace ce.rexpr.negate
    end

    # (foo)(bar)x => (foo)x
    if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression
      ce.rexpr = ce.rexpr.rexpr
    end

    # &struct.1stmember => &struct
    if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and
        s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0
      ce.rexpr = ce.rexpr.lexpr
      ce.type = C::Pointer.new(ce.rexpr.type)
    end

    # (1stmember*)structptr => &structptr->1stmember
    if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and
        s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s
      ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))]
      #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and
      #ce.rexpr.type.pointed.untypedef.kind_of? C::Function)	# XXX ugly
      # int32* v1 = (int32*)pstruct;
      # z = v1+4  if v1 is not cast, the + is invalid (sizeof pointed changes)
      # TODO when finding type of pstruct, set type of v1 accordingly
    end

    # (&foo)->bar => foo.bar
    if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr
      ce.lexpr = ce.lexpr.rexpr
      ce.op = :'.'
    end

    # (foo)bla => bla if bla of type foo
    if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type]
      ce.replace C::CExpression[ce.rexpr]
    end
    if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type
      ce.lexpr = ce.lexpr.rexpr
    end

    # *(foo*)x = bar  =>  retype the cast with the type of the assigned value
    if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and
        not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type
      ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type)
      optimize_code(ce.lexpr)
    end
  }

  # if there is a ptr[4], change all *ptr to ptr[0] for consistency
  # do this after the first pass, which may change &*ptr to ptr
  walk_ce(scope) { |ce|
    if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name
      ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0]
    end
  } if not future_array.empty?

  # if (x != 0) => if (x)
  walk(scope) { |st|
    if st.kind_of? C::If and st.test.kind_of? C::CExpression and st.test.op == :'!=' and
        st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0
      st.test = C::CExpression[st.test.lexpr]
    end
  }
end
1866
+
1867
# checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified)
# exp may be nil, a Numeric, a String, an Array of exprs, a C::Variable or a
# C::CExpression; anything else is reported as having a sideeffect.
# When scope is given, a variable whose name is not in scope.symbol counts as
# a sideeffect; volatile variables always do.
def sideeffect(exp, scope=nil)
  case exp
  when nil, ::Numeric, ::String
    false
  when ::Array
    exp.any? { |elem| sideeffect(elem, scope) }
  when C::Variable
    out_of_scope = (scope and not scope.symbol[exp.name])
    out_of_scope or exp.type.qualifier.to_a.include?(:volatile)
  when C::CExpression
    if (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op)
      # memory dereference, function call or assignment
      true
    else
      sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
    end
  else
    true	# failsafe
  end
end
1878
+
1879
+ # converts C code to a graph of cexprs (nodes = cexprs, edges = codepaths)
1880
+ # returns a CGraph
1881
# Plain data holder for a graph of cexprs; all fields are read/write.
class CGraph
  # exprs: label => [exprs]
  # to: label => [labels]
  # block: label => are exprs standalone (vs If#test)
  attr_accessor :exprs, :to, :block
  # start: 1st label
  attr_accessor :start
  # simplified edge tables (successors / predecessors per label)
  attr_accessor :to_optim, :from_optim
end
1885
# Builds the CGraph of the statement st.
# Labels are either C label names or integers allocated from a counter
# (the start label is 0).
# to/exprs/block are filled by walking the statements; to_optim is a copy of
# to where labels holding no expression and having at most one successor are
# short-circuited, and from_optim is the reverse of to_optim.
def c_to_graph(st)
  graph = CGraph.new
  graph.exprs = {}	# label => [exprs]
  graph.to = {}	# label => [labels]
  graph.block = {}	# label => is label in a block? (vs If#test)

  # when no label is there, allocate a new integer one
  last_label = 0
  new_label = lambda { last_label += 1 }

  # converts C code to a graph of codepath of cexprs
  # cur: label of stmt, after: label reached once stmt is done,
  # cont/brk: targets of continue/break inside stmt
  build = lambda { |stmt, cur, after, cont, brk|
    case stmt
    when C::Label
      graph.to[cur] = [stmt.name]
      graph.to[stmt.name] = [after]
    when C::Goto
      graph.to[cur] = [stmt.target]
    when C::Continue
      graph.to[cur] = [cont]
    when C::Break
      graph.to[cur] = [brk]
    when C::CExpression
      graph.exprs[cur] = [stmt]
      graph.to[cur] = [after]
    when C::Return
      graph.exprs[cur] = [stmt.value] if stmt.value
      graph.to[cur] = []
    when C::Block
      build[stmt.statements, cur, after, cont, brk]
    when ::Array
      graph.exprs[cur] = []
      graph.block[cur] = true
      stmt.each { |sub|
        case sub
        when C::Declaration
          # nothing to record
        when C::CExpression
          graph.exprs[cur] << sub
        else
          # any other statement gets its own entry label, the following
          # expressions are collected under a new exit label
          entry = new_label.call
          leave = new_label.call
          graph.to[cur] = [entry]
          # NOTE(review): this flags the label being left, the new label is
          # only flagged if another non-expression statement follows - confirm
          # this is intended
          graph.block[cur] = true
          build[sub, entry, leave, cont, brk]
          cur = leave
          graph.exprs[cur] = []
        end
      }
      graph.to[cur] = [after].compact
    when C::If
      graph.exprs[cur] = [stmt.test]
      l_then = new_label.call
      build[stmt.bthen, l_then, after, cont, brk]
      l_else = new_label.call
      build[stmt.belse, l_else, after, cont, brk]
      graph.to[cur] = [l_then, l_else]
    when C::While, C::DoWhile
      fresh = new_label.call
      # do..while enters through the body, while enters through the test
      l_test, l_body = stmt.kind_of?(C::DoWhile) ? [fresh, cur] : [cur, fresh]
      graph.exprs[l_test] = [stmt.test]
      graph.to[l_test] = [l_body, after]
      build[stmt.body, l_body, l_test, l_test, after]
    when C::Asm, nil
      graph.to[cur] = [after]
    else
      puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE
    end
  }

  graph.start = last_label
  build[st, graph.start, nil, nil, nil]

  # optimize graph
  graph.to_optim = {}
  graph.to.each { |label, targets| graph.to_optim[label] = targets.uniq }
  graph.exprs.delete_if { |_label, list| list.empty? }
  graph.to_optim.delete_if { |label, targets|
    # labels holding expressions are always kept
    next false if graph.exprs[label]
    if targets.length == 1 and targets != [label]
      # single exit: redirect the edges reaching label to its successor
      succ = targets.first
      graph.to_optim.each_value { |edges|
        idx = edges.index(label)
        edges[idx] = succ if idx
      }
      true
    elsif targets.empty?
      # dead end: drop the edges reaching label
      graph.to_optim.each_value { |edges| edges.delete label }
      true
    end
  }

  graph.from_optim = {}
  graph.to_optim.each { |label, targets|
    targets.each { |target| (graph.from_optim[target] ||= []) << label }
  }

  graph
end
1969
+
1970
+ # dataflow optimization
1971
+ # condenses expressions (++x; if (x) => if (++x))
1972
+ # remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc)
1973
+ def optimize_vars(scope)
1974
+ return if forbid_optimize_dataflow
1975
+
1976
+ g = c_to_graph(scope)
1977
+
1978
+ # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..)
1979
+ # returns the first subexpr to read var in ce
1980
+ # returns :write if var is rewritten
1981
+ # returns nil if var not read
1982
+ # may return a cexpr var += 2
1983
+ find_next_read_ce = lambda { |ce_, var|
1984
+ walk_ce(ce_, true) { |ce|
1985
+ case ce.op
1986
+ when :funcall
1987
+ break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var }
1988
+ when :'='
1989
+ # a=a / a=a+1 => yield a, not :write
1990
+ break ce if ce.rexpr == var
1991
+ break :write if ce.lexpr == var
1992
+ else
1993
+ break ce if ce.lexpr == var or ce.rexpr == var
1994
+ end
1995
+ }
1996
+ }
1997
+
1998
+ # badlabels is a list of labels that may be reached without passing through the first invocation block
1999
+ find_next_read_rec = lambda { |label, idx, var, done, badlabels|
2000
+ next if done.include? label
2001
+ done << label if idx == 0
2002
+
2003
+ idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var]
2004
+ next ret if ret
2005
+
2006
+ to = g.to_optim[label].to_a.map { |t|
2007
+ break [:split] if badlabels.include? t
2008
+ find_next_read_rec[t, 0, var, done, badlabels]
2009
+ }.compact
2010
+
2011
+ tw = to - [:write]
2012
+ if to.include? :split or tw.length > 1
2013
+ :split
2014
+ elsif tw.length == 1
2015
+ tw.first
2016
+ elsif to.include? :write
2017
+ :write
2018
+ end
2019
+ }
2020
+ # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason
2021
+ find_next_read = nil
2022
+ find_prev_read_rec = lambda { |label, idx, var, done|
2023
+ next if done.include? label
2024
+ done << label if idx == g.exprs[label].length-1
2025
+
2026
+ idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var]
2027
+ if ret.kind_of? C::CExpression
2028
+ fwchk = find_next_read[label, idx+1, var]
2029
+ ret = fwchk if not fwchk.kind_of? C::CExpression
2030
+ end
2031
+ next ret if ret
2032
+
2033
+ from = g.from_optim[label].to_a.map { |f|
2034
+ find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done]
2035
+ }.compact
2036
+
2037
+ next :split if from.include? :split
2038
+ fw = from - [:write]
2039
+ if fw.length == 1
2040
+ fw.first
2041
+ elsif fw.length > 1
2042
+ :split
2043
+ elsif from.include? :write
2044
+ :write
2045
+ end
2046
+ }
2047
+
2048
+ # list of labels reachable without using a label
2049
+ badlab = {}
2050
+ build_badlabel = lambda { |label|
2051
+ next if badlab[label]
2052
+ badlab[label] = []
2053
+ todo = [g.start]
2054
+ while l = todo.pop
2055
+ next if l == label or badlab[label].include? l
2056
+ badlab[label] << l
2057
+ todo.concat g.to_optim[l].to_a
2058
+ end
2059
+ }
2060
+
2061
+ # returns the next subexpr where var is read
2062
+ # returns :write if var is written before being read
2063
+ # returns :split if the codepath splits with both subpath reading or codepath merges with another
2064
+ # returns nil if var is never read
2065
+ # idx is the index of the first cexpr at g.exprs[label] to look at
2066
+ find_next_read = lambda { |label, idx, var|
2067
+ find_next_read_rec[label, idx, var, [], []]
2068
+ }
2069
+ find_prev_read = lambda { |label, idx, var|
2070
+ find_prev_read_rec[label, idx, var, []]
2071
+ }
2072
+ # same as find_next_read, but returns :split if there exists a path from g.start to the read without passing through label
2073
+ find_next_read_bl = lambda { |label, idx, var|
2074
+ build_badlabel[label]
2075
+ find_next_read_rec[label, idx, var, [], badlab[label]]
2076
+ }
2077
+
2078
+ # walk each node, optimize data accesses there
2079
+ # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later.
2080
+ g.exprs.each { |label, exprs|
2081
+ next if not g.block[label]
2082
+ i = 0
2083
+ while i < exprs.length
2084
+ e = exprs[i]
2085
+ i += 1
2086
+
2087
+ # TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code)
2088
+ # needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?)
2089
+ if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and
2090
+ scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile
2091
+ next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and
2092
+ !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
2093
+ next if oe.op == :& and not oe.lexpr # no &(++eax)
2094
+
2095
+ # merge pre/postincrement into next/prev var usage
2096
+ # find_prev_read must fwd check when it finds something, to avoid
2097
+ # while(x) x++; return x; to be converted to while(x++); return x; (return wrong value)
2098
+ case oe.op
2099
+ when e.op
2100
+ # bla(i--); --i bla(--i); --i ++i; bla(i++) => ignore
2101
+ next if pos == :pre or oe.lexpr
2102
+ # ++i; bla(++i) => bla(i += 2)
2103
+ oe.lexpr = oe.rexpr
2104
+ oe.op = ((oe.op == :'++') ? :'+=' : :'-=')
2105
+ oe.rexpr = C::CExpression[2]
2106
+
2107
+ when :'++', :'--' # opposite of e.op
2108
+ if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr)
2109
+ # ++i; bla(--i) => bla(i)
2110
+ # bla(i--); ++i => bla(i)
2111
+ oe.op = nil
2112
+ elsif pos == :post
2113
+ # ++i; bla(i--) => bla(i+1)
2114
+ oe.op = ((oe.op == :'++') ? :- : :+)
2115
+ oe.rexpr = C::CExpression[1]
2116
+ elsif pos == :pre
2117
+ # bla(--i); ++i => bla(i-1)
2118
+ oe.lexpr = oe.rexpr
2119
+ oe.op = ((oe.op == :'++') ? :+ : :-)
2120
+ oe.rexpr = C::CExpression[1]
2121
+ end
2122
+ when :'+=', :'-='
2123
+ # TODO i++; i += 4 => i += 5
2124
+ next
2125
+ when *AssignOp
2126
+ next # ++i; i |= 4 => ignore
2127
+ else
2128
+ if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v]
2129
+ elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v]
2130
+ elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op]
2131
+ elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op]
2132
+ else raise 'foobar' # find_dir_read failed
2133
+ end
2134
+ end
2135
+
2136
+ i -= 1
2137
+ exprs.delete_at(i)
2138
+ e.lexpr = e.op = e.rexpr = nil
2139
+
2140
+
2141
+ elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and
2142
+ not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v]
2143
+
2144
+ # reduce trivial static assignments
2145
+ if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or
2146
+ (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or
2147
+ (e.rexpr.kind_of? C::Variable and e.rexpr.type.kind_of? C::Array)
2148
+ rewritten = false
2149
+ readers = []
2150
+ discard = [e]
2151
+ g.exprs.each { |l, el|
2152
+ el.each_with_index { |ce, ci|
2153
+ if ce_write(ce, v) and [label, i-1] != [l, ci]
2154
+ if ce == e
2155
+ discard << ce
2156
+ else
2157
+ rewritten = true
2158
+ break
2159
+ end
2160
+ elsif ce_read(ce, v)
2161
+ if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v }
2162
+ # i = 2 ; j = &i =!> j = &2
2163
+ rewritten = true
2164
+ break
2165
+ end
2166
+ readers << ce
2167
+ end
2168
+ } if not rewritten
2169
+ }
2170
+ if not rewritten
2171
+ ce_patch(readers, v, C::CExpression[iv || e.rexpr])
2172
+ discard.each { |d| d.lexpr = d.op = d.rexpr = nil }
2173
+ next
2174
+ end
2175
+ end
2176
+
2177
+ case nr = find_next_read[label, i, v]
2178
+ when C::CExpression
2179
+ # read in one place only, try to patch rexpr in there
2180
+ r = e.rexpr
2181
+
2182
+ # must check for conflicts (x = y; y += 1; foo(x) =!> foo(y))
2183
+ # XXX x = a[1]; *(a+1) = 28; foo(x)...
2184
+ isfunc = false
2185
+ depend_vars = []
2186
+ walk_ce(C::CExpression[r]) { |ce|
2187
+ isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or
2188
+ not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ?
2189
+ depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable
2190
+ depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v)
2191
+ depend_vars << ce if ce.lvalue?
2192
+ depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array
2193
+ }
2194
+ depend_vars.uniq!
2195
+
2196
+ # XXX x = 1; if () { x = 2; } foo(x) =!> foo(1) (find_next_read will return this)
2197
+ # we'll just redo a find_next_read like
2198
+ # XXX b = &a; a = 1; *b = 2; foo(a) unhandled & generate bad C
2199
+ l_l = label
2200
+ l_i = i
2201
+ while g.exprs[l_l].to_a.each_with_index { |ce_, n_i|
2202
+ next if n_i < l_i
2203
+ # count occurrences of read v in ce_
2204
+ cnt = 0
2205
+ bad = false
2206
+ walk_ce(ce_) { |ce|
2207
+ case ce.op
2208
+ when :funcall
2209
+ bad = true if isfunc
2210
+ ce.rexpr.each { |a| cnt += 1 if a == v }
2211
+ cnt += 1 if ce.lexpr == v
2212
+ when :'='
2213
+ bad = true if depend_vars.include? ce.lexpr
2214
+ cnt += 1 if ce.rexpr == v
2215
+ else
2216
+ bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr
2217
+ bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr
2218
+ cnt += 1 if ce.lexpr == v
2219
+ cnt += 1 if ce.rexpr == v
2220
+ end
2221
+ }
2222
+ case cnt
2223
+ when 0
2224
+ break if bad
2225
+ next
2226
+ when 1 # good
2227
+ break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable
2228
+ # x = 1; y = x; z = x; => cannot suppress x
2229
+ nr = find_next_read[l_l, n_i+1, v]
2230
+ break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v }
2231
+ else break # a = 1; b = a + a => fail
2232
+ end
2233
+
2234
+ # TODO XXX x = 1; y = x; z = x;
2235
+ res = walk_ce(ce_, true) { |ce|
2236
+ case ce.op
2237
+ when :funcall
2238
+ if ce.rexpr.to_a.each_with_index { |a,i_|
2239
+ next if a != v
2240
+ ce.rexpr[i_] = r
2241
+ break :done
2242
+ } == :done
2243
+ break :done
2244
+ elsif ce.lexpr == v
2245
+ ce.lexpr = r
2246
+ break :done
2247
+ elsif isfunc
2248
+ break :fail
2249
+ end
2250
+ when *AssignOp
2251
+ break :fail if not ce.lexpr and depend_vars.include? ce.rexpr # ++depend
2252
+ if ce.rexpr == v
2253
+ ce.rexpr = r
2254
+ break :done
2255
+ elsif ce.lexpr == v or depend_vars.include? ce.lexpr
2256
+ break :fail
2257
+ end
2258
+ else
2259
+ break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v
2260
+ if ce.lexpr == v
2261
+ ce.lexpr = r
2262
+ break :done
2263
+ elsif ce.rexpr == v
2264
+ ce_.type = r.type if not ce_.op and ce_.rexpr == v # return (int32)eax
2265
+ ce.rexpr = r
2266
+ break :done
2267
+ end
2268
+ end
2269
+ }
2270
+ case res
2271
+ when :done
2272
+ i -= 1
2273
+ exprs.delete_at(i)
2274
+ e.lexpr = e.op = e.rexpr = nil
2275
+ break
2276
+ when :fail
2277
+ break
2278
+ end
2279
+ }
2280
+ # ignore branches that will never reuse v
2281
+ may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression }
2282
+ if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l]
2283
+ l_i = 0
2284
+ l_l = to
2285
+ else break
2286
+ end
2287
+ end
2288
+
2289
+ when nil, :write
2290
+ # useless assignment (value never read later)
2291
+ # XXX foo = &bar; bar = 12; baz(*foo)
2292
+ e.replace(C::CExpression[e.rexpr])
2293
+ # remove sideeffectless subexprs
2294
+ loop do
2295
+ case e.op
2296
+ when :funcall, *AssignOp
2297
+ else
2298
+ l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr))
2299
+ r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr))
2300
+ if l and r # could split...
2301
+ elsif l
2302
+ e.replace(e.lexpr)
2303
+ next
2304
+ elsif r
2305
+ e.replace(e.rexpr)
2306
+ next
2307
+ else # remove the assignment altogether
2308
+ i -= 1
2309
+ exprs.delete_at(i)
2310
+ e.lexpr = e.op = e.rexpr = nil
2311
+ end
2312
+ end
2313
+ break
2314
+ end
2315
+ end
2316
+ end
2317
+ end
2318
+ }
2319
+
2320
+ # wipe cexprs marked in the previous step
2321
+ walk(scope) { |st|
2322
+ next if not st.kind_of? C::Block
2323
+ st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr }
2324
+ }
2325
+
2326
+ # reoptimize cexprs
2327
+ walk_ce(scope, true) { |ce|
2328
+ # redo some simplification that may become available after variable propagation
2329
+ # int8 & 255 => int8
2330
+ if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1
2331
+ ce.replace C::CExpression[ce.lexpr]
2332
+ end
2333
+
2334
+ # int *ptr; *(ptr + 4) => ptr[4]
2335
+ if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer?
2336
+ ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
2337
+ end
2338
+
2339
+ # useless casts
2340
+ if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or
2341
+ (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union)) # keep ((struct*)0)->memb
2342
+ ce.rexpr = ce.rexpr.rexpr
2343
+ end
2344
+ if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?))
2345
+ ce.replace ce.rexpr
2346
+ end
2347
+ # useless casts (type)*((oeua)Ptype)
2348
+ if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and
2349
+ p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed
2350
+ ce.op = ce.rexpr.op
2351
+ ce.rexpr = ce.rexpr.rexpr.rexpr
2352
+ end
2353
+ # (a > 0) != 0
2354
+ if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and
2355
+ [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op
2356
+ ce.replace ce.lexpr
2357
+ end
2358
+ # a == 0 => !a
2359
+ if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
2360
+ ce.replace C::CExpression[:'!', ce.lexpr]
2361
+ end
2362
+ # !(int)a => !a
2363
+ if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression
2364
+ ce.rexpr = ce.rexpr.rexpr
2365
+ end
2366
+ # (int)a < (int)b => a < b TODO uint <-> int
2367
+ if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and
2368
+ ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer?
2369
+ ce.rexpr = ce.rexpr.rexpr
2370
+ ce.lexpr = ce.lexpr.rexpr
2371
+ end
2372
+
2373
+ # a & 3 & 1
2374
+ while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and
2375
+ ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == ce.op and ce.lexpr.lexpr and
2376
+ ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.rexpr.kind_of? ::Integer
2377
+ ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr)
2378
+ end
2379
+
2380
+ # x = x | 4 => x |= 4
2381
+ if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include? ce.rexpr.op and ce.rexpr.lexpr == ce.lexpr
2382
+ ce.op = (ce.rexpr.op.to_s + '=').to_sym
2383
+ ce.rexpr = ce.rexpr.rexpr
2384
+ end
2385
+
2386
+ # x += 1 => ++x
2387
+ if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1
2388
+ ce.lexpr, ce.op, ce.rexpr = nil, {:'+=' => :'++', :'-=' => :'--'}[ce.op], ce.lexpr
2389
+ end
2390
+
2391
+ # --x+1 => x--
2392
+ if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and
2393
+ ce.lexpr.rexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1
2394
+ ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil
2395
+ end
2396
+ }
2397
+ end
2398
+
2399
# Removes from +scope+ the local variables that no expression refers to.
#
# A symbol counts as referenced when a C::Variable with its name shows up as
# the lexpr or rexpr of a walked expression, or as a direct element of an
# Array rexpr. Every other symbol of +scope+ is tagged with the 'unused'
# attribute, then its declaration statement and its symbol table entry are
# deleted.
def remove_unreferenced_vars(scope)
  referenced = {}
  note = lambda { |operand| referenced[operand.name] = true if operand.kind_of? C::Variable }

  walk_ce(scope) { |ce|
    note[ce.rexpr]
    note[ce.lexpr]
    ce.rexpr.each { |arg| note[arg] } if ce.rexpr.kind_of?(::Array)
  }

  dead = scope.symbol.keys.reject { |name| referenced[name] }
  # tag the variable object before dropping it: fastcall args need it
  dead.each { |name| scope.symbol[name].add_attribute 'unused' }
  scope.statements.delete_if { |sm| sm.kind_of? C::Declaration and dead.include? sm.var.name }
  scope.symbol.delete_if { |name, _var| dead.include? name }
end
2412
+
2413
# Last step of the decompilation: runs the whole-program optimization of
# global variables (optimize_global).
# Always returns true.
def finalize
  optimize_global
  true
end
2417
+
2418
# Post-processes the global variables declared in @c_parser.toplevel.
#
# 1. counts, per non-function toplevel variable, the expressions that have it
#    as lexpr or rexpr; variables that are never counted are left alone
# 2. when every counted reference of a variable is a dereference (unary * or
#    -> applied to an op-less CExpression wrapping the variable, with matching
#    pointed sizes), the variable takes its element type and the dereferences
#    are rewritten as plain accesses
# 3. a global whose counted references all sit in one function body is moved
#    there: a single-use String initializer is inlined, anything else becomes
#    a static local declared at the top of that function
#
# NOTE(review): only Variables found directly as lexpr/rexpr are counted;
# Variables stored directly inside an Array rexpr are not looked at here --
# confirm this cannot hide references.
def optimize_global
  # check all global vars (pointers to global data)
  tl = @c_parser.toplevel
  vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of? C::Variable and not tl.symbol[k].type.kind_of? C::Function }
  countref = Hash.new(0)

  walk_ce(tl) { |ce|
    # XXX int foo; void bar() { int foo; } => false negative
    countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
    countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
  }

  # keep only the globals that are referenced, and only their counters
  vars.delete_if { |v| countref[v] == 0 }
  countref.delete_if { |k, v| not vars.include? k }

  # by default globals are C::Arrays
  # if all references are *foo, dereference the var type
  # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc)
  countderef = Hash.new(0)
  walk_ce(tl) { |ce|
    if ce.op == :* and not ce.lexpr
      r = ce.rexpr
    elsif ce.op == :'->'
      r = C::CExpression[ce.lexpr]
    else next
    end
    # compare type.type cause var is an Array and the cast is a Pointer
    # the rescue modifier covers the whole statement: any error raised while
    # inspecting the types simply leaves the counter untouched
    countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and
      sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
  }
  vars.each { |n|
    if countref[n] == countderef[n]
      v = tl.symbol[n]
      # shape of the dereference to replace, built before the type changes
      target = C::CExpression[:*, [v]]
      v.type = v.type.type
      v.initializer = v.initializer.first if v.initializer.kind_of? ::Array
      walk_ce(tl) { |ce|
        if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
          ce.op = :'.'
        elsif ce.lexpr == target
          ce.lexpr = v
        end
        ce.rexpr = v if ce.rexpr == target
        ce.lexpr, ce.op, ce.rexpr = nil, nil, v if ce == target
      }
    end
  }

  # if a global var appears only in one function, make it a static variable
  # iterate over a snapshot: the loop body deletes declarations from
  # tl.statements, and deleting from the array being walked by #each would
  # make the walk skip the statements that follow
  tl.statements.dup.each { |st|
    next if not st.kind_of? C::Declaration or not st.var.type.kind_of? C::Function or not scope = st.var.initializer
    localcountref = Hash.new(0)
    walk_ce(scope) { |ce|
      localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
      localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
    }

    vars.delete_if { |n|
      # skip names shadowed by a local, and globals also referenced elsewhere
      next if scope.symbol[n]
      next if localcountref[n] != countref[n]
      v = tl.symbol.delete(n)
      tl.statements.delete_if { |d| d.kind_of? C::Declaration and d.var.name == n }

      if countref[n] == 1 and v.initializer.kind_of? C::CExpression and v.initializer.rexpr.kind_of? String
        # single use of a string: inline the initializer at the use site
        walk_ce(scope) { |ce|
          if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == n
            if not ce.op
              ce.replace v.initializer
            else
              ce.rexpr = v.initializer
            end
          elsif ce.lexpr.kind_of? C::Variable and ce.lexpr.name == n
            ce.lexpr = v.initializer
          end
        }
      else
        v.storage = :static
        scope.symbol[v.name] = v
        scope.statements.unshift C::Declaration.new(v)
      end

      # the variable was relocated: drop it from the candidates
      true
    }
  }
end
2503
+
2504
# reorder statements to put decl first, move assignments to decl, move args to func prototype
#
# scope:: the function body; its symbol table and statement list are modified in place
# func::  the function variable; func.type.args is rebuilt
#
# - symbols whose type is void are retyped as int
# - declarations are pulled out of the statement list: variables with a
#   positive stackoff (and no _a<N> alias suffix) are appended to the
#   argument list, the others are re-inserted at the top of the body
# - leading 'var = <integer>' statements on symbols of scope become initializers
# - the argument list is rebuilt in stack offset order, one pointer-sized slot
#   per argument, with placeholder int arguments tagged 'unused' filling the holes
def cleanup_var_decl(scope, func)
  scope.symbol.each_value { |v| v.type = C::BaseType.new(:int) if v.type.void? }

  args = func.type.args
  decl = []
  scope.statements.delete_if { |sm|
    next if not sm.kind_of? C::Declaration
    if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/	# aliased vars: use 1st domain only
      args << sm.var
    else
      decl << sm
    end
    true
  }

  # move trivial assignments to initializer
  # XXX a = 1 ; b = a ; a = 2
  go = true	# break from delete_if does not delete..
  scope.statements.delete_if { |st|
    if go and st.kind_of? C::CExpression and st.op == :'=' and st.rexpr.kind_of? C::CExpression and not st.rexpr.op and
        st.rexpr.rexpr.kind_of? ::Integer and st.lexpr.kind_of? C::Variable and scope.symbol[st.lexpr.name]
      # the assigned value is truthy, so the statement gets deleted
      st.lexpr.initializer = st.rexpr
    else
      # first non-trivial statement: stop folding (and keep everything after it)
      go = false
    end
  }

  # reorder declarations
  scope.statements[0, 0] = decl.sort_by { |sm| [-sm.var.stackoff.to_i, sm.var.name] }

  # ensure arglist has no hole (create&add unreferenced args)
  func.type.args = []
  argoff = @c_parser.typesize[:ptr]
  # sort_by is not stable and every argument without a stackoff has key 0:
  # break ties with the original position so those keep their relative order
  rank = 0
  args.sort_by { |a| [a.stackoff.to_i, rank += 1] }.each { |a|
    # XXX misalignment ?
    if not curoff = a.stackoff
      func.type.args << a	# __fastcall
      next
    end
    while curoff > argoff
      wantarg = C::Variable.new
      wantarg.name = scope.decompdata[:stackoff_name][argoff] || stackoff_to_varname(argoff)
      wantarg.type = C::BaseType.new(:int)
      wantarg.attributes = ['unused']
      func.type.args << wantarg
      scope.symbol[wantarg.name] = wantarg
      argoff += @c_parser.typesize[:ptr]
    end
    func.type.args << a
    argoff += @c_parser.typesize[:ptr]
  }
end
2557
+
2558
# Gives local variables of +scope+ friendlier names, taken from the way they are used:
# - a variable passed to a function takes the name of the matching prototype argument
# - a variable whose address is passed takes the argument name minus a leading 'p'/'lp'
# - a variable that is both incremented (++) and compared to an integer is named 'cntr'
# Only symbols of +scope+ that are named var_*/arg_* or stored in a register
# are renamed, and never to a var_*/arg_* name.
def rename_variables(scope)
  calls = []
  incremented = []
  compared = []
  cmp_ops = [:<, :>, :<=, :>=, :==, :'!=']

  walk_ce(scope) { |ce|
    if ce.op == :funcall
      calls << ce
    elsif ce.op == :'++'
      incremented << (ce.lexpr || ce.rexpr)
    end
    if cmp_ops.include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.kind_of? ::Integer
      compared << ce.lexpr
    end
  }

  rename = lambda { |var, name|
    # look through a bare wrapper expression
    var = var.rexpr if var.kind_of? C::CExpression and not var.op
    next unless var.kind_of? C::Variable and scope.symbol[var.name] and name
    autonamed = (var.name =~ /^(var|arg)_/)
    next unless autonamed or var.storage == :register
    next if name =~ /^(var|arg)_/

    # append a counter until the name is free in this scope and its ancestors
    taken = scope.symbol_ancestors
    candidate = name
    suffix = 0
    candidate = name + "#{suffix += 1}" while taken[candidate]
    scope.symbol[candidate] = scope.symbol.delete(var.name)
    var.name = candidate
  }

  # first pass: arguments passed as is
  calls.each { |ce|
    callee = ce.lexpr
    next unless callee.kind_of? C::Variable and callee.type.kind_of? C::Function
    ce.rexpr.to_a.zip(callee.type.args.to_a).each { |actual, formal|
      rename[actual, formal.name] if formal
    }
  }

  # second pass: arguments passed by address (&var)
  calls.each { |ce|
    callee = ce.lexpr
    next unless callee.kind_of? C::Variable and callee.type.kind_of? C::Function
    ce.rexpr.to_a.zip(callee.type.args.to_a).each { |actual, formal|
      next unless actual.kind_of? C::CExpression and actual.op == :& and not actual.lexpr
      next unless formal and formal.name
      rename[actual.rexpr, formal.name.sub(/^l?p/, '')]
    }
  }

  (incremented & compared).each { |v| rename[v, 'cntr'] }
end
2596
+
2597
# Yields every CExpression reachable from +ce+, recursively.
# A CExpression is yielded before its lexpr and rexpr are walked, or after
# them when +post+ is true. Arrays are walked element by element.
# Given something else, digs for CExpressions: the statements of a Block, the
# test and branches of an If, the test and body of a While/DoWhile, the value
# of a Return, the initializer of a Declaration. Anything else is ignored.
# Always returns nil.
def walk_ce(ce, post=false, &b)
  if ce.kind_of? C::CExpression
    yield ce unless post
    walk_ce(ce.lexpr, post, &b)
    walk_ce(ce.rexpr, post, &b)
    yield ce if post
  elsif ce.kind_of? ::Array
    ce.each { |elem| walk_ce(elem, post, &b) }
  elsif ce.kind_of? C::Statement
    if ce.kind_of? C::Block
      walk_ce(ce.statements, post, &b)
    elsif ce.kind_of? C::If
      walk_ce(ce.test, post, &b)
      walk_ce(ce.bthen, post, &b)
      walk_ce(ce.belse, post, &b) if ce.belse
    elsif ce.kind_of?(C::While) or ce.kind_of?(C::DoWhile)
      walk_ce(ce.test, post, &b)
      walk_ce(ce.body, post, &b)
    elsif ce.kind_of? C::Return
      walk_ce(ce.value, post, &b) if ce.value
    end
  elsif ce.kind_of? C::Declaration
    walk_ce(ce.var.initializer, post, &b) if ce.var.initializer
  end
  nil
end
2626
+
2627
# Yields each statement found in +scope+, recursively.
# A statement is yielded before its sub-statements, or after them when +post+
# is true. The test of an If/While/DoWhile and the value of a Return are
# yielded as is (the Return value is yielded even when it is nil) and are not
# walked any further.
# Arrays are walked element by element; for a Declaration the initializer is
# walked when there is one.
def walk(scope, post=false, &b)
  if scope.kind_of? ::Array
    scope.each { |s| walk(s, post, &b) }
  elsif scope.kind_of? C::Statement
    yield scope unless post
    if scope.kind_of? C::Block
      walk(scope.statements, post, &b)
    elsif scope.kind_of? C::If
      yield scope.test
      walk(scope.bthen, post, &b)
      walk(scope.belse, post, &b) if scope.belse
    elsif scope.kind_of?(C::While) or scope.kind_of?(C::DoWhile)
      yield scope.test
      walk(scope.body, post, &b)
    elsif scope.kind_of? C::Return
      yield scope.value
    end
    yield scope if post
  elsif scope.kind_of? C::Declaration
    walk(scope.var.initializer, post, &b) if scope.var.initializer
  end
end
2650
+
2651
# Size of +var+ (or of +type+ when given), as computed by @c_parser.
# May be called with a C::Type as sole argument.
# An Array type not carried by a C::Variable is sized like a pointer
# (handles cast to Array, these should not happen btw...).
# Returns -1 when @c_parser.sizeof raises.
def sizeof(var, type=nil)
  if var.kind_of? C::Type and not type
    # sizeof(type) form: shift the argument
    type = var
    var = nil
  end
  type = var.type unless type

  if type.kind_of? C::Array and not var.kind_of? C::Variable
    return @c_parser.typesize[:ptr]
  end

  begin
    @c_parser.sizeof(var, type)
  rescue StandardError
    -1
  end
end
2658
+ end
2659
+ end