metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,52 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ppc/opcodes'
8
+ require 'metasm/parse'
9
+
10
+ module Metasm
11
+ class PowerPC
12
+ # TODO
13
# Tells whether +arg+ has the right class for the operand slot +sym+ of opcode +op+.
# op:  the opcode being matched (not inspected by this method)
# sym: the operand slot symbol taken from the opcode definition
# arg: the parsed operand (Reg, FpReg, Expression or Memref)
# Returns true/false; raises a RuntimeError for an operand symbol this backend does not know.
# NOTE(review): the accepted symbols mirror the MIPS backend this file was derived from (see the TODO on the class).
def parse_arg_valid?(op, sym, arg)
  # special case for lw reg, imm32(reg) ? (pseudo-instr, need to convert to 'lui t0, up imm32 ori t0 down imm32 add t0, reg lw reg, 0(t0)
  case sym
  when :rs, :rt, :rd; arg.kind_of? Reg
  when :sa, :i16, :i20, :i26; arg.kind_of? Expression
  when :rs_i16; arg.kind_of? Memref
  when :ft; arg.kind_of? FpReg
  else raise "internal error: ppc arg #{sym.inspect}"	# this is the PowerPC backend, do not blame mips
  end
end
23
+
24
# Reads one instruction operand from the token stream +pgm+.
# Returns a Reg or FpReg when the next word names a register, otherwise whatever
# Expression.parse yields, wrapped in a Memref when it is followed by '(basereg)'.
# Returns nil when no token is left. Raises (through the first token) on a malformed memory reference.
def parse_argument(pgm)
  pgm.skip_space
  first = pgm.nexttok
  return unless first

  if first.type == :string
    if Reg.s_to_i[first.raw]
      pgm.readtok
      return Reg.new(Reg.s_to_i[first.raw])
    elsif FpReg.s_to_i[first.raw]
      pgm.readtok
      return FpReg.new(FpReg.s_to_i[first.raw])
    end
  end

  offset = Expression.parse(pgm)
  pgm.skip_space
  # check memory indirection: 'off(base reg)' # XXX scaled index ?
  return offset unless offset
  return offset unless pgm.nexttok && pgm.nexttok.type == :punct && pgm.nexttok.raw == '('

  pgm.readtok
  pgm.skip_space_eol
  base_tok = pgm.readtok
  unless base_tok && base_tok.type == :string && Reg.s_to_i[base_tok.raw]
    raise first, "Invalid base #{base_tok}"
  end
  base = Reg.new(Reg.s_to_i[base_tok.raw])

  pgm.skip_space_eol
  closer = pgm.readtok
  unless closer && closer.type == :punct && closer.raw == ')'
    raise first, "Invalid memory reference, ')' expected"
  end
  Memref.new(base, offset)
end
51
+ end
52
+ end
@@ -0,0 +1,1277 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+
9
+
10
+ module Metasm
11
+ # A source text preprocessor (C-like)
12
+ # defines the methods nexttok, readtok and unreadtok
13
+ # they spit out Tokens of type :
14
+ # :string for words (/[a-Z0-9$_]+/)
15
+ # :punct for punctuation (/[.,:*+-]/ etc), any unhandled character
16
+ # :space for space/tabs/comment/\r
17
+ # :eol for newline :space including at least one \n not escaped
18
+ # :quoted for quoted string, tok.raw includes delimiter and all content. tok.value holds the interpreted value (handles \x, \oct, \r etc). 1-line only
19
+ # or nil on end of stream
20
+ # \ at end of line discards a newline, otherwise returns a tok :punct with the \
21
+ # preprocessor directives start with a :punct '#' just after an :eol (so you can have spaces before #), they take a whole line
22
+ # comments are C/C++ style (//...\n or /*...*/), returned as :eol (resp. :space)
23
+ class Preprocessor
24
# a token, as returned by the preprocessor
class Token
  # the token type: :space, :eol, :quoted, :string, :punct, ...
  attr_accessor :type
  # the interpreted value of the token (Integer for an int, etc)
  attr_accessor :value
  # the raw string that gave this token
  attr_accessor :raw
  # a list of tokens this one is expanded from (Preprocessor macro expansion)
  attr_accessor :expanded_from

  include Backtrace

  # backtrace: the location information stored for error messages
  # (presumably read back through the Backtrace mixin - confirm there)
  def initialize(backtrace)
    @backtrace = backtrace
    @value = nil
    @raw = ''
  end

  # used when doing 'raise tok, "foo"'
  # returns a ParseError whose message is msg followed by the token text and its backtrace
  # with msg == nil only the token text and location are emitted (used for the expanded_from chain)
  def exception(msg='syntax error')
    # build the message in a private copy: String#to_s returns the receiver itself, so
    # appending to it would alter the caller's string, and nil.to_s is a frozen string on
    # recent rubies, which made the exception(nil) call below raise FrozenError
    msgh = msg.to_s.dup
    if msg
      msgh << ' near '
      expanded_from.to_a.each { |ef| msgh << ef.exception(nil).message << " expanded to \n\t" }
    end
    # long tokens are abbreviated to their first and last characters
    msgh << ((@raw.length > 35) ? (@raw[0..10] + '<...>' + @raw[-10..-1]).inspect : @raw.inspect)
    msgh << " at " << backtrace_str
    ParseError.new msgh
  end

  # returns a copy of the token with its own raw string (and its own value when it is a String)
  # expanded_from is copied only if it was ever assigned on the source token
  def dup
    n = self.class.new(backtrace)
    n.type = @type
    n.value = @value.kind_of?(String) ? @value.dup : @value
    n.raw = @raw.dup
    n.expanded_from = @expanded_from.dup if defined? @expanded_from
    n
  end
end
65
+
66
+ # a preprocessor macro
67
+ class Macro
68
+ # the token holding the name used in the macro definition
69
+ attr_accessor :name
70
+ # array of tokens of formal arguments
71
+ attr_accessor :args
72
+ # array of tokens of macro body
73
+ attr_accessor :body
74
+ # bool
75
+ attr_accessor :varargs
76
+
77
# creates an empty macro: +name+ is the token holding the macro name, the body starts with no token
def initialize(name)
  @name, @body = name, []
end
81
+
82
+
83
# parses a macro-call argument list, reading either from +list+ (an array of tokens,
# consumed in place) or, when list is nil, from the lexer with macro expansion disabled
# returns nil (input left untouched) if the next non-space token is not '(',
# otherwise an array holding one array of tokens per argument
# nested parentheses are kept inside the current argument
def self.parse_arglist(lexer, list=nil)
  fetch = lambda { list ? list.shift : lexer.readtok(false) }
  pushback = lambda { |t| list ? (list.unshift(t) if t) : lexer.unreadtok(t) }

  # skip (and remember) the spaces before the opening parenthesis
  skipped = []
  cur = fetch.call
  while cur and cur.type == :space
    skipped << cur
    cur = fetch.call
  end

  unless cur and cur.type == :punct and cur.raw == '('
    # not a call: give everything back in the original order
    pushback.call(cur)
    skipped.reverse_each { |t| pushback.call(t) }
    return nil
  end

  args = []
  # each argument is any token sequence without a top-level ','
  # arguments are read with no macro expansion
  # any run of space/eol is squeezed to one space token, a trailing space is dropped
  loop do
    arg = []
    depth = 0
    loop do
      cur = fetch.call
      raise lexer, 'unterminated arg list' if not cur
      if cur.type == :eol or cur.type == :space
        next if arg.last and arg.last.type == :space
        cur = cur.dup
        cur.type = :space
        cur.raw = ' '
      elsif cur.type == :punct
        if cur.raw == ','
          break if depth == 0
        elsif cur.raw == ')'
          break if depth == 0
          depth -= 1
        elsif cur.raw == '('
          depth += 1
        end
      end
      arg << cur
    end
    arg.pop if arg.last and arg.last.type == :space
    closing = (cur.raw == ')')
    # '()' alone means no argument at all
    args << arg unless arg.empty? and args.empty? and closing
    break if closing
  end
  args
end
129
+
130
+ # applies a preprocessor macro to one use site: name is the token that named the macro, args the already parsed argument list (array of arrays of tokens, as built by parse_arglist) or nil, lexer the preprocessor whose definition table is consulted; the list parameter is not referenced in this body
131
+ # parses arguments if needed (only for macros found inside an argument being substituted)
132
+ # macros are lazy
133
+ # fills tokens.expanded_from (every body token gets name.expanded_from + [name])
134
+ # returns an array of tokens
135
+ def apply(lexer, name, args, list=nil)
136
+ expfrom = name.expanded_from.to_a + [name]
137
+ if args
138
+ # hargs is a hash argname.raw => array of tokens
139
+ hargs = @args.zip(args).inject({}) { |h, (af, ar)| h.update af.raw => ar }
140
+
141
+ if not varargs
142
+ raise name, 'invalid argument count' if args.length != @args.length
143
+ else
144
+ raise name, 'invalid argument count' if args.length < @args.length
145
+ virg = name.dup # concat remaining args in __VA_ARGS__
146
+ virg.type = :punct
147
+ virg.raw = ','
148
+ va = args[@args.length..-1].map { |a| a + [virg.dup] }.flatten
149
+ va.pop
150
+ hargs['__VA_ARGS__'] = va
151
+ end
152
+ else
153
+ hargs = {}
154
+ end
155
+
156
+ res = []
157
+ b = @body.map { |t| t = t.dup ; t.expanded_from = expfrom ; t }
158
+ while t = b.shift
159
+ if a = hargs[t.raw]
160
+ # expand macros found in the argument tokens before substituting them; a macro is skipped when a token named like the current macro already appears in its expanded_from chain
161
+ a = a.dup
162
+ while at = a.shift
163
+ margs = nil
164
+ if at.type == :string and am = lexer.definition[at.raw] and not at.expanded_from.to_a.find { |ef| ef.raw == @name.raw } and
165
+ ((am.args and margs = Macro.parse_arglist(lexer, a)) or not am.args)
166
+ toks = am.apply(lexer, at, margs, a)
167
+ a = toks + a # reroll
168
+ else
169
+ res << at.dup if not res.last or res.last.type != :space or at.type != :space
170
+ end
171
+ end
172
+ elsif t.type == :punct and t.raw == '##'
173
+ # the '##' operator: concat the next token to the last in body (spaces on both sides of the operator are dropped first)
174
+ nil while t = b.shift and t.type == :space
175
+ res.pop while res.last and res.last.type == :space
176
+ if not a = hargs[t.raw]
177
+ a = [t]
178
+ end
179
+ if varargs and t.raw == '__VA_ARGS__' and res.last and res.last.type == :punct and res.last.raw == ','
180
+ if args.length == @args.length # pop last , if no vararg passed # XXX poof(1, 2,) != poof(1, 2)
181
+ res.pop
182
+ else # allow merging with ',' without warning
183
+ res.concat a
184
+ end
185
+ else
186
+ a = a[1..-1] if a.first and a.first.type == :space
187
+ if not res.last or res.last.type != :string or not a.first or a.first.type != :string
188
+ puts name.exception("cannot merge token #{res.last.raw} with #{a.first ? a.first.raw : 'nil'}").message if not a.first or (a.first.raw != '.' and res.last.raw != '.') if $VERBOSE
189
+ res.concat a
190
+ else
191
+ res[-1] = res[-1].dup
192
+ res.last.raw << a.first.raw
193
+ res.concat a[1..-1]
194
+ end
195
+ end
196
+ elsif args and t.type == :punct and t.raw == '#' # map an arg to a qstring
197
+ nil while t = b.shift and t.type == :space
198
+ t.type = :quoted
199
+ t.value = hargs[t.raw].map { |aa| aa.raw }.join
200
+ t.value = t.value[1..-1] if t.value[0] == ?\ # delete leading space
201
+ t.raw = t.value.inspect
202
+ res << t
203
+ else
204
+ res << t
205
+ end
206
+ end
207
+ res
208
+ end
209
+
210
# parses the argument list and the body of a macro definition from lexer
# (the macro name itself must already have been consumed; tokens are read with readtok_nopp)
# fills @args (only when a '(' immediately follows the name), @varargs and @body
# a named variadic parameter ('args...') is renamed to __VA_ARGS__ in the body
# converts # + # to ## in body, squeezes space runs to one space, stops before the :eol
# returns true if the definition looks sane, false otherwise (a warning is printed when $VERBOSE)
# raises through the name token on a malformed parameter list
def parse_definition(lexer)
  varg = nil
  if tok = lexer.readtok_nopp and tok.type == :punct and tok.raw == '('
    @args = []
    loop do
      nil while tok = lexer.readtok_nopp and tok.type == :space
      # check '...'
      if tok and tok.type == :punct and tok.raw == '.'
        t1 = lexer.readtok_nopp
        t2 = lexer.readtok_nopp
        t3 = lexer.readtok_nopp
        t3 = lexer.readtok_nopp while t3 and t3.type == :space
        raise @name, 'booh' if not t1 or t1.type != :punct or t1.raw != '.' or
          not t2 or t2.type != :punct or t2.raw != '.' or
          not t3 or t3.type != :punct or t3.raw != ')'
        @varargs = true
        break
      end
      break if tok and tok.type == :punct and tok.raw == ')' and @args.empty?	# allow empty list
      raise @name, 'invalid arg definition' if not tok or tok.type != :string
      @args << tok
      nil while tok = lexer.readtok_nopp and tok.type == :space
      # check '...' right after a parameter name: named variadic parameter
      if tok and tok.type == :punct and tok.raw == '.'
        t1 = lexer.readtok_nopp
        t2 = lexer.readtok_nopp
        t3 = lexer.readtok_nopp
        t3 = lexer.readtok_nopp while t3 and t3.type == :space
        raise @name, 'booh' if not t1 or t1.type != :punct or t1.raw != '.' or
          not t2 or t2.type != :punct or t2.raw != '.' or
          not t3 or t3.type != :punct or t3.raw != ')'
        @varargs = true
        varg = @args.pop.raw
        break
      end
      raise @name, 'invalid arg separator' if not tok or tok.type != :punct or (tok.raw != ')' and tok.raw != ',')
      break if tok.raw == ')'
    end
  else lexer.unreadtok tok
  end

  # skip the spaces between the name/parameter list and the body
  nil while tok = lexer.readtok_nopp and tok.type == :space
  lexer.unreadtok tok

  while tok = lexer.readtok_nopp
    tok = tok.dup
    case tok.type
    when :eol
      lexer.unreadtok tok
      break
    when :space
      next if @body.last and @body.last.type == :space
      tok.raw = ' '
    when :string
      tok.raw = '__VA_ARGS__' if varg and tok.raw == varg
    when :punct
      if tok.raw == '#'
        ntok = lexer.readtok_nopp
        if ntok and ntok.type == :punct and ntok.raw == '#'
          tok.raw << '#'
        else
          lexer.unreadtok ntok
        end
      end
    end
    @body << tok
  end
  @body.pop if @body.last and @body.last.type == :space

  # check macro is correct
  invalid_body = nil
  if (@body[-1] and @body[-1].raw == '##') or (@body[0] and @body[0].raw == '##')
    invalid_body ||= 'cannot have ## at begin or end of macro body'
  end
  if args
    if @args.map { |a| a.raw }.uniq.length != @args.length
      invalid_body ||= 'duplicate macro parameter'
    end
    @body.each_with_index { |tok_, i|
      if tok_.type == :punct and tok_.raw == '#'
        a = @body[i+1]
        a = @body[i+2] if not a or a.type == :space
        # a is nil when the '#' ends the body: report it instead of calling a method on nil
        if not a or a.type != :string or (not @args.find { |aa| aa.raw == a.raw } and (not varargs or a.raw != '__VA_ARGS__'))
          invalid_body ||= 'cannot have # followed by non-argument'
        end
      end
    }
  end
  if invalid_body
    puts "W: #{lexer.filename}:#{lexer.lineno}, in #{@name.raw}: #{invalid_body}" if $VERBOSE
    false
  else
    true
  end
end
307
+
308
# returns the macro as a '#define' source line
# when comment is true, the line is preceded by a '// from' comment built from the last two
# entries of the name token backtrace
def dump(comment = true)
  pieces = []
  pieces << "\n// from #{@name.backtrace[-2, 2] * ':'}\n" if comment
  pieces << "#define #{@name.raw}"
  if args
    params = @args.map { |t| t.raw }
    params << '...' if varargs
    pieces << "(#{params.join(', ')})"
  end
  pieces << ' ' << @body.map { |t| t.raw }.join
  pieces.join
end
317
+ end
318
+
319
# special object, handles __FILE__ __LINE__ __COUNTER__ __DATE__ __TIME__ macros
# answers the same messages as Macro (name, args, body, apply)
class SpecialMacro
  attr_accessor :name

  # raw is the macro name ('__FILE__', ...), stored in a :string token without backtrace
  def initialize(raw)
    @name = Token.new(nil)
    @name.type = :string
    @name.raw = raw
  end

  # special macros never take arguments
  def args
  end

  def body
    [@name]
  end

  # returns a one-token array holding the current value of the special macro
  # name is the token at the use site; file and line are taken from the backtrace of the
  # outermost token name was expanded from (or from name itself when it was not expanded)
  def apply(lexer, name, emptyarglist, toklist=nil)
    result = @name.dup
    result.expanded_from = name.expanded_from.to_a + [name]
    which = @name.raw

    if %w[__FILE__ __DATE__ __TIME__].include?(which)
      # these yield a :quoted token
      result.type = :quoted
      result.value =
        if which == '__FILE__'
          site = name.expanded_from ? name.expanded_from.first : name
          site.backtrace.to_a[-2].to_s
        elsif which == '__DATE__'
          Time.now.strftime('%b %e %Y')
        else
          Time.now.strftime('%H:%M:%S')
        end
      result.raw = result.value.inspect
    elsif which == '__LINE__' or which == '__COUNTER__'
      # these yield a :string token
      result.type = :string
      if which == '__LINE__'
        site = name.expanded_from ? name.expanded_from.first : name
        result.value = site.backtrace.to_a[-1]
      else
        result.value = @counter ||= 0
        @counter += 1
      end
      result.raw = result.value.to_s
    else
      raise name, 'internal error'
    end

    [result]
  end
end
362
+
363
+ # the raw string we're reading
364
+ attr_accessor :text, :pos
365
+ # the backtrace information for current file
366
+ attr_accessor :filename, :lineno
367
+ # the unreadtok queue
368
+ attr_accessor :queue
369
+ # the backtrace (array of previous [filename, lineno, text, pos] that #included us)
370
+ attr_accessor :backtrace
371
+ # a hash of macro definitions: macro name => Macro or SpecialMacro object (see its name, args and body)
372
+ attr_accessor :definition
373
+ # array of directories to search for #included <files>
374
+ attr_accessor :include_search_path
375
+ # a Proc called for unhandled #pragma occurrences
376
+ # takes the pragma 1st tok as arg, must unread the final :eol, should fallback to the previous callback
377
+ attr_accessor :pragma_callback
378
+ # hash filename => file content
379
+ attr_accessor :hooked_include
380
+ attr_accessor :warn_redefinition
381
+
382
+ # global default search directory for #included <files>
383
+ @@include_search_path = ['/usr/include']
384
+ def self.include_search_path ; @@include_search_path end
385
+ def self.include_search_path=(np) @@include_search_path=np end
386
+
387
# creates a preprocessor reading from +text+ (file name 'unknown', line 1)
# the special macros and __METASM__ are predefined, the include path is copied from the
# class-wide default, and the default pragma callback only reports the pragma when $VERBOSE
def initialize(text='')
  # input state
  @text = text
  @pos = 0
  @filename = 'unknown'
  @lineno = 1
  @queue = []
  @backtrace = []
  # stack of :accept/:discard/:discard_all/:testing, represents the current nesting of #if..#endif
  @ifelse_nesting = []

  # configuration
  @warn_redefinition = true
  @hooked_include = {}
  @pragma_once = {}
  @include_search_path = @@include_search_path.dup

  @definition = {}
  %w[__FILE__ __LINE__ __COUNTER__ __DATE__ __TIME__].each { |n| @definition[n] = SpecialMacro.new(n) }

  # default handler: swallow the rest of the line, leave the :eol for the caller
  @pragma_callback = lambda { |first|
    line = first.raw.dup
    cur = readtok
    while cur and cur.type != :eol
      line << cur.raw
      cur = readtok
    end
    unreadtok cur
    puts first.exception("unhandled pragma #{line.inspect}").message if $VERBOSE
  }

  define '__METASM__', VERSION
end
410
+
411
# used when doing 'raise lexer, "foo"'
# returns a ParseError whose message starts with the current file/line followed by the
# file/line of every entry of the include backtrace
def exception(msg='syntax error')
  frames = [[@filename, @lineno]]
  @backtrace.each { |file, line, *_rest| frames << [file, line] }
  where = Backtrace.backtrace_str(frames.flatten)
  ParseError.new "at #{where}: #{msg}"
end
415
+
416
# returns the preprocessed content: reads every remaining token and concatenates them
# any :space token becomes one space, no more than 2 consecutive newlines are kept,
# quoted strings keep their source form, other tokens use their value when set
def dump
  ret = ''
  neol = 0
  while not eos?
    # readtok returns nil when the input ran dry while expanding the last tokens
    # (e.g. the text ends with a macro that expands to nothing)
    break if not t = readtok
    case t.type
    when :space; ret << ' '
    when :eol; ret << "\n" if (neol += 1) <= 2
    when :quoted; neol = 0 ; ret << t.raw	# keep quoted style
    else neol = 0 ; ret << (t.value || t.raw).to_s
    end
  end
  ret
end
431
+
432
+ attr_accessor :traced_macros
433
# preprocesses text with macro tracing enabled, discarding the tokens, then dumps the
# definition of every traced macro (macros defined in #included <files> and used in the text)
# returns a C source-like string
def self.factorize(text, comment=false)
  lexer = new(text)
  lexer.traced_macros = []
  lexer.readtok until lexer.eos?
  lexer.dump_macros(lexer.traced_macros, comment)
end
441
+
442
# dumps the definition of the macros whose name is in the list + their dependencies
# (a macro depends on every defined macro whose name appears in its body)
# names that have no definition are ignored
# returns one big C-style source string, dependencies first
def dump_macros(list, comment = true)
  depend = {}
  # build dependency graph (we can output macros in any order, but it's more human-readable)
  walk = lambda { |mname|
    # nothing to walk for a name without definition (the list comes from the caller)
    next if not macro = @definition[mname]
    depend[mname] ||= []
    macro.body.each { |t|
      name = t.raw
      if @definition[name]
        depend[mname] << name
        if not depend[name]
          depend[name] = []
          walk[name]
        end
      end
    }
  }
  list.each { |mname| walk[mname] }

  res = []
  while not depend.empty?
    # macros whose dependencies were all dumped already (self-reference does not count)
    todo_now = depend.keys.find_all { |k| (depend[k] - [k]).empty? }
    if todo_now.empty?
      # only cycles are left: find one and dump all its members together
      dep_cycle = lambda { |ary|
        deps = depend[ary.last]
        if deps.include? ary.first; ary
        elsif (deps-ary).find { |d| deps = dep_cycle[ary + [d]] }; deps
        end
      }
      if not depend.find { |k, dep| todo_now = dep_cycle[[k]] }
        todo_now = depend.keys
      end
    end
    todo_now.sort.each { |k|
      # SpecialMacro entries take part in the graph but have no textual definition
      res << @definition[k].dump(comment) if @definition[k].kind_of? Macro
      depend.delete k
    }
    depend.each_key { |k| depend[k] -= todo_now }
  end
  res.join("\n")
end
484
+
485
# starts a new lexer, with the specified initial filename/line number (for backtraces)
# refuses (raising through self) to replace a source that was not read up to its end
def feed(text, filename='unknown', lineno=1)
  unless eos?
    raise self, 'cannot start new text, did not finish current source'
  end
  feed!(text, filename, lineno)
end
490
+
491
# starts a new lexer, with the specified initial filename/line number (for backtraces)
# discards old text/whatever (position and unread queue are reset)
# returns self; raises ArgumentError when text is nil
def feed!(text, filename='unknown', lineno=1)
  raise ArgumentError, 'need something to parse!' unless text
  # the file name is stored between double quotes:
  # @filename[-1] used in trace_macros to distinguish generic/specific files
  @filename = %("#{filename}")
  @text, @lineno = text, lineno
  @pos = 0
  @queue = []
  self
end
503
+
504
# calls #feed on the content of the file, using its path as file name for backtraces
def feed_file(filename)
  content = File.read(filename)
  feed(content, filename)
end
508
+
509
# third character of a '??x' trigraph => the character it stands for
Trigraph = { ?= => ?#, ?) => ?], ?! => ?|,
             ?( => ?[, ?' => ?^, ?> => ?},
             ?/ => ?\\,?< => ?{, ?- => ?~ }

# reads one character from self.text, returns nil at the end of the text
# updates self.lineno
# handles trigraphs and \-continued lines (the backslash-newline pair is skipped)
# a \r\n pair is returned as a single \n
# the position/line before the call are saved for ungetchar
def getchar
  @ungetcharpos = @pos
  @ungetcharlineno = @lineno
  c = @text[@pos]
  @pos += 1

  # check trigraph
  if c == ?? and @text[@pos] == ?? and Trigraph[@text[@pos+1]]
    # show the trigraph that was actually found (c is still the leading '?' at this point)
    puts "can i has trigraf plox ??#{@text[@pos+1].chr} (#@filename:#@lineno)" if $VERBOSE
    c = Trigraph[@text[@pos+1]]
    @pos += 2
  end

  # check line continuation
  # TODO portability
  if c == ?\\ and (@text[@pos] == ?\n or (@text[@pos] == ?\r and @text[@pos+1] == ?\n))
    @lineno += 1
    @pos += 1 if @text[@pos] == ?\r
    @pos += 1
    return getchar
  end

  if c == ?\r and @text[@pos] == ?\n
    @pos += 1
    c = ?\n
  end

  # update lineno
  if c == ?\n
    @lineno += 1
  end

  c
end
550
+
551
# cancels the last getchar: restores the position and line number saved when it started
# only one level of undo is available; returns nil
def ungetchar
  @pos, @lineno = @ungetcharpos, @ungetcharlineno
  nil
end
556
+
557
# returns true if no more data is available: current text fully read, no token waiting
# in the unread queue and no including file left to resume
def eos?
  @pos >= @text.length && @queue.empty? && @backtrace.empty?
end
561
+
562
# push back a token, will be returned on the next readtok
# lifo; a nil/false token is ignored; returns nil
def unreadtok(tok)
  @queue.push(tok) if tok
  nil
end
568
+
569
+ # calls readtok_nopp and handles preprocessor directives, end of #included text and (when expand_macros is true) macro expansion; returns the next token, or nil when nothing is left
570
+ def readtok(expand_macros = true)
571
+ lastpos = @pos # 0 means nothing of the current text was consumed before this call
572
+ tok = readtok_nopp
573
+
574
+ if not tok
575
+ # end of file: resume parent (the saved entry is popped from @backtrace; its last element is compared to the current #if nesting depth - presumably stored when the file was #included, confirm in the directive handler)
576
+ if not @backtrace.empty?
577
+ raise ParseError, "parse error in #@filename: unmatched #if/#endif" if @backtrace.last.pop != @ifelse_nesting.length
578
+ @filename, @lineno, @text, @pos, @queue = @backtrace.pop
579
+ tok = readtok
580
+ end
581
+
582
+ elsif (tok.type == :eol or lastpos == 0) and @ifelse_nesting.last != :testing
583
+ unreadtok tok if lastpos == 0
584
+ # detect preprocessor directive: tokens are read ahead into pretok, and pushed back unless preprocessor_directive accepted the word following the '#'
585
+ # state = 1 => seen :eol, 2 => seen #
586
+ pretok = []
587
+ rewind = true
588
+ state = 1
589
+ loop do
590
+ pretok << (ntok = readtok_nopp)
591
+ break if not ntok
592
+ if ntok.type == :space # nothing
593
+ elsif state == 1 and ntok.type == :punct and ntok.raw == '#' and not ntok.expanded_from
594
+ state = 2
595
+ elsif state == 2 and ntok.type == :string and not ntok.expanded_from
596
+ rewind = false if preprocessor_directive(ntok)
597
+ break
598
+ else break
599
+ end
600
+ end
601
+ if rewind
602
+ # false alarm: revert (reverse order, since the unread queue is lifo)
603
+ pretok.reverse_each { |t| unreadtok t }
604
+ end
605
+ tok = readtok if lastpos == 0 # else return the :eol
606
+
607
+ elsif expand_macros and tok.type == :string and m = @definition[tok.raw] and not tok.expanded_from.to_a.find { |ef| ef.raw == m.name.raw } and
608
+ ((m.args and margs = Macro.parse_arglist(self)) or not m.args)
609
+
610
+ if defined? @traced_macros and tok.backtrace[-2].to_s[0] == ?" and m.name and m.name.backtrace[-2].to_s[0] == ?<
611
+ @traced_macros |= [tok.raw] # we are in a normal file and expand to an header-defined macro
612
+
613
+ end
614
+
615
+ m.apply(self, tok, margs).reverse_each { |t| unreadtok t }
616
+
617
+ tok = readtok
618
+ end
619
+
620
+ tok
621
+ end
621
+
622
+ # read and return the next token, without directive handling nor macro expansion; a token waiting in the unread queue is returned first, nil is returned at the end of the text
623
+ # parses quoted strings (set tok.value) and C/C++ comments (:space/:eol)
624
+ def readtok_nopp
625
+ return @queue.pop unless @queue.empty?
626
+
627
+ nbt = [] # token backtrace: file, line of every including file, then the current file and line
628
+ @backtrace.each { |bt| nbt << bt[0] << bt[1] }
629
+ tok = Token.new(nbt << @filename << @lineno)
630
+
631
+ case c = getchar
632
+ when nil
633
+ return nil
634
+ when ?', ?"
635
+ # read quoted string value
636
+ readtok_nopp_str(tok, c)
637
+ when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_
638
+ tok.type = :string
639
+ raw = tok.raw << c
640
+ loop do
641
+ case c = getchar
642
+ when nil; ungetchar; break # avoids 'no method "coerce" for nil' warning
643
+ when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_
644
+ raw << c
645
+ else ungetchar; break
646
+ end
647
+ end
648
+
649
+ when ?\ , ?\t, ?\r, ?\n, ?\f
650
+ tok.type = ((c == ?\ || c == ?\t) ? :space : :eol)
651
+ raw = tok.raw << c
652
+ loop do
653
+ case c = getchar
654
+ when nil; break
655
+ when ?\ , ?\t
656
+ when ?\n, ?\f, ?\r; tok.type = :eol
657
+ else break
658
+ end
659
+ raw << c
660
+ end
661
+ ungetchar
662
+
663
+ when ?/
664
+ raw = tok.raw << c
665
+ # comment, or a lone slash returned as :punct
666
+ case c = getchar
667
+ when ?/
668
+ # till eol (the newline is part of the token, which is an :eol)
669
+ tok.type = :eol
670
+ raw << c
671
+ while c = getchar
672
+ raw << c
673
+ break if c == ?\n
674
+ end
675
+ when ?*
676
+ tok.type = :space
677
+ raw << c
678
+ seenstar = false
679
+ loop do
680
+ raise tok, 'unterminated c++ comment' if not c = getchar
681
+ raw << c
682
+ case c
683
+ when ?*; seenstar = true
684
+ when ?/; break if seenstar # no need to reset seenstar, already false
685
+ else seenstar = false
686
+ end
687
+ end
688
+ else
689
+ # just a slash
690
+ ungetchar
691
+ tok.type = :punct
692
+ end
693
+
694
+ else
695
+ tok.type = :punct
696
+ tok.raw << c
697
+ end
698
+
699
+ tok
700
+ end
701
+
702
# Reads the remainder of a quoted string whose opening delimiter (' or ") was just consumed.
# tok.raw receives the source text exactly as read; tok.value receives the
# content with the backslash escapes interpreted.
# Raises tok when the input ends or a bare newline is met before the closing delimiter.
# Returns tok.
def readtok_nopp_str(tok, delimiter)
  tok.type = :quoted
  tok.raw << delimiter
  tok.value = ''
  loop do
    ch = getchar
    raise tok, 'unterminated string' unless ch
    tok.raw << ch
    break if ch == delimiter

    if ch == ?\\
      esc = getchar
      raise tok, 'unterminated escape' unless esc
      tok.raw << esc
      decoded =
        case esc
        when ?n then ?\n
        when ?r then ?\r
        when ?t then ?\t
        when ?a then ?\a
        when ?b then ?\b
        when ?v then ?\v
        when ?f then ?\f
        when ?e then ?\e
        when ?#, ?\\, ?', ?" then esc
        when ?\n then '' # backslash-newline adds nothing (original note: already handled by getchar)
        when ?x
          # \x followed by one or two hexadecimal digits
          digits = ''
          until digits.length == 2
            nxt = getchar
            raise tok, 'unterminated escape' unless nxt
            is_hex =
              case nxt
              when ?0..?9, ?a..?f, ?A..?F then true
              else false
              end
            unless is_hex
              ungetchar
              break
            end
            digits << nxt
            tok.raw << nxt
          end
          raise tok, 'unterminated escape' if digits.empty?
          digits.hex
        when ?0..?7
          # up to three octal digits, the first one being esc itself
          digits = '' << esc
          until digits.length == 3
            nxt = getchar
            raise tok, 'unterminated escape' unless nxt
            is_oct =
              case nxt
              when ?0..?7 then true
              else false
              end
            unless is_oct
              ungetchar
              break
            end
            digits << nxt
            tok.raw << nxt
          end
          digits.oct
        else
          esc # raise tok, 'unknown escape sequence'
        end
      tok.value << decoded
    elsif ch == ?\n
      # a raw newline inside the literal: give it back and fail
      ungetchar
      raise tok, 'unterminated string'
    else
      tok.value << ch
    end
  end

  tok
end
763
+
764
+
765
# Registers a simple (argument-less) preprocessor macro called name.
# An existing definition of the same name is replaced without any check.
# from is a "file:line..." string (by default the caller's location) used as
# the position of the generated tokens.
# When value is a String and the lexer has no pending input (eos?), value is
# fed to the lexer and every token read back becomes the macro body;
# otherwise any non-nil value is stored as one single :string token
# (XXX multi-token definitions are not split in that case).
def define(name, value=nil, from=caller.first)
  from =~ /^(.*?):(\d+)/
  origin_file = $1
  origin_line = $2.to_i

  name_tok = Token.new([origin_file, origin_line])
  name_tok.type = :string
  name_tok.raw = name.dup
  @definition[name] = Macro.new(name_tok)

  if value.kind_of?(::String) && eos?
    feed(value, origin_file, origin_line)
    until eos?
      @definition[name].body << readtok
    end
  elsif value
    body_tok = Token.new([origin_file, origin_line])
    body_tok.type = :string
    body_tok.raw = value.to_s
    @definition[name].body << body_tok
  end
end
784
+
785
# Same as define, but leaves an already existing definition of name untouched.
def define_weak(name, value=nil, from=caller.first)
  return if @definition[name]

  define(name, value, from)
end
789
+
790
# Defines name and records it in @defined_strong, the list of names that
# later #define / #undef directives must leave alone.
def define_strong(name, value=nil, from=caller.first)
  @defined_strong ||= []
  @defined_strong << name
  define(name, value, from)
end
795
+
796
# Records name in @defined_strong without defining it, so that a later
# #define of that name is ignored as well.
def nodefine_strong(name)
  @defined_strong ||= []
  @defined_strong << name
end
800
+
801
# Executes one preprocessor directive.
# cmd is the :string token that followed the '#' (if, ifdef, ifndef, elif, else,
# endif, define, undef, include, include_next, error, warning, line, pragma).
# ocmd is for internal use: it is the directive reported in the
# 'pp unterminated conditional' error when this method recurses while
# skipping a discarded region.
#
# @ifelse_nesting holds one state per open conditional:
#   :accept      - the current branch is emitted
#   :discard     - the current branch is skipped, a later #elif/#else may still be taken
#   :discard_all - skipped, and no later branch of this conditional may be taken
#   :testing     - transient value set while the condition is being parsed
#
# Returns true when a known directive was handled, false when cmd is not a
# known directive, and nil when a define/undef/include/error/warning/line/pragma
# is ignored because the current conditional branch is not accepted.
# Raises (through the tokens' exception) on syntax errors and on #error.
def preprocessor_directive(cmd, ocmd = cmd)
  # read spaces, returns the next token
  # XXX for all commands that may change @ifelse_nesting, ensure last element is :testing to disallow any other preprocessor directive to be run in a bad environment (while looking ahead)
  skipspc = lambda {
    loop do
      tok = readtok_nopp
      break tok if not tok or tok.type != :space
    end
  }

  # XXX do not preprocess tokens when searching for :eol, it will trigger preprocessor directive detection from readtok

  eol = tok = nil
  case cmd.raw
  when 'if'
    case @ifelse_nesting.last
    when :accept, nil
      @ifelse_nesting << :testing
      raise cmd, 'expr expected' if not test = PPExpression.parse(self)
      eol = skipspc[]
      raise eol, 'pp syntax error' if eol and eol.type != :eol
      unreadtok eol
      # only a non-zero integer selects the branch
      case test.reduce
      when 0; @ifelse_nesting[-1] = :discard
      when Integer; @ifelse_nesting[-1] = :accept
      else @ifelse_nesting[-1] = :discard
      end
    when :discard, :discard_all
      # nested inside a skipped region: nothing in this conditional can be taken
      @ifelse_nesting << :discard_all
    end

  when 'ifdef'
    case @ifelse_nesting.last
    when :accept, nil
      @ifelse_nesting << :testing
      raise eol || tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string or (eol = skipspc[] and eol.type != :eol)
      unreadtok eol
      @ifelse_nesting[-1] = (@definition[tok.raw] ? :accept : :discard)
    when :discard, :discard_all
      @ifelse_nesting << :discard_all
    end

  when 'ifndef'
    case @ifelse_nesting.last
    when :accept, nil
      @ifelse_nesting << :testing
      raise eol || tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string or (eol = skipspc[] and eol.type != :eol)
      unreadtok eol
      @ifelse_nesting[-1] = (@definition[tok.raw] ? :discard : :accept)
    when :discard, :discard_all
      @ifelse_nesting << :discard_all
    end

  when 'elif'
    case @ifelse_nesting.last
    when :accept
      # a previous branch was taken: skip everything up to the #endif
      @ifelse_nesting[-1] = :discard_all
    when :discard
      @ifelse_nesting[-1] = :testing
      raise cmd, 'expr expected' if not test = PPExpression.parse(self)
      raise eol, 'pp syntax error' if eol = skipspc[] and eol.type != :eol
      unreadtok eol
      case test.reduce
      when 0; @ifelse_nesting[-1] = :discard
      when Integer; @ifelse_nesting[-1] = :accept
      else @ifelse_nesting[-1] = :discard
      end
    when :discard_all
    else raise cmd, 'pp syntax error'
    end

  when 'else'
    @ifelse_nesting << :testing
    @ifelse_nesting.pop
    raise eol || cmd, 'pp syntax error' if @ifelse_nesting.empty? or (eol = skipspc[] and eol.type != :eol)
    unreadtok eol
    case @ifelse_nesting.last
    when :accept
      @ifelse_nesting[-1] = :discard_all
    when :discard
      @ifelse_nesting[-1] = :accept
    when :discard_all
    end

  when 'endif'
    @ifelse_nesting << :testing
    @ifelse_nesting.pop
    raise eol || cmd, 'pp syntax error' if @ifelse_nesting.empty? or (eol = skipspc[] and eol.type != :eol)
    unreadtok eol
    @ifelse_nesting.pop

  when 'define'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    raise tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string
    m = Macro.new(tok)
    # the definition is always parsed (and consumed), even when it ends up ignored
    valid = m.parse_definition(self)
    if not defined? @defined_strong or not @defined_strong.include? tok.raw
      puts "W: pp: redefinition of #{tok.raw} at #{tok.backtrace_str},\n prev def at #{@definition[tok.raw].name.backtrace_str}" if @definition[tok.raw] and $VERBOSE and @warn_redefinition
      @definition[tok.raw] = m if valid
    end

  when 'undef'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    raise eol || tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string or (eol = skipspc[] and eol.type != :eol)
    if not defined? @defined_strong or not @defined_strong.include? tok.raw
      @definition.delete tok.raw
    end
    # always give the end of line back, as every other directive does:
    # an ignored #undef of a strong definition must not swallow it
    unreadtok eol

  when 'include', 'include_next'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    directive_include(cmd, skipspc)

  when 'error', 'warning'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept
    # the message is the raw text of the rest of the line
    msg = ''
    while tok = readtok_nopp and tok.type != :eol
      msg << tok.raw
    end
    unreadtok tok
    if cmd.raw == 'warning'
      puts cmd.exception("#warning#{msg}").message if $VERBOSE
    else
      raise cmd, "#error#{msg}"
    end

  when 'line'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    raise tok || cmd if not tok = skipspc[] or tok.type != :string
    @lineno = Integer(tok.raw) rescue raise(tok, 'bad line number')
    raise eol if eol = skipspc[] and eol.type != :eol
    unreadtok eol

  when 'pragma'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    directive_pragma(cmd, skipspc)

  else return false
  end

  # skip #ifndef'd parts of the source
  # state: 1 = just seen :eol, 2 = just seen '#' at the start of a line, 0 = anything else
  state = 1
  while @ifelse_nesting.last == :discard or @ifelse_nesting.last == :discard_all
    begin
      tok = skipspc[]
    rescue ParseError
      # react as gcc -E: <"> unterminated in #if 0 => ok, </*> unterminated => error (the " will fail at eol)
      retry
    end

    if not tok; raise ocmd, 'pp unterminated conditional'
    elsif tok.type == :eol; state = 1
    elsif state == 1 and tok.type == :punct and tok.raw == '#'; state = 2
    elsif state == 2 and tok.type == :string; state = preprocessor_directive(tok, ocmd) ? 1 : 0
    else state = 0
    end
  end

  true
end
969
+
970
# Handles '#include' / '#include_next': locates the requested file (or the
# hooked content registered for it), saves the current lexer state in
# @backtrace and switches @text/@filename/@lineno/@pos/@queue to the new content.
# cmd is the directive token, skipspc the space-skipping reader of the caller.
# A file already marked in @pragma_once is not included again.
def directive_include(cmd, skipspc)
  raise cmd, 'nested too deeply' if backtrace.length > 200 # gcc

  # looks for rel below each directory of dirs: existing file name first,
  # case-insensitive variant second; gives the path found, or nil
  locate = lambda { |dirs, rel|
    candidates = dirs.map { |dir| ::File.join(dir, rel) }
    hit = candidates.find { |cand| ::File.exist?(cand) }
    candidates.each { |cand| break if hit = file_exist_nocase(cand) } if not hit
    hit
  }

  # the first token is read with readtok (original note: allow preprocessing)
  first = nil
  loop do
    first = readtok
    break unless first && first.type == :space
  end
  raise first || cmd, 'pp syntax error' if !first || (first.type != :quoted && (first.type != :punct || first.raw != '<'))

  quoted = (first.type == :quoted)
  last_tok = first # last token of the file specification
  path = nil
  if quoted
    ipath = first.value
    from_system = (@filename[0] == ?<) || @backtrace.any? { |frame| frame[0][0] == ?< }
    if from_system
      # XXX local include from a std include... (kikoo windows.h !)
      path = locate[@include_search_path, ipath] ||
             file_exist_nocase(::File.join(::File.dirname(@filename[1..-2]), ipath))
    elsif ipath[0] != ?/
      # relative to the directory of the including file
      path = ::File.join(::File.dirname(@filename[1..-2]), ipath)
      path = file_exist_nocase(path) if not ::File.exist?(path)
    else
      path = ipath
      path = file_exist_nocase(path) if not ::File.exist?(path)
    end
  else
    # no more preprocessing : allow comments/multiple space/etc
    ipath = ''
    loop do
      last_tok = readtok_nopp
      break if !last_tok || (last_tok.type == :punct && last_tok.raw == '>')
      raise cmd, 'syntax error' if last_tok.type == :eol
      ipath << last_tok.raw
    end
    raise cmd, 'pp syntax error, unterminated path' if not last_tok
    if ipath[0] != ?/
      dirs = @include_search_path
      if cmd.raw == 'include_next'
        # resume the search after the directory holding the current file
        current = dirs.find { |dir| @filename[1, dir.length] == dir }
        raise self, 'include_next sux' if not current
        dirs = dirs[dirs.index(current)+1..-1]
      end
      path = locate[dirs, ipath]
    end
  end

  eol = skipspc[]
  raise eol if eol and eol.type != :eol
  unreadtok eol
  return if cmd.raw == 'include_next' and not path and not @hooked_include[ipath] # XXX

  if @pragma_once[path || ipath]
    puts "metasm preprocessor: not reincluding #{path} (pragma once)" if $DEBUG
    return
  end

  @backtrace << [@filename, @lineno, @text, @pos, @queue, @ifelse_nesting.length]

  # gcc-style autodetect
  # XXX the headers we already parsed may have needed a prepare_gcc...
  # maybe restart parsing ?
  if ipath == 'stddef.h' and not path and not @hooked_include[ipath]
    probe = last_tok.dup
    probe.raw = 'prepare_gcc'
    @pragma_callback[probe]
    puts "metasm pp: autodetected gcc-style headers" if @hooked_include[ipath] and $VERBOSE
  end

  if @hooked_include[ipath]
    path = '<hooked>/'+ipath
    puts "metasm preprocessor: including #{path}" if $DEBUG
    @text = @hooked_include[ipath]
  else
    puts "metasm preprocessor: including #{path}" if $DEBUG
    raise cmd, "No such file or directory #{ipath.inspect}" if not path or not ::File.exist? path
    raise cmd, 'filename too long' if path.length > 4096 # gcc
    @text = ::File.read(path)
  end

  # @filename[-1] used in trace_macros to distinguish generic/specific files
  @filename = quoted ? '"' + path + '"' : '<' + path + '>'
  @lineno = 1
  @pos = 0
  @queue = []
end
1059
+
1060
# Looks for an existing file system entry matching name case-insensitively.
# Backslashes are treated as path separators. A name starting with a
# separator is resolved from '/', any other name from './'.
# Each path component is matched against the entries of the directory
# resolved so far; empty components (as in 'a//b') are skipped.
# Returns the path rebuilt with the names actually present on disk, or nil
# when a component has no match or the directory to scan cannot be listed
# (missing, not a directory, not readable).
def file_exist_nocase(name)
  componants = name.tr('\\', '/').split('/')
  if componants[0] == ''
    ret = '/'
    componants.shift
  else
    ret = './'
  end
  componants.each { |cp|
    next if cp.empty?
    wanted = cp.downcase
    entries =
      begin
        Dir.entries(ret)
      rescue SystemCallError
        # the previous component matched something that cannot be listed
        return
      end
    return if not ccp = entries.find { |ccp_| ccp_.downcase == wanted }
    ret = File.join(ret, ccp)
  }
  ret
end
1077
+
1078
# Handles a '#pragma' directive found in the preprocessed source.
# Pragmas handled here:
#  'once': do not re-#include the current file
#  'no_warn_redefinition': turns off the macro redefinition warning
#  'include_dir' / 'include_path' "dir": puts dir at the front of the #include <xx> search path
#  'push_macro' / 'pop_macro' ("name"): saves the definition of a macro / restores the last saved one
# Any other pragma is forwarded to @pragma_callback.
# cmd is the directive token, skipspc the space-skipping reader of the caller.
def directive_pragma(cmd, skipspc)
  # next token that is not a :space, preprocessing enabled
  next_tok = lambda {
    t = nil
    loop do
      t = readtok
      break unless t && t.type == :space
    end
    t
  }

  tok = next_tok[]
  raise tok || cmd if !tok || tok.type != :string

  case tok.raw
  when 'once'
    @pragma_once[@filename[1..-2]] = true

  when 'no_warn_redefinition'
    @warn_redefinition = false

  when 'include_dir', 'include_path'
    dir = next_tok[]
    raise cmd, 'qstring expected' if !dir || dir.type != :quoted
    dir = ::File.expand_path(dir.value)
    raise cmd, 'invalid path' unless ::File.directory?(dir)
    @include_search_path.unshift dir

  when 'push_macro', 'pop_macro'
    @pragma_macro_stack ||= []
    # expected form: ( "macro name" )
    lp = next_tok[]
    m = next_tok[]
    rp = next_tok[]
    raise cmd if !rp || lp.type != :punct || rp.type != :punct || lp.raw != '(' || rp.raw != ')' || m.type != :quoted
    if tok.raw == 'push_macro'
      @pragma_macro_stack << @definition[m.value]
    else
      raise cmd, "macro stack empty" if @pragma_macro_stack.empty?
      saved = @pragma_macro_stack.pop
      if saved
        @definition[m.value] = saved
      else
        # the macro was undefined when pushed (push undefined macro allowed)
        @definition.delete m.value
      end
    end

  else
    @pragma_callback[tok]
  end

  eol = skipspc[]
  raise eol, 'eol expected' if eol && eol.type != :eol
  unreadtok eol
end
1125
+
1126
# parses a preprocessor expression (similar to Expression, + handles "defined(foo)"), returns an Expression
# every method takes the lexer it reads its tokens from
class PPExpression
  class << self
    # reads an operator from the lexer, returns the corresponding token
    # (with value set to the operator symbol) or nil
    # when nil is returned, the tokens read are pushed back on the lexer
    def readop(lexer)
      if not tok = lexer.readtok or tok.type != :punct
        lexer.unreadtok tok
        return
      end

      op = tok
      case op.raw
      # may be followed by itself or '='
      when '>', '<'
        if ntok = lexer.readtok and ntok.type == :punct and (ntok.raw == op.raw or ntok.raw == '=')
          op = op.dup
          op.raw << ntok.raw
        else
          lexer.unreadtok ntok
        end
      # may be followed by itself
      when '|', '&'
        if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == op.raw
          op = op.dup
          op.raw << ntok.raw
        else
          lexer.unreadtok ntok
        end
      # must be followed by '='
      when '!', '='
        ntok = lexer.readtok
        # anything but a '=' punctuation (including the end of input) means
        # this is not a binary operator: restore both tokens
        if not ntok or ntok.type != :punct or ntok.raw != '='
          lexer.unreadtok ntok
          lexer.unreadtok tok
          return
        end
        op = op.dup
        op.raw << ntok.raw
      # ok
      when '^', '+', '-', '*', '/', '%', '>>', '<<', '>=', '<=', '||', '&&', '!=', '=='
      # unknown
      else
        lexer.unreadtok tok
        return
      end
      op.value = op.raw.to_sym
      op
    end

    # handles floats and "defined" keyword
    # 'defined foo' / 'defined(foo)' sets tok.value to 1 or 0 according to lexer.definition
    # L'x' sets tok.value to the character read as a 16-bit little-endian integer
    # anything else is left to Expression.parse_num_value
    def parse_intfloat(lexer, tok)
      if tok.type == :string and tok.raw == 'defined'
        # the macro name must not be expanded: read without preprocessing
        nil while ntok = lexer.readtok_nopp and ntok.type == :space
        raise tok if not ntok
        if ntok.type == :punct and ntok.raw == '('
          nil while ntok = lexer.readtok_nopp and ntok.type == :space
          nil while rtok = lexer.readtok_nopp and rtok.type == :space
          raise tok if not rtok or rtok.type != :punct or rtok.raw != ')'
        end
        raise tok if not ntok or ntok.type != :string
        tok.value = lexer.definition[ntok.raw] ? 1 : 0
        return
      elsif tok.type == :string and tok.raw == 'L'
        ntok = lexer.readtok_nopp
        # ntok is nil when 'L' is the last token of the input
        if ntok and ntok.type == :quoted and ntok.raw[0] == ?'
          tok.raw << ntok.raw
          # unpack returns an array: keep the integer itself
          tok.value = (ntok.value + "\0\0").unpack('v').first # XXX endianness
        else
          lexer.unreadtok ntok
        end
      end

      Expression.parse_num_value(lexer, tok)
    end

    # returns the next value from lexer (parenthesised expression, immediate, variable, unary operators)
    # single-line only, and does not handle multibyte char string
    # returns nil (after pushing the token back) when no value can be read
    def parse_value(lexer)
      nil while tok = lexer.readtok and tok.type == :space
      return if not tok
      case tok.type
      when :string
        parse_intfloat(lexer, tok)
        val = tok.value || tok.raw
      when :quoted
        if tok.raw[0] != ?' or tok.value.length > 1 # allow single-char
          lexer.unreadtok tok
          return
        end
        val = tok.value[0]
      when :punct
        case tok.raw
        when '('
          val = parse(lexer)
          nil while ntok = lexer.readtok and ntok.type == :space
          raise tok, "')' expected after #{val.inspect} got #{ntok.inspect}" if not ntok or ntok.type != :punct or ntok.raw != ')'
        when '!', '+', '-', '~'
          nil while ntok = lexer.readtok and ntok.type == :space
          lexer.unreadtok ntok
          raise tok, 'need expression after unary operator' if not val = parse_value(lexer)
          val = Expression[tok.raw.to_sym, val]
        when '.'
          parse_intfloat(lexer, tok)
          if not tok.value
            lexer.unreadtok tok
            return
          end
          val = tok.value
        else
          lexer.unreadtok tok
          return
        end
      else
        lexer.unreadtok tok
        return
      end
      # skip the spaces following the value
      nil while tok = lexer.readtok and tok.type == :space
      lexer.unreadtok tok
      val
    end

    # parses a sequence of values separated by binary operators
    # uses a value stack and an operator stack, ordered through Expression::OP_PRIO
    # returns an Expression, or nil if no value could be read
    def parse(lexer)
      opstack = []
      stack = []

      return if not e = parse_value(lexer)

      stack << e

      while op = readop(lexer)
        nil while ntok = lexer.readtok and ntok.type == :space
        lexer.unreadtok ntok
        # fold the pending operators that OP_PRIO does not allow op to go over
        until opstack.empty? or Expression::OP_PRIO[op.value][opstack.last]
          stack << Expression.new(opstack.pop, stack.pop, stack.pop)
        end

        opstack << op.value

        raise op, 'need rhs' if not e = parse_value(lexer)

        stack << e
      end

      until opstack.empty?
        stack << Expression.new(opstack.pop, stack.pop, stack.pop)
      end

      Expression[stack.first]
    end
  end
end
1276
+ end
1277
+ end