metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,52 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/ppc/opcodes'
8
+ require 'metasm/parse'
9
+
10
+ module Metasm
11
+ class PowerPC
12
+ # TODO
13
# checks that a parsed argument has the class expected for an operand slot
# op:  the opcode being matched (unused here)
# sym: the operand kind symbol taken from the opcode definition
# arg: the parsed argument
# returns true/false; raises a RuntimeError for an operand kind this parser does not know
def parse_arg_valid?(op, sym, arg)
  # special case for lw reg, imm32(reg) ? (pseudo-instr, need to convert to 'lui t0, up imm32 ori t0 down imm32 add t0, reg lw reg, 0(t0)
  case sym
  when :rs, :rt, :rd; arg.kind_of? Reg
  when :sa, :i16, :i20, :i26; arg.kind_of? Expression
  when :rs_i16; arg.kind_of? Memref
  when :ft; arg.kind_of? FpReg
  # this is the PowerPC parser: name the right cpu in the diagnostic
  else raise "internal error: ppc arg #{sym.inspect}"
  end
end
23
+
24
# reads one instruction argument from pgm
# returns a Reg, an FpReg, whatever Expression.parse returns, a Memref for 'off(base reg)',
# or nil when there is no token left
def parse_argument(pgm)
  pgm.skip_space
  first = pgm.nexttok
  return unless first

  # a bare word may be an integer or a floating-point register name
  name = first.raw if first.type == :string
  if name and Reg.s_to_i[name]
    pgm.readtok
    return Reg.new(Reg.s_to_i[name])
  end
  if name and FpReg.s_to_i[name]
    pgm.readtok
    return FpReg.new(FpReg.s_to_i[name])
  end

  offset = Expression.parse(pgm)
  pgm.skip_space
  # check memory indirection: 'off(base reg)' # XXX scaled index ?
  return offset unless offset and pgm.nexttok and pgm.nexttok.type == :punct and pgm.nexttok.raw == '('

  pgm.readtok
  pgm.skip_space_eol
  basetok = pgm.readtok
  raise first, "Invalid base #{basetok}" unless basetok and basetok.type == :string and Reg.s_to_i[basetok.raw]
  base = Reg.new(Reg.s_to_i[basetok.raw])
  pgm.skip_space_eol
  closing = pgm.readtok
  raise first, "Invalid memory reference, ')' expected" if not closing or closing.type != :punct or closing.raw != ')'
  Memref.new(base, offset)
end
51
+ end
52
+ end
@@ -0,0 +1,1277 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+
9
+
10
+ module Metasm
11
+ # A source text preprocessor (C-like)
12
+ # defines the methods nexttok, readtok and unreadtok
13
+ # they spit out Tokens of type :
14
+ # :string for words (/[a-zA-Z0-9$_]+/)
15
+ # :punct for punctuation (/[.,:*+-]/ etc), any unhandled character
16
+ # :space for space/tabs/comment/\r
17
+ # :eol for newline :space including at least one \n not escaped
18
+ # :quoted for quoted string, tok.raw includes delimiter and all content. tok.value holds the interpreted value (handles \x, \oct, \r etc). 1-line only
19
+ # or nil on end of stream
20
+ # \ at end of line discards a newline, otherwise returns a tok :punct with the \
21
+ # preprocessor directives start with a :punct '#' just after an :eol (so you can have spaces before #), they take a whole line
22
+ # comments are C/C++ style (//...\n or /*...*/), returned as :eol (resp. :space)
23
+ class Preprocessor
24
+ # a token, as returned by the preprocessor
25
+ class Token
26
+ # the token type: :space, :eol, :quoted, :string, :punct, ...
27
+ attr_accessor :type
28
+ # the interpreted value of the token (Integer for an int, etc)
29
+ attr_accessor :value
30
+ # the raw string that gave this token
31
+ attr_accessor :raw
32
+ # a list of tokens this one is expanded from (Preprocessor macro expansion)
33
+ attr_accessor :expanded_from
34
+
35
+ include Backtrace
36
+
37
+ def initialize(backtrace)
38
+ @backtrace = backtrace
39
+ @value = nil
40
+ @raw = ''
41
+ end
42
+
43
# used when doing 'raise tok, "foo"'
# builds a ParseError whose message is msg, followed by the macro expansion chain
# (only when msg is given), the token raw text (shortened when longer than 35 chars)
# and the backtrace string of the token
# msg is never modified
def exception(msg='syntax error')
  # work on a private copy: String#to_s returns the receiver itself, so appending to it
  # would alter the caller's message, and nil.to_s is a frozen string on recent rubies
  msgh = msg.to_s.dup
  if msg
    msgh << ' near '
    expanded_from.to_a.each { |ef| msgh << ef.exception(nil).message << " expanded to \n\t" }
  end
  msgh << ((@raw.length > 35) ? (@raw[0..10] + '<...>' + @raw[-10..-1]).inspect : @raw.inspect)
  msgh << " at " << backtrace_str
  ParseError.new msgh
end
55
+
56
# returns a copy of this token: same backtrace object and type, duplicated raw string,
# duplicated value when it is a String, duplicated expanded_from list when it was ever set
def dup
  self.class.new(backtrace).tap { |copy|
    copy.type = @type
    copy.value = (@value.kind_of?(String) ? @value.dup : @value)
    copy.raw = @raw.dup
    copy.expanded_from = @expanded_from.dup if defined? @expanded_from
  }
end
64
+ end
65
+
66
+ # a preprocessor macro
67
+ class Macro
68
+ # the token holding the name used in the macro definition
69
+ attr_accessor :name
70
+ # array of tokens of formal arguments
71
+ attr_accessor :args
72
+ # array of tokens of macro body
73
+ attr_accessor :body
74
+ # bool
75
+ attr_accessor :varargs
76
+
77
+ def initialize(name)
78
+ @name = name
79
+ @body = []
80
+ end
81
+
82
+
83
+ # parses an argument list from the lexer or from a list of tokens
84
+ # modifies the list, returns an array of list of tokens/nil
85
+ # handles nesting
86
+ def self.parse_arglist(lexer, list=nil)
87
+ readtok = lambda { list ? list.shift : lexer.readtok(false) }
88
+ unreadtok = lambda { |t| list ? (list.unshift(t) if t) : lexer.unreadtok(t) }
89
+ tok = nil
90
+ unreadlist = []
91
+ unreadlist << tok while tok = readtok[] and tok.type == :space
92
+ if not tok or tok.type != :punct or tok.raw != '('
93
+ unreadtok[tok]
94
+ unreadlist.reverse_each { |t| unreadtok[t] }
95
+ return nil
96
+ end
97
+ args = []
98
+ # each argument is any token sequence
99
+ # if it includes an '(' then find the matching ')', whatever is inside (handle nesting)
100
+ # arg cannot include ',' in the top-level
101
+ # args are parsed with no macro expansion
102
+ # convert any space/eol sequence to a single space, strips them at begin/end of argument
103
+ loop do
104
+ arg = []
105
+ nest = 0
106
+ loop do
107
+ raise lexer, 'unterminated arg list' if not tok = readtok[]
108
+ case tok.type
109
+ when :eol, :space
110
+ next if arg.last and arg.last.type == :space
111
+ tok = tok.dup
112
+ tok.type = :space
113
+ tok.raw = ' '
114
+ when :punct
115
+ case tok.raw
116
+ when ','; break if nest == 0
117
+ when ')'; break if nest == 0 ; nest -= 1
118
+ when '('; nest += 1
119
+ end
120
+ end
121
+ arg << tok
122
+ end
123
+ arg.pop if arg.last and arg.last.type == :space
124
+ args << arg if not arg.empty? or args.length > 0 or tok.raw != ')'
125
+ break if tok.raw == ')'
126
+ end
127
+ args
128
+ end
129
+
130
+ # applies a preprocessor macro
131
+ # parses arguments if needed
132
+ # macros are lazy
133
+ # fills tokens.expanded_from
134
+ # returns an array of tokens
135
+ def apply(lexer, name, args, list=nil)
136
+ expfrom = name.expanded_from.to_a + [name]
137
+ if args
138
+ # hargs is a hash argname.raw => array of tokens
139
+ hargs = @args.zip(args).inject({}) { |h, (af, ar)| h.update af.raw => ar }
140
+
141
+ if not varargs
142
+ raise name, 'invalid argument count' if args.length != @args.length
143
+ else
144
+ raise name, 'invalid argument count' if args.length < @args.length
145
+ virg = name.dup # concat remaining args in __VA_ARGS__
146
+ virg.type = :punct
147
+ virg.raw = ','
148
+ va = args[@args.length..-1].map { |a| a + [virg.dup] }.flatten
149
+ va.pop
150
+ hargs['__VA_ARGS__'] = va
151
+ end
152
+ else
153
+ hargs = {}
154
+ end
155
+
156
+ res = []
157
+ b = @body.map { |t| t = t.dup ; t.expanded_from = expfrom ; t }
158
+ while t = b.shift
159
+ if a = hargs[t.raw]
160
+ # expand macros
161
+ a = a.dup
162
+ while at = a.shift
163
+ margs = nil
164
+ if at.type == :string and am = lexer.definition[at.raw] and not at.expanded_from.to_a.find { |ef| ef.raw == @name.raw } and
165
+ ((am.args and margs = Macro.parse_arglist(lexer, a)) or not am.args)
166
+ toks = am.apply(lexer, at, margs, a)
167
+ a = toks + a # reroll
168
+ else
169
+ res << at.dup if not res.last or res.last.type != :space or at.type != :space
170
+ end
171
+ end
172
+ elsif t.type == :punct and t.raw == '##'
173
+ # the '##' operator: concat the next token to the last in body
174
+ nil while t = b.shift and t.type == :space
175
+ res.pop while res.last and res.last.type == :space
176
+ if not a = hargs[t.raw]
177
+ a = [t]
178
+ end
179
+ if varargs and t.raw == '__VA_ARGS__' and res.last and res.last.type == :punct and res.last.raw == ','
180
+ if args.length == @args.length # pop last , if no vararg passed # XXX poof(1, 2,) != poof(1, 2)
181
+ res.pop
182
+ else # allow merging with ',' without warning
183
+ res.concat a
184
+ end
185
+ else
186
+ a = a[1..-1] if a.first and a.first.type == :space
187
+ if not res.last or res.last.type != :string or not a.first or a.first.type != :string
188
+ puts name.exception("cannot merge token #{res.last.raw} with #{a.first ? a.first.raw : 'nil'}").message if not a.first or (a.first.raw != '.' and res.last.raw != '.') if $VERBOSE
189
+ res.concat a
190
+ else
191
+ res[-1] = res[-1].dup
192
+ res.last.raw << a.first.raw
193
+ res.concat a[1..-1]
194
+ end
195
+ end
196
+ elsif args and t.type == :punct and t.raw == '#' # map an arg to a qstring
197
+ nil while t = b.shift and t.type == :space
198
+ t.type = :quoted
199
+ t.value = hargs[t.raw].map { |aa| aa.raw }.join
200
+ t.value = t.value[1..-1] if t.value[0] == ?\ # delete leading space
201
+ t.raw = t.value.inspect
202
+ res << t
203
+ else
204
+ res << t
205
+ end
206
+ end
207
+ res
208
+ end
209
+
210
# parses the argument list and the body from lexer
# converts # + # to ## in body
# fills @args (only when an argument list is present), @varargs and @body
# the :eol ending the definition is pushed back to the lexer
# returns true if the body passes the sanity checks, false otherwise (a warning is
# printed in $VERBOSE mode); raises through @name on a malformed argument list
def parse_definition(lexer)
  varg = nil

  # the first '.' of a '...' was just read: consume the two other dots and the closing ')'
  finish_ellipsis = lambda {
    t1 = lexer.readtok_nopp
    t2 = lexer.readtok_nopp
    t3 = lexer.readtok_nopp
    t3 = lexer.readtok_nopp while t3 and t3.type == :space
    raise @name, 'booh' if not t1 or t1.type != :punct or t1.raw != '.' or
      not t2 or t2.type != :punct or t2.raw != '.' or
      not t3 or t3.type != :punct or t3.raw != ')'
    @varargs = true
  }

  if tok = lexer.readtok_nopp and tok.type == :punct and tok.raw == '('
    @args = []
    loop do
      nil while tok = lexer.readtok_nopp and tok.type == :space
      # check '...'
      if tok and tok.type == :punct and tok.raw == '.'
        finish_ellipsis[]
        break
      end
      break if tok and tok.type == :punct and tok.raw == ')' and @args.empty?	# allow empty list
      raise @name, 'invalid arg definition' if not tok or tok.type != :string
      @args << tok
      nil while tok = lexer.readtok_nopp and tok.type == :space
      # check 'name...': the last named argument becomes the variadic one
      if tok and tok.type == :punct and tok.raw == '.'
        finish_ellipsis[]
        varg = @args.pop.raw
        break
      end
      raise @name, 'invalid arg separator' if not tok or tok.type != :punct or (tok.raw != ')' and tok.raw != ',')
      break if tok.raw == ')'
    end
  else lexer.unreadtok tok
  end

  # skip the spaces before the body
  nil while tok = lexer.readtok_nopp and tok.type == :space
  lexer.unreadtok tok

  while tok = lexer.readtok_nopp
    tok = tok.dup
    case tok.type
    when :eol
      lexer.unreadtok tok
      break
    when :space
      # collapse any space sequence to a single ' '
      next if @body.last and @body.last.type == :space
      tok.raw = ' '
    when :string
      tok.raw = '__VA_ARGS__' if varg and tok.raw == varg
    when :punct
      if tok.raw == '#'
        ntok = lexer.readtok_nopp
        if ntok and ntok.type == :punct and ntok.raw == '#'
          tok.raw << '#'
        else
          lexer.unreadtok ntok
        end
      end
    end
    @body << tok
  end
  @body.pop if @body.last and @body.last.type == :space

  # check macro is correct
  invalid_body = nil
  if (@body[-1] and @body[-1].raw == '##') or (@body[0] and @body[0].raw == '##')
    invalid_body ||= 'cannot have ## at begin or end of macro body'
  end
  if args
    if @args.map { |a| a.raw }.uniq.length != @args.length
      invalid_body ||= 'duplicate macro parameter'
    end
    @body.each_with_index { |tok_, i|
      if tok_.type == :punct and tok_.raw == '#'
        a = @body[i+1]
        a = @body[i+2] if not a or a.type == :space
        # a is nil when '#' is the last token of the body: that is an invalid body too
        if not a or a.type != :string or (not @args.find { |aa| aa.raw == a.raw } and (not varargs or a.raw != '__VA_ARGS__'))
          invalid_body ||= 'cannot have # followed by non-argument'
        end
      end
    }
  end
  if invalid_body
    puts "W: #{lexer.filename}:#{lexer.lineno}, in #{@name.raw}: #{invalid_body}" if $VERBOSE
    false
  else
    true
  end
end
307
+
308
# returns the '#define' line that recreates this macro
# when comment is true, it is preceded by a '// from file:line' comment built from the
# last two entries of the name token backtrace
def dump(comment = true)
  origin = comment ? "\n// from #{@name.backtrace[-2, 2] * ':'}\n" : ''
  params = ''
  if args
    names = @args.map { |t| t.raw }
    names += ['...'] if varargs
    params = '(' + names.join(', ') + ')'
  end
  text = @body.map { |t| t.raw }.join
  "#{origin}#define #{@name.raw}#{params} #{text}"
end
317
+ end
318
+
319
+ # special object, handles __FILE__ __LINE__ __COUNTER__ __DATE__ __TIME__ macros
320
+ class SpecialMacro
321
+ def args ; end
322
+ def body ; [@name] end
323
+
324
+ attr_accessor :name
325
+ def initialize(raw)
326
+ @name = Token.new(nil)
327
+ @name.type = :string
328
+ @name.raw = raw
329
+ end
330
+
331
+ def apply(lexer, name, emptyarglist, toklist=nil)
332
+ tok = @name.dup
333
+ tok.expanded_from = name.expanded_from.to_a + [name]
334
+ case @name.raw
335
+ when '__FILE__', '__DATE__', '__TIME__' # returns a :quoted
336
+ tok.type = :quoted
337
+ tok.value = \
338
+ case @name.raw
339
+ when '__FILE__'
340
+ name = name.expanded_from.first if name.expanded_from
341
+ name.backtrace.to_a[-2].to_s
342
+ when '__DATE__'; Time.now.strftime('%b %e %Y')
343
+ when '__TIME__'; Time.now.strftime('%H:%M:%S')
344
+ end
345
+ tok.raw = tok.value.inspect
346
+ when '__LINE__', '__COUNTER__' # returns a :string
347
+ tok.type = :string
348
+ case @name.raw
349
+ when '__LINE__'
350
+ name = name.expanded_from.first if name.expanded_from
351
+ tok.value = name.backtrace.to_a[-1]
352
+ when '__COUNTER__'
353
+ tok.value = @counter ||= 0
354
+ @counter += 1
355
+ end
356
+ tok.raw = tok.value.to_s
357
+ else raise name, 'internal error'
358
+ end
359
+ [tok]
360
+ end
361
+ end
362
+
363
+ # the raw string we're reading
364
+ attr_accessor :text, :pos
365
+ # the backtrace information for current file
366
+ attr_accessor :filename, :lineno
367
+ # the unreadtok queue
368
+ attr_accessor :queue
369
+ # the backtrace (array of previous [filename, lineno, text, pos] that #included us)
370
+ attr_accessor :backtrace
371
+ # a hash of macro definitions: macro name => Macro (or SpecialMacro) object
372
+ attr_accessor :definition
373
+ # array of directories to search for #included <files>
374
+ attr_accessor :include_search_path
375
+ # a Proc called for unhandled #pragma occurrences
376
+ # takes the pragma 1st tok as arg, must unread the final :eol, should fallback to the previous callback
377
+ attr_accessor :pragma_callback
378
+ # hash filename => file content
379
+ attr_accessor :hooked_include
380
+ attr_accessor :warn_redefinition
381
+
382
+ # global default search directory for #included <files>
383
+ @@include_search_path = ['/usr/include']
384
+ def self.include_search_path ; @@include_search_path end
385
+ def self.include_search_path=(np) @@include_search_path=np end
386
+
387
+ def initialize(text='')
388
+ @queue = []
389
+ @backtrace = []
390
+ @definition = %w[__FILE__ __LINE__ __COUNTER__ __DATE__ __TIME__].inject({}) { |h, n| h.update n => SpecialMacro.new(n) }
391
+ @include_search_path = @@include_search_path.dup
392
+ # stack of :accept/:discard/:discard_all/:testing, represents the current nesting of #if..#endif
393
+ @ifelse_nesting = []
394
+ @text = text
395
+ @pos = 0
396
+ @filename = 'unknown'
397
+ @lineno = 1
398
+ @warn_redefinition = true
399
+ @hooked_include = {}
400
+ @pragma_once = {}
401
+ @pragma_callback = lambda { |otok|
402
+ tok = otok
403
+ str = tok.raw.dup
404
+ str << tok.raw while tok = readtok and tok.type != :eol
405
+ unreadtok tok
406
+ puts otok.exception("unhandled pragma #{str.inspect}").message if $VERBOSE
407
+ }
408
+ define '__METASM__', VERSION
409
+ end
410
+
411
+ def exception(msg='syntax error')
412
+ backtrace_str = Backtrace.backtrace_str([@filename, @lineno] + @backtrace.map { |f, l, *a| [f, l] }.flatten)
413
+ ParseError.new "at #{backtrace_str}: #{msg}"
414
+ end
415
+
416
# returns the preprocessed content
# reads tokens until the end of the stream and concatenates them:
# a :space token becomes a single ' ', at most two consecutive :eol are kept,
# :quoted tokens keep their raw form, other tokens use their value when they have one
def dump
  ret = ''
  neol = 0
  while not eos?
    # readtok may return nil although eos? was false (eg the source ends with a
    # preprocessor directive): there is nothing left to dump
    break if not t = readtok
    case t.type
    when :space; ret << ' '
    when :eol; ret << "\n" if (neol += 1) <= 2
    when :quoted; neol = 0 ; ret << t.raw	# keep quoted style
    else neol = 0 ; ret << (t.value || t.raw).to_s
    end
  end
  ret
end
431
+
432
+ attr_accessor :traced_macros
433
+ # preprocess text, and retrieve all macros defined in #included <files> and used in the text
434
+ # returns a C source-like string
435
+ def self.factorize(text, comment=false)
436
+ p = new(text)
437
+ p.traced_macros = []
438
+ p.readtok while not p.eos?
439
+ p.dump_macros(p.traced_macros, comment)
440
+ end
441
+
442
# dumps the definition of the macros whose name is in the list + their dependencies
# a macro is emitted after the macros its body refers to, except inside dependency cycles
# names with no current definition are ignored
# returns one big C-style source string
def dump_macros(list, comment = true)
  depend = {}
  # build dependency graph (we can output macros in any order, but it's more human-readable)
  walk = lambda { |mname|
    depend[mname] ||= []
    # the name may have no definition: it has no dependency, and nothing is output for it below
    if macro = @definition[mname]
      macro.body.each { |t|
        name = t.raw
        if @definition[name]
          depend[mname] << name
          if not depend[name]
            depend[name] = []
            walk[name]
          end
        end
      }
    end
  }
  list.each { |mname| walk[mname] }

  res = []
  while not depend.empty?
    # macros whose dependencies are all already dumped (a self-reference does not count)
    todo_now = depend.keys.find_all { |k| (depend[k] - [k]).empty? }
    if todo_now.empty?
      # only cycles are left: find one and dump its members together
      dep_cycle = lambda { |ary|
        deps = depend[ary.last]
        if deps.include? ary.first; ary
        elsif (deps-ary).find { |d| deps = dep_cycle[ary + [d]] }; deps
        end
      }
      if not depend.find { |k, dep| todo_now = dep_cycle[[k]] }
        todo_now = depend.keys
      end
    end
    todo_now.sort.each { |k|
      res << @definition[k].dump(comment) if @definition[k].kind_of? Macro
      depend.delete k
    }
    depend.each_key { |k| depend[k] -= todo_now }
  end
  res.join("\n")
end
484
+
485
+ # starts a new lexer, with the specified initial filename/line number (for backtraces)
486
+ def feed(text, filename='unknown', lineno=1)
487
+ raise self, 'cannot start new text, did not finish current source' if not eos?
488
+ feed!(text, filename, lineno)
489
+ end
490
+
491
+ # starts a new lexer, with the specified initial filename/line number (for backtraces)
492
+ # discards old text/whatever
493
+ def feed!(text, filename='unknown', lineno=1)
494
+ raise ArgumentError, 'need something to parse!' if not text
495
+ @text = text
496
+ # @filename[-1] used in trace_macros to distinguish generic/specific files
497
+ @filename = "\"#{filename}\""
498
+ @lineno = lineno
499
+ @pos = 0
500
+ @queue = []
501
+ self
502
+ end
503
+
504
+ # calls #feed on the content of the file
505
+ def feed_file(filename)
506
+ feed(File.read(filename), filename)
507
+ end
508
+
509
+ Trigraph = { ?= => ?#, ?) => ?], ?! => ?|,
510
+ ?( => ?[, ?' => ?^, ?> => ?},
511
+ ?/ => ?\\,?< => ?{, ?- => ?~ }
512
+
513
# reads one character from self.text
# updates self.lineno
# handles trigraphs and \-continued lines
# a "\r\n" sequence is returned as a single "\n"
# saves the current position and line number so that one ungetchar can restore them
# returns nil past the end of the text
def getchar
  @ungetcharpos = @pos
  @ungetcharlineno = @lineno
  c = @text[@pos]
  @pos += 1

  # check trigraph
  if c == ?? and @text[@pos] == ?? and repl = Trigraph[@text[@pos+1]]
    # show the trigraph actually found: its 3rd char, not the '?' we just read
    puts "can i has trigraf plox ??#{@text[@pos+1].chr} (#@filename:#@lineno)" if $VERBOSE
    c = repl
    @pos += 2
  end

  # check line continuation
  # TODO portability
  if c == ?\\ and (@text[@pos] == ?\n or (@text[@pos] == ?\r and @text[@pos+1] == ?\n))
    @lineno += 1
    @pos += 1 if @text[@pos] == ?\r
    @pos += 1
    # the backslash and the newline vanish: return what follows
    return getchar
  end

  if c == ?\r and @text[@pos] == ?\n
    @pos += 1
    c = ?\n
  end

  # update lineno
  if c == ?\n
    @lineno += 1
  end

  c
end
550
+
551
+ def ungetchar
552
+ @pos = @ungetcharpos
553
+ @lineno = @ungetcharlineno
554
+ nil
555
+ end
556
+
557
+ # returns true if no more data is available
558
+ def eos?
559
+ @pos >= @text.length and @queue.empty? and @backtrace.empty?
560
+ end
561
+
562
+ # push back a token, will be returned on the next readtok
563
+ # lifo
564
+ def unreadtok(tok)
565
+ @queue << tok if tok
566
+ nil
567
+ end
568
+
569
+ # calls readtok_nopp and handles preprocessor directives
570
+ def readtok(expand_macros = true)
571
+ lastpos = @pos
572
+ tok = readtok_nopp
573
+
574
+ if not tok
575
+ # end of file: resume parent
576
+ if not @backtrace.empty?
577
+ raise ParseError, "parse error in #@filename: unmatched #if/#endif" if @backtrace.last.pop != @ifelse_nesting.length
578
+ @filename, @lineno, @text, @pos, @queue = @backtrace.pop
579
+ tok = readtok
580
+ end
581
+
582
+ elsif (tok.type == :eol or lastpos == 0) and @ifelse_nesting.last != :testing
583
+ unreadtok tok if lastpos == 0
584
+ # detect preprocessor directive
585
+ # state = 1 => seen :eol, 2 => seen #
586
+ pretok = []
587
+ rewind = true
588
+ state = 1
589
+ loop do
590
+ pretok << (ntok = readtok_nopp)
591
+ break if not ntok
592
+ if ntok.type == :space # nothing
593
+ elsif state == 1 and ntok.type == :punct and ntok.raw == '#' and not ntok.expanded_from
594
+ state = 2
595
+ elsif state == 2 and ntok.type == :string and not ntok.expanded_from
596
+ rewind = false if preprocessor_directive(ntok)
597
+ break
598
+ else break
599
+ end
600
+ end
601
+ if rewind
602
+ # false alarm: revert
603
+ pretok.reverse_each { |t| unreadtok t }
604
+ end
605
+ tok = readtok if lastpos == 0 # else return the :eol
606
+
607
+ elsif expand_macros and tok.type == :string and m = @definition[tok.raw] and not tok.expanded_from.to_a.find { |ef| ef.raw == m.name.raw } and
608
+ ((m.args and margs = Macro.parse_arglist(self)) or not m.args)
609
+
610
+ if defined? @traced_macros and tok.backtrace[-2].to_s[0] == ?" and m.name and m.name.backtrace[-2].to_s[0] == ?<
611
+ @traced_macros |= [tok.raw] # we are in a normal file and expand to an header-defined macro
612
+ end
613
+
614
+ m.apply(self, tok, margs).reverse_each { |t| unreadtok t }
615
+
616
+ tok = readtok
617
+ end
618
+
619
+ tok
620
+ end
621
+
622
# Lexes and returns the next raw token, without any preprocessing.
# Tokens given back through @queue are served first.
# Quoted strings are delegated to readtok_nopp_str (which sets tok.value),
# C comments come back as :space tokens and C++ comments as :eol tokens.
# Returns nil at end of input.
def readtok_nopp
  return @queue.pop unless @queue.empty?

  # flatten the include stack to [file, line, file, line, ...] and append the current position
  trace = []
  @backtrace.each { |frame| trace.push(frame[0], frame[1]) }
  tok = Token.new(trace << @filename << @lineno)

  first = getchar
  case first
  when nil
    return nil

  when ?', ?"
    # the helper fills in type, raw and value
    readtok_nopp_str(tok, first)

  when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_
    # identifier or number: greedy run of word characters
    tok.type = :string
    tok.raw << first
    # the loop condition filters nil out, so nil is never matched against the ranges
    while (nxt = getchar)
      case nxt
      when ?a..?z, ?A..?Z, ?0..?9, ?$, ?_
        tok.raw << nxt
      else
        break
      end
    end
    # give back whatever stopped the run (done at end of input too)
    ungetchar

  when ?\s, ?\t, ?\r, ?\n, ?\f
    # run of blanks: it is an :eol as soon as it holds one line terminator
    tok.type = ((first == ?\s || first == ?\t) ? :space : :eol)
    tok.raw << first
    while (blank = getchar)
      case blank
      when ?\s, ?\t then nil
      when ?\n, ?\f, ?\r then tok.type = :eol
      else break
      end
      tok.raw << blank
    end
    ungetchar

  when ?/
    tok.raw << first
    second = getchar
    if second == ?/
      # C++ comment: runs up to and including the end of line
      tok.type = :eol
      tok.raw << second
      while (ch = getchar)
        tok.raw << ch
        break if ch == ?\n
      end
    elsif second == ?*
      # C comment: runs up to the first '*/'
      tok.type = :space
      tok.raw << second
      after_star = false
      loop do
        ch = getchar
        raise tok, 'unterminated c++ comment' unless ch
        tok.raw << ch
        if ch == ?*
          after_star = true
        elsif ch == ?/
          break if after_star
        else
          after_star = false
        end
      end
    else
      # just a slash
      ungetchar
      tok.type = :punct
    end

  else
    # any other character is a one-char punctuation token
    tok.type = :punct
    tok.raw << first
  end

  tok
end
701
+
702
# Reads the remainder of a quoted literal whose opening +delimiter+ (' or ")
# has just been consumed.
# tok.raw receives the source text (both delimiters included), tok.value the
# decoded content with escape sequences interpreted.
# Raises through +tok+ on an unterminated string or escape; returns +tok+.
def readtok_nopp_str(tok, delimiter)
  tok.type = :quoted
  tok.raw << delimiter
  tok.value = ''

  loop do
    ch = getchar
    raise tok, 'unterminated string' unless ch
    tok.raw << ch

    break if ch == delimiter

    if ch == ?\\
      esc = getchar
      raise tok, 'unterminated escape' unless esc
      tok.raw << esc

      decoded =
        case esc
        when ?n then ?\n
        when ?r then ?\r
        when ?t then ?\t
        when ?a then ?\a
        when ?b then ?\b
        when ?v then ?\v
        when ?f then ?\f
        when ?e then ?\e
        when ?#, ?\\, ?', ?" then esc
        when ?\n then ''  # already handled by getchar
        when ?x
          # \x followed by one or two hex digits
          hex = ''
          while hex.length < 2
            digit = getchar
            raise tok, 'unterminated escape' unless digit
            case digit
            when ?0..?9, ?a..?f, ?A..?F
              hex << digit
              tok.raw << digit
            else
              ungetchar
              break
            end
          end
          raise tok, 'unterminated escape' if hex.empty?
          hex.hex
        when ?0..?7
          # up to three octal digits, the first one being esc itself
          oct = '' << esc
          while oct.length < 3
            digit = getchar
            raise tok, 'unterminated escape' unless digit
            case digit
            when ?0..?7
              oct << digit
              tok.raw << digit
            else
              ungetchar
              break
            end
          end
          oct.oct
        else
          # unknown escape sequence: keep the character as is
          esc
        end

      tok.value << decoded
    elsif ch == ?\n
      # a raw newline ends the line before the closing delimiter was found
      ungetchar
      raise tok, 'unterminated string'
    else
      tok.value << ch
    end
  end

  tok
end
763
+
764
+
765
# Registers the preprocessor macro +name+, silently replacing any previous one.
# +value+ is the replacement: when it is a String and the lexer has no pending
# input, it is fed to the lexer and tokenized into the macro body; any other
# non-nil value becomes a single :string token holding value.to_s
# (XXX multi-token values are not split in that case). nil gives an empty body.
# +from+ ("file:line...", by default the caller) is used as the tokens' backtrace.
def define(name, value=nil, from=caller.first)
  from =~ /^(.*?):(\d+)/
  file, line = $1, $2.to_i

  ident = Token.new([file, line])
  ident.type = :string
  ident.raw = name.dup
  @definition[name] = Macro.new(ident)

  if value.kind_of?(::String) && eos?
    feed(value, file, line)
    until eos?
      @definition[name].body << readtok
    end
  elsif value
    single = Token.new([file, line])
    single.type = :string
    single.raw = value.to_s
    @definition[name].body << single
  end
end
784
+
785
# Same as define, but leaves an already existing definition of +name+ untouched.
def define_weak(name, value=nil, from=caller.first)
  return if @definition[name]
  define(name, value, from)
end
789
+
790
# Defines +name+ and records it in @defined_strong, so that later
# #define / #undef directives for that name are ignored.
def define_strong(name, value=nil, from=caller.first)
  @defined_strong ||= []
  @defined_strong << name
  define(name, value, from)
end
795
+
796
# Records +name+ in @defined_strong without defining it: later #define
# directives for that name are ignored, so it stays undefined.
def nodefine_strong(name)
  @defined_strong ||= []
  @defined_strong << name
end
800
+
801
# Handles a '#directive' line; +cmd+ is the token holding the directive name
# (the '#' has already been consumed).
#
# Conditionals are tracked in @ifelse_nesting, one entry per open #if:
#   :accept      - the current branch is kept
#   :discard     - the current branch is dropped, a later #elif/#else may be kept
#   :discard_all - the whole conditional is dropped up to its #endif
#   :testing     - transient marker set while the directive itself is being parsed
# After the directive has been handled, the source is skipped for as long as the
# innermost entry is a discarding one; nested directives found on the way are
# handled recursively.
#
# +ocmd+ (internal use) is the directive that started the skipping, it is used to
# report an unterminated conditional.
#
# Returns true if the directive was handled, false if it is unknown, and nil
# when a define/undef/include/error/warning/line/pragma is ignored because it
# sits in a discarded branch.
# Raises through the offending token on syntax errors and on #error.
def preprocessor_directive(cmd, ocmd = cmd)
  # reads raw tokens up to the first one which is not :space, returns it (nil at end of input)
  # XXX for all commands that may change @ifelse_nesting, ensure last element is :testing to disallow any other preprocessor directive to be run in a bad environment (while looking ahead)
  skipspc = lambda {
    loop do
      t = readtok_nopp
      break t if not t or t.type != :space
    end
  }

  # XXX do not preprocess tokens when searching for :eol, it will trigger preprocessor directive detection from readtok

  # declared here so that the 'raise eol || tok || cmd' below can name whichever token is at fault
  eol = tok = nil
  case cmd.raw
  when 'if'
    case @ifelse_nesting.last
    when :accept, nil
      @ifelse_nesting << :testing
      raise cmd, 'expr expected' if not test = PPExpression.parse(self)
      eol = skipspc[]
      raise eol, 'pp syntax error' if eol and eol.type != :eol
      unreadtok eol
      # only an expression reducing to a non-zero integer selects the branch
      case test.reduce
      when 0;       @ifelse_nesting[-1] = :discard
      when Integer; @ifelse_nesting[-1] = :accept
      else          @ifelse_nesting[-1] = :discard
      end
    when :discard, :discard_all
      @ifelse_nesting << :discard_all
    end

  when 'ifdef'
    case @ifelse_nesting.last
    when :accept, nil
      @ifelse_nesting << :testing
      raise eol || tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string or (eol = skipspc[] and eol.type != :eol)
      unreadtok eol
      @ifelse_nesting[-1] = (@definition[tok.raw] ? :accept : :discard)
    when :discard, :discard_all
      @ifelse_nesting << :discard_all
    end

  when 'ifndef'
    case @ifelse_nesting.last
    when :accept, nil
      @ifelse_nesting << :testing
      raise eol || tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string or (eol = skipspc[] and eol.type != :eol)
      unreadtok eol
      @ifelse_nesting[-1] = (@definition[tok.raw] ? :discard : :accept)
    when :discard, :discard_all
      @ifelse_nesting << :discard_all
    end

  when 'elif'
    case @ifelse_nesting.last
    when :accept
      # a previous branch was taken: drop everything up to #endif
      @ifelse_nesting[-1] = :discard_all
    when :discard
      @ifelse_nesting[-1] = :testing
      raise cmd, 'expr expected' if not test = PPExpression.parse(self)
      raise eol, 'pp syntax error' if eol = skipspc[] and eol.type != :eol
      unreadtok eol
      case test.reduce
      when 0;       @ifelse_nesting[-1] = :discard
      when Integer; @ifelse_nesting[-1] = :accept
      else          @ifelse_nesting[-1] = :discard
      end
    when :discard_all
    else raise cmd, 'pp syntax error'
    end

  when 'else'
    @ifelse_nesting << :testing
    @ifelse_nesting.pop
    raise eol || cmd, 'pp syntax error' if @ifelse_nesting.empty? or (eol = skipspc[] and eol.type != :eol)
    unreadtok eol
    case @ifelse_nesting.last
    when :accept
      @ifelse_nesting[-1] = :discard_all
    when :discard
      @ifelse_nesting[-1] = :accept
    when :discard_all
    end

  when 'endif'
    @ifelse_nesting << :testing
    @ifelse_nesting.pop
    raise eol || cmd, 'pp syntax error' if @ifelse_nesting.empty? or (eol = skipspc[] and eol.type != :eol)
    unreadtok eol
    @ifelse_nesting.pop

  when 'define'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    raise tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string
    m = Macro.new(tok)
    # the definition is always parsed (it consumes the line), but only stored
    # when the name is not protected by define_strong / nodefine_strong
    valid = m.parse_definition(self)
    if not defined? @defined_strong or not @defined_strong.include? tok.raw
      puts "W: pp: redefinition of #{tok.raw} at #{tok.backtrace_str},\n prev def at #{@definition[tok.raw].name.backtrace_str}" if @definition[tok.raw] and $VERBOSE and @warn_redefinition
      @definition[tok.raw] = m if valid
    end

  when 'undef'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    raise eol || tok || cmd, 'pp syntax error' if not tok = skipspc[] or tok.type != :string or (eol = skipspc[] and eol.type != :eol)
    # give the :eol back in every case: when it was only done for unprotected
    # names, an ignored #undef swallowed the end of line and a directive on the
    # following line was no longer seen as starting a line
    unreadtok eol
    if not defined? @defined_strong or not @defined_strong.include? tok.raw
      @definition.delete tok.raw
    end

  when 'include', 'include_next'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    directive_include(cmd, skipspc)

  when 'error', 'warning'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept
    # the message is the raw text of the rest of the line
    msg = ''
    while tok = readtok_nopp and tok.type != :eol
      msg << tok.raw
    end
    unreadtok tok
    if cmd.raw == 'warning'
      puts cmd.exception("#warning#{msg}").message if $VERBOSE
    else
      raise cmd, "#error#{msg}"
    end

  when 'line'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    raise tok || cmd if not tok = skipspc[] or tok.type != :string
    @lineno = Integer(tok.raw) rescue raise(tok, 'bad line number')
    raise eol if eol = skipspc[] and eol.type != :eol
    unreadtok eol

  when 'pragma'
    return if @ifelse_nesting.last and @ifelse_nesting.last != :accept

    directive_pragma(cmd, skipspc)

  else return false
  end

  # skip #ifndef'd parts of the source
  # state: 1 => just seen :eol, 2 => seen '#' at the start of a line, 0 => anything else
  state = 1
  while @ifelse_nesting.last == :discard or @ifelse_nesting.last == :discard_all
    begin
      tok = skipspc[]
    rescue ParseError
      # react as gcc -E: <"> unterminated in #if 0 => ok, </*> unterminated => error (the " will fail at eol)
      retry
    end

    if not tok; raise ocmd, 'pp unterminated conditional'
    elsif tok.type == :eol; state = 1
    elsif state == 1 and tok.type == :punct and tok.raw == '#'; state = 2
    elsif state == 2 and tok.type == :string; state = preprocessor_directive(tok, ocmd) ? 1 : 0
    else state = 0
    end
  end

  true
end
969
+
970
# Handles '#include' and '#include_next': locates the file and makes its
# content the current lexer input, after saving the current input state in
# @backtrace.
# +cmd+ is the directive token, +skipspc+ a callable returning the next raw
# token which is not :space.
#   "file" is looked up relative to the including file, <file> along
#   @include_search_path; both fall back to a case-insensitive search.
# Content registered in @hooked_include[ipath] is used in place of a real file,
# and files marked with '#pragma once' are not included again.
def directive_include(cmd, skipspc)
  raise cmd, 'nested too deeply' if backtrace.length > 200 # gcc

  path = nil

  # allow preprocessing: the file name may come from a macro
  tok = readtok
  tok = readtok while tok && tok.type == :space
  quoted_form = (tok && tok.type == :quoted)
  angle_form = (tok && tok.type == :punct && tok.raw == '<')
  raise tok || cmd, 'pp syntax error' unless quoted_form || angle_form

  if tok.type == :quoted
    ipath = tok.value
    if @filename[0] == ?< or @backtrace.find { |btf, *a| btf[0] == ?< }
      # XXX local include from a std include... (kikoo windows.h !)
      # each lookup leaves its candidate in path; the first successful one wins
      found = @include_search_path.find { |d| ::File.exist?(path = ::File.join(d, ipath)) } ||
              @include_search_path.find { |d| path = file_exist_nocase(::File.join(d, ipath)) } ||
              (path = file_exist_nocase(::File.join(::File.dirname(@filename[1..-2]), ipath)))
      path = nil unless found
    elsif ipath[0] != ?/
      # relative to the directory of the including file
      path = ::File.join(::File.dirname(@filename[1..-2]), ipath)
      path = file_exist_nocase(path) unless ::File.exist?(path)
    else
      path = ipath
      path = file_exist_nocase(path) unless ::File.exist?(path)
    end
  else
    # no more preprocessing : allow comments/multiple space/etc
    ipath = ''
    loop do
      tok = readtok_nopp
      break if not tok or (tok.type == :punct and tok.raw == '>')
      raise cmd, 'syntax error' if tok.type == :eol
      ipath << tok.raw
    end
    raise cmd, 'pp syntax error, unterminated path' unless tok

    if ipath[0] != ?/
      isp = @include_search_path
      if cmd.raw == 'include_next'
        # resume the search after the directory holding the current file
        idx = isp.find { |d| @filename[1, d.length] == d }
        raise self, 'include_next sux' unless idx
        isp = isp[isp.index(idx)+1..-1]
      end
      found = isp.find { |d| ::File.exist?(path = ::File.join(d, ipath)) } ||
              isp.find { |d| path = file_exist_nocase(::File.join(d, ipath)) }
      path = nil unless found
    end
  end

  eol = skipspc[]
  raise eol if eol and eol.type != :eol
  unreadtok eol
  return if cmd.raw == 'include_next' and not path and not @hooked_include[ipath] # XXX

  if @pragma_once[path || ipath]
    puts "metasm preprocessor: not reincluding #{path} (pragma once)" if $DEBUG
    return
  end

  # save the state of the including file
  @backtrace << [@filename, @lineno, @text, @pos, @queue, @ifelse_nesting.length]

  # gcc-style autodetect
  # XXX the headers we already parsed may have needed a prepare_gcc...
  # maybe restart parsing ?
  if ipath == 'stddef.h' and not path and not @hooked_include[ipath]
    tk = tok.dup
    tk.raw = 'prepare_gcc'
    @pragma_callback[tk]
    if @hooked_include[ipath]
      puts "metasm pp: autodetected gcc-style headers" if $VERBOSE
    end
  end

  if @hooked_include[ipath]
    path = '<hooked>/'+ipath
    puts "metasm preprocessor: including #{path}" if $DEBUG
    @text = @hooked_include[ipath]
  else
    puts "metasm preprocessor: including #{path}" if $DEBUG
    raise cmd, "No such file or directory #{ipath.inspect}" if not path or not ::File.exist? path
    raise cmd, 'filename too long' if path.length > 4096 # gcc
    @text = ::File.read(path)
  end

  # @filename[-1] used in trace_macros to distinguish generic/specific files
  opening, closing = (tok.type == :quoted) ? ['"', '"'] : ['<', '>']
  @filename = opening + path + closing
  @lineno = 1
  @pos = 0
  @queue = []
end
1059
+
1060
# Looks +name+ up on the filesystem ignoring case.
# The path is split on '/' and '\', then resolved one component at a time,
# from '/' for an absolute path and from './' otherwise, taking in every
# directory the first entry whose name matches case-insensitively.
# Returns the path that was found (spelled as on disk), or nil when a component
# has no match or when a directory on the way cannot be listed (e.g. an
# intermediate component is a regular file, or the directory is unreadable).
def file_exist_nocase(name)
  componants = name.tr('\\', '/').split('/')
  if componants[0] == ''
    # leading separator: absolute path
    ret = '/'
    componants.shift
  else
    ret = './'
  end
  componants.each { |cp|
    wanted = cp.downcase
    ccp = Dir.entries(ret).find { |entry| entry.downcase == wanted }
    return if not ccp
    ret = File.join(ret, ccp)
  }
  ret
rescue SystemCallError
  # Dir.entries failed: whatever the reason, there is no such file for the caller
  nil
end
1077
+
1078
# Handles a '#pragma' directive found in the preprocessed source.
# +cmd+ is the directive token, +skipspc+ a callable returning the next raw
# token which is not :space.
# Pragmas handled here:
#  'once': do not re-#include this file
#  'no_warn_redefinition': disable the macro redefinition warning
#  'include_dir' / 'include_path': insert a directory in the #include <xx> search path (this new dir will be searched first)
#  'push_macro' / 'pop_macro': allow temporary redefinition of a macro with later restoration
# any other pragma is forwarded to @pragma_callback
def directive_pragma(cmd, skipspc)
  # next preprocessed token which is not :space (nil at end of input)
  next_tok = lambda {
    t = readtok
    t = readtok while t && t.type == :space
    t
  }

  tok = next_tok[]
  raise tok || cmd if not tok or tok.type != :string

  case tok.raw
  when 'once'
    # @filename is wrapped in "" or <>: strip the decoration
    @pragma_once[@filename[1..-2]] = true

  when 'no_warn_redefinition'
    @warn_redefinition = false

  when 'include_dir', 'include_path'
    dir = next_tok[]
    raise cmd, 'qstring expected' if not dir or dir.type != :quoted
    dir = ::File.expand_path dir.value
    raise cmd, 'invalid path' if not ::File.directory? dir
    @include_search_path.unshift dir

  when 'push_macro', 'pop_macro'
    @pragma_macro_stack ||= []
    # expected form: ( "macro_name" )
    lp = next_tok[]
    m = next_tok[]
    rp = next_tok[]
    well_formed = rp &&
                  lp.type == :punct && lp.raw == '(' &&
                  m.type == :quoted &&
                  rp.type == :punct && rp.raw == ')'
    raise cmd unless well_formed

    if tok.raw == 'push_macro'
      @pragma_macro_stack << @definition[m.value]
    else
      raise cmd, "macro stack empty" if @pragma_macro_stack.empty?
      saved = @pragma_macro_stack.pop
      if saved
        @definition[m.value] = saved
      else
        # push undefined macro allowed
        @definition.delete m.value
      end
    end

  else
    @pragma_callback[tok]
  end

  eol = skipspc[]
  raise eol, 'eol expected' if eol and eol.type != :eol
  unreadtok eol
end
1125
+
1126
# Parses a preprocessor expression, as found after #if / #elif.
# Similar to Expression, plus support for the "defined(foo)" / "defined foo"
# construct. All methods are class methods taking the lexer to read from;
# PPExpression.parse returns an Expression (nil if no value could be read).
class PPExpression
  class << self
    # Reads an operator from the lexer.
    # Returns the operator token with tok.value set to the operator symbol, or
    # nil (with every token read given back to the lexer) if the next token is
    # not a known operator.
    def readop(lexer)
      if not tok = lexer.readtok or tok.type != :punct
        lexer.unreadtok tok
        return
      end

      op = tok
      case op.raw
      # may be followed by itself or '='
      when '>', '<'
        if ntok = lexer.readtok and ntok.type == :punct and (ntok.raw == op.raw or ntok.raw == '=')
          # dup is shallow: build a new string so that the source token is left untouched
          op = op.dup
          op.raw = op.raw + ntok.raw
        else
          lexer.unreadtok ntok
        end
      # may be followed by itself
      when '|', '&'
        if ntok = lexer.readtok and ntok.type == :punct and ntok.raw == op.raw
          op = op.dup
          op.raw = op.raw + ntok.raw
        else
          lexer.unreadtok ntok
        end
      # must be followed by '='
      when '!', '='
        ntok = lexer.readtok
        # 'and' and 'or' share the same precedence: each test is spelled out
        # with 'or' so that end of input and any other punctuation both bail out
        if not ntok or ntok.type != :punct or ntok.raw != '='
          lexer.unreadtok ntok
          lexer.unreadtok tok
          return
        end
        op = op.dup
        op.raw = op.raw + ntok.raw
      # ok
      when '^', '+', '-', '*', '/', '%', '>>', '<<', '>=', '<=', '||', '&&', '!=', '=='
      # unknown
      else
        lexer.unreadtok tok
        return
      end
      op.value = op.raw.to_sym
      op
    end

    # Handles the "defined" keyword and wide char literals (L'x'), and hands
    # everything else (integers, floats) to Expression.parse_num_value.
    # The result is stored in tok.value.
    # "defined" is read with readtok_nopp, so that the tested name is not
    # macro-expanded.
    def parse_intfloat(lexer, tok)
      if tok.type == :string and tok.raw == 'defined'
        nil while ntok = lexer.readtok_nopp and ntok.type == :space
        raise tok if not ntok
        if ntok.type == :punct and ntok.raw == '('
          # defined ( name )
          nil while ntok = lexer.readtok_nopp and ntok.type == :space
          nil while rtok = lexer.readtok_nopp and rtok.type == :space
          raise tok if not rtok or rtok.type != :punct or rtok.raw != ')'
        end
        raise tok if not ntok or ntok.type != :string
        tok.value = lexer.definition[ntok.raw] ? 1 : 0
        return
      elsif tok.type == :string and tok.raw == 'L'
        ntok = lexer.readtok_nopp
        # ntok is nil when 'L' is the last token of the input
        if ntok and ntok.type == :quoted and ntok.raw[0] == ?'
          tok.raw << ntok.raw
          # NOTE(review): unpack returns a one-element Array, not an Integer - confirm this is what later stages expect
          tok.value = (ntok.value + "\0\0").unpack('v') # XXX endianness
        else
          lexer.unreadtok ntok
        end
      end

      Expression.parse_num_value(lexer, tok)
    end

    # Returns the next value from lexer (parenthesised expression, immediate, variable, unary operators).
    # Single-line only, and does not handle multibyte char string.
    # Returns nil, with the offending token given back to the lexer, when no
    # value can be read.
    def parse_value(lexer)
      nil while tok = lexer.readtok and tok.type == :space
      return if not tok
      case tok.type
      when :string
        parse_intfloat(lexer, tok)
        # no numeric value: keep the identifier itself
        val = tok.value || tok.raw
      when :quoted
        if tok.raw[0] != ?' or tok.value.length > 1 # allow single-char
          lexer.unreadtok tok
          return
        end
        val = tok.value[0]
      when :punct
        case tok.raw
        when '('
          val = parse(lexer)
          nil while ntok = lexer.readtok and ntok.type == :space
          raise tok, "')' expected after #{val.inspect} got #{ntok.inspect}" if not ntok or ntok.type != :punct or ntok.raw != ')'
        when '!', '+', '-', '~'
          nil while ntok = lexer.readtok and ntok.type == :space
          lexer.unreadtok ntok
          raise tok, 'need expression after unary operator' if not val = parse_value(lexer)
          val = Expression[tok.raw.to_sym, val]
        when '.'
          # float written without integer part
          parse_intfloat(lexer, tok)
          if not tok.value
            lexer.unreadtok tok
            return
          end
          val = tok.value
        else
          lexer.unreadtok tok
          return
        end
      else
        lexer.unreadtok tok
        return
      end
      # eat the spaces following the value
      nil while tok = lexer.readtok and tok.type == :space
      lexer.unreadtok tok
      val
    end

    # Parses a full expression: values separated by binary operators, combined
    # according to Expression::OP_PRIO using an operator stack and a value stack.
    # Returns an Expression, or nil if the input does not start with a value.
    # Raises through the operator token when an operator has no right-hand side.
    def parse(lexer)
      opstack = []
      stack = []

      return if not e = parse_value(lexer)

      stack << e

      while op = readop(lexer)
        nil while ntok = lexer.readtok and ntok.type == :space
        lexer.unreadtok ntok
        # reduce the pending operators as long as OP_PRIO does not let op go on top of them
        until opstack.empty? or Expression::OP_PRIO[op.value][opstack.last]
          # arguments are evaluated left to right: operator, right operand, left operand
          stack << Expression.new(opstack.pop, stack.pop, stack.pop)
        end

        opstack << op.value

        raise op, 'need rhs' if not e = parse_value(lexer)

        stack << e
      end

      until opstack.empty?
        stack << Expression.new(opstack.pop, stack.pop, stack.pop)
      end

      Expression[stack.first]
    end
  end
end
1276
+ end
1277
+ end