rogue_parser 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,38 @@
1
+ # -*- ruby -*-
2
+
3
+ Autotest.add_hook :initialize do |at|
4
+ at.extra_files << "../../ParseTree/dev/test/pt_testcase.rb"
5
+ at.libs << ":../../ParseTree/dev/lib:../../ParseTree/dev/test"
6
+ at.add_exception 'unit'
7
+ at.add_exception 'coverage'
8
+ at.add_exception '.diff'
9
+ at.add_exception 'coverage.info'
10
+
11
+ at.unit_diff = "unit_diff -u -b"
12
+
13
+ at.add_mapping(/^lib\/.*\.y$/) do |f, _|
14
+ at.files_matching %r%^test/.*#{File.basename(f, '.y').gsub '_', '_?'}.rb$%
15
+ end
16
+
17
+ at.add_mapping(/pt_testcase.rb/) do |f, _|
18
+ at.files_matching(/^test.*rb$/)
19
+ end
20
+
21
+ %w(TestEnvironment TestStackState).each do |klass|
22
+ at.extra_class_map[klass] = "test/test_ruby_parser_extras.rb"
23
+ end
24
+ end
25
+
26
+ Autotest.add_hook :run_command do |at|
27
+ system "rake parser"
28
+ end
29
+
30
+ class Autotest
31
+ def ruby
32
+ File.expand_path "~/.multiruby/install/1.9.0-0/bin/ruby"
33
+ end
34
+ end if ENV['ONENINE']
35
+
36
+ require 'autotest/rcov'
37
+ Autotest::RCov.command = 'rcov_info'
38
+ # Autotest::RCov.pattern = 'test/test_ruby_lexer.rb'
@@ -0,0 +1,5 @@
1
+ == 1.0.0 / 2007-12-20
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
@@ -0,0 +1,9 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/ruby_lexer.rb
7
+ lib/ruby_parser.y
8
+ test/test_ruby_lexer.rb
9
+ test/test_ruby_parser.rb
@@ -0,0 +1,76 @@
1
+
2
+
3
+ 'rogue_parser' is 'ruby_parser' with this patch applied:
4
+
5
+ http://rubyforge.org/tracker/?func=detail&aid=20106&group_id=439&atid=1780
6
+
7
+
8
+ I hope I can get rid of this temporary repo by the next release of ruby_parser
9
+
10
+
11
+
12
+
13
+ ruby_parser
14
+ by Ryan Davis
15
+ http://parsetree.rubyforge.org/
16
+
17
+ == DESCRIPTION:
18
+
19
+ ruby_parser (RP) is a ruby parser written in pure ruby (utilizing
20
+ racc--which does by default use a C extension). RP's output is
21
+ the same as ParseTree's output: s-expressions using ruby's arrays and
22
+ base types.
23
+
24
+ == FEATURES/PROBLEMS:
25
+
26
+ * Pure ruby, no compiles.
27
+ * Includes preceding comment data for defn/defs/class/module nodes!
28
+ * Incredibly simple interface.
29
+ * Output is 100% equivalent to ParseTree.
30
+ * Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
31
+ * Known Issue: Speed sucks currently. 5500 tests currently run in 21 min.
32
+ * Known Issue: Code is waaay ugly. Port of a port. Not my fault. Will fix RSN.
33
+ * Known Issue: I don't currently support newline nodes.
34
+ * Known Issue: Totally awesome.
35
+ * Known Issue: dasgn_curr decls can be out of order from ParseTree's.
36
+
37
+ == SYNOPSIS:
38
+
39
+ RubyParser.new.parse "1+1"
40
+ # => s(:call, s(:lit, 1), :+, s(:array, s(:lit, 1)))
41
+
42
+ == REQUIREMENTS:
43
+
44
+ * ruby. woot.
45
+ * ParseTree is needed for Sexp class... crap. I might break that out.
46
+ * ParseTree for testing.
47
+ * racc full package for parser development.
48
+
49
+ == INSTALL:
50
+
51
+ * sudo gem install ruby_parser
52
+
53
+ == LICENSE:
54
+
55
+ (The MIT License)
56
+
57
+ Copyright (c) 2007 Ryan Davis
58
+
59
+ Permission is hereby granted, free of charge, to any person obtaining
60
+ a copy of this software and associated documentation files (the
61
+ 'Software'), to deal in the Software without restriction, including
62
+ without limitation the rights to use, copy, modify, merge, publish,
63
+ distribute, sublicense, and/or sell copies of the Software, and to
64
+ permit persons to whom the Software is furnished to do so, subject to
65
+ the following conditions:
66
+
67
+ The above copyright notice and this permission notice shall be
68
+ included in all copies or substantial portions of the Software.
69
+
70
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
71
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
72
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
73
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
74
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
75
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
76
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,134 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ $: << '../../ParseTree/dev/lib/'
7
+ require './lib/ruby_parser_extras.rb'
8
+
9
+ hoe = Hoe.new('rogue_parser', RubyParser::VERSION) do |parser|
10
+ parser.rubyforge_name = 'parsetree'
11
+ parser.developer('Ryan Davis', 'ryand-ruby@zenspider.com')
12
+ parser.extra_deps << 'ParseTree'
13
+ end
14
+
15
+ hoe.spec.files += ['lib/ruby_parser.rb'] # jim.... cmon man
16
+ hoe.spec.files += ['lib/ruby_parser_extras.rb']
17
+
18
+ module Rake::TaskManager
19
+ def all_tasks
20
+ @tasks
21
+ end
22
+ end
23
+
24
+ Rake.application.all_tasks["default"].prerequisites.clear
25
+
26
+ [:default, :multi, :test].each do |t|
27
+ task t => :parser
28
+ end
29
+
30
+ path = "pkg/ruby_parser-#{RubyParser::VERSION}"
31
+ task path => :parser do
32
+ Dir.chdir path do
33
+ sh "rake parser"
34
+ end
35
+ end
36
+
37
+ task :parser => ["lib/ruby_parser.rb"]
38
+
39
+ rule '.rb' => '.y' do |t|
40
+ #sh "racc -l -t -o #{t.name} #{t.source}"
41
+ sh "racc -l -o #{t.name} #{t.source}"
42
+ # using racc 1.4.5... no -t...
43
+ end
44
+
45
+ task :clean do
46
+ rm_rf(Dir["**/*~"] +
47
+ Dir["**/*.diff"] +
48
+ Dir["coverage.info"] +
49
+ Dir["coverage"] +
50
+ Dir["lib/ruby_parser.rb"] +
51
+ Dir["lib/*.output"])
52
+ end
53
+
54
# Returns the next free sequence number for files matching +glob+.
#
# The number is taken from the first run of digits in the *file name* of
# the greatest (lexicographically last) match, plus one. Directory
# components are ignored so digits in a parent path cannot be mistaken
# for the sequence number.
#
# @param glob [String] a Dir[] pattern, e.g. "profile_lexer*.txt"
# @return [Integer] 1 when nothing matches yet, otherwise last number + 1
def next_num(glob)
  latest = Dir[glob].max
  return 1 unless latest

  File.basename(latest)[/\d+/].to_i + 1
end
57
+
58
# Runs zenprofile over test/test_ruby_<type>.rb and stores the combined
# stdout/stderr in the next numbered profile_<type>_NNN.txt file.
#
# @param type [String] which test file to profile ('lexer' or 'parser')
def profile(type)
  num = next_num("profile_#{type}*.txt")
  out = "profile_%s_%03d.txt" % [type, num]

  # "&>" is a bash extension; the command is run through sh, where a
  # POSIX shell would read it as "run in background, then truncate the
  # file". The portable spelling is "> file 2>&1".
  sh "zenprofile -w -Ilib:ext:bin:test -rtest/unit test/test_ruby_#{type}.rb > #{out} 2>&1"
end
62
+
63
+ task :profile do
64
+ profile 'lexer'
65
+ profile 'parser'
66
+ end
67
+
68
+ begin
69
+ require 'rcov/rcovtask'
70
+ Rcov::RcovTask.new do |t|
71
+ pattern = ENV['PATTERN'] || 'test/test_ruby_*.rb'
72
+
73
+ t.test_files = FileList[pattern]
74
+ t.verbose = true
75
+ t.rcov_opts << "--threshold 80"
76
+ t.rcov_opts << "--no-color"
77
+ end
78
+ rescue LoadError
79
+ # skip
80
+ end
81
+
82
+ desc "Compares PT to RP and deletes all files that match"
83
task :compare do
  files = Dir["unit/**/*.rb"]
  puts "Parsing #{files.size} files"
  files.each do |file|
    puts file
    # Pass the arguments as a list so file names containing spaces or
    # shell metacharacters are handed to cmp.rb verbatim instead of being
    # re-parsed by a shell. The file is removed only when cmp.rb succeeds.
    rm_f file if system "./cmp.rb", "-q", file
  end
  # prune directories that are now empty
  system 'find -d unit -type d -empty -exec rmdir {} \;'
end
92
+
93
+ desc "Compares PT to RP and stops on first failure"
94
task :find_bug do
  files = Dir["unit/**/*.rb"]
  puts "Parsing #{files.size} files"
  files.each do |file|
    puts file
    # List form: no shell is involved, so odd file names are safe. sh
    # aborts the task on the first non-zero exit status.
    sh "./cmp.rb", "-q", file
  end
end
102
+
103
+ task :sort do
104
+ sh 'grepsort "^ +def" lib/ruby_lexer.rb'
105
+ sh 'grepsort "^ +def (test|util)" test/test_ruby_lexer.rb'
106
+ end
107
+
108
+ task :rcov_info => :parser do
109
+ pattern = ENV['PATTERN'] || "test/test_*.rb"
110
+ ruby "-Ilib -S rcov --text-report --save coverage.info #{pattern}"
111
+ end
112
+
113
+ task :rcov_overlay do
114
+ rcov, eol = Marshal.load(File.read("coverage.info")).last[ENV["FILE"]], 1
115
+ puts rcov[:lines].zip(rcov[:coverage]).map { |line, coverage|
116
+ bol, eol = eol, eol + line.length
117
+ [bol, eol, "#ffcccc"] unless coverage
118
+ }.compact.inspect
119
+ end
120
+
121
+ task :loc do
122
+ loc1 = `wc -l ../1.0.0/lib/ruby_lexer.rb`[/\d+/]
123
+ flog1 = `flog -s ../1.0.0/lib/ruby_lexer.rb`[/\d+\.\d+/]
124
+ loc2 = `cat lib/ruby_lexer.rb lib/ruby_parser_extras.rb | wc -l`[/\d+/]
125
+ flog2 = `flog -s lib/ruby_lexer.rb lib/ruby_parser_extras.rb`[/\d+\.\d+/]
126
+
127
+ loc1, loc2, flog1, flog2 = loc1.to_i, loc2.to_i, flog1.to_f, flog2.to_f
128
+
129
+ puts "1.0.0: loc = #{loc1} flog = #{flog1}"
130
+ puts "dev : loc = #{loc2} flog = #{flog2}"
131
+ puts "delta: loc = #{loc2-loc1} flog = #{flog2-flog1}"
132
+ end
133
+
134
+ # vim: syntax=Ruby
@@ -0,0 +1,1329 @@
1
+ #$: << File.expand_path("~/Work/p4/zss/src/ParseTree/dev/lib") # for me, not you.
2
+ require 'sexp'
3
+ require 'ruby_parser_extras'
4
+
5
+ class RubyLexer
6
+ attr_accessor :command_start
7
+ attr_accessor :cmdarg
8
+ attr_accessor :cond
9
+ attr_accessor :nest
10
+
11
+ # Additional context surrounding tokens that both the lexer and
12
+ # grammar use.
13
+ attr_reader :lex_state
14
+
15
+ attr_accessor :lex_strterm
16
+
17
+ # Stream of data that yylex examines.
18
+ attr_reader :src
19
+
20
+ # Last token read via yylex.
21
+ attr_accessor :token
22
+
23
+ # Temporary buffer to build up a potential token. Consumer takes
24
+ # responsibility to reset this before use.
25
+ attr_accessor :token_buffer
26
+
27
+ # Value of last token which had a value associated with it.
28
+ attr_accessor :yacc_value
29
+
30
+ # What handles warnings
31
+ attr_accessor :warnings
32
+
33
+ EOF = :eof_haha!
34
+
35
+ # ruby constants for strings (should this be moved somewhere else?)
36
+ STR_FUNC_BORING = 0x00
37
+ STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
38
+ STR_FUNC_EXPAND = 0x02
39
+ STR_FUNC_REGEXP = 0x04
40
+ STR_FUNC_AWORDS = 0x08
41
+ STR_FUNC_SYMBOL = 0x10
42
+ STR_FUNC_INDENT = 0x20 # <<-HEREDOC
43
+
44
+ STR_SQUOTE = STR_FUNC_BORING
45
+ STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
46
+ STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
47
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
48
+ STR_SSYM = STR_FUNC_SYMBOL
49
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
50
+
51
+ # How the parser advances to the next token.
52
+ #
53
+ # @return true if not at end of file (EOF).
54
+
55
+ # if ENV['SPY'] then
56
+ # @@stats = Hash.new 0
57
+ #
58
+ # def self.stats
59
+ # @@stats
60
+ # end
61
+ #
62
+ # at_exit {
63
+ # require 'pp'
64
+ # pp RubyLexer.stats.sort_by {|k,v| -v}.first(20)
65
+ # }
66
+ # end
67
+
68
# Advances the lexer by one token.
#
# Runs yylex, stores the result in +token+ and reports whether there is
# more input to read.
#
# @return [Boolean] true unless the token just read is RubyLexer::EOF
def advance
  r = yylex
  self.token = r

  # The @@stats table is only created by the SPY block above, which is
  # currently commented out. Check that it exists before counting so that
  # setting SPY in the environment does not raise NameError.
  if ENV['SPY'] && self.class.class_variable_defined?(:@@stats)
    self.class.class_variable_get(:@@stats)[r] += 1
  end

  r != RubyLexer::EOF
end
76
+
77
# Reports, through #warning, that the first argument is ambiguous.
def arg_ambiguous
  message = "Ambiguous first argument. make sure."
  warning message
end
80
+
81
# Hands back every comment collected so far, joined into one string, and
# empties the collection.
def comments
  @comments.join.tap { @comments.clear }
end
86
+
87
# Pushes false onto both the cond and cmdarg stacks, moves the lexer to
# :expr_beg and stores s(val) as the current yacc value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }
  self.lex_state = :expr_beg
  self.yacc_value = s(val)
end
93
+
94
# Sets the lexer state that follows an operator: :expr_arg when the
# operator came right after a method-name or dot position, :expr_beg in
# every other case.
def fix_arg_lex_state
  after_name_or_dot = [:expr_fname, :expr_dot].include?(lex_state)
  self.lex_state = after_name_or_dot ? :expr_arg : :expr_beg
end
101
+
102
# Lexes the next piece of a here document.
#
# +here+ is the s(:heredoc, eos, func, last_line) term stored in
# lex_strterm: +eos+ is the terminator text, +func+ the STR_FUNC_* flags
# and +last_line+ the rest of the line that introduced the heredoc.
#
# Returns :tSTRING_END when the terminator is reached, :tSTRING_DVAR or
# :tSTRING_DBEG at an interpolation, and :tSTRING_CONTENT (with the text
# in yacc_value) otherwise. Raises through rb_compile_error when the
# input ends before the terminator is found.
def heredoc here # Region has 63 lines, 1595 characters
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0
  # The terminator is literal text (a quoted identifier may contain
  # characters such as "." or "+"), so escape it before building the
  # pattern; otherwise unrelated lines can be taken for the terminator.
  eos_src = Regexp.escape(eos)
  eos_re  = indent ? /[ \t]*#{eos_src}(\r?\n|\z)/ : /#{eos_src}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = s(eos)
    return :tSTRING_END
  end

  token_buffer.clear

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = s(src.matched)
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = s(src.matched)
      return :tSTRING_DBEG
    when src.scan(/#/) then
      token_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n" then
        # stopped at an interpolation: hand back what was read so far
        self.yacc_value = s(:str, token_buffer.join.delete("\r"))
        return :tSTRING_CONTENT
      else
        token_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # no interpolation: copy whole lines until the terminator is next
    until src.check(eos_re) do
      token_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = s(:heredoc, eos, func, last_line)
  self.yacc_value = s(:str, token_buffer.join.delete("\r"))

  return :tSTRING_CONTENT
end
166
+
167
# Reads the identifier that follows "<<" and, if it is a valid heredoc
# start, installs a s(:heredoc, ...) term in lex_strterm.
#
# The remainder of the current line is cut out of the source string and
# kept in the term. Returns :tXSTRING_BEG for a backtick identifier,
# :tSTRING_BEG for any other, and nil when no identifier follows.
def heredoc_identifier
  func = STR_FUNC_BORING
  term = nil
  token_buffer.clear

  if src.scan(/(-?)(['"`])(.*?)\2/) then
    # quoted identifier: the quote character selects the string flavour
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= { "\'" => STR_SQUOTE, '"' => STR_DQUOTE }.fetch(term, STR_XQUOTE)
    token_buffer << src[3]
  elsif src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  elsif src.scan(/(-?)(\w+)/) then
    # bare identifier behaves like a double-quoted one
    term = '"'
    func |= STR_DQUOTE
    func |= STR_FUNC_INDENT unless src[1].empty?
    token_buffer << src[2]
  else
    return nil
  end

  line = nil
  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    start, length = src.pos, src.matched_size
    line = src.string[start, length]
    src.string[start, length] = ''
  end

  self.lex_strterm = s(:heredoc, token_buffer.join, func, line)

  opener, token_type = term == '`' ? ["`", :tXSTRING_BEG] : ["\"", :tSTRING_BEG]
  self.yacc_value = s(opener)
  return token_type
end
217
+
218
# Builds a lexer with an empty token buffer, fresh cond/cmdarg state
# stacks, zero nesting depth and no stored comments, then calls #reset
# for the remaining per-parse state.
def initialize
  @comments = []
  self.token_buffer = []
  self.cond, self.cmdarg = StackState.new(:cond), StackState.new(:cmdarg)
  self.nest = 0

  reset
end
227
+
228
# Converts the text just matched by the scanner to an Integer in the
# given +base+, stores it in yacc_value and returns :tINTEGER.
# A doubled underscore in the literal is rejected via rb_compile_error.
def int_with_base base
  rb_compile_error "Invalid numeric format" if src.matched =~ /__/

  self.yacc_value = src.matched.to_i(base)
  :tINTEGER
end
235
+
236
# Writer for lex_state that only accepts Symbols; anything else raises.
def lex_state= o
  raise "wtf?" unless o.is_a? Symbol

  @lex_state = o
end
240
+
241
+ ##
242
+ # Parse a number from the input stream.
243
+ #
244
+ # Takes no arguments; the digits are read directly from +src+.
244
+ # @return a Symbol naming the token (:tINTEGER or :tFLOAT).
246
+
247
# Scans a numeric literal at the current position.
#
# Moves the lexer to :expr_end, then tries the literal forms in order:
# prefixed hex/binary/decimal, octal, float, plain decimal. Integer
# forms are delegated to int_with_base; floats store their value in
# yacc_value and return :tFLOAT. Malformed literals raise through
# rb_compile_error.
def parse_number
  self.lex_state = :expr_end

  # a radix prefix with nothing after it
  rb_compile_error "Invalid numeric format" if src.scan(/[+-]?0[xbd]\b/)

  return int_with_base(16) if src.scan(/[+-]?0x[a-f0-9_]+/i)
  return int_with_base(2)  if src.scan(/[+-]?0b[01_]+/)
  return int_with_base(10) if src.scan(/[+-]?0d[0-9_]+/)

  rb_compile_error "Illegal octal digit." if src.scan(/[+-]?0o?[0-7_]*[89]/)
  return int_with_base(8) if src.scan(/[+-]?0o?[0-7_]+|0o/)

  rb_compile_error "Trailing '_' in number." if src.scan(/[+-]?[\d_]+_(e|\.)/)

  if src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    rb_compile_error "Invalid numeric format" if number =~ /__/
    self.yacc_value = number.to_f
    return :tFLOAT
  end

  return int_with_base(10) if src.scan(/[+-]?0\b/) || src.scan(/[+-]?[\d_]+\b/)

  rb_compile_error "Bad number format"
end
280
+
281
# Lexes the opening of a %-literal (the "%" itself is already consumed).
#
# Determines the literal type and delimiter, installs the matching
# s(:strterm, ...) in lex_strterm and returns the opening token type.
# Raises through rb_compile_error for unknown types or when the input
# ends right after the opener.
def parse_quote
  # Long-hand (e.g. %Q{}) names its type before the delimiter;
  # short-hand (e.g. %{, %., %!, etc) has no type and means Q.
  short_hand = !src.scan(/[a-z0-9]{1,2}/i)

  if short_hand then
    c = 'Q'
  else
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c = src.matched
  end
  beg = src.getch

  if src.eos? || c == RubyLexer::EOF || beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char. "\0" is special to indicate beg=nnd and that no nesting?
  closer = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  if closer.nil? then
    nnd, beg = beg, "\0"
  else
    nnd = closer
  end

  self.yacc_value = s("%#{c}#{beg}")

  token_type, string_type =
    case c
    when 'Q' then
      label = short_hand ? nnd : c + beg
      self.yacc_value = s("%#{label}")
      [:tSTRING_BEG, STR_DQUOTE]
    when 'q' then
      [:tSTRING_BEG, STR_SQUOTE]
    when 'W' then
      src.scan(/\s*/)
      [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
    when 'w' then
      src.scan(/\s*/)
      [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
    when 'x' then
      [:tXSTRING_BEG, STR_XQUOTE]
    when 'r' then
      [:tREGEXP_BEG, STR_REGEXP]
    when 's' then
      self.lex_state = :expr_fname
      [:tSYMBEG, STR_SSYM]
    end

  if token_type.nil? then
    rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'."
  end

  self.lex_strterm = s(:strterm, string_type, nnd, beg)

  token_type
end
338
+
339
# Lexes the next piece of the string literal described by +quote+, a
# s(:strterm, string_type, term, open) term.
#
# Returns :tSTRING_END / :tREGEXP_END (with yacc_value set) at the
# terminator, ' ' between the words of a word list, :tSTRING_DVAR or
# :tSTRING_DBEG at an interpolation, and :tSTRING_CONTENT with the text
# in yacc_value otherwise. Raises through rb_compile_error when the
# input ends inside the string.
def parse_string(quote)
  _, string_type, term, open = quote

  # A nil type marks a word list whose terminator has already been
  # consumed (see the awords branch below). Check it before any flag
  # arithmetic rather than relying on NilClass#& to get this far.
  return :tSTRING_END unless string_type

  space = false # FIX: remove these
  func = string_type
  paren = open
  term_re = Regexp.escape term

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  space = true if awords && src.scan(/\s+/)

  if self.nest == 0 && src.scan(/#{term_re}/) then
    if awords then
      # emit one last word separator now; the next call ends the list
      quote[1] = nil
      return ' '
    elsif regexp then
      self.yacc_value = self.regx_options
      return :tREGEXP_END
    else
      self.yacc_value = s(term)
      return :tSTRING_END
    end
  end

  return ' ' if space

  self.token_buffer.clear

  if expand
    case
    when src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      return :tSTRING_DBEG
    when src.scan(/#/) then
      token_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = s(:str, token_buffer.join)
  return :tSTRING_CONTENT
end
394
+
395
# Raises SyntaxError with +msg+ followed by the current line number and
# the remainder of the line being scanned.
def rb_compile_error msg
  line = src.lineno
  near = src.rest[/^.*/].inspect

  raise SyntaxError, msg + ". near line #{line}: #{near}"
end
399
+
400
# Reads one escape sequence (the leading backslash is already consumed)
# and returns the string it stands for.
#
# Handles the single-letter escapes, octal (\NNN) and hex (\xN, \xNN)
# constants, and the \M-, \C- and \c modifier forms. Any other character
# is returned as-is. Raises through rb_compile_error for an incomplete
# modifier/hex escape or at end of input.
def read_escape
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\13"
  when src.scan(/a/) then                   # alarm(bell)
    "\007"
  when src.scan(/e/) then                   # escape
    "\033"
  when src.scan(/b/) then                   # backspace
    "\010"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    # three octal digits can exceed one byte (e.g. \777); keep the low
    # byte instead of letting Integer#chr raise RangeError
    (src.matched.to_i(8) & 0xff).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    # one or two digits, matching what tokadd_escape accepts
    src[1].to_i(16).chr
  when src.scan(/M-\\/) then
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/M-(.)/) then
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/C-\\|c\\/) then
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/C-\?|c\?/) then
    0177.chr
  when src.scan(/(C-|c)(.)/) then
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
450
+
451
# Reads the option letters that follow a regexp terminator.
#
# Returns the recognised letters (any of "ixmonesu") joined in source
# order, or "" when none follow. Unknown lower-case letters raise
# through rb_compile_error.
def regx_options
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  good, bad = letters.partition { |letter| letter =~ /^[ixmonesu]$/ }

  unless bad.empty? then
    plural = bad.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, bad.join.inspect])
  end

  good.join
end
465
+
466
# Returns the lexer to its start-of-input condition: command_start is
# true again, and the string term, last token, yacc value, source and
# lex state are all cleared.
def reset
  self.command_start = true
  [:lex_strterm=, :token=, :yacc_value=].each { |writer| send(writer, nil) }

  @src = @lex_state = nil
end
475
+
476
# Installs the text to lex. Only a String is accepted; it is wrapped in
# a StringScanner, which is what the +src+ reader then returns.
def src= src
  unless src.is_a? String then
    raise "bad src: #{src.inspect}"
  end

  @src = StringScanner.new(src)
end
480
+
481
# Moves everything currently in the token buffer onto the list of
# collected comments, leaving the buffer empty.
def store_comment
  @comments.concat token_buffer
  token_buffer.clear
end
485
+
486
# Copies one backslash escape from the source into the token buffer
# verbatim (backslash included), without interpreting it.
#
# An escaped newline is dropped. A modifier prefix (\M-, \C-, \c) that
# is followed by another backslash is copied and the following escape is
# handled recursively. Incomplete escapes raise via rb_compile_error.
def tokadd_escape term
  if src.scan(/\\\n/) then
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    token_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    token_buffer << src.matched
    tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    token_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/m) then
    token_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
505
+
506
# Appends string content to the token buffer until something that needs
# the caller's attention is reached: the terminator (at nesting depth
# zero), whitespace inside a word list, or the start of an
# interpolation. The scanner is left positioned on that character.
#
# +func+ is the STR_FUNC_* flag set, +term+ the closing delimiter and
# +paren+ the opening delimiter (or nil). Returns the last character or
# match seen, or RubyLexer::EOF when the input ran out.
def tokadd_string(func, term, paren)
  flag = lambda { |bit| (func & bit) != 0 }
  awords = flag[STR_FUNC_AWORDS]
  escape = flag[STR_FUNC_ESCAPE]
  expand = flag[STR_FUNC_EXPAND]
  regexp = flag[STR_FUNC_REGEXP]
  symbol = flag[STR_FUNC_SYMBOL]

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil

    # NOTE: the order of these branches is significant.
    case
    when paren_re && src.scan(paren_re) then
      # nested opener
      self.nest += 1
    when self.nest == 0 && src.scan(term_re) then
      # real terminator: step back so the caller sees it
      src.pos -= 1
      break
    when src.scan(term_re) then
      # closer of a nested pair
      self.nest -= 1
    when ((awords && src.scan(/\s/)) ||
          (expand && src.scan(/#(?=[\$\@\{])/))) then
      # word separator or interpolation start: step back and stop
      src.pos -= 1
      break
    when awords && src.scan(/\\\n/) then
      token_buffer << "\n"
      next
    when expand && src.scan(/\\\n/) then
      next
    when awords && src.scan(/\\\s/) then
      c = ' '
    when (expand && src.scan(/#(?!\n)/)) || src.scan(/\\\n/) then
      # do nothing
    when src.scan(/\\\\/) then
      token_buffer << '\\' if escape
      c = '\\'
    when regexp && src.check(/\\/) then
      self.tokadd_escape term
      next
    when expand && src.scan(/\\/) then
      c = self.read_escape
    when src.scan(/\\/) then
      delimiter_escaped = src.scan(term_re) || paren.nil? || src.scan(paren_re)
      token_buffer << "\\" unless delimiter_escaped
      # \\ case:
      # else if (ismbchar(c)) {
      #   int i, len = mbclen(c)-1;
      #   for (i = 0; i < len; i++) {
      #     tokadd(c);
      #     c = nextc();
      #   }
      # }
    else
      c = src.getch # FIX: I don't like this style
      if symbol && src.scan(/\0/) then
        rb_compile_error "symbol cannot contain '\\0'"
      end
    end

    c ||= src.matched
    token_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
577
+
578
# Receives a lexer warning message. Currently discards it.
def warning s
  nil # do nothing for now
end
581
+
582
+ ##
583
+ # Returns the next token. Also sets yacc_value if needed.
584
+ #
585
+ # @return the token type: a Symbol such as :tSTAR, a String such as "=" or "\n", or RubyLexer::EOF at end of input
586
+ # TODO: remove ALL sexps coming from here and move up to grammar
587
+ # TODO: only literal values should come up from the lexer.
588
+
589
+ def yylex
590
+ c = ''
591
+ space_seen = false
592
+ command_state = false
593
+ token_buffer.clear
594
+ src = self.src
595
+
596
+ self.token = nil
597
+ self.yacc_value = nil
598
+
599
+ if lex_strterm then
600
+ token = nil
601
+
602
+ if lex_strterm[0] == :heredoc then
603
+ token = self.heredoc(lex_strterm)
604
+ if token == :tSTRING_END then
605
+ self.lex_strterm = nil
606
+ self.lex_state = :expr_end
607
+ end
608
+ else
609
+ token = self.parse_string(lex_strterm)
610
+
611
+ if token == :tSTRING_END || token == :tREGEXP_END then
612
+ self.lex_strterm = nil
613
+ self.lex_state = :expr_end
614
+ end
615
+ end
616
+
617
+ return token
618
+ end
619
+
620
+ command_state = self.command_start
621
+ self.command_start = false
622
+
623
+ last_state = lex_state
624
+
625
+ loop do
626
+ case
627
+ when src.scan(/\004|\032|\000/), src.eos? then # ^D, ^Z, EOF
628
+ return RubyLexer::EOF
629
+ when src.scan(/\ |\t|\f|\r|\13/) then # white spaces, 13 = '\v
630
+ space_seen = true
631
+ next
632
+ when src.scan(/#|\n/) then
633
+ c = src.matched
634
+ if c == '#' then
635
+ src.unread c # ok
636
+
637
+ while src.scan(/\s*#.*(\n+|\z)/) do
638
+ token_buffer << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
639
+ end
640
+
641
+ self.store_comment
642
+
643
+ if src.eos? then
644
+ return RubyLexer::EOF
645
+ end
646
+ end
647
+ # Replace a string of newlines with a single one
648
+
649
+ src.scan(/\n+/)
650
+
651
+ if [:expr_beg, :expr_fname,
652
+ :expr_dot, :expr_class].include? lex_state then
653
+ next
654
+ end
655
+
656
+ self.command_start = true
657
+ self.lex_state = :expr_beg
658
+ return "\n"
659
+ when src.scan(/\*\*=/) then
660
+ self.lex_state = :expr_beg
661
+ self.yacc_value = s("**")
662
+ return :tOP_ASGN
663
+ when src.scan(/\*\*/) then
664
+ self.yacc_value = s("**")
665
+ self.fix_arg_lex_state
666
+ return :tPOW
667
+ when src.scan(/\*\=/) then
668
+ self.lex_state = :expr_beg
669
+ self.yacc_value = s("*")
670
+ return :tOP_ASGN
671
+ when src.scan(/\*/) then
672
+ result = if lex_state.is_argument && space_seen && !src.check(/\s/) then
673
+ warning("`*' interpreted as argument prefix")
674
+ :tSTAR
675
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
676
+ :tSTAR
677
+ else
678
+ :tSTAR2
679
+ end
680
+
681
+ self.yacc_value = s("*")
682
+
683
+ self.fix_arg_lex_state
684
+
685
+ return result
686
+ when src.scan(/\!\=/) then
687
+ self.lex_state = :expr_beg
688
+ self.yacc_value = s("!=")
689
+ return :tNEQ
690
+ when src.scan(/\!~/) then
691
+ self.lex_state = :expr_beg
692
+ self.yacc_value = s("!~")
693
+ return :tNMATCH
694
+ when src.scan(/\!/) then
695
+ self.lex_state = :expr_beg
696
+ self.yacc_value = s("!")
697
+ return :tBANG
698
+ when src.scan(/\=\=\=/) then
699
+ self.fix_arg_lex_state
700
+ self.yacc_value = s("===")
701
+ return :tEQQ
702
+ when src.scan(/\=\=/) then
703
+ self.fix_arg_lex_state
704
+ self.yacc_value = s("==")
705
+ return :tEQ
706
+ when src.scan(/\=~/) then
707
+ self.fix_arg_lex_state
708
+ self.yacc_value = s("=~")
709
+ return :tMATCH
710
+ when src.scan(/\=>/) then
711
+ self.fix_arg_lex_state
712
+ self.yacc_value = s("=>")
713
+ return :tASSOC
714
+ when src.scan(/\=/) then
715
+ # documentation nodes
716
+ if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
717
+ self.token_buffer << '=' # FIX merge up
718
+ self.token_buffer << src.matched
719
+
720
+ unless src.scan(/.*?\n=end(\n|\z)/m) then
721
+ rb_compile_error("embedded document meets end of file")
722
+ end
723
+
724
+ self.token_buffer << src.matched
725
+ self.store_comment
726
+
727
+ next
728
+ else
729
+ self.fix_arg_lex_state
730
+ self.yacc_value = s("=")
731
+ return '='
732
+ end
733
+ when src.scan(/\<\=\>/) then
734
+ self.fix_arg_lex_state
735
+ self.yacc_value = s("<=>")
736
+ return :tCMP
737
+ when src.scan(/\<\=/) then
738
+ self.fix_arg_lex_state
739
+ self.yacc_value = s("<=")
740
+ return :tLEQ
741
+ when src.scan(/\<\<\=/) then
742
+ self.fix_arg_lex_state
743
+ self.lex_state = :expr_beg
744
+ self.yacc_value = s("\<\<")
745
+ return :tOP_ASGN
746
+ when src.scan(/\<\</) then
747
+ if (! [:expr_end, :expr_dot,
748
+ :expr_endarg, :expr_class].include?(lex_state) &&
749
+ (!lex_state.is_argument || space_seen)) then
750
+ tok = self.heredoc_identifier
751
+ if tok then
752
+ return tok
753
+ end
754
+ end
755
+
756
+ self.fix_arg_lex_state
757
+ self.yacc_value = s("\<\<")
758
+ return :tLSHFT
759
+ when src.scan(/\</) then
760
+ self.fix_arg_lex_state
761
+ self.yacc_value = s("<")
762
+ return :tLT
763
+ when src.scan(/\>\=/) then
764
+ self.fix_arg_lex_state
765
+ self.yacc_value = s(">=")
766
+ return :tGEQ
767
+ when src.scan(/\>\>=/) then
768
+ self.fix_arg_lex_state
769
+ self.lex_state = :expr_beg
770
+ self.yacc_value = s(">>")
771
+ return :tOP_ASGN
772
+ when src.scan(/\>\>/) then
773
+ self.fix_arg_lex_state
774
+ self.yacc_value = s(">>")
775
+ return :tRSHFT
776
+ when src.scan(/\>/) then
777
+ self.fix_arg_lex_state
778
+ self.yacc_value = s(">")
779
+ return :tGT
780
+ when src.scan(/\"/) then
781
+ self.lex_strterm = s(:strterm, STR_DQUOTE, '"', "\0") # TODO: question this
782
+ self.yacc_value = s("\"")
783
+ return :tSTRING_BEG
784
+ when src.scan(/\`/) then
785
+ self.yacc_value = s("`")
786
+ case lex_state
787
+ when :expr_fname then
788
+ self.lex_state = :expr_end
789
+ return :tBACK_REF2
790
+ when :expr_dot then
791
+ self.lex_state = if command_state then
792
+ :expr_cmdarg
793
+ else
794
+ :expr_arg
795
+ end
796
+ return :tBACK_REF2
797
+ end
798
+ self.lex_strterm = s(:strterm, STR_XQUOTE, '`', "\0")
799
+ return :tXSTRING_BEG
800
+ when src.scan(/\'/) then
801
+ self.lex_strterm = s(:strterm, STR_SQUOTE, "\'", "\0")
802
+ self.yacc_value = s("'")
803
+ return :tSTRING_BEG
804
+ when src.scan(/\?/) then
805
+ if lex_state == :expr_end || lex_state == :expr_endarg then
806
+ self.lex_state = :expr_beg
807
+ self.yacc_value = s("?")
808
+ return '?'
809
+ end
810
+
811
+ if src.eos? then
812
+ rb_compile_error "incomplete character syntax"
813
+ end
814
+
815
+ if src.check(/\s|\v/) then
816
+ unless lex_state.is_argument then
817
+ c2 = case src.matched
818
+ when " " then
819
+ 's'
820
+ when "\n" then
821
+ 'n'
822
+ when "\t" then
823
+ 't'
824
+ when "\v" then
825
+ 'v'
826
+ when "\r" then
827
+ 'r'
828
+ when "\f" then
829
+ 'f'
830
+ end
831
+
832
+ if c2 then
833
+ warning("invalid character syntax; use ?\\" + c2)
834
+ end
835
+ end
836
+
837
+ # ternary
838
+ self.lex_state = :expr_beg
839
+ self.yacc_value = s("?")
840
+ return '?'
841
+ # elsif ismbchar(c) then # ternary, also
842
+ # rb_warn "multibyte character literal not supported yet; use ?\\#{c}"
843
+ # support.unread c
844
+ # self.lex_state = :expr_beg
845
+ # return '?'
846
+ elsif src.check(/\w(?=\w)/) then # ternary, also
847
+ self.lex_state = :expr_beg
848
+ self.yacc_value = s("?")
849
+ return '?'
850
+ end
851
+
852
+ c = if src.scan(/\\/) then
853
+ self.read_escape
854
+ else
855
+ src.getch
856
+ end
857
+ c[0] = (c[0].ord & 0xff).chr
858
+ self.lex_state = :expr_end
859
+ self.yacc_value = c[0].ord
860
+ return :tINTEGER
861
+ when src.scan(/\&&=/) then
862
+ self.yacc_value = s("&&")
863
+ self.lex_state = :expr_beg
864
+ return :tOP_ASGN
865
+ when src.scan(/\&&/) then
866
+ self.lex_state = :expr_beg
867
+ self.yacc_value = s("&&")
868
+ return :tANDOP
869
+ when src.scan(/\&\=/) then
870
+ self.yacc_value = s("&")
871
+ self.lex_state = :expr_beg
872
+ return :tOP_ASGN
873
+ when src.scan(/&/) then
874
+ result = if lex_state.is_argument && space_seen && !src.check(/\s/) then
875
+ warning("`&' interpreted as argument prefix")
876
+ :tAMPER
877
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
878
+ :tAMPER
879
+ else
880
+ :tAMPER2
881
+ end
882
+
883
+ self.fix_arg_lex_state
884
+ self.yacc_value = s("&")
885
+ return result
886
+ when src.scan(/\|\|\=/) then
887
+ self.lex_state = :expr_beg
888
+ self.yacc_value = s("||")
889
+ return :tOP_ASGN
890
+ when src.scan(/\|\|/) then
891
+ self.lex_state = :expr_beg
892
+ self.yacc_value = s("||")
893
+ return :tOROP
894
+ when src.scan(/\|\=/) then
895
+ self.lex_state = :expr_beg
896
+ self.yacc_value = s("|")
897
+ return :tOP_ASGN
898
+ when src.scan(/\|/) then
899
+ self.fix_arg_lex_state
900
+ self.yacc_value = s("|")
901
+ return :tPIPE
902
+ when src.scan(/[+-]/) then
903
+ sign = src.matched
904
+ utype, type = if sign == "+" then
905
+ [:tUPLUS, :tPLUS]
906
+ else
907
+ [:tUMINUS, :tMINUS]
908
+ end
909
+
910
+ if lex_state == :expr_fname || lex_state == :expr_dot then
911
+ self.lex_state = :expr_arg
912
+ if src.scan(/@/) then
913
+ self.yacc_value = s("#{sign}@")
914
+ return utype
915
+ else
916
+ self.yacc_value = s(sign)
917
+ return type
918
+ end
919
+ end
920
+
921
+ if src.scan(/\=/) then
922
+ self.lex_state = :expr_beg
923
+ self.yacc_value = s(sign)
924
+ return :tOP_ASGN
925
+ end
926
+
927
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
928
+ (lex_state.is_argument && space_seen && !src.check(/\s/))) then
929
+ if lex_state.is_argument then
930
+ arg_ambiguous
931
+ end
932
+
933
+ self.lex_state = :expr_beg
934
+ self.yacc_value = s(sign)
935
+
936
+ if src.check(/\d/) then
937
+ if utype == :tUPLUS then
938
+ return self.parse_number
939
+ else
940
+ return :tUMINUS_NUM
941
+ end
942
+ end
943
+
944
+ return utype
945
+ end
946
+
947
+ self.lex_state = :expr_beg
948
+ self.yacc_value = s(sign)
949
+ return type
950
+ when src.scan(/\.\.\./) then
951
+ self.lex_state = :expr_beg
952
+ self.yacc_value = s("...")
953
+ return :tDOT3
954
+ when src.scan(/\.\./) then
955
+ self.lex_state = :expr_beg
956
+ self.yacc_value = s("..")
957
+ return :tDOT2
958
+ when src.scan(/\.\d/) then
959
+ rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
960
+ when src.scan(/\./) then
961
+ self.lex_state = :expr_dot
962
+ self.yacc_value = s(".")
963
+ return :tDOT
964
+ when src.check(/[0-9]/) then
965
+ return parse_number
966
+ when src.scan(/[\)\]\}]/) then
967
+ cond.lexpop
968
+ cmdarg.lexpop
969
+ self.lex_state = :expr_end
970
+ self.yacc_value = s(src.matched)
971
+ result = {
972
+ ")" => :tRPAREN,
973
+ "]" => :tRBRACK,
974
+ "}" => :tRCURLY
975
+ }[src.matched]
976
+ return result
977
+ when src.scan(/::/) then
978
+ if (lex_state == :expr_beg ||
979
+ lex_state == :expr_mid ||
980
+ lex_state == :expr_class ||
981
+ (lex_state.is_argument && space_seen)) then
982
+ self.lex_state = :expr_beg
983
+ self.yacc_value = s("::")
984
+ return :tCOLON3
985
+ end
986
+
987
+ self.lex_state = :expr_dot
988
+ self.yacc_value = s("::")
989
+ return :tCOLON2
990
+ when src.scan(/\:/) then
991
+ if (lex_state == :expr_end || lex_state == :expr_endarg ||
992
+ src.check(/\s/)) then
993
+ self.lex_state = :expr_beg
994
+ self.yacc_value = s(":")
995
+ return ':'
996
+ end
997
+
998
+ case
999
+ when src.scan(/\'/) then
1000
+ self.lex_strterm = s(:strterm, STR_SSYM, src.matched, "\0")
1001
+ when src.scan(/\"/) then
1002
+ self.lex_strterm = s(:strterm, STR_DSYM, src.matched, "\0")
1003
+ end
1004
+
1005
+ self.lex_state = :expr_fname
1006
+ self.yacc_value = s(":")
1007
+ return :tSYMBEG
1008
+ when src.scan(/\//) then
1009
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1010
+ self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1011
+ self.yacc_value = s("/")
1012
+ return :tREGEXP_BEG
1013
+ end
1014
+
1015
+ if src.scan(/\=/) then
1016
+ self.yacc_value = s("/")
1017
+ self.lex_state = :expr_beg
1018
+ return :tOP_ASGN
1019
+ end
1020
+
1021
+ if lex_state.is_argument && space_seen then
1022
+ unless src.scan(/\s/) then
1023
+ arg_ambiguous
1024
+ self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1025
+ self.yacc_value = s("/")
1026
+ return :tREGEXP_BEG
1027
+ end
1028
+ end
1029
+
1030
+ self.fix_arg_lex_state
1031
+
1032
+ self.yacc_value = s("/")
1033
+ return :tDIVIDE
1034
+ when src.scan(/\^=/) then
1035
+ self.lex_state = :expr_beg
1036
+ self.yacc_value = s("^")
1037
+ return :tOP_ASGN
1038
+ when src.scan(/\^/) then
1039
+ self.fix_arg_lex_state
1040
+ self.yacc_value = s("^")
1041
+ return :tCARET
1042
+ when src.scan(/\;/) then
1043
+ self.command_start = true
1044
+ self.lex_state = :expr_beg
1045
+ self.yacc_value = s(";")
1046
+ return src.matched
1047
+ when src.scan(/\,/) then
1048
+ self.lex_state = :expr_beg
1049
+ self.yacc_value = s(",")
1050
+ return src.matched
1051
+ when src.scan(/\~/) then
1052
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1053
+ src.scan(/@/)
1054
+ end
1055
+
1056
+ self.fix_arg_lex_state
1057
+ self.yacc_value = s("~")
1058
+
1059
+ return :tTILDE
1060
+ when src.scan(/\(/) then
1061
+ result = :tLPAREN2
1062
+ self.command_start = true
1063
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1064
+ result = :tLPAREN
1065
+ elsif space_seen then
1066
+ if lex_state == :expr_cmdarg then
1067
+ result = :tLPAREN_ARG
1068
+ elsif lex_state == :expr_arg then
1069
+ warning("don't put space before argument parentheses")
1070
+ result = :tLPAREN2
1071
+ end
1072
+ end
1073
+
1074
+ self.expr_beg_push "("
1075
+
1076
+ return result
1077
+ when src.scan(/\[/) then
1078
+ result = src.matched
1079
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1080
+ self.lex_state = :expr_arg
1081
+ case
1082
+ when src.scan(/\]\=/) then
1083
+ self.yacc_value = s("[]=")
1084
+ return :tASET
1085
+ when src.scan(/\]/) then
1086
+ self.yacc_value = s("[]")
1087
+ return :tAREF
1088
+ else
1089
+ rb_compile_error "unexpected '['"
1090
+ end
1091
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
1092
+ result = :tLBRACK
1093
+ elsif lex_state.is_argument && space_seen then
1094
+ result = :tLBRACK
1095
+ end
1096
+
1097
+ self.expr_beg_push("[")
1098
+
1099
+ return result
1100
+ when src.scan(/\{/) then
1101
+ result = if lex_state.is_argument || lex_state == :expr_end then
1102
+ :tLCURLY # block (primary)
1103
+ elsif lex_state == :expr_endarg then
1104
+ :tLBRACE_ARG # block (expr)
1105
+ else
1106
+ :tLBRACE # hash
1107
+ end
1108
+
1109
+ self.expr_beg_push("{")
1110
+
1111
+ return result
1112
+ when src.scan(/\\/) then
1113
+ if src.scan(/\n/) then
1114
+ space_seen = true
1115
+ next
1116
+ end
1117
+ rb_compile_error "bare backslash only allowed before newline"
1118
+ when src.scan(/\%/) then
1119
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1120
+ return parse_quote
1121
+ end
1122
+
1123
+ if src.scan(/\=/) then
1124
+ self.lex_state = :expr_beg
1125
+ self.yacc_value = s("%")
1126
+ return :tOP_ASGN
1127
+ end
1128
+
1129
+ if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1130
+ return parse_quote
1131
+ end
1132
+
1133
+ self.fix_arg_lex_state
1134
+ self.yacc_value = s("%")
1135
+
1136
+ return :tPERCENT
1137
+ when src.scan(/(\$_)(\w)/) then
1138
+ self.lex_state = :expr_end
1139
+ token_buffer << src[1]
1140
+ # HACK? c = src[2]
1141
+ # pass through
1142
+ when src.scan(/\$_/) then
1143
+ self.lex_state = :expr_end
1144
+ token_buffer << src.matched
1145
+ self.yacc_value = s(src.matched)
1146
+ return :tGVAR
1147
+ when src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1148
+ self.lex_state = :expr_end
1149
+ token_buffer << src.matched
1150
+ self.yacc_value = s(token_buffer.join)
1151
+ return :tGVAR
1152
+ when src.scan(/\$([\&\`\'\+])/) then
1153
+ self.lex_state = :expr_end
1154
+ # Explicit reference to these vars as symbols...
1155
+ if last_state == :expr_fname then
1156
+ token_buffer << src.matched
1157
+ self.yacc_value = s(token_buffer.join)
1158
+ return :tGVAR
1159
+ else
1160
+ self.yacc_value = s(:back_ref, src[1].to_sym)
1161
+ return :tBACK_REF
1162
+ end
1163
+ when src.scan(/\$[1-9]\d*/) then
1164
+ self.lex_state = :expr_end
1165
+ token_buffer.push(*src.matched.split(//))
1166
+ if last_state == :expr_fname then
1167
+ self.yacc_value = s(token_buffer.join)
1168
+ return :tGVAR
1169
+ else
1170
+ self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
1171
+ return :tNTH_REF
1172
+ end
1173
+ when src.scan(/\$0/) then
1174
+ self.lex_state = :expr_end
1175
+ token_buffer << '$' # why just this?!?
1176
+ # pass through
1177
+ when src.scan(/\$\W|\$\z/) then # TODO: remove?
1178
+ self.lex_state = :expr_end
1179
+ self.yacc_value = s("$")
1180
+ return '$'
1181
+ when src.scan(/\$/)
1182
+ self.lex_state = :expr_end
1183
+ token_buffer << src.matched
1184
+ src.getch
1185
+ # pass through
1186
+ when src.scan(/\@/) then
1187
+ token_buffer << '@'
1188
+
1189
+ if src.scan(/(@)?\d/) then
1190
+ if src[1] then
1191
+ rb_compile_error "`@@#{c}` is not allowed as a class variable name"
1192
+ else
1193
+ rb_compile_error "`@#{c}' is not allowed as an instance variable name"
1194
+ end
1195
+ end
1196
+
1197
+ if src.scan(/@/) then
1198
+ token_buffer << src.matched
1199
+ end
1200
+
1201
+ unless src.scan(/\w/) then
1202
+ self.yacc_value = s("@")
1203
+ return '@'
1204
+ end
1205
+ when src.scan(/\_/) then
1206
+ if src.was_begin_of_line && src.scan(/_END__(\n|\Z)/) then
1207
+ return RubyLexer::EOF
1208
+ end
1209
+ else
1210
+ c = src.getch # FIX: I really hate this
1211
+ if c =~ /\W/ then
1212
+ rb_compile_error "Invalid char '#{c.inspect}' in expression"
1213
+ end
1214
+ end
1215
+
1216
+ src.pos -= 1 # HACK
1217
+ if src.scan(/\w+/) then
1218
+ token_buffer.push(*src.matched.split(//)) # TODO: that split is tarded.
1219
+ end
1220
+
1221
+ if token_buffer[0] =~ /\w/ && src.scan(/[\!\?](?!=)/) then
1222
+ token_buffer << src.matched
1223
+ end
1224
+
1225
+ result = nil
1226
+ last_state = lex_state
1227
+
1228
+ case token_buffer[0]
1229
+ when /^\$/ then
1230
+ self.lex_state = :expr_end
1231
+ result = :tGVAR
1232
+ when '@' then
1233
+ self.lex_state = :expr_end
1234
+ if token_buffer[1] == '@' then
1235
+ result = :tCVAR
1236
+ else
1237
+ result = :tIVAR
1238
+ end
1239
+ else
1240
+ if token_buffer[-1] =~ /[!?]/ then
1241
+ result = :tFID
1242
+ else
1243
+ if lex_state == :expr_fname then
1244
+ # ident=, not =~ => == or followed by =>
1245
+ if src.scan(/=(?:(?![~>=])|(?==>))/) then
1246
+ result = :tIDENTIFIER
1247
+ token_buffer << src.matched
1248
+ end
1249
+ end
1250
+
1251
+ if result.nil? && token_buffer[0] =~ /[A-Z]/ then
1252
+ result = :tCONSTANT
1253
+ else
1254
+ result = :tIDENTIFIER
1255
+ end
1256
+ end
1257
+
1258
+ unless lex_state == :expr_dot then
1259
+ # See if it is a reserved word.
1260
+ keyword = Keyword.keyword(token_buffer.join, token_buffer.length)
1261
+
1262
+ unless keyword.nil? then
1263
+ state = lex_state
1264
+ self.lex_state = keyword.state
1265
+
1266
+ self.yacc_value = if state == :expr_fname then
1267
+ s(keyword.name)
1268
+ else
1269
+ s(token_buffer.join)
1270
+ end
1271
+
1272
+ if keyword.id0 == :kDO then
1273
+ self.command_start = true
1274
+ if cond.is_in_state then
1275
+ return :kDO_COND
1276
+ end
1277
+ if cmdarg.is_in_state && state != :expr_cmdarg then
1278
+ return :kDO_BLOCK
1279
+ end
1280
+ if state == :expr_endarg then
1281
+ return :kDO_BLOCK
1282
+ end
1283
+ return :kDO
1284
+ end
1285
+
1286
+ if state == :expr_beg then
1287
+ return keyword.id0
1288
+ end
1289
+
1290
+ if keyword.id0 != keyword.id1 then
1291
+ self.lex_state = :expr_beg
1292
+ end
1293
+
1294
+ return keyword.id1
1295
+ end
1296
+ end # lex_state == :expr_dot
1297
+
1298
+ if (lex_state == :expr_beg ||
1299
+ lex_state == :expr_mid ||
1300
+ lex_state == :expr_dot ||
1301
+ lex_state == :expr_arg ||
1302
+ lex_state == :expr_cmdarg) then
1303
+ if command_state then
1304
+ self.lex_state = :expr_cmdarg
1305
+ else
1306
+ self.lex_state = :expr_arg
1307
+ end
1308
+ else
1309
+ self.lex_state = :expr_end
1310
+ end
1311
+ end
1312
+
1313
+ # Lame: parsing logic made it into lexer in ruby...So we
1314
+ # are emulating
1315
+ # FIXME: I believe this is much simpler now...
1316
+ # HACK
1317
+ # if (IdUtil.var_type(temp_val) == IdUtil.LOCAL_VAR &&
1318
+ # last_state != :expr_dot &&
1319
+ # (BlockStaticScope === scope && (scope.is_defined(temp_val) >= 0)) ||
1320
+ # (scope.local_scope.is_defined(temp_val) >= 0)) then
1321
+ # self.lex_state = :expr_end
1322
+ # end
1323
+
1324
+ self.yacc_value = s(token_buffer.join)
1325
+
1326
+ return result
1327
+ end
1328
+ end
1329
+ end