rogue_parser 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # -*- ruby -*-
2
+
3
+ Autotest.add_hook :initialize do |at|
4
+ at.extra_files << "../../ParseTree/dev/test/pt_testcase.rb"
5
+ at.libs << ":../../ParseTree/dev/lib:../../ParseTree/dev/test"
6
+ at.add_exception 'unit'
7
+ at.add_exception 'coverage'
8
+ at.add_exception '.diff'
9
+ at.add_exception 'coverage.info'
10
+
11
+ at.unit_diff = "unit_diff -u -b"
12
+
13
+ at.add_mapping(/^lib\/.*\.y$/) do |f, _|
14
+ at.files_matching %r%^test/.*#{File.basename(f, '.y').gsub '_', '_?'}.rb$%
15
+ end
16
+
17
+ at.add_mapping(/pt_testcase.rb/) do |f, _|
18
+ at.files_matching(/^test.*rb$/)
19
+ end
20
+
21
+ %w(TestEnvironment TestStackState).each do |klass|
22
+ at.extra_class_map[klass] = "test/test_ruby_parser_extras.rb"
23
+ end
24
+ end
25
+
26
+ Autotest.add_hook :run_command do |at|
27
+ system "rake parser"
28
+ end
29
+
30
+ class Autotest
31
+ def ruby
32
+ File.expand_path "~/.multiruby/install/1.9.0-0/bin/ruby"
33
+ end
34
+ end if ENV['ONENINE']
35
+
36
+ require 'autotest/rcov'
37
+ Autotest::RCov.command = 'rcov_info'
38
+ # Autotest::RCov.pattern = 'test/test_ruby_lexer.rb'
@@ -0,0 +1,5 @@
1
+ == 1.0.0 / 2007-12-20
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
@@ -0,0 +1,9 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/ruby_lexer.rb
7
+ lib/ruby_parser.y
8
+ test/test_ruby_lexer.rb
9
+ test/test_ruby_parser.rb
@@ -0,0 +1,76 @@
1
+
2
+
3
+ 'rogue_parser' is 'ruby_parser' with this patch applied:
4
+
5
+ http://rubyforge.org/tracker/?func=detail&aid=20106&group_id=439&atid=1780
6
+
7
+
8
+ I hope I can get rid of this temporary repo by the next release of ruby_parser
9
+
10
+
11
+
12
+
13
+ ruby_parser
14
+ by Ryan Davis
15
+ http://parsetree.rubyforge.org/
16
+
17
+ == DESCRIPTION:
18
+
19
+ ruby_parser (RP) is a ruby parser written in pure ruby (utilizing
20
+ racc--which does by default use a C extension). RP's output is
21
+ the same as ParseTree's output: s-expressions using ruby's arrays and
22
+ base types.
23
+
24
+ == FEATURES/PROBLEMS:
25
+
26
+ * Pure ruby, no compiles.
27
+ * Includes preceding comment data for defn/defs/class/module nodes!
28
+ * Incredibly simple interface.
29
+ * Output is 100% equivalent to ParseTree.
30
+ * Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
31
+ * Known Issue: Speed sucks currently. 5500 tests currently run in 21 min.
32
+ * Known Issue: Code is waaay ugly. Port of a port. Not my fault. Will fix RSN.
33
+ * Known Issue: I don't currently support newline nodes.
34
+ * Known Issue: Totally awesome.
35
+ * Known Issue: dasgn_curr decls can be out of order from ParseTree's.
36
+
37
+ == SYNOPSIS:
38
+
39
+ RubyParser.new.parse "1+1"
40
+ # => s(:call, s(:lit, 1), :+, s(:array, s(:lit, 1)))
41
+
42
+ == REQUIREMENTS:
43
+
44
+ * ruby. woot.
45
+ * ParseTree is needed for Sexp class... crap. I might break that out.
46
+ * ParseTree for testing.
47
+ * racc full package for parser development.
48
+
49
+ == INSTALL:
50
+
51
+ * sudo gem install ruby_parser
52
+
53
+ == LICENSE:
54
+
55
+ (The MIT License)
56
+
57
+ Copyright (c) 2007 Ryan Davis
58
+
59
+ Permission is hereby granted, free of charge, to any person obtaining
60
+ a copy of this software and associated documentation files (the
61
+ 'Software'), to deal in the Software without restriction, including
62
+ without limitation the rights to use, copy, modify, merge, publish,
63
+ distribute, sublicense, and/or sell copies of the Software, and to
64
+ permit persons to whom the Software is furnished to do so, subject to
65
+ the following conditions:
66
+
67
+ The above copyright notice and this permission notice shall be
68
+ included in all copies or substantial portions of the Software.
69
+
70
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
71
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
72
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
73
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
74
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
75
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
76
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,134 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ $: << '../../ParseTree/dev/lib/'
7
+ require './lib/ruby_parser_extras.rb'
8
+
9
+ hoe = Hoe.new('rogue_parser', RubyParser::VERSION) do |parser|
10
+ parser.rubyforge_name = 'parsetree'
11
+ parser.developer('Ryan Davis', 'ryand-ruby@zenspider.com')
12
+ parser.extra_deps << 'ParseTree'
13
+ end
14
+
15
+ hoe.spec.files += ['lib/ruby_parser.rb'] # jim.... cmon man
16
+ hoe.spec.files += ['lib/ruby_parser_extras.rb']
17
+
18
+ module Rake::TaskManager
19
+ def all_tasks
20
+ @tasks
21
+ end
22
+ end
23
+
24
+ Rake.application.all_tasks["default"].prerequisites.clear
25
+
26
+ [:default, :multi, :test].each do |t|
27
+ task t => :parser
28
+ end
29
+
30
+ path = "pkg/ruby_parser-#{RubyParser::VERSION}"
31
+ task path => :parser do
32
+ Dir.chdir path do
33
+ sh "rake parser"
34
+ end
35
+ end
36
+
37
+ task :parser => ["lib/ruby_parser.rb"]
38
+
39
+ rule '.rb' => '.y' do |t|
40
+ #sh "racc -l -t -o #{t.name} #{t.source}"
41
+ sh "racc -l -o #{t.name} #{t.source}"
42
+ # using racc 1.4.5... no -t...
43
+ end
44
+
45
+ task :clean do
46
+ rm_rf(Dir["**/*~"] +
47
+ Dir["**/*.diff"] +
48
+ Dir["coverage.info"] +
49
+ Dir["coverage"] +
50
+ Dir["lib/ruby_parser.rb"] +
51
+ Dir["lib/*.output"])
52
+ end
53
+
54
# Returns the next free sequence number for files matching +glob+.
#
# The greatest matching file name (by string comparison) supplies the current
# number, taken from the first run of digits in that name. When nothing
# matches yet, numbering starts at 1 instead of failing on nil.
def next_num(glob)
  latest = Dir[glob].max
  return 1 unless latest

  latest[/\d+/].to_i + 1
end
57
+
58
# Runs zenprofile over test/test_ruby_<type>.rb and stores its output in the
# next numbered profile_<type>_NNN.txt file.
#
# stdout and stderr are both captured using the portable "> file 2>&1" form;
# "&>" is a bash extension that a plain POSIX /bin/sh parses as "run in the
# background, then truncate the file".
def profile(type)
  num = next_num("profile_#{type}*.txt")
  sh "zenprofile -w -Ilib:ext:bin:test -rtest/unit test/test_ruby_#{type}.rb > profile_#{type}_%03d.txt 2>&1" % num
end
62
+
63
+ task :profile do
64
+ profile 'lexer'
65
+ profile 'parser'
66
+ end
67
+
68
+ begin
69
+ require 'rcov/rcovtask'
70
+ Rcov::RcovTask.new do |t|
71
+ pattern = ENV['PATTERN'] || 'test/test_ruby_*.rb'
72
+
73
+ t.test_files = FileList[pattern]
74
+ t.verbose = true
75
+ t.rcov_opts << "--threshold 80"
76
+ t.rcov_opts << "--no-color"
77
+ end
78
+ rescue LoadError
79
+ # skip
80
+ end
81
+
82
+ desc "Compares PT to RP and deletes all files that match"
83
+ task :compare do
84
+ files = Dir["unit/**/*.rb"]
85
+ puts "Parsing #{files.size} files"
86
+ files.each do |file|
87
+ puts file
88
+ system "./cmp.rb -q #{file} && rm #{file}"
89
+ end
90
+ system 'find -d unit -type d -empty -exec rmdir {} \;'
91
+ end
92
+
93
+ desc "Compares PT to RP and stops on first failure"
94
+ task :find_bug do
95
+ files = Dir["unit/**/*.rb"]
96
+ puts "Parsing #{files.size} files"
97
+ files.each do |file|
98
+ puts file
99
+ sh "./cmp.rb -q #{file}"
100
+ end
101
+ end
102
+
103
+ task :sort do
104
+ sh 'grepsort "^ +def" lib/ruby_lexer.rb'
105
+ sh 'grepsort "^ +def (test|util)" test/test_ruby_lexer.rb'
106
+ end
107
+
108
+ task :rcov_info => :parser do
109
+ pattern = ENV['PATTERN'] || "test/test_*.rb"
110
+ ruby "-Ilib -S rcov --text-report --save coverage.info #{pattern}"
111
+ end
112
+
113
+ task :rcov_overlay do
114
+ rcov, eol = Marshal.load(File.read("coverage.info")).last[ENV["FILE"]], 1
115
+ puts rcov[:lines].zip(rcov[:coverage]).map { |line, coverage|
116
+ bol, eol = eol, eol + line.length
117
+ [bol, eol, "#ffcccc"] unless coverage
118
+ }.compact.inspect
119
+ end
120
+
121
+ task :loc do
122
+ loc1 = `wc -l ../1.0.0/lib/ruby_lexer.rb`[/\d+/]
123
+ flog1 = `flog -s ../1.0.0/lib/ruby_lexer.rb`[/\d+\.\d+/]
124
+ loc2 = `cat lib/ruby_lexer.rb lib/ruby_parser_extras.rb | wc -l`[/\d+/]
125
+ flog2 = `flog -s lib/ruby_lexer.rb lib/ruby_parser_extras.rb`[/\d+\.\d+/]
126
+
127
+ loc1, loc2, flog1, flog2 = loc1.to_i, loc2.to_i, flog1.to_f, flog2.to_f
128
+
129
+ puts "1.0.0: loc = #{loc1} flog = #{flog1}"
130
+ puts "dev : loc = #{loc2} flog = #{flog2}"
131
+ puts "delta: loc = #{loc2-loc1} flog = #{flog2-flog1}"
132
+ end
133
+
134
+ # vim: syntax=Ruby
@@ -0,0 +1,1329 @@
1
+ #$: << File.expand_path("~/Work/p4/zss/src/ParseTree/dev/lib") # for me, not you.
2
+ require 'sexp'
3
+ require 'ruby_parser_extras'
4
+
5
+ class RubyLexer
6
+ attr_accessor :command_start
7
+ attr_accessor :cmdarg
8
+ attr_accessor :cond
9
+ attr_accessor :nest
10
+
11
+ # Additional context surrounding tokens that both the lexer and
12
+ # grammar use.
13
+ attr_reader :lex_state
14
+
15
+ attr_accessor :lex_strterm
16
+
17
+ # Stream of data that yylex examines.
18
+ attr_reader :src
19
+
20
+ # Last token read via yylex.
21
+ attr_accessor :token
22
+
23
+ # Temporary buffer to build up a potential token. Consumer takes
24
+ # responsibility to reset this before use.
25
+ attr_accessor :token_buffer
26
+
27
+ # Value of last token which had a value associated with it.
28
+ attr_accessor :yacc_value
29
+
30
+ # What handles warnings
31
+ attr_accessor :warnings
32
+
33
+ EOF = :eof_haha!
34
+
35
+ # ruby constants for strings (should this be moved somewhere else?)
36
+ STR_FUNC_BORING = 0x00
37
+ STR_FUNC_ESCAPE = 0x01 # TODO: remove and replace with REGEXP
38
+ STR_FUNC_EXPAND = 0x02
39
+ STR_FUNC_REGEXP = 0x04
40
+ STR_FUNC_AWORDS = 0x08
41
+ STR_FUNC_SYMBOL = 0x10
42
+ STR_FUNC_INDENT = 0x20 # <<-HEREDOC
43
+
44
+ STR_SQUOTE = STR_FUNC_BORING
45
+ STR_DQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
46
+ STR_XQUOTE = STR_FUNC_BORING | STR_FUNC_EXPAND
47
+ STR_REGEXP = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
48
+ STR_SSYM = STR_FUNC_SYMBOL
49
+ STR_DSYM = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
50
+
51
+ # How the parser advances to the next token.
52
+ #
53
+ # @return true if not at end of file (EOF).
54
+
55
+ # if ENV['SPY'] then
56
+ # @@stats = Hash.new 0
57
+ #
58
+ # def self.stats
59
+ # @@stats
60
+ # end
61
+ #
62
+ # at_exit {
63
+ # require 'pp'
64
+ # pp RubyLexer.stats.sort_by {|k,v| -v}.first(20)
65
+ # }
66
+ # end
67
+
68
# Fetches the next token from yylex and remembers it in +token+.
#
# When the SPY environment variable is set, the token is also counted in the
# @@stats class variable -- but only if that variable has actually been
# defined. Its definition is not active in this file, and an unconditional
# reference would raise NameError.
#
# Returns true unless the token is RubyLexer::EOF.
def advance
  r = yylex
  self.token = r

  if ENV['SPY'] && self.class.class_variable_defined?(:@@stats)
    self.class.class_variable_get(:@@stats)[r] += 1
  end

  r != RubyLexer::EOF
end
76
+
77
# Emits the "ambiguous first argument" notice through #warning.
def arg_ambiguous
  warning "Ambiguous first argument. make sure."
end
80
+
81
# Returns every comment collected so far, joined into one string, and
# empties the collection.
def comments
  @comments.join.tap { @comments.clear }
end
86
+
87
# Pushes +false+ onto both the cond and cmdarg stacks, switches the lexer to
# :expr_beg and stores s(val) as the current yacc value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }
  self.lex_state  = :expr_beg
  self.yacc_value = s(val)
end
93
+
94
# Moves the lexer to :expr_arg when it currently sits in :expr_fname or
# :expr_dot, and to :expr_beg in every other case.
def fix_arg_lex_state
  after_name = [:expr_fname, :expr_dot].include?(lex_state)
  self.lex_state = after_name ? :expr_arg : :expr_beg
end
101
+
102
# Lexes the next piece of a here document.
#
# here - the heredoc strterm: [tag, terminator, STR_FUNC_* flags, last_line]
#
# Returns :tSTRING_END once the terminator line is reached, :tSTRING_DVAR or
# :tSTRING_DBEG at the start of an interpolation, and :tSTRING_CONTENT
# (with the text in yacc_value) otherwise. A missing terminator is reported
# through rb_compile_error.
#
# The terminator is escaped before being embedded in the matching regexp:
# quoted heredoc identifiers may contain regexp metacharacters (<<"E.F"),
# which must only match themselves.
def heredoc here
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0
  eos_pat = Regexp.escape eos
  eos_re  = indent ? /[ \t]*#{eos_pat}(\r?\n|\z)/ : /#{eos_pat}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = s(eos)
    return :tSTRING_END
  end

  token_buffer.clear

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = s(src.matched)
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = s(src.matched)
      return :tSTRING_DBEG
    when src.scan(/#/) then
      token_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n" then
        # stopped in the middle of a line: hand back what we have so far
        self.yacc_value = s(:str, token_buffer.join.delete("\r"))
        return :tSTRING_CONTENT
      else
        token_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # no interpolation: copy whole lines until the terminator is next
    until src.check(eos_re) do
      token_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = s(:heredoc, eos, func, last_line)
  self.yacc_value  = s(:str, token_buffer.join.delete("\r"))

  return :tSTRING_CONTENT
end
166
+
167
# Recognises a here-document identifier at the current position of +src+.
#
# On success the rest of the current line is cut out of the source string
# and kept inside the :heredoc strterm stored in lex_strterm. Returns
# :tXSTRING_BEG for a backtick-quoted identifier, :tSTRING_BEG for any other
# identifier, and nil when no identifier is found. An opening quote that is
# never closed is reported through rb_compile_error.
def heredoc_identifier
  quote, flags = nil, STR_FUNC_BORING
  token_buffer.clear

  if src.scan(/(-?)(['"`])(.*?)\2/) then
    # quoted identifier: <<'EOS', <<"EOS", <<`EOS` (optionally with '-')
    quote = src[2]
    flags |= STR_FUNC_INDENT unless src[1].empty?
    flags |= case quote
             when "\'" then STR_SQUOTE
             when '"'  then STR_DQUOTE
             else           STR_XQUOTE
             end
    token_buffer << src[3]
  elsif src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  elsif src.scan(/(-?)(\w+)/) then
    # bare identifier is treated like a double-quoted one
    quote = '"'
    flags |= STR_DQUOTE
    flags |= STR_FUNC_INDENT unless src[1].empty?
    token_buffer << src[2]
  else
    return nil
  end

  rest_of_line = nil
  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    rest_of_line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = ''
  end

  self.lex_strterm = s(:heredoc, token_buffer.join, flags, rest_of_line)

  if quote == '`' then
    self.yacc_value = s("`")
    :tXSTRING_BEG
  else
    self.yacc_value = s("\"")
    :tSTRING_BEG
  end
end
217
+
218
# Builds a lexer with an empty token buffer, fresh :cond and :cmdarg
# StackStates, a nesting depth of zero and no stored comments, then calls
# #reset for the remaining state.
def initialize
  @comments         = []
  self.nest         = 0
  self.token_buffer = []
  self.cond         = StackState.new(:cond)
  self.cmdarg       = StackState.new(:cmdarg)

  reset
end
227
+
228
# Converts the text most recently matched by +src+ to an Integer in the
# given +base+, stores it in yacc_value and returns :tINTEGER. A doubled
# underscore in the literal is reported through rb_compile_error.
def int_with_base base
  literal = src.matched
  rb_compile_error "Invalid numeric format" if literal =~ /__/

  self.yacc_value = literal.to_i(base)
  :tINTEGER
end
235
+
236
# Sets the lexer state. Only Symbols are accepted; anything else raises a
# RuntimeError.
def lex_state= o
  case o
  when Symbol then @lex_state = o
  else raise "wtf?"
  end
end
240
+
241
+ ##
242
+ # Parse a number from the input stream.
243
+ #
244
+ # @param c The first character of the number.
245
+ # @return An int constant which represents a token.
246
+
247
# Scans a numeric literal at the current position of +src+.
#
# Switches the lexer to :expr_end. Integer forms are delegated to
# int_with_base; floats store their value in yacc_value and yield :tFLOAT.
# Malformed literals are reported through rb_compile_error.
def parse_number
  self.lex_state = :expr_end

  if src.scan(/[+-]?0[xbd]\b/) then # radix prefix with no digits after it
    rb_compile_error "Invalid numeric format"
  elsif src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  elsif src.scan(/[+-]?0b[01_]+/) then
    int_with_base(2)
  elsif src.scan(/[+-]?0d[0-9_]+/) then
    int_with_base(10)
  elsif src.scan(/[+-]?0o?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  elsif src.scan(/[+-]?0o?[0-7_]+|0o/) then
    int_with_base(8)
  elsif src.scan(/[+-]?[\d_]+_(e|\.)/) then
    rb_compile_error "Trailing '_' in number."
  elsif src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    literal = src.matched
    rb_compile_error "Invalid numeric format" if literal =~ /__/
    self.yacc_value = literal.to_f
    :tFLOAT
  elsif src.scan(/[+-]?0\b/) then
    int_with_base(10)
  elsif src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
280
+
281
# Lexes the opening of a percent literal (presumably called once the '%'
# itself has been consumed -- confirm against the caller).
#
# Works out the literal's type letter and delimiters, stores the matching
# :strterm in lex_strterm and returns the corresponding *_BEG token type.
# Unknown type letters and premature end of input are reported through
# rb_compile_error.
def parse_quote
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    # HACK: stupid rubinius -- keep these as separate assignments
    type_char  = src.matched
    opener     = src.getch
    short_hand = false
  else # Short-hand (e.g. %{, %., %!, etc)
    type_char  = 'Q'
    opener     = src.getch
    short_hand = true
  end

  if src.eos? or type_char == RubyLexer::EOF or opener == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Bracket-style openers close with their partner. Any other delimiter
  # closes itself, and the opener is replaced by "\0".
  closer = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[opener]
  closer, opener = opener, "\0" if closer.nil?

  self.yacc_value = s("%#{type_char}#{opener}")

  token_type, string_type =
    case type_char
    when 'Q' then
      label = short_hand ? closer : type_char + opener
      self.yacc_value = s("%#{label}")
      [:tSTRING_BEG, STR_DQUOTE]
    when 'q' then
      [:tSTRING_BEG, STR_SQUOTE]
    when 'W' then
      src.scan(/\s*/)
      [:tWORDS_BEG, STR_DQUOTE | STR_FUNC_AWORDS]
    when 'w' then
      src.scan(/\s*/)
      [:tAWORDS_BEG, STR_SQUOTE | STR_FUNC_AWORDS]
    when 'x' then
      [:tXSTRING_BEG, STR_XQUOTE]
    when 'r' then
      [:tREGEXP_BEG, STR_REGEXP]
    when 's' then
      self.lex_state = :expr_fname
      [:tSYMBEG, STR_SSYM]
    end

  if token_type.nil? then
    rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{type_char}'."
  end

  self.lex_strterm = s(:strterm, string_type, closer, opener)

  token_type
end
338
+
339
# Lexes the next piece of a string-like literal.
#
# quote - the :strterm describing the literal:
#         [tag, STR_FUNC_* flags, terminator, opening delimiter]
#
# Returns :tSTRING_END / :tREGEXP_END at the terminator, ' ' at a word-list
# separator or word-list end, :tSTRING_DVAR / :tSTRING_DBEG at the start of
# an interpolation, and :tSTRING_CONTENT (text in yacc_value) otherwise.
def parse_string(quote)
  _, string_type, term, open = quote

  term_re = Regexp.escape term

  awords = (string_type & STR_FUNC_AWORDS) != 0
  regexp = (string_type & STR_FUNC_REGEXP) != 0
  expand = (string_type & STR_FUNC_EXPAND) != 0

  # the flags slot is cleared (see below) once a word list has been closed
  return :tSTRING_END unless string_type

  saw_space = awords && src.scan(/\s+/) ? true : false

  if nest == 0 && src.scan(/#{term_re}/) then
    if awords then
      quote[1] = nil
      return ' '
    elsif regexp then
      self.yacc_value = regx_options
      return :tREGEXP_END
    else
      self.yacc_value = s(term)
      return :tSTRING_END
    end
  end

  return ' ' if saw_space

  token_buffer.clear

  if expand
    case
    when src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      return :tSTRING_DBEG
    when src.scan(/#/) then
      token_buffer << '#'
    end
  end

  if tokadd_string(string_type, term, open) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = s(:str, token_buffer.join)
  :tSTRING_CONTENT
end
394
+
395
# Raises SyntaxError with +msg+, extended by the scanner's current line
# number and the remainder of the current line.
def rb_compile_error msg
  where = ". near line #{src.lineno}: #{src.rest[/^.*/].inspect}"
  raise SyntaxError, msg + where
end
399
+
400
# Reads the escape sequence that follows a backslash (the backslash itself
# has already been consumed) and returns the string it stands for.
#
# Differences from the previous revision:
# * \x accepts one or two hex digits, consistent with tokadd_escape.
# * octal escapes are masked to a single byte, so \400..\777 no longer
#   raise RangeError from Integer#chr.
#
# Invalid sequences are reported through rb_compile_error.
def read_escape
  case
  when src.scan(/\\/) then # Backslash
    '\\'
  when src.scan(/n/) then # newline
    "\n"
  when src.scan(/t/) then # horizontal tab
    "\t"
  when src.scan(/r/) then # carriage-return
    "\r"
  when src.scan(/f/) then # form-feed
    "\f"
  when src.scan(/v/) then # vertical tab
    "\13"
  when src.scan(/a/) then # alarm(bell)
    "\007"
  when src.scan(/e/) then # escape
    "\033"
  when src.scan(/b/) then # backspace
    "\010"
  when src.scan(/s/) then # space
    " "
  when src.scan(/[0-7]{1,3}/) then # octal constant
    (src.matched.to_i(8) & 0xff).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.scan(/M-\\/) then # meta + nested escape
    c = read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/M-(.)/) then # meta + plain character
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/C-\\|c\\/) then # control + nested escape
    c = read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/C-\?|c\?/) then # delete
    0177.chr
  when src.scan(/(C-|c)(.)/) then # control + plain character
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
450
+
451
# Consumes the lowercase letters following a regexp literal and returns the
# recognised option letters (any of "ixmonesu") as a string. Any other
# letter is reported through rb_compile_error.
def regx_options
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  known, unknown = letters.partition { |flag| flag =~ /^[ixmonesu]$/ }

  unless unknown.empty?
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, unknown.join.inspect])
  end

  known.join
end
465
+
466
# Returns the lexer to its initial state: the next token starts a command,
# and the string terminator, last token, yacc value, source scanner and lex
# state are all cleared.
def reset
  @src = @lex_state = nil

  self.yacc_value    = nil
  self.token         = nil
  self.lex_strterm   = nil
  self.command_start = true

  nil
end
475
+
476
# Installs a new input: wraps the given String in a StringScanner. Anything
# that is not a String raises a RuntimeError.
def src= src
  case src
  when String then @src = StringScanner.new(src)
  else raise "bad src: #{src.inspect}"
  end
end
480
+
481
# Moves everything currently in token_buffer onto the list of collected
# comments, leaving the buffer empty.
def store_comment
  pending = token_buffer
  @comments.push(*pending)
  pending.clear
end
485
+
486
# Copies one backslash escape sequence verbatim from +src+ into
# token_buffer. A backslash-newline pair is dropped, and malformed
# sequences are reported through rb_compile_error. +term+ is only handed on
# to the recursive call.
def tokadd_escape term
  if src.scan(/\\\n/) then
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    token_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    # meta/control prefix followed by another escape: copy the prefix, then
    # handle the nested escape
    token_buffer << src.matched
    tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    token_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/m) then
    token_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
505
+
506
# Appends string content from +src+ to token_buffer until the terminator,
# the start of an interpolation, whitespace (word lists only) or the end of
# the input is reached.
#
# func  - bit mask of STR_FUNC_* flags describing the literal
# term  - closing delimiter
# paren - opening delimiter, counted for nesting, or nil
#
# Returns the last character or match seen, or RubyLexer::EOF when the
# input has been used up.
def tokadd_string(func, term, paren)
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  open_re  = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  close_re = Regexp.new(Regexp.escape(term))

  until src.eos?
    c = nil
    case
    when open_re && src.scan(open_re) then
      self.nest += 1
    when self.nest == 0 && src.scan(close_re) then
      # un-nested terminator: step back so the caller sees it, then stop
      src.pos -= 1
      break
    when src.scan(close_re) then
      self.nest -= 1
    when ((awords && src.scan(/\s/)) ||
          (expand && src.scan(/#(?=[\$\@\{])/))) then
      # word separator or interpolation start: leave it for the caller
      src.pos -= 1
      break
    when awords && src.scan(/\\\n/) then
      token_buffer << "\n"
      next
    when expand && src.scan(/\\\n/) then
      next
    when awords && src.scan(/\\\s/) then
      c = ' '
    when (expand && src.scan(/#(?!\n)/)) || src.scan(/\\\n/) then
      # do nothing; the matched text is appended below
    when src.scan(/\\\\/) then
      token_buffer << '\\' if escape
      c = '\\'
    when regexp && src.check(/\\/) then
      tokadd_escape term
      next
    when expand && src.scan(/\\/) then
      c = read_escape
    when src.scan(/\\/) then
      unless src.scan(close_re) || paren.nil? || src.scan(open_re) then
        token_buffer << "\\"
      end
      # \\ case:
      # else if (ismbchar(c)) {
      #   int i, len = mbclen(c)-1;
      #   for (i = 0; i < len; i++) {
      #     tokadd(c);
      #     c = nextc();
      #   }
      # }
    else
      c = src.getch # FIX: I don't like this style
      if symbol && src.scan(/\0/) then
        rb_compile_error "symbol cannot contain '\\0'"
      end
    end

    c ||= src.matched
    token_buffer << c
  end # until

  c ||= src.matched
  src.eos? ? RubyLexer::EOF : c
end
577
+
578
# Warning hook. Currently discards the message +s+ and returns nil.
def warning s
  # do nothing for now
  nil
end
581
+
582
+ ##
583
+ # Returns the next token. Also sets yy_val if needed.
584
+ #
585
+ # @return Description of the Returned Value
586
+ # TODO: remove ALL sexps coming from here and move up to grammar
587
+ # TODO: only literal values should come up from the lexer.
588
+
589
+ def yylex
590
+ c = ''
591
+ space_seen = false
592
+ command_state = false
593
+ token_buffer.clear
594
+ src = self.src
595
+
596
+ self.token = nil
597
+ self.yacc_value = nil
598
+
599
+ if lex_strterm then
600
+ token = nil
601
+
602
+ if lex_strterm[0] == :heredoc then
603
+ token = self.heredoc(lex_strterm)
604
+ if token == :tSTRING_END then
605
+ self.lex_strterm = nil
606
+ self.lex_state = :expr_end
607
+ end
608
+ else
609
+ token = self.parse_string(lex_strterm)
610
+
611
+ if token == :tSTRING_END || token == :tREGEXP_END then
612
+ self.lex_strterm = nil
613
+ self.lex_state = :expr_end
614
+ end
615
+ end
616
+
617
+ return token
618
+ end
619
+
620
+ command_state = self.command_start
621
+ self.command_start = false
622
+
623
+ last_state = lex_state
624
+
625
+ loop do
626
+ case
627
+ when src.scan(/\004|\032|\000/), src.eos? then # ^D, ^Z, EOF
628
+ return RubyLexer::EOF
629
+ when src.scan(/\ |\t|\f|\r|\13/) then # white spaces, 13 = '\v
630
+ space_seen = true
631
+ next
632
+ when src.scan(/#|\n/) then
633
+ c = src.matched
634
+ if c == '#' then
635
+ src.unread c # ok
636
+
637
+ while src.scan(/\s*#.*(\n+|\z)/) do
638
+ token_buffer << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
639
+ end
640
+
641
+ self.store_comment
642
+
643
+ if src.eos? then
644
+ return RubyLexer::EOF
645
+ end
646
+ end
647
+ # Replace a string of newlines with a single one
648
+
649
+ src.scan(/\n+/)
650
+
651
+ if [:expr_beg, :expr_fname,
652
+ :expr_dot, :expr_class].include? lex_state then
653
+ next
654
+ end
655
+
656
+ self.command_start = true
657
+ self.lex_state = :expr_beg
658
+ return "\n"
659
+ when src.scan(/\*\*=/) then
660
+ self.lex_state = :expr_beg
661
+ self.yacc_value = s("**")
662
+ return :tOP_ASGN
663
+ when src.scan(/\*\*/) then
664
+ self.yacc_value = s("**")
665
+ self.fix_arg_lex_state
666
+ return :tPOW
667
+ when src.scan(/\*\=/) then
668
+ self.lex_state = :expr_beg
669
+ self.yacc_value = s("*")
670
+ return :tOP_ASGN
671
+ when src.scan(/\*/) then
672
+ result = if lex_state.is_argument && space_seen && !src.check(/\s/) then
673
+ warning("`*' interpreted as argument prefix")
674
+ :tSTAR
675
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
676
+ :tSTAR
677
+ else
678
+ :tSTAR2
679
+ end
680
+
681
+ self.yacc_value = s("*")
682
+
683
+ self.fix_arg_lex_state
684
+
685
+ return result
686
+ when src.scan(/\!\=/) then
687
+ self.lex_state = :expr_beg
688
+ self.yacc_value = s("!=")
689
+ return :tNEQ
690
+ when src.scan(/\!~/) then
691
+ self.lex_state = :expr_beg
692
+ self.yacc_value = s("!~")
693
+ return :tNMATCH
694
+ when src.scan(/\!/) then
695
+ self.lex_state = :expr_beg
696
+ self.yacc_value = s("!")
697
+ return :tBANG
698
+ when src.scan(/\=\=\=/) then
699
+ self.fix_arg_lex_state
700
+ self.yacc_value = s("===")
701
+ return :tEQQ
702
+ when src.scan(/\=\=/) then
703
+ self.fix_arg_lex_state
704
+ self.yacc_value = s("==")
705
+ return :tEQ
706
+ when src.scan(/\=~/) then
707
+ self.fix_arg_lex_state
708
+ self.yacc_value = s("=~")
709
+ return :tMATCH
710
+ when src.scan(/\=>/) then
711
+ self.fix_arg_lex_state
712
+ self.yacc_value = s("=>")
713
+ return :tASSOC
714
+ when src.scan(/\=/) then
715
+ # documentation nodes
716
+ if src.was_begin_of_line and src.scan(/begin(?=\s)/) then
717
+ self.token_buffer << '=' # FIX merge up
718
+ self.token_buffer << src.matched
719
+
720
+ unless src.scan(/.*?\n=end(\n|\z)/m) then
721
+ rb_compile_error("embedded document meets end of file")
722
+ end
723
+
724
+ self.token_buffer << src.matched
725
+ self.store_comment
726
+
727
+ next
728
+ else
729
+ self.fix_arg_lex_state
730
+ self.yacc_value = s("=")
731
+ return '='
732
+ end
733
+ when src.scan(/\<\=\>/) then
734
+ self.fix_arg_lex_state
735
+ self.yacc_value = s("<=>")
736
+ return :tCMP
737
+ when src.scan(/\<\=/) then
738
+ self.fix_arg_lex_state
739
+ self.yacc_value = s("<=")
740
+ return :tLEQ
741
+ when src.scan(/\<\<\=/) then
742
+ self.fix_arg_lex_state
743
+ self.lex_state = :expr_beg
744
+ self.yacc_value = s("\<\<")
745
+ return :tOP_ASGN
746
+ when src.scan(/\<\</) then
747
+ if (! [:expr_end, :expr_dot,
748
+ :expr_endarg, :expr_class].include?(lex_state) &&
749
+ (!lex_state.is_argument || space_seen)) then
750
+ tok = self.heredoc_identifier
751
+ if tok then
752
+ return tok
753
+ end
754
+ end
755
+
756
+ self.fix_arg_lex_state
757
+ self.yacc_value = s("\<\<")
758
+ return :tLSHFT
759
+ when src.scan(/\</) then
760
+ self.fix_arg_lex_state
761
+ self.yacc_value = s("<")
762
+ return :tLT
763
+ when src.scan(/\>\=/) then
764
+ self.fix_arg_lex_state
765
+ self.yacc_value = s(">=")
766
+ return :tGEQ
767
+ when src.scan(/\>\>=/) then
768
+ self.fix_arg_lex_state
769
+ self.lex_state = :expr_beg
770
+ self.yacc_value = s(">>")
771
+ return :tOP_ASGN
772
+ when src.scan(/\>\>/) then
773
+ self.fix_arg_lex_state
774
+ self.yacc_value = s(">>")
775
+ return :tRSHFT
776
+ when src.scan(/\>/) then
777
+ self.fix_arg_lex_state
778
+ self.yacc_value = s(">")
779
+ return :tGT
780
+ when src.scan(/\"/) then
781
+ self.lex_strterm = s(:strterm, STR_DQUOTE, '"', "\0") # TODO: question this
782
+ self.yacc_value = s("\"")
783
+ return :tSTRING_BEG
784
+ when src.scan(/\`/) then
785
+ self.yacc_value = s("`")
786
+ case lex_state
787
+ when :expr_fname then
788
+ self.lex_state = :expr_end
789
+ return :tBACK_REF2
790
+ when :expr_dot then
791
+ self.lex_state = if command_state then
792
+ :expr_cmdarg
793
+ else
794
+ :expr_arg
795
+ end
796
+ return :tBACK_REF2
797
+ end
798
+ self.lex_strterm = s(:strterm, STR_XQUOTE, '`', "\0")
799
+ return :tXSTRING_BEG
800
+ when src.scan(/\'/) then
801
+ self.lex_strterm = s(:strterm, STR_SQUOTE, "\'", "\0")
802
+ self.yacc_value = s("'")
803
+ return :tSTRING_BEG
804
+ when src.scan(/\?/) then
805
+ if lex_state == :expr_end || lex_state == :expr_endarg then
806
+ self.lex_state = :expr_beg
807
+ self.yacc_value = s("?")
808
+ return '?'
809
+ end
810
+
811
+ if src.eos? then
812
+ rb_compile_error "incomplete character syntax"
813
+ end
814
+
815
+ if src.check(/\s|\v/) then
816
+ unless lex_state.is_argument then
817
+ c2 = case src.matched
818
+ when " " then
819
+ 's'
820
+ when "\n" then
821
+ 'n'
822
+ when "\t" then
823
+ 't'
824
+ when "\v" then
825
+ 'v'
826
+ when "\r" then
827
+ 'r'
828
+ when "\f" then
829
+ 'f'
830
+ end
831
+
832
+ if c2 then
833
+ warning("invalid character syntax; use ?\\" + c2)
834
+ end
835
+ end
836
+
837
+ # ternary
838
+ self.lex_state = :expr_beg
839
+ self.yacc_value = s("?")
840
+ return '?'
841
+ # elsif ismbchar(c) then # ternary, also
842
+ # rb_warn "multibyte character literal not supported yet; use ?\\#{c}"
843
+ # support.unread c
844
+ # self.lex_state = :expr_beg
845
+ # return '?'
846
+ elsif src.check(/\w(?=\w)/) then # ternary, also
847
+ self.lex_state = :expr_beg
848
+ self.yacc_value = s("?")
849
+ return '?'
850
+ end
851
+
852
+ c = if src.scan(/\\/) then
853
+ self.read_escape
854
+ else
855
+ src.getch
856
+ end
857
+ c[0] = (c[0].ord & 0xff).chr
858
+ self.lex_state = :expr_end
859
+ self.yacc_value = c[0].ord
860
+ return :tINTEGER
861
+ when src.scan(/\&&=/) then
862
+ self.yacc_value = s("&&")
863
+ self.lex_state = :expr_beg
864
+ return :tOP_ASGN
865
+ when src.scan(/\&&/) then
866
+ self.lex_state = :expr_beg
867
+ self.yacc_value = s("&&")
868
+ return :tANDOP
869
+ when src.scan(/\&\=/) then
870
+ self.yacc_value = s("&")
871
+ self.lex_state = :expr_beg
872
+ return :tOP_ASGN
873
+ when src.scan(/&/) then
874
+ result = if lex_state.is_argument && space_seen && !src.check(/\s/) then
875
+ warning("`&' interpreted as argument prefix")
876
+ :tAMPER
877
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
878
+ :tAMPER
879
+ else
880
+ :tAMPER2
881
+ end
882
+
883
+ self.fix_arg_lex_state
884
+ self.yacc_value = s("&")
885
+ return result
886
+ when src.scan(/\|\|\=/) then
887
+ self.lex_state = :expr_beg
888
+ self.yacc_value = s("||")
889
+ return :tOP_ASGN
890
+ when src.scan(/\|\|/) then
891
+ self.lex_state = :expr_beg
892
+ self.yacc_value = s("||")
893
+ return :tOROP
894
+ when src.scan(/\|\=/) then
895
+ self.lex_state = :expr_beg
896
+ self.yacc_value = s("|")
897
+ return :tOP_ASGN
898
+ when src.scan(/\|/) then
899
+ self.fix_arg_lex_state
900
+ self.yacc_value = s("|")
901
+ return :tPIPE
902
+ when src.scan(/[+-]/) then
903
+ sign = src.matched
904
+ utype, type = if sign == "+" then
905
+ [:tUPLUS, :tPLUS]
906
+ else
907
+ [:tUMINUS, :tMINUS]
908
+ end
909
+
910
+ if lex_state == :expr_fname || lex_state == :expr_dot then
911
+ self.lex_state = :expr_arg
912
+ if src.scan(/@/) then
913
+ self.yacc_value = s("#{sign}@")
914
+ return utype
915
+ else
916
+ self.yacc_value = s(sign)
917
+ return type
918
+ end
919
+ end
920
+
921
+ if src.scan(/\=/) then
922
+ self.lex_state = :expr_beg
923
+ self.yacc_value = s(sign)
924
+ return :tOP_ASGN
925
+ end
926
+
927
+ if (lex_state == :expr_beg || lex_state == :expr_mid ||
928
+ (lex_state.is_argument && space_seen && !src.check(/\s/))) then
929
+ if lex_state.is_argument then
930
+ arg_ambiguous
931
+ end
932
+
933
+ self.lex_state = :expr_beg
934
+ self.yacc_value = s(sign)
935
+
936
+ if src.check(/\d/) then
937
+ if utype == :tUPLUS then
938
+ return self.parse_number
939
+ else
940
+ return :tUMINUS_NUM
941
+ end
942
+ end
943
+
944
+ return utype
945
+ end
946
+
947
+ self.lex_state = :expr_beg
948
+ self.yacc_value = s(sign)
949
+ return type
950
+ when src.scan(/\.\.\./) then
951
+ self.lex_state = :expr_beg
952
+ self.yacc_value = s("...")
953
+ return :tDOT3
954
+ when src.scan(/\.\./) then
955
+ self.lex_state = :expr_beg
956
+ self.yacc_value = s("..")
957
+ return :tDOT2
958
+ when src.scan(/\.\d/) then
959
+ rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
960
+ when src.scan(/\./) then
961
+ self.lex_state = :expr_dot
962
+ self.yacc_value = s(".")
963
+ return :tDOT
964
+ when src.check(/[0-9]/) then
965
+ return parse_number
966
+ when src.scan(/[\)\]\}]/) then
967
+ cond.lexpop
968
+ cmdarg.lexpop
969
+ self.lex_state = :expr_end
970
+ self.yacc_value = s(src.matched)
971
+ result = {
972
+ ")" => :tRPAREN,
973
+ "]" => :tRBRACK,
974
+ "}" => :tRCURLY
975
+ }[src.matched]
976
+ return result
977
+ when src.scan(/::/) then
978
+ if (lex_state == :expr_beg ||
979
+ lex_state == :expr_mid ||
980
+ lex_state == :expr_class ||
981
+ (lex_state.is_argument && space_seen)) then
982
+ self.lex_state = :expr_beg
983
+ self.yacc_value = s("::")
984
+ return :tCOLON3
985
+ end
986
+
987
+ self.lex_state = :expr_dot
988
+ self.yacc_value = s("::")
989
+ return :tCOLON2
990
+ when src.scan(/\:/) then
991
+ if (lex_state == :expr_end || lex_state == :expr_endarg ||
992
+ src.check(/\s/)) then
993
+ self.lex_state = :expr_beg
994
+ self.yacc_value = s(":")
995
+ return ':'
996
+ end
997
+
998
+ case
999
+ when src.scan(/\'/) then
1000
+ self.lex_strterm = s(:strterm, STR_SSYM, src.matched, "\0")
1001
+ when src.scan(/\"/) then
1002
+ self.lex_strterm = s(:strterm, STR_DSYM, src.matched, "\0")
1003
+ end
1004
+
1005
+ self.lex_state = :expr_fname
1006
+ self.yacc_value = s(":")
1007
+ return :tSYMBEG
1008
+ when src.scan(/\//) then
1009
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1010
+ self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1011
+ self.yacc_value = s("/")
1012
+ return :tREGEXP_BEG
1013
+ end
1014
+
1015
+ if src.scan(/\=/) then
1016
+ self.yacc_value = s("/")
1017
+ self.lex_state = :expr_beg
1018
+ return :tOP_ASGN
1019
+ end
1020
+
1021
+ if lex_state.is_argument && space_seen then
1022
+ unless src.scan(/\s/) then
1023
+ arg_ambiguous
1024
+ self.lex_strterm = s(:strterm, STR_REGEXP, '/', "\0")
1025
+ self.yacc_value = s("/")
1026
+ return :tREGEXP_BEG
1027
+ end
1028
+ end
1029
+
1030
+ self.fix_arg_lex_state
1031
+
1032
+ self.yacc_value = s("/")
1033
+ return :tDIVIDE
1034
+ when src.scan(/\^=/) then
1035
+ self.lex_state = :expr_beg
1036
+ self.yacc_value = s("^")
1037
+ return :tOP_ASGN
1038
+ when src.scan(/\^/) then
1039
+ self.fix_arg_lex_state
1040
+ self.yacc_value = s("^")
1041
+ return :tCARET
1042
+ when src.scan(/\;/) then
1043
+ self.command_start = true
1044
+ self.lex_state = :expr_beg
1045
+ self.yacc_value = s(";")
1046
+ return src.matched
1047
+ when src.scan(/\,/) then
1048
+ self.lex_state = :expr_beg
1049
+ self.yacc_value = s(",")
1050
+ return src.matched
1051
+ when src.scan(/\~/) then
1052
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1053
+ src.scan(/@/)
1054
+ end
1055
+
1056
+ self.fix_arg_lex_state
1057
+ self.yacc_value = s("~")
1058
+
1059
+ return :tTILDE
1060
+ when src.scan(/\(/) then
1061
+ result = :tLPAREN2
1062
+ self.command_start = true
1063
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1064
+ result = :tLPAREN
1065
+ elsif space_seen then
1066
+ if lex_state == :expr_cmdarg then
1067
+ result = :tLPAREN_ARG
1068
+ elsif lex_state == :expr_arg then
1069
+ warning("don't put space before argument parentheses")
1070
+ result = :tLPAREN2
1071
+ end
1072
+ end
1073
+
1074
+ self.expr_beg_push "("
1075
+
1076
+ return result
1077
+ when src.scan(/\[/) then
1078
+ result = src.matched
1079
+ if lex_state == :expr_fname || lex_state == :expr_dot then
1080
+ self.lex_state = :expr_arg
1081
+ case
1082
+ when src.scan(/\]\=/) then
1083
+ self.yacc_value = s("[]=")
1084
+ return :tASET
1085
+ when src.scan(/\]/) then
1086
+ self.yacc_value = s("[]")
1087
+ return :tAREF
1088
+ else
1089
+ rb_compile_error "unexpected '['"
1090
+ end
1091
+ elsif lex_state == :expr_beg || lex_state == :expr_mid then
1092
+ result = :tLBRACK
1093
+ elsif lex_state.is_argument && space_seen then
1094
+ result = :tLBRACK
1095
+ end
1096
+
1097
+ self.expr_beg_push("[")
1098
+
1099
+ return result
1100
+ when src.scan(/\{/) then
1101
+ result = if lex_state.is_argument || lex_state == :expr_end then
1102
+ :tLCURLY # block (primary)
1103
+ elsif lex_state == :expr_endarg then
1104
+ :tLBRACE_ARG # block (expr)
1105
+ else
1106
+ :tLBRACE # hash
1107
+ end
1108
+
1109
+ self.expr_beg_push("{")
1110
+
1111
+ return result
1112
+ when src.scan(/\\/) then
1113
+ if src.scan(/\n/) then
1114
+ space_seen = true
1115
+ next
1116
+ end
1117
+ rb_compile_error "bare backslash only allowed before newline"
1118
+ when src.scan(/\%/) then
1119
+ if lex_state == :expr_beg || lex_state == :expr_mid then
1120
+ return parse_quote
1121
+ end
1122
+
1123
+ if src.scan(/\=/) then
1124
+ self.lex_state = :expr_beg
1125
+ self.yacc_value = s("%")
1126
+ return :tOP_ASGN
1127
+ end
1128
+
1129
+ if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1130
+ return parse_quote
1131
+ end
1132
+
1133
+ self.fix_arg_lex_state
1134
+ self.yacc_value = s("%")
1135
+
1136
+ return :tPERCENT
1137
+ when src.scan(/(\$_)(\w)/) then
1138
+ self.lex_state = :expr_end
1139
+ token_buffer << src[1]
1140
+ # HACK? c = src[2]
1141
+ # pass through
1142
+ when src.scan(/\$_/) then
1143
+ self.lex_state = :expr_end
1144
+ token_buffer << src.matched
1145
+ self.yacc_value = s(src.matched)
1146
+ return :tGVAR
1147
+ when src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1148
+ self.lex_state = :expr_end
1149
+ token_buffer << src.matched
1150
+ self.yacc_value = s(token_buffer.join)
1151
+ return :tGVAR
1152
+ when src.scan(/\$([\&\`\'\+])/) then
1153
+ self.lex_state = :expr_end
1154
+ # Explicit reference to these vars as symbols...
1155
+ if last_state == :expr_fname then
1156
+ token_buffer << src.matched
1157
+ self.yacc_value = s(token_buffer.join)
1158
+ return :tGVAR
1159
+ else
1160
+ self.yacc_value = s(:back_ref, src[1].to_sym)
1161
+ return :tBACK_REF
1162
+ end
1163
+ when src.scan(/\$[1-9]\d*/) then
1164
+ self.lex_state = :expr_end
1165
+ token_buffer.push(*src.matched.split(//))
1166
+ if last_state == :expr_fname then
1167
+ self.yacc_value = s(token_buffer.join)
1168
+ return :tGVAR
1169
+ else
1170
+ self.yacc_value = s(:nth_ref, token_buffer.join[1..-1].to_i)
1171
+ return :tNTH_REF
1172
+ end
1173
+ when src.scan(/\$0/) then
1174
+ self.lex_state = :expr_end
1175
+ token_buffer << '$' # why just this?!?
1176
+ # pass through
1177
+ when src.scan(/\$\W|\$\z/) then # TODO: remove?
1178
+ self.lex_state = :expr_end
1179
+ self.yacc_value = s("$")
1180
+ return '$'
1181
+ when src.scan(/\$/)
1182
+ self.lex_state = :expr_end
1183
+ token_buffer << src.matched
1184
+ src.getch
1185
+ # pass through
1186
+ when src.scan(/\@/) then
1187
+ token_buffer << '@'
1188
+
1189
+ if src.scan(/(@)?\d/) then
1190
+ if src[1] then
1191
+ rb_compile_error "`@@#{c}` is not allowed as a class variable name"
1192
+ else
1193
+ rb_compile_error "`@#{c}' is not allowed as an instance variable name"
1194
+ end
1195
+ end
1196
+
1197
+ if src.scan(/@/) then
1198
+ token_buffer << src.matched
1199
+ end
1200
+
1201
+ unless src.scan(/\w/) then
1202
+ self.yacc_value = s("@")
1203
+ return '@'
1204
+ end
1205
+ when src.scan(/\_/) then
1206
+ if src.was_begin_of_line && src.scan(/_END__(\n|\Z)/) then
1207
+ return RubyLexer::EOF
1208
+ end
1209
+ else
1210
+ c = src.getch # FIX: I really hate this
1211
+ if c =~ /\W/ then
1212
+ rb_compile_error "Invalid char '#{c.inspect}' in expression"
1213
+ end
1214
+ end
1215
+
1216
+ src.pos -= 1 # HACK
1217
+ if src.scan(/\w+/) then
1218
+ token_buffer.push(*src.matched.split(//)) # TODO: that split is tarded.
1219
+ end
1220
+
1221
+ if token_buffer[0] =~ /\w/ && src.scan(/[\!\?](?!=)/) then
1222
+ token_buffer << src.matched
1223
+ end
1224
+
1225
+ result = nil
1226
+ last_state = lex_state
1227
+
1228
+ case token_buffer[0]
1229
+ when /^\$/ then
1230
+ self.lex_state = :expr_end
1231
+ result = :tGVAR
1232
+ when '@' then
1233
+ self.lex_state = :expr_end
1234
+ if token_buffer[1] == '@' then
1235
+ result = :tCVAR
1236
+ else
1237
+ result = :tIVAR
1238
+ end
1239
+ else
1240
+ if token_buffer[-1] =~ /[!?]/ then
1241
+ result = :tFID
1242
+ else
1243
+ if lex_state == :expr_fname then
1244
+ # ident=, not =~ => == or followed by =>
1245
+ if src.scan(/=(?:(?![~>=])|(?==>))/) then
1246
+ result = :tIDENTIFIER
1247
+ token_buffer << src.matched
1248
+ end
1249
+ end
1250
+
1251
+ if result.nil? && token_buffer[0] =~ /[A-Z]/ then
1252
+ result = :tCONSTANT
1253
+ else
1254
+ result = :tIDENTIFIER
1255
+ end
1256
+ end
1257
+
1258
+ unless lex_state == :expr_dot then
1259
+ # See if it is a reserved word.
1260
+ keyword = Keyword.keyword(token_buffer.join, token_buffer.length)
1261
+
1262
+ unless keyword.nil? then
1263
+ state = lex_state
1264
+ self.lex_state = keyword.state
1265
+
1266
+ self.yacc_value = if state == :expr_fname then
1267
+ s(keyword.name)
1268
+ else
1269
+ s(token_buffer.join)
1270
+ end
1271
+
1272
+ if keyword.id0 == :kDO then
1273
+ self.command_start = true
1274
+ if cond.is_in_state then
1275
+ return :kDO_COND
1276
+ end
1277
+ if cmdarg.is_in_state && state != :expr_cmdarg then
1278
+ return :kDO_BLOCK
1279
+ end
1280
+ if state == :expr_endarg then
1281
+ return :kDO_BLOCK
1282
+ end
1283
+ return :kDO
1284
+ end
1285
+
1286
+ if state == :expr_beg then
1287
+ return keyword.id0
1288
+ end
1289
+
1290
+ if keyword.id0 != keyword.id1 then
1291
+ self.lex_state = :expr_beg
1292
+ end
1293
+
1294
+ return keyword.id1
1295
+ end
1296
+ end # lex_state == :expr_dot
1297
+
1298
+ if (lex_state == :expr_beg ||
1299
+ lex_state == :expr_mid ||
1300
+ lex_state == :expr_dot ||
1301
+ lex_state == :expr_arg ||
1302
+ lex_state == :expr_cmdarg) then
1303
+ if command_state then
1304
+ self.lex_state = :expr_cmdarg
1305
+ else
1306
+ self.lex_state = :expr_arg
1307
+ end
1308
+ else
1309
+ self.lex_state = :expr_end
1310
+ end
1311
+ end
1312
+
1313
+ # Lame: parsing logic made it into lexer in ruby...So we
1314
+ # are emulating
1315
+ # FIXME: I believe this is much simpler now...
1316
+ # HACK
1317
+ # if (IdUtil.var_type(temp_val) == IdUtil.LOCAL_VAR &&
1318
+ # last_state != :expr_dot &&
1319
+ # (BlockStaticScope === scope && (scope.is_defined(temp_val) >= 0)) ||
1320
+ # (scope.local_scope.is_defined(temp_val) >= 0)) then
1321
+ # self.lex_state = :expr_end
1322
+ # end
1323
+
1324
+ self.yacc_value = s(token_buffer.join)
1325
+
1326
+ return result
1327
+ end
1328
+ end
1329
+ end