rubylexer 0.7.6 → 0.7.7
- data/.document +4 -0
- data/History.txt +54 -7
- data/Makefile +43 -0
- data/lib/.document +3 -0
- data/lib/rubylexer.rb +286 -154
- data/lib/rubylexer/.document +9 -0
- data/lib/rubylexer/charhandler.rb +25 -19
- data/lib/rubylexer/context.rb +17 -4
- data/lib/rubylexer/rubycode.rb +1 -1
- data/lib/rubylexer/rulexer.rb +120 -95
- data/lib/rubylexer/symboltable.rb +22 -1
- data/lib/rubylexer/test/oneliners.rb +20 -0
- data/lib/rubylexer/test/oneliners_1.9.rb +146 -0
- data/lib/rubylexer/test/testcases.rb +6 -2
- data/lib/rubylexer/token.rb +22 -6
- data/lib/rubylexer/tokenprinter.rb +6 -6
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.gemspec +40 -0
- data/test/code/coloruby.rb +154 -0
- data/test/code/dumptokens.rb +10 -5
- data/test/code/regression.rb +31 -17
- data/test/code/rubylexervsruby.rb +1 -1
- data/test/code/test_1.9.rb +31 -0
- data/test/code/tokentest.rb +6 -6
- data/test/data/{hdr_dos2.rb → hdr_dos2.rb.broken} +0 -0
- data/test/data/{heremonsters.rb.broken → heremonsters_broken.rb} +0 -0
- data/test/data/{heremonsters_dos.rb.broken → heremonsters_dos_broken.rb} +0 -0
- data/test/test_all.rb +2 -0
- metadata +94 -98
- data/Rakefile +0 -37
data/.document
ADDED
data/History.txt
CHANGED
@@ -1,4 +1,51 @@
-=== 0.7.
+=== 0.7.7/21dec2009
+* 5 Major Enhancements:
+* Got rid of the hacky RuLexer ancestor class. woo-hoo!
+* Input charsets other than 7bit ascii now supported
+* binary (8bit ascii), utf8, and euc now work; sjis does not
+* __FILE__ and __LINE__ now have their correct values attached to them
+* Build scripts completely rewritten; hoe is now gone!
+* Improved ruby 1.9 compatibility (see below)
+
+* 4 Major Bugfixes:
+* Module names may begin with arbitrary expressions
+* but such expressions are always ended by ::
+* even if inside a implicit parens context
+* and positions of whitespace tokens in module header are better tracked
+* Finally learned to disable rdoc for files it dislikes (thanks, Roger!)
+* Rescue in rhs context is always ternary now
+* (this is incorrect if in a multiassign, but redparse will handle that)
+* Parsing of do, comma, and unary star on assign rhs is better now
+
+* 2 Minor Enhancements:
+* Colorizer example
+* Changes to token classes to incorporate modifications made by redparse
+
+* 7 Minor Bugfixes:
+* Newline after = is now soft
+* Fixed type of local var if inside class/method inside def
+* Fixed parsing of shebang line if no args
+* Fixed incorrect offsets in a few obscure cases
+* Don't treat \r as newline in =begin..=end
+* Cleaned up test data
+* Fixed mistypings of local vars in string inclusions
+
+* Improved 1.9 compatibility:
+* code works under 1.9 interpreter
+* stabby blocks
+* __ENCODING__ keyword
+* tolerate ternary : at beginning of line
+* character constants are string, not integer, literals
+* new \u escape sequence allowed in double-quotish strings
+* allow nested () in def param list
+* not is a funclike keyword
+* parens allowed as method name; alias for #call
+* block private locals declared after ; inside block param
+* !, !=, and !~ are methods in 1.9
+* local variables declared by named backreferences in regexps
+* tests for many 1.9 features
+
+=== 0.7.6/1jul2009
 * 5 Bugfixes:
 * don't treat <, <=, <=> as starting variables (only << for here header)
 * space between break/return/next and following open paren is ignored
@@ -11,11 +58,11 @@
 * dot at beginning of line
 * !, !=, !~ are now valid method/symbol names

-=== 0.7.5/
+=== 0.7.5/23may2009
 * 1 Bugfix:
 * fixed problem with parsing shebang lines

-=== 0.7.4/
+=== 0.7.4/20may2009
 * 2 Major Enhancements:
 * preliminary support for ruby 1.9
 * utf8 inputs should now work... more or less
@@ -31,7 +78,7 @@
 * added tag field to Token; I hope many flags can be coalesced into tag.
 * note line that all strings (and here docs) start and end on

-=== 0.7.3/
+=== 0.7.3/19apr2009
 * 9 Bugfixes:
 * remember whether comma was seen in paren context
 * reducing the warning load
@@ -56,7 +103,7 @@
 * various other little helper methods needed by redparse in Tokens
 * hack Rakefile so 'rake test' will stay in 1 process (keeps netbeans happy)

-=== 0.7.2/
+=== 0.7.2/12oct2008
 * 12 Minor Enhancements:
 * a new context for then kw expected
 * disable all backtracking when scanning string interiors
@@ -71,7 +118,7 @@
 * trying to make 'rake test' work right
 * certain other changes of no importance whatsoever

-=== 0.7.1/
+=== 0.7.1/28aug2008
 * 6 Major Enhancements:
 * handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
 * yet more hacks in aid of string inclusions
@@ -161,7 +208,7 @@
 * offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
 * tokentest has a --loop option, for load testing

-=== 0.7.0/
+=== 0.7.0/15feb2008
 * implicit tokens are now emitted at the right times (need more test code)
 * local variables are now temporarily hidden by class, module, and def
 * line numbers should always be correct now (=begin...=end caused this) (??)
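The data/lib/rubylexer.rb diff below is easier to follow with the public entry points in mind. Here is a minimal sketch of driving the 0.7.7 lexer over a snippet that uses a 1.9 stabby block; the constructor signature, the :rubyversion option and the get1token/eof? calls are taken from the diff itself, while the assumption that a plain String is accepted for the file argument and the exact token classes printed are illustrative, not authoritative.

  require 'rubygems'
  require 'rubylexer'

  src = "double = ->(x){ x * 2 }\n"

  # initialize(filename, file, linenum=1, offset_adjust=0, options={}) per the diff;
  # :rubyversion defaults to 1.8 when not given
  lexer = RubyLexer.new("(example)", src, 1, 0, :rubyversion => 1.9)

  until lexer.eof?
    tok = lexer.get1token
    puts "#{tok.class}\t#{tok.to_s.inspect}"
  end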
data/Makefile
ADDED
@@ -0,0 +1,43 @@
+name=RubyLexer
+lname=rubylexer
+gemname=rubylexer
+
+#everything after this line is generic
+
+version=$(shell ruby -r ./lib/$(lname)/version.rb -e "puts $(name)::VERSION")
+filelist=$(shell git ls-files)
+
+.PHONY: all test docs gem tar pkg email
+all: test
+
+test:
+	ruby -Ilib test/test_all.rb
+
+docs:
+	rdoc lib/*
+
+pkg: gem tar
+
+gem:
+	gem build $(lname).gemspec
+
+tar:
+	tar cf - $(filelist) | ( mkdir $(gemname)-$(version); cd $(gemname)-$(version); tar xf - )
+	tar czf $(gemname)-$(version).tar.gz $(gemname)-$(version)
+	rm -rf $(gemname)-$(version)
+
+email: README.txt History.txt
+	ruby -e ' \
+	require "rubygems"; \
+	load "./$(lname).gemspec"; \
+	spec= Gem::Specification.list.find{|x| x.name=="$(gemname)"}; \
+	puts "\
+	Subject: [ANN] $(name) #{spec.version} Released \
+	\n\n$(name) version #{spec.version} has been released! \n\n\
+	#{Array(spec.homepage).map{|url| " * #{url}\n" }} \
+	\n\
+	#{$(name)::Description} \
+	\n\nChanges:\n\n \
+	#{$(name)::Latest_changes} \
+	"\
+	'
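This Makefile replaces the old hoe/Rakefile setup: make test runs test/test_all.rb, make gem builds from rubylexer.gemspec, make pkg produces both the gem and a tarball, and make email prints a release announcement. The email recipe is a backslash-continued line of embedded Ruby; expanded with the make variables substituted (name=RubyLexer, gemname=rubylexer), it is roughly the sketch below — the RubyLexer::Description and RubyLexer::Latest_changes constants are the ones referenced by the Makefile itself, everything else is just the recipe unrolled.

  require "rubygems"
  load "./rubylexer.gemspec"
  spec = Gem::Specification.list.find { |x| x.name == "rubylexer" }

  puts "Subject: [ANN] RubyLexer #{spec.version} Released"
  puts
  puts "RubyLexer version #{spec.version} has been released!"
  puts Array(spec.homepage).map { |url| " * #{url}\n" }.join
  puts RubyLexer::Description
  puts
  puts "Changes:"
  puts
  puts RubyLexer::Latest_changes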
data/lib/.document
ADDED
data/lib/rubylexer.rb
CHANGED
@@ -109,9 +109,6 @@ class RubyLexer
 #?\r => :newline, #implicitly escaped after op

 ?\\ => :escnewline,
-?\x00 => :eof,
-?\x04 => :eof,
-?\x1a => :eof,

 "[({" => :open_brace,
 "])}" => :close_brace,
@@ -119,7 +116,15 @@ class RubyLexer

 ?# => :comment,

-
+?\x00 => :eof,
+?\x04 => :eof,
+?\x1a => :eof,
+
+?\x01..?\x03 => :illegal_char,
+?\x05..?\x08 => :illegal_char,
+?\x0E..?\x19 => :illegal_char,
+?\x1b..?\x1F => :illegal_char,
+?\x7F => :illegal_char,
 }

 attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
@@ -138,10 +143,14 @@ class RubyLexer
 def #{n}; #{n}; end
 def self.#{n}; @@#{n}; end
 "
-}.
+}.join

 NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
-
+if ?A.is_a? String #ruby >= 1.9
+NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
+else
+NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
+end
 NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST

 =begin
@@ -149,13 +158,13 @@ class RubyLexer
 utf8=String::PATTERN_UTF8 #or euc, or sjis...
 LCLETTER_U="(?>[a-z_]|#{utf8})"
 LETTER_U="(?>[A-Za-z_]|#{utf8})"
-
+LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})"
 =end

 #-----------------------------------
-def initialize(filename,file,linenum=1,offset_adjust=0,options={
+def initialize(filename,file,linenum=1,offset_adjust=0,options={})
 @offset_adjust=0 #set again in next line
-
+rulexer_initialize(filename,file, linenum,offset_adjust)
 @start_linenum=linenum
 @parsestack=[TopLevelContext.new]
 @incomplete_here_tokens=[] #not used anymore
@@ -168,16 +177,17 @@ class RubyLexer
 @enable_macro=nil
 @base_file=nil
 @progress_thread=nil
-@rubyversion=options[:rubyversion]
+@rubyversion=options[:rubyversion]||1.8
 @encoding=options[:encoding]||:detect
 @method_operators=if @rubyversion>=1.9
-/#{RUBYSYMOPERATORREX}|\A![
+/#{RUBYSYMOPERATORREX}|\A![=~@]?/o
 else
 RUBYSYMOPERATORREX
 end

-@toptable=CharHandler.new(self, :
+@toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)

+extend RubyLexer1_9 if @rubyversion>=1.9
 read_leading_encoding
 start_of_line_directives
 progress_printer
@@ -203,11 +213,11 @@ class RubyLexer
 def read_leading_encoding
 return unless @encoding==:detect
 @encoding=:ascii
-@encoding=:utf8 if @file.skip(
+@encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom
 if @file.skip( /\A#!/ )
 loop do
 til_charset( /[\s\v]/ )
-break if @file.match(
+break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
 if @file.skip( /.-K(.)/ )
 case $1
 when 'u'; @encoding=:utf8
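The read_leading_encoding change above is what implements the new charset support: with the default :encoding => :detect, a leading UTF-8 BOM or a -K switch on the shebang line switches the lexer to the corresponding charset. The option can also be forced; a small sketch, assuming :utf8 as the option value (the only symbol visible in this hunk — History.txt above also lists binary and euc as working, but their exact option symbols are not shown here):

  require 'rubylexer'

  utf8_src = "s = \"caf\xC3\xA9\"\n"
  lexer = RubyLexer.new("(example)", utf8_src, 1, 0, :encoding => :utf8)
  tok = lexer.get1token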
@@ -243,8 +253,9 @@ class RubyLexer
 @localvars_stack.last
 end

+attr_accessor :localvars_stack
+
 attr_accessor :in_def
-attr :localvars_stack
 attr :offset_adjust
 attr_writer :pending_here_bodies
 attr :rubyversion
@@ -256,7 +267,7 @@ class RubyLexer

 #-----------------------------------
 def get1token
-result=
+result=rulexer_get1token #most of the action's here

 if ENV['PROGRESS']
 @last_cp_pos||=0
@@ -300,12 +311,12 @@ class RubyLexer

 #-----------------------------------
 def eof?
-
+rulexer_eof? or EoiToken===@last_operative_token
 end

 #-----------------------------------
 def input_position
-
+rulexer_input_position+@offset_adjust
 end

 #-----------------------------------
@@ -351,6 +362,7 @@ private
 return true if (defined? @in_def) and @in_def
 @parsestack.reverse_each{|ctx|
 ctx.starter=='def' and ctx.state!=:saw_def and return true
+ctx.starter=='class' || ctx.starter=='module' and return false
 }
 return false
 end
@@ -389,7 +401,7 @@ private
 unless @moretokens.empty?
 case @moretokens.first
 when StillIgnoreToken
-when NewlineToken
+when NewlineToken; allow_eol or break
 else break
 end
 else
@@ -467,12 +479,9 @@ private
 if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
 @moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1)
 else
-@moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
-#
-
-when FUNCLIKE_KEYWORDS; except=tok
-when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
-end
+@moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except|
+#most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
+
 was_last=@last_operative_token
 @last_operative_token=tok if tok
 normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
@@ -547,6 +556,7 @@ private
 #@defining_lvar is a hack
 @defining_lvar or case ctx=@parsestack.last
 #when ForSMContext; ctx.state==:for
+when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident
 when RescueSMContext
 lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
 #when BlockParamListLhsContext; true
@@ -567,6 +577,7 @@ private
 #whitespace before but not after the 'operator' indicates it is to be considered a
 #value token instead. otherwise it is a binary operator. (unary (prefix) ops count
 #as 'values' here.)
+#this is by far the ugliest method in RubyLexer.
 def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
 #look for call site if not a keyword or keyword is function-like
 #look for and ignore local variable names
@@ -579,7 +590,7 @@ private
 when /(?!#@@LETTER_DIGIT).$/o #do nothing
 when /^#@@LCLETTER/o
 (localvars===name or
-VARLIKE_KEYWORDS===name or
+#VARLIKE_KEYWORDS===name or
 was_in_lvar_define_state
 ) and not lasttok===/^(\.|::)$/
 when /^#@@UCLETTER/o
@@ -617,8 +628,9 @@ private
 #if next op is assignment (or comma in lvalue list)
 #then omit implicit parens
 assignment_coming=case nc=nextchar
-when ?=; not /^=[>=~]$/===readahead(2)
+when ?=; not( /^=[>=~]$/===readahead(2) )
 when ?,; comma_in_lvalue_list?
+when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last
 when ?); last_context_not_implicit.lhs
 when ?i; /^in(?!#@@LETTER_DIGIT)/o===readahead(3) and
 ForSMContext===last_context_not_implicit
@@ -645,7 +657,7 @@ private
 IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
 else
 case nc
-when nil
+when nil; 2
 when ?!; /^![=~]$/===readahead(2) ? 2 : 1
 when ?d;
 if /^do((?!#@@LETTER_DIGIT)|$)/o===readahead(3)
@@ -761,7 +773,7 @@ private
 !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
 #only 1 param in list
 result.unshift ImplicitParamListStartToken.new(oldpos)
-@parsestack.push
+@parsestack.push KWParamListContextNoParen.new(@linenum)
 else
 arr,pass=*param_list_coming_with_2_or_more_params?
 result.push( *arr )
@@ -846,14 +858,14 @@ private
 result=[]
 ctx=@parsestack.last
 while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
-break if AssignmentRhsContext===ctx && !ctx.multi_assign?
-if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
-result.push ImplicitParamListEndToken.new(input_position-str.length),
-AssignmentRhsListEndToken.new(input_position-str.length)
-@parsestack.pop
-@parsestack.pop
-break
-end
+# break if AssignmentRhsContext===ctx && !ctx.multi_assign?
+# if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
+# result.push ImplicitParamListEndToken.new(input_position-str.length),
+# AssignmentRhsListEndToken.new(input_position-str.length)
+# @parsestack.pop
+# @parsestack.pop
+# break
+# end
 result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
 break if RescueSMContext===ctx #why is this here?
 @parsestack.pop
@@ -866,6 +878,7 @@ private
 CONTEXT2ENDTOK_FOR_DO={
 AssignmentRhsContext=>AssignmentRhsListEndToken,
 ParamListContextNoParen=>ImplicitParamListEndToken,
+UnparenedParamListLhsContext=>KwParamListEndToken,
 ExpectDoOrNlContext=>1,
 #WhenParamListContext=>KwParamListEndToken,
 #RescueSMContext=>KwParamListEndToken
@@ -874,6 +887,17 @@ private
 #assert @moretokens.empty?
 result=[]
 while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
+if klass==AssignmentRhsListEndToken
+i=@parsestack.size
+end_the_assign=false
+while AssignmentRhsContext===@parsestack[i-=1]
+if CONTEXT2ENDTOK_FOR_DO[@parsestack[i-1].class] and
+@parsestack[i-1].class!=AssignmentRhsContext
+break end_the_assign=true
+end
+end
+break unless end_the_assign
+end
 break if klass==1
 result << klass.new(input_position-str.length)
 @parsestack.pop
@@ -917,19 +941,27 @@ private

 #-----------------------------------
 @@SPACES=/[\ \t\v\f\v]/
-@@WSTOK
-
-
-
+@@WSTOK=/(?>
+(?>\r?)\n|
+(?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
+\#(?>[^\n]*)\n|
+\\(?>\r?)\n|
+^=begin(?>(?>#@@SPACES.*)?)\n
+(?>(?:(?!=end)(?>.*)\n))*
+=end(?>(?>#@@SPACES.*)?)\n
+)/x
+@@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
+def divide_ws(ws0,offset)
 result=[]
-
+ws0.scan(/\G#@@WSTOK/o){|ws|
 incr= $~.begin(0)
-
-when /\A[\#=]/;
-when /\n\Z/; EscNlToken
-else WsToken
+tok=case ws
+when /\A[\#=]/; IgnoreToken.new(ws,offset+incr)
+when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum)
+else WsToken.new(ws,offset+incr)
 end
-result <<
+result << tok
+@linenum+=ws.count "\n"
 }
 result.each_with_index{|ws,i|
 if WsToken===ws
@@ -939,7 +971,22 @@ private
 return result
 end

-
+#-----------------------------------
+#lex tokens until a predefined end token is found.
+#returns a list of tokens seen.
+def read_arbitrary_expression(&endcondition)
+result=[]
+oldsize=@parsestack.size
+safe_recurse{
+tok=nil
+until endcondition[tok,@parsestack[oldsize+1..-1]||[]] and @parsestack.size==oldsize
+tok=get1token
+result<<tok
+EoiToken===tok and break lexerror( tok, "unexpected eof" )
+end
+}
+result
+end

 #-----------------------------------
 #parse keywords now, to prevent confusion over bare symbols
@@ -950,7 +997,7 @@ private
 assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
 result=[KeywordToken.new(str,offset)]

-m
+m=:"keyword_#{str}"
 respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)]
 end
 public #these have to be public so respond_to? can see them (sigh)
@@ -977,29 +1024,39 @@ private
 def keyword_module(str,offset,result)
 result.first.has_end!
 @parsestack.push WantsEndContext.new(str,@linenum)
-@localvars_stack.push SymbolTable.new
 offset=input_position
-@
-
-
-fail if all.empty?
-@moretokens.concat divide_ws(ws,offset) if ws
-@moretokens.push KeywordToken.new('::',offset+md.end(0)-2) if dc
-loop do
-offset=input_position
-@file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(::)?/o)
+assert @moretokens.empty?
+tokens=[]
+if @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(?=[#{WHSP}]+(?:[^(])|[#;\n]|::)/o)
 md=@file.last_match
-all,ws,name
-if ws
-
-
+all,ws,name=*md
+tokens.concat divide_ws(ws,md.begin(1)) if ws
+tokens.push VarNameToken.new(name,md.begin(2))
+end
+tokens.push( *read_arbitrary_expression{|tok,extra_contexts|
+#@file.check /\A(\n|;|::|end(?!#@@LETTER_DIGIT)|(#@@UCLETTER#@@LETTER_DIGIT*)(?!(#@@WSTOKS)?::))/o
+@file.check( /\A(\n|;|end(?!#@@LETTER_DIGIT))/o ) or
+@file.check("::") && extra_contexts.all?{|ctx| ImplicitParamListContext===ctx } &&
+@moretokens.push(*abort_noparens!)
+} ) if !name #or @file.check /#@@WSTOKS?::/o
+@moretokens[0,0]=tokens
+@localvars_stack.push SymbolTable.new
+while @file.check( /\A::/ )
+#VarNameToken===@moretokens.last or
+#KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
+@file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
+md=@file.last_match
+all,ws1,dc,ws2,name=*md
+if ws1
+@moretokens.concat divide_ws(ws1,md.begin(1))
+incr=ws1.size
 else
 incr=0
 end
-@moretokens.push
-
-@moretokens.
-@moretokens.push
+@moretokens.push NoWsToken.new(md.begin(2)) if dc
+@moretokens.push KeywordToken.new('::',md.begin(2)) if dc
+@moretokens.concat divide_ws(ws2,md.begin(3)) if ws2
+@moretokens.push VarNameToken.new(name,md.begin(4))
 end
 @moretokens.push EndHeaderToken.new(input_position)
 return result
@@ -1071,8 +1128,7 @@ private
 else
 result.last.has_end!
 if BlockContext===ctx and ctx.wanting_stabby_block_body
-
-ctx.starter,ctx.ender="do","end"
+@parsestack[-1]= WantsEndContext.new(str,@linenum)
 else
 @parsestack.push WantsEndContext.new(str,@linenum)
 localvars.start_block
@@ -1107,8 +1163,8 @@ private
 result << tok
 end until parencount==0 #@parsestack.size==old_size
 @localvars_stack.push SymbolTable.new
-
-
+else #no parentheses, all tail
+set_last_token KeywordToken.new(".") #hack hack
 tokindex=result.size
 result << tok=symbol(false,false)
 name=tok.to_s
@@ -1118,7 +1174,7 @@ private
 maybe_local=case name
 when /(?!#@@LETTER_DIGIT).$/o; #do nothing
 when /^[@$]/; true
-when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
+when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
 when /^#@@LCLETTER/o; localvars===name
 when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants...
 end
@@ -1164,6 +1220,7 @@ private
 nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
 if state==:expect_op and /^(?:#@@LETTER|[(&*])/o===nc
 ctx.state=:def_param_list
+ctx.has_parens= '('==nc
 list,listend=def_param_list
 result.concat list
 end_index=result.index(listend)
@@ -1271,10 +1328,19 @@ private

 def keyword_rescue(str,offset,result)
 unless after_nonid_op? {false}
+result.replace []
 #rescue needs to be treated differently when in operator context...
 #i think no RescueSMContext should be pushed on the stack...
-
-
+tok=OperatorToken.new(str,offset)
+tok.unary=false #plus, the rescue token should be marked as infix
+if AssignmentRhsContext===@parsestack.last
+tok.as="rescue3"
+@parsestack.pop #end rhs context
+result.push AssignmentRhsListEndToken.new(offset) #end rhs token
+else
+result.concat abort_noparens_for_rescue!(str)
+end
+result.push tok
 else
 result.push KwParamListStartToken.new(offset+str.length)
 #corresponding EndToken emitted by abort_noparens! on leaving rescue context
@@ -1349,12 +1415,31 @@ private
 return result
 end

+def keyword___FILE__(str,offset,result)
+result.last.value=@filename
+return result
+end
+
+def keyword___LINE__(str,offset,result)
+result.last.value=@linenum
+return result
+end
+
+module RubyLexer1_9
+def keyword___ENCODING__(str,offset,result)
+#result.last.value=huh
+return result
+end
+
+def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
+end

 def _keyword_funclike(str,offset,result)
 if @last_operative_token===/^(\.|::)$/
 result=yield MethNameToken.new(str) #should pass a methname token here
 else
-
+tok=KeywordToken.new(str)
+result=yield tok,tok
 end
 return result
 end
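The keyword___FILE__ and keyword___LINE__ handlers added above are what the History.txt entry "__FILE__ and __LINE__ now have their correct values attached to them" refers to: the keyword token for each now carries the current filename or line number. A hedged sketch of observing that, assuming the token exposes a public value reader to match the value= writer used in the diff:

  require 'rubylexer'

  lexer = RubyLexer.new("example.rb", "__FILE__\n__LINE__\n")
  toks = []
  toks << lexer.get1token until lexer.eof?

  file_tok = toks.find { |t| t.to_s == "__FILE__" }
  line_tok = toks.find { |t| t.to_s == "__LINE__" }
  p file_tok.value   # expected: "example.rb"
  p line_tok.value   # expected: 2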
@@ -1366,10 +1451,12 @@ private
 #do nothing
 return result
 end
-for kw in VARLIKE_KEYWORDLIST+["defined?", "not"] do
+for kw in VARLIKE_KEYWORDLIST-["__FILE__","__LINE__"]+["defined?", "not"] do
 alias_method "keyword_#{kw}".to_sym, :_keyword_varlike
 end

+
+
 private

 #-----------------------------------
@@ -1453,6 +1540,7 @@ end
 elsif starter==?(
 ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
 @parsestack.push ctx_type.new(@linenum)
+a<<KwParamListStartToken.new( input_position )
 end

 set_last_token KeywordToken.new( ';' )
@@ -1493,16 +1581,45 @@ end
 #parsestack was changed by get1token above...
 normal_comma_level+=1
 assert(normal_comma_level==@parsestack.size)
-endingblock=proc{|
+endingblock=proc{|tok2| tok2===')' }
 else
-endingblock=proc{|
+endingblock=proc{|tok2| tok2===';' or NewlineToken===tok2}
 end
 class << endingblock
 alias === call
 end

+listend=method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+
+@defining_lvar=false
+@parsestack.last.see self,:semi
+
+assert(@parsestack.size <= old_parsestack_size)
+
+#hack: force next token to look like start of a
+#new stmt, if the last ignored_tokens
+#call above did not find a newline
+#(just in case the next token parsed
+#happens to call quote_expected? or after_nonid_op)
+result.concat ignored_tokens
+# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
+# !(NewlineToken===@last_operative_token) and
+# !(/^(end|;)$/===@last_operative_token)
+#result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
+set_last_token KeywordToken.new( ';' )
+result<< get1token
+# end
+}
+
+return result,listend
+end
+
+
+#-----------------------------------
+#read local parameter names in method definition
+def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+listend=nil
 set_last_token KeywordToken.new( ',' )#hack
-#read local parameter names
 nextvar=nil
 loop do
 expect_name=(@last_operative_token===',' and
@@ -1533,7 +1650,7 @@ end
 # assert !nextvar
 nextvar=tok.ident
 localvars[nextvar]=false #remove nextvar from list of local vars for now
-when /^[&*]$/.token_pat #unary form...
+when /^[&*(]$/.token_pat #unary form...
 #a NoWsToken is also expected... read it now
 result.concat maybe_no_ws_token #not needed?
 set_last_token KeywordToken.new( ',' )
@@ -1553,32 +1670,9 @@ end
 end
 end
 end
-
-@defining_lvar=false
-@parsestack.last.see self,:semi
-
-assert(@parsestack.size <= old_parsestack_size)
-assert(endingblock[tok] || ErrorToken===tok)
-
-#hack: force next token to look like start of a
-#new stmt, if the last ignored_tokens
-#call above did not find a newline
-#(just in case the next token parsed
-#happens to call quote_expected? or after_nonid_op)
-result.concat ignored_tokens
-# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
-# !(NewlineToken===@last_operative_token) and
-# !(/^(end|;)$/===@last_operative_token)
-#result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
-set_last_token KeywordToken.new( ';' )
-result<< get1token
-# end
-}
-
-return result,listend
+return listend
 end

-
 #-----------------------------------
 #handle % in ruby code. is it part of fancy quote or a modulo operator?
 def percent(ch)
@@ -1630,7 +1724,13 @@ end
 def char_literal_or_op(ch)
 if colon_quote_expected? ch
 getchar
-
+if @rubyversion >= 1.9
+StringToken.new getchar_maybe_escape
+else
+ch=getchar_maybe_escape[0]
+ch=ch.ord if ch.respond_to? :ord
+NumberToken.new ch
+end
 else
 @parsestack.push TernaryContext.new(@linenum)
 KeywordToken.new getchar #operator
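The char_literal_or_op branch above tracks the language change that ?a is the Fixnum 97 under 1.8 but the one-character string "a" under 1.9: in 1.8 mode the lexer builds a NumberToken from the byte value, in 1.9 mode a StringToken. A small sketch comparing the two modes (token class names as used inside the lexer; whether they are namespaced under RubyLexer in a given install is an assumption to check):

  require 'rubylexer'

  def token_classes(src, version)
    lexer = RubyLexer.new("(example)", src, 1, 0, :rubyversion => version)
    toks = []
    toks << lexer.get1token until lexer.eof?
    toks.map { |t| t.class.name }
  end

  p token_classes("x = ?a\n", 1.8)  # should include a NumberToken for ?a
  p token_classes("x = ?a\n", 1.9)  # should include a StringToken for ?a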
@@ -1825,7 +1925,7 @@ end
 context=merge_assignment_op_in_setter_callsites? ? ?: : nc
 return [identifier_as_string(context), start]
 when ?(
-return [nil,start] if @enable_macro
+return [nil,start] if @enable_macro or @rubyversion>=1.9
 end

 set_last_token KeywordToken.new(';')
@@ -1853,7 +1953,7 @@ end

 res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
 if true
-res.open=["<<",dash,quote,ender,quote].
+res.open=["<<",dash,quote,ender,quote].join
 procrastinated=til_charset(/[\n]/)#+readnl
 unless @base_file
 @base_file=@file
@@ -1979,7 +2079,7 @@ end
 @offset_adjust=@min_offset_adjust
 @moretokens.push( *optional_here_bodies )
 ln=@linenum
-@moretokens.push lexerror(EscNlToken.new(
+@moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error),
 FileAndLineToken.new(@filename,ln,input_position)

 start_of_line_directives
@@ -1995,7 +2095,7 @@ if true
 pos=input_position
 while body=@pending_here_bodies.shift
 #body.offset=pos
-result.push EscNlToken.new(
+result.push EscNlToken.new("\n",body.offset-1,@filename,nil)
 result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
 result.push body
 #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
@@ -2146,25 +2246,25 @@ end
 !@last_operative_token.infix?) ||
 !after_nonid_op?{false}

-hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)
+hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )

 if hard
 @offset_adjust=@min_offset_adjust
 a= abort_noparens!
 case @parsestack.last #these should be in the see:semi handler
-when ExpectDoOrNlContext
-when ExpectThenOrNlContext
+when ExpectDoOrNlContext; @parsestack.pop
+when ExpectThenOrNlContext; @parsestack.pop
 end
 assert !@parsestack.empty?
 @parsestack.last.see self,:semi

-a <<
+a << rulexer_newline(ch)
 @moretokens.replace a+@moretokens
 else
 @offset_adjust=@min_offset_adjust
 offset= input_position
 nl=readnl
-@moretokens.push EscNlToken.new(
+@moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1),
 FileAndLineToken.new(@filename,@linenum,input_position)
 end

@@ -2222,15 +2322,15 @@ end

 begin
 eof? and raise "eof before =end"
-more<< til_charset(
+more<< til_charset(/\n/)
 eof? and raise "eof before =end"
 more<< readnl
 end until readahead(EQENDLENGTH)==EQEND

 #read rest of line after =end
-more << til_charset(
-assert((eof? or ?\
-assert !(
+more << til_charset(/\n/)
+assert((eof? or ?\n===nextchar))
+assert !(/\n/===more[-1,1])
 more<< readnl unless eof?

 # newls= more.scan(/\r\n?|\n\r?/)
@@ -2311,8 +2411,8 @@ end
 return yield
 when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
 %r{^(
-end|self|true|false|nil
-__FILE__|__LINE__|[\})\]]
+end|self|true|false|nil|->|
+__FILE__|__LINE__|__ENCODING__|[\})\]]
 )$}x.token_pat
 #dunno about def/undef
 #maybe class/module shouldn't he here either?
@@ -2399,7 +2499,7 @@ end
 result= operator_or_methname_token( result)
 result.offset=oldpos
 return result
-
+end

 #-----------------------------------
 def tilde(ch) #match ~
@@ -2426,20 +2526,22 @@ end
 #could be beginning of number, too
 #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
 def plusminus(ch)
+pos=input_position
 assert(/^[+\-]$/===ch)
 if unary_op_expected?(ch) or
 KeywordToken===@last_operative_token &&
 /^(return|break|next)$/===@last_operative_token.ident
 if (?0..?9)===readahead(2)[1]
-
+result= number(ch)
 elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
+@file.pos+=2
 #push down block context
 localvars.start_block
 @parsestack.push ctx=BlockContext.new(@linenum)
 ctx.wanting_stabby_block_body=true
 #read optional proc params
 block_param_list_lookahead ?(, ParenedParamListLhsContext
-
+result=KeywordToken.new('->',pos)

 else #unary operator
 result=getchar
@@ -2456,6 +2558,7 @@ end
 end
 result=(operator_or_methname_token result)
 end
+result.offset=pos
 return result
 end

@@ -2485,14 +2588,16 @@ end
 #ruby delays adding lvars from regexps to known lvars table
 #for several tokens in some cases. not sure why or if on purpose
 #i'm just going to add them right away
-
+last.lvars.each{|lvar| localvars[lvar]=true }
 end
 when '' #plain assignment: record local variable definitions
 last_context_not_implicit.lhs=false
+@last_operative_token=result
 @moretokens.push( *ignored_tokens(true).map{|x|
-NewlineToken===x ? EscNlToken.new(
+NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x
 } )
 @parsestack.push AssignmentRhsContext.new(@linenum)
+@moretokens.push AssignmentRhsListStartToken.new( input_position)
 if eat_next_if ?*
 tok=OperatorToken.new('*', input_position-1)
 tok.tag=:unary
@@ -2501,7 +2606,6 @@ end
 @moretokens << NoWsToken.new(input_position)
 comma_in_lvalue_list? #is this needed?
 end
-@moretokens.push AssignmentRhsListStartToken.new( input_position)
 end
 return result
 end
@@ -2513,13 +2617,15 @@ end
 k=eat_next_if(/[~=]/)
 if k
 result+=k
-elsif eof
+elsif eof? or WHSPLF[nextchar.chr] #do nothing
 else
-
-@moretokens << NoWsToken.new(input_position)
+@moretokens << NoWsToken.new(input_position)
 end
-
-
+ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken
+result=ty.new(result, input_position-result.size)
+result.unary=!k #result should distinguish unary !
+
+return result
 end


@@ -2565,7 +2671,7 @@ if false
 def comment(str)
 result=""
 #loop{
-result<<
+result<< rulexer_comment(nil).to_s

 if /^\#.*\#$/===result #if comment was ended by a crunch

@@ -2645,20 +2751,41 @@ end
 when '{'
 #check if we are in a hash literal or string inclusion (#{}),
 #in which case below would be bad.
-if
+if !(UnparenedParamListLhsContext===@parsestack.last) and
+after_nonid_op?{false} || @last_operative_token.has_no_block?
 @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
 else
 #abort_noparens!
 tokch.set_infix!
 tokch.as="do"
-
+
+#if (perhaps deep) inside a stabby block param list context, end it
+if @rubyversion>=1.9
+stabby_params_just_ended=false
+(@parsestack.size-1).downto(1){|i|
+case @parsestack[i]
+when ParamListContextNoParen,AssignmentRhsContext
+#do nothing yet... see if inside a UnparenedParamListLhsContext
+when UnparenedParamListLhsContext #stabby proc
+@moretokens<<tokch
+(@parsestack.size-1).downto(i){|j|
+@moretokens.unshift @parsestack[j].endtoken(input_position-1)
+}
+@parsestack[i..-1]=[]
+tokch=@moretokens.shift
+stabby_params_just_ended=true
+break
+else break
+end
+}
+end
+
 # 'need to find matching callsite context and end it if implicit'
 lasttok=last_operative_token
-if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
+if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last
 @moretokens.push( *(abort_1_noparen!(1).push tokch) )
 tokch=@moretokens.shift
 end
-#=end

 if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body
 @parsestack.last.wanting_stabby_block_body=false
@@ -2719,7 +2846,7 @@ end

 #-----------------------------------
 def endoffile_detected(s='')
-@moretokens.push( *(abort_noparens!.push
+@moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
 if @progress_thread
 @progress_thread.kill
 @progress_thread=nil
@@ -2731,32 +2858,37 @@ end

 #-----------------------------------
 def single_char_token(ch)
-KeywordToken.new
+KeywordToken.new rulexer_single_char_token(ch), input_position-1
 end

 #-----------------------------------
 def comma(ch)
 @moretokens.push token=single_char_token(ch)

-
-
-
-
+case @parsestack[-1]
+when AssignmentRhsContext;
+token.tag=:rhs
+#if assignment rhs seen inside method param list, when param list,
+# array or hash literal, rescue where comma is expected, method def param list,
+# or another right hand side
+# then end the assignment rhs now
+#+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|AssignmentRhsContext|
 # (RescueSMContext&-{:state=>:rescue})|(DefContext&-{:in_body=>FalseClass|nil}),
 # AssignmentRhsContext
 #]===@parsestack
-
-
-
-
-
-
-
+while AssignmentRhsContext===@parsestack[-1]
+pop=
+case @parsestack[-2]
+when ParamListContext,ParamListContextNoParen,WhenParamListContext,
+ListImmedContext,AssignmentRhsContext; true
+when RescueSMContext; @parsestack[-2].state==:rescue
+when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
+else false
+end
+break unless pop
 @parsestack.pop
-@moretokens.unshift AssignmentRhsListEndToken.new(input_position)
-
-case @parsestack[-1]
-when AssignmentRhsContext; token.tag=:rhs
+@moretokens.unshift AssignmentRhsListEndToken.new(input_position-1)
+end
 when ParamListContext,ParamListContextNoParen; #:call
 when ListImmedContext; #:array
 when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
@@ -2800,7 +2932,7 @@ end
 #-----------------------------------
 #tokenify_results_of :identifier
 save_offsets_in(*CHARMAPPINGS.values.uniq-[
-:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret
+:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus
 ])
 #save_offsets_in :symbol