rubylexer 0.7.6 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +4 -0
- data/History.txt +54 -7
- data/Makefile +43 -0
- data/lib/.document +3 -0
- data/lib/rubylexer.rb +286 -154
- data/lib/rubylexer/.document +9 -0
- data/lib/rubylexer/charhandler.rb +25 -19
- data/lib/rubylexer/context.rb +17 -4
- data/lib/rubylexer/rubycode.rb +1 -1
- data/lib/rubylexer/rulexer.rb +120 -95
- data/lib/rubylexer/symboltable.rb +22 -1
- data/lib/rubylexer/test/oneliners.rb +20 -0
- data/lib/rubylexer/test/oneliners_1.9.rb +146 -0
- data/lib/rubylexer/test/testcases.rb +6 -2
- data/lib/rubylexer/token.rb +22 -6
- data/lib/rubylexer/tokenprinter.rb +6 -6
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.gemspec +40 -0
- data/test/code/coloruby.rb +154 -0
- data/test/code/dumptokens.rb +10 -5
- data/test/code/regression.rb +31 -17
- data/test/code/rubylexervsruby.rb +1 -1
- data/test/code/test_1.9.rb +31 -0
- data/test/code/tokentest.rb +6 -6
- data/test/data/{hdr_dos2.rb → hdr_dos2.rb.broken} +0 -0
- data/test/data/{heremonsters.rb.broken → heremonsters_broken.rb} +0 -0
- data/test/data/{heremonsters_dos.rb.broken → heremonsters_dos_broken.rb} +0 -0
- data/test/test_all.rb +2 -0
- metadata +94 -98
- data/Rakefile +0 -37
data/.document
ADDED
data/History.txt
CHANGED
@@ -1,4 +1,51 @@
|
|
1
|
-
=== 0.7.
|
1
|
+
=== 0.7.7/21dec2009
|
2
|
+
* 5 Major Enhancements:
|
3
|
+
* Got rid of the hacky RuLexer ancestor class. woo-hoo!
|
4
|
+
* Input charsets other than 7bit ascii now supported
|
5
|
+
* binary (8bit ascii), utf8, and euc now work; sjis does not
|
6
|
+
* __FILE__ and __LINE__ now have their correct values attached to them
|
7
|
+
* Build scripts completely rewritten; hoe is now gone!
|
8
|
+
* Improved ruby 1.9 compatibility (see below)
|
9
|
+
|
10
|
+
* 4 Major Bugfixes:
|
11
|
+
* Module names may begin with arbitrary expressions
|
12
|
+
* but such expressions are always ended by ::
|
13
|
+
* even if inside an implicit parens context
|
14
|
+
* and positions of whitespace tokens in module header are better tracked
|
15
|
+
* Finally learned to disable rdoc for files it dislikes (thanks, Roger!)
|
16
|
+
* Rescue in rhs context is always ternary now
|
17
|
+
* (this is incorrect if in a multiassign, but redparse will handle that)
|
18
|
+
* Parsing of do, comma, and unary star on assign rhs is better now
|
19
|
+
|
20
|
+
* 2 Minor Enhancements:
|
21
|
+
* Colorizer example
|
22
|
+
* Changes to token classes to incorporate modifications made by redparse
|
23
|
+
|
24
|
+
* 7 Minor Bugfixes:
|
25
|
+
* Newline after = is now soft
|
26
|
+
* Fixed type of local var if inside class/method inside def
|
27
|
+
* Fixed parsing of shebang line if no args
|
28
|
+
* Fixed incorrect offsets in a few obscure cases
|
29
|
+
* Don't treat \r as newline in =begin..=end
|
30
|
+
* Cleaned up test data
|
31
|
+
* Fixed mistypings of local vars in string inclusions
|
32
|
+
|
33
|
+
* Improved 1.9 compatibility:
|
34
|
+
* code works under 1.9 interpreter
|
35
|
+
* stabby blocks
|
36
|
+
* __ENCODING__ keyword
|
37
|
+
* tolerate ternary : at beginning of line
|
38
|
+
* character constants are string, not integer, literals
|
39
|
+
* new \u escape sequence allowed in double-quotish strings
|
40
|
+
* allow nested () in def param list
|
41
|
+
* not is a funclike keyword
|
42
|
+
* parens allowed as method name; alias for #call
|
43
|
+
* block private locals declared after ; inside block param
|
44
|
+
* !, !=, and !~ are methods in 1.9
|
45
|
+
* local variables declared by named backreferences in regexps
|
46
|
+
* tests for many 1.9 features
|
47
|
+
|
48
|
+
=== 0.7.6/1jul2009
|
2
49
|
* 5 Bugfixes:
|
3
50
|
* don't treat <, <=, <=> as starting variables (only << for here header)
|
4
51
|
* space between break/return/next and following open paren is ignored
|
@@ -11,11 +58,11 @@
|
|
11
58
|
* dot at beginning of line
|
12
59
|
* !, !=, !~ are now valid method/symbol names
|
13
60
|
|
14
|
-
=== 0.7.5/
|
61
|
+
=== 0.7.5/23may2009
|
15
62
|
* 1 Bugfix:
|
16
63
|
* fixed problem with parsing shebang lines
|
17
64
|
|
18
|
-
=== 0.7.4/
|
65
|
+
=== 0.7.4/20may2009
|
19
66
|
* 2 Major Enhancements:
|
20
67
|
* preliminary support for ruby 1.9
|
21
68
|
* utf8 inputs should now work... more or less
|
@@ -31,7 +78,7 @@
|
|
31
78
|
* added tag field to Token; I hope many flags can be coalesced into tag.
|
32
79
|
* note the lines that all strings (and here docs) start and end on
|
33
80
|
|
34
|
-
=== 0.7.3/
|
81
|
+
=== 0.7.3/19apr2009
|
35
82
|
* 9 Bugfixes:
|
36
83
|
* remember whether comma was seen in paren context
|
37
84
|
* reducing the warning load
|
@@ -56,7 +103,7 @@
|
|
56
103
|
* various other little helper methods needed by redparse in Tokens
|
57
104
|
* hack Rakefile so 'rake test' will stay in 1 process (keeps netbeans happy)
|
58
105
|
|
59
|
-
=== 0.7.2/
|
106
|
+
=== 0.7.2/12oct2008
|
60
107
|
* 12 Minor Enhancements:
|
61
108
|
* a new context for then kw expected
|
62
109
|
* disable all backtracking when scanning string interiors
|
@@ -71,7 +118,7 @@
|
|
71
118
|
* trying to make 'rake test' work right
|
72
119
|
* certain other changes of no importance whatsoever
|
73
120
|
|
74
|
-
=== 0.7.1/
|
121
|
+
=== 0.7.1/28aug2008
|
75
122
|
* 6 Major Enhancements:
|
76
123
|
* handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
|
77
124
|
* yet more hacks in aid of string inclusions
|
@@ -161,7 +208,7 @@
|
|
161
208
|
* offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
|
162
209
|
* tokentest has a --loop option, for load testing
|
163
210
|
|
164
|
-
=== 0.7.0/
|
211
|
+
=== 0.7.0/15feb2008
|
165
212
|
* implicit tokens are now emitted at the right times (need more test code)
|
166
213
|
* local variables are now temporarily hidden by class, module, and def
|
167
214
|
* line numbers should always be correct now (=begin...=end caused this) (??)
|
data/Makefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
name=RubyLexer
|
2
|
+
lname=rubylexer
|
3
|
+
gemname=rubylexer
|
4
|
+
|
5
|
+
#everything after this line is generic
|
6
|
+
|
7
|
+
version=$(shell ruby -r ./lib/$(lname)/version.rb -e "puts $(name)::VERSION")
|
8
|
+
filelist=$(shell git ls-files)
|
9
|
+
|
10
|
+
.PHONY: all test docs gem tar pkg email
|
11
|
+
all: test
|
12
|
+
|
13
|
+
test:
|
14
|
+
ruby -Ilib test/test_all.rb
|
15
|
+
|
16
|
+
docs:
|
17
|
+
rdoc lib/*
|
18
|
+
|
19
|
+
pkg: gem tar
|
20
|
+
|
21
|
+
gem:
|
22
|
+
gem build $(lname).gemspec
|
23
|
+
|
24
|
+
tar:
|
25
|
+
tar cf - $(filelist) | ( mkdir $(gemname)-$(version); cd $(gemname)-$(version); tar xf - )
|
26
|
+
tar czf $(gemname)-$(version).tar.gz $(gemname)-$(version)
|
27
|
+
rm -rf $(gemname)-$(version)
|
28
|
+
|
29
|
+
email: README.txt History.txt
|
30
|
+
ruby -e ' \
|
31
|
+
require "rubygems"; \
|
32
|
+
load "./$(lname).gemspec"; \
|
33
|
+
spec= Gem::Specification.list.find{|x| x.name=="$(gemname)"}; \
|
34
|
+
puts "\
|
35
|
+
Subject: [ANN] $(name) #{spec.version} Released \
|
36
|
+
\n\n$(name) version #{spec.version} has been released! \n\n\
|
37
|
+
#{Array(spec.homepage).map{|url| " * #{url}\n" }} \
|
38
|
+
\n\
|
39
|
+
#{$(name)::Description} \
|
40
|
+
\n\nChanges:\n\n \
|
41
|
+
#{$(name)::Latest_changes} \
|
42
|
+
"\
|
43
|
+
'
|
data/lib/.document
ADDED
data/lib/rubylexer.rb
CHANGED
@@ -109,9 +109,6 @@ class RubyLexer
|
|
109
109
|
#?\r => :newline, #implicitly escaped after op
|
110
110
|
|
111
111
|
?\\ => :escnewline,
|
112
|
-
?\x00 => :eof,
|
113
|
-
?\x04 => :eof,
|
114
|
-
?\x1a => :eof,
|
115
112
|
|
116
113
|
"[({" => :open_brace,
|
117
114
|
"])}" => :close_brace,
|
@@ -119,7 +116,15 @@ class RubyLexer
|
|
119
116
|
|
120
117
|
?# => :comment,
|
121
118
|
|
122
|
-
|
119
|
+
?\x00 => :eof,
|
120
|
+
?\x04 => :eof,
|
121
|
+
?\x1a => :eof,
|
122
|
+
|
123
|
+
?\x01..?\x03 => :illegal_char,
|
124
|
+
?\x05..?\x08 => :illegal_char,
|
125
|
+
?\x0E..?\x19 => :illegal_char,
|
126
|
+
?\x1b..?\x1F => :illegal_char,
|
127
|
+
?\x7F => :illegal_char,
|
123
128
|
}
|
124
129
|
|
125
130
|
attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
|
@@ -138,10 +143,14 @@ class RubyLexer
|
|
138
143
|
def #{n}; #{n}; end
|
139
144
|
def self.#{n}; @@#{n}; end
|
140
145
|
"
|
141
|
-
}.
|
146
|
+
}.join
|
142
147
|
|
143
148
|
NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
|
144
|
-
|
149
|
+
if ?A.is_a? String #ruby >= 1.9
|
150
|
+
NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
|
151
|
+
else
|
152
|
+
NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
|
153
|
+
end
|
145
154
|
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
|
146
155
|
|
147
156
|
=begin
|
@@ -149,13 +158,13 @@ class RubyLexer
|
|
149
158
|
utf8=String::PATTERN_UTF8 #or euc, or sjis...
|
150
159
|
LCLETTER_U="(?>[a-z_]|#{utf8})"
|
151
160
|
LETTER_U="(?>[A-Za-z_]|#{utf8})"
|
152
|
-
|
161
|
+
LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})"
|
153
162
|
=end
|
154
163
|
|
155
164
|
#-----------------------------------
|
156
|
-
def initialize(filename,file,linenum=1,offset_adjust=0,options={
|
165
|
+
def initialize(filename,file,linenum=1,offset_adjust=0,options={})
|
157
166
|
@offset_adjust=0 #set again in next line
|
158
|
-
|
167
|
+
rulexer_initialize(filename,file, linenum,offset_adjust)
|
159
168
|
@start_linenum=linenum
|
160
169
|
@parsestack=[TopLevelContext.new]
|
161
170
|
@incomplete_here_tokens=[] #not used anymore
|
@@ -168,16 +177,17 @@ class RubyLexer
|
|
168
177
|
@enable_macro=nil
|
169
178
|
@base_file=nil
|
170
179
|
@progress_thread=nil
|
171
|
-
@rubyversion=options[:rubyversion]
|
180
|
+
@rubyversion=options[:rubyversion]||1.8
|
172
181
|
@encoding=options[:encoding]||:detect
|
173
182
|
@method_operators=if @rubyversion>=1.9
|
174
|
-
/#{RUBYSYMOPERATORREX}|\A![
|
183
|
+
/#{RUBYSYMOPERATORREX}|\A![=~@]?/o
|
175
184
|
else
|
176
185
|
RUBYSYMOPERATORREX
|
177
186
|
end
|
178
187
|
|
179
|
-
@toptable=CharHandler.new(self, :
|
188
|
+
@toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)
|
180
189
|
|
190
|
+
extend RubyLexer1_9 if @rubyversion>=1.9
|
181
191
|
read_leading_encoding
|
182
192
|
start_of_line_directives
|
183
193
|
progress_printer
|
@@ -203,11 +213,11 @@ class RubyLexer
|
|
203
213
|
def read_leading_encoding
|
204
214
|
return unless @encoding==:detect
|
205
215
|
@encoding=:ascii
|
206
|
-
@encoding=:utf8 if @file.skip(
|
216
|
+
@encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom
|
207
217
|
if @file.skip( /\A#!/ )
|
208
218
|
loop do
|
209
219
|
til_charset( /[\s\v]/ )
|
210
|
-
break if @file.match(
|
220
|
+
break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
|
211
221
|
if @file.skip( /.-K(.)/ )
|
212
222
|
case $1
|
213
223
|
when 'u'; @encoding=:utf8
|
@@ -243,8 +253,9 @@ class RubyLexer
|
|
243
253
|
@localvars_stack.last
|
244
254
|
end
|
245
255
|
|
256
|
+
attr_accessor :localvars_stack
|
257
|
+
|
246
258
|
attr_accessor :in_def
|
247
|
-
attr :localvars_stack
|
248
259
|
attr :offset_adjust
|
249
260
|
attr_writer :pending_here_bodies
|
250
261
|
attr :rubyversion
|
@@ -256,7 +267,7 @@ class RubyLexer
|
|
256
267
|
|
257
268
|
#-----------------------------------
|
258
269
|
def get1token
|
259
|
-
result=
|
270
|
+
result=rulexer_get1token #most of the action's here
|
260
271
|
|
261
272
|
if ENV['PROGRESS']
|
262
273
|
@last_cp_pos||=0
|
@@ -300,12 +311,12 @@ class RubyLexer
|
|
300
311
|
|
301
312
|
#-----------------------------------
|
302
313
|
def eof?
|
303
|
-
|
314
|
+
rulexer_eof? or EoiToken===@last_operative_token
|
304
315
|
end
|
305
316
|
|
306
317
|
#-----------------------------------
|
307
318
|
def input_position
|
308
|
-
|
319
|
+
rulexer_input_position+@offset_adjust
|
309
320
|
end
|
310
321
|
|
311
322
|
#-----------------------------------
|
@@ -351,6 +362,7 @@ private
|
|
351
362
|
return true if (defined? @in_def) and @in_def
|
352
363
|
@parsestack.reverse_each{|ctx|
|
353
364
|
ctx.starter=='def' and ctx.state!=:saw_def and return true
|
365
|
+
ctx.starter=='class' || ctx.starter=='module' and return false
|
354
366
|
}
|
355
367
|
return false
|
356
368
|
end
|
@@ -389,7 +401,7 @@ private
|
|
389
401
|
unless @moretokens.empty?
|
390
402
|
case @moretokens.first
|
391
403
|
when StillIgnoreToken
|
392
|
-
when NewlineToken
|
404
|
+
when NewlineToken; allow_eol or break
|
393
405
|
else break
|
394
406
|
end
|
395
407
|
else
|
@@ -467,12 +479,9 @@ private
|
|
467
479
|
if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
|
468
480
|
@moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1)
|
469
481
|
else
|
470
|
-
@moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
|
471
|
-
#
|
472
|
-
|
473
|
-
when FUNCLIKE_KEYWORDS; except=tok
|
474
|
-
when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
|
475
|
-
end
|
482
|
+
@moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except|
|
483
|
+
#most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
|
484
|
+
|
476
485
|
was_last=@last_operative_token
|
477
486
|
@last_operative_token=tok if tok
|
478
487
|
normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
|
@@ -547,6 +556,7 @@ private
|
|
547
556
|
#@defining_lvar is a hack
|
548
557
|
@defining_lvar or case ctx=@parsestack.last
|
549
558
|
#when ForSMContext; ctx.state==:for
|
559
|
+
when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident
|
550
560
|
when RescueSMContext
|
551
561
|
lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
|
552
562
|
#when BlockParamListLhsContext; true
|
@@ -567,6 +577,7 @@ private
|
|
567
577
|
#whitespace before but not after the 'operator' indicates it is to be considered a
|
568
578
|
#value token instead. otherwise it is a binary operator. (unary (prefix) ops count
|
569
579
|
#as 'values' here.)
|
580
|
+
#this is by far the ugliest method in RubyLexer.
|
570
581
|
def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
|
571
582
|
#look for call site if not a keyword or keyword is function-like
|
572
583
|
#look for and ignore local variable names
|
@@ -579,7 +590,7 @@ private
|
|
579
590
|
when /(?!#@@LETTER_DIGIT).$/o #do nothing
|
580
591
|
when /^#@@LCLETTER/o
|
581
592
|
(localvars===name or
|
582
|
-
VARLIKE_KEYWORDS===name or
|
593
|
+
#VARLIKE_KEYWORDS===name or
|
583
594
|
was_in_lvar_define_state
|
584
595
|
) and not lasttok===/^(\.|::)$/
|
585
596
|
when /^#@@UCLETTER/o
|
@@ -617,8 +628,9 @@ private
|
|
617
628
|
#if next op is assignment (or comma in lvalue list)
|
618
629
|
#then omit implicit parens
|
619
630
|
assignment_coming=case nc=nextchar
|
620
|
-
when ?=; not /^=[>=~]$/===readahead(2)
|
631
|
+
when ?=; not( /^=[>=~]$/===readahead(2) )
|
621
632
|
when ?,; comma_in_lvalue_list?
|
633
|
+
when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last
|
622
634
|
when ?); last_context_not_implicit.lhs
|
623
635
|
when ?i; /^in(?!#@@LETTER_DIGIT)/o===readahead(3) and
|
624
636
|
ForSMContext===last_context_not_implicit
|
@@ -645,7 +657,7 @@ private
|
|
645
657
|
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
|
646
658
|
else
|
647
659
|
case nc
|
648
|
-
when nil
|
660
|
+
when nil; 2
|
649
661
|
when ?!; /^![=~]$/===readahead(2) ? 2 : 1
|
650
662
|
when ?d;
|
651
663
|
if /^do((?!#@@LETTER_DIGIT)|$)/o===readahead(3)
|
@@ -761,7 +773,7 @@ private
|
|
761
773
|
!(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
|
762
774
|
#only 1 param in list
|
763
775
|
result.unshift ImplicitParamListStartToken.new(oldpos)
|
764
|
-
@parsestack.push
|
776
|
+
@parsestack.push KWParamListContextNoParen.new(@linenum)
|
765
777
|
else
|
766
778
|
arr,pass=*param_list_coming_with_2_or_more_params?
|
767
779
|
result.push( *arr )
|
@@ -846,14 +858,14 @@ private
|
|
846
858
|
result=[]
|
847
859
|
ctx=@parsestack.last
|
848
860
|
while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
|
849
|
-
break if AssignmentRhsContext===ctx && !ctx.multi_assign?
|
850
|
-
if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
|
851
|
-
result.push ImplicitParamListEndToken.new(input_position-str.length),
|
852
|
-
AssignmentRhsListEndToken.new(input_position-str.length)
|
853
|
-
@parsestack.pop
|
854
|
-
@parsestack.pop
|
855
|
-
break
|
856
|
-
end
|
861
|
+
# break if AssignmentRhsContext===ctx && !ctx.multi_assign?
|
862
|
+
# if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
|
863
|
+
# result.push ImplicitParamListEndToken.new(input_position-str.length),
|
864
|
+
# AssignmentRhsListEndToken.new(input_position-str.length)
|
865
|
+
# @parsestack.pop
|
866
|
+
# @parsestack.pop
|
867
|
+
# break
|
868
|
+
# end
|
857
869
|
result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
|
858
870
|
break if RescueSMContext===ctx #why is this here?
|
859
871
|
@parsestack.pop
|
@@ -866,6 +878,7 @@ private
|
|
866
878
|
CONTEXT2ENDTOK_FOR_DO={
|
867
879
|
AssignmentRhsContext=>AssignmentRhsListEndToken,
|
868
880
|
ParamListContextNoParen=>ImplicitParamListEndToken,
|
881
|
+
UnparenedParamListLhsContext=>KwParamListEndToken,
|
869
882
|
ExpectDoOrNlContext=>1,
|
870
883
|
#WhenParamListContext=>KwParamListEndToken,
|
871
884
|
#RescueSMContext=>KwParamListEndToken
|
@@ -874,6 +887,17 @@ private
|
|
874
887
|
#assert @moretokens.empty?
|
875
888
|
result=[]
|
876
889
|
while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
|
890
|
+
if klass==AssignmentRhsListEndToken
|
891
|
+
i=@parsestack.size
|
892
|
+
end_the_assign=false
|
893
|
+
while AssignmentRhsContext===@parsestack[i-=1]
|
894
|
+
if CONTEXT2ENDTOK_FOR_DO[@parsestack[i-1].class] and
|
895
|
+
@parsestack[i-1].class!=AssignmentRhsContext
|
896
|
+
break end_the_assign=true
|
897
|
+
end
|
898
|
+
end
|
899
|
+
break unless end_the_assign
|
900
|
+
end
|
877
901
|
break if klass==1
|
878
902
|
result << klass.new(input_position-str.length)
|
879
903
|
@parsestack.pop
|
@@ -917,19 +941,27 @@ private
|
|
917
941
|
|
918
942
|
#-----------------------------------
|
919
943
|
@@SPACES=/[\ \t\v\f\v]/
|
920
|
-
@@WSTOK
|
921
|
-
|
922
|
-
|
923
|
-
|
944
|
+
@@WSTOK=/(?>
|
945
|
+
(?>\r?)\n|
|
946
|
+
(?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
|
947
|
+
\#(?>[^\n]*)\n|
|
948
|
+
\\(?>\r?)\n|
|
949
|
+
^=begin(?>(?>#@@SPACES.*)?)\n
|
950
|
+
(?>(?:(?!=end)(?>.*)\n))*
|
951
|
+
=end(?>(?>#@@SPACES.*)?)\n
|
952
|
+
)/x
|
953
|
+
@@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
|
954
|
+
def divide_ws(ws0,offset)
|
924
955
|
result=[]
|
925
|
-
|
956
|
+
ws0.scan(/\G#@@WSTOK/o){|ws|
|
926
957
|
incr= $~.begin(0)
|
927
|
-
|
928
|
-
when /\A[\#=]/;
|
929
|
-
when /\n\Z/; EscNlToken
|
930
|
-
else WsToken
|
958
|
+
tok=case ws
|
959
|
+
when /\A[\#=]/; IgnoreToken.new(ws,offset+incr)
|
960
|
+
when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum)
|
961
|
+
else WsToken.new(ws,offset+incr)
|
931
962
|
end
|
932
|
-
result <<
|
963
|
+
result << tok
|
964
|
+
@linenum+=ws.count "\n"
|
933
965
|
}
|
934
966
|
result.each_with_index{|ws,i|
|
935
967
|
if WsToken===ws
|
@@ -939,7 +971,22 @@ private
|
|
939
971
|
return result
|
940
972
|
end
|
941
973
|
|
942
|
-
|
974
|
+
#-----------------------------------
|
975
|
+
#lex tokens until a predefined end token is found.
|
976
|
+
#returns a list of tokens seen.
|
977
|
+
def read_arbitrary_expression(&endcondition)
|
978
|
+
result=[]
|
979
|
+
oldsize=@parsestack.size
|
980
|
+
safe_recurse{
|
981
|
+
tok=nil
|
982
|
+
until endcondition[tok,@parsestack[oldsize+1..-1]||[]] and @parsestack.size==oldsize
|
983
|
+
tok=get1token
|
984
|
+
result<<tok
|
985
|
+
EoiToken===tok and break lexerror( tok, "unexpected eof" )
|
986
|
+
end
|
987
|
+
}
|
988
|
+
result
|
989
|
+
end
|
943
990
|
|
944
991
|
#-----------------------------------
|
945
992
|
#parse keywords now, to prevent confusion over bare symbols
|
@@ -950,7 +997,7 @@ private
|
|
950
997
|
assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
|
951
998
|
result=[KeywordToken.new(str,offset)]
|
952
999
|
|
953
|
-
m
|
1000
|
+
m=:"keyword_#{str}"
|
954
1001
|
respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)]
|
955
1002
|
end
|
956
1003
|
public #these have to be public so respond_to? can see them (sigh)
|
@@ -977,29 +1024,39 @@ private
|
|
977
1024
|
def keyword_module(str,offset,result)
|
978
1025
|
result.first.has_end!
|
979
1026
|
@parsestack.push WantsEndContext.new(str,@linenum)
|
980
|
-
@localvars_stack.push SymbolTable.new
|
981
1027
|
offset=input_position
|
982
|
-
@
|
983
|
-
|
984
|
-
|
985
|
-
fail if all.empty?
|
986
|
-
@moretokens.concat divide_ws(ws,offset) if ws
|
987
|
-
@moretokens.push KeywordToken.new('::',offset+md.end(0)-2) if dc
|
988
|
-
loop do
|
989
|
-
offset=input_position
|
990
|
-
@file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(::)?/o)
|
1028
|
+
assert @moretokens.empty?
|
1029
|
+
tokens=[]
|
1030
|
+
if @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(?=[#{WHSP}]+(?:[^(])|[#;\n]|::)/o)
|
991
1031
|
md=@file.last_match
|
992
|
-
all,ws,name
|
993
|
-
if ws
|
994
|
-
|
995
|
-
|
1032
|
+
all,ws,name=*md
|
1033
|
+
tokens.concat divide_ws(ws,md.begin(1)) if ws
|
1034
|
+
tokens.push VarNameToken.new(name,md.begin(2))
|
1035
|
+
end
|
1036
|
+
tokens.push( *read_arbitrary_expression{|tok,extra_contexts|
|
1037
|
+
#@file.check /\A(\n|;|::|end(?!#@@LETTER_DIGIT)|(#@@UCLETTER#@@LETTER_DIGIT*)(?!(#@@WSTOKS)?::))/o
|
1038
|
+
@file.check( /\A(\n|;|end(?!#@@LETTER_DIGIT))/o ) or
|
1039
|
+
@file.check("::") && extra_contexts.all?{|ctx| ImplicitParamListContext===ctx } &&
|
1040
|
+
@moretokens.push(*abort_noparens!)
|
1041
|
+
} ) if !name #or @file.check /#@@WSTOKS?::/o
|
1042
|
+
@moretokens[0,0]=tokens
|
1043
|
+
@localvars_stack.push SymbolTable.new
|
1044
|
+
while @file.check( /\A::/ )
|
1045
|
+
#VarNameToken===@moretokens.last or
|
1046
|
+
#KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
|
1047
|
+
@file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
|
1048
|
+
md=@file.last_match
|
1049
|
+
all,ws1,dc,ws2,name=*md
|
1050
|
+
if ws1
|
1051
|
+
@moretokens.concat divide_ws(ws1,md.begin(1))
|
1052
|
+
incr=ws1.size
|
996
1053
|
else
|
997
1054
|
incr=0
|
998
1055
|
end
|
999
|
-
@moretokens.push
|
1000
|
-
|
1001
|
-
@moretokens.
|
1002
|
-
@moretokens.push
|
1056
|
+
@moretokens.push NoWsToken.new(md.begin(2)) if dc
|
1057
|
+
@moretokens.push KeywordToken.new('::',md.begin(2)) if dc
|
1058
|
+
@moretokens.concat divide_ws(ws2,md.begin(3)) if ws2
|
1059
|
+
@moretokens.push VarNameToken.new(name,md.begin(4))
|
1003
1060
|
end
|
1004
1061
|
@moretokens.push EndHeaderToken.new(input_position)
|
1005
1062
|
return result
|
@@ -1071,8 +1128,7 @@ private
|
|
1071
1128
|
else
|
1072
1129
|
result.last.has_end!
|
1073
1130
|
if BlockContext===ctx and ctx.wanting_stabby_block_body
|
1074
|
-
|
1075
|
-
ctx.starter,ctx.ender="do","end"
|
1131
|
+
@parsestack[-1]= WantsEndContext.new(str,@linenum)
|
1076
1132
|
else
|
1077
1133
|
@parsestack.push WantsEndContext.new(str,@linenum)
|
1078
1134
|
localvars.start_block
|
@@ -1107,8 +1163,8 @@ private
|
|
1107
1163
|
result << tok
|
1108
1164
|
end until parencount==0 #@parsestack.size==old_size
|
1109
1165
|
@localvars_stack.push SymbolTable.new
|
1110
|
-
|
1111
|
-
|
1166
|
+
else #no parentheses, all tail
|
1167
|
+
set_last_token KeywordToken.new(".") #hack hack
|
1112
1168
|
tokindex=result.size
|
1113
1169
|
result << tok=symbol(false,false)
|
1114
1170
|
name=tok.to_s
|
@@ -1118,7 +1174,7 @@ private
|
|
1118
1174
|
maybe_local=case name
|
1119
1175
|
when /(?!#@@LETTER_DIGIT).$/o; #do nothing
|
1120
1176
|
when /^[@$]/; true
|
1121
|
-
when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
|
1177
|
+
when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
|
1122
1178
|
when /^#@@LCLETTER/o; localvars===name
|
1123
1179
|
when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants...
|
1124
1180
|
end
|
@@ -1164,6 +1220,7 @@ private
|
|
1164
1220
|
nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
|
1165
1221
|
if state==:expect_op and /^(?:#@@LETTER|[(&*])/o===nc
|
1166
1222
|
ctx.state=:def_param_list
|
1223
|
+
ctx.has_parens= '('==nc
|
1167
1224
|
list,listend=def_param_list
|
1168
1225
|
result.concat list
|
1169
1226
|
end_index=result.index(listend)
|
@@ -1271,10 +1328,19 @@ private
|
|
1271
1328
|
|
1272
1329
|
def keyword_rescue(str,offset,result)
|
1273
1330
|
unless after_nonid_op? {false}
|
1331
|
+
result.replace []
|
1274
1332
|
#rescue needs to be treated differently when in operator context...
|
1275
1333
|
#i think no RescueSMContext should be pushed on the stack...
|
1276
|
-
|
1277
|
-
|
1334
|
+
tok=OperatorToken.new(str,offset)
|
1335
|
+
tok.unary=false #plus, the rescue token should be marked as infix
|
1336
|
+
if AssignmentRhsContext===@parsestack.last
|
1337
|
+
tok.as="rescue3"
|
1338
|
+
@parsestack.pop #end rhs context
|
1339
|
+
result.push AssignmentRhsListEndToken.new(offset) #end rhs token
|
1340
|
+
else
|
1341
|
+
result.concat abort_noparens_for_rescue!(str)
|
1342
|
+
end
|
1343
|
+
result.push tok
|
1278
1344
|
else
|
1279
1345
|
result.push KwParamListStartToken.new(offset+str.length)
|
1280
1346
|
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
|
@@ -1349,12 +1415,31 @@ private
|
|
1349
1415
|
return result
|
1350
1416
|
end
|
1351
1417
|
|
1418
|
+
def keyword___FILE__(str,offset,result)
|
1419
|
+
result.last.value=@filename
|
1420
|
+
return result
|
1421
|
+
end
|
1422
|
+
|
1423
|
+
def keyword___LINE__(str,offset,result)
|
1424
|
+
result.last.value=@linenum
|
1425
|
+
return result
|
1426
|
+
end
|
1427
|
+
|
1428
|
+
module RubyLexer1_9
|
1429
|
+
def keyword___ENCODING__(str,offset,result)
|
1430
|
+
#result.last.value=huh
|
1431
|
+
return result
|
1432
|
+
end
|
1433
|
+
|
1434
|
+
def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
|
1435
|
+
end
|
1352
1436
|
|
1353
1437
|
def _keyword_funclike(str,offset,result)
|
1354
1438
|
if @last_operative_token===/^(\.|::)$/
|
1355
1439
|
result=yield MethNameToken.new(str) #should pass a methname token here
|
1356
1440
|
else
|
1357
|
-
|
1441
|
+
tok=KeywordToken.new(str)
|
1442
|
+
result=yield tok,tok
|
1358
1443
|
end
|
1359
1444
|
return result
|
1360
1445
|
end
|
@@ -1366,10 +1451,12 @@ private
|
|
1366
1451
|
#do nothing
|
1367
1452
|
return result
|
1368
1453
|
end
|
1369
|
-
for kw in VARLIKE_KEYWORDLIST+["defined?", "not"] do
|
1454
|
+
for kw in VARLIKE_KEYWORDLIST-["__FILE__","__LINE__"]+["defined?", "not"] do
|
1370
1455
|
alias_method "keyword_#{kw}".to_sym, :_keyword_varlike
|
1371
1456
|
end
|
1372
1457
|
|
1458
|
+
|
1459
|
+
|
1373
1460
|
private
|
1374
1461
|
|
1375
1462
|
#-----------------------------------
|
@@ -1453,6 +1540,7 @@ end
|
|
1453
1540
|
elsif starter==?(
|
1454
1541
|
ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
|
1455
1542
|
@parsestack.push ctx_type.new(@linenum)
|
1543
|
+
a<<KwParamListStartToken.new( input_position )
|
1456
1544
|
end
|
1457
1545
|
|
1458
1546
|
set_last_token KeywordToken.new( ';' )
|
@@ -1493,16 +1581,45 @@ end
|
|
1493
1581
|
#parsestack was changed by get1token above...
|
1494
1582
|
normal_comma_level+=1
|
1495
1583
|
assert(normal_comma_level==@parsestack.size)
|
1496
|
-
endingblock=proc{|
|
1584
|
+
endingblock=proc{|tok2| tok2===')' }
|
1497
1585
|
else
|
1498
|
-
endingblock=proc{|
|
1586
|
+
endingblock=proc{|tok2| tok2===';' or NewlineToken===tok2}
|
1499
1587
|
end
|
1500
1588
|
class << endingblock
|
1501
1589
|
alias === call
|
1502
1590
|
end
|
1503
1591
|
|
1592
|
+
listend=method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
|
1593
|
+
|
1594
|
+
@defining_lvar=false
|
1595
|
+
@parsestack.last.see self,:semi
|
1596
|
+
|
1597
|
+
assert(@parsestack.size <= old_parsestack_size)
|
1598
|
+
|
1599
|
+
#hack: force next token to look like start of a
|
1600
|
+
#new stmt, if the last ignored_tokens
|
1601
|
+
#call above did not find a newline
|
1602
|
+
#(just in case the next token parsed
|
1603
|
+
#happens to call quote_expected? or after_nonid_op)
|
1604
|
+
result.concat ignored_tokens
|
1605
|
+
# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
|
1606
|
+
# !(NewlineToken===@last_operative_token) and
|
1607
|
+
# !(/^(end|;)$/===@last_operative_token)
|
1608
|
+
#result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
|
1609
|
+
set_last_token KeywordToken.new( ';' )
|
1610
|
+
result<< get1token
|
1611
|
+
# end
|
1612
|
+
}
|
1613
|
+
|
1614
|
+
return result,listend
|
1615
|
+
end
|
1616
|
+
|
1617
|
+
|
1618
|
+
#-----------------------------------
|
1619
|
+
#read local parameter names in method definition
|
1620
|
+
def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
|
1621
|
+
listend=nil
|
1504
1622
|
set_last_token KeywordToken.new( ',' )#hack
|
1505
|
-
#read local parameter names
|
1506
1623
|
nextvar=nil
|
1507
1624
|
loop do
|
1508
1625
|
expect_name=(@last_operative_token===',' and
|
@@ -1533,7 +1650,7 @@ end
|
|
1533
1650
|
# assert !nextvar
|
1534
1651
|
nextvar=tok.ident
|
1535
1652
|
localvars[nextvar]=false #remove nextvar from list of local vars for now
|
1536
|
-
when /^[&*]$/.token_pat #unary form...
|
1653
|
+
when /^[&*(]$/.token_pat #unary form...
|
1537
1654
|
#a NoWsToken is also expected... read it now
|
1538
1655
|
result.concat maybe_no_ws_token #not needed?
|
1539
1656
|
set_last_token KeywordToken.new( ',' )
|
@@ -1553,32 +1670,9 @@ end
|
|
1553
1670
|
end
|
1554
1671
|
end
|
1555
1672
|
end
|
1556
|
-
|
1557
|
-
@defining_lvar=false
|
1558
|
-
@parsestack.last.see self,:semi
|
1559
|
-
|
1560
|
-
assert(@parsestack.size <= old_parsestack_size)
|
1561
|
-
assert(endingblock[tok] || ErrorToken===tok)
|
1562
|
-
|
1563
|
-
#hack: force next token to look like start of a
|
1564
|
-
#new stmt, if the last ignored_tokens
|
1565
|
-
#call above did not find a newline
|
1566
|
-
#(just in case the next token parsed
|
1567
|
-
#happens to call quote_expected? or after_nonid_op)
|
1568
|
-
result.concat ignored_tokens
|
1569
|
-
# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
|
1570
|
-
# !(NewlineToken===@last_operative_token) and
|
1571
|
-
# !(/^(end|;)$/===@last_operative_token)
|
1572
|
-
#result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
|
1573
|
-
set_last_token KeywordToken.new( ';' )
|
1574
|
-
result<< get1token
|
1575
|
-
# end
|
1576
|
-
}
|
1577
|
-
|
1578
|
-
return result,listend
|
1673
|
+
return listend
|
1579
1674
|
end
|
1580
1675
|
|
1581
|
-
|
1582
1676
|
#-----------------------------------
|
1583
1677
|
#handle % in ruby code. is it part of fancy quote or a modulo operator?
|
1584
1678
|
def percent(ch)
|
@@ -1630,7 +1724,13 @@ end
|
|
1630
1724
|
def char_literal_or_op(ch)
|
1631
1725
|
if colon_quote_expected? ch
|
1632
1726
|
getchar
|
1633
|
-
|
1727
|
+
if @rubyversion >= 1.9
|
1728
|
+
StringToken.new getchar_maybe_escape
|
1729
|
+
else
|
1730
|
+
ch=getchar_maybe_escape[0]
|
1731
|
+
ch=ch.ord if ch.respond_to? :ord
|
1732
|
+
NumberToken.new ch
|
1733
|
+
end
|
1634
1734
|
else
|
1635
1735
|
@parsestack.push TernaryContext.new(@linenum)
|
1636
1736
|
KeywordToken.new getchar #operator
|
@@ -1825,7 +1925,7 @@ end
|
|
1825
1925
|
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
|
1826
1926
|
return [identifier_as_string(context), start]
|
1827
1927
|
when ?(
|
1828
|
-
return [nil,start] if @enable_macro
|
1928
|
+
return [nil,start] if @enable_macro or @rubyversion>=1.9
|
1829
1929
|
end
|
1830
1930
|
|
1831
1931
|
set_last_token KeywordToken.new(';')
|
@@ -1853,7 +1953,7 @@ end
|
|
1853
1953
|
|
1854
1954
|
res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
|
1855
1955
|
if true
|
1856
|
-
res.open=["<<",dash,quote,ender,quote].
|
1956
|
+
res.open=["<<",dash,quote,ender,quote].join
|
1857
1957
|
procrastinated=til_charset(/[\n]/)#+readnl
|
1858
1958
|
unless @base_file
|
1859
1959
|
@base_file=@file
|
@@ -1979,7 +2079,7 @@ end
|
|
1979
2079
|
@offset_adjust=@min_offset_adjust
|
1980
2080
|
@moretokens.push( *optional_here_bodies )
|
1981
2081
|
ln=@linenum
|
1982
|
-
@moretokens.push lexerror(EscNlToken.new(
|
2082
|
+
@moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error),
|
1983
2083
|
FileAndLineToken.new(@filename,ln,input_position)
|
1984
2084
|
|
1985
2085
|
start_of_line_directives
|
@@ -1995,7 +2095,7 @@ if true
|
|
1995
2095
|
pos=input_position
|
1996
2096
|
while body=@pending_here_bodies.shift
|
1997
2097
|
#body.offset=pos
|
1998
|
-
result.push EscNlToken.new(
|
2098
|
+
result.push EscNlToken.new("\n",body.offset-1,@filename,nil)
|
1999
2099
|
result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
|
2000
2100
|
result.push body
|
2001
2101
|
#result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
|
@@ -2146,25 +2246,25 @@ end
|
|
2146
2246
|
!@last_operative_token.infix?) ||
|
2147
2247
|
!after_nonid_op?{false}
|
2148
2248
|
|
2149
|
-
hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)
|
2249
|
+
hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
|
2150
2250
|
|
2151
2251
|
if hard
|
2152
2252
|
@offset_adjust=@min_offset_adjust
|
2153
2253
|
a= abort_noparens!
|
2154
2254
|
case @parsestack.last #these should be in the see:semi handler
|
2155
|
-
when ExpectDoOrNlContext
|
2156
|
-
when ExpectThenOrNlContext
|
2255
|
+
when ExpectDoOrNlContext; @parsestack.pop
|
2256
|
+
when ExpectThenOrNlContext; @parsestack.pop
|
2157
2257
|
end
|
2158
2258
|
assert !@parsestack.empty?
|
2159
2259
|
@parsestack.last.see self,:semi
|
2160
2260
|
|
2161
|
-
a <<
|
2261
|
+
a << rulexer_newline(ch)
|
2162
2262
|
@moretokens.replace a+@moretokens
|
2163
2263
|
else
|
2164
2264
|
@offset_adjust=@min_offset_adjust
|
2165
2265
|
offset= input_position
|
2166
2266
|
nl=readnl
|
2167
|
-
@moretokens.push EscNlToken.new(
|
2267
|
+
@moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1),
|
2168
2268
|
FileAndLineToken.new(@filename,@linenum,input_position)
|
2169
2269
|
end
|
2170
2270
|
|
@@ -2222,15 +2322,15 @@ end
|
|
2222
2322
|
|
2223
2323
|
begin
|
2224
2324
|
eof? and raise "eof before =end"
|
2225
|
-
more<< til_charset(
|
2325
|
+
more<< til_charset(/\n/)
|
2226
2326
|
eof? and raise "eof before =end"
|
2227
2327
|
more<< readnl
|
2228
2328
|
end until readahead(EQENDLENGTH)==EQEND
|
2229
2329
|
|
2230
2330
|
#read rest of line after =end
|
2231
|
-
more << til_charset(
|
2232
|
-
assert((eof? or ?\
|
2233
|
-
assert !(
|
2331
|
+
more << til_charset(/\n/)
|
2332
|
+
assert((eof? or ?\n===nextchar))
|
2333
|
+
assert !(/\n/===more[-1,1])
|
2234
2334
|
more<< readnl unless eof?
|
2235
2335
|
|
2236
2336
|
# newls= more.scan(/\r\n?|\n\r?/)
|
@@ -2311,8 +2411,8 @@ end
|
|
2311
2411
|
return yield
|
2312
2412
|
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
|
2313
2413
|
%r{^(
|
2314
|
-
end|self|true|false|nil
|
2315
|
-
__FILE__|__LINE__|[\})\]]
|
2414
|
+
end|self|true|false|nil|->|
|
2415
|
+
__FILE__|__LINE__|__ENCODING__|[\})\]]
|
2316
2416
|
)$}x.token_pat
|
2317
2417
|
#dunno about def/undef
|
2318
2418
|
#maybe class/module shouldn't he here either?
|
@@ -2399,7 +2499,7 @@ end
|
|
2399
2499
|
result= operator_or_methname_token( result)
|
2400
2500
|
result.offset=oldpos
|
2401
2501
|
return result
|
2402
|
-
|
2502
|
+
end
|
2403
2503
|
|
2404
2504
|
#-----------------------------------
|
2405
2505
|
def tilde(ch) #match ~
|
@@ -2426,20 +2526,22 @@ end
|
|
2426
2526
|
#could be beginning of number, too
|
2427
2527
|
#fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
|
2428
2528
|
def plusminus(ch)
|
2529
|
+
pos=input_position
|
2429
2530
|
assert(/^[+\-]$/===ch)
|
2430
2531
|
if unary_op_expected?(ch) or
|
2431
2532
|
KeywordToken===@last_operative_token &&
|
2432
2533
|
/^(return|break|next)$/===@last_operative_token.ident
|
2433
2534
|
if (?0..?9)===readahead(2)[1]
|
2434
|
-
|
2535
|
+
result= number(ch)
|
2435
2536
|
elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
|
2537
|
+
@file.pos+=2
|
2436
2538
|
#push down block context
|
2437
2539
|
localvars.start_block
|
2438
2540
|
@parsestack.push ctx=BlockContext.new(@linenum)
|
2439
2541
|
ctx.wanting_stabby_block_body=true
|
2440
2542
|
#read optional proc params
|
2441
2543
|
block_param_list_lookahead ?(, ParenedParamListLhsContext
|
2442
|
-
|
2544
|
+
result=KeywordToken.new('->',pos)
|
2443
2545
|
|
2444
2546
|
else #unary operator
|
2445
2547
|
result=getchar
|
@@ -2456,6 +2558,7 @@ end
|
|
2456
2558
|
end
|
2457
2559
|
result=(operator_or_methname_token result)
|
2458
2560
|
end
|
2561
|
+
result.offset=pos
|
2459
2562
|
return result
|
2460
2563
|
end
|
2461
2564
|
|
@@ -2485,14 +2588,16 @@ end
|
|
2485
2588
|
#ruby delays adding lvars from regexps to known lvars table
|
2486
2589
|
#for several tokens in some cases. not sure why or if on purpose
|
2487
2590
|
#i'm just going to add them right away
|
2488
|
-
|
2591
|
+
last.lvars.each{|lvar| localvars[lvar]=true }
|
2489
2592
|
end
|
2490
2593
|
when '' #plain assignment: record local variable definitions
|
2491
2594
|
last_context_not_implicit.lhs=false
|
2595
|
+
@last_operative_token=result
|
2492
2596
|
@moretokens.push( *ignored_tokens(true).map{|x|
|
2493
|
-
NewlineToken===x ? EscNlToken.new(
|
2597
|
+
NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x
|
2494
2598
|
} )
|
2495
2599
|
@parsestack.push AssignmentRhsContext.new(@linenum)
|
2600
|
+
@moretokens.push AssignmentRhsListStartToken.new( input_position)
|
2496
2601
|
if eat_next_if ?*
|
2497
2602
|
tok=OperatorToken.new('*', input_position-1)
|
2498
2603
|
tok.tag=:unary
|
@@ -2501,7 +2606,6 @@ end
|
|
2501
2606
|
@moretokens << NoWsToken.new(input_position)
|
2502
2607
|
comma_in_lvalue_list? #is this needed?
|
2503
2608
|
end
|
2504
|
-
@moretokens.push AssignmentRhsListStartToken.new( input_position)
|
2505
2609
|
end
|
2506
2610
|
return result
|
2507
2611
|
end
|
@@ -2513,13 +2617,15 @@ end
|
|
2513
2617
|
k=eat_next_if(/[~=]/)
|
2514
2618
|
if k
|
2515
2619
|
result+=k
|
2516
|
-
elsif eof
|
2620
|
+
elsif eof? or WHSPLF[nextchar.chr] #do nothing
|
2517
2621
|
else
|
2518
|
-
|
2519
|
-
@moretokens << NoWsToken.new(input_position)
|
2622
|
+
@moretokens << NoWsToken.new(input_position)
|
2520
2623
|
end
|
2521
|
-
|
2522
|
-
|
2624
|
+
ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken
|
2625
|
+
result=ty.new(result, input_position-result.size)
|
2626
|
+
result.unary=!k #result should distinguish unary !
|
2627
|
+
|
2628
|
+
return result
|
2523
2629
|
end
|
2524
2630
|
|
2525
2631
|
|
@@ -2565,7 +2671,7 @@ if false
|
|
2565
2671
|
def comment(str)
|
2566
2672
|
result=""
|
2567
2673
|
#loop{
|
2568
|
-
result<<
|
2674
|
+
result<< rulexer_comment(nil).to_s
|
2569
2675
|
|
2570
2676
|
if /^\#.*\#$/===result #if comment was ended by a crunch
|
2571
2677
|
|
@@ -2645,20 +2751,41 @@ end
|
|
2645
2751
|
when '{'
|
2646
2752
|
#check if we are in a hash literal or string inclusion (#{}),
|
2647
2753
|
#in which case below would be bad.
|
2648
|
-
if
|
2754
|
+
if !(UnparenedParamListLhsContext===@parsestack.last) and
|
2755
|
+
after_nonid_op?{false} || @last_operative_token.has_no_block?
|
2649
2756
|
@parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
|
2650
2757
|
else
|
2651
2758
|
#abort_noparens!
|
2652
2759
|
tokch.set_infix!
|
2653
2760
|
tokch.as="do"
|
2654
|
-
|
2761
|
+
|
2762
|
+
#if (perhaps deep) inside a stabby block param list context, end it
|
2763
|
+
if @rubyversion>=1.9
|
2764
|
+
stabby_params_just_ended=false
|
2765
|
+
(@parsestack.size-1).downto(1){|i|
|
2766
|
+
case @parsestack[i]
|
2767
|
+
when ParamListContextNoParen,AssignmentRhsContext
|
2768
|
+
#do nothing yet... see if inside a UnparenedParamListLhsContext
|
2769
|
+
when UnparenedParamListLhsContext #stabby proc
|
2770
|
+
@moretokens<<tokch
|
2771
|
+
(@parsestack.size-1).downto(i){|j|
|
2772
|
+
@moretokens.unshift @parsestack[j].endtoken(input_position-1)
|
2773
|
+
}
|
2774
|
+
@parsestack[i..-1]=[]
|
2775
|
+
tokch=@moretokens.shift
|
2776
|
+
stabby_params_just_ended=true
|
2777
|
+
break
|
2778
|
+
else break
|
2779
|
+
end
|
2780
|
+
}
|
2781
|
+
end
|
2782
|
+
|
2655
2783
|
# 'need to find matching callsite context and end it if implicit'
|
2656
2784
|
lasttok=last_operative_token
|
2657
|
-
if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
|
2785
|
+
if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last
|
2658
2786
|
@moretokens.push( *(abort_1_noparen!(1).push tokch) )
|
2659
2787
|
tokch=@moretokens.shift
|
2660
2788
|
end
|
2661
|
-
#=end
|
2662
2789
|
|
2663
2790
|
if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body
|
2664
2791
|
@parsestack.last.wanting_stabby_block_body=false
|
@@ -2719,7 +2846,7 @@ end
|
|
2719
2846
|
|
2720
2847
|
#-----------------------------------
|
2721
2848
|
def endoffile_detected(s='')
|
2722
|
-
@moretokens.push( *(abort_noparens!.push
|
2849
|
+
@moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
|
2723
2850
|
if @progress_thread
|
2724
2851
|
@progress_thread.kill
|
2725
2852
|
@progress_thread=nil
|
@@ -2731,32 +2858,37 @@ end
|
|
2731
2858
|
|
2732
2859
|
#-----------------------------------
|
2733
2860
|
def single_char_token(ch)
|
2734
|
-
KeywordToken.new
|
2861
|
+
KeywordToken.new rulexer_single_char_token(ch), input_position-1
|
2735
2862
|
end
|
2736
2863
|
|
2737
2864
|
#-----------------------------------
|
2738
2865
|
def comma(ch)
|
2739
2866
|
@moretokens.push token=single_char_token(ch)
|
2740
2867
|
|
2741
|
-
|
2742
|
-
|
2743
|
-
|
2744
|
-
|
2868
|
+
case @parsestack[-1]
|
2869
|
+
when AssignmentRhsContext;
|
2870
|
+
token.tag=:rhs
|
2871
|
+
#if assignment rhs seen inside method param list, when param list,
|
2872
|
+
# array or hash literal, rescue where comma is expected, method def param list,
|
2873
|
+
# or another right hand side
|
2874
|
+
# then end the assignment rhs now
|
2875
|
+
#+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|AssignmentRhsContext|
|
2745
2876
|
# (RescueSMContext&-{:state=>:rescue})|(DefContext&-{:in_body=>FalseClass|nil}),
|
2746
2877
|
# AssignmentRhsContext
|
2747
2878
|
#]===@parsestack
|
2748
|
-
|
2749
|
-
|
2750
|
-
|
2751
|
-
|
2752
|
-
|
2753
|
-
|
2754
|
-
|
2879
|
+
while AssignmentRhsContext===@parsestack[-1]
|
2880
|
+
pop=
|
2881
|
+
case @parsestack[-2]
|
2882
|
+
when ParamListContext,ParamListContextNoParen,WhenParamListContext,
|
2883
|
+
ListImmedContext,AssignmentRhsContext; true
|
2884
|
+
when RescueSMContext; @parsestack[-2].state==:rescue
|
2885
|
+
when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
|
2886
|
+
else false
|
2887
|
+
end
|
2888
|
+
break unless pop
|
2755
2889
|
@parsestack.pop
|
2756
|
-
@moretokens.unshift AssignmentRhsListEndToken.new(input_position)
|
2757
|
-
|
2758
|
-
case @parsestack[-1]
|
2759
|
-
when AssignmentRhsContext; token.tag=:rhs
|
2890
|
+
@moretokens.unshift AssignmentRhsListEndToken.new(input_position-1)
|
2891
|
+
end
|
2760
2892
|
when ParamListContext,ParamListContextNoParen; #:call
|
2761
2893
|
when ListImmedContext; #:array
|
2762
2894
|
when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
|
@@ -2800,7 +2932,7 @@ end
|
|
2800
2932
|
#-----------------------------------
|
2801
2933
|
#tokenify_results_of :identifier
|
2802
2934
|
save_offsets_in(*CHARMAPPINGS.values.uniq-[
|
2803
|
-
:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret
|
2935
|
+
:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus
|
2804
2936
|
])
|
2805
2937
|
#save_offsets_in :symbol
|
2806
2938
|
|