rubylexer 0.7.7 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -0
- data/History.txt +64 -0
- data/Makefile +2 -2
- data/README.txt +13 -9
- data/bin/rubylexer +113 -0
- data/lib/assert.rb +1 -1
- data/lib/rubylexer.rb +856 -305
- data/lib/rubylexer/charhandler.rb +1 -1
- data/lib/rubylexer/charset.rb +15 -7
- data/lib/rubylexer/context.rb +10 -2
- data/lib/rubylexer/lextable.rb +1 -0
- data/lib/rubylexer/rubycode.rb +1 -1
- data/lib/rubylexer/rulexer.rb +106 -32
- data/lib/rubylexer/symboltable.rb +1 -1
- data/lib/rubylexer/test/oneliners.rb +15 -5
- data/lib/rubylexer/test/oneliners_1.9.rb +116 -92
- data/lib/rubylexer/test/stanzas.rb +49 -27
- data/lib/rubylexer/test/testcases.rb +2 -2
- data/lib/rubylexer/token.rb +153 -23
- data/lib/rubylexer/tokenprinter.rb +9 -6
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.gemspec +12 -8
- data/test/bad/ruby_lexer.rb +7 -0
- data/test/code/deletewarns.rb +1 -1
- data/test/code/dumptokens.rb +1 -81
- data/test/code/heredoc_blast_test.rb +112 -0
- data/test/code/locatetest.rb +1 -1
- data/test/code/regression.rb +23 -23
- data/test/code/rubylexervsruby.rb +59 -12
- data/test/code/tokentest.rb +62 -52
- data/test/data/23.rb +0 -1
- data/test/data/g.rb +0 -1
- data/test/data/heremonsters.rb +1 -1
- data/test/data/heremonsters_dos.rb +1 -1
- data/test/data/pre.rb +0 -1
- data/test/data/pre.unix.rb +0 -1
- data/test/data/putstext.rb +4 -0
- data/test/data/regtest.rb +0 -1
- data/test/data/stuffydog.rb +5 -0
- data/test/data/stuffydog2.rb +5 -0
- data/test/data/wsdlDriver.rb +0 -1
- data/test/test.sh +1 -1
- data/test/test_all.rb +3 -0
- data/test/test_bad_rubylexer.rb +16 -0
- data/test/test_rubylexer_bad.rb +12 -0
- data/testing.txt +40 -20
- metadata +51 -38
checksums.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
1
|
+
---
|
2
|
+
SHA512:
|
3
|
+
metadata.gz: 3c70096f3a29e480ba50326069d9340b544d6117c048ba4593efce89e1a1f286a846b3a2d432fdc08da18890287170109162f2bbfac14fedaf16542b5310cca0
|
4
|
+
data.tar.gz: 85086987839b9a114cb826ca99490eb566a81d4a518e692a47781537c5b1706cad8fb6076b274e7eabfd9a8e85e884ba09022210c54de48cd229d5d2d346ffe9
|
data/History.txt
CHANGED
@@ -1,3 +1,67 @@
|
|
1
|
+
=== 0.8.0/11may2016
|
2
|
+
* 3 major enhancements:
|
3
|
+
* new framework for extending the lexer using modules:
|
4
|
+
* moved ruby 1.9 lexing logic into a separate module
|
5
|
+
* moved most macro-specific lexing logic to a separate module in rubymacros
|
6
|
+
* support for non-ascii encoding:
|
7
|
+
* support ascii, binary, utf-8, and euc-* encodings in 1.9 mode
|
8
|
+
* 1.8 mode allows binary encoding only
|
9
|
+
* \uXXXX character escapes in 1.9 mode strings (and char lits)
|
10
|
+
* which can turn a string into utf-8 even in non-utf-8 sources
|
11
|
+
* support for the encoding line:
|
12
|
+
* encoding line comes out as a separate token
|
13
|
+
* There's now a ShebangToken as well as the EncodingDeclToken
|
14
|
+
* reading of encoding in -K option in shebang line improved
|
15
|
+
* utf8 bom overrides all later encoding decls
|
16
|
+
|
17
|
+
* 8 minor improvements:
|
18
|
+
* in gemspec, find files relative to __FILE__ instead of pwd
|
19
|
+
* there's now a rubylexer binary; works like the old dumptokens.rb
|
20
|
+
* improved test coverage generally
|
21
|
+
* defend RubyLexer against being defined by anyone else (_ahem_)
|
22
|
+
* friendlier inspect
|
23
|
+
* using my own definition of whitespace instead of \s
|
24
|
+
* api changes to help redparse out:
|
25
|
+
* __ keywords get assigned a value
|
26
|
+
* added RubyLexer#unshift: to force tokens back on lexer input
|
27
|
+
|
28
|
+
* 33 minor bugfixes:
|
29
|
+
* fixed position attributes of tokens in some cases
|
30
|
+
* use more noncapturing groups to avoid backref strangeness later
|
31
|
+
* leave trailing nl (if any) at end of heredoc on input
|
32
|
+
* emit saved-up here bodies before eof
|
33
|
+
* emit right num of parens after unary * & after def and before param list
|
34
|
+
* escaped newline token shouldn't have nl unless one was seen in input
|
35
|
+
* fixed multi-assigns in string inclusions
|
36
|
+
* premature eof in obscure places caused inf loop
|
37
|
+
* corrected handling for do inside of assignment inside method param list
|
38
|
+
* whitespace should never include trailing newline
|
39
|
+
* better detection of ! and = at end of identifiers
|
40
|
+
* disallow allow newline around :: in module header
|
41
|
+
* cr no longer ends comments
|
42
|
+
* !, !=, !~ should always be operator tokens, even in 1.8 mode
|
43
|
+
* .. and ... should be operator tokens
|
44
|
+
* fixes to unlexer:
|
45
|
+
* append newline when unlexing here doc, but only if it had none already
|
46
|
+
* improve formatting of dumptokens output when str inclusions are present
|
47
|
+
* fixed unlexing of char constants when char is space or non-glyph
|
48
|
+
* bugfixes in 1.9-mode lexing:
|
49
|
+
* don't make multiassign in block params (directly or nested)
|
50
|
+
* recognize lvars after ; in method and block param lists
|
51
|
+
* recognize lvars in block param list better
|
52
|
+
* 1.9 keywords correctly recognized and processed
|
53
|
+
* char literals in 1.9 mode are more like strings than numbers now
|
54
|
+
* -> is considered an operator rather than value kw now
|
55
|
+
* use ImplicitParamListStart/EndToken instead of KwParamListStart/EndToken for ->'s param list
|
56
|
+
* the only chars at end which force an ident to be a method are now ?!=
|
57
|
+
* recognize lvar after & or * in stabby block param list
|
58
|
+
* changes for 1.9 compatibility:
|
59
|
+
* eliminating 1.9 warnings generally
|
60
|
+
* avoiding Array#to_s in 1.9 (sigh)
|
61
|
+
* keep Token#inspect working in 1.9
|
62
|
+
* fix CharSet#=== for 1.9 (again?)
|
63
|
+
* grope thru ruby -y output using grep instead of ruby to avoid 1.9 strangeness
|
64
|
+
|
1
65
|
=== 0.7.7/21dec2009
|
2
66
|
* 5 Major Enhancements:
|
3
67
|
* Got rid of the hacky RuLexer ancestor class. woo-hoo!
|
data/Makefile
CHANGED
@@ -11,7 +11,7 @@ filelist=$(shell git ls-files)
|
|
11
11
|
all: test
|
12
12
|
|
13
13
|
test:
|
14
|
-
ruby
|
14
|
+
RUBYLIB=lib ruby test/test_all.rb
|
15
15
|
|
16
16
|
docs:
|
17
17
|
rdoc lib/*
|
@@ -30,7 +30,7 @@ email: README.txt History.txt
|
|
30
30
|
ruby -e ' \
|
31
31
|
require "rubygems"; \
|
32
32
|
load "./$(lname).gemspec"; \
|
33
|
-
spec=
|
33
|
+
spec= @@the_gem; \
|
34
34
|
puts "\
|
35
35
|
Subject: [ANN] $(name) #{spec.version} Released \
|
36
36
|
\n\n$(name) version #{spec.version} has been released! \n\n\
|
data/README.txt
CHANGED
@@ -12,7 +12,8 @@ capability is included to give RubyLexer enough context to tokenize correctly
|
|
12
12
|
in all cases. (This turned out to be more parsing than I had thought or
|
13
13
|
wanted to take on at first.) RubyLexer handles the hard things like
|
14
14
|
complicated strings, the ambiguous nature of some punctuation characters and
|
15
|
-
keywords in ruby, and distinguishing methods and local variables.
|
15
|
+
keywords in ruby, and distinguishing methods and local variables. It should
|
16
|
+
be able to correctly lex 99.9+% of legal ruby code.
|
16
17
|
|
17
18
|
RubyLexer is not particularly clean code. As I progressed in writing this,
|
18
19
|
I've learned a little about how these things are supposed to be done; the
|
@@ -30,11 +31,9 @@ If you have any questions, comments, problems, new feature requests, or just
|
|
30
31
|
want to figure out how to make it work for what you need to do, contact me:
|
31
32
|
rubylexer _at_ inforadical _dot_ net
|
32
33
|
|
33
|
-
|
34
|
-
|
34
|
+
Bugs or problems with rubylexer should be submitted to the bug stream for
|
35
|
+
rubylexer's github project: http://github.com/coatl/rubylexer/bugs
|
35
36
|
|
36
|
-
(There aren't any bug filed against RubyLexer there yet, but don't be afraid
|
37
|
-
that your report will get lonely.)
|
38
37
|
|
39
38
|
==SYNOPSIS:
|
40
39
|
require "rubylexer.rb"
|
@@ -45,8 +44,10 @@ until EoiToken===(token=lexer.get1token)
|
|
45
44
|
end
|
46
45
|
|
47
46
|
== Status
|
48
|
-
RubyLexer can correctly lex all legal Ruby 1.8 code that I've been able to
|
49
|
-
find
|
47
|
+
RubyLexer can correctly lex all legal Ruby 1.8 and 1.9 code that I've been able to
|
48
|
+
find. (And I've found quite a bit.)
|
49
|
+
|
50
|
+
It can also handle (most of) my catalog of nasty
|
50
51
|
test cases (see below for known problems). Modulo some very obscure bugs,
|
51
52
|
RubyLexer can correctly distinguish these ambiguous uses of the following
|
52
53
|
operators, depending on context:
|
@@ -82,12 +83,15 @@ context not really preserved when entering or leaving string inclusions. this ca
|
|
82
83
|
-a number of problems, which had to be hacked around. it would be better to avoid
|
83
84
|
-tokens within tokens. (0.8)
|
84
85
|
string contents might not be correctly translated in a few cases (0.8?)
|
85
|
-
symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
|
86
86
|
'\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
|
87
87
|
windows newline in source is likely to cause problems in obscure cases (need test case)
|
88
|
-
ruby 1.9 incompletely supported (0.9)
|
89
88
|
current character set is always forced to ascii-8bit. however, this mode should be
|
90
89
|
-compatible with texts written in regular ascii, utf-8, and euc. (among others?) (1.0)
|
91
90
|
regression test currently shows a few errors with differences in exact token ordering
|
92
91
|
-around string inclusions. these errors are much less serious than they seem.
|
93
92
|
offset of AssignmentRhsListEndToken appears to be off by 1
|
93
|
+
offsets of here document bodies appear to be off by 1 sometimes
|
94
|
+
newline inserted at eof in texts which end with heredoc but no nl
|
95
|
+
token offsets after here documents are now off
|
96
|
+
unlexing of here document body in the middle of an otherwise unsuspecting
|
97
|
+
-string lexes wrong. (still parses ok, tho, even so.)
|
data/bin/rubylexer
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
=begin legalia
|
3
|
+
rubylexer - a ruby lexer written in ruby
|
4
|
+
Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
|
5
|
+
|
6
|
+
This library is free software; you can redistribute it and/or
|
7
|
+
modify it under the terms of the GNU Lesser General Public
|
8
|
+
License as published by the Free Software Foundation; either
|
9
|
+
version 2.1 of the License, or (at your option) any later version.
|
10
|
+
|
11
|
+
This library is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
Lesser General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU Lesser General Public
|
17
|
+
License along with this library; if not, write to the Free Software
|
18
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
=end
|
20
|
+
|
21
|
+
$Debug=true
|
22
|
+
require 'rubylexer'
|
23
|
+
require 'getoptlong'
|
24
|
+
|
25
|
+
#def puts(x) end
|
26
|
+
|
27
|
+
#a Token#inspect that omits the object id
|
28
|
+
class RubyLexer
|
29
|
+
class Token
|
30
|
+
DONT_STRIFY=%w[@elems @string @headtok]
|
31
|
+
def strify
|
32
|
+
result=[self.class.name[/[^:]+$/],": ",instance_variables.sort.collect{|v|
|
33
|
+
[v,"=",instance_variable_get(v).inspect," "] unless DONT_STRIFY.include? v.to_s or "@ident"==v.to_s && HereBodyToken===self
|
34
|
+
}].join
|
35
|
+
if @elems
|
36
|
+
result=[result,*@elems.map{|x|
|
37
|
+
case x
|
38
|
+
when String; " "+x.inspect
|
39
|
+
else x.ident.map{|xx| xx.strify.gsub(/^/," ")}
|
40
|
+
end
|
41
|
+
}].join("\n")
|
42
|
+
end
|
43
|
+
if @string
|
44
|
+
result=[result,*@string.elems.map{|x|
|
45
|
+
case x
|
46
|
+
when String; " "+x.inspect
|
47
|
+
else x.ident.map{|xx| xx.strify.gsub(/^/," ")}
|
48
|
+
end
|
49
|
+
}].join("\n")
|
50
|
+
end
|
51
|
+
if @headtok
|
52
|
+
result=[result,*@headtok.string.elems.map{|x|
|
53
|
+
case x
|
54
|
+
when String; " "+x.inspect
|
55
|
+
else x.ident.map{|xx| xx.strify.gsub(/^/," ")}
|
56
|
+
end
|
57
|
+
}].join("\n")
|
58
|
+
end
|
59
|
+
result
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
name=macros=silent=file=nil
|
65
|
+
options={}
|
66
|
+
#allow -e
|
67
|
+
opts=GetoptLong.new(
|
68
|
+
["--eval", "-e", GetoptLong::REQUIRED_ARGUMENT],
|
69
|
+
["--silent", "-s", GetoptLong::NO_ARGUMENT],
|
70
|
+
["--macro", "-m", GetoptLong::NO_ARGUMENT],
|
71
|
+
["--ruby19", "--1.9", "-9", GetoptLong::NO_ARGUMENT]
|
72
|
+
)
|
73
|
+
opts.each{|opt,arg|
|
74
|
+
case opt
|
75
|
+
when '--eval'
|
76
|
+
file=arg
|
77
|
+
name='-e'
|
78
|
+
when '--silent'
|
79
|
+
silent=true
|
80
|
+
when '--macro'
|
81
|
+
macros=true
|
82
|
+
when '--ruby19'
|
83
|
+
options[:rubyversion]=1.9
|
84
|
+
end
|
85
|
+
}
|
86
|
+
|
87
|
+
#determine input file and its name if not already known
|
88
|
+
file||=if name=ARGV.first
|
89
|
+
File.open(name)
|
90
|
+
else
|
91
|
+
name='-'
|
92
|
+
$stdin.read
|
93
|
+
end
|
94
|
+
|
95
|
+
args=name, file
|
96
|
+
args.push 1,0,options unless options.empty?
|
97
|
+
lexer=RubyLexer.new(*args)
|
98
|
+
if macros
|
99
|
+
require 'macro'
|
100
|
+
lexer.enable_macros!
|
101
|
+
lexer.extend ::RubyLexer::MacroMixin
|
102
|
+
lexer.rubylexer_modules_init
|
103
|
+
end
|
104
|
+
if silent
|
105
|
+
until RubyLexer::EoiToken===(tok=lexer.get1token)
|
106
|
+
end
|
107
|
+
else
|
108
|
+
until RubyLexer::EoiToken===(tok=lexer.get1token)
|
109
|
+
puts tok.strify
|
110
|
+
#p [tok.startline, tok.endline]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
puts tok.strify #print eoi token
|
data/lib/assert.rb
CHANGED
data/lib/rubylexer.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
#encoding: binary
|
1
2
|
=begin
|
2
3
|
rubylexer - a ruby lexer written in ruby
|
3
|
-
Copyright (C) 2004,2005,2008 Caleb Clausen
|
4
|
+
Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
|
4
5
|
|
5
6
|
This library is free software; you can redistribute it and/or
|
6
7
|
modify it under the terms of the GNU Lesser General Public
|
@@ -17,7 +18,6 @@
|
|
17
18
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
19
|
=end
|
19
20
|
|
20
|
-
|
21
21
|
require 'rubylexer/rulexer' #must be 1st!!!
|
22
22
|
require 'rubylexer/version'
|
23
23
|
require 'rubylexer/token'
|
@@ -27,14 +27,20 @@ require 'rubylexer/symboltable'
|
|
27
27
|
require 'rubylexer/context'
|
28
28
|
require 'rubylexer/tokenprinter'
|
29
29
|
|
30
|
-
|
31
30
|
#-----------------------------------
|
32
31
|
class RubyLexer
|
33
32
|
include NestedContexts
|
34
|
-
|
35
33
|
|
34
|
+
#here's a list of other constants that should already be defined at this point:
|
35
|
+
[WHSP, VERSION, Token, CharSet, CharHandler, SymbolTable, SimpleTokenPrinter].each{|k| fail if k.nil? }
|
36
36
|
|
37
|
-
|
37
|
+
RUBYUNOPERATORS=%w{ +@ ~ ~@ -@ ! !@ }
|
38
|
+
RUBYBINOPERATORS=%w{ & | ^ / % == === =~ > >= >> < <= << <=> + - * ** }
|
39
|
+
RUBYCOMPOPERATORS=%w{== === =~ > >= < <= <=>}
|
40
|
+
RUBYSYMOPERATORS=RUBYUNOPERATORS+RUBYBINOPERATORS+%w{ [] []= }
|
41
|
+
RUBYNONSYMOPERATORS=%w{!= !~ = => :: ? : , ; . .. ... || && ||= &&=}+
|
42
|
+
(RUBYBINOPERATORS-RUBYCOMPOPERATORS).map{|op| op+'='}
|
43
|
+
RUBYSYMOPERATORREX=
|
38
44
|
%r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
|
39
45
|
# (nasty beastie, eh?)
|
40
46
|
#these are the overridable operators
|
@@ -42,39 +48,48 @@ class RubyLexer
|
|
42
48
|
#or op= ops like: += -= ||=
|
43
49
|
#or .. ... ?:
|
44
50
|
#for that use:
|
45
|
-
|
51
|
+
RUBYNONSYMOPERATORREX=
|
46
52
|
%r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
53
|
+
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
|
54
|
+
UNSYMOPS=/^[~!]$/ #always unary
|
55
|
+
UBSYMOPS=/^(?:[*&+-]|::)$/ #ops that could be unary or binary
|
56
|
+
WHSPCHARS=WHSPLF+"\\#"
|
57
|
+
OPORBEGINWORDLIST=%w(if unless while until)
|
58
|
+
BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
|
59
|
+
OPORBEGINWORDS="(?:#{OPORBEGINWORDLIST.join '|'})"
|
60
|
+
BEGINWORDS=/^(?:#{BEGINWORDLIST.join '|'})$/o
|
61
|
+
FUNCLIKE_KEYWORDLIST_1_9=%w[not]
|
62
|
+
FUNCLIKE_KEYWORDLIST=%w/break next redo return yield retry super BEGIN END/
|
63
|
+
FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/
|
64
|
+
VARLIKE_KEYWORDLIST_1_9=%w[__ENCODING__]
|
65
|
+
VARLIKE_KEYWORDLIST=%w/__FILE__ __LINE__ false nil self true/
|
66
|
+
VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/
|
67
|
+
attr_reader :FUNCLIKE_KEYWORDS, :VARLIKE_KEYWORDS
|
68
|
+
|
69
|
+
INNERBOUNDINGWORDLIST=%w"else elsif ensure in then rescue when"
|
70
|
+
INNERBOUNDINGWORDS="(?:#{INNERBOUNDINGWORDLIST.join '|'})"
|
71
|
+
BINOPWORDLIST=%w"and or"
|
72
|
+
BINOPWORDS="(?:#{BINOPWORDLIST.join '|'})"
|
63
73
|
|
64
|
-
|
65
|
-
^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
|
74
|
+
RUBYKEYWORDS=%r{
|
75
|
+
^(?:alias|#{BINOPWORDS}|defined\?|not|undef|end|
|
66
76
|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
|
67
77
|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
|
68
78
|
)$
|
69
79
|
}xo
|
80
|
+
RUBYKEYWORDLIST=%w{alias defined? not undef end}+
|
81
|
+
BINOPWORDLIST+
|
82
|
+
VARLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST+
|
83
|
+
INNERBOUNDINGWORDLIST+BEGINWORDLIST+
|
84
|
+
VARLIKE_KEYWORDLIST_1_9
|
70
85
|
#__END__ should not be in this set... its handled in start_of_line_directives
|
71
86
|
|
72
|
-
|
73
|
-
|
74
|
-
|
87
|
+
HIGHASCII=?\x80..?\xFF
|
88
|
+
NONASCII=HIGHASCII
|
89
|
+
#NONASCII=?\x80..?xFFFFFFFF #or is it 10FFFF, whatever the highest conceivable code point
|
75
90
|
|
76
91
|
|
77
|
-
|
92
|
+
CHARMAPPINGS = {
|
78
93
|
?$ => :dollar_identifier,
|
79
94
|
?@ => :at_identifier,
|
80
95
|
?a..?z => :identifier,
|
@@ -125,33 +140,33 @@ class RubyLexer
|
|
125
140
|
?\x0E..?\x19 => :illegal_char,
|
126
141
|
?\x1b..?\x1F => :illegal_char,
|
127
142
|
?\x7F => :illegal_char,
|
128
|
-
|
143
|
+
}
|
129
144
|
|
130
|
-
|
145
|
+
attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
|
131
146
|
|
132
|
-
|
147
|
+
UCLETTER=@@UCLETTER="[A-Z]"
|
133
148
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
149
|
+
#cheaters way, treats utf chars as always 1 byte wide
|
150
|
+
#all high-bit chars are lowercase letters
|
151
|
+
#works, but strings compare with strict binary identity, not unicode collation
|
152
|
+
#works for euc too, I think
|
153
|
+
#(the ruby spec for utf8 support permits this interpretation)
|
154
|
+
LCLETTER=@@LCLETTER="[a-z_\x80-\xFF]"
|
155
|
+
LETTER=@@LETTER="[A-Za-z_\x80-\xFF]"
|
156
|
+
LETTER_DIGIT=@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
|
157
|
+
eval %w[UCLETTER LCLETTER LETTER LETTER_DIGIT].map{|n| "
|
143
158
|
def #{n}; #{n}; end
|
144
159
|
def self.#{n}; @@#{n}; end
|
145
160
|
"
|
146
|
-
|
161
|
+
}.join
|
147
162
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
163
|
+
NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
|
164
|
+
if ?A.is_a? String #ruby >= 1.9
|
165
|
+
NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
|
166
|
+
else
|
167
|
+
NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
|
168
|
+
end
|
169
|
+
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
|
155
170
|
|
156
171
|
=begin
|
157
172
|
require 'jcode'
|
@@ -163,7 +178,14 @@ class RubyLexer
|
|
163
178
|
|
164
179
|
#-----------------------------------
|
165
180
|
def initialize(filename,file,linenum=1,offset_adjust=0,options={})
|
166
|
-
|
181
|
+
if file.respond_to? :set_encoding
|
182
|
+
file.set_encoding 'binary'
|
183
|
+
elsif file.respond_to? :force_encoding
|
184
|
+
file=file.dup if file.frozen?
|
185
|
+
file.force_encoding 'binary'
|
186
|
+
end
|
187
|
+
|
188
|
+
@offset_adjust=@offset_adjust2=0 #set again in next line
|
167
189
|
rulexer_initialize(filename,file, linenum,offset_adjust)
|
168
190
|
@start_linenum=linenum
|
169
191
|
@parsestack=[TopLevelContext.new]
|
@@ -179,63 +201,141 @@ class RubyLexer
|
|
179
201
|
@progress_thread=nil
|
180
202
|
@rubyversion=options[:rubyversion]||1.8
|
181
203
|
@encoding=options[:encoding]||:detect
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
204
|
+
|
205
|
+
@always_binary_chars=CharSet['}]);|>,.=^']
|
206
|
+
@unary_or_binary_chars=CharSet['+-%/']
|
207
|
+
|
208
|
+
|
209
|
+
@FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS
|
210
|
+
@VARLIKE_KEYWORDS=VARLIKE_KEYWORDS
|
187
211
|
|
188
212
|
@toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)
|
189
213
|
|
190
|
-
|
191
|
-
|
192
|
-
|
214
|
+
if @rubyversion>=1.9
|
215
|
+
extend RubyLexer1_9
|
216
|
+
end
|
217
|
+
rubylexer_modules_init
|
218
|
+
@method_operators=build_method_operators
|
219
|
+
if input_position.zero?
|
220
|
+
read_leading_encoding
|
221
|
+
@encoding=:binary if @rubyversion<=1.8
|
222
|
+
start_of_line_directives
|
223
|
+
end
|
193
224
|
progress_printer
|
194
225
|
end
|
195
226
|
|
196
|
-
|
197
|
-
|
227
|
+
def rubylexer_modules_init
|
228
|
+
|
229
|
+
end
|
230
|
+
|
231
|
+
alias dump inspect # preserve old inspect functionality
|
232
|
+
|
233
|
+
# irb friendly #inspect/#to_s
|
234
|
+
def to_s
|
235
|
+
mods=class<<self;self end.ancestors-self.class.ancestors
|
236
|
+
mods=mods.map{|mod| mod.name }.join('+')
|
237
|
+
mods="+"<<mods unless mods.empty?
|
238
|
+
"#<#{self.class.name}#{mods}: [#{@file.inspect}]>"
|
239
|
+
end
|
240
|
+
|
241
|
+
alias inspect to_s
|
242
|
+
|
198
243
|
|
199
|
-
|
200
|
-
|
244
|
+
def build_method_operators
|
245
|
+
/#{RUBYSYMOPERATORREX}|\A`/o
|
246
|
+
end
|
247
|
+
|
248
|
+
|
249
|
+
RAW_ENCODING_ALIASES={
|
250
|
+
#'utf-8'=>'utf8',
|
251
|
+
|
252
|
+
'ascii-8-bit'=>'binary',
|
253
|
+
'ascii-7-bit'=>'ascii',
|
201
254
|
'euc-jp'=>'euc',
|
202
255
|
|
203
|
-
'
|
204
|
-
'
|
205
|
-
'
|
256
|
+
'iso-8859-1'=>'binary',
|
257
|
+
'latin-1'=>'binary',
|
258
|
+
#'ascii8bit'=>'binary',
|
259
|
+
#'ascii7bit'=>'ascii',
|
260
|
+
#'eucjp'=>'euc',
|
206
261
|
|
207
262
|
'us-ascii'=>'ascii',
|
208
263
|
'shift-jis'=>'sjis',
|
209
264
|
|
210
265
|
'autodetect'=>'detect',
|
211
266
|
}
|
267
|
+
ENCODING_ALIASES=Hash[*RAW_ENCODING_ALIASES.map{|long,short| [long.tr_s('-_',''),short] }.flatten]
|
212
268
|
ENCODINGS=%w[ascii binary utf8 euc sjis]
|
269
|
+
NONWORKING_ENCODINGS=%w[sjis]
|
270
|
+
WSCHARS=@@WSCHARS= /[\s]/==="\v" ? '\s' : '\s\v' #same as WHSP
|
271
|
+
WSNONLCHARS=@@WSNONLCHARS=/(?!\n)[#@@WSCHARS]/o #same as WHSPLF
|
272
|
+
|
273
|
+
NOPARAMLONGOPTIONS=%w[copyright version verbose debug yydebug help]
|
274
|
+
PARAMLONGOPTIONS=%w[encoding dump]
|
275
|
+
DASHPARAMLONGOPTIONS=%w[enable disable]
|
276
|
+
NOPARAMOPTIONS="SacdhlnpsvwyU"
|
277
|
+
OCTALPARAMOPTIONS="0"
|
278
|
+
CHARPARAMOPTIONS="KTW"
|
279
|
+
PARAMSHORTOPTIONS="CXFIEeir"
|
280
|
+
MAYBEPARAMSHORTOPTIONS="x"
|
281
|
+
NEWIN1_9OPTIONS=%w[encoding dump enable disable X U W E]
|
282
|
+
LONGOPTIONS=/
|
283
|
+
--(#{NOPARAMLONGOPTIONS.join'|'})|
|
284
|
+
--(#{PARAMLONGOPTIONS.join'|'})(=|#@@WSNONLCHARS+)[^#@@WSCHARS]+|
|
285
|
+
--(#{DASHPARAMLONGOPTIONS.join'|'})-[^#@@WSCHARS]+
|
286
|
+
/ox
|
287
|
+
CHAINOPTIONS=/
|
288
|
+
[#{NOPARAMOPTIONS}]+|
|
289
|
+
[#{OCTALPARAMOPTIONS}][0-7]{1,3}|
|
290
|
+
[#{CHARPARAMOPTIONS}].
|
291
|
+
/ox
|
292
|
+
PARAMOPTIONS=/
|
293
|
+
[#{PARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]+|
|
294
|
+
[#{MAYBEPARAMSHORTOPTIONS}]#@@WSNONLCHARS*[^#@@WSCHARS]*
|
295
|
+
/ox
|
296
|
+
OPTIONS=/
|
297
|
+
(#@@WSNONLCHARS*(
|
298
|
+
#{LONGOPTIONS} | --? |
|
299
|
+
-#{CHAINOPTIONS}*( #{PARAMOPTIONS} | #{CHAINOPTIONS} )
|
300
|
+
))*
|
301
|
+
/ox
|
302
|
+
|
213
303
|
def read_leading_encoding
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
304
|
+
@encoding=nil if @encoding==:detect
|
305
|
+
if enc=@file.scan( "\xEF\xBB\xBF" ) #bom
|
306
|
+
encpos=0
|
307
|
+
@encoding||=:utf8
|
308
|
+
elsif @file.skip( /\A#!/ )
|
309
|
+
lastpos=@file.pos
|
218
310
|
loop do
|
219
|
-
til_charset( /[
|
220
|
-
|
221
|
-
if @file.
|
222
|
-
|
223
|
-
|
224
|
-
when '
|
225
|
-
when '
|
311
|
+
til_charset( /[#@@WSCHARS]/o )
|
312
|
+
assert @file.pos > lastpos
|
313
|
+
break if @file.match( /^\n|#@@WSNONLCHARS([^-#@@WSCHARS])/o,4 )
|
314
|
+
if @file.skip( /.-#{CHAINOPTIONS}*K#@@WSNONLCHARS*([a-zA-Z0-9])/o )
|
315
|
+
case @file.last_match[1]
|
316
|
+
when 'u','U'; @encoding||=:utf8
|
317
|
+
when 'e','E'; @encoding||=:euc
|
318
|
+
when 's','S'; @encoding||=:sjis
|
226
319
|
end
|
320
|
+
elsif @file.skip( /.#{LONGOPTIONS}/o )
|
227
321
|
end
|
322
|
+
getchar
|
323
|
+
lastpos=@file.pos
|
228
324
|
end
|
229
325
|
til_charset( /[\n]/ )
|
326
|
+
@moretokens<<ShebangToken.new(@file[0...@file.pos])
|
327
|
+
pos=input_position
|
328
|
+
@moretokens<<EscNlToken.new(readnl,pos,@filename,2)
|
329
|
+
@moretokens<<FileAndLineToken.new(@filename,2,input_position)
|
230
330
|
end
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
331
|
+
encpos=input_position unless enc
|
332
|
+
enc||=read_encoding_line
|
333
|
+
ensure
|
334
|
+
@moretokens<<EncodingDeclToken.new(enc||'',@encoding,enc ? encpos : input_position) if @encoding
|
335
|
+
@encoding||=:ascii
|
336
|
+
end
|
337
|
+
|
338
|
+
def read_encoding_line
|
239
339
|
end
|
240
340
|
|
241
341
|
def progress_printer
|
@@ -309,6 +409,11 @@ class RubyLexer
|
|
309
409
|
end
|
310
410
|
end
|
311
411
|
|
412
|
+
#-----------------------------------
|
413
|
+
def unshift(*tokens)
|
414
|
+
@moretokens.unshift(*tokens)
|
415
|
+
end
|
416
|
+
|
312
417
|
#-----------------------------------
|
313
418
|
def eof?
|
314
419
|
rulexer_eof? or EoiToken===@last_operative_token
|
@@ -316,7 +421,7 @@ class RubyLexer
|
|
316
421
|
|
317
422
|
#-----------------------------------
|
318
423
|
def input_position
|
319
|
-
rulexer_input_position+@offset_adjust
|
424
|
+
rulexer_input_position+@offset_adjust+@offset_adjust2
|
320
425
|
end
|
321
426
|
|
322
427
|
#-----------------------------------
|
@@ -392,7 +497,7 @@ private
|
|
392
497
|
end
|
393
498
|
|
394
499
|
#-----------------------------------
|
395
|
-
WSCHARSET=/[#\\\n\
|
500
|
+
WSCHARSET=/[#\\\n#@@WSCHARS\x00\x04\x1a]/o
|
396
501
|
def ignored_tokens(allow_eof=false,allow_eol=true)
|
397
502
|
result=[]
|
398
503
|
result << @moretokens.shift while StillIgnoreToken===@moretokens.first
|
@@ -428,7 +533,7 @@ private
|
|
428
533
|
"#" => :comment,
|
429
534
|
"\n" => :newline,
|
430
535
|
"\\" => :escnewline,
|
431
|
-
"\
|
536
|
+
"#@@WSCHARS\t\r\f" => :whitespace
|
432
537
|
)
|
433
538
|
#tok=nil
|
434
539
|
while tok=@whsphandler.go((nextchar or return result))
|
@@ -476,10 +581,10 @@ private
|
|
476
581
|
assert( /^#@@LETTER$/o===context)
|
477
582
|
assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
|
478
583
|
|
479
|
-
if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
|
480
|
-
@moretokens.
|
481
|
-
else
|
482
|
-
@moretokens.unshift(*
|
584
|
+
# if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
|
585
|
+
# @moretokens.unshift SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>")
|
586
|
+
# else
|
587
|
+
@moretokens.unshift(*special_identifier?(str,oldpos) do |tok,except|
|
483
588
|
#most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
|
484
589
|
|
485
590
|
was_last=@last_operative_token
|
@@ -488,7 +593,7 @@ private
|
|
488
593
|
(Array===normally ? normally[0]=except : normally=except) if except
|
489
594
|
normally
|
490
595
|
end)
|
491
|
-
end
|
596
|
+
# end
|
492
597
|
return @moretokens.shift
|
493
598
|
end
|
494
599
|
|
@@ -512,9 +617,9 @@ private
|
|
512
617
|
case context
|
513
618
|
when ?@,?$ then ""
|
514
619
|
# when ?: then "!(?![=])|\\?|=(?![=~>])"
|
515
|
-
else "!(
|
620
|
+
else "!(?=\\z|[^=]|=[=~>])|\\?"
|
516
621
|
end
|
517
|
-
@in_def_name||context==?: and trailers<<"|=(?![
|
622
|
+
@in_def_name||context==?: and trailers<<"|=(?![~>]|=[^~=>])"
|
518
623
|
|
519
624
|
@file.scan(IDENTREX[trailers]||=/^(?>#@@LETTER#@@LETTER_DIGIT*(?:#{trailers})?)/)
|
520
625
|
end
|
@@ -553,12 +658,14 @@ private
|
|
553
658
|
|
554
659
|
#-----------------------------------
|
555
660
|
def in_lvar_define_state lasttok=@last_operative_token
|
556
|
-
#@defining_lvar is a hack
|
557
|
-
|
661
|
+
return true if @defining_lvar #@defining_lvar is a hack
|
662
|
+
ctx=@parsestack.last
|
663
|
+
case ctx
|
558
664
|
#when ForSMContext; ctx.state==:for
|
559
|
-
when UnparenedParamListLhsContext
|
665
|
+
when UnparenedParamListLhsContext
|
666
|
+
/^(->|,|;)$/===lasttok.ident or /^[*&]$/===lasttok.ident && lasttok.unary
|
560
667
|
when RescueSMContext
|
561
|
-
lasttok.ident=="=>" and @file.match?( /\A[
|
668
|
+
lasttok.ident=="=>" and @file.match?( /\A[#@@WSCHARS]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
|
562
669
|
#when BlockParamListLhsContext; true
|
563
670
|
end
|
564
671
|
end
|
@@ -587,7 +694,7 @@ private
|
|
587
694
|
was_in_lvar_define_state=in_lvar_define_state(lasttok)
|
588
695
|
#maybe_local really means 'maybe local or constant'
|
589
696
|
maybe_local=case name
|
590
|
-
when /
|
697
|
+
when /[?!=]$/o #do nothing
|
591
698
|
when /^#@@LCLETTER/o
|
592
699
|
(localvars===name or
|
593
700
|
#VARLIKE_KEYWORDS===name or
|
@@ -605,6 +712,9 @@ private
|
|
605
712
|
tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
|
606
713
|
|
607
714
|
oldpos= input_position
|
715
|
+
oldline= linenum
|
716
|
+
|
717
|
+
#deal with ws following the ident
|
608
718
|
sawnl=false
|
609
719
|
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
|
610
720
|
if sawnl || eof?
|
@@ -617,16 +727,32 @@ private
|
|
617
727
|
elsif maybe_local
|
618
728
|
return result.unshift(tok) #if is_const
|
619
729
|
else
|
620
|
-
|
730
|
+
toks=[
|
621
731
|
MethNameToken.new(name,pos), #insert implicit parens right after tok
|
622
732
|
ImplicitParamListStartToken.new( oldpos),
|
623
733
|
ImplicitParamListEndToken.new( oldpos)
|
624
|
-
|
734
|
+
]
|
735
|
+
toks.each{|t| t.endline=oldline}
|
736
|
+
return result.unshift(*toks)
|
625
737
|
end
|
626
738
|
end
|
627
739
|
|
628
740
|
#if next op is assignment (or comma in lvalue list)
|
629
741
|
#then omit implicit parens
|
742
|
+
assignment_coming=
|
743
|
+
/\A(?:
|
744
|
+
=[^>=~] | (,) | (;) | (\)) |
|
745
|
+
(in(?!#@@LETTER_DIGIT)) | (\|[^\|=]) | [%\/\-+^*&|]= | ([<>*&|])\6=
|
746
|
+
)/mox===readahead(3) &&
|
747
|
+
case
|
748
|
+
when $1; comma_in_lvalue_list? #comma
|
749
|
+
when $2; semicolon_in_block_param_list?
|
750
|
+
when $3; last_context_not_implicit.lhs #right paren in lhs
|
751
|
+
when $4; ForSMContext===last_context_not_implicit #in
|
752
|
+
when $5; BlockParamListLhsContext===last_context_not_implicit #ending goalpost
|
753
|
+
else true
|
754
|
+
end
|
755
|
+
=begin was
|
630
756
|
assignment_coming=case nc=nextchar
|
631
757
|
when ?=; not( /^=[>=~]$/===readahead(2) )
|
632
758
|
when ?,; comma_in_lvalue_list?
|
@@ -642,15 +768,20 @@ private
|
|
642
768
|
readahead(2)[1] != ?|
|
643
769
|
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
|
644
770
|
end
|
771
|
+
=end
|
772
|
+
|
645
773
|
if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
|
646
774
|
tok=assign_lvar_type! VarNameToken.new(name,pos)
|
647
|
-
if /(?!#@@LETTER_DIGIT).$/o===name
|
648
|
-
|
775
|
+
#if /(?!#@@LETTER_DIGIT).$/o===name
|
776
|
+
#nonalphabetics... operator? skip it
|
777
|
+
#els
|
778
|
+
if /^#@@LCLETTER/o===name #and !(lasttok===/^(\.|::)$/)
|
649
779
|
localvars[name]=true
|
650
780
|
end
|
651
781
|
return result.unshift(tok)
|
652
782
|
end
|
653
783
|
|
784
|
+
nc=nextchar
|
654
785
|
implicit_parens_to_emit=
|
655
786
|
if assignment_coming
|
656
787
|
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
|
@@ -677,12 +808,6 @@ private
|
|
677
808
|
when ?{
|
678
809
|
maybe_local=false
|
679
810
|
1
|
680
|
-
=begin
|
681
|
-
x=2
|
682
|
-
x-=1 if /\A(return|break|next)\Z/===name and
|
683
|
-
!(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
|
684
|
-
x
|
685
|
-
=end
|
686
811
|
when ?(
|
687
812
|
maybe_local=false
|
688
813
|
lastid=lasttok&&lasttok.ident
|
@@ -692,25 +817,16 @@ private
|
|
692
817
|
when '{'; was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
|
693
818
|
end if KeywordToken===lasttok
|
694
819
|
was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
|
695
|
-
want_parens=!(ws_toks.empty? or was_after_nonid_op)
|
696
|
-
# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
|
697
|
-
# MethNameToken===lasttok or
|
698
|
-
# RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
|
699
|
-
# )
|
820
|
+
want_parens=!(ws_toks.empty? or was_after_nonid_op)
|
700
821
|
|
701
822
|
#look ahead for closing paren (after some whitespace...)
|
702
823
|
want_parens=false if @file.match?( /\A.(?:\s|\v|\#.*\n)*\)/ )
|
703
|
-
# afterparen=@file.pos
|
704
|
-
# getchar
|
705
|
-
# ignored_tokens(true)
|
706
|
-
# want_parens=false if nextchar==?)
|
707
|
-
# @file.pos=afterparen
|
708
824
|
want_parens=true if /^(return|break|next)$/===@last_operative_token.ident and not(
|
709
825
|
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
|
710
826
|
)
|
711
827
|
want_parens ? 1 : 0
|
712
|
-
when ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2
|
713
|
-
when
|
828
|
+
when @always_binary_chars; 2 # ?},?],?),?;,(?^ unless @enable_macro), ?|, ?>, ?,, ?., ?=; 2
|
829
|
+
when @unary_or_binary_chars; #?+, ?-, ?%, ?/, (?^ if @enable_macro)
|
714
830
|
if /^(return|break|next)$/===@last_operative_token.ident and not(
|
715
831
|
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
|
716
832
|
)
|
@@ -719,13 +835,12 @@ private
|
|
719
835
|
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
|
720
836
|
end
|
721
837
|
when ?*, ?&
|
722
|
-
# lasttok=@last_operative_token
|
723
838
|
if /^(return|break|next)$/===@last_operative_token.ident and not(
|
724
839
|
KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident
|
725
840
|
)
|
726
841
|
1
|
727
842
|
else
|
728
|
-
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
|
843
|
+
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o] and !@in_def_name) ? 2 : 3
|
729
844
|
end
|
730
845
|
when ?:
|
731
846
|
next2=readahead(2)
|
@@ -738,7 +853,6 @@ private
|
|
738
853
|
#? never begins a char constant if immediately followed
|
739
854
|
#by 2 or more letters or digits
|
740
855
|
/^\?([#{WHSPLF}]|#@@LETTER_DIGIT{2})/o===next3 ? 2 : 3
|
741
|
-
# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
|
742
856
|
when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?(?:["'`]|#@@LETTER_DIGIT)/o]) ? 3 : 2
|
743
857
|
when ?[;
|
744
858
|
if ws_toks.empty?
|
@@ -797,6 +911,8 @@ private
|
|
797
911
|
# 'need to pop noparen from parsestack on these tokens: (in operator context)'
|
798
912
|
# 'not ok:'
|
799
913
|
# 'not (but should it be?)'
|
914
|
+
ensure
|
915
|
+
result.first.endline||=oldline unless result.empty?
|
800
916
|
end
|
801
917
|
|
802
918
|
#-----------------------------------
|
@@ -818,6 +934,7 @@ private
|
|
818
934
|
break true
|
819
935
|
elsif EoiToken===tok
|
820
936
|
lexerror tok, "unexpected eof in parameter list"
|
937
|
+
break
|
821
938
|
end
|
822
939
|
}
|
823
940
|
result.concat @moretokens
|
@@ -825,22 +942,31 @@ private
|
|
825
942
|
return [result,pass]
|
826
943
|
end
|
827
944
|
|
945
|
+
|
828
946
|
#-----------------------------------
|
947
|
+
module NestedContexts
|
948
|
+
class VContext<NestedContext
|
949
|
+
end
|
950
|
+
end
|
951
|
+
VContext=NestedContexts::VContext
|
829
952
|
CONTEXT2ENDTOK={
|
830
953
|
AssignmentRhsContext=>AssignmentRhsListEndToken,
|
831
954
|
ParamListContextNoParen=>ImplicitParamListEndToken,
|
832
955
|
KWParamListContextNoParen=>ImplicitParamListEndToken, #break,next,return
|
833
956
|
WhenParamListContext=>KwParamListEndToken,
|
834
|
-
RescueSMContext=>KwParamListEndToken
|
957
|
+
RescueSMContext=>KwParamListEndToken,
|
958
|
+
VContext=>0
|
835
959
|
}
|
836
|
-
def abort_noparens!(str='')
|
960
|
+
def abort_noparens!(str='',adj=str.size)
|
837
961
|
#assert @moretokens.empty?
|
838
962
|
result=[]
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
break if
|
963
|
+
ctx=@parsestack.last
|
964
|
+
while klass=CONTEXT2ENDTOK[ctx.class]
|
965
|
+
result << klass.new(input_position-adj) if Class===klass
|
966
|
+
break if RescueSMContext===ctx #and str==':'
|
967
|
+
break if WhenParamListContext===ctx and str==':'
|
843
968
|
@parsestack.pop
|
969
|
+
ctx=@parsestack.last
|
844
970
|
end
|
845
971
|
return result
|
846
972
|
end
|
@@ -878,7 +1004,7 @@ private
|
|
878
1004
|
CONTEXT2ENDTOK_FOR_DO={
|
879
1005
|
AssignmentRhsContext=>AssignmentRhsListEndToken,
|
880
1006
|
ParamListContextNoParen=>ImplicitParamListEndToken,
|
881
|
-
UnparenedParamListLhsContext=>
|
1007
|
+
UnparenedParamListLhsContext=>ImplicitParamListEndToken,
|
882
1008
|
ExpectDoOrNlContext=>1,
|
883
1009
|
#WhenParamListContext=>KwParamListEndToken,
|
884
1010
|
#RescueSMContext=>KwParamListEndToken
|
@@ -886,6 +1012,10 @@ private
|
|
886
1012
|
def abort_noparens_for_do!(str='')
|
887
1013
|
#assert @moretokens.empty?
|
888
1014
|
result=[]
|
1015
|
+
return result if @parsestack[-1].class==AssignmentRhsContext and
|
1016
|
+
@parsestack[-2].class==ParamListContextNoParen and
|
1017
|
+
@parsestack[-3].class==DefContext and
|
1018
|
+
!@parsestack[-3].in_body
|
889
1019
|
while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
|
890
1020
|
if klass==AssignmentRhsListEndToken
|
891
1021
|
i=@parsestack.size
|
@@ -930,38 +1060,50 @@ private
|
|
930
1060
|
end
|
931
1061
|
|
932
1062
|
#-----------------------------------
|
933
|
-
def enable_macros!
|
934
|
-
@enable_macro="macro"
|
1063
|
+
def enable_macros! #this wholemethod should be unnecessary now
|
1064
|
+
@enable_macro="macro" #shouldn't be needed anymore... should be safe to remove
|
935
1065
|
class <<self
|
936
1066
|
alias keyword_macro keyword_def
|
937
1067
|
end
|
1068
|
+
@unary_or_binary_chars.add '^'
|
1069
|
+
@always_binary_chars.remove '^'
|
938
1070
|
end
|
939
1071
|
public :enable_macros!
|
940
1072
|
|
941
1073
|
|
942
1074
|
#-----------------------------------
|
943
|
-
@@SPACES=/[\ \t\
|
1075
|
+
@@SPACES=/[\ \t\f\v]/
|
944
1076
|
@@WSTOK=/(?>
|
945
1077
|
(?>\r?)\n|
|
946
1078
|
(?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
|
947
|
-
\#(?>[^\n]*)
|
1079
|
+
\#(?>[^\n]*)(?=\n)|
|
948
1080
|
\\(?>\r?)\n|
|
949
1081
|
^=begin(?>(?>#@@SPACES.*)?)\n
|
950
1082
|
(?>(?:(?!=end)(?>.*)\n))*
|
951
1083
|
=end(?>(?>#@@SPACES.*)?)\n
|
952
1084
|
)/x
|
953
1085
|
@@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
|
1086
|
+
WSTOKS=@@WSTOKS
|
954
1087
|
def divide_ws(ws0,offset)
|
955
1088
|
result=[]
|
956
1089
|
ws0.scan(/\G#@@WSTOK/o){|ws|
|
957
1090
|
incr= $~.begin(0)
|
958
|
-
|
959
|
-
|
960
|
-
when /\
|
961
|
-
|
1091
|
+
lines=ws.count "\n"
|
1092
|
+
case ws
|
1093
|
+
when /\A\#/
|
1094
|
+
result<< IgnoreToken.new(ws,offset+incr)
|
1095
|
+
when /\A=/
|
1096
|
+
tok=IgnoreToken.new(ws,offset+incr)
|
1097
|
+
tok.startline=@linenum
|
1098
|
+
tok.endline=@linenum+lines
|
1099
|
+
result<<tok
|
1100
|
+
when /\n\Z/
|
1101
|
+
result<< EscNlToken.new(ws,offset+incr,@filename,@linenum+1)
|
1102
|
+
else
|
1103
|
+
result<< WsToken.new(ws,offset+incr)
|
962
1104
|
end
|
963
|
-
result
|
964
|
-
@linenum+=
|
1105
|
+
result<< FileAndLineToken.new(@filename,@linenum+lines,offset+incr+ws.size) if lines>0
|
1106
|
+
@linenum+=lines
|
965
1107
|
}
|
966
1108
|
result.each_with_index{|ws,i|
|
967
1109
|
if WsToken===ws
|
@@ -992,13 +1134,13 @@ private
|
|
992
1134
|
#parse keywords now, to prevent confusion over bare symbols
|
993
1135
|
#and match end with corresponding preceding def or class or whatever.
|
994
1136
|
#if arg is not a keyword, the block is called
|
995
|
-
def
|
1137
|
+
def special_identifier?(str,offset,&block)
|
996
1138
|
assert @moretokens.empty?
|
997
1139
|
assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
|
998
|
-
result=[KeywordToken.new(str,offset)]
|
999
1140
|
|
1000
|
-
m
|
1001
|
-
|
1141
|
+
m="keyword_#{str}"
|
1142
|
+
return yield( MethNameToken.new(str) )unless respond_to?(m)
|
1143
|
+
send m,str,offset,[KeywordToken.new(str,offset)],&block
|
1002
1144
|
end
|
1003
1145
|
public #these have to be public so respond_to? can see them (sigh)
|
1004
1146
|
def keyword_end(str,offset,result)
|
@@ -1045,6 +1187,8 @@ private
|
|
1045
1187
|
#VarNameToken===@moretokens.last or
|
1046
1188
|
#KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
|
1047
1189
|
@file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
|
1190
|
+
#should not allow newline around :: here
|
1191
|
+
|
1048
1192
|
md=@file.last_match
|
1049
1193
|
all,ws1,dc,ws2,name=*md
|
1050
1194
|
if ws1
|
@@ -1138,12 +1282,12 @@ private
|
|
1138
1282
|
return result
|
1139
1283
|
end
|
1140
1284
|
def keyword_def(str,offset,result) #macros too, if enabled
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1285
|
+
result.first.has_end!
|
1286
|
+
@parsestack.push ctx=DefContext.new(@linenum)
|
1287
|
+
ctx.state=:saw_def
|
1144
1288
|
old_moretokens=@moretokens
|
1145
1289
|
@moretokens=[]
|
1146
|
-
aa=@moretokens
|
1290
|
+
#aa=@moretokens
|
1147
1291
|
#safe_recurse { |aa|
|
1148
1292
|
set_last_token KeywordToken.new(str) #hack
|
1149
1293
|
result.concat ignored_tokens
|
@@ -1156,28 +1300,48 @@ private
|
|
1156
1300
|
begin
|
1157
1301
|
tok=get1token
|
1158
1302
|
case tok
|
1159
|
-
when/^\($/.token_pat
|
1160
|
-
when/^\)$/.token_pat
|
1303
|
+
when /^\($/.token_pat ; parencount+=1
|
1304
|
+
when /^\)$/.token_pat ; parencount-=1
|
1305
|
+
when EoiToken
|
1306
|
+
@moretokens= old_moretokens.concat @moretokens
|
1307
|
+
return result<<lexerror( tok, "eof in def header" )
|
1161
1308
|
end
|
1162
|
-
EoiToken===tok and lexerror tok, "eof in def header"
|
1163
1309
|
result << tok
|
1164
1310
|
end until parencount==0 #@parsestack.size==old_size
|
1165
1311
|
@localvars_stack.push SymbolTable.new
|
1166
1312
|
else #no parentheses, all tail
|
1167
1313
|
set_last_token KeywordToken.new(".") #hack hack
|
1168
1314
|
tokindex=result.size
|
1315
|
+
tokline=result.last.endline
|
1169
1316
|
result << tok=symbol(false,false)
|
1170
1317
|
name=tok.to_s
|
1171
1318
|
assert !in_lvar_define_state
|
1172
1319
|
|
1173
1320
|
#maybe_local really means 'maybe local or constant'
|
1321
|
+
@maybe_local_pat||=%r{
|
1322
|
+
((?!#@@LETTER_DIGIT).$) | ^[@$] | (#@VARLIKE_KEYWORDS | #@FUNCLIKE_KEYWORDS) |
|
1323
|
+
(^#@@LCLETTER) | (^#@@UCLETTER)
|
1324
|
+
}x
|
1325
|
+
@maybe_local_pat === name and
|
1326
|
+
maybe_local=
|
1327
|
+
case
|
1328
|
+
when $1; maybe_local=false #operator or non-ident
|
1329
|
+
when $2; ty=KeywordToken #keyword
|
1330
|
+
when $3; maybe_local=localvars===name #lvar or method
|
1331
|
+
when $4; is_const=true #constant
|
1332
|
+
else true
|
1333
|
+
end
|
1334
|
+
#maybe_local=ty=KeywordToken if is__ENCODING__keyword?(name) #"__ENCODING__"==name and @rubyversion>=1.9
|
1335
|
+
=begin was
|
1174
1336
|
maybe_local=case name
|
1175
1337
|
when /(?!#@@LETTER_DIGIT).$/o; #do nothing
|
1176
1338
|
when /^[@$]/; true
|
1177
|
-
when VARLIKE_KEYWORDS
|
1339
|
+
when /#@VARLIKE_KEYWORDS|#@FUNCLIKE_KEYWORDS/,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
|
1178
1340
|
when /^#@@LCLETTER/o; localvars===name
|
1179
1341
|
when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants...
|
1180
1342
|
end
|
1343
|
+
=end
|
1344
|
+
|
1181
1345
|
result.push( *ignored_tokens(false,false) )
|
1182
1346
|
nc=nextchar
|
1183
1347
|
if !ty and maybe_local
|
@@ -1195,7 +1359,13 @@ private
|
|
1195
1359
|
end
|
1196
1360
|
|
1197
1361
|
assert result[tokindex].equal?(tok)
|
1198
|
-
var=
|
1362
|
+
var=ty.new(tok.to_s,tok.offset)
|
1363
|
+
if ty==KeywordToken and name[0,2]=="__"
|
1364
|
+
send("keyword_#{name}",name,tok.offset,[var])
|
1365
|
+
end
|
1366
|
+
var.endline=tokline
|
1367
|
+
|
1368
|
+
var=assign_lvar_type! var
|
1199
1369
|
@localvars_stack.push SymbolTable.new
|
1200
1370
|
var.in_def=true if inside_method_def? and var.respond_to? :in_def=
|
1201
1371
|
result[tokindex]=var
|
@@ -1230,7 +1400,9 @@ private
|
|
1230
1400
|
else
|
1231
1401
|
ofs+=listend.to_s.size
|
1232
1402
|
end
|
1233
|
-
|
1403
|
+
tok=EndHeaderToken.new(ofs)
|
1404
|
+
tok.endline= result[end_index-1].endline #@linenum
|
1405
|
+
result.insert end_index+1,tok
|
1234
1406
|
break
|
1235
1407
|
end
|
1236
1408
|
|
@@ -1239,6 +1411,9 @@ private
|
|
1239
1411
|
case tok
|
1240
1412
|
when EoiToken
|
1241
1413
|
lexerror tok,'unexpected eof in def header'
|
1414
|
+
@moretokens= old_moretokens.concat @moretokens
|
1415
|
+
return result
|
1416
|
+
|
1242
1417
|
when StillIgnoreToken
|
1243
1418
|
when MethNameToken ,VarNameToken # /^#@@LETTER/o.token_pat
|
1244
1419
|
lexerror tok,'expected . or ::' unless state==:expect_name
|
@@ -1256,7 +1431,10 @@ private
|
|
1256
1431
|
if endofs
|
1257
1432
|
result.insert( -2,ImplicitParamListEndToken.new(tok.offset) )
|
1258
1433
|
end
|
1259
|
-
|
1434
|
+
ehtok= EndHeaderToken.new(tok.offset)
|
1435
|
+
#ehtok.endline=tok.endline
|
1436
|
+
#ehtok.endline-=1 if NewlineToken===tok
|
1437
|
+
result.insert( -2, ehtok )
|
1260
1438
|
break
|
1261
1439
|
else
|
1262
1440
|
lexerror(tok, "bizarre token in def name: " +
|
@@ -1425,14 +1603,296 @@ private
|
|
1425
1603
|
return result
|
1426
1604
|
end
|
1427
1605
|
|
1606
|
+
|
1607
|
+
#-----------------------------------
|
1608
|
+
def encoding_name_normalize name
|
1609
|
+
name=name.dup
|
1610
|
+
name.downcase!
|
1611
|
+
name.tr_s! '-_',''
|
1612
|
+
name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name]
|
1613
|
+
return name
|
1614
|
+
end
|
1615
|
+
|
1428
1616
|
module RubyLexer1_9
|
1617
|
+
FUNCLIKE_KEYWORDLIST=RubyLexer::FUNCLIKE_KEYWORDLIST+FUNCLIKE_KEYWORDLIST_1_9
|
1618
|
+
VARLIKE_KEYWORDLIST=RubyLexer::VARLIKE_KEYWORDLIST+VARLIKE_KEYWORDLIST_1_9
|
1619
|
+
FUNCLIKE_KEYWORDS=/^(?:#{FUNCLIKE_KEYWORDLIST.join '|'})$/
|
1620
|
+
VARLIKE_KEYWORDS=/^(?:#{VARLIKE_KEYWORDLIST.join '|'})$/
|
1621
|
+
def FUNCLIKE_KEYWORDS orig=nil
|
1622
|
+
/(?:#{orig||super()}|^(?:#{FUNCLIKE_KEYWORDLIST_1_9.join '|'})$)/
|
1623
|
+
end
|
1624
|
+
|
1625
|
+
def VARLIKE_KEYWORDS orig=nil
|
1626
|
+
/(?:#{orig||super()}|^(?:#{VARLIKE_KEYWORDLIST_1_9.join '|'})$)/
|
1627
|
+
end
|
1628
|
+
|
1629
|
+
def rubylexer_modules_init
|
1630
|
+
super
|
1631
|
+
@FUNCLIKE_KEYWORDS=FUNCLIKE_KEYWORDS @FUNCLIKE_KEYWORDS unless @FUNCLIKE_KEYWORDS==="->"
|
1632
|
+
@VARLIKE_KEYWORDS=VARLIKE_KEYWORDS @VARLIKE_KEYWORDS unless @VARLIKE_KEYWORDS==="__ENCODING__"
|
1633
|
+
end
|
1634
|
+
|
1635
|
+
#-----------------------------------
|
1636
|
+
def dquote_handle(ch)
|
1637
|
+
dquote19_esc_seq(ch,'"','"')
|
1638
|
+
end
|
1639
|
+
#-----------------------------------
|
1640
|
+
def dquote_handler_name
|
1641
|
+
:dquote19_esc_seq
|
1642
|
+
end
|
1643
|
+
#-----------------------------------
|
1644
|
+
def Wquote_handler_name
|
1645
|
+
:Wquote19_esc_seq
|
1646
|
+
end
|
1647
|
+
|
1648
|
+
#-----------------------------------
|
1649
|
+
def method_params? # .()
|
1650
|
+
lasttok=last_token_maybe_implicit #last_operative_token
|
1651
|
+
super or
|
1652
|
+
(lasttok and lasttok.ident=='.')
|
1653
|
+
end
|
1654
|
+
|
1655
|
+
#-----------------------------------
|
1656
|
+
def callsite_symbol(x)
|
1657
|
+
return if nextchar==?(
|
1658
|
+
super
|
1659
|
+
end
|
1660
|
+
|
1661
|
+
#-----------------------------------
|
1662
|
+
def read_encoding_line
|
1663
|
+
if line=@file.scan(
|
1664
|
+
/\A#{WSNONLCHARS}*#[\x00-\x7F]*?(?:en)?coding#{WSNONLCHARS}*[:=]#{WSNONLCHARS}*([a-z0-9_-]+)[\x00-\x7F]*$/io
|
1665
|
+
)
|
1666
|
+
name=@file.last_match[1]
|
1667
|
+
name=encoding_name_normalize name
|
1668
|
+
@encoding=name.to_sym if ENCODINGS.include? name
|
1669
|
+
return line
|
1670
|
+
end
|
1671
|
+
end
|
1672
|
+
|
1673
|
+
#-----------------------------------
|
1429
1674
|
def keyword___ENCODING__(str,offset,result)
|
1430
1675
|
#result.last.value=huh
|
1431
1676
|
return result
|
1432
1677
|
end
|
1433
1678
|
|
1679
|
+
#-----------------------------------
|
1434
1680
|
def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
|
1435
|
-
|
1681
|
+
|
1682
|
+
#-----------------------------------
|
1683
|
+
def special_identifier?(str,oldpos)
|
1684
|
+
if @parsestack.last.wantarrow and @file.skip ":"
|
1685
|
+
return SymbolToken.new(str,oldpos), KeywordToken.new(":",input_position-1,:as=>"=>")
|
1686
|
+
else
|
1687
|
+
return super
|
1688
|
+
end
|
1689
|
+
end
|
1690
|
+
|
1691
|
+
#-----------------------------------
|
1692
|
+
def want_hard_nl?
|
1693
|
+
return false if @file.check( /\A\n(?:#{WSTOKS})?[.:][^.:]/o )
|
1694
|
+
super
|
1695
|
+
end
|
1696
|
+
|
1697
|
+
#-----------------------------------
|
1698
|
+
#RE_* shamelessly stolen from jcode.rb
|
1699
|
+
RE_UTF8= /[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]|[\xf0-\xf7][\x80-\xbf]{3}/n #longer sequences are possible
|
1700
|
+
RE_EUC= /[\xa1-\xfe][\xa1-\xfe]/n #is this complete?
|
1701
|
+
RE_SJIS= /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]/n #is this complete? windows31j?
|
1702
|
+
ENCODING2EXTCHAR={
|
1703
|
+
:utf8=>RE_UTF8,
|
1704
|
+
:euc=>RE_EUC,
|
1705
|
+
:sjis=>RE_SJIS,
|
1706
|
+
:binary=>/[\x80-\xFF]/n,
|
1707
|
+
:ascii=>nil
|
1708
|
+
}
|
1709
|
+
|
1710
|
+
#handle ? in ruby code. is it part of ?..: or a character literal?
|
1711
|
+
def char_literal_or_op(ch) #unicode char literals, etc
|
1712
|
+
if colon_quote_expected? ch
|
1713
|
+
#char literal
|
1714
|
+
pos=input_position
|
1715
|
+
getchar
|
1716
|
+
extchar= ENCODING2EXTCHAR[@encoding]
|
1717
|
+
result=
|
1718
|
+
if extchar and extchar=@file.scan( extchar )
|
1719
|
+
assign_encoding!(StringToken.new('"', extchar))
|
1720
|
+
else
|
1721
|
+
getchar_maybe_escape
|
1722
|
+
assign_encoding!(StringToken.new('"', @file[pos+1...input_position]))
|
1723
|
+
end
|
1724
|
+
result.offset=pos
|
1725
|
+
result.bs_handler=:dquote19_esc_seq
|
1726
|
+
result.open='?'
|
1727
|
+
result.close=''
|
1728
|
+
return result
|
1729
|
+
else #(ternary) operator
|
1730
|
+
super
|
1731
|
+
end
|
1732
|
+
end
|
1733
|
+
|
1734
|
+
#-----------------------------------
|
1735
|
+
def plusminus(ch) #->
|
1736
|
+
pos=input_position
|
1737
|
+
assert(/^[+\-]$/===ch)
|
1738
|
+
if unary_op_expected?(ch) or
|
1739
|
+
KeywordToken===@last_operative_token &&
|
1740
|
+
/^(return|break|next)$/===@last_operative_token.ident
|
1741
|
+
if '->' == readahead(2) #stabby proc
|
1742
|
+
@file.pos+=2
|
1743
|
+
#push down block context
|
1744
|
+
localvars.start_block
|
1745
|
+
@parsestack.push ctx=RubyLexer::BlockContext.new(@linenum)
|
1746
|
+
ctx.wanting_stabby_block_body=true
|
1747
|
+
#read optional proc params
|
1748
|
+
block_param_list_lookahead ?(, RubyLexer::ParenedParamListLhsContext
|
1749
|
+
result=RubyLexer::KeywordToken.new('->',pos)
|
1750
|
+
result.offset=pos
|
1751
|
+
return result
|
1752
|
+
end
|
1753
|
+
end
|
1754
|
+
super
|
1755
|
+
end
|
1756
|
+
|
1757
|
+
#-----------------------------------
|
1758
|
+
#match /=(>|~|==?)?/ (= or == or =~ or === or =>)
|
1759
|
+
def equals(ch) # /(?<foo>bar)/=~'bar'; declares foo lvar
|
1760
|
+
if readahead(2)=='=~' # =~... after regex, maybe?
|
1761
|
+
last=last_operative_token
|
1762
|
+
|
1763
|
+
if StringToken===last and last.lvars
|
1764
|
+
#ruby delays adding lvars from regexps to known lvars table
|
1765
|
+
#for several tokens in some cases. not sure why or if on purpose
|
1766
|
+
#i'm just going to add them right away
|
1767
|
+
last.lvars.each{|lvar| localvars[lvar]=true }
|
1768
|
+
end
|
1769
|
+
end
|
1770
|
+
return super
|
1771
|
+
end
|
1772
|
+
|
1773
|
+
#-----------------------------------
|
1774
|
+
def assign_encoding! str
|
1775
|
+
#search for nonascii bytes
|
1776
|
+
#either directly or via hex (\xXX) or octal (\NNN) escapes
|
1777
|
+
#and \u escapes also
|
1778
|
+
utf8=nonascii=false
|
1779
|
+
str.elems.grep(String).each do|frag|
|
1780
|
+
frag.scan(/#{EVEN_BS_S}(?:\\u|\\2[0-7][0-7]|\\x[89a-fA-F][0-9a-fA-F])|[^\x00-\x7F]/o) do |match|
|
1781
|
+
if match[-1]==?u
|
1782
|
+
utf8=true
|
1783
|
+
break if nonascii
|
1784
|
+
else
|
1785
|
+
nonascii=true
|
1786
|
+
break if utf8
|
1787
|
+
end
|
1788
|
+
end or break
|
1789
|
+
end
|
1790
|
+
|
1791
|
+
lexerror(str,"utf8 and nonascii intermixed") if utf8 and nonascii and @encoding!=:utf8
|
1792
|
+
|
1793
|
+
#encoding is source encoding unless \u escape is found
|
1794
|
+
str.utf8! if utf8
|
1795
|
+
|
1796
|
+
#maybe assign string fragments encodings if running under >=1.9?
|
1797
|
+
|
1798
|
+
return str
|
1799
|
+
end
|
1800
|
+
|
1801
|
+
#-----------------------------------
|
1802
|
+
def regex(ch=nil)
|
1803
|
+
result=super
|
1804
|
+
named_brs=[]
|
1805
|
+
if result.elems.size==1 and String===result.elems.first
|
1806
|
+
elem=result.elems.first
|
1807
|
+
index=0
|
1808
|
+
while index=elem.index(/(#{EVEN_BS_S})( \(\?[<'] | \(\?\# | \[ )/xo,index)
|
1809
|
+
index+=$1.size
|
1810
|
+
case $2
|
1811
|
+
when "(?<"
|
1812
|
+
index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)>/o,index)
|
1813
|
+
break lexerror(result, "malformed named backreference") unless index
|
1814
|
+
index+=$&.size
|
1815
|
+
named_brs<<$1
|
1816
|
+
when "(?'"
|
1817
|
+
index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)'/o,index)
|
1818
|
+
break lexerror(result, "malformed named backreference") unless index
|
1819
|
+
index+=$&.size
|
1820
|
+
named_brs<<$1
|
1821
|
+
when "(?#"
|
1822
|
+
index+=3
|
1823
|
+
index=elem.index(/#{EVEN_BS_S}\)/o,index)
|
1824
|
+
break lexerror(result, "unterminated regexp comment") unless index
|
1825
|
+
index+=$&.size
|
1826
|
+
when "["
|
1827
|
+
index+=1
|
1828
|
+
paren_ctr=1
|
1829
|
+
loop do
|
1830
|
+
index=elem.index(/#{EVEN_BS_S}(&&\[\^|\])/o,index)
|
1831
|
+
break lexerror(result, "unterminated character class") unless index
|
1832
|
+
index+=$&.size
|
1833
|
+
if $1==']'
|
1834
|
+
paren_ctr-=1
|
1835
|
+
break if paren_ctr==0
|
1836
|
+
else
|
1837
|
+
paren_ctr+=1
|
1838
|
+
end
|
1839
|
+
end
|
1840
|
+
break unless index
|
1841
|
+
|
1842
|
+
end
|
1843
|
+
end
|
1844
|
+
result.lvars= named_brs unless named_brs.empty?
|
1845
|
+
end
|
1846
|
+
return result
|
1847
|
+
end
|
1848
|
+
|
1849
|
+
def build_method_operators
|
1850
|
+
/#{RUBYSYMOPERATORREX}|\A![=~@]?|\A`/o
|
1851
|
+
end
|
1852
|
+
|
1853
|
+
include RubyLexer::NestedContexts
|
1854
|
+
|
1855
|
+
def semicolon_in_block_param_list?
|
1856
|
+
ParenedParamListLhsContext===@parsestack.last ||
|
1857
|
+
BlockParamListLhsContext===@parsestack.last
|
1858
|
+
end
|
1859
|
+
|
1860
|
+
def is__ENCODING__keyword?(name)
|
1861
|
+
"__ENCODING__"==name
|
1862
|
+
end
|
1863
|
+
|
1864
|
+
#-----------------------------------
|
1865
|
+
def colon_operator tok
|
1866
|
+
if TernaryContext===@parsestack.last
|
1867
|
+
tok.ternary=true
|
1868
|
+
@parsestack.pop #should be in the context's see handler
|
1869
|
+
end
|
1870
|
+
end
|
1871
|
+
|
1872
|
+
def maybe_end_stabby_block_param_list(tokch)
|
1873
|
+
stabby_params_just_ended=false
|
1874
|
+
(@parsestack.size-1).downto(1){|i|
|
1875
|
+
case @parsestack[i]
|
1876
|
+
when ParamListContextNoParen,AssignmentRhsContext
|
1877
|
+
#do nothing yet... see if inside a UnparenedParamListLhsContext
|
1878
|
+
when UnparenedParamListLhsContext #stabby proc
|
1879
|
+
@moretokens<<tokch
|
1880
|
+
(@parsestack.size-1).downto(i){|j|
|
1881
|
+
@moretokens.unshift @parsestack[j].endtoken(input_position-1)
|
1882
|
+
}
|
1883
|
+
@parsestack[i..-1]=[]
|
1884
|
+
tokch=@moretokens.shift
|
1885
|
+
stabby_params_just_ended=true
|
1886
|
+
break
|
1887
|
+
else break
|
1888
|
+
end
|
1889
|
+
}
|
1890
|
+
return stabby_params_just_ended,tokch
|
1891
|
+
end
|
1892
|
+
end #module RubyLexer1_9
|
1893
|
+
|
1894
|
+
def semicolon_in_block_param_list?; end
|
1895
|
+
def is__ENCODING__keyword?(name); end
|
1436
1896
|
|
1437
1897
|
def _keyword_funclike(str,offset,result)
|
1438
1898
|
if @last_operative_token===/^(\.|::)$/
|
@@ -1492,7 +1952,7 @@ private
|
|
1492
1952
|
#-----------------------------------
|
1493
1953
|
def block_param_list_lookahead starter=?|, ctx_type=BlockParamListLhsContext
|
1494
1954
|
safe_recurse{ |la|
|
1495
|
-
set_last_token KeywordToken.new(
|
1955
|
+
set_last_token KeywordToken.new( ';' )
|
1496
1956
|
a=ignored_tokens
|
1497
1957
|
|
1498
1958
|
if eat_next_if(starter)
|
@@ -1540,7 +2000,7 @@ end
|
|
1540
2000
|
elsif starter==?(
|
1541
2001
|
ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
|
1542
2002
|
@parsestack.push ctx_type.new(@linenum)
|
1543
|
-
a<<
|
2003
|
+
a<<ImplicitParamListStartToken.new( input_position )
|
1544
2004
|
end
|
1545
2005
|
|
1546
2006
|
set_last_token KeywordToken.new( ';' )
|
@@ -1622,7 +2082,7 @@ end
|
|
1622
2082
|
set_last_token KeywordToken.new( ',' )#hack
|
1623
2083
|
nextvar=nil
|
1624
2084
|
loop do
|
1625
|
-
expect_name=(
|
2085
|
+
expect_name=(/^[,;]$/===@last_operative_token.ident and
|
1626
2086
|
normal_comma_level==@parsestack.size)
|
1627
2087
|
expect_name and @defining_lvar||=true
|
1628
2088
|
result << tok=get1token
|
@@ -1697,7 +2157,7 @@ end
|
|
1697
2157
|
(@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
|
1698
2158
|
result=quadriop(ch)
|
1699
2159
|
if want_unary
|
1700
|
-
#readahead(2)[1..1][/[
|
2160
|
+
#readahead(2)[1..1][/[#@@WSCHARS#\\]/o] or #not needed?
|
1701
2161
|
assert OperatorToken===result
|
1702
2162
|
result.tag=:unary #result should distinguish unary+binary *&
|
1703
2163
|
WHSPLF[nextchar.chr] or
|
@@ -1724,13 +2184,15 @@ end
|
|
1724
2184
|
def char_literal_or_op(ch)
|
1725
2185
|
if colon_quote_expected? ch
|
1726
2186
|
getchar
|
1727
|
-
if @rubyversion >= 1.9
|
1728
|
-
StringToken.new getchar_maybe_escape
|
1729
|
-
else
|
2187
|
+
# if @rubyversion >= 1.9
|
2188
|
+
# assign_encoding! StringToken.new getchar_maybe_escape
|
2189
|
+
# else
|
1730
2190
|
ch=getchar_maybe_escape[0]
|
1731
2191
|
ch=ch.ord if ch.respond_to? :ord
|
1732
|
-
NumberToken.new ch
|
1733
|
-
|
2192
|
+
result=NumberToken.new ch
|
2193
|
+
result.char_literal=true
|
2194
|
+
return result
|
2195
|
+
# end
|
1734
2196
|
else
|
1735
2197
|
@parsestack.push TernaryContext.new(@linenum)
|
1736
2198
|
KeywordToken.new getchar #operator
|
@@ -1747,7 +2209,7 @@ end
|
|
1747
2209
|
end
|
1748
2210
|
|
1749
2211
|
if !op and after_nonid_op?{
|
1750
|
-
!is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[
|
2212
|
+
!is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[#@@WSCHARS=]}o]
|
1751
2213
|
} || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
|
1752
2214
|
return regex(ch)
|
1753
2215
|
else #/ is operator
|
@@ -1772,7 +2234,7 @@ end
|
|
1772
2234
|
when /^[@$]/; true
|
1773
2235
|
when /^<</; HerePlaceholderToken===tok
|
1774
2236
|
when /(?!#@@LETTER_DIGIT).$/o; false
|
1775
|
-
# when /^#@@LCLETTER/o; localvars===s or VARLIKE_KEYWORDS===s
|
2237
|
+
# when /^#@@LCLETTER/o; localvars===s or @VARLIKE_KEYWORDS===s
|
1776
2238
|
when /^#@@LETTER/o; VarNameToken===tok
|
1777
2239
|
else raise "not var or method name: #{s}"
|
1778
2240
|
end
|
@@ -1781,7 +2243,7 @@ end
|
|
1781
2243
|
#-----------------------------------
|
1782
2244
|
def colon_quote_expected?(ch) #yukko hack
|
1783
2245
|
assert ':?'[ch]
|
1784
|
-
readahead(2)[/^(\?[^#{WHSPLF}]|:[
|
2246
|
+
readahead(2)[/^(\?[^#{WHSPLF}]|:[^#@@WSCHARS :])$/o] or return false
|
1785
2247
|
|
1786
2248
|
after_nonid_op? {
|
1787
2249
|
#possible func-call as operator
|
@@ -1804,42 +2266,40 @@ end
|
|
1804
2266
|
lastchar=prevchar
|
1805
2267
|
eat_next_if(ch[0]) or raise "needed: "+ch
|
1806
2268
|
|
1807
|
-
if nextchar==?( and @enable_macro
|
2269
|
+
if nextchar==?( and @enable_macro #factored
|
1808
2270
|
result= OperatorToken.new(':', startpos)
|
1809
2271
|
result.unary=true
|
1810
2272
|
return result
|
1811
2273
|
end
|
1812
2274
|
|
1813
2275
|
#handle quoted symbols like :"foobar", :"[]"
|
1814
|
-
qe
|
2276
|
+
if qe
|
2277
|
+
return symbol(':')
|
2278
|
+
elsif eat_next_if(?:)
|
2279
|
+
#we definately found a ::
|
2280
|
+
|
2281
|
+
colon2=KeywordToken.new( '::',startpos)
|
2282
|
+
lasttok=@last_operative_token
|
2283
|
+
assert !(String===lasttok)
|
2284
|
+
if (VarNameToken===lasttok or MethNameToken===lasttok) and
|
2285
|
+
lasttok===/^(?:[$@]|#@@LETTER)/o and !WHSPCHARS[lastchar]
|
2286
|
+
then
|
2287
|
+
@moretokens << colon2
|
2288
|
+
result= NoWsToken.new(startpos)
|
2289
|
+
else
|
2290
|
+
result=colon2
|
2291
|
+
end
|
2292
|
+
dot_rhs(colon2)
|
2293
|
+
return result
|
1815
2294
|
|
1816
|
-
#
|
1817
|
-
|
2295
|
+
#return single : token
|
2296
|
+
else
|
1818
2297
|
#cancel implicit contexts...
|
1819
|
-
@moretokens.push(*abort_noparens!(':'))
|
2298
|
+
@moretokens.push(*abort_noparens!(':')) #special treatment not needed in 1.9 mode?
|
1820
2299
|
@moretokens.push tok=KeywordToken.new(':',startpos)
|
1821
2300
|
|
1822
|
-
|
1823
|
-
when TernaryContext
|
1824
|
-
tok.ternary=true
|
1825
|
-
@parsestack.pop #should be in the context's see handler
|
1826
|
-
when ExpectDoOrNlContext #should be in the context's see handler
|
1827
|
-
if @rubyversion<1.9
|
1828
|
-
@parsestack.pop
|
1829
|
-
assert @parsestack.last.starter[/^(while|until|for)$/]
|
1830
|
-
tok.as=";"
|
1831
|
-
end
|
1832
|
-
when ExpectThenOrNlContext,WhenParamListContext
|
1833
|
-
if @rubyversion<1.9
|
1834
|
-
#should be in the context's see handler
|
1835
|
-
@parsestack.pop
|
1836
|
-
tok.as="then"
|
1837
|
-
end
|
1838
|
-
when RescueSMContext
|
1839
|
-
tok.as=";"
|
1840
|
-
end or
|
2301
|
+
colon_operator(tok) or
|
1841
2302
|
fail ": not expected in #{@parsestack.last.class}->#{@parsestack.last.starter}"
|
1842
|
-
|
1843
2303
|
|
1844
2304
|
#end ternary context, if any
|
1845
2305
|
@parsestack.last.see self,:colon
|
@@ -1847,21 +2307,25 @@ end
|
|
1847
2307
|
return @moretokens.shift
|
1848
2308
|
end
|
1849
2309
|
|
1850
|
-
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1856
|
-
|
1857
|
-
|
1858
|
-
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
2310
|
+
end
|
2311
|
+
|
2312
|
+
#-----------------------------------
|
2313
|
+
def colon_operator tok
|
2314
|
+
case @parsestack.last
|
2315
|
+
when TernaryContext
|
2316
|
+
tok.ternary=true
|
2317
|
+
@parsestack.pop #should be in the context's see handler
|
2318
|
+
when ExpectDoOrNlContext #should be in the context's see handler
|
2319
|
+
@parsestack.pop
|
2320
|
+
assert @parsestack.last.starter[/^(while|until|for)$/]
|
2321
|
+
tok.as=";"
|
2322
|
+
when ExpectThenOrNlContext,WhenParamListContext
|
2323
|
+
#should be in the context's see handler
|
2324
|
+
@parsestack.pop
|
2325
|
+
tok.as="then"
|
2326
|
+
when RescueSMContext
|
2327
|
+
tok.as=";"
|
2328
|
+
end
|
1865
2329
|
end
|
1866
2330
|
|
1867
2331
|
#-----------------------------------
|
@@ -1883,13 +2347,14 @@ end
|
|
1883
2347
|
assert notbare
|
1884
2348
|
open=":'"; close="'"
|
1885
2349
|
single_quote("'")
|
1886
|
-
when ?` then read(1) #`
|
2350
|
+
# when ?` then read(1) #`
|
1887
2351
|
when ?@ then at_identifier.to_s
|
1888
2352
|
when ?$ then dollar_identifier.to_s
|
1889
2353
|
when ?_,?a..?z,NONASCII then identifier_as_string(?:)
|
1890
2354
|
when ?A..?Z then
|
1891
2355
|
result=identifier_as_string(?:)
|
1892
2356
|
if @last_operative_token==='::'
|
2357
|
+
fail #i think this can't happen anymore now
|
1893
2358
|
assert klass==MethNameToken
|
1894
2359
|
/#@@LETTER_DIGIT$/o===result and klass=VarNameToken
|
1895
2360
|
end
|
@@ -1919,13 +2384,13 @@ end
|
|
1919
2384
|
opmatches=readahead(3)[@method_operators]
|
1920
2385
|
return [read(opmatches.size), start] if opmatches
|
1921
2386
|
case nc=nextchar
|
1922
|
-
when ?` #`
|
1923
|
-
return [read(1),start]
|
2387
|
+
# when ?` #`
|
2388
|
+
# return [read(1),start]
|
1924
2389
|
when ?_,?a..?z,?A..?Z,NONASCII
|
1925
2390
|
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
|
1926
2391
|
return [identifier_as_string(context), start]
|
1927
2392
|
when ?(
|
1928
|
-
return [nil,start] if @enable_macro or @rubyversion>=1.9
|
2393
|
+
return [nil,start] if @enable_macro or @rubyversion>=1.9 #factored
|
1929
2394
|
end
|
1930
2395
|
|
1931
2396
|
set_last_token KeywordToken.new(';')
|
@@ -1942,16 +2407,17 @@ end
|
|
1942
2407
|
if quote
|
1943
2408
|
ender=til_charset(/[#{quote}]/)
|
1944
2409
|
(quote==getchar) or
|
1945
|
-
return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
|
2410
|
+
return lexerror(res=HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
|
1946
2411
|
quote_real=true
|
1947
2412
|
else
|
1948
2413
|
quote='"'
|
1949
2414
|
ender=@file.scan(/#@@LETTER_DIGIT+/o)
|
1950
2415
|
ender.length >= 1 or
|
1951
|
-
return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
|
2416
|
+
return lexerror(res=HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
|
1952
2417
|
end
|
1953
2418
|
|
1954
2419
|
res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
|
2420
|
+
res.line=linenum
|
1955
2421
|
if true
|
1956
2422
|
res.open=["<<",dash,quote,ender,quote].join
|
1957
2423
|
procrastinated=til_charset(/[\n]/)#+readnl
|
@@ -1982,14 +2448,15 @@ if true
|
|
1982
2448
|
#in order to keep offsets correct in the long term
|
1983
2449
|
#(at present, offsets and line numbers between
|
1984
2450
|
#here header and its body will be wrong. but they should re-sync thereafter.)
|
1985
|
-
newpos=input_position_raw
|
2451
|
+
newpos=input_position_raw
|
1986
2452
|
#unless procrastinated.empty?
|
1987
|
-
@file.modify(newpos,
|
2453
|
+
@file.modify(newpos,0,procrastinated) #vomit procrastinated text back onto input
|
1988
2454
|
#end
|
2455
|
+
#@offset_adjust2=-1 #nice idea, but crashes 1.9.2 and causes more warnings than it fixes... :(
|
1989
2456
|
input_position_set newpos
|
1990
2457
|
|
1991
2458
|
#line numbers would be wrong within the procrastinated section
|
1992
|
-
@linenum-=1
|
2459
|
+
@linenum=res.line #was: @linenum-=1
|
1993
2460
|
|
1994
2461
|
#be nice to get the here body token at the right place in input, too...
|
1995
2462
|
@pending_here_bodies<< body
|
@@ -2038,6 +2505,8 @@ else
|
|
2038
2505
|
#the rest of the here token is read after a
|
2039
2506
|
#newline has been seen and res.affix is eventually called
|
2040
2507
|
end
|
2508
|
+
ensure
|
2509
|
+
assign_encoding!(res.string) if res
|
2041
2510
|
end
|
2042
2511
|
|
2043
2512
|
#-----------------------------------
|
@@ -2073,13 +2542,13 @@ end
|
|
2073
2542
|
@base_file.pos=@file.pos
|
2074
2543
|
@file=@base_file
|
2075
2544
|
@base_file=nil
|
2076
|
-
result="\n"
|
2545
|
+
# result="\n"
|
2077
2546
|
end
|
2078
2547
|
|
2079
2548
|
@offset_adjust=@min_offset_adjust
|
2080
2549
|
@moretokens.push( *optional_here_bodies )
|
2081
2550
|
ln=@linenum
|
2082
|
-
@moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln
|
2551
|
+
@moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln), error),
|
2083
2552
|
FileAndLineToken.new(@filename,ln,input_position)
|
2084
2553
|
|
2085
2554
|
start_of_line_directives
|
@@ -2091,17 +2560,20 @@ end
|
|
2091
2560
|
def optional_here_bodies
|
2092
2561
|
result=[]
|
2093
2562
|
if true
|
2094
|
-
|
2095
|
-
|
2096
|
-
|
2563
|
+
#handle here bodies queued up by previous line
|
2564
|
+
pos=input_position
|
2565
|
+
while body=@pending_here_bodies.shift
|
2097
2566
|
#body.offset=pos
|
2098
|
-
result.push EscNlToken.new("\n",body.offset-1,@filename
|
2099
|
-
result.push FileAndLineToken.new(@filename,body.
|
2567
|
+
result.push EscNlToken.new("\n",body.offset-1,@filename,@linenum)
|
2568
|
+
result.push FileAndLineToken.new(@filename,@linenum,body.offset)
|
2100
2569
|
result.push body
|
2101
2570
|
#result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
|
2102
2571
|
#result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
|
2103
|
-
body.
|
2104
|
-
|
2572
|
+
@linenum+=body.linecount
|
2573
|
+
body.endline=@linenum-1
|
2574
|
+
# body.startline=@linenum-1-body.linecount
|
2575
|
+
end
|
2576
|
+
|
2105
2577
|
else
|
2106
2578
|
#...(we should be more compatible with dos/mac style newlines...)
|
2107
2579
|
while tofill=@incomplete_here_tokens.shift
|
@@ -2122,6 +2594,7 @@ end
|
|
2122
2594
|
tofill.string.offset= input_position
|
2123
2595
|
linecount=1 #for terminator
|
2124
2596
|
assert("\n"==prevchar)
|
2597
|
+
startline=@linenum
|
2125
2598
|
loop {
|
2126
2599
|
assert("\n"==prevchar)
|
2127
2600
|
|
@@ -2137,8 +2610,10 @@ end
|
|
2137
2610
|
end
|
2138
2611
|
if read(tofill.ender.size)==tofill.ender
|
2139
2612
|
crs=til_charset(/[^\r]/)||''
|
2140
|
-
|
2141
|
-
|
2613
|
+
nl=nextchar
|
2614
|
+
if !nl or nl==?\n
|
2615
|
+
close+=tofill.ender+crs
|
2616
|
+
close+="\n" if nl
|
2142
2617
|
break
|
2143
2618
|
end
|
2144
2619
|
end
|
@@ -2223,11 +2698,22 @@ end
|
|
2223
2698
|
tofill.close=close
|
2224
2699
|
result.close=str.close=close[1..-1]
|
2225
2700
|
result.offset=str.offset
|
2701
|
+
result.endline=@linenum-1
|
2702
|
+
result.startline=startline
|
2226
2703
|
assert str.open
|
2227
2704
|
assert str.close
|
2228
2705
|
return result
|
2229
2706
|
end
|
2230
2707
|
|
2708
|
+
#-----------------------------------
|
2709
|
+
#predicate used by #newline (hard=want_hard_nl?) to pick its "hard" branch.
#true when any of these holds for @last_operative_token:
#  * it is a NewlineToken (flagged as a hack by the author)
#  * it is the keyword "rescue" and is not infix
#  * after_nonid_op?{false} returns a false value
def want_hard_nl?
|
2710
|
+
NewlineToken===@last_operative_token || #hack
|
2711
|
+
(KeywordToken===@last_operative_token and
|
2712
|
+
@last_operative_token.ident=="rescue" and
|
2713
|
+
!@last_operative_token.infix?) ||
|
2714
|
+
!after_nonid_op?{false}
|
2715
|
+
end
|
2716
|
+
|
2231
2717
|
#-----------------------------------
|
2232
2718
|
def newline(ch)
|
2233
2719
|
assert("\r\n"[nextchar.chr])
|
@@ -2237,16 +2723,9 @@ end
|
|
2237
2723
|
assert !@parsestack.empty?
|
2238
2724
|
assert @moretokens.empty?
|
2239
2725
|
|
2240
|
-
|
2241
|
-
pre.allow_ooo_offset=true
|
2726
|
+
hard=want_hard_nl?
|
2242
2727
|
|
2243
|
-
hard=
|
2244
|
-
(KeywordToken===@last_operative_token and
|
2245
|
-
@last_operative_token.ident=="rescue" and
|
2246
|
-
!@last_operative_token.infix?) ||
|
2247
|
-
!after_nonid_op?{false}
|
2248
|
-
|
2249
|
-
hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
|
2728
|
+
# hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
|
2250
2729
|
|
2251
2730
|
if hard
|
2252
2731
|
@offset_adjust=@min_offset_adjust
|
@@ -2259,13 +2738,15 @@ end
|
|
2259
2738
|
@parsestack.last.see self,:semi
|
2260
2739
|
|
2261
2740
|
a << rulexer_newline(ch)
|
2262
|
-
|
2741
|
+
a+=@moretokens
|
2742
|
+
@moretokens.replace a
|
2263
2743
|
else
|
2264
2744
|
@offset_adjust=@min_offset_adjust
|
2265
2745
|
offset= input_position
|
2266
2746
|
nl=readnl
|
2267
|
-
|
2268
|
-
FileAndLineToken.new(@filename,@linenum,input_position)
|
2747
|
+
a=[EscNlToken.new(nl,offset,@filename,@linenum),
|
2748
|
+
FileAndLineToken.new(@filename,@linenum,input_position)]
|
2749
|
+
@moretokens.push( *a )
|
2269
2750
|
end
|
2270
2751
|
|
2271
2752
|
#optimization: when thru with regurgitated text from a here document,
|
@@ -2284,23 +2765,13 @@ end
|
|
2284
2765
|
|
2285
2766
|
@moretokens.unshift(*optional_here_bodies)
|
2286
2767
|
|
2287
|
-
#adjust line
|
2288
|
-
|
2289
|
-
|
2290
|
-
|
2291
|
-
|
2292
|
-
|
2293
|
-
pre_fal=true
|
2294
|
-
fal.line-=@moretokens[i].linecount
|
2295
|
-
|
2296
|
-
i-=1
|
2297
|
-
end
|
2768
|
+
#adjust line #s to account for newlines in here bodies
|
2769
|
+
l=@linenum
|
2770
|
+
a.reverse_each{|implicit|
|
2771
|
+
implicit.endline=l
|
2772
|
+
l-=1 if EscNlToken===implicit or NewlineToken===implicit
|
2773
|
+
}
|
2298
2774
|
|
2299
|
-
if pre_fal
|
2300
|
-
result=@moretokens.first
|
2301
|
-
pre.offset=result.offset
|
2302
|
-
@moretokens.unshift pre
|
2303
|
-
end
|
2304
2775
|
start_of_line_directives
|
2305
2776
|
|
2306
2777
|
result=@moretokens.shift
|
@@ -2317,6 +2788,7 @@ end
|
|
2317
2788
|
def start_of_line_directives
|
2318
2789
|
#handle =begin...=end (at start of a line)
|
2319
2790
|
while EQBEGIN===readahead(EQBEGINLENGTH)
|
2791
|
+
startline=@linenum
|
2320
2792
|
startpos= input_position
|
2321
2793
|
more= read(EQBEGINLENGTH-1) #get =begin
|
2322
2794
|
|
@@ -2337,8 +2809,10 @@ end
|
|
2337
2809
|
# @linenum+= newls.size
|
2338
2810
|
|
2339
2811
|
#inject the fresh comment into future token results
|
2340
|
-
|
2341
|
-
|
2812
|
+
comment=IgnoreToken.new(more,startpos)
|
2813
|
+
comment.startline=startline
|
2814
|
+
comment.endline=@linenum
|
2815
|
+
@moretokens.push comment, FileAndLineToken.new(@filename,@linenum,input_position)
|
2342
2816
|
end
|
2343
2817
|
|
2344
2818
|
#handle __END__
|
@@ -2353,11 +2827,13 @@ end
|
|
2353
2827
|
|
2354
2828
|
#-----------------------------------
|
2355
2829
|
#used to resolve the ambiguity of
|
2356
|
-
# unary ops (+, -, *, &,
|
2830
|
+
# unary ops (+, -, *, &, (and ^ if macros enabled) ) in ruby
|
2357
2831
|
#returns whether current token is to be the start of a literal
|
2358
2832
|
IDBEGINCHAR=/^(?:#@@LETTER|[$@])/o
|
2359
2833
|
def unary_op_expected?(ch) #yukko hack
|
2360
|
-
|
2834
|
+
|
2835
|
+
#not unary if its anything followed by = or &/* followed by themselves
|
2836
|
+
return false if /^(?:.=|([&*])\1)$/===readahead(2)
|
2361
2837
|
|
2362
2838
|
return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
|
2363
2839
|
|
@@ -2397,12 +2873,12 @@ end
|
|
2397
2873
|
# <<, %, /, ?, :, and newline (among others) in ruby
|
2398
2874
|
def after_nonid_op?
|
2399
2875
|
|
2400
|
-
#this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
|
2876
|
+
#this is how it should be, I think, and then no handlers for methnametoken and @FUNCLIKE_KEYWORDS are needed
|
2401
2877
|
# if ImplicitParamListStartToken===@last_token_including_implicit
|
2402
2878
|
# huh return true
|
2403
2879
|
# end
|
2404
2880
|
case @last_operative_token
|
2405
|
-
when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
|
2881
|
+
when VarNameToken , MethNameToken, @FUNCLIKE_KEYWORDS.token_pat
|
2406
2882
|
#VarNameToken should really be left out of this case...
|
2407
2883
|
#should be in next branch instead
|
2408
2884
|
#callers all check for last token being not a variable if they pass anything
|
@@ -2411,7 +2887,7 @@ end
|
|
2411
2887
|
return yield
|
2412
2888
|
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
|
2413
2889
|
%r{^(
|
2414
|
-
end|self|true|false|nil
|
2890
|
+
end|self|true|false|nil|
|
2415
2891
|
__FILE__|__LINE__|__ENCODING__|[\})\]]
|
2416
2892
|
)$}x.token_pat
|
2417
2893
|
#dunno about def/undef
|
@@ -2425,7 +2901,7 @@ end
|
|
2425
2901
|
return true if OperatorToken===@last_operative_token || KeywordToken===@last_operative_token
|
2426
2902
|
when NewlineToken, nil, #nil means we're still at beginning of file
|
2427
2903
|
/^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
|
2428
|
-
while|until|begin|for|in|case|when|ensure|defined
|
2904
|
+
while|until|begin|for|in|case|when|ensure|defined\?|->)$
|
2429
2905
|
/x.token_pat
|
2430
2906
|
return true
|
2431
2907
|
when KeywordToken
|
@@ -2478,7 +2954,7 @@ end
|
|
2478
2954
|
|
2479
2955
|
#-----------------------------------
|
2480
2956
|
def caret(ch) #match /^=?/ (^ or ^=) (maybe unary ^ too)
|
2481
|
-
if @enable_macro and (@last_token_maybe_implicit and
|
2957
|
+
if @enable_macro and (@last_token_maybe_implicit and #factored
|
2482
2958
|
@last_token_maybe_implicit.ident=='(') || unary_op_expected?(ch)
|
2483
2959
|
result=OperatorToken.new(read(1),input_position)
|
2484
2960
|
result.unary=true
|
@@ -2533,15 +3009,15 @@ end
|
|
2533
3009
|
/^(return|break|next)$/===@last_operative_token.ident
|
2534
3010
|
if (?0..?9)===readahead(2)[1]
|
2535
3011
|
result= number(ch)
|
2536
|
-
elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
|
2537
|
-
@file.pos+=2
|
2538
|
-
#push down block context
|
2539
|
-
localvars.start_block
|
2540
|
-
@parsestack.push ctx=BlockContext.new(@linenum)
|
2541
|
-
ctx.wanting_stabby_block_body=true
|
2542
|
-
#read optional proc params
|
2543
|
-
block_param_list_lookahead ?(, ParenedParamListLhsContext
|
2544
|
-
result=KeywordToken.new('->',pos)
|
3012
|
+
# elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
|
3013
|
+
# @file.pos+=2
|
3014
|
+
# #push down block context
|
3015
|
+
# localvars.start_block
|
3016
|
+
# @parsestack.push ctx=BlockContext.new(@linenum)
|
3017
|
+
# ctx.wanting_stabby_block_body=true
|
3018
|
+
# #read optional proc params
|
3019
|
+
# block_param_list_lookahead ?(, ParenedParamListLhsContext
|
3020
|
+
# result=KeywordToken.new('->',pos)
|
2545
3021
|
|
2546
3022
|
else #unary operator
|
2547
3023
|
result=getchar
|
@@ -2581,20 +3057,20 @@ end
|
|
2581
3057
|
result=@moretokens.shift
|
2582
3058
|
end
|
2583
3059
|
@parsestack.last.see self,:arrow
|
2584
|
-
when '~' # =~... after regex, maybe?
|
2585
|
-
last=last_operative_token
|
2586
|
-
|
2587
|
-
if @rubyversion>=1.9 and StringToken===last and last.lvars
|
2588
|
-
#ruby delays adding lvars from regexps to known lvars table
|
2589
|
-
#for several tokens in some cases. not sure why or if on purpose
|
2590
|
-
#i'm just going to add them right away
|
2591
|
-
last.lvars.each{|lvar| localvars[lvar]=true }
|
2592
|
-
end
|
3060
|
+
# when '~' # =~... after regex, maybe?
|
3061
|
+
# last=last_operative_token
|
3062
|
+
#
|
3063
|
+
# if @rubyversion>=1.9 and StringToken===last and last.lvars
|
3064
|
+
# #ruby delays adding lvars from regexps to known lvars table
|
3065
|
+
# #for several tokens in some cases. not sure why or if on purpose
|
3066
|
+
# #i'm just going to add them right away
|
3067
|
+
# last.lvars.each{|lvar| localvars[lvar]=true }
|
3068
|
+
# end
|
2593
3069
|
when '' #plain assignment: record local variable definitions
|
2594
3070
|
last_context_not_implicit.lhs=false
|
2595
3071
|
@last_operative_token=result
|
2596
3072
|
@moretokens.push( *ignored_tokens(true).map{|x|
|
2597
|
-
NewlineToken===x ? EscNlToken.new(x.ident,x.offset
|
3073
|
+
NewlineToken===x ? EscNlToken.new(x.ident,x.offset,x.filename,x.linenum) : x
|
2598
3074
|
} )
|
2599
3075
|
@parsestack.push AssignmentRhsContext.new(@linenum)
|
2600
3076
|
@moretokens.push AssignmentRhsListStartToken.new( input_position)
|
@@ -2621,14 +3097,13 @@ end
|
|
2621
3097
|
else
|
2622
3098
|
@moretokens << NoWsToken.new(input_position)
|
2623
3099
|
end
|
2624
|
-
ty=
|
3100
|
+
ty=OperatorToken
|
2625
3101
|
result=ty.new(result, input_position-result.size)
|
2626
3102
|
result.unary=!k #result should distinguish unary !
|
2627
3103
|
|
2628
3104
|
return result
|
2629
3105
|
end
|
2630
3106
|
|
2631
|
-
|
2632
3107
|
#-----------------------------------
|
2633
3108
|
def dot(ch)
|
2634
3109
|
str=''
|
@@ -2636,11 +3111,11 @@ end
|
|
2636
3111
|
|
2637
3112
|
#three lumps of sugar or two?
|
2638
3113
|
eat_next_if(?.) and
|
2639
|
-
return
|
3114
|
+
return OperatorToken.new(eat_next_if(?.)? "..." : "..")
|
2640
3115
|
|
2641
3116
|
#else saw just single .
|
2642
3117
|
#match a valid ruby id after the dot
|
2643
|
-
result= KeywordToken.new( ".")
|
3118
|
+
result= KeywordToken.new( "." )
|
2644
3119
|
dot_rhs(result)
|
2645
3120
|
return result
|
2646
3121
|
end
|
@@ -2651,7 +3126,10 @@ end
|
|
2651
3126
|
aa= ignored_tokens
|
2652
3127
|
was=after_nonid_op?{true}
|
2653
3128
|
tok,pos=callsite_symbol(prevtok)
|
2654
|
-
|
3129
|
+
if tok
|
3130
|
+
toks=var_or_meth_name(tok,prevtok,pos,was)
|
3131
|
+
aa.push(*toks)
|
3132
|
+
end
|
2655
3133
|
a.unshift(*aa)
|
2656
3134
|
}
|
2657
3135
|
end
|
@@ -2692,8 +3170,20 @@ if false
|
|
2692
3170
|
return IgnoreToken.new(result)
|
2693
3171
|
end
|
2694
3172
|
end
|
3173
|
+
|
3174
|
+
#-----------------------------------
|
3175
|
+
#predicate used by #open_brace when it sees '(' ("if method_params?").
#looks at last_token_maybe_implicit and is true when that token is a
#VarNameToken or MethNameToken, matches @FUNCLIKE_KEYWORDS, or (only when
#@enable_macro is set) is a token whose ident is ')'.
def method_params?
|
3176
|
+
lasttok=last_token_maybe_implicit #last_operative_token
|
3177
|
+
VarNameToken===lasttok or
|
3178
|
+
MethNameToken===lasttok or
|
3179
|
+
lasttok===@FUNCLIKE_KEYWORDS or
|
3180
|
+
(@enable_macro and lasttok and lasttok.ident==')') #factored
|
3181
|
+
end
|
3182
|
+
|
2695
3183
|
#-----------------------------------
|
2696
3184
|
def open_brace(ch)
|
3185
|
+
#there are 3 distinct cases here; this method should be divided in 3
|
3186
|
+
|
2697
3187
|
assert((ch!='[' or !want_op_name))
|
2698
3188
|
assert(@moretokens.empty?)
|
2699
3189
|
lastchar=prevchar
|
@@ -2707,26 +3197,19 @@ end
|
|
2707
3197
|
# in contexts expecting an (operator) method name, we
|
2708
3198
|
# would want to match [] or []= at this point
|
2709
3199
|
#but control never comes this way in those cases... goes
|
2710
|
-
#to custom parsers for alias, undef, and def in #
|
3200
|
+
#to custom parsers for alias, undef, and def in #special_identifier?
|
2711
3201
|
tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]}
|
2712
3202
|
@parsestack.push ListImmedContext.new(ch,@linenum)
|
2713
3203
|
lasttok=last_operative_token
|
2714
3204
|
#could be: lasttok===/^#@@LETTER/o
|
2715
3205
|
if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or
|
2716
|
-
MethNameToken===lasttok or lasttok
|
3206
|
+
MethNameToken===lasttok or lasttok===@FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
|
2717
3207
|
@moretokens << (tokch)
|
2718
3208
|
tokch= NoWsToken.new(input_position-1)
|
2719
3209
|
end
|
2720
3210
|
when '('
|
2721
|
-
lasttok=last_token_maybe_implicit #last_operative_token
|
2722
3211
|
#could be: lasttok===/^#@@LETTER/o
|
2723
|
-
method_params
|
2724
|
-
VarNameToken===lasttok or
|
2725
|
-
MethNameToken===lasttok or
|
2726
|
-
lasttok===FUNCLIKE_KEYWORDS or
|
2727
|
-
(@enable_macro and lasttok and lasttok.ident==')')
|
2728
|
-
)
|
2729
|
-
if method_params
|
3212
|
+
if method_params?
|
2730
3213
|
unless WHSPCHARS[lastchar]
|
2731
3214
|
@moretokens << tokch
|
2732
3215
|
tokch= NoWsToken.new(input_position-1)
|
@@ -2753,13 +3236,19 @@ end
|
|
2753
3236
|
#in which case below would be bad.
|
2754
3237
|
if !(UnparenedParamListLhsContext===@parsestack.last) and
|
2755
3238
|
after_nonid_op?{false} || @last_operative_token.has_no_block?
|
2756
|
-
@
|
3239
|
+
if @file.readbehind(2)=='#{'
|
3240
|
+
@parsestack.push StringInclusionContext.new(@linenum)
|
3241
|
+
else
|
3242
|
+
@parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
|
3243
|
+
end
|
2757
3244
|
else
|
2758
3245
|
#abort_noparens!
|
2759
3246
|
tokch.set_infix!
|
2760
3247
|
tokch.as="do"
|
2761
3248
|
|
2762
3249
|
#if (perhaps deep) inside a stabby block param list context, end it
|
3250
|
+
stabby_params_just_ended,tokch=maybe_end_stabby_block_param_list(tokch)
|
3251
|
+
=begin was
|
2763
3252
|
if @rubyversion>=1.9
|
2764
3253
|
stabby_params_just_ended=false
|
2765
3254
|
(@parsestack.size-1).downto(1){|i|
|
@@ -2779,6 +3268,7 @@ end
|
|
2779
3268
|
end
|
2780
3269
|
}
|
2781
3270
|
end
|
3271
|
+
=end
|
2782
3272
|
|
2783
3273
|
# 'need to find matching callsite context and end it if implicit'
|
2784
3274
|
lasttok=last_operative_token
|
@@ -2799,6 +3289,11 @@ end
|
|
2799
3289
|
return (tokch)
|
2800
3290
|
end
|
2801
3291
|
|
3292
|
+
#-----------------------------------
|
3293
|
+
#hook called by #open_brace when a '{' is treated as a block start.
#tokch:: the token built for the brace
#returns a pair [stabby_params_just_ended, tokch]; this implementation
#always answers false and hands tokch back unchanged.
#NOTE(review): presumably a ruby-1.9-specific version elsewhere overrides
#this to do the work that is now disabled with =begin/=end in #open_brace - confirm.
def maybe_end_stabby_block_param_list(tokch)
|
3294
|
+
return false,tokch
|
3295
|
+
end
|
3296
|
+
|
2802
3297
|
#-----------------------------------
|
2803
3298
|
def close_brace(ch)
|
2804
3299
|
ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
|
@@ -2846,12 +3341,20 @@ end
|
|
2846
3341
|
|
2847
3342
|
#-----------------------------------
|
2848
3343
|
#end-of-input handler.
#queues, in order: the tokens from optional_here_bodies, the tokens from
#abort_noparens!, and the token from rulexer_endoffile_detected(s);
#kills @progress_thread if one is set; then shifts the first queued token
#off @moretokens and returns it.
#s:: passed through to rulexer_endoffile_detected (defaults to '')
#asserts that no here-document bodies are still pending, and calls lexerror
#on the returned token when balanced_braces? is false.
def endoffile_detected(s='')
|
2849
|
-
@
|
3344
|
+
@linenum+=1 #optional_here_bodies expects to be called after a newline was seen and @linenum bumped
|
3345
|
+
#in this case, there is no newline, but we need to pretend there is. otherwise optional_here_bodies
|
3346
|
+
#makes tokens with wrong line numbers
|
3347
|
+
|
3348
|
+
@moretokens.concat optional_here_bodies
|
3349
|
+
@linenum-=1 #now put it back
|
3350
|
+
@moretokens.concat abort_noparens!
|
3351
|
+
@moretokens.push rulexer_endoffile_detected(s)
|
2850
3352
|
if @progress_thread
|
2851
3353
|
@progress_thread.kill
|
2852
3354
|
@progress_thread=nil
|
2853
3355
|
end
|
2854
3356
|
result= @moretokens.shift
|
3357
|
+
assert @pending_here_bodies.empty?
|
2855
3358
|
balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
|
2856
3359
|
result
|
2857
3360
|
end
|
@@ -2879,8 +3382,11 @@ end
|
|
2879
3382
|
while AssignmentRhsContext===@parsestack[-1]
|
2880
3383
|
pop=
|
2881
3384
|
case @parsestack[-2]
|
2882
|
-
when ParamListContext,ParamListContextNoParen,
|
2883
|
-
ListImmedContext,AssignmentRhsContext
|
3385
|
+
when ParamListContext,ParamListContextNoParen,
|
3386
|
+
WhenParamListContext,ListImmedContext,AssignmentRhsContext,
|
3387
|
+
ParenedParamListLhsContext,UnparenedParamListLhsContext,
|
3388
|
+
BlockParamListLhsContext,KnownNestedLhsParenContext
|
3389
|
+
true
|
2884
3390
|
when RescueSMContext; @parsestack[-2].state==:rescue
|
2885
3391
|
when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
|
2886
3392
|
else false
|
@@ -2904,7 +3410,7 @@ end
|
|
2904
3410
|
#-----------------------------------
|
2905
3411
|
def semicolon(ch)
|
2906
3412
|
assert @moretokens.empty?
|
2907
|
-
@moretokens.push(*abort_noparens!)
|
3413
|
+
@moretokens.push(*abort_noparens!(';',0))
|
2908
3414
|
@parsestack.last.see self,:semi
|
2909
3415
|
case @parsestack.last #should be in context's see:semi handler
|
2910
3416
|
when ExpectThenOrNlContext
|
@@ -2932,9 +3438,54 @@ end
|
|
2932
3438
|
#-----------------------------------
|
2933
3439
|
#tokenify_results_of :identifier
|
2934
3440
|
save_offsets_in(*CHARMAPPINGS.values.uniq-[
|
2935
|
-
:symbol_or_op,:open_brace,:whitespace,:exclam,:
|
3441
|
+
:symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus
|
2936
3442
|
])
|
3443
|
+
save_linenums_in :symbol_or_op,:open_brace,:whitespace,:exclam,:caret,:plusminus
|
2937
3444
|
#save_offsets_in :symbol
|
2938
3445
|
|
2939
3446
|
end
|
2940
3447
|
|
3448
|
+
#defense against my class being redefined by a certain other project (ruby_parser).
#for each of Kernel#require and Kernel#load, the generated code keeps the
#original instance method in a local variable and redefines the method:
#  * if the basename of the requested file is ruby_lexer or ruby_parser
#    (optionally with .rb, case-insensitive), only the warnings below are
#    printed and the original method is not called;
#  * otherwise the original method is called on self; if it raises, the
#    backtrace entries matching the wrapper's own file:line pattern are
#    deleted and the exception is re-raised.
#NOTE(review): the replacement block takes exactly one argument (|file|), so
#load's optional second argument looks unsupported here - confirm.
#NOTE(review): rescue Exception also intercepts non-StandardError exceptions;
#they are re-raised, so this appears intentional - confirm.
|
3449
|
+
module Kernel
|
3450
|
+
eval %w[require load].map{|name| <<-END }.join
|
3451
|
+
#{name}__without_rubylexer_protection=instance_method :#{name}
|
3452
|
+
define_method(:#{name}) do |file|
|
3453
|
+
if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file)
|
3454
|
+
warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time."
|
3455
|
+
warn "ruby_parser causes a namespace conflict with rubylexer"
|
3456
|
+
warn "because ruby_parser redefines the class RubyLexer"
|
3457
|
+
warn "in a way which is incompatible with standard RubyLexer."
|
3458
|
+
warn "The rubylexer gem owns the namespace ::RubyLexer,"
|
3459
|
+
warn "and claimed it at least 2 years before ruby_parser existed."
|
3460
|
+
warn "Attempt to redefine RubyLexer in an incompatible way disabled."
|
3461
|
+
else
|
3462
|
+
begin
|
3463
|
+
#{name}__without_rubylexer_protection.bind(self).call file
|
3464
|
+
rescue Exception=>e
|
3465
|
+
e.backtrace.delete_if{|x| /\\A\#{__FILE__}:\#{__LINE__-2}:/o===x }
|
3466
|
+
raise e
|
3467
|
+
end
|
3468
|
+
end
|
3469
|
+
end
|
3470
|
+
END
|
3471
|
+
end
|
3472
|
+
|
3473
|
+
#same protection for autoload, generated once inside "class Module" and once
#inside "module Kernel": the original autoload instance method is kept in a
#local variable and autoload(mod,file) is redefined so that a file whose
#basename is ruby_lexer or ruby_parser (optionally with .rb, case-insensitive)
#only produces the warnings below; any other file is passed on to the
#original method, called on self.
eval %w[class\ Module module\ Kernel].map{|ctx| <<END }.join
|
3474
|
+
#{ctx}
|
3475
|
+
autoload__without_rubylexer_protection=instance_method :autoload
|
3476
|
+
define_method(:autoload) do |mod,file|
|
3477
|
+
if /\\Aruby_(lexer|parser)(\\.rb)?\\z/i===File.basename(file)
|
3478
|
+
warn "Uh-oh, you're trying to use ruby_parser and rubylexer at the same time."
|
3479
|
+
warn "ruby_parser causes a namespace conflict with rubylexer"
|
3480
|
+
warn "because ruby_parser redefines the class RubyLexer"
|
3481
|
+
warn "in a way which is incompatible with standard RubyLexer."
|
3482
|
+
warn "The rubylexer gem owns the namespace ::RubyLexer,"
|
3483
|
+
warn "and claimed it at least 2 years before ruby_parser existed."
|
3484
|
+
warn "Attempt to redefine RubyLexer in an incompatible way disabled."
|
3485
|
+
else
|
3486
|
+
autoload__without_rubylexer_protection.bind(self).call mod,file
|
3487
|
+
end
|
3488
|
+
end
|
3489
|
+
end
|
3490
|
+
END
|
3491
|
+
|