rubylexer 0.6.2 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +55 -0
- data/Manifest.txt +67 -0
- data/README.txt +103 -0
- data/Rakefile +24 -0
- data/howtouse.txt +9 -6
- data/{assert.rb → lib/assert.rb} +11 -11
- data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
- data/lib/rubylexer/0.6.2.rb +39 -0
- data/lib/rubylexer/0.6.rb +5 -0
- data/lib/rubylexer/0.7.0.rb +2 -0
- data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
- data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
- data/{context.rb → lib/rubylexer/context.rb} +48 -18
- data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
- data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
- data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
- data/{token.rb → lib/rubylexer/token.rb} +72 -20
- data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
- data/lib/rubylexer/version.rb +3 -0
- data/{testcode → test/code}/deletewarns.rb +0 -0
- data/test/code/dl_all_gems.rb +43 -0
- data/{testcode → test/code}/dumptokens.rb +12 -9
- data/test/code/locatetest +30 -0
- data/test/code/locatetest.rb +49 -0
- data/test/code/rubylexervsruby.rb +173 -0
- data/{testcode → test/code}/tokentest.rb +62 -51
- data/{testcode → test/code}/torment +8 -8
- data/test/code/unpack_all_gems.rb +15 -0
- data/{testdata → test/data}/1.rb.broken +0 -0
- data/{testdata → test/data}/23.rb +0 -0
- data/test/data/__end__.rb +2 -0
- data/test/data/__end__2.rb +3 -0
- data/test/data/and.rb +5 -0
- data/test/data/blockassigntest.rb +23 -0
- data/test/data/chunky.plain.rb +75 -0
- data/test/data/chunky_bacon.rb +112 -0
- data/test/data/chunky_bacon2.rb +112 -0
- data/test/data/chunky_bacon3.rb +112 -0
- data/test/data/chunky_bacon4.rb +112 -0
- data/test/data/for.rb +45 -0
- data/test/data/format.rb +6 -0
- data/{testdata → test/data}/g.rb +0 -0
- data/test/data/gemlist.txt +280 -0
- data/test/data/heart.rb +7 -0
- data/test/data/if.rb +6 -0
- data/test/data/jarh.rb +369 -0
- data/test/data/lbrace.rb +4 -0
- data/test/data/lbrack.rb +4 -0
- data/{testdata → test/data}/newsyntax.rb +0 -0
- data/{testdata → test/data}/noeolatend.rb +0 -0
- data/test/data/p-op.rb +8 -0
- data/{testdata → test/data}/p.rb +671 -79
- data/{testdata → test/data}/pleac.rb.broken +0 -0
- data/{testdata → test/data}/pre.rb +0 -0
- data/{testdata → test/data}/pre.unix.rb +0 -0
- data/{testdata → test/data}/regtest.rb +0 -0
- data/test/data/rescue.rb +35 -0
- data/test/data/s.rb +186 -0
- data/test/data/strinc.rb +2 -0
- data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
- data/test/data/untermed_here.rb.broken +2 -0
- data/test/data/untermed_string.rb.broken +1 -0
- data/{testdata → test/data}/untitled1.rb +0 -0
- data/{testdata → test/data}/w.rb +0 -0
- data/{testdata → test/data}/wsdlDriver.rb +0 -0
- data/testing.txt +6 -4
- metadata +163 -59
- data/README +0 -134
- data/Rantfile +0 -37
- data/io.each_til_charset.rb +0 -247
- data/require.rb +0 -103
- data/rlold.rb +0 -12
- data/testcode/locatetest +0 -12
- data/testcode/rubylexervsruby.rb +0 -104
- data/testcode/rubylexervsruby.sh +0 -51
- data/testresults/placeholder +0 -0
data/History.txt
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
=== 0.7.0/2-15-2008
|
2
|
+
* implicit tokens are now emitted at the right times (need more test code)
|
3
|
+
* local variables are now temporarily hidden by class, module, and def
|
4
|
+
* line numbers should always be correct now (=begin...=end caused this) (??)
|
5
|
+
* fixed meth/var-name confusion in idents after 'def' but before params
|
6
|
+
* FileAndLineToken now emitted on all new lines (0.8)
|
7
|
+
* offset of __END__ now works(??)
|
8
|
+
* put files into lib/
|
9
|
+
* improvements in parsing unary * and & (??)
|
10
|
+
* input can now come from a string
|
11
|
+
* local vars (defs and uses) are recognized in string inclusions
|
12
|
+
* explicitly decimal numbers, eg: 0d123456789
|
13
|
+
* eof in unusual cases is better handled
|
14
|
+
* __END__ is not treated as a keyword
|
15
|
+
* '|' as goalpost is now better handled
|
16
|
+
* a number of things have been renamed internally
|
17
|
+
* no more implicit parens for setter method calls
|
18
|
+
* '{' after return, break, and next is now the start of a hash.
|
19
|
+
* ambiguous cases of '::','(',':',and '?' are now better handled.
|
20
|
+
* more start/end hint tokens (for 'when', 'rescue', and 'for')
|
21
|
+
* bugfixes in rhs hint tokens
|
22
|
+
* parsing of def headers for singleton methods is improved
|
23
|
+
* rescue as operator is now handled
|
24
|
+
* block param list lookahead is simplified
|
25
|
+
* unary ops (including * and &) can be easily distinguished in output
|
26
|
+
* here document bodies better handled, esp after escaped newline
|
27
|
+
* symbols like %s{symbol} now actually work
|
28
|
+
* implicit parens around => better handled...
|
29
|
+
* different cases of '{' can now be easily distinguished
|
30
|
+
* ImplicitParamList Start and End are now Keyword, not Ignore tokens.
|
31
|
+
|
32
|
+
=== 0.6.2
|
33
|
+
* testcode/dumptokens.rb charhandler.rb doesn't work... but does after unix2dos (not reproducible)
|
34
|
+
* files are opened in binmode to avoid all possible eol translation
|
35
|
+
* (x.+?x) now works
|
36
|
+
* methname/varname mixups fixed in some cases
|
37
|
+
* performance improvements, in most important cases
|
38
|
+
* error handling tokens should be emitted on error input... ErrorToken mixin module
|
39
|
+
* but old error handling interface should be preserved and made available
|
40
|
+
* moved readahead and friends into IOext
|
41
|
+
* made optimized readahead et al for fakefile
|
42
|
+
* dos newlines (and newlines generally) can be fancy string delimiters
|
43
|
+
* do,if,until, etc, have a way to tell if an end is associated
|
44
|
+
* broke readme into pieces
|
45
|
+
|
46
|
+
=== 0.6.0
|
47
|
+
* partly fixed the implicit tokens at the wrong times. (or not at the
|
48
|
+
  right times) (partly fixed)
|
49
|
+
* : operator might be a synonym for 'then'
|
50
|
+
* variables other than the last are now recognized in multiple assignment
|
51
|
+
* variables created by for and rescue are now recognized
|
52
|
+
* token following :: should not be BareSymbolToken if begins with A-Z (unless obviously a func)
|
53
|
+
* read code to be lexed from a string. (irb wants this)
|
54
|
+
* fancy symbols weren't supported at all. (like this: %s{abcdefg})
|
55
|
+
|
data/Manifest.txt
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
COPYING
|
2
|
+
README.txt
|
3
|
+
Manifest.txt
|
4
|
+
Rakefile
|
5
|
+
howtouse.txt
|
6
|
+
History.txt
|
7
|
+
testing.txt
|
8
|
+
lib/rubylexer/rubycode.rb
|
9
|
+
lib/rubylexer/context.rb
|
10
|
+
lib/rubylexer/token.rb
|
11
|
+
lib/rubylexer/0.6.rb
|
12
|
+
lib/rubylexer/0.6.2.rb
|
13
|
+
lib/rubylexer/0.7.0.rb
|
14
|
+
lib/rubylexer/version.rb
|
15
|
+
lib/rubylexer/rulexer.rb
|
16
|
+
lib/rubylexer/tokenprinter.rb
|
17
|
+
lib/rubylexer/charset.rb
|
18
|
+
lib/rubylexer/symboltable.rb
|
19
|
+
lib/rubylexer/charhandler.rb
|
20
|
+
lib/assert.rb
|
21
|
+
lib/rubylexer.rb
|
22
|
+
test/data/gemlist.txt
|
23
|
+
test/data/blockassigntest.rb
|
24
|
+
test/data/for.rb
|
25
|
+
test/data/chunky_bacon.rb
|
26
|
+
test/data/and.rb
|
27
|
+
test/data/pre.unix.rb
|
28
|
+
test/data/untermed_string.rb.broken
|
29
|
+
test/data/__end__2.rb
|
30
|
+
test/data/w.rb
|
31
|
+
test/data/if.rb
|
32
|
+
test/data/pre.rb
|
33
|
+
test/data/jarh.rb
|
34
|
+
test/data/regtest.rb
|
35
|
+
test/data/chunky_bacon4.rb
|
36
|
+
test/data/__end__.rb
|
37
|
+
test/data/strinc.rb
|
38
|
+
test/data/lbrace.rb
|
39
|
+
test/data/p.rb
|
40
|
+
test/data/chunky.plain.rb
|
41
|
+
test/data/noeolatend.rb
|
42
|
+
test/data/g.rb
|
43
|
+
test/data/23.rb
|
44
|
+
test/data/lbrack.rb
|
45
|
+
test/data/untitled1.rb
|
46
|
+
test/data/rescue.rb
|
47
|
+
test/data/tokentest.assert.rb.can
|
48
|
+
test/data/pleac.rb.broken
|
49
|
+
test/data/heart.rb
|
50
|
+
test/data/s.rb
|
51
|
+
test/data/wsdlDriver.rb
|
52
|
+
test/data/p-op.rb
|
53
|
+
test/data/1.rb.broken
|
54
|
+
test/data/untermed_here.rb.broken
|
55
|
+
test/data/newsyntax.rb
|
56
|
+
test/data/chunky_bacon3.rb
|
57
|
+
test/data/chunky_bacon2.rb
|
58
|
+
test/data/format.rb
|
59
|
+
test/code/locatetest.rb
|
60
|
+
test/code/rubylexervsruby.rb
|
61
|
+
test/code/dl_all_gems.rb
|
62
|
+
test/code/unpack_all_gems.rb
|
63
|
+
test/code/tokentest.rb
|
64
|
+
test/code/dumptokens.rb
|
65
|
+
test/code/torment
|
66
|
+
test/code/locatetest
|
67
|
+
test/code/deletewarns.rb
|
data/README.txt
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
= RubyLexer
|
2
|
+
|
3
|
+
*
|
4
|
+
*
|
5
|
+
*
|
6
|
+
|
7
|
+
=== DESCRIPTION:
|
8
|
+
|
9
|
+
RubyLexer is a lexer library for Ruby, written in Ruby. Rubylexer is meant
|
10
|
+
as a lexer for Ruby that's complete and correct; all legal Ruby
|
11
|
+
code should be lexed correctly by RubyLexer as well. Just enough parsing
|
12
|
+
capability is included to give RubyLexer enough context to tokenize correctly
|
13
|
+
in all cases. (This turned out to be more parsing than I had thought or
|
14
|
+
wanted to take on at first.) RubyLexer handles the hard things like
|
15
|
+
complicated strings, the ambiguous nature of some punctuation characters and
|
16
|
+
keywords in ruby, and distinguishing methods and local variables.
|
17
|
+
|
18
|
+
RubyLexer is not particularly clean code. As I progressed in writing this,
|
19
|
+
I've learned a little about how these things are supposed to be done; the
|
20
|
+
lexer is not supposed to have any state of its own, instead it gets whatever
|
21
|
+
it needs to know from the parser. As a stand-alone lexer, Rubylexer maintains
|
22
|
+
quite a lot of state. Every instance variable in the RubyLexer class is some
|
23
|
+
sort of lexer state. Most of the complication and ugly code in RubyLexer is
|
24
|
+
in maintaining or using this state.
|
25
|
+
|
26
|
+
For information about using RubyLexer in your program, please see howtouse.txt.
|
27
|
+
|
28
|
+
For my notes on the testing of RubyLexer, see testing.txt.
|
29
|
+
|
30
|
+
If you have any questions, comments, problems, new feature requests, or just
|
31
|
+
want to figure out how to make it work for what you need to do, contact me:
|
32
|
+
rubylexer _at_ inforadical _dot_ net
|
33
|
+
|
34
|
+
RubyLexer is a RubyForge project. RubyForge is another good place to send your
|
35
|
+
bug reports or whatever: http://rubyforge.org/projects/rubylexer/
|
36
|
+
|
37
|
+
(There aren't any bugs filed against RubyLexer there yet, but don't be afraid
|
38
|
+
that your report will get lonely.)
|
39
|
+
|
40
|
+
==SYNOPSIS:
|
41
|
+
require "rubylexer.rb"
|
42
|
+
#then later
|
43
|
+
lexer=RubyLexer.new(a_file_name, opened_File_or_String)
|
44
|
+
until EoiToken===(token=lexer.get1token)
|
45
|
+
#...do stuff w/ token...
|
46
|
+
end
|
47
|
+
|
48
|
+
== Status
|
49
|
+
RubyLexer can correctly lex all legal Ruby 1.8 code that I've been able to
|
50
|
+
find on my Debian system. It can also handle (most of) my catalog of nasty
|
51
|
+
test cases (in test/data/p.rb) (see below for known problems). At this point,
|
52
|
+
new bugs are almost exclusively found by my home-grown test code, rather
|
53
|
+
than ruby code gathered 'from the wild'. There are a number of issues I know
|
54
|
+
about and plan to fix, but it seems that Ruby coders don't write code complex
|
55
|
+
enough to trigger them very often. Although incomplete, RubyLexer can
|
56
|
+
correctly distinguish these ambiguous uses of the following operators and
|
57
|
+
keywords, depending on context:
|
58
|
+
% can be modulus operator or start of fancy string
|
59
|
+
/ can be division operator or start of regex
|
60
|
+
* & + - :: can be unary or binary operator
|
61
|
+
[] can be for array literal or [] method (or []=)
|
62
|
+
<< can be here document or left shift operator (or in class<<obj expr)
|
63
|
+
: can be start of symbol, substitute for then, or part of ternary op
|
64
|
+
(there are other uses too, but they're not supported yet.)
|
65
|
+
? can be start of character constant or ternary operator
|
66
|
+
` can be method name or start of exec string
|
67
|
+
any overrideable operator and most keywords can also be method names
|
68
|
+
|
69
|
+
== todo
|
70
|
+
test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
|
71
|
+
these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
|
72
|
+
test more ways: cvt source to dos or mac fmt before testing
|
73
|
+
test more ways: run unit tests after passing thru rubylexer (0.7)
|
74
|
+
test more ways: test require'd, load'd, or eval'd code as well (0.7)
|
75
|
+
lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
|
76
|
+
incremental lexing (ides want this (for performance))
|
77
|
+
put everything in a namespace
|
78
|
+
integrate w/ other tools...
|
79
|
+
html colorized output?
|
80
|
+
move more state onto @parsestack (ongoing)
|
81
|
+
the new cases in p.rb now compile, but won't run
|
82
|
+
expand on test documentation
|
83
|
+
use want_op_name more
|
84
|
+
return result as a half-parsed tree (with parentheses and the like matched)
|
85
|
+
emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
|
86
|
+
strings are still slow
|
87
|
+
emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
|
88
|
+
token pruning in dumptokens...
|
89
|
+
|
90
|
+
== known issues: (and planned fix release)
|
91
|
+
context not really preserved when entering or leaving string inclusions. this causes
|
92
|
+
a number of problems. local variables are ok now, but here document headers started
|
93
|
+
in a string inclusion with the body outside will be a problem. (0.8)
|
94
|
+
string tokenization sometimes a little different from ruby around newlines
|
95
|
+
(htree/template.rb) (0.8)
|
96
|
+
string contents might not be correctly translated in a few cases (0.8?)
|
97
|
+
symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
|
98
|
+
'\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
|
99
|
+
windows or mac newlines in source are likely to cause problems in obscure cases (need test case)
|
100
|
+
unterminated =begin is not an error (0.8)
|
101
|
+
ruby 1.9 completely unsupported (0.9)
|
102
|
+
character sets other than ascii are not supported at all (1.0)
|
103
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Copyright (C) 2008 Caleb Clausen
|
2
|
+
# Distributed under the terms of Ruby's license.
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require 'lib/rubylexer/version.rb'
|
6
|
+
|
7
|
+
|
8
|
+
readme=open("README.txt")
|
9
|
+
readme.readline("\n=== DESCRIPTION:")
|
10
|
+
readme.readline("\n\n")
|
11
|
+
desc=readme.readline("\n\n")
|
12
|
+
|
13
|
+
hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
|
14
|
+
_.author = "Caleb Clausen"
|
15
|
+
_.email = "rubylexer-owner @at@ inforadical .dot. net"
|
16
|
+
_.url = "http://rubylexer.rubyforge.org/"
|
17
|
+
_.extra_deps = ["sequence"]
|
18
|
+
_.test_globs=["test/{code/*,data/*rb*,results/}"]
|
19
|
+
_.description=desc
|
20
|
+
_.summary=desc[/\A[^.]+\./]
|
21
|
+
_.spec_extras={:bindir=>''}
|
22
|
+
end
|
23
|
+
|
24
|
+
|
data/howtouse.txt
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
|
2
2
|
Using rubylexer:
|
3
3
|
require "rubylexer.rb"
|
4
|
-
|
4
|
+
#then later
|
5
5
|
lexer=RubyLexer.new(a_file_name, opened_File_or_String)
|
6
|
-
until EoiToken===(
|
7
|
-
|
6
|
+
until EoiToken===(token=lexer.get1token)
|
7
|
+
#...do stuff w/ token...
|
8
8
|
end
|
9
9
|
|
10
|
-
For a slightly expanded version of this example, see
|
10
|
+
For a slightly expanded version of this example, see test/code/dumptokens.rb.
|
11
11
|
|
12
12
|
token will be an instance of a subclass of Token. there are many token classes (see token.rb)
|
13
13
|
however, all tokens have some common methods:
|
@@ -23,7 +23,8 @@ WToken #(mostly useless?) abstract superclass for KeywordToken,
|
|
23
23
|
#OperatorToken, VarNameToken, and HerePlaceholderToken
|
24
24
|
#but not (confusingly) MethNameToken (perhaps that'll change)
|
25
25
|
KeywordToken #a ruby keyword or non-overridable punctuation char(s)
|
26
|
-
OperatorToken #overrideable operators
|
26
|
+
OperatorToken #overrideable operators.
|
27
|
+
#use #unary? and #binary? to find out how many arguments it takes.
|
27
28
|
VarNameToken #a name that represents a variable
|
28
29
|
HerePlaceholderToken #represents the header of a here string. subclass of WToken
|
29
30
|
MethNameToken #the name of a method: the uncoloned
|
@@ -120,7 +121,8 @@ time to adapt to changes. That promise goes for all the changes described below.
|
|
120
121
|
|
121
122
|
In cases where the 2 are incompatible, (inspired by rubygems) I've come up with this:
|
122
123
|
|
123
|
-
|
124
|
+
require 'rubylexer/0.6'
|
125
|
+
rl=RubyLexer.new(...args...) #request the 0.6 api
|
124
126
|
|
125
127
|
This actually works currently; it enables the old api where errors cause an exception instead
|
126
128
|
of generating ErrorTokens. The default will always be to use the new api.
|
@@ -133,4 +135,5 @@ be a big deal; old clients can just include the namespace module.
|
|
133
135
|
Token#ident may be taken away or change without notice.
|
134
136
|
MethNameToken may become a WToken
|
135
137
|
HereBodyToken should really be a string subclass...
|
138
|
+
Newline,EscNl,BareSymbolToken may get renamed
|
136
139
|
|
data/{assert.rb → lib/assert.rb}
RENAMED
@@ -1,5 +1,4 @@
|
|
1
1
|
=begin copyright
|
2
|
-
rubylexer - a ruby lexer written in ruby
|
3
2
|
Copyright (C) 2004,2005 Caleb Clausen
|
4
3
|
|
5
4
|
This library is free software; you can redistribute it and/or
|
@@ -16,16 +15,17 @@
|
|
16
15
|
License along with this library; if not, write to the Free Software
|
17
16
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
17
|
=end
|
19
|
-
require 'set'
|
20
18
|
|
19
|
+
module Kernel
|
20
|
+
def assert(expr,msg="assertion failed")
|
21
|
+
defined? $Debug and $Debug and (expr or raise msg)
|
22
|
+
end
|
21
23
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
$DEBUG and STDERR.print "FIXME: #{s}\n"
|
30
|
-
@@printed.add s
|
24
|
+
@@printed={}
|
25
|
+
def fixme(s)
|
26
|
+
unless @@printed[s]
|
27
|
+
@@printed[s]=1
|
28
|
+
defined? $Debug and $Debug and $stderr.print "FIXME: #{s}\n"
|
29
|
+
end
|
30
|
+
end
|
31
31
|
end
|
@@ -19,15 +19,18 @@
|
|
19
19
|
|
20
20
|
|
21
21
|
|
22
|
-
require
|
23
|
-
require
|
24
|
-
require
|
25
|
-
require
|
26
|
-
|
22
|
+
require 'rubylexer/rulexer' #must be 1st!!!
|
23
|
+
require 'rubylexer/version'
|
24
|
+
require 'rubylexer/token'
|
25
|
+
require 'rubylexer/charhandler'
|
26
|
+
require 'rubylexer/symboltable'
|
27
|
+
#require "io.each_til_charset"
|
28
|
+
require 'rubylexer/context'
|
29
|
+
require 'rubylexer/tokenprinter'
|
27
30
|
|
28
31
|
|
29
32
|
#-----------------------------------
|
30
|
-
class RubyLexer
|
33
|
+
class RubyLexer
|
31
34
|
include NestedContexts
|
32
35
|
|
33
36
|
RUBYSYMOPERATORREX=
|
@@ -39,7 +42,7 @@ class RubyLexer < RuLexer
|
|
39
42
|
#or .. ... ?:
|
40
43
|
#for that use:
|
41
44
|
RUBYNONSYMOPERATORREX=
|
42
|
-
%r{^([
|
45
|
+
%r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
|
43
46
|
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
|
44
47
|
UNSYMOPS=/^[~!]$/ #always unary
|
45
48
|
UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
|
@@ -50,16 +53,18 @@ class RubyLexer < RuLexer
|
|
50
53
|
VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
|
51
54
|
INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
|
52
55
|
BINOPWORDS="(and|or)"
|
53
|
-
NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]
|
54
|
-
NEVERSTARTPARAMLISTFIRST=CharSet[
|
55
|
-
NEVERSTARTPARAMLISTMAXLEN=7
|
56
|
+
NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
|
57
|
+
NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
|
58
|
+
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
|
56
59
|
|
57
60
|
RUBYKEYWORDS=%r{
|
58
|
-
^(alias|#{BINOPWORDS}|not|undef|
|
61
|
+
^(alias|#{BINOPWORDS}|not|undef|end|
|
59
62
|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
|
60
63
|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
|
61
64
|
)$
|
62
65
|
}xo
|
66
|
+
#__END__ should not be in this set... it's handled in start_of_line_directives
|
67
|
+
|
63
68
|
CHARMAPPINGS = {
|
64
69
|
?$ => :dollar_identifier,
|
65
70
|
?@ => :at_identifier,
|
@@ -67,8 +72,7 @@ class RubyLexer < RuLexer
|
|
67
72
|
?A..?Z => :identifier,
|
68
73
|
?_ => :identifier,
|
69
74
|
?0..?9 => :number,
|
70
|
-
|
71
|
-
?' => :single_quote,
|
75
|
+
%{"'} => :double_quote,
|
72
76
|
?` => :back_quote,
|
73
77
|
|
74
78
|
WHSP => :whitespace, #includes \r
|
@@ -83,7 +87,8 @@ class RubyLexer < RuLexer
|
|
83
87
|
|
84
88
|
#these ones could signal either an op or a term
|
85
89
|
?/ => :regex_or_div,
|
86
|
-
"
|
90
|
+
"|" => :conjunction_or_goalpost,
|
91
|
+
">" => :quadriop,
|
87
92
|
"*&" => :star_or_amp, #could be unary
|
88
93
|
"+-" => :plusminus, #could be unary
|
89
94
|
?< => :lessthan,
|
@@ -103,22 +108,27 @@ class RubyLexer < RuLexer
|
|
103
108
|
?# => :comment
|
104
109
|
}
|
105
110
|
|
106
|
-
|
111
|
+
attr_reader :incomplete_here_tokens, :parsestack
|
107
112
|
|
108
113
|
|
109
114
|
#-----------------------------------
|
110
115
|
def initialize(filename,file,linenum=1)
|
111
116
|
super(filename,file, linenum)
|
112
117
|
@start_linenum=linenum
|
113
|
-
@
|
118
|
+
@parsestack=[TopLevelContext.new]
|
114
119
|
@incomplete_here_tokens=[]
|
115
|
-
@
|
120
|
+
@localvars_stack=[SymbolTable.new]
|
116
121
|
@defining_lvar=nil
|
122
|
+
@in_def_name=false
|
117
123
|
|
118
124
|
@toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
|
119
125
|
|
120
126
|
start_of_line_directives
|
121
127
|
end
|
128
|
+
|
129
|
+
def localvars;
|
130
|
+
@localvars_stack.last
|
131
|
+
end
|
122
132
|
|
123
133
|
#-----------------------------------
|
124
134
|
def get1token
|
@@ -129,25 +139,23 @@ class RubyLexer < RuLexer
|
|
129
139
|
|
130
140
|
#check for bizarre token types
|
131
141
|
case result
|
132
|
-
when
|
133
|
-
|
142
|
+
when StillIgnoreToken#,nil
|
143
|
+
result
|
134
144
|
when Token#,String
|
145
|
+
@last_operative_token=result
|
146
|
+
assert !(IgnoreToken===@last_operative_token)
|
147
|
+
result
|
135
148
|
else
|
136
|
-
|
149
|
+
raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
|
137
150
|
end
|
138
|
-
|
139
|
-
@last_operative_token=result
|
140
|
-
|
141
|
-
return result
|
142
151
|
end
|
143
|
-
|
144
152
|
|
145
153
|
|
146
154
|
#-----------------------------------
|
147
155
|
def balanced_braces?
|
148
156
|
|
149
|
-
#@
|
150
|
-
@
|
157
|
+
#@parsestack.empty?
|
158
|
+
@parsestack.size==1 and TopLevelContext===@parsestack.first
|
151
159
|
end
|
152
160
|
|
153
161
|
#-----------------------------------
|
@@ -182,7 +190,7 @@ private
|
|
182
190
|
|
183
191
|
#-----------------------------------
|
184
192
|
def expect_do_or_end_or_nl!(st)
|
185
|
-
@
|
193
|
+
@parsestack.push ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
|
186
194
|
end
|
187
195
|
|
188
196
|
#-----------------------------------
|
@@ -199,31 +207,38 @@ private
|
|
199
207
|
end
|
200
208
|
|
201
209
|
#-----------------------------------
|
202
|
-
WSCHARSET
|
203
|
-
def ignored_tokens(allow_eof=false)
|
210
|
+
WSCHARSET=/[#\\\n\s\t\v\r\f]/
|
211
|
+
def ignored_tokens(allow_eof=false,allow_eol=true)
|
204
212
|
result=[]
|
205
|
-
result<<@moretokens.shift while
|
213
|
+
result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
|
206
214
|
@moretokens.empty? or return result
|
207
|
-
if true
|
208
215
|
loop do
|
209
216
|
unless @moretokens.empty?
|
210
|
-
|
211
|
-
|
217
|
+
case @moretokens.first
|
218
|
+
when StillIgnoreToken
|
219
|
+
when NewlineToken: allow_eol or break
|
220
|
+
else break
|
221
|
+
end
|
212
222
|
else
|
213
|
-
|
223
|
+
|
224
|
+
break unless ch=nextchar
|
225
|
+
ch=ch.chr
|
226
|
+
break unless WSCHARSET===ch
|
227
|
+
break if ch[/[\r\n]/] and !allow_eol
|
214
228
|
end
|
229
|
+
|
215
230
|
|
216
231
|
tok=get1token
|
217
|
-
result<<tok
|
232
|
+
result << tok
|
218
233
|
case tok
|
219
|
-
when NewlineToken
|
220
|
-
when EoiToken
|
221
|
-
when
|
222
|
-
else raise "impossible"
|
234
|
+
when NewlineToken; assert allow_eol; block_given? and yield tok
|
235
|
+
when EoiToken; allow_eof or lexerror tok,"end of file not expected here(2)"
|
236
|
+
when StillIgnoreToken
|
237
|
+
else raise "impossible token: #{tok.inspect}"
|
223
238
|
end
|
224
239
|
end
|
225
240
|
|
226
|
-
|
241
|
+
=begin
|
227
242
|
@whsphandler||=CharHandler.new(self, :==,
|
228
243
|
"#" => :comment,
|
229
244
|
"\n" => :newline,
|
@@ -235,18 +250,18 @@ else
|
|
235
250
|
block_given? and NewlineToken===tok and yield tok
|
236
251
|
result << tok
|
237
252
|
end
|
238
|
-
end
|
253
|
+
=end
|
239
254
|
return result
|
240
255
|
end
|
241
256
|
|
242
257
|
#-----------------------------------
|
243
258
|
def safe_recurse
|
244
259
|
old_moretokens=@moretokens
|
245
|
-
#
|
260
|
+
#old_parsestack=@parsestack.dup
|
246
261
|
@moretokens=[]
|
247
262
|
result= yield @moretokens
|
248
263
|
#assert @incomplete_here_tokens.empty?
|
249
|
-
#assert @
|
264
|
+
#assert @parsestack==old_parsestack
|
250
265
|
@moretokens= old_moretokens.concat @moretokens
|
251
266
|
return result
|
252
267
|
#need to do something with @last_operative_token?
|
@@ -258,7 +273,7 @@ end
|
|
258
273
|
result = ((
|
259
274
|
#order matters here, but it shouldn't
|
260
275
|
#(but til_charset must be last)
|
261
|
-
eat_next_if(
|
276
|
+
eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
|
262
277
|
(eat_next_if('-') and ("-"+getchar)) or
|
263
278
|
(?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
|
264
279
|
))
|
@@ -266,7 +281,7 @@ end
|
|
266
281
|
|
267
282
|
#-----------------------------------
|
268
283
|
def identifier(context=nil)
|
269
|
-
oldpos
|
284
|
+
oldpos= input_position
|
270
285
|
str=identifier_as_string(context)
|
271
286
|
|
272
287
|
#skip keyword processing if 'escaped' as it were, by def, . or ::
|
@@ -279,8 +294,8 @@ end
|
|
279
294
|
@moretokens.unshift(*parse_keywords(str,oldpos) do
|
280
295
|
#if not a keyword,
|
281
296
|
case str
|
282
|
-
when FUNCLIKE_KEYWORDS
|
283
|
-
when VARLIKE_KEYWORDS,RUBYKEYWORDS
|
297
|
+
when FUNCLIKE_KEYWORDS; #do nothing
|
298
|
+
when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
|
284
299
|
end
|
285
300
|
safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
|
286
301
|
end)
|
@@ -290,7 +305,7 @@ end
|
|
290
305
|
#-----------------------------------
|
291
306
|
def identifier_as_string(context)
|
292
307
|
#must begin w/ letter or underscore
|
293
|
-
str=eat_next_if(
|
308
|
+
str=eat_next_if(/[_a-z]/i) or return nil
|
294
309
|
|
295
310
|
#equals, question mark, and exclamation mark
|
296
311
|
#might be allowed at the end in some contexts.
|
@@ -305,18 +320,20 @@ end
|
|
305
320
|
when ?: then [?=, ??, ?!]
|
306
321
|
else [nil,??, ?!]
|
307
322
|
end
|
323
|
+
|
324
|
+
@in_def_name and maybe_eq= ?=
|
308
325
|
|
309
326
|
str<<til_charset(/[^a-z0-9_]/i)
|
310
327
|
|
311
328
|
#look for ?, !, or =, if allowed
|
312
|
-
case b
|
329
|
+
case b=getc
|
313
330
|
when nil #means we're at eof
|
314
331
|
#handling nil here prevents b from ever matching
|
315
332
|
#a nil value of maybe_qm, maybe_ex or maybe_eq
|
316
333
|
when maybe_qm
|
317
334
|
str << b
|
318
335
|
when maybe_ex
|
319
|
-
nc=(nextchar unless
|
336
|
+
nc=(nextchar unless eof?)
|
320
337
|
#does ex appear to be part of a larger operator?
|
321
338
|
if nc==?= #or nc==?~
|
322
339
|
back1char
|
@@ -324,7 +341,7 @@ end
|
|
324
341
|
str << b
|
325
342
|
end
|
326
343
|
when maybe_eq
|
327
|
-
nc=(nextchar unless
|
344
|
+
nc=(nextchar unless eof?)
|
328
345
|
#does eq appear to be part of a larger operator?
|
329
346
|
if nc==?= or nc==?~ or nc==?>
|
330
347
|
back1char
|
@@ -342,34 +359,37 @@ end
|
|
342
359
|
#-----------------------------------
|
343
360
|
#contexts in which comma may appear in ruby:
|
344
361
|
#multiple lhs (terminated by assign op)
|
345
|
-
#multiple rhs (in implicit context)
|
362
|
+
#multiple rhs (in implicit context)
|
346
363
|
#method actual param list (in ( or implicit context)
|
347
364
|
#method formal param list (in ( or implicit context)
|
348
|
-
#block formal param list (in | context)
|
365
|
+
#block formal param list (in | context)
|
366
|
+
#nested multiple rhs
|
367
|
+
#nested multiple lhs
|
368
|
+
#nested block formal list
|
369
|
+
#element reference/assignment (in [] or []= method actual parameter context)
|
349
370
|
#hash immediate (in imm{ context)
|
350
371
|
#array immediate (in imm[ context)
|
351
|
-
#
|
352
|
-
#list after for
|
372
|
+
#list between 'for' and 'in'
|
353
373
|
#list after rescue
|
354
374
|
#list after when
|
355
375
|
#list after undef
|
356
376
|
|
357
|
-
#note: comma in parens not around a param list is illegal
|
377
|
+
#note: comma in parens not around a param list or lhs or rhs is illegal
|
358
378
|
|
359
379
|
#-----------------------------------
|
360
380
|
#a comma has been seen. are we in an
|
361
381
|
#lvalue list or some other construct that uses commas?
|
362
382
|
def comma_in_lvalue_list?
|
363
|
-
not ListContext===@
|
383
|
+
@parsestack.last.lhs= (not ListContext===@parsestack.last)
|
364
384
|
end
|
365
385
|
|
366
386
|
#-----------------------------------
|
367
387
|
def in_lvar_define_state
|
368
388
|
#@defining_lvar is a hack
|
369
|
-
@defining_lvar or case ctx=@
|
370
|
-
when ForSMContext
|
371
|
-
when RescueSMContext
|
372
|
-
when
|
389
|
+
@defining_lvar or case ctx=@parsestack.last
|
390
|
+
when ForSMContext; ctx.state==:for
|
391
|
+
when RescueSMContext; ctx.state==:arrow
|
392
|
+
#when BlockParamListLhsContext; true
|
373
393
|
end
|
374
394
|
end
|
375
395
|
|
@@ -391,66 +411,102 @@ end
|
|
391
411
|
#look for and ignore local variable names
|
392
412
|
|
393
413
|
assert String===name
|
394
|
-
|
395
|
-
#fixme: keywords shouldn't be treated specially after :: and .
|
396
414
|
|
397
415
|
#maybe_local really means 'maybe local or constant'
|
398
416
|
maybe_local=case name
|
399
|
-
when /[^a-z_0-9]$/i
|
400
|
-
when /^[a-z_]
|
401
|
-
when /^[A-Z]
|
417
|
+
when /[^a-z_0-9]$/i; #do nothing
|
418
|
+
when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
|
419
|
+
when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
|
402
420
|
end
|
403
421
|
|
404
422
|
assert(@moretokens.empty?)
|
423
|
+
|
424
|
+
oldlast=@last_operative_token
|
405
425
|
|
406
426
|
tok=@last_operative_token=VarNameToken.new(name,pos)
|
407
427
|
|
408
|
-
oldpos
|
428
|
+
oldpos= input_position
|
409
429
|
sawnl=false
|
410
430
|
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
|
411
|
-
sawnl ||
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
431
|
+
if sawnl || eof?
|
432
|
+
if maybe_local then
|
433
|
+
if in_lvar_define_state
|
434
|
+
if /^[a-z_][a-zA-Z_0-9]*$/===name
|
435
|
+
assert !(lasttok===/^(\.|::)$/)
|
436
|
+
localvars[name]=true
|
437
|
+
else
|
438
|
+
lexerror tok,"not a valid variable name: #{name}"
|
439
|
+
end
|
440
|
+
return result.unshift(tok)
|
441
|
+
end
|
442
|
+
return result.unshift(tok) #if is_const
|
443
|
+
else
|
444
|
+
return result.unshift(
|
445
|
+
MethNameToken.new(name,pos), #insert implicit parens right after tok
|
446
|
+
ImplicitParamListStartToken.new( oldpos),
|
447
|
+
ImplicitParamListEndToken.new( oldpos)
|
448
|
+
)
|
416
449
|
end
|
417
|
-
|
450
|
+
end
|
418
451
|
|
419
452
|
#if next op is assignment (or comma in lvalue list)
|
420
453
|
#then omit implicit parens
|
421
454
|
assignment_coming=case nc=nextchar
|
422
|
-
when
|
423
|
-
when
|
424
|
-
when
|
425
|
-
when
|
426
|
-
when
|
455
|
+
when ?=; not /^=[>=~]$/===readahead(2)
|
456
|
+
when ?,; comma_in_lvalue_list?
|
457
|
+
when ?); last_context_not_implicit.lhs
|
458
|
+
when ?>,?<; /^(.)\1=$/===readahead(3)
|
459
|
+
when ?*,?&; /^(.)\1?=/===readahead(3)
|
460
|
+
when ?|; /^\|\|?=/===readahead(3) or
|
461
|
+
#is it a goalpost?
|
462
|
+
BlockParamListLhsContext===last_context_not_implicit &&
|
463
|
+
readahead(2)[1] != ?|
|
464
|
+
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
|
427
465
|
end
|
428
|
-
if (assignment_coming or in_lvar_define_state)
|
466
|
+
if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
|
429
467
|
tok=VarNameToken.new(name,pos)
|
430
468
|
if /[^a-z_0-9]$/i===name
|
431
469
|
lexerror tok,"not a valid variable name: #{name}"
|
432
470
|
elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
|
433
|
-
|
471
|
+
localvars[name]=true
|
434
472
|
end
|
435
473
|
return result.unshift(tok)
|
436
474
|
end
|
437
|
-
|
438
|
-
implicit_parens_to_emit=
|
439
|
-
|
475
|
+
|
476
|
+
implicit_parens_to_emit=
|
477
|
+
if assignment_coming
|
478
|
+
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
|
479
|
+
0
|
480
|
+
else
|
481
|
+
case nc
|
482
|
+
when nil: 2
|
483
|
+
when ?!; readahead(2)=='!=' ? 2 : 1
|
440
484
|
when NEVERSTARTPARAMLISTFIRST
|
441
485
|
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
|
442
|
-
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_
|
443
|
-
when ?{
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
when
|
450
|
-
|
486
|
+
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
|
487
|
+
when ?{
|
488
|
+
maybe_local=false
|
489
|
+
x=2
|
490
|
+
x-=1 if /\A(return|break|next)\Z/===name and
|
491
|
+
!(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
|
492
|
+
x
|
493
|
+
when ?(;
|
494
|
+
maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
|
495
|
+
when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
|
496
|
+
when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
|
497
|
+
when ?:,??; next2=readahead(2);
|
498
|
+
WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
|
499
|
+
# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
|
500
|
+
when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
|
501
|
+
when ?[; ws_toks.empty? ? 2 : 3
|
502
|
+
when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
|
503
|
+
else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
|
504
|
+
end
|
505
|
+
end
|
506
|
+
|
507
|
+
if is_const and implicit_parens_to_emit==3 then
|
508
|
+
implicit_parens_to_emit=1
|
451
509
|
end
|
452
|
-
|
453
|
-
implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1
|
454
510
|
|
455
511
|
tok=if maybe_local and implicit_parens_to_emit>=2
|
456
512
|
implicit_parens_to_emit=0
|
@@ -459,15 +515,18 @@ end
|
|
459
515
|
MethNameToken
|
460
516
|
end.new(name,pos)
|
461
517
|
|
462
|
-
|
463
518
|
case implicit_parens_to_emit
|
464
|
-
when 2
|
519
|
+
when 2;
|
465
520
|
result.unshift ImplicitParamListStartToken.new(oldpos),
|
466
521
|
ImplicitParamListEndToken.new(oldpos)
|
467
|
-
when 1,3
|
468
|
-
|
469
|
-
|
470
|
-
|
522
|
+
when 1,3;
|
523
|
+
arr,pass=*param_list_coming_with_2_or_more_params?
|
524
|
+
result.push( *arr )
|
525
|
+
unless pass
|
526
|
+
result.unshift ImplicitParamListStartToken.new(oldpos)
|
527
|
+
@parsestack.push ParamListContextNoParen.new(@linenum)
|
528
|
+
end
|
529
|
+
when 0; #do nothing
|
471
530
|
else raise 'invalid value of implicit_parens_to_emit'
|
472
531
|
end
|
473
532
|
return result.unshift(tok)
|
@@ -476,22 +535,43 @@ end
|
|
476
535
|
# '\n (unescaped) and or'
|
477
536
|
# 'then else elsif rescue ensure (illegal in value context)'
|
478
537
|
|
479
|
-
# 'need to pop noparen from
|
538
|
+
# 'need to pop noparen from parsestack on these tokens: (in operator context)'
|
480
539
|
# 'not ok:'
|
481
540
|
# 'not (but should it be?)'
|
482
541
|
end
|
483
542
|
|
543
|
+
#-----------------------------------
|
544
|
+
def param_list_coming_with_2_or_more_params?
|
545
|
+
WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
|
546
|
+
basesize=@parsestack.size
|
547
|
+
result=[get1token]
|
548
|
+
pass=loop{
|
549
|
+
tok=get1token
|
550
|
+
result<<tok
|
551
|
+
if @parsestack.size==basesize
|
552
|
+
break false
|
553
|
+
elsif ','==tok.to_s and @parsestack.size==basesize+1
|
554
|
+
break true
|
555
|
+
elsif EoiToken===tok
|
556
|
+
lexerror tok, "unexpected eof in parameter list"
|
557
|
+
end
|
558
|
+
}
|
559
|
+
return [result,pass]
|
560
|
+
end
|
561
|
+
|
484
562
|
#-----------------------------------
|
485
563
|
CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
|
486
564
|
ParamListContextNoParen=>ImplicitParamListEndToken,
|
487
|
-
|
565
|
+
WhenParamListContext=>KwParamListEndToken,
|
566
|
+
RescueSMContext=>KwParamListEndToken
|
488
567
|
}
|
489
568
|
def abort_noparens!(str='')
|
490
569
|
#assert @moretokens.empty?
|
491
570
|
result=[]
|
492
|
-
while klass=CONTEXT2ENDTOK[@
|
493
|
-
result << klass.new(
|
494
|
-
|
571
|
+
while klass=CONTEXT2ENDTOK[@parsestack.last.class]
|
572
|
+
result << klass.new(input_position-str.length)
|
573
|
+
break if RescueSMContext===@parsestack.last
|
574
|
+
@parsestack.pop
|
495
575
|
end
|
496
576
|
return result
|
497
577
|
end
|
@@ -501,13 +581,13 @@ if false #no longer used
|
|
501
581
|
def abort_1_noparen!(offs=0)
|
502
582
|
assert @moretokens.empty?
|
503
583
|
result=[]
|
504
|
-
while AssignmentRhsContext===@
|
505
|
-
@
|
506
|
-
result << AssignmentRhsListEndToken.new(
|
584
|
+
while AssignmentRhsContext===@parsestack.last
|
585
|
+
@parsestack.pop
|
586
|
+
result << AssignmentRhsListEndToken.new(input_position-offs)
|
507
587
|
end
|
508
|
-
ParamListContextNoParen===@
|
509
|
-
@
|
510
|
-
result << ImplicitParamListEndToken.new(
|
588
|
+
ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
|
589
|
+
@parsestack.pop
|
590
|
+
result << ImplicitParamListEndToken.new(input_position-offs)
|
511
591
|
return result
|
512
592
|
end
|
513
593
|
end
|
@@ -523,30 +603,31 @@ end
|
|
523
603
|
case str
|
524
604
|
when "end"
|
525
605
|
result.unshift(*abort_noparens!(str))
|
526
|
-
@
|
606
|
+
@parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?
|
527
607
|
|
528
608
|
=begin not needed?
|
529
|
-
if ExpectDoOrNlContext===@
|
530
|
-
@
|
531
|
-
assert @
|
609
|
+
if ExpectDoOrNlContext===@parsestack.last
|
610
|
+
@parsestack.pop
|
611
|
+
assert @parsestack.last.starter[/^(while|until|for)$/]
|
532
612
|
end
|
533
613
|
=end
|
534
614
|
|
535
|
-
WantsEndContext===@
|
536
|
-
ctx=@
|
615
|
+
WantsEndContext===@parsestack.last or lexerror result.last, 'unbalanced end'
|
616
|
+
ctx=@parsestack.pop
|
537
617
|
start,line=ctx.starter,ctx.linenum
|
538
618
|
BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
|
539
|
-
/^(
|
619
|
+
/^(do)$/===start and localvars.end_block
|
620
|
+
/^(class|module|def)$/===start and @localvars_stack.pop
|
540
621
|
|
541
622
|
when "class","module"
|
542
623
|
result.first.has_end!
|
543
|
-
@
|
544
|
-
@
|
545
|
-
|
624
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
625
|
+
@localvars_stack.push SymbolTable.new
|
626
|
+
|
546
627
|
when "if","unless" #could be infix form without end
|
547
628
|
if after_nonid_op?{false} #prefix form
|
548
629
|
result.first.has_end!
|
549
|
-
@
|
630
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
550
631
|
|
551
632
|
|
552
633
|
else #infix form
|
@@ -554,11 +635,11 @@ end
|
|
554
635
|
end
|
555
636
|
when "begin","case"
|
556
637
|
result.first.has_end!
|
557
|
-
@
|
638
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
558
639
|
when "while","until" #could be infix form without end
|
559
640
|
if after_nonid_op?{false} #prefix form
|
560
641
|
result.first.has_end!
|
561
|
-
@
|
642
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
562
643
|
expect_do_or_end_or_nl! str
|
563
644
|
|
564
645
|
else #infix form
|
@@ -566,24 +647,26 @@ end
|
|
566
647
|
end
|
567
648
|
when "for"
|
568
649
|
result.first.has_end!
|
569
|
-
|
650
|
+
result.push KwParamListStartToken.new(offset+str.length)
|
651
|
+
# corresponding EndToken emitted leaving ForContext ("in" branch, below)
|
652
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
570
653
|
#expect_do_or_end_or_nl! str #handled by ForSMContext now
|
571
|
-
@
|
654
|
+
@parsestack.push ForSMContext.new(@linenum)
|
572
655
|
when "do"
|
573
656
|
result.unshift(*abort_noparens!(str))
|
574
|
-
if ExpectDoOrNlContext===@
|
575
|
-
@
|
576
|
-
assert WantsEndContext===@
|
657
|
+
if ExpectDoOrNlContext===@parsestack.last
|
658
|
+
@parsestack.pop
|
659
|
+
assert WantsEndContext===@parsestack.last
|
577
660
|
else
|
578
661
|
result.last.has_end!
|
579
|
-
@
|
580
|
-
|
662
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
663
|
+
localvars.start_block
|
581
664
|
block_param_list_lookahead
|
582
665
|
end
|
583
666
|
when "def"
|
584
667
|
result.first.has_end!
|
585
|
-
@
|
586
|
-
@
|
668
|
+
@parsestack.push WantsEndContext.new("def",@linenum)
|
669
|
+
@localvars_stack.push SymbolTable.new
|
587
670
|
safe_recurse { |aa|
|
588
671
|
@last_operative_token=KeywordToken.new "def" #hack
|
589
672
|
result.concat ignored_tokens
|
@@ -591,7 +674,7 @@ end
|
|
591
674
|
#read an expr like a.b.c or a::b::c
|
592
675
|
#or (expr).b.c
|
593
676
|
if nextchar==?( #look for optional parenthesised head
|
594
|
-
old_size=@
|
677
|
+
old_size=@parsestack.size
|
595
678
|
parencount=0
|
596
679
|
begin
|
597
680
|
tok=get1token
|
@@ -601,22 +684,58 @@ end
|
|
601
684
|
end
|
602
685
|
EoiToken===tok and lexerror tok, "eof in def header"
|
603
686
|
result<<tok
|
604
|
-
end until parencount==0 #@
|
687
|
+
end until parencount==0 #@parsestack.size==old_size
|
605
688
|
else #no parentheses, all tail
|
606
689
|
@last_operative_token=KeywordToken.new "." #hack hack
|
607
|
-
result
|
608
|
-
|
690
|
+
tokindex=result.size
|
691
|
+
result << tok=symbol(false,false)
|
692
|
+
name=tok.to_s
|
693
|
+
assert !in_lvar_define_state
|
694
|
+
|
695
|
+
#maybe_local really means 'maybe local or constant'
|
696
|
+
maybe_local=case name
|
697
|
+
when /[^a-z_0-9]$/i; #do nothing
|
698
|
+
when /^[@$]/; true
|
699
|
+
when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
|
700
|
+
when /^[a-z_]/; localvars===name
|
701
|
+
when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
|
702
|
+
end
|
703
|
+
if !ty and maybe_local
|
704
|
+
result.push( *ignored_tokens(false,false) )
|
705
|
+
nc=nextchar
|
706
|
+
if nc==?: || nc==?.
|
707
|
+
ty=VarNameToken
|
708
|
+
end
|
709
|
+
end
|
710
|
+
unless ty
|
711
|
+
ty=MethNameToken
|
712
|
+
endofs=tok.offset+tok.to_s.length
|
713
|
+
result[tokindex+1...tokindex+1]=
|
714
|
+
[ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
|
715
|
+
end
|
716
|
+
|
717
|
+
assert result[tokindex].equal?(tok)
|
718
|
+
result[tokindex]=ty.new(tok.to_s,tok.offset)
|
719
|
+
|
720
|
+
|
721
|
+
#if a.b.c.d is seen, a, b, and c
|
609
722
|
#should be considered maybe varname instead of methnames.
|
610
723
|
#the last (d in the example) is always considered a methname;
|
611
724
|
#it's what's being defined.
|
725
|
+
#b and c should be considered varnames only if
|
726
|
+
#they are capitalized and preceded by :: .
|
727
|
+
#a could even be a keyword (e.g. self or block_given?).
|
612
728
|
end
|
613
729
|
#read tail: .b.c.d etc
|
614
|
-
@last_operative_token=
|
730
|
+
result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
|
731
|
+
###@last_operative_token=result.last #naive
|
732
|
+
assert !(IgnoreToken===@last_operative_token)
|
615
733
|
state=:expect_op
|
734
|
+
@in_def_name=true
|
616
735
|
loop do
|
617
736
|
|
618
737
|
#look for start of parameter list
|
619
|
-
nc=(@moretokens.
|
738
|
+
nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
|
620
739
|
if state==:expect_op and /^[a-z_(&*]/i===nc
|
621
740
|
result.concat def_param_list
|
622
741
|
break
|
@@ -627,8 +746,8 @@ end
|
|
627
746
|
case tok
|
628
747
|
when EoiToken
|
629
748
|
lexerror tok,'unexpected eof in def header'
|
630
|
-
when
|
631
|
-
when MethNameToken
|
749
|
+
when StillIgnoreToken
|
750
|
+
when MethNameToken ,VarNameToken # /^[a-z_]/i.token_pat
|
632
751
|
lexerror tok,'expected . or ::' unless state==:expect_name
|
633
752
|
state=:expect_op
|
634
753
|
when /^(\.|::)$/.token_pat
|
@@ -642,6 +761,7 @@ end
|
|
642
761
|
"#{tok}:#{tok.class}")
|
643
762
|
end
|
644
763
|
end
|
764
|
+
@in_def_name=false
|
645
765
|
}
|
646
766
|
when "alias"
|
647
767
|
safe_recurse { |a|
|
@@ -663,6 +783,7 @@ end
|
|
663
783
|
tok or lexerror(result.first,"bad symbol in undef")
|
664
784
|
result<< tok
|
665
785
|
@last_operative_token=tok
|
786
|
+
assert !(IgnoreToken===@last_operative_token)
|
666
787
|
|
667
788
|
sawnl=false
|
668
789
|
result.concat ignored_tokens(true){|nl| sawnl=true}
|
@@ -674,26 +795,47 @@ end
|
|
674
795
|
}
|
675
796
|
|
676
797
|
# when "defined?"
|
677
|
-
# huh
|
678
798
|
#defined? might have a baresymbol following it
|
679
799
|
#does it need to be handled specially?
|
800
|
+
#it would seem not.....
|
680
801
|
|
681
802
|
when "when"
|
803
|
+
#abort_noparens! emits EndToken on leaving context
|
682
804
|
result.unshift(*abort_noparens!(str))
|
683
|
-
|
805
|
+
result.push KwParamListStartToken.new( offset+str.length)
|
806
|
+
@parsestack.push WhenParamListContext.new(str,@linenum)
|
684
807
|
|
685
808
|
when "rescue"
|
686
|
-
|
687
|
-
|
809
|
+
unless after_nonid_op? {false}
|
810
|
+
#rescue needs to be treated differently when in operator context...
|
811
|
+
#i think no RescueSMContext should be pushed on the stack...
|
812
|
+
#plus, the rescue token should be marked as infix
|
813
|
+
result.first.set_infix!
|
814
|
+
else
|
815
|
+
result.push KwParamListStartToken.new(offset+str.length)
|
816
|
+
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
|
817
|
+
result.unshift(*abort_noparens!(str))
|
818
|
+
@parsestack.push RescueSMContext.new(@linenum)
|
819
|
+
end
|
688
820
|
|
689
|
-
when "then"
|
821
|
+
when "then"
|
822
|
+
result.unshift(*abort_noparens!(str))
|
823
|
+
@parsestack.last.see self,:then
|
824
|
+
|
825
|
+
when "in"
|
826
|
+
result.unshift KwParamListEndToken.new( offset)
|
690
827
|
result.unshift(*abort_noparens!(str))
|
691
|
-
@
|
828
|
+
@parsestack.last.see self,:in
|
692
829
|
|
693
|
-
when
|
830
|
+
when /\A(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})\Z/o
|
694
831
|
result.unshift(*abort_noparens!(str))
|
695
832
|
|
696
|
-
when
|
833
|
+
when /\A(return|break|next)\Z/
|
834
|
+
result=yield
|
835
|
+
result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
|
836
|
+
|
837
|
+
when FUNCLIKE_KEYWORDS
|
838
|
+
result=yield
|
697
839
|
|
698
840
|
when RUBYKEYWORDS
|
699
841
|
#do nothing
|
@@ -706,6 +848,36 @@ end
|
|
706
848
|
end
|
707
849
|
|
708
850
|
|
851
|
+
#-----------------------------------
|
852
|
+
def parsestack_lastnonassign_is?(obj)
|
853
|
+
@parsestack.reverse_each{|ctx|
|
854
|
+
case ctx
|
855
|
+
# when klass: return true
|
856
|
+
when AssignmentRhsContext
|
857
|
+
else return ctx.object_id==obj.object_id
|
858
|
+
end
|
859
|
+
}
|
860
|
+
end
|
861
|
+
|
862
|
+
#-----------------------------------
|
863
|
+
#what's inside goalposts (the block formal parameter list)
|
864
|
+
#is considered the left hand side of an assignment.
|
865
|
+
#inside goalposts, a local variable is declared if
|
866
|
+
#it has one of the following tokens on both sides:
|
867
|
+
# , (if directly inside goalposts or nested lhs)
|
868
|
+
# | (as a goalpost)
|
869
|
+
# * or & (unary only)
|
870
|
+
# ( or ) (if they form a nested left hand side)
|
871
|
+
#parens form a nested lhs if they're not part of an actual
|
872
|
+
#parameter list and have a comma directly in them somewhere
|
873
|
+
#a nested lhs _must_ have a comma in it somewhere. this is
|
874
|
+
#not legal:
|
875
|
+
# (foo)=[1]
|
876
|
+
#whereas this is:
|
877
|
+
# (foo,)=[1]
|
878
|
+
|
879
|
+
|
880
|
+
|
709
881
|
#-----------------------------------
|
710
882
|
def block_param_list_lookahead
|
711
883
|
safe_recurse{ |la|
|
@@ -713,27 +885,45 @@ end
|
|
713
885
|
a=ignored_tokens
|
714
886
|
|
715
887
|
if eat_next_if(?|)
|
716
|
-
a<<KeywordToken.new("|"
|
888
|
+
a<<KeywordToken.new("|", input_position-1)
|
889
|
+
if true
|
890
|
+
@parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
|
891
|
+
nextchar==?| and a.push NoWsToken.new(input_position)
|
892
|
+
else
|
717
893
|
if eat_next_if(?|)
|
718
|
-
a.concat [NoWsToken.new(
|
719
|
-
KeywordToken.new('|'
|
894
|
+
a.concat [NoWsToken.new(input_position-1),
|
895
|
+
KeywordToken.new('|', input_position-1)]
|
720
896
|
else
|
721
897
|
assert !@defining_lvar
|
722
898
|
@defining_lvar=true
|
723
899
|
assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
|
724
|
-
@
|
725
|
-
#block param initializers
|
726
|
-
|
900
|
+
@parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
|
901
|
+
#block param initializers ARE supported here, even though ruby doesn't allow them!
|
902
|
+
tok=nil
|
903
|
+
loop do
|
727
904
|
tok=get1token
|
728
|
-
|
905
|
+
case tok
|
906
|
+
when EoiToken; lexerror tok,"eof in block parameter list"
|
907
|
+
when AssignmentRhsListStartToken; @defining_lvar=false
|
908
|
+
when AssignmentRhsListEndToken; parsestack_lastnonassign_is?(mycontext) and @defining_lvar=true
|
909
|
+
end
|
910
|
+
|
911
|
+
tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
|
729
912
|
a<<tok
|
730
|
-
end
|
731
|
-
assert@defining_lvar
|
913
|
+
end
|
914
|
+
assert@defining_lvar || AssignmentRhsContext===@parsestack.last
|
732
915
|
@defining_lvar=false
|
733
|
-
|
734
|
-
|
916
|
+
while AssignmentRhsContext===@parsestack.last
|
917
|
+
a.push( *abort_noparens!('|') )
|
918
|
+
end
|
919
|
+
|
920
|
+
@parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
|
921
|
+
@parsestack.pop
|
922
|
+
|
923
|
+
a<<KeywordToken.new('|',tok.offset)
|
735
924
|
@moretokens.empty? or
|
736
925
|
fixme %#moretokens might be set from get1token call above...might be bad#
|
926
|
+
end
|
737
927
|
end
|
738
928
|
end
|
739
929
|
|
@@ -755,8 +945,9 @@ end
|
|
755
945
|
#then match the following tokens until
|
756
946
|
#the matching endbrace is found
|
757
947
|
def def_param_list
|
948
|
+
@in_def_name=false
|
758
949
|
result=[]
|
759
|
-
normal_comma_level=
|
950
|
+
normal_comma_level=old_parsestack_size=@parsestack.size
|
760
951
|
safe_recurse { |a|
|
761
952
|
assert(@moretokens.empty?)
|
762
953
|
assert((not IgnoreToken===@moretokens[0]))
|
@@ -770,9 +961,9 @@ end
|
|
770
961
|
assert(tok==='(')
|
771
962
|
|
772
963
|
|
773
|
-
#
|
964
|
+
#parsestack was changed by get1token above...
|
774
965
|
normal_comma_level+=1
|
775
|
-
assert(normal_comma_level==@
|
966
|
+
assert(normal_comma_level==@parsestack.size)
|
776
967
|
endingblock=proc{|tok| tok===')' }
|
777
968
|
else
|
778
969
|
endingblock=proc{|tok| tok===';' or NewlineToken===tok}
|
@@ -785,36 +976,48 @@ end
|
|
785
976
|
#read local parameter names
|
786
977
|
loop do
|
787
978
|
expect_name=(@last_operative_token===',' and
|
788
|
-
normal_comma_level==@
|
979
|
+
normal_comma_level==@parsestack.size)
|
789
980
|
expect_name and @defining_lvar||=true
|
790
981
|
result << tok=get1token
|
791
982
|
lexerror tok, "unexpected eof in def header" if EoiToken===tok
|
792
983
|
|
793
984
|
#break if at end of param list
|
794
985
|
endingblock===tok and
|
795
|
-
|
986
|
+
old_parsestack_size>=@parsestack.size and break
|
796
987
|
|
797
988
|
#next token is a local var name
|
798
989
|
#(or the one after that if unary ops present)
|
799
990
|
#result.concat ignored_tokens
|
800
|
-
expect_name
|
801
|
-
|
802
|
-
|
991
|
+
if expect_name
|
992
|
+
case tok
|
993
|
+
when IgnoreToken #, /^[A-Z]/ #do nothing
|
994
|
+
when /^,$/.token_pat #hack
|
995
|
+
|
996
|
+
|
997
|
+
when VarNameToken
|
803
998
|
assert@defining_lvar
|
804
999
|
@defining_lvar=false
|
805
1000
|
assert((not @last_operative_token===','))
|
806
|
-
|
1001
|
+
when /^[&*]$/.token_pat #unary form...
|
807
1002
|
#a NoWsToken is also expected... read it now
|
808
1003
|
result.concat maybe_no_ws_token #not needed?
|
809
1004
|
@last_operative_token=KeywordToken.new ','
|
810
|
-
|
1005
|
+
else
|
1006
|
+
lexerror tok,"unfamiliar var name '#{tok}'"
|
1007
|
+
end
|
1008
|
+
elsif /^,$/.token_pat===tok and
|
1009
|
+
normal_comma_level+1==@parsestack.size and
|
1010
|
+
AssignmentRhsContext===@parsestack.last
|
1011
|
+
#seeing comma here should end implicit rhs started within the param list
|
1012
|
+
result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
|
1013
|
+
@parsestack.pop
|
811
1014
|
end
|
812
1015
|
end
|
813
1016
|
|
814
1017
|
@defining_lvar=false
|
815
1018
|
|
816
1019
|
|
817
|
-
assert(@
|
1020
|
+
assert(@parsestack.size <= old_parsestack_size)
|
818
1021
|
assert(endingblock[tok])
|
819
1022
|
|
820
1023
|
#hack: force next token to look like start of a
|
@@ -846,19 +1049,19 @@ end
|
|
846
1049
|
end
|
847
1050
|
|
848
1051
|
#-----------------------------------
|
849
|
-
#handle * in ruby code. is unary or binary operator?
|
1052
|
+
#handle * & in ruby code. is unary or binary operator?
|
850
1053
|
def star_or_amp(ch)
|
851
1054
|
assert('*&'[ch])
|
852
|
-
|
1055
|
+
want_unary=unary_op_expected? ch
|
1056
|
+
result=(quadriop ch)
|
1057
|
+
if want_unary
|
853
1058
|
#readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
|
854
|
-
result
|
1059
|
+
assert OperatorToken===result
|
1060
|
+
result.unary=true #result should distinguish unary+binary *&
|
855
1061
|
WHSPLF[nextchar.chr] or
|
856
|
-
@moretokens << NoWsToken.new(
|
857
|
-
return result
|
858
|
-
else
|
859
|
-
return(quadriop ch)
|
1062
|
+
@moretokens << NoWsToken.new(input_position)
|
860
1063
|
end
|
861
|
-
|
1064
|
+
result
|
862
1065
|
end
|
863
1066
|
|
864
1067
|
#-----------------------------------
|
@@ -868,7 +1071,7 @@ end
|
|
868
1071
|
getchar
|
869
1072
|
NumberToken.new getchar_maybe_escape
|
870
1073
|
else
|
871
|
-
@
|
1074
|
+
@parsestack.push TernaryContext.new(@linenum)
|
872
1075
|
KeywordToken.new getchar #operator
|
873
1076
|
end
|
874
1077
|
end
|
@@ -888,18 +1091,19 @@ end
|
|
888
1091
|
end
|
889
1092
|
|
890
1093
|
#-----------------------------------
|
891
|
-
#return true if tok corresponds to a variable or constant,
|
892
|
-
#
|
1094
|
+
#return true if last tok corresponds to a variable or constant,
|
1095
|
+
#false if it's for a method, nil for something else
|
1096
|
+
#we assume it is a valid token with a correctly formed name.
|
893
1097
|
#...should really be called was_var_name
|
894
1098
|
def is_var_name?
|
895
1099
|
(tok=@last_operative_token)
|
896
1100
|
|
897
1101
|
s=tok.to_s
|
898
1102
|
case s
|
899
|
-
when /[^a-z_0-9]$/i
|
900
|
-
when /^[a-z_]
|
901
|
-
when /^[A-Z]
|
902
|
-
when /^[@$<]
|
1103
|
+
when /[^a-z_0-9]$/i; false
|
1104
|
+
when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
|
1105
|
+
when /^[A-Z]/; VarNameToken===tok
|
1106
|
+
when /^[@$<]/; true
|
903
1107
|
else raise "not var or method name: #{s}"
|
904
1108
|
end
|
905
1109
|
end
|
@@ -907,21 +1111,26 @@ end
|
|
907
1111
|
#-----------------------------------
|
908
1112
|
def colon_quote_expected?(ch) #yukko hack
|
909
1113
|
assert ':?'[ch]
|
910
|
-
readahead(2)[/^(\?[^#{WHSPLF}]|:[
|
1114
|
+
readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false
|
911
1115
|
|
912
1116
|
after_nonid_op? {
|
913
1117
|
#possible func-call as operator
|
914
1118
|
|
915
|
-
|
1119
|
+
not is_var_name? and
|
1120
|
+
if ch==':'
|
1121
|
+
not TernaryContext===@parsestack.last
|
1122
|
+
else
|
1123
|
+
!readahead(3)[/^\?[a-z0-9_]{2}/i]
|
1124
|
+
end
|
916
1125
|
}
|
917
1126
|
end
|
918
1127
|
|
919
1128
|
#-----------------------------------
|
920
1129
|
def symbol_or_op(ch)
|
921
|
-
startpos
|
1130
|
+
startpos= input_position
|
922
1131
|
qe= colon_quote_expected?(ch)
|
923
1132
|
lastchar=prevchar
|
924
|
-
eat_next_if(ch) or raise "needed: "+ch
|
1133
|
+
eat_next_if(ch[0]) or raise "needed: "+ch
|
925
1134
|
|
926
1135
|
#handle quoted symbols like :"foobar", :"[]"
|
927
1136
|
qe and return symbol(':')
|
@@ -932,13 +1141,13 @@ end
|
|
932
1141
|
@moretokens.push(*abort_noparens!(':'))
|
933
1142
|
|
934
1143
|
#end ternary context, if any
|
935
|
-
@
|
1144
|
+
@parsestack.last.see self,:colon
|
936
1145
|
|
937
|
-
TernaryContext===@
|
1146
|
+
TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
|
938
1147
|
|
939
|
-
if ExpectDoOrNlContext===@
|
940
|
-
@
|
941
|
-
assert @
|
1148
|
+
if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
|
1149
|
+
@parsestack.pop
|
1150
|
+
assert @parsestack.last.starter[/^(while|until|for)$/]
|
942
1151
|
end
|
943
1152
|
|
944
1153
|
@moretokens.push KeywordToken.new(':',startpos)
|
@@ -965,17 +1174,17 @@ end
|
|
965
1174
|
#-----------------------------------
|
966
1175
|
def symbol(notbare,couldbecallsite=!notbare)
|
967
1176
|
assert !couldbecallsite
|
968
|
-
start
|
1177
|
+
start= input_position
|
969
1178
|
notbare and start-=1
|
970
1179
|
klass=(notbare ? SymbolToken : MethNameToken)
|
971
1180
|
|
972
1181
|
#look for operators
|
973
1182
|
opmatches=readahead(3)[RUBYSYMOPERATORREX]
|
974
|
-
result= opmatches ?
|
1183
|
+
result= opmatches ? read(opmatches.size) :
|
975
1184
|
case nc=nextchar
|
976
1185
|
when ?" then assert notbare;double_quote('"')
|
977
1186
|
when ?' then assert notbare;double_quote("'")
|
978
|
-
when ?` then
|
1187
|
+
when ?` then read(1)
|
979
1188
|
when ?@ then at_identifier.to_s
|
980
1189
|
when ?$ then dollar_identifier.to_s
|
981
1190
|
when ?_,?a..?z then identifier_as_string(?:)
|
@@ -991,19 +1200,24 @@ end
|
|
991
1200
|
return lexerror(klass.new(result,start),error)
|
992
1201
|
end
|
993
1202
|
|
1203
|
+
def merge_assignment_op_in_setter_callsites?
|
1204
|
+
false
|
1205
|
+
end
|
994
1206
|
#-----------------------------------
|
995
1207
|
def callsite_symbol(tok_to_errify)
|
996
|
-
start
|
1208
|
+
start= input_position
|
997
1209
|
|
998
1210
|
#look for operators
|
999
1211
|
opmatches=readahead(3)[RUBYSYMOPERATORREX]
|
1000
|
-
return [opmatches ?
|
1212
|
+
return [opmatches ? read(opmatches.size) :
|
1001
1213
|
case nc=nextchar
|
1002
|
-
when ?` then
|
1003
|
-
when ?_,?a..?z,?A..?Z then
|
1214
|
+
when ?` then read(1)
|
1215
|
+
when ?_,?a..?z,?A..?Z then
|
1216
|
+
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
|
1217
|
+
identifier_as_string(context)
|
1004
1218
|
else
|
1005
1219
|
@last_operative_token=KeywordToken.new(';')
|
1006
|
-
lexerror(tok_to_errify,"unexpected char starting symbol: #{nc.chr}")
|
1220
|
+
lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
|
1007
1221
|
nil
|
1008
1222
|
end, start
|
1009
1223
|
]
|
@@ -1011,10 +1225,10 @@ end
|
|
1011
1225
|
|
1012
1226
|
#-----------------------------------
|
1013
1227
|
def here_header
|
1014
|
-
|
1228
|
+
read(2)=='<<' or raise "parser insanity"
|
1015
1229
|
|
1016
1230
|
dash=eat_next_if(?-)
|
1017
|
-
quote=eat_next_if(
|
1231
|
+
quote=eat_next_if( /['"`]/)
|
1018
1232
|
if quote
|
1019
1233
|
ender=til_charset(/[#{quote}]/)
|
1020
1234
|
(quote==getchar) or
|
@@ -1042,8 +1256,8 @@ end
|
|
1042
1256
|
#handle case of here header in a string inclusion, but
|
1043
1257
|
#here body outside it.
|
1044
1258
|
cnt=0
|
1045
|
-
1.upto @
|
1046
|
-
case @
|
1259
|
+
1.upto @parsestack.size do |i|
|
1260
|
+
case @parsestack[-i]
|
1047
1261
|
when AssignmentRhsContext,ParamListContextNoParen,TopLevelContext
|
1048
1262
|
else cnt+=1
|
1049
1263
|
end
|
@@ -1054,11 +1268,11 @@ end
|
|
1054
1268
|
end
|
1055
1269
|
|
1056
1270
|
tok=get1token
|
1057
|
-
assert(a.
|
1271
|
+
assert(a.equal?( @moretokens))
|
1058
1272
|
toks<<tok
|
1059
1273
|
EoiToken===tok and lexerror tok, "here body expected before eof"
|
1060
1274
|
end while res.unsafe_to_use
|
1061
|
-
assert(a.
|
1275
|
+
assert(a.equal?( @moretokens))
|
1062
1276
|
a[0,0]= toks #same as a=toks+a, but keeps a's id
|
1063
1277
|
}
|
1064
1278
|
|
@@ -1076,9 +1290,9 @@ end
|
|
1076
1290
|
if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
|
1077
1291
|
here_header
|
1078
1292
|
else
|
1079
|
-
operator_or_methname_token
|
1293
|
+
operator_or_methname_token read(2)
|
1080
1294
|
end
|
1081
|
-
when "<=>" then operator_or_methname_token
|
1295
|
+
when "<=>" then operator_or_methname_token read(3)
|
1082
1296
|
else quadriop(ch)
|
1083
1297
|
end
|
1084
1298
|
end
|
@@ -1087,115 +1301,152 @@ end
|
|
1087
1301
|
def escnewline(ch)
|
1088
1302
|
assert ch == '\\'
|
1089
1303
|
|
1090
|
-
pos
|
1304
|
+
pos= input_position
|
1091
1305
|
result=getchar
|
1092
1306
|
if nl=readnl
|
1093
1307
|
result+=nl
|
1094
1308
|
else
|
1095
1309
|
error='illegal escape sequence'
|
1096
1310
|
end
|
1097
|
-
|
1098
|
-
|
1311
|
+
|
1312
|
+
@moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
|
1313
|
+
optional_here_bodies
|
1099
1314
|
|
1315
|
+
lexerror EscNlToken.new(@filename,ln-1,result,pos), error
|
1316
|
+
end
|
1317
|
+
|
1100
1318
|
#-----------------------------------
|
1101
|
-
def
|
1102
|
-
assert("\r\n"[nextchar.chr])
|
1319
|
+
def optional_here_bodies
|
1103
1320
|
|
1104
1321
|
#handle here bodies queued up by previous line
|
1105
1322
|
#(we should be more compatible with dos/mac style newlines...)
|
1106
|
-
|
1107
|
-
tofill.string.offset
|
1323
|
+
while tofill=@incomplete_here_tokens.shift
|
1324
|
+
tofill.string.offset= input_position
|
1108
1325
|
loop {
|
1109
|
-
assert("\r\n"[
|
1110
|
-
|
1111
|
-
#retrieve everything until the next newline
|
1112
|
-
line=all_quote(/^[\r\n]$/, tofill.quote, /^[\r\n]$/, :regex_esc_seq)
|
1113
|
-
#(you didn't know all_quote could take a regex, did you?)
|
1326
|
+
assert("\r\n"[prevchar])
|
1114
1327
|
|
1115
|
-
#
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1328
|
+
#here body terminator?
|
1329
|
+
oldpos= input_position
|
1330
|
+
if tofill.dash
|
1331
|
+
til_charset(/[^#{WHSP}]/o)
|
1332
|
+
end
|
1333
|
+
break if eof?
|
1334
|
+
break if read(tofill.ender.size)==tofill.ender and readnl
|
1335
|
+
input_position_set oldpos
|
1336
|
+
|
1337
|
+
if tofill.quote=="'"
|
1338
|
+
line=til_charset(/[\r\n]/)+readnl
|
1339
|
+
line.gsub! "\\\\", "\\"
|
1340
|
+
tofill.append line
|
1341
|
+
assert(line[-1..-1][/[\r\n]/])
|
1342
|
+
else
|
1343
|
+
|
1344
|
+
back1char #-1 to make newline char the next to read
|
1345
|
+
@linenum-=1
|
1346
|
+
|
1347
|
+
#retrieve everything until the next newline
|
1348
|
+
line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
|
1349
|
+
#(you didn't know all_quote could take a regex, did you?)
|
1350
|
+
|
1351
|
+
#get rid of FileAndLineTokens that otherwise appear to be in the middle of
|
1352
|
+
#a string (and are emitted out of order)
|
1353
|
+
fal=@moretokens.pop
|
1354
|
+
assert FileAndLineToken===fal || fal.nil?
|
1355
|
+
|
1356
|
+
back1char
|
1357
|
+
@linenum-=1
|
1358
|
+
assert("\r\n"[nextchar.chr])
|
1359
|
+
tofill.append_token line
|
1360
|
+
tofill.append readnl
|
1361
|
+
end
|
1130
1362
|
}
|
1131
1363
|
|
1132
|
-
assert("\r\n"[
|
1364
|
+
assert(eof? || "\r\n"[prevchar])
|
1133
1365
|
tofill.unsafe_to_use=false
|
1366
|
+
tofill.line=@linenum-1
|
1134
1367
|
|
1135
|
-
|
1368
|
+
@moretokens.push \
|
1369
|
+
tofill.bodyclass.new(tofill),
|
1370
|
+
FileAndLineToken.new(@filename,@linenum,input_position)
|
1136
1371
|
end
|
1372
|
+
|
1373
|
+
end
|
1374
|
+
|
1375
|
+
#-----------------------------------
|
1376
|
+
def newline(ch)
|
1377
|
+
assert("\r\n"[nextchar.chr])
|
1378
|
+
|
1379
|
+
|
1137
1380
|
|
1138
1381
|
#ordinary newline handling (possibly implicitly escaped)
|
1139
1382
|
assert("\r\n"[nextchar.chr])
|
1383
|
+
assert !@parsestack.empty?
|
1140
1384
|
assert @moretokens.empty?
|
1141
1385
|
result=if NewlineToken===@last_operative_token or #hack
|
1142
1386
|
@last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
|
1143
1387
|
!after_nonid_op?{false}
|
1144
1388
|
then #hack-o-rama: probly cases left out above
|
1145
1389
|
a= abort_noparens!
|
1146
|
-
ExpectDoOrNlContext===@
|
1147
|
-
|
1390
|
+
ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
|
1391
|
+
assert !@parsestack.empty?
|
1392
|
+
@parsestack.last.see self,:semi
|
1148
1393
|
|
1149
1394
|
a << super(ch)
|
1150
1395
|
@moretokens.replace a+@moretokens
|
1151
1396
|
@moretokens.shift
|
1152
1397
|
else
|
1153
|
-
offset
|
1154
|
-
|
1155
|
-
|
1398
|
+
offset= input_position
|
1399
|
+
nl=readnl
|
1400
|
+
@moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
|
1401
|
+
EscNlToken.new(@filename,@linenum-1,nl,offset)
|
1156
1402
|
#WsToken.new ' ' #why? #should be "\\\n" ?
|
1157
1403
|
end
|
1158
1404
|
|
1405
|
+
optional_here_bodies
|
1406
|
+
|
1159
1407
|
start_of_line_directives
|
1160
1408
|
|
1161
1409
|
return result
|
1162
1410
|
end
|
1163
1411
|
|
1164
1412
|
#-----------------------------------
|
1165
|
-
EQBEGIN=%r/^=begin[
|
1413
|
+
EQBEGIN=%r/^=begin[ \t\v\r\n\f]$/
|
1166
1414
|
EQBEGINLENGTH=7
|
1167
1415
|
EQEND='=end'
|
1168
|
-
|
1416
|
+
EQENDLENGTH=4
|
1417
|
+
ENDMARKER=/^__END__[\r\n]?\Z/
|
1169
1418
|
ENDMARKERLENGTH=8
|
1170
1419
|
def start_of_line_directives
|
1171
1420
|
#handle =begin...=end (at start of a line)
|
1172
1421
|
while EQBEGIN===readahead(EQBEGINLENGTH)
|
1173
|
-
startpos
|
1174
|
-
more
|
1175
|
-
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1422
|
+
startpos= input_position
|
1423
|
+
more= read(EQBEGINLENGTH-1) #get =begin
|
1424
|
+
|
1425
|
+
begin
|
1426
|
+
eof? and raise "eof before =end"
|
1427
|
+
more<<til_charset(/[\r\n]/)
|
1428
|
+
more<<readnl
|
1429
|
+
end until readahead(EQENDLENGTH)==EQEND
|
1430
|
+
|
1182
1431
|
#read rest of line after =end
|
1183
|
-
more <<
|
1432
|
+
more << til_charset(/[\r\n]/)
|
1184
1433
|
assert((?\r===nextchar or ?\n===nextchar))
|
1185
1434
|
assert !(/[\r\n]/===more[-1,1])
|
1435
|
+
more<< readnl
|
1186
1436
|
|
1187
|
-
newls= more.scan(/\r\n?|\n\r?/)
|
1188
|
-
@linenum+= newls.size
|
1437
|
+
# newls= more.scan(/\r\n?|\n\r?/)
|
1438
|
+
# @linenum+= newls.size
|
1189
1439
|
|
1190
1440
|
#inject the fresh comment into future token results
|
1191
|
-
@moretokens.push IgnoreToken.new(more,startpos)
|
1441
|
+
@moretokens.push IgnoreToken.new(more,startpos),
|
1442
|
+
FileAndLineToken.new(@filename,@linenum,input_position)
|
1192
1443
|
end
|
1193
1444
|
|
1194
1445
|
#handle __END__
|
1195
1446
|
if ENDMARKER===readahead(ENDMARKERLENGTH)
|
1196
|
-
assert !(ImplicitContext===@
|
1197
|
-
@moretokens.unshift endoffile_detected(
|
1198
|
-
@file.
|
1447
|
+
assert !(ImplicitContext===@parsestack.last)
|
1448
|
+
@moretokens.unshift endoffile_detected(read(7))
|
1449
|
+
# input_position_set @file.size
|
1199
1450
|
end
|
1200
1451
|
end
|
1201
1452
|
|
@@ -1221,11 +1472,15 @@ end
|
|
1221
1472
|
#used to resolve the ambiguity of
|
1222
1473
|
# <<, %, ? in ruby
|
1223
1474
|
#returns whether current token is to be the start of a literal
|
1224
|
-
#/ is not handled right here if whitespace immediately follows the /
|
1225
1475
|
def quote_expected?(ch) #yukko hack
|
1476
|
+
if AssignmentContext===@parsestack.last
|
1477
|
+
@parsestack.pop
|
1478
|
+
return false
|
1479
|
+
end
|
1480
|
+
|
1226
1481
|
case ch[0]
|
1227
1482
|
when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
|
1228
|
-
when ?% then readahead(3)[/^%([a-
|
1483
|
+
when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
|
1229
1484
|
when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
|
1230
1485
|
else raise 'unexpected ch (#{ch}) in quote_expected?'
|
1231
1486
|
# when ?+,?-,?&,?*,?~,?! then '*&='[readahead(2)[1..1]]
|
@@ -1240,22 +1495,29 @@ end
|
|
1240
1495
|
end
|
1241
1496
|
|
1242
1497
|
#-----------------------------------
|
1498
|
+
#returns false if last token was a value, true if it was an operator.
|
1499
|
+
#returns what block yields if last token was a method name.
|
1243
1500
|
#used to resolve the ambiguity of
|
1244
|
-
# <<, %, /, ?, :, and newline in ruby
|
1501
|
+
# <<, %, /, ?, :, and newline (among others) in ruby
|
1245
1502
|
def after_nonid_op?
|
1246
1503
|
case @last_operative_token
|
1247
|
-
when MethNameToken,
|
1504
|
+
when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
|
1505
|
+
#VarNameToken should really be left out of this case...
|
1506
|
+
#should be in next branch instead
|
1507
|
+
#callers all check for last token being not a variable if they pass anything
|
1508
|
+
#but {false} in the block
|
1248
1509
|
return yield
|
1249
1510
|
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
|
1250
|
-
%r{^(
|
1251
|
-
|
1511
|
+
%r{^(
|
1512
|
+
class|module|end|self|true|false|nil|
|
1513
|
+
__FILE__|__LINE__|[\})\]]|alias|(un)?def|for
|
1252
1514
|
)$}x.token_pat
|
1253
|
-
#
|
1254
|
-
#maybe class/module shouldn't either?
|
1515
|
+
#dunno about def/undef
|
1516
|
+
#maybe class/module shouldn't be here either?
|
1255
1517
|
#for is also in NewlineToken branch, below.
|
1256
1518
|
#what about rescue?
|
1257
1519
|
return false
|
1258
|
-
when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
|
1520
|
+
when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS}|do)$/o.token_pat
|
1259
1521
|
#regexs above must match whole string
|
1260
1522
|
#assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
|
1261
1523
|
return true
|
@@ -1273,19 +1535,46 @@ end
|
|
1273
1535
|
end
|
1274
1536
|
end
|
1275
1537
|
|
1538
|
+
|
1539
|
+
|
1540
|
+
|
1541
|
+
#-----------------------------------
|
1542
|
+
#returns the last context on @parsestack which isn't an ImplicitContext
|
1543
|
+
def last_context_not_implicit
|
1544
|
+
@parsestack.reverse_each{|ctx|
|
1545
|
+
return ctx unless ImplicitContext===ctx
|
1546
|
+
}
|
1547
|
+
fail
|
1548
|
+
end
|
1549
|
+
|
1550
|
+
#-----------------------------------
|
1551
|
+
#a | has been seen. is it an operator? or a goalpost?
|
1552
|
+
#(goalpost == delimiter of block param list)
|
1553
|
+
#if it is a goalpost, end the BlockParamListLhsContext on
|
1554
|
+
#the context stack, as well as any implicit contexts on top of it.
|
1555
|
+
def conjunction_or_goalpost(ch)
|
1556
|
+
result=quadriop(ch)
|
1557
|
+
if result===/^|$/ and BlockParamListLhsContext===last_context_not_implicit
|
1558
|
+
@moretokens.push( *abort_noparens!("|"))
|
1559
|
+
assert(BlockParamListLhsContext===@parsestack.last)
|
1560
|
+
@parsestack.pop
|
1561
|
+
@moretokens.push KeywordToken.new("|", input_position-1)
|
1562
|
+
result=@moretokens.shift
|
1563
|
+
end
|
1564
|
+
result
|
1565
|
+
end
|
1566
|
+
|
1276
1567
|
#-----------------------------------
|
1277
1568
|
def quadriop(ch) #match /&&?=?/ (&, &&, &=, or &&=)
|
1278
1569
|
assert(%w[& * | < >].include?(ch))
|
1279
|
-
# '&*'[ch] and qe=quote_expected?(ch) #not needed?
|
1280
1570
|
result=getchar + (eat_next_if(ch)or'')
|
1281
1571
|
if eat_next_if(?=)
|
1282
1572
|
result << ?=
|
1283
|
-
# elsif qe and result[/^[&*]$/] #not needed?
|
1284
|
-
# @moretokens<<NoWsToken.new(@file.pos) #not needed?
|
1285
1573
|
end
|
1286
1574
|
return operator_or_methname_token(result)
|
1287
1575
|
end
|
1288
1576
|
|
1577
|
+
|
1289
1578
|
#-----------------------------------
|
1290
1579
|
def biop(ch) #match /%=?/ (% or %=)
|
1291
1580
|
assert(ch[/^[%^~]$/])
|
@@ -1295,18 +1584,18 @@ end
|
|
1295
1584
|
end
|
1296
1585
|
return operator_or_methname_token( result)
|
1297
1586
|
end
|
1298
|
-
|
1299
1587
|
#-----------------------------------
|
1300
|
-
def tilde(ch) #match
|
1588
|
+
def tilde(ch) #match ~
|
1301
1589
|
assert(ch=='~')
|
1302
1590
|
result=getchar
|
1303
|
-
# eat_next_if(?=) ?
|
1591
|
+
# eat_next_if(?=) ? #ack, spppft, I'm always getting this backwards
|
1304
1592
|
# result <<?= :
|
1305
1593
|
WHSPLF[nextchar.chr] ||
|
1306
|
-
@moretokens << NoWsToken.new(
|
1594
|
+
@moretokens << NoWsToken.new(input_position)
|
1307
1595
|
#why is the NoWsToken necessary at this point?
|
1308
|
-
|
1309
|
-
#result should distinguish unary ~
|
1596
|
+
result=operator_or_methname_token result
|
1597
|
+
result.unary=true #result should distinguish unary ~
|
1598
|
+
result
|
1310
1599
|
end
|
1311
1600
|
|
1312
1601
|
#-----------------------------------
|
@@ -1327,8 +1616,9 @@ end
|
|
1327
1616
|
else #unary operator
|
1328
1617
|
result=getchar
|
1329
1618
|
WHSPLF[nextchar.chr] or
|
1330
|
-
@moretokens << NoWsToken.new(
|
1331
|
-
|
1619
|
+
@moretokens << NoWsToken.new(input_position)
|
1620
|
+
result=(operator_or_methname_token result)
|
1621
|
+
result.unary=true
|
1332
1622
|
#todo: result should distinguish unary+binary +-
|
1333
1623
|
end
|
1334
1624
|
else #binary operator
|
@@ -1337,45 +1627,54 @@ end
|
|
1337
1627
|
if eat_next_if(?=)
|
1338
1628
|
result << ?=
|
1339
1629
|
end
|
1340
|
-
|
1630
|
+
result=(operator_or_methname_token result)
|
1341
1631
|
#todo: result should distinguish unary+binary +-
|
1342
1632
|
end
|
1633
|
+
result
|
1343
1634
|
end
|
1344
1635
|
|
1345
1636
|
#-----------------------------------
|
1346
1637
|
def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
|
1347
|
-
offset
|
1638
|
+
offset= input_position
|
1348
1639
|
str=getchar
|
1349
1640
|
assert str=='='
|
1350
|
-
c=(eat_next_if(
|
1641
|
+
c=(eat_next_if(/[~=>]/)or'')
|
1351
1642
|
str << c
|
1643
|
+
result= operator_or_methname_token( str,offset)
|
1352
1644
|
case c
|
1353
1645
|
when '=': str<< (eat_next_if(?=)or'')
|
1354
1646
|
|
1355
|
-
when '>':
|
1647
|
+
when '>':
|
1648
|
+
unless ParamListContextNoParen===@parsestack.last
|
1649
|
+
@moretokens.unshift result
|
1650
|
+
@moretokens.unshift( *abort_noparens!("=>"))
|
1651
|
+
result=@moretokens.shift
|
1652
|
+
end
|
1653
|
+
@parsestack.last.see self,:arrow
|
1356
1654
|
when '': #record local variable definitions
|
1357
1655
|
|
1358
|
-
@
|
1656
|
+
@parsestack.push AssignmentRhsContext.new(@linenum)
|
1359
1657
|
@moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
|
1360
1658
|
end
|
1361
|
-
return
|
1659
|
+
return result
|
1362
1660
|
end
|
1363
1661
|
|
1364
1662
|
#-----------------------------------
|
1365
1663
|
def exclam(ch) #match /![~=]?/ (! or != or !~)
|
1366
1664
|
assert nextchar==?!
|
1367
1665
|
result=getchar
|
1368
|
-
k=eat_next_if(
|
1666
|
+
k=eat_next_if(/[~=]/)
|
1369
1667
|
if k
|
1370
1668
|
result+=k
|
1371
1669
|
else
|
1372
1670
|
WHSPLF[nextchar.chr] or
|
1373
|
-
@moretokens << NoWsToken.new(
|
1671
|
+
@moretokens << NoWsToken.new(input_position)
|
1374
1672
|
end
|
1375
|
-
return KeywordToken.new(result)
|
1673
|
+
return KeywordToken.new(result, input_position-result.size)
|
1376
1674
|
#result should distinguish unary !
|
1377
1675
|
end
|
1378
1676
|
|
1677
|
+
|
1379
1678
|
#-----------------------------------
|
1380
1679
|
def dot(ch)
|
1381
1680
|
str=''
|
@@ -1391,7 +1690,6 @@ end
|
|
1391
1690
|
dot_rhs(result)
|
1392
1691
|
return result
|
1393
1692
|
end
|
1394
|
-
|
1395
1693
|
#-----------------------------------
|
1396
1694
|
def dot_rhs(prevtok)
|
1397
1695
|
safe_recurse { |a|
|
@@ -1403,20 +1701,17 @@ end
|
|
1403
1701
|
}
|
1404
1702
|
end
|
1405
1703
|
|
1406
|
-
#-----------------------------------
|
1407
|
-
def single_quote(ch=nil)
|
1408
|
-
double_quote(ch)
|
1409
|
-
end
|
1410
|
-
|
1411
1704
|
#-----------------------------------
|
1412
1705
|
def back_quote(ch=nil)
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1706
|
+
if @last_operative_token===/^(def|::|\.)$/
|
1707
|
+
oldpos= input_position
|
1708
|
+
MethNameToken.new(eat_next_if(?`), oldpos)
|
1709
|
+
else
|
1710
|
+
double_quote(ch)
|
1711
|
+
end
|
1418
1712
|
end
|
1419
1713
|
|
1714
|
+
if false
|
1420
1715
|
#-----------------------------------
|
1421
1716
|
def comment(str)
|
1422
1717
|
result=""
|
@@ -1441,27 +1736,30 @@ end
|
|
1441
1736
|
|
1442
1737
|
return IgnoreToken.new(result)
|
1443
1738
|
end
|
1444
|
-
|
1739
|
+
end
|
1445
1740
|
#-----------------------------------
|
1446
1741
|
def open_brace(ch)
|
1447
1742
|
assert((ch!='[' or !want_op_name))
|
1448
1743
|
assert(@moretokens.empty?)
|
1449
1744
|
lastchar=prevchar
|
1450
|
-
ch=eat_next_if(
|
1451
|
-
tokch=KeywordToken.new(ch
|
1745
|
+
ch=eat_next_if(/[({\[]/)or raise "lexer confusion"
|
1746
|
+
tokch=KeywordToken.new(ch, input_position-1)
|
1747
|
+
|
1452
1748
|
|
1453
1749
|
#maybe emitting of NoWsToken can be moved into var_or_meth_name ??
|
1454
1750
|
case tokch.ident
|
1455
1751
|
when '['
|
1456
|
-
#
|
1457
|
-
#
|
1458
|
-
|
1752
|
+
# in contexts expecting an (operator) method name, we
|
1753
|
+
# would want to match [] or []= at this point
|
1754
|
+
#but control never comes this way in those cases... goes
|
1755
|
+
#to custom parsers for alias, undef, and def in #parse_keywords
|
1756
|
+
tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]}
|
1757
|
+
@parsestack.push ListImmedContext.new(ch,@linenum)
|
1459
1758
|
lasttok=last_operative_token
|
1460
1759
|
#could be: lasttok===/^[a-z_]/i
|
1461
|
-
if (VarNameToken===lasttok or MethNameToken===lasttok
|
1462
|
-
lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
|
1760
|
+
if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or MethNameToken===lasttok) and !WHSPCHARS[lastchar]
|
1463
1761
|
@moretokens << (tokch)
|
1464
|
-
tokch= NoWsToken.new(
|
1762
|
+
tokch= NoWsToken.new(input_position-1)
|
1465
1763
|
end
|
1466
1764
|
when '('
|
1467
1765
|
lasttok=last_operative_token
|
@@ -1470,19 +1768,20 @@ end
|
|
1470
1768
|
lasttok===FUNCLIKE_KEYWORDS)
|
1471
1769
|
unless WHSPCHARS[lastchar]
|
1472
1770
|
@moretokens << tokch
|
1473
|
-
tokch= NoWsToken.new(
|
1771
|
+
tokch= NoWsToken.new(input_position-1)
|
1474
1772
|
end
|
1475
|
-
@
|
1773
|
+
@parsestack.push ParamListContext.new(@linenum)
|
1476
1774
|
else
|
1477
|
-
@
|
1775
|
+
@parsestack.push ParenContext.new(@linenum)
|
1478
1776
|
end
|
1479
1777
|
|
1480
1778
|
when '{'
|
1481
1779
|
#check if we are in a hash literal or string inclusion (#{}),
|
1482
1780
|
#in which case below would be bad.
|
1483
|
-
if after_nonid_op?{false}
|
1484
|
-
@
|
1781
|
+
if after_nonid_op?{false} or @last_operative_token.has_no_block?
|
1782
|
+
@parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
|
1485
1783
|
else
|
1784
|
+
tokch.set_infix!
|
1486
1785
|
=begin not needed now, i think
|
1487
1786
|
# 'need to find matching callsite context and end it if implicit'
|
1488
1787
|
lasttok=last_operative_token
|
@@ -1492,8 +1791,8 @@ end
|
|
1492
1791
|
end
|
1493
1792
|
=end
|
1494
1793
|
|
1495
|
-
|
1496
|
-
@
|
1794
|
+
localvars.start_block
|
1795
|
+
@parsestack.push BlockContext.new(@linenum)
|
1497
1796
|
block_param_list_lookahead
|
1498
1797
|
end
|
1499
1798
|
end
|
@@ -1504,18 +1803,18 @@ end
|
|
1504
1803
|
def close_brace(ch)
|
1505
1804
|
ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
|
1506
1805
|
@moretokens.concat abort_noparens!(ch)
|
1507
|
-
@
|
1508
|
-
@
|
1509
|
-
if @
|
1806
|
+
@parsestack.last.see self,:semi #hack
|
1807
|
+
@moretokens<< kw=KeywordToken.new( ch, input_position-1)
|
1808
|
+
if @parsestack.empty?
|
1510
1809
|
lexerror kw,"unmatched brace: #{ch}"
|
1511
1810
|
return @moretokens.shift
|
1512
1811
|
end
|
1513
|
-
ctx=@
|
1812
|
+
ctx=@parsestack.pop
|
1514
1813
|
origch,line=ctx.starter,ctx.linenum
|
1515
1814
|
ch==PAIRS[origch] or
|
1516
1815
|
lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
|
1517
1816
|
"matching brace location", @filename, line
|
1518
|
-
BlockContext===ctx and
|
1817
|
+
BlockContext===ctx and localvars.end_block
|
1519
1818
|
if ParamListContext==ctx.class
|
1520
1819
|
assert ch==')'
|
1521
1820
|
#kw.set_callsite! #not needed?
|
@@ -1525,29 +1824,29 @@ end
|
|
1525
1824
|
|
1526
1825
|
#-----------------------------------
|
1527
1826
|
def eof(ch=nil)
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1827
|
+
#this must be the very last character...
|
1828
|
+
oldpos= input_position
|
1829
|
+
assert(?\0==getc)
|
1531
1830
|
|
1532
|
-
|
1831
|
+
result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
|
1533
1832
|
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1833
|
+
eof? or
|
1834
|
+
lexerror result,'nul character is not at the end of file'
|
1835
|
+
input_position_set @file.size
|
1836
|
+
return(endoffile_detected result)
|
1538
1837
|
end
|
1539
1838
|
|
1540
1839
|
#-----------------------------------
|
1541
1840
|
def endoffile_detected(s='')
|
1542
1841
|
@moretokens.push( *(abort_noparens!.push super(s)))
|
1543
1842
|
result= @moretokens.shift
|
1544
|
-
balanced_braces? or (lexerror result,"unbalanced braces at eof.
|
1843
|
+
balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
|
1545
1844
|
result
|
1546
1845
|
end
|
1547
1846
|
|
1548
1847
|
#-----------------------------------
|
1549
1848
|
def single_char_token(ch)
|
1550
|
-
KeywordToken.new super(ch),
|
1849
|
+
KeywordToken.new super(ch), input_position-1
|
1551
1850
|
end
|
1552
1851
|
|
1553
1852
|
#-----------------------------------
|
@@ -1557,13 +1856,13 @@ end
|
|
1557
1856
|
|
1558
1857
|
#-----------------------------------
|
1559
1858
|
def semicolon(ch)
|
1560
|
-
|
1859
|
+
assert @moretokens.empty?
|
1561
1860
|
@moretokens.push(*abort_noparens!)
|
1562
|
-
@
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1861
|
+
@parsestack.last.see self,:semi
|
1862
|
+
if ExpectDoOrNlContext===@parsestack.last #should be in context's see:semi handler
|
1863
|
+
@parsestack.pop
|
1864
|
+
assert @parsestack.last.starter[/^(while|until|for)$/]
|
1865
|
+
end
|
1567
1866
|
@moretokens.push single_char_token(ch)
|
1568
1867
|
return @moretokens.shift
|
1569
1868
|
end
|
@@ -1582,7 +1881,11 @@ end
|
|
1582
1881
|
|
1583
1882
|
#-----------------------------------
|
1584
1883
|
#tokenify_results_of :identifier
|
1585
|
-
save_offsets_in(*CHARMAPPINGS.values.uniq-[
|
1884
|
+
save_offsets_in(*CHARMAPPINGS.values.uniq-[
|
1885
|
+
:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
|
1886
|
+
|
1887
|
+
|
1888
|
+
])
|
1586
1889
|
#save_offsets_in :symbol
|
1587
1890
|
|
1588
1891
|
end
|