rubylexer 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +55 -0
- data/Manifest.txt +67 -0
- data/README.txt +103 -0
- data/Rakefile +24 -0
- data/howtouse.txt +9 -6
- data/{assert.rb → lib/assert.rb} +11 -11
- data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
- data/lib/rubylexer/0.6.2.rb +39 -0
- data/lib/rubylexer/0.6.rb +5 -0
- data/lib/rubylexer/0.7.0.rb +2 -0
- data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
- data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
- data/{context.rb → lib/rubylexer/context.rb} +48 -18
- data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
- data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
- data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
- data/{token.rb → lib/rubylexer/token.rb} +72 -20
- data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
- data/lib/rubylexer/version.rb +3 -0
- data/{testcode → test/code}/deletewarns.rb +0 -0
- data/test/code/dl_all_gems.rb +43 -0
- data/{testcode → test/code}/dumptokens.rb +12 -9
- data/test/code/locatetest +30 -0
- data/test/code/locatetest.rb +49 -0
- data/test/code/rubylexervsruby.rb +173 -0
- data/{testcode → test/code}/tokentest.rb +62 -51
- data/{testcode → test/code}/torment +8 -8
- data/test/code/unpack_all_gems.rb +15 -0
- data/{testdata → test/data}/1.rb.broken +0 -0
- data/{testdata → test/data}/23.rb +0 -0
- data/test/data/__end__.rb +2 -0
- data/test/data/__end__2.rb +3 -0
- data/test/data/and.rb +5 -0
- data/test/data/blockassigntest.rb +23 -0
- data/test/data/chunky.plain.rb +75 -0
- data/test/data/chunky_bacon.rb +112 -0
- data/test/data/chunky_bacon2.rb +112 -0
- data/test/data/chunky_bacon3.rb +112 -0
- data/test/data/chunky_bacon4.rb +112 -0
- data/test/data/for.rb +45 -0
- data/test/data/format.rb +6 -0
- data/{testdata → test/data}/g.rb +0 -0
- data/test/data/gemlist.txt +280 -0
- data/test/data/heart.rb +7 -0
- data/test/data/if.rb +6 -0
- data/test/data/jarh.rb +369 -0
- data/test/data/lbrace.rb +4 -0
- data/test/data/lbrack.rb +4 -0
- data/{testdata → test/data}/newsyntax.rb +0 -0
- data/{testdata → test/data}/noeolatend.rb +0 -0
- data/test/data/p-op.rb +8 -0
- data/{testdata → test/data}/p.rb +671 -79
- data/{testdata → test/data}/pleac.rb.broken +0 -0
- data/{testdata → test/data}/pre.rb +0 -0
- data/{testdata → test/data}/pre.unix.rb +0 -0
- data/{testdata → test/data}/regtest.rb +0 -0
- data/test/data/rescue.rb +35 -0
- data/test/data/s.rb +186 -0
- data/test/data/strinc.rb +2 -0
- data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
- data/test/data/untermed_here.rb.broken +2 -0
- data/test/data/untermed_string.rb.broken +1 -0
- data/{testdata → test/data}/untitled1.rb +0 -0
- data/{testdata → test/data}/w.rb +0 -0
- data/{testdata → test/data}/wsdlDriver.rb +0 -0
- data/testing.txt +6 -4
- metadata +163 -59
- data/README +0 -134
- data/Rantfile +0 -37
- data/io.each_til_charset.rb +0 -247
- data/require.rb +0 -103
- data/rlold.rb +0 -12
- data/testcode/locatetest +0 -12
- data/testcode/rubylexervsruby.rb +0 -104
- data/testcode/rubylexervsruby.sh +0 -51
- data/testresults/placeholder +0 -0
data/History.txt
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
=== 0.7.0/2-15-2008
|
2
|
+
* implicit tokens are now emitted at the right times (need more test code)
|
3
|
+
* local variables are now temporarily hidden by class, module, and def
|
4
|
+
* line numbers should always be correct now (=begin...=end caused this) (??)
|
5
|
+
* fixed meth/var-name confusion in idents after 'def' but before params
|
6
|
+
* FileAndLineToken now emitted on all new lines (0.8)
|
7
|
+
* offset of __END__ now works(??)
|
8
|
+
* put files into lib/
|
9
|
+
* improvements in parsing unary * and & (??)
|
10
|
+
* input can now come from a string
|
11
|
+
* local vars (defs and uses) are recognized in string inclusions
|
12
|
+
* explicitly decimal numbers, eg: 0d123456789
|
13
|
+
* eof in unusual cases is better handled
|
14
|
+
* __END__ is not treated as a keyword
|
15
|
+
* '|' as goalpost is now better handled
|
16
|
+
* a number of things have been renamed internally
|
17
|
+
* no more implicit parens for setter method calls
|
18
|
+
* '{' after return, break, and next is now the start of a hash.
|
19
|
+
* ambiguous cases of '::','(',':',and '?' are now better handled.
|
20
|
+
* more start/end hint tokens (for 'when', 'rescue', and 'for')
|
21
|
+
* bugfixes in rhs hint tokens
|
22
|
+
* parsing of def headers for singleton methods is improved
|
23
|
+
* rescue as operator is now handled
|
24
|
+
* block param list lookahead is simplified
|
25
|
+
* unary ops (including * and &) can be easily distinguished in output
|
26
|
+
* here document bodies better handled, esp after escaped newline
|
27
|
+
* symbols like %s{symbol} now actually work
|
28
|
+
* implicit parens around => better handled...
|
29
|
+
* different cases of '{' can now be easily distinguished
|
30
|
+
* ImplicitParamList Start and End are now Keyword, not Ignore tokens.
|
31
|
+
|
32
|
+
=== 0.6.2
|
33
|
+
* testcode/dumptokens.rb charhandler.rb doesn't work... but does after unix2dos (not reproducible)
|
34
|
+
* files are opened in binmode to avoid all possible eol translation
|
35
|
+
* (x.+?x) now works
|
36
|
+
* methname/varname mixups fixed in some cases
|
37
|
+
* performance improvements, in most important cases
|
38
|
+
* error handling tokens should be emitted on error input... ErrorToken mixin module
|
39
|
+
* but old error handling interface should be preserved and made available
|
40
|
+
* moved readahead and friends into IOext
|
41
|
+
* made optimized readahead et al for fakefile
|
42
|
+
* dos newlines (and newlines generally) can be fancy string delimiters
|
43
|
+
* do,if,until, etc, have a way to tell if an end is associated
|
44
|
+
* broke readme into pieces
|
45
|
+
|
46
|
+
=== 0.6.0
|
47
|
+
* partly fixed the implicit tokens at the wrong times. (or not at the
|
48
|
+
* right times) (partly fixed)
|
49
|
+
* : operator might be a synonym for 'then'
|
50
|
+
* variables other than the last are now recognized in multiple assignment
|
51
|
+
* variables created by for and rescue are now recognized
|
52
|
+
* token following :: should not be BareSymbolToken if begins with A-Z (unless obviously a func)
|
53
|
+
* read code to be lexed from a string. (irb wants this)
|
54
|
+
* fancy symbols weren't supported at all. (like this: %s{abcdefg})
|
55
|
+
|
data/Manifest.txt
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
COPYING
|
2
|
+
README.txt
|
3
|
+
Manifest.txt
|
4
|
+
Rakefile
|
5
|
+
howtouse.txt
|
6
|
+
History.txt
|
7
|
+
testing.txt
|
8
|
+
lib/rubylexer/rubycode.rb
|
9
|
+
lib/rubylexer/context.rb
|
10
|
+
lib/rubylexer/token.rb
|
11
|
+
lib/rubylexer/0.6.rb
|
12
|
+
lib/rubylexer/0.6.2.rb
|
13
|
+
lib/rubylexer/0.7.0.rb
|
14
|
+
lib/rubylexer/version.rb
|
15
|
+
lib/rubylexer/rulexer.rb
|
16
|
+
lib/rubylexer/tokenprinter.rb
|
17
|
+
lib/rubylexer/charset.rb
|
18
|
+
lib/rubylexer/symboltable.rb
|
19
|
+
lib/rubylexer/charhandler.rb
|
20
|
+
lib/assert.rb
|
21
|
+
lib/rubylexer.rb
|
22
|
+
test/data/gemlist.txt
|
23
|
+
test/data/blockassigntest.rb
|
24
|
+
test/data/for.rb
|
25
|
+
test/data/chunky_bacon.rb
|
26
|
+
test/data/and.rb
|
27
|
+
test/data/pre.unix.rb
|
28
|
+
test/data/untermed_string.rb.broken
|
29
|
+
test/data/__end__2.rb
|
30
|
+
test/data/w.rb
|
31
|
+
test/data/if.rb
|
32
|
+
test/data/pre.rb
|
33
|
+
test/data/jarh.rb
|
34
|
+
test/data/regtest.rb
|
35
|
+
test/data/chunky_bacon4.rb
|
36
|
+
test/data/__end__.rb
|
37
|
+
test/data/strinc.rb
|
38
|
+
test/data/lbrace.rb
|
39
|
+
test/data/p.rb
|
40
|
+
test/data/chunky.plain.rb
|
41
|
+
test/data/noeolatend.rb
|
42
|
+
test/data/g.rb
|
43
|
+
test/data/23.rb
|
44
|
+
test/data/lbrack.rb
|
45
|
+
test/data/untitled1.rb
|
46
|
+
test/data/rescue.rb
|
47
|
+
test/data/tokentest.assert.rb.can
|
48
|
+
test/data/pleac.rb.broken
|
49
|
+
test/data/heart.rb
|
50
|
+
test/data/s.rb
|
51
|
+
test/data/wsdlDriver.rb
|
52
|
+
test/data/p-op.rb
|
53
|
+
test/data/1.rb.broken
|
54
|
+
test/data/untermed_here.rb.broken
|
55
|
+
test/data/newsyntax.rb
|
56
|
+
test/data/chunky_bacon3.rb
|
57
|
+
test/data/chunky_bacon2.rb
|
58
|
+
test/data/format.rb
|
59
|
+
test/code/locatetest.rb
|
60
|
+
test/code/rubylexervsruby.rb
|
61
|
+
test/code/dl_all_gems.rb
|
62
|
+
test/code/unpack_all_gems.rb
|
63
|
+
test/code/tokentest.rb
|
64
|
+
test/code/dumptokens.rb
|
65
|
+
test/code/torment
|
66
|
+
test/code/locatetest
|
67
|
+
test/code/deletewarns.rb
|
data/README.txt
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
= RubyLexer
|
2
|
+
|
3
|
+
*
|
4
|
+
*
|
5
|
+
*
|
6
|
+
|
7
|
+
=== DESCRIPTION:
|
8
|
+
|
9
|
+
RubyLexer is a lexer library for Ruby, written in Ruby. Rubylexer is meant
|
10
|
+
as a lexer for Ruby that's complete and correct; all legal Ruby
|
11
|
+
code should be lexed correctly by RubyLexer as well. Just enough parsing
|
12
|
+
capability is included to give RubyLexer enough context to tokenize correctly
|
13
|
+
in all cases. (This turned out to be more parsing than I had thought or
|
14
|
+
wanted to take on at first.) RubyLexer handles the hard things like
|
15
|
+
complicated strings, the ambiguous nature of some punctuation characters and
|
16
|
+
keywords in ruby, and distinguishing methods and local variables.
|
17
|
+
|
18
|
+
RubyLexer is not particularly clean code. As I progressed in writing this,
|
19
|
+
I've learned a little about how these things are supposed to be done; the
|
20
|
+
lexer is not supposed to have any state of it's own, instead it gets whatever
|
21
|
+
it needs to know from the parser. As a stand-alone lexer, Rubylexer maintains
|
22
|
+
quite a lot of state. Every instance variable in the RubyLexer class is some
|
23
|
+
sort of lexer state. Most of the complication and ugly code in RubyLexer is
|
24
|
+
in maintaining or using this state.
|
25
|
+
|
26
|
+
For information about using RubyLexer in your program, please see howtouse.txt.
|
27
|
+
|
28
|
+
For my notes on the testing of RubyLexer, see testing.txt.
|
29
|
+
|
30
|
+
If you have any questions, comments, problems, new feature requests, or just
|
31
|
+
want to figure out how to make it work for what you need to do, contact me:
|
32
|
+
rubylexer _at_ inforadical _dot_ net
|
33
|
+
|
34
|
+
RubyLexer is a RubyForge project. RubyForge is another good place to send your
|
35
|
+
bug reports or whatever: http://rubyforge.org/projects/rubylexer/
|
36
|
+
|
37
|
+
(There aren't any bug filed against RubyLexer there yet, but don't be afraid
|
38
|
+
that your report will get lonely.)
|
39
|
+
|
40
|
+
==SYNOPSIS:
|
41
|
+
require "rubylexer.rb"
|
42
|
+
#then later
|
43
|
+
lexer=RubyLexer.new(a_file_name, opened_File_or_String)
|
44
|
+
until EoiToken===(token=lexer.get1token)
|
45
|
+
#...do stuff w/ token...
|
46
|
+
end
|
47
|
+
|
48
|
+
== Status
|
49
|
+
RubyLexer can correctly lex all legal Ruby 1.8 code that I've been able to
|
50
|
+
find on my Debian system. It can also handle (most of) my catalog of nasty
|
51
|
+
test cases (in testdata/p.rb) (see below for known problems). At this point,
|
52
|
+
new bugs are almost exclusively found by my home-grown test code, rather
|
53
|
+
than ruby code gathered 'from the wild'. There are a number of issues I know
|
54
|
+
about and plan to fix, but it seems that Ruby coders don't write code complex
|
55
|
+
enough to trigger them very often. Although incomplete, RubyLexer can
|
56
|
+
correctly distinguish these ambiguous uses of the following operator and
|
57
|
+
keywords, depending on context:
|
58
|
+
% can be modulus operator or start of fancy string
|
59
|
+
/ can be division operator or start of regex
|
60
|
+
* & + - :: can be unary or binary operator
|
61
|
+
[] can be for array literal or [] method (or []=)
|
62
|
+
<< can be here document or left shift operator (or in class<<obj expr)
|
63
|
+
: can be start of symbol, substitute for then, or part of ternary op
|
64
|
+
(there are other uses too, but they're not supported yet.)
|
65
|
+
? can be start of character constant or ternary operator
|
66
|
+
` can be method name or start of exec string
|
67
|
+
any overrideable operator and most keywords can also be method names
|
68
|
+
|
69
|
+
== todo
|
70
|
+
test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
|
71
|
+
these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
|
72
|
+
test more ways: cvt source to dos or mac fmt before testing
|
73
|
+
test more ways: run unit tests after passing thru rubylexer (0.7)
|
74
|
+
test more ways: test require'd, load'd, or eval'd code as well (0.7)
|
75
|
+
lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
|
76
|
+
incremental lexing (ides want this (for performance))
|
77
|
+
put everything in a namespace
|
78
|
+
integrate w/ other tools...
|
79
|
+
html colorized output?
|
80
|
+
move more state onto @parsestack (ongoing)
|
81
|
+
the new cases in p.rb now compile, but won't run
|
82
|
+
expand on test documentation
|
83
|
+
use want_op_name more
|
84
|
+
return result as a half-parsed tree (with parentheses and the like matched)
|
85
|
+
emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
|
86
|
+
strings are still slow
|
87
|
+
emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
|
88
|
+
token pruning in dumptokens...
|
89
|
+
|
90
|
+
== known issues: (and planned fix release)
|
91
|
+
context not really preserved when entering or leaving string inclusions. this causes
|
92
|
+
a number or problems. local variables are ok now, but here document headers started
|
93
|
+
in a string inclusion with the body outside will be a problem. (0.8)
|
94
|
+
string tokenization sometimes a little different from ruby around newlines
|
95
|
+
(htree/template.rb) (0.8)
|
96
|
+
string contents might not be correctly translated in a few cases (0.8?)
|
97
|
+
symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
|
98
|
+
'\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
|
99
|
+
windows or mac newline in source are likely to cause problems in obscure cases (need test case)
|
100
|
+
unterminated =begin is not an error (0.8)
|
101
|
+
ruby 1.9 completely unsupported (0.9)
|
102
|
+
character sets other than ascii are not supported at all (1.0)
|
103
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Copyright (C) 2008 Caleb Clausen
|
2
|
+
# Distributed under the terms of Ruby's license.
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require 'lib/rubylexer/version.rb'
|
6
|
+
|
7
|
+
|
8
|
+
readme=open("README.txt")
|
9
|
+
readme.readline("\n=== DESCRIPTION:")
|
10
|
+
readme.readline("\n\n")
|
11
|
+
desc=readme.readline("\n\n")
|
12
|
+
|
13
|
+
hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
|
14
|
+
_.author = "Caleb Clausen"
|
15
|
+
_.email = "rubylexer-owner @at@ inforadical .dot. net"
|
16
|
+
_.url = "http://rubylexer.rubyforge.org/"
|
17
|
+
_.extra_deps = ["sequence"]
|
18
|
+
_.test_globs=["test/{code/*,data/*rb*,results/}"]
|
19
|
+
_.description=desc
|
20
|
+
_.summary=desc[/\A[^.]+\./]
|
21
|
+
_.spec_extras={:bindir=>''}
|
22
|
+
end
|
23
|
+
|
24
|
+
|
data/howtouse.txt
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
|
2
2
|
Using rubylexer:
|
3
3
|
require "rubylexer.rb"
|
4
|
-
|
4
|
+
#then later
|
5
5
|
lexer=RubyLexer.new(a_file_name, opened_File_or_String)
|
6
|
-
until EoiToken===(
|
7
|
-
|
6
|
+
until EoiToken===(token=lexer.get1token)
|
7
|
+
#...do stuff w/ token...
|
8
8
|
end
|
9
9
|
|
10
|
-
For a slightly expanded version of this example, see
|
10
|
+
For a slightly expanded version of this example, see test/code/dumptokens.rb.
|
11
11
|
|
12
12
|
tok will be a subclass of Token. there are many token classes (see token.rb)
|
13
13
|
however, all tokens have some common methods:
|
@@ -23,7 +23,8 @@ WToken #(mostly useless?) abstract superclass for KeywordToken,
|
|
23
23
|
#OperatorToken, VarNameToken, and HerePlaceholderToken
|
24
24
|
#but not (confusingly) MethNameToken (perhaps that'll change)
|
25
25
|
KeywordToken #a ruby keyword or non-overridable punctuation char(s)
|
26
|
-
OperatorToken #overrideable operators
|
26
|
+
OperatorToken #overrideable operators.
|
27
|
+
#use #unary? and #binary? to find out how many arguments it takes.
|
27
28
|
VarNameToken #a name that represents a variable
|
28
29
|
HerePlaceholderToken #represents the header of a here string. subclass of WToken
|
29
30
|
MethNameToken #the name of a method: the uncoloned
|
@@ -120,7 +121,8 @@ time to adapt to changes. That promise goes for all the changes described below.
|
|
120
121
|
|
121
122
|
In cases where the 2 are incompatible, (inspired by rubygems) I've come up with this:
|
122
123
|
|
123
|
-
|
124
|
+
require 'rubylexer/0.6'
|
125
|
+
rl=RubyLexer.new(...args...) #request the 0.6 api
|
124
126
|
|
125
127
|
This actually works currently; it enables the old api where errors cause an exception instead
|
126
128
|
of generating ErrorTokens. The default will always be to use the new api.
|
@@ -133,4 +135,5 @@ be a big deal; old clients can just include the namespace module.
|
|
133
135
|
Token#ident may be taken away or change without notice.
|
134
136
|
MethNameToken may become a WToken
|
135
137
|
HereBodyToken should really be a string subclass...
|
138
|
+
Newline,EscNl,BareSymbolToken may get renamed
|
136
139
|
|
data/{assert.rb → lib/assert.rb}
RENAMED
@@ -1,5 +1,4 @@
|
|
1
1
|
=begin copyright
|
2
|
-
rubylexer - a ruby lexer written in ruby
|
3
2
|
Copyright (C) 2004,2005 Caleb Clausen
|
4
3
|
|
5
4
|
This library is free software; you can redistribute it and/or
|
@@ -16,16 +15,17 @@
|
|
16
15
|
License along with this library; if not, write to the Free Software
|
17
16
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
17
|
=end
|
19
|
-
require 'set'
|
20
18
|
|
19
|
+
module Kernel
|
20
|
+
def assert(expr,msg="assertion failed")
|
21
|
+
defined? $Debug and $Debug and (expr or raise msg)
|
22
|
+
end
|
21
23
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
$DEBUG and STDERR.print "FIXME: #{s}\n"
|
30
|
-
@@printed.add s
|
24
|
+
@@printed={}
|
25
|
+
def fixme(s)
|
26
|
+
unless @@printed[s]
|
27
|
+
@@printed[s]=1
|
28
|
+
defined? $Debug and $Debug and $stderr.print "FIXME: #{s}\n"
|
29
|
+
end
|
30
|
+
end
|
31
31
|
end
|
@@ -19,15 +19,18 @@
|
|
19
19
|
|
20
20
|
|
21
21
|
|
22
|
-
require
|
23
|
-
require
|
24
|
-
require
|
25
|
-
require
|
26
|
-
|
22
|
+
require 'rubylexer/rulexer' #must be 1st!!!
|
23
|
+
require 'rubylexer/version'
|
24
|
+
require 'rubylexer/token'
|
25
|
+
require 'rubylexer/charhandler'
|
26
|
+
require 'rubylexer/symboltable'
|
27
|
+
#require "io.each_til_charset"
|
28
|
+
require 'rubylexer/context'
|
29
|
+
require 'rubylexer/tokenprinter'
|
27
30
|
|
28
31
|
|
29
32
|
#-----------------------------------
|
30
|
-
class RubyLexer
|
33
|
+
class RubyLexer
|
31
34
|
include NestedContexts
|
32
35
|
|
33
36
|
RUBYSYMOPERATORREX=
|
@@ -39,7 +42,7 @@ class RubyLexer < RuLexer
|
|
39
42
|
#or .. ... ?:
|
40
43
|
#for that use:
|
41
44
|
RUBYNONSYMOPERATORREX=
|
42
|
-
%r{^([
|
45
|
+
%r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
|
43
46
|
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
|
44
47
|
UNSYMOPS=/^[~!]$/ #always unary
|
45
48
|
UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
|
@@ -50,16 +53,18 @@ class RubyLexer < RuLexer
|
|
50
53
|
VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
|
51
54
|
INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
|
52
55
|
BINOPWORDS="(and|or)"
|
53
|
-
NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)[^a-zA-Z0-9_!?=]
|
54
|
-
NEVERSTARTPARAMLISTFIRST=CharSet[
|
55
|
-
NEVERSTARTPARAMLISTMAXLEN=7
|
56
|
+
NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
|
57
|
+
NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
|
58
|
+
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
|
56
59
|
|
57
60
|
RUBYKEYWORDS=%r{
|
58
|
-
^(alias|#{BINOPWORDS}|not|undef|
|
61
|
+
^(alias|#{BINOPWORDS}|not|undef|end|
|
59
62
|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
|
60
63
|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
|
61
64
|
)$
|
62
65
|
}xo
|
66
|
+
#__END__ should not be in this set... its handled in start_of_line_directives
|
67
|
+
|
63
68
|
CHARMAPPINGS = {
|
64
69
|
?$ => :dollar_identifier,
|
65
70
|
?@ => :at_identifier,
|
@@ -67,8 +72,7 @@ class RubyLexer < RuLexer
|
|
67
72
|
?A..?Z => :identifier,
|
68
73
|
?_ => :identifier,
|
69
74
|
?0..?9 => :number,
|
70
|
-
|
71
|
-
?' => :single_quote,
|
75
|
+
%{"'} => :double_quote,
|
72
76
|
?` => :back_quote,
|
73
77
|
|
74
78
|
WHSP => :whitespace, #includes \r
|
@@ -83,7 +87,8 @@ class RubyLexer < RuLexer
|
|
83
87
|
|
84
88
|
#these ones could signal either an op or a term
|
85
89
|
?/ => :regex_or_div,
|
86
|
-
"
|
90
|
+
"|" => :conjunction_or_goalpost,
|
91
|
+
">" => :quadriop,
|
87
92
|
"*&" => :star_or_amp, #could be unary
|
88
93
|
"+-" => :plusminus, #could be unary
|
89
94
|
?< => :lessthan,
|
@@ -103,22 +108,27 @@ class RubyLexer < RuLexer
|
|
103
108
|
?# => :comment
|
104
109
|
}
|
105
110
|
|
106
|
-
|
111
|
+
attr_reader :incomplete_here_tokens, :parsestack
|
107
112
|
|
108
113
|
|
109
114
|
#-----------------------------------
|
110
115
|
def initialize(filename,file,linenum=1)
|
111
116
|
super(filename,file, linenum)
|
112
117
|
@start_linenum=linenum
|
113
|
-
@
|
118
|
+
@parsestack=[TopLevelContext.new]
|
114
119
|
@incomplete_here_tokens=[]
|
115
|
-
@
|
120
|
+
@localvars_stack=[SymbolTable.new]
|
116
121
|
@defining_lvar=nil
|
122
|
+
@in_def_name=false
|
117
123
|
|
118
124
|
@toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
|
119
125
|
|
120
126
|
start_of_line_directives
|
121
127
|
end
|
128
|
+
|
129
|
+
def localvars;
|
130
|
+
@localvars_stack.last
|
131
|
+
end
|
122
132
|
|
123
133
|
#-----------------------------------
|
124
134
|
def get1token
|
@@ -129,25 +139,23 @@ class RubyLexer < RuLexer
|
|
129
139
|
|
130
140
|
#check for bizarre token types
|
131
141
|
case result
|
132
|
-
when
|
133
|
-
|
142
|
+
when StillIgnoreToken#,nil
|
143
|
+
result
|
134
144
|
when Token#,String
|
145
|
+
@last_operative_token=result
|
146
|
+
assert !(IgnoreToken===@last_operative_token)
|
147
|
+
result
|
135
148
|
else
|
136
|
-
|
149
|
+
raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
|
137
150
|
end
|
138
|
-
|
139
|
-
@last_operative_token=result
|
140
|
-
|
141
|
-
return result
|
142
151
|
end
|
143
|
-
|
144
152
|
|
145
153
|
|
146
154
|
#-----------------------------------
|
147
155
|
def balanced_braces?
|
148
156
|
|
149
|
-
#@
|
150
|
-
@
|
157
|
+
#@parsestack.empty?
|
158
|
+
@parsestack.size==1 and TopLevelContext===@parsestack.first
|
151
159
|
end
|
152
160
|
|
153
161
|
#-----------------------------------
|
@@ -182,7 +190,7 @@ private
|
|
182
190
|
|
183
191
|
#-----------------------------------
|
184
192
|
def expect_do_or_end_or_nl!(st)
|
185
|
-
@
|
193
|
+
@parsestack.push ExpectDoOrNlContext.new(st,/(do|;|:|\n)/,@linenum)
|
186
194
|
end
|
187
195
|
|
188
196
|
#-----------------------------------
|
@@ -199,31 +207,38 @@ private
|
|
199
207
|
end
|
200
208
|
|
201
209
|
#-----------------------------------
|
202
|
-
WSCHARSET
|
203
|
-
def ignored_tokens(allow_eof=false)
|
210
|
+
WSCHARSET=/[#\\\n\s\t\v\r\f]/
|
211
|
+
def ignored_tokens(allow_eof=false,allow_eol=true)
|
204
212
|
result=[]
|
205
|
-
result<<@moretokens.shift while
|
213
|
+
result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
|
206
214
|
@moretokens.empty? or return result
|
207
|
-
if true
|
208
215
|
loop do
|
209
216
|
unless @moretokens.empty?
|
210
|
-
|
211
|
-
|
217
|
+
case @moretokens.first
|
218
|
+
when StillIgnoreToken
|
219
|
+
when NewlineToken: allow_eol or break
|
220
|
+
else break
|
221
|
+
end
|
212
222
|
else
|
213
|
-
|
223
|
+
|
224
|
+
break unless ch=nextchar
|
225
|
+
ch=ch.chr
|
226
|
+
break unless WSCHARSET===ch
|
227
|
+
break if ch[/[\r\n]/] and !allow_eol
|
214
228
|
end
|
229
|
+
|
215
230
|
|
216
231
|
tok=get1token
|
217
|
-
result<<tok
|
232
|
+
result << tok
|
218
233
|
case tok
|
219
|
-
when NewlineToken
|
220
|
-
when EoiToken
|
221
|
-
when
|
222
|
-
else raise "impossible"
|
234
|
+
when NewlineToken; assert allow_eol; block_given? and yield tok
|
235
|
+
when EoiToken; allow_eof or lexerror tok,"end of file not expected here(2)"
|
236
|
+
when StillIgnoreToken
|
237
|
+
else raise "impossible token: #{tok.inspect}"
|
223
238
|
end
|
224
239
|
end
|
225
240
|
|
226
|
-
|
241
|
+
=begin
|
227
242
|
@whsphandler||=CharHandler.new(self, :==,
|
228
243
|
"#" => :comment,
|
229
244
|
"\n" => :newline,
|
@@ -235,18 +250,18 @@ else
|
|
235
250
|
block_given? and NewlineToken===tok and yield tok
|
236
251
|
result << tok
|
237
252
|
end
|
238
|
-
end
|
253
|
+
=end
|
239
254
|
return result
|
240
255
|
end
|
241
256
|
|
242
257
|
#-----------------------------------
|
243
258
|
def safe_recurse
|
244
259
|
old_moretokens=@moretokens
|
245
|
-
#
|
260
|
+
#old_parsestack=@parsestack.dup
|
246
261
|
@moretokens=[]
|
247
262
|
result= yield @moretokens
|
248
263
|
#assert @incomplete_here_tokens.empty?
|
249
|
-
#assert @
|
264
|
+
#assert @parsestack==old_parsestack
|
250
265
|
@moretokens= old_moretokens.concat @moretokens
|
251
266
|
return result
|
252
267
|
#need to do something with @last_operative_token?
|
@@ -258,7 +273,7 @@ end
|
|
258
273
|
result = ((
|
259
274
|
#order matters here, but it shouldn't
|
260
275
|
#(but til_charset must be last)
|
261
|
-
eat_next_if(
|
276
|
+
eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
|
262
277
|
(eat_next_if('-') and ("-"+getchar)) or
|
263
278
|
(?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
|
264
279
|
))
|
@@ -266,7 +281,7 @@ end
|
|
266
281
|
|
267
282
|
#-----------------------------------
|
268
283
|
def identifier(context=nil)
|
269
|
-
oldpos
|
284
|
+
oldpos= input_position
|
270
285
|
str=identifier_as_string(context)
|
271
286
|
|
272
287
|
#skip keyword processing if 'escaped' as it were, by def, . or ::
|
@@ -279,8 +294,8 @@ end
|
|
279
294
|
@moretokens.unshift(*parse_keywords(str,oldpos) do
|
280
295
|
#if not a keyword,
|
281
296
|
case str
|
282
|
-
when FUNCLIKE_KEYWORDS
|
283
|
-
when VARLIKE_KEYWORDS,RUBYKEYWORDS
|
297
|
+
when FUNCLIKE_KEYWORDS; #do nothing
|
298
|
+
when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
|
284
299
|
end
|
285
300
|
safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
|
286
301
|
end)
|
@@ -290,7 +305,7 @@ end
|
|
290
305
|
#-----------------------------------
|
291
306
|
def identifier_as_string(context)
|
292
307
|
#must begin w/ letter or underscore
|
293
|
-
str=eat_next_if(
|
308
|
+
str=eat_next_if(/[_a-z]/i) or return nil
|
294
309
|
|
295
310
|
#equals, question mark, and exclamation mark
|
296
311
|
#might be allowed at the end in some contexts.
|
@@ -305,18 +320,20 @@ end
|
|
305
320
|
when ?: then [?=, ??, ?!]
|
306
321
|
else [nil,??, ?!]
|
307
322
|
end
|
323
|
+
|
324
|
+
@in_def_name and maybe_eq= ?=
|
308
325
|
|
309
326
|
str<<til_charset(/[^a-z0-9_]/i)
|
310
327
|
|
311
328
|
#look for ?, !, or =, if allowed
|
312
|
-
case b
|
329
|
+
case b=getc
|
313
330
|
when nil #means we're at eof
|
314
331
|
#handling nil here prevents b from ever matching
|
315
332
|
#a nil value of maybe_qm, maybe_ex or maybe_eq
|
316
333
|
when maybe_qm
|
317
334
|
str << b
|
318
335
|
when maybe_ex
|
319
|
-
nc=(nextchar unless
|
336
|
+
nc=(nextchar unless eof?)
|
320
337
|
#does ex appear to be part of a larger operator?
|
321
338
|
if nc==?= #or nc==?~
|
322
339
|
back1char
|
@@ -324,7 +341,7 @@ end
|
|
324
341
|
str << b
|
325
342
|
end
|
326
343
|
when maybe_eq
|
327
|
-
nc=(nextchar unless
|
344
|
+
nc=(nextchar unless eof?)
|
328
345
|
#does eq appear to be part of a larger operator?
|
329
346
|
if nc==?= or nc==?~ or nc==?>
|
330
347
|
back1char
|
@@ -342,34 +359,37 @@ end
|
|
342
359
|
#-----------------------------------
|
343
360
|
#contexts in which comma may appear in ruby:
|
344
361
|
#multiple lhs (terminated by assign op)
|
345
|
-
#multiple rhs (in implicit context)
|
362
|
+
#multiple rhs (in implicit context)
|
346
363
|
#method actual param list (in ( or implicit context)
|
347
364
|
#method formal param list (in ( or implicit context)
|
348
|
-
#block formal param list (in | context)
|
365
|
+
#block formal param list (in | context)
|
366
|
+
#nested multiple rhs
|
367
|
+
#nested multiple lhs
|
368
|
+
#nested block formal list
|
369
|
+
#element reference/assignment (in [] or []= method actual parameter context)
|
349
370
|
#hash immediate (in imm{ context)
|
350
371
|
#array immediate (in imm[ context)
|
351
|
-
#
|
352
|
-
#list after for
|
372
|
+
#list between 'for' and 'in'
|
353
373
|
#list after rescue
|
354
374
|
#list after when
|
355
375
|
#list after undef
|
356
376
|
|
357
|
-
#note: comma in parens not around a param list is illegal
|
377
|
+
#note: comma in parens not around a param list or lhs or rhs is illegal
|
358
378
|
|
359
379
|
#-----------------------------------
|
360
380
|
#a comma has been seen. are we in an
|
361
381
|
#lvalue list or some other construct that uses commas?
|
362
382
|
def comma_in_lvalue_list?
|
363
|
-
not ListContext===@
|
383
|
+
@parsestack.last.lhs= (not ListContext===@parsestack.last)
|
364
384
|
end
|
365
385
|
|
366
386
|
#-----------------------------------
|
367
387
|
def in_lvar_define_state
|
368
388
|
#@defining_lvar is a hack
|
369
|
-
@defining_lvar or case ctx=@
|
370
|
-
when ForSMContext
|
371
|
-
when RescueSMContext
|
372
|
-
when
|
389
|
+
@defining_lvar or case ctx=@parsestack.last
|
390
|
+
when ForSMContext; ctx.state==:for
|
391
|
+
when RescueSMContext; ctx.state==:arrow
|
392
|
+
#when BlockParamListLhsContext; true
|
373
393
|
end
|
374
394
|
end
|
375
395
|
|
@@ -391,66 +411,102 @@ end
|
|
391
411
|
#look for and ignore local variable names
|
392
412
|
|
393
413
|
assert String===name
|
394
|
-
|
395
|
-
#fixme: keywords shouldn't be treated specially after :: and .
|
396
414
|
|
397
415
|
#maybe_local really means 'maybe local or constant'
|
398
416
|
maybe_local=case name
|
399
|
-
when /[^a-z_0-9]$/i
|
400
|
-
when /^[a-z_]
|
401
|
-
when /^[A-Z]
|
417
|
+
when /[^a-z_0-9]$/i; #do nothing
|
418
|
+
when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
|
419
|
+
when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
|
402
420
|
end
|
403
421
|
|
404
422
|
assert(@moretokens.empty?)
|
423
|
+
|
424
|
+
oldlast=@last_operative_token
|
405
425
|
|
406
426
|
tok=@last_operative_token=VarNameToken.new(name,pos)
|
407
427
|
|
408
|
-
oldpos
|
428
|
+
oldpos= input_position
|
409
429
|
sawnl=false
|
410
430
|
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
|
411
|
-
sawnl ||
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
431
|
+
if sawnl || eof?
|
432
|
+
if maybe_local then
|
433
|
+
if in_lvar_define_state
|
434
|
+
if /^[a-z_][a-zA-Z_0-9]*$/===name
|
435
|
+
assert !(lasttok===/^(\.|::)$/)
|
436
|
+
localvars[name]=true
|
437
|
+
else
|
438
|
+
lexerror tok,"not a valid variable name: #{name}"
|
439
|
+
end
|
440
|
+
return result.unshift(tok)
|
441
|
+
end
|
442
|
+
return result.unshift(tok) #if is_const
|
443
|
+
else
|
444
|
+
return result.unshift(
|
445
|
+
MethNameToken.new(name,pos), #insert implicit parens right after tok
|
446
|
+
ImplicitParamListStartToken.new( oldpos),
|
447
|
+
ImplicitParamListEndToken.new( oldpos)
|
448
|
+
)
|
416
449
|
end
|
417
|
-
|
450
|
+
end
|
418
451
|
|
419
452
|
#if next op is assignment (or comma in lvalue list)
|
420
453
|
#then omit implicit parens
|
421
454
|
assignment_coming=case nc=nextchar
|
422
|
-
when
|
423
|
-
when
|
424
|
-
when
|
425
|
-
when
|
426
|
-
when
|
455
|
+
when ?=; not /^=[>=~]$/===readahead(2)
|
456
|
+
when ?,; comma_in_lvalue_list?
|
457
|
+
when ?); last_context_not_implicit.lhs
|
458
|
+
when ?>,?<; /^(.)\1=$/===readahead(3)
|
459
|
+
when ?*,?&; /^(.)\1?=/===readahead(3)
|
460
|
+
when ?|; /^\|\|?=/===readahead(3) or
|
461
|
+
#is it a goalpost?
|
462
|
+
BlockParamListLhsContext===last_context_not_implicit &&
|
463
|
+
readahead(2)[1] != ?|
|
464
|
+
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
|
427
465
|
end
|
428
|
-
if (assignment_coming or in_lvar_define_state)
|
466
|
+
if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
|
429
467
|
tok=VarNameToken.new(name,pos)
|
430
468
|
if /[^a-z_0-9]$/i===name
|
431
469
|
lexerror tok,"not a valid variable name: #{name}"
|
432
470
|
elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
|
433
|
-
|
471
|
+
localvars[name]=true
|
434
472
|
end
|
435
473
|
return result.unshift(tok)
|
436
474
|
end
|
437
|
-
|
438
|
-
implicit_parens_to_emit=
|
439
|
-
|
475
|
+
|
476
|
+
implicit_parens_to_emit=
|
477
|
+
if assignment_coming
|
478
|
+
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
|
479
|
+
0
|
480
|
+
else
|
481
|
+
case nc
|
482
|
+
when nil: 2
|
483
|
+
when ?!; readahead(2)=='!=' ? 2 : 1
|
440
484
|
when NEVERSTARTPARAMLISTFIRST
|
441
485
|
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
|
442
|
-
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_
|
443
|
-
when ?{
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
when
|
450
|
-
|
486
|
+
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
|
487
|
+
when ?{
|
488
|
+
maybe_local=false
|
489
|
+
x=2
|
490
|
+
x-=1 if /\A(return|break|next)\Z/===name and
|
491
|
+
!(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
|
492
|
+
x
|
493
|
+
when ?(;
|
494
|
+
maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
|
495
|
+
when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
|
496
|
+
when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
|
497
|
+
when ?:,??; next2=readahead(2);
|
498
|
+
WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
|
499
|
+
# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
|
500
|
+
when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
|
501
|
+
when ?[; ws_toks.empty? ? 2 : 3
|
502
|
+
when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
|
503
|
+
else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
|
504
|
+
end
|
505
|
+
end
|
506
|
+
|
507
|
+
if is_const and implicit_parens_to_emit==3 then
|
508
|
+
implicit_parens_to_emit=1
|
451
509
|
end
|
452
|
-
|
453
|
-
implicit_parens_to_emit==3 and is_const and implicit_parens_to_emit=1
|
454
510
|
|
455
511
|
tok=if maybe_local and implicit_parens_to_emit>=2
|
456
512
|
implicit_parens_to_emit=0
|
@@ -459,15 +515,18 @@ end
|
|
459
515
|
MethNameToken
|
460
516
|
end.new(name,pos)
|
461
517
|
|
462
|
-
|
463
518
|
case implicit_parens_to_emit
|
464
|
-
when 2
|
519
|
+
when 2;
|
465
520
|
result.unshift ImplicitParamListStartToken.new(oldpos),
|
466
521
|
ImplicitParamListEndToken.new(oldpos)
|
467
|
-
when 1,3
|
468
|
-
|
469
|
-
|
470
|
-
|
522
|
+
when 1,3;
|
523
|
+
arr,pass=*param_list_coming_with_2_or_more_params?
|
524
|
+
result.push( *arr )
|
525
|
+
unless pass
|
526
|
+
result.unshift ImplicitParamListStartToken.new(oldpos)
|
527
|
+
@parsestack.push ParamListContextNoParen.new(@linenum)
|
528
|
+
end
|
529
|
+
when 0; #do nothing
|
471
530
|
else raise 'invalid value of implicit_parens_to_emit'
|
472
531
|
end
|
473
532
|
return result.unshift(tok)
|
@@ -476,22 +535,43 @@ end
|
|
476
535
|
# '\n (unescaped) and or'
|
477
536
|
# 'then else elsif rescue ensure (illegal in value context)'
|
478
537
|
|
479
|
-
# 'need to pop noparen from
|
538
|
+
# 'need to pop noparen from parsestack on these tokens: (in operator context)'
|
480
539
|
# 'not ok:'
|
481
540
|
# 'not (but should it be?)'
|
482
541
|
end
|
483
542
|
|
543
|
+
#-----------------------------------
|
544
|
+
def param_list_coming_with_2_or_more_params?
|
545
|
+
WHSPCHARS[prevchar] && (?(==nextchar) or return [[],false]
|
546
|
+
basesize=@parsestack.size
|
547
|
+
result=[get1token]
|
548
|
+
pass=loop{
|
549
|
+
tok=get1token
|
550
|
+
result<<tok
|
551
|
+
if @parsestack.size==basesize
|
552
|
+
break false
|
553
|
+
elsif ','==tok.to_s and @parsestack.size==basesize+1
|
554
|
+
break true
|
555
|
+
elsif EoiToken===tok
|
556
|
+
lexerror tok, "unexpected eof in parameter list"
|
557
|
+
end
|
558
|
+
}
|
559
|
+
return [result,pass]
|
560
|
+
end
|
561
|
+
|
484
562
|
#-----------------------------------
|
485
563
|
CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
|
486
564
|
ParamListContextNoParen=>ImplicitParamListEndToken,
|
487
|
-
|
565
|
+
WhenParamListContext=>KwParamListEndToken,
|
566
|
+
RescueSMContext=>KwParamListEndToken
|
488
567
|
}
|
489
568
|
def abort_noparens!(str='')
|
490
569
|
#assert @moretokens.empty?
|
491
570
|
result=[]
|
492
|
-
while klass=CONTEXT2ENDTOK[@
|
493
|
-
result << klass.new(
|
494
|
-
|
571
|
+
while klass=CONTEXT2ENDTOK[@parsestack.last.class]
|
572
|
+
result << klass.new(input_position-str.length)
|
573
|
+
break if RescueSMContext===@parsestack.last
|
574
|
+
@parsestack.pop
|
495
575
|
end
|
496
576
|
return result
|
497
577
|
end
|
@@ -501,13 +581,13 @@ if false #no longer used
|
|
501
581
|
def abort_1_noparen!(offs=0)
|
502
582
|
assert @moretokens.empty?
|
503
583
|
result=[]
|
504
|
-
while AssignmentRhsContext===@
|
505
|
-
@
|
506
|
-
result << AssignmentRhsListEndToken.new(
|
584
|
+
while AssignmentRhsContext===@parsestack.last
|
585
|
+
@parsestack.pop
|
586
|
+
result << AssignmentRhsListEndToken.new(input_position-offs)
|
507
587
|
end
|
508
|
-
ParamListContextNoParen===@
|
509
|
-
@
|
510
|
-
result << ImplicitParamListEndToken.new(
|
588
|
+
ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
|
589
|
+
@parsestack.pop
|
590
|
+
result << ImplicitParamListEndToken.new(input_position-offs)
|
511
591
|
return result
|
512
592
|
end
|
513
593
|
end
|
@@ -523,30 +603,31 @@ end
|
|
523
603
|
case str
|
524
604
|
when "end"
|
525
605
|
result.unshift(*abort_noparens!(str))
|
526
|
-
@
|
606
|
+
@parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?
|
527
607
|
|
528
608
|
=begin not needed?
|
529
|
-
if ExpectDoOrNlContext===@
|
530
|
-
@
|
531
|
-
assert @
|
609
|
+
if ExpectDoOrNlContext===@parsestack.last
|
610
|
+
@parsestack.pop
|
611
|
+
assert @parsestack.last.starter[/^(while|until|for)$/]
|
532
612
|
end
|
533
613
|
=end
|
534
614
|
|
535
|
-
WantsEndContext===@
|
536
|
-
ctx=@
|
615
|
+
WantsEndContext===@parsestack.last or lexerror result.last, 'unbalanced end'
|
616
|
+
ctx=@parsestack.pop
|
537
617
|
start,line=ctx.starter,ctx.linenum
|
538
618
|
BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
|
539
|
-
/^(
|
619
|
+
/^(do)$/===start and localvars.end_block
|
620
|
+
/^(class|module|def)$/===start and @localvars_stack.pop
|
540
621
|
|
541
622
|
when "class","module"
|
542
623
|
result.first.has_end!
|
543
|
-
@
|
544
|
-
@
|
545
|
-
|
624
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
625
|
+
@localvars_stack.push SymbolTable.new
|
626
|
+
|
546
627
|
when "if","unless" #could be infix form without end
|
547
628
|
if after_nonid_op?{false} #prefix form
|
548
629
|
result.first.has_end!
|
549
|
-
@
|
630
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
550
631
|
|
551
632
|
|
552
633
|
else #infix form
|
@@ -554,11 +635,11 @@ end
|
|
554
635
|
end
|
555
636
|
when "begin","case"
|
556
637
|
result.first.has_end!
|
557
|
-
@
|
638
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
558
639
|
when "while","until" #could be infix form without end
|
559
640
|
if after_nonid_op?{false} #prefix form
|
560
641
|
result.first.has_end!
|
561
|
-
@
|
642
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
562
643
|
expect_do_or_end_or_nl! str
|
563
644
|
|
564
645
|
else #infix form
|
@@ -566,24 +647,26 @@ end
|
|
566
647
|
end
|
567
648
|
when "for"
|
568
649
|
result.first.has_end!
|
569
|
-
|
650
|
+
result.push KwParamListStartToken.new(offset+str.length)
|
651
|
+
# corresponding EndToken emitted leaving ForContext ("in" branch, below)
|
652
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
570
653
|
#expect_do_or_end_or_nl! str #handled by ForSMContext now
|
571
|
-
@
|
654
|
+
@parsestack.push ForSMContext.new(@linenum)
|
572
655
|
when "do"
|
573
656
|
result.unshift(*abort_noparens!(str))
|
574
|
-
if ExpectDoOrNlContext===@
|
575
|
-
@
|
576
|
-
assert WantsEndContext===@
|
657
|
+
if ExpectDoOrNlContext===@parsestack.last
|
658
|
+
@parsestack.pop
|
659
|
+
assert WantsEndContext===@parsestack.last
|
577
660
|
else
|
578
661
|
result.last.has_end!
|
579
|
-
@
|
580
|
-
|
662
|
+
@parsestack.push WantsEndContext.new(str,@linenum)
|
663
|
+
localvars.start_block
|
581
664
|
block_param_list_lookahead
|
582
665
|
end
|
583
666
|
when "def"
|
584
667
|
result.first.has_end!
|
585
|
-
@
|
586
|
-
@
|
668
|
+
@parsestack.push WantsEndContext.new("def",@linenum)
|
669
|
+
@localvars_stack.push SymbolTable.new
|
587
670
|
safe_recurse { |aa|
|
588
671
|
@last_operative_token=KeywordToken.new "def" #hack
|
589
672
|
result.concat ignored_tokens
|
@@ -591,7 +674,7 @@ end
|
|
591
674
|
#read an expr like a.b.c or a::b::c
|
592
675
|
#or (expr).b.c
|
593
676
|
if nextchar==?( #look for optional parenthesised head
|
594
|
-
old_size=@
|
677
|
+
old_size=@parsestack.size
|
595
678
|
parencount=0
|
596
679
|
begin
|
597
680
|
tok=get1token
|
@@ -601,22 +684,58 @@ end
|
|
601
684
|
end
|
602
685
|
EoiToken===tok and lexerror tok, "eof in def header"
|
603
686
|
result<<tok
|
604
|
-
end until parencount==0 #@
|
687
|
+
end until parencount==0 #@parsestack.size==old_size
|
605
688
|
else #no parentheses, all tail
|
606
689
|
@last_operative_token=KeywordToken.new "." #hack hack
|
607
|
-
result
|
608
|
-
|
690
|
+
tokindex=result.size
|
691
|
+
result << tok=symbol(false,false)
|
692
|
+
name=tok.to_s
|
693
|
+
assert !in_lvar_define_state
|
694
|
+
|
695
|
+
#maybe_local really means 'maybe local or constant'
|
696
|
+
maybe_local=case name
|
697
|
+
when /[^a-z_0-9]$/i; #do nothing
|
698
|
+
when /^[@$]/; true
|
699
|
+
when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
|
700
|
+
when /^[a-z_]/; localvars===name
|
701
|
+
when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
|
702
|
+
end
|
703
|
+
if !ty and maybe_local
|
704
|
+
result.push( *ignored_tokens(false,false) )
|
705
|
+
nc=nextchar
|
706
|
+
if nc==?: || nc==?.
|
707
|
+
ty=VarNameToken
|
708
|
+
end
|
709
|
+
end
|
710
|
+
unless ty
|
711
|
+
ty=MethNameToken
|
712
|
+
endofs=tok.offset+tok.to_s.length
|
713
|
+
result[tokindex+1...tokindex+1]=
|
714
|
+
[ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
|
715
|
+
end
|
716
|
+
|
717
|
+
assert result[tokindex].equal?(tok)
|
718
|
+
result[tokindex]=ty.new(tok.to_s,tok.offset)
|
719
|
+
|
720
|
+
|
721
|
+
#if a.b.c.d is seen, a, b, and c
|
609
722
|
#should be considered maybe varname instead of methnames.
|
610
723
|
#the last (d in the example) is always considered a methname;
|
611
724
|
#it's what's being defined.
|
725
|
+
#b and c should be considered varnames only if
|
726
|
+
#they are capitalized and preceded by :: .
|
727
|
+
#a could even be a keyword (eg self or block_given?).
|
612
728
|
end
|
613
729
|
#read tail: .b.c.d etc
|
614
|
-
@last_operative_token=
|
730
|
+
result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
|
731
|
+
###@last_operative_token=result.last #naive
|
732
|
+
assert !(IgnoreToken===@last_operative_token)
|
615
733
|
state=:expect_op
|
734
|
+
@in_def_name=true
|
616
735
|
loop do
|
617
736
|
|
618
737
|
#look for start of parameter list
|
619
|
-
nc=(@moretokens.
|
738
|
+
nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
|
620
739
|
if state==:expect_op and /^[a-z_(&*]/i===nc
|
621
740
|
result.concat def_param_list
|
622
741
|
break
|
@@ -627,8 +746,8 @@ end
|
|
627
746
|
case tok
|
628
747
|
when EoiToken
|
629
748
|
lexerror tok,'unexpected eof in def header'
|
630
|
-
when
|
631
|
-
when MethNameToken
|
749
|
+
when StillIgnoreToken
|
750
|
+
when MethNameToken ,VarNameToken # /^[a-z_]/i.token_pat
|
632
751
|
lexerror tok,'expected . or ::' unless state==:expect_name
|
633
752
|
state=:expect_op
|
634
753
|
when /^(\.|::)$/.token_pat
|
@@ -642,6 +761,7 @@ end
|
|
642
761
|
"#{tok}:#{tok.class}")
|
643
762
|
end
|
644
763
|
end
|
764
|
+
@in_def_name=false
|
645
765
|
}
|
646
766
|
when "alias"
|
647
767
|
safe_recurse { |a|
|
@@ -663,6 +783,7 @@ end
|
|
663
783
|
tok or lexerror(result.first,"bad symbol in undef")
|
664
784
|
result<< tok
|
665
785
|
@last_operative_token=tok
|
786
|
+
assert !(IgnoreToken===@last_operative_token)
|
666
787
|
|
667
788
|
sawnl=false
|
668
789
|
result.concat ignored_tokens(true){|nl| sawnl=true}
|
@@ -674,26 +795,47 @@ end
|
|
674
795
|
}
|
675
796
|
|
676
797
|
# when "defined?"
|
677
|
-
# huh
|
678
798
|
#defined? might have a baresymbol following it
|
679
799
|
#does it need to be handled specially?
|
800
|
+
#it would seem not.....
|
680
801
|
|
681
802
|
when "when"
|
803
|
+
#abort_noparens! emits EndToken on leaving context
|
682
804
|
result.unshift(*abort_noparens!(str))
|
683
|
-
|
805
|
+
result.push KwParamListStartToken.new( offset+str.length)
|
806
|
+
@parsestack.push WhenParamListContext.new(str,@linenum)
|
684
807
|
|
685
808
|
when "rescue"
|
686
|
-
|
687
|
-
|
809
|
+
unless after_nonid_op? {false}
|
810
|
+
#rescue needs to be treated differently when in operator context...
|
811
|
+
#i think no RescueSMContext should be pushed on the stack...
|
812
|
+
#plus, the rescue token should be marked as infix
|
813
|
+
result.first.set_infix!
|
814
|
+
else
|
815
|
+
result.push KwParamListStartToken.new(offset+str.length)
|
816
|
+
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
|
817
|
+
result.unshift(*abort_noparens!(str))
|
818
|
+
@parsestack.push RescueSMContext.new(@linenum)
|
819
|
+
end
|
688
820
|
|
689
|
-
when "then"
|
821
|
+
when "then"
|
822
|
+
result.unshift(*abort_noparens!(str))
|
823
|
+
@parsestack.last.see self,:then
|
824
|
+
|
825
|
+
when "in"
|
826
|
+
result.unshift KwParamListEndToken.new( offset)
|
690
827
|
result.unshift(*abort_noparens!(str))
|
691
|
-
@
|
828
|
+
@parsestack.last.see self,:in
|
692
829
|
|
693
|
-
when
|
830
|
+
when /\A(#{BINOPWORDS}|#{INNERBOUNDINGWORDS})\Z/o
|
694
831
|
result.unshift(*abort_noparens!(str))
|
695
832
|
|
696
|
-
when
|
833
|
+
when /\A(return|break|next)\Z/
|
834
|
+
result=yield
|
835
|
+
result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
|
836
|
+
|
837
|
+
when FUNCLIKE_KEYWORDS
|
838
|
+
result=yield
|
697
839
|
|
698
840
|
when RUBYKEYWORDS
|
699
841
|
#do nothing
|
@@ -706,6 +848,36 @@ end
|
|
706
848
|
end
|
707
849
|
|
708
850
|
|
851
|
+
#-----------------------------------
|
852
|
+
def parsestack_lastnonassign_is?(obj)
|
853
|
+
@parsestack.reverse_each{|ctx|
|
854
|
+
case ctx
|
855
|
+
# when klass: return true
|
856
|
+
when AssignmentRhsContext
|
857
|
+
else return ctx.object_id==obj.object_id
|
858
|
+
end
|
859
|
+
}
|
860
|
+
end
|
861
|
+
|
862
|
+
#-----------------------------------
|
863
|
+
#what's inside goalposts (the block formal parameter list)
|
864
|
+
#is considered the left hand side of an assignment.
|
865
|
+
#inside goalposts, a local variable is declared if
|
866
|
+
#it has one of the following tokens on both sides:
|
867
|
+
# , (if directly inside goalposts or nested lhs)
|
868
|
+
# | (as a goalpost)
|
869
|
+
# * or & (unary only)
|
870
|
+
# ( or ) (if they form a nested left hand side)
|
871
|
+
#parens form a nested lhs if they're not part of an actual
|
872
|
+
#parameter list and have a comma directly in them somewhere
|
873
|
+
#a nested lhs _must_ have a comma in it somewhere. this is
|
874
|
+
#not legal:
|
875
|
+
# (foo)=[1]
|
876
|
+
#whereas this is:
|
877
|
+
# (foo,)=[1]
|
878
|
+
|
879
|
+
|
880
|
+
|
709
881
|
#-----------------------------------
|
710
882
|
def block_param_list_lookahead
|
711
883
|
safe_recurse{ |la|
|
@@ -713,27 +885,45 @@ end
|
|
713
885
|
a=ignored_tokens
|
714
886
|
|
715
887
|
if eat_next_if(?|)
|
716
|
-
a<<KeywordToken.new("|"
|
888
|
+
a<<KeywordToken.new("|", input_position-1)
|
889
|
+
if true
|
890
|
+
@parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
|
891
|
+
nextchar==?| and a.push NoWsToken.new(input_position)
|
892
|
+
else
|
717
893
|
if eat_next_if(?|)
|
718
|
-
a.concat [NoWsToken.new(
|
719
|
-
KeywordToken.new('|'
|
894
|
+
a.concat [NoWsToken.new(input_position-1),
|
895
|
+
KeywordToken.new('|', input_position-1)]
|
720
896
|
else
|
721
897
|
assert !@defining_lvar
|
722
898
|
@defining_lvar=true
|
723
899
|
assert((@last_operative_token===';' or NewlineToken===@last_operative_token))
|
724
|
-
@
|
725
|
-
#block param initializers
|
726
|
-
|
900
|
+
@parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
|
901
|
+
#block param initializers ARE supported here, even tho ruby doesn't allow them!
|
902
|
+
tok=nil
|
903
|
+
loop do
|
727
904
|
tok=get1token
|
728
|
-
|
905
|
+
case tok
|
906
|
+
when EoiToken; lexerror tok,"eof in block parameter list"
|
907
|
+
when AssignmentRhsListStartToken; @defining_lvar=false
|
908
|
+
when AssignmentRhsListEndToken; parsestack_lastnonassign_is?(mycontext) and @defining_lvar=true
|
909
|
+
end
|
910
|
+
|
911
|
+
tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
|
729
912
|
a<<tok
|
730
|
-
end
|
731
|
-
assert@defining_lvar
|
913
|
+
end
|
914
|
+
assert@defining_lvar || AssignmentRhsContext===@parsestack.last
|
732
915
|
@defining_lvar=false
|
733
|
-
|
734
|
-
|
916
|
+
while AssignmentRhsContext===@parsestack.last
|
917
|
+
a.push( *abort_noparens!('|') )
|
918
|
+
end
|
919
|
+
|
920
|
+
@parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
|
921
|
+
@parsestack.pop
|
922
|
+
|
923
|
+
a<<KeywordToken.new('|',tok.offset)
|
735
924
|
@moretokens.empty? or
|
736
925
|
fixme %#moretokens might be set from get1token call above...might be bad#
|
926
|
+
end
|
737
927
|
end
|
738
928
|
end
|
739
929
|
|
@@ -755,8 +945,9 @@ end
|
|
755
945
|
#then match the following tokens until
|
756
946
|
#the matching endbrace is found
|
757
947
|
def def_param_list
|
948
|
+
@in_def_name=false
|
758
949
|
result=[]
|
759
|
-
normal_comma_level=
|
950
|
+
normal_comma_level=old_parsestack_size=@parsestack.size
|
760
951
|
safe_recurse { |a|
|
761
952
|
assert(@moretokens.empty?)
|
762
953
|
assert((not IgnoreToken===@moretokens[0]))
|
@@ -770,9 +961,9 @@ end
|
|
770
961
|
assert(tok==='(')
|
771
962
|
|
772
963
|
|
773
|
-
#
|
964
|
+
#parsestack was changed by get1token above...
|
774
965
|
normal_comma_level+=1
|
775
|
-
assert(normal_comma_level==@
|
966
|
+
assert(normal_comma_level==@parsestack.size)
|
776
967
|
endingblock=proc{|tok| tok===')' }
|
777
968
|
else
|
778
969
|
endingblock=proc{|tok| tok===';' or NewlineToken===tok}
|
@@ -785,36 +976,48 @@ end
|
|
785
976
|
#read local parameter names
|
786
977
|
loop do
|
787
978
|
expect_name=(@last_operative_token===',' and
|
788
|
-
normal_comma_level==@
|
979
|
+
normal_comma_level==@parsestack.size)
|
789
980
|
expect_name and @defining_lvar||=true
|
790
981
|
result << tok=get1token
|
791
982
|
lexerror tok, "unexpected eof in def header" if EoiToken===tok
|
792
983
|
|
793
984
|
#break if at end of param list
|
794
985
|
endingblock===tok and
|
795
|
-
|
986
|
+
old_parsestack_size>=@parsestack.size and break
|
796
987
|
|
797
988
|
#next token is a local var name
|
798
989
|
#(or the one after that if unary ops present)
|
799
990
|
#result.concat ignored_tokens
|
800
|
-
expect_name
|
801
|
-
|
802
|
-
|
991
|
+
if expect_name
|
992
|
+
case tok
|
993
|
+
when IgnoreToken #, /^[A-Z]/ #do nothing
|
994
|
+
when /^,$/.token_pat #hack
|
995
|
+
|
996
|
+
|
997
|
+
when VarNameToken
|
803
998
|
assert@defining_lvar
|
804
999
|
@defining_lvar=false
|
805
1000
|
assert((not @last_operative_token===','))
|
806
|
-
|
1001
|
+
when /^[&*]$/.token_pat #unary form...
|
807
1002
|
#a NoWsToken is also expected... read it now
|
808
1003
|
result.concat maybe_no_ws_token #not needed?
|
809
1004
|
@last_operative_token=KeywordToken.new ','
|
810
|
-
|
1005
|
+
else
|
1006
|
+
lexerror tok,"unfamiliar var name '#{tok}'"
|
1007
|
+
end
|
1008
|
+
elsif /^,$/.token_pat===tok and
|
1009
|
+
normal_comma_level+1==@parsestack.size and
|
1010
|
+
AssignmentRhsContext===@parsestack.last
|
1011
|
+
#seeing comma here should end implicit rhs started within the param list
|
1012
|
+
result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
|
1013
|
+
@parsestack.pop
|
811
1014
|
end
|
812
1015
|
end
|
813
1016
|
|
814
1017
|
@defining_lvar=false
|
815
1018
|
|
816
1019
|
|
817
|
-
assert(@
|
1020
|
+
assert(@parsestack.size <= old_parsestack_size)
|
818
1021
|
assert(endingblock[tok])
|
819
1022
|
|
820
1023
|
#hack: force next token to look like start of a
|
@@ -846,19 +1049,19 @@ end
|
|
846
1049
|
end
|
847
1050
|
|
848
1051
|
#-----------------------------------
|
849
|
-
#handle * in ruby code. is unary or binary operator?
|
1052
|
+
#handle * & in ruby code. is unary or binary operator?
|
850
1053
|
def star_or_amp(ch)
|
851
1054
|
assert('*&'[ch])
|
852
|
-
|
1055
|
+
want_unary=unary_op_expected? ch
|
1056
|
+
result=(quadriop ch)
|
1057
|
+
if want_unary
|
853
1058
|
#readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
|
854
|
-
result
|
1059
|
+
assert OperatorToken===result
|
1060
|
+
result.unary=true #result should distinguish unary+binary *&
|
855
1061
|
WHSPLF[nextchar.chr] or
|
856
|
-
@moretokens << NoWsToken.new(
|
857
|
-
return result
|
858
|
-
else
|
859
|
-
return(quadriop ch)
|
1062
|
+
@moretokens << NoWsToken.new(input_position)
|
860
1063
|
end
|
861
|
-
|
1064
|
+
result
|
862
1065
|
end
|
863
1066
|
|
864
1067
|
#-----------------------------------
|
@@ -868,7 +1071,7 @@ end
|
|
868
1071
|
getchar
|
869
1072
|
NumberToken.new getchar_maybe_escape
|
870
1073
|
else
|
871
|
-
@
|
1074
|
+
@parsestack.push TernaryContext.new(@linenum)
|
872
1075
|
KeywordToken.new getchar #operator
|
873
1076
|
end
|
874
1077
|
end
|
@@ -888,18 +1091,19 @@ end
|
|
888
1091
|
end
|
889
1092
|
|
890
1093
|
#-----------------------------------
|
891
|
-
#return true if tok corresponds to a variable or constant,
|
892
|
-
#
|
1094
|
+
#return true if last tok corresponds to a variable or constant,
|
1095
|
+
#false if its for a method, nil for something else
|
1096
|
+
#we assume it is a valid token with a correctly formed name.
|
893
1097
|
#...should really be called was_var_name
|
894
1098
|
def is_var_name?
|
895
1099
|
(tok=@last_operative_token)
|
896
1100
|
|
897
1101
|
s=tok.to_s
|
898
1102
|
case s
|
899
|
-
when /[^a-z_0-9]$/i
|
900
|
-
when /^[a-z_]
|
901
|
-
when /^[A-Z]
|
902
|
-
when /^[@$<]
|
1103
|
+
when /[^a-z_0-9]$/i; false
|
1104
|
+
when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
|
1105
|
+
when /^[A-Z]/; VarNameToken===tok
|
1106
|
+
when /^[@$<]/; true
|
903
1107
|
else raise "not var or method name: #{s}"
|
904
1108
|
end
|
905
1109
|
end
|
@@ -907,21 +1111,26 @@ end
|
|
907
1111
|
#-----------------------------------
|
908
1112
|
def colon_quote_expected?(ch) #yukko hack
|
909
1113
|
assert ':?'[ch]
|
910
|
-
readahead(2)[/^(\?[^#{WHSPLF}]|:[
|
1114
|
+
readahead(2)[/^(\?[^#{WHSPLF}]|:[^\s\r\n\t\f\v :])$/o] or return false
|
911
1115
|
|
912
1116
|
after_nonid_op? {
|
913
1117
|
#possible func-call as operator
|
914
1118
|
|
915
|
-
|
1119
|
+
not is_var_name? and
|
1120
|
+
if ch==':'
|
1121
|
+
not TernaryContext===@parsestack.last
|
1122
|
+
else
|
1123
|
+
!readahead(3)[/^\?[a-z0-9_]{2}/i]
|
1124
|
+
end
|
916
1125
|
}
|
917
1126
|
end
|
918
1127
|
|
919
1128
|
#-----------------------------------
|
920
1129
|
def symbol_or_op(ch)
|
921
|
-
startpos
|
1130
|
+
startpos= input_position
|
922
1131
|
qe= colon_quote_expected?(ch)
|
923
1132
|
lastchar=prevchar
|
924
|
-
eat_next_if(ch) or raise "needed: "+ch
|
1133
|
+
eat_next_if(ch[0]) or raise "needed: "+ch
|
925
1134
|
|
926
1135
|
#handle quoted symbols like :"foobar", :"[]"
|
927
1136
|
qe and return symbol(':')
|
@@ -932,13 +1141,13 @@ end
|
|
932
1141
|
@moretokens.push(*abort_noparens!(':'))
|
933
1142
|
|
934
1143
|
#end ternary context, if any
|
935
|
-
@
|
1144
|
+
@parsestack.last.see self,:colon
|
936
1145
|
|
937
|
-
TernaryContext===@
|
1146
|
+
TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
|
938
1147
|
|
939
|
-
if ExpectDoOrNlContext===@
|
940
|
-
@
|
941
|
-
assert @
|
1148
|
+
if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
|
1149
|
+
@parsestack.pop
|
1150
|
+
assert @parsestack.last.starter[/^(while|until|for)$/]
|
942
1151
|
end
|
943
1152
|
|
944
1153
|
@moretokens.push KeywordToken.new(':',startpos)
|
@@ -965,17 +1174,17 @@ end
|
|
965
1174
|
#-----------------------------------
|
966
1175
|
def symbol(notbare,couldbecallsite=!notbare)
|
967
1176
|
assert !couldbecallsite
|
968
|
-
start
|
1177
|
+
start= input_position
|
969
1178
|
notbare and start-=1
|
970
1179
|
klass=(notbare ? SymbolToken : MethNameToken)
|
971
1180
|
|
972
1181
|
#look for operators
|
973
1182
|
opmatches=readahead(3)[RUBYSYMOPERATORREX]
|
974
|
-
result= opmatches ?
|
1183
|
+
result= opmatches ? read(opmatches.size) :
|
975
1184
|
case nc=nextchar
|
976
1185
|
when ?" then assert notbare;double_quote('"')
|
977
1186
|
when ?' then assert notbare;double_quote("'")
|
978
|
-
when ?` then
|
1187
|
+
when ?` then read(1)
|
979
1188
|
when ?@ then at_identifier.to_s
|
980
1189
|
when ?$ then dollar_identifier.to_s
|
981
1190
|
when ?_,?a..?z then identifier_as_string(?:)
|
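Note: the symbol code above accepts several spellings after the leading `:`. For orientation, the symbol shapes being handled look like this (illustrative):

  syms = [ :foo,      # bare identifier symbol
           :"a b",    # quoted symbol (the double_quote scanner is reused for the body)
           :[]=,      # operator symbol, caught by the operator lookahead
           :@ivar,    # instance-variable symbol
           :$glob ]   # global-variable symbol
  p syms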
@@ -991,19 +1200,24 @@ end
|
|
991
1200
|
return lexerror(klass.new(result,start),error)
|
992
1201
|
end
|
993
1202
|
|
1203
|
+
def merge_assignment_op_in_setter_callsites?
|
1204
|
+
false
|
1205
|
+
end
|
994
1206
|
#-----------------------------------
|
995
1207
|
def callsite_symbol(tok_to_errify)
|
996
|
-
start
|
1208
|
+
start= input_position
|
997
1209
|
|
998
1210
|
#look for operators
|
999
1211
|
opmatches=readahead(3)[RUBYSYMOPERATORREX]
|
1000
|
-
return [opmatches ?
|
1212
|
+
return [opmatches ? read(opmatches.size) :
|
1001
1213
|
case nc=nextchar
|
1002
|
-
when ?` then
|
1003
|
-
when ?_,?a..?z,?A..?Z then
|
1214
|
+
when ?` then read(1)
|
1215
|
+
when ?_,?a..?z,?A..?Z then
|
1216
|
+
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
|
1217
|
+
identifier_as_string(context)
|
1004
1218
|
else
|
1005
1219
|
@last_operative_token=KeywordToken.new(';')
|
1006
|
-
lexerror(tok_to_errify,"unexpected char starting symbol: #{nc.chr}")
|
1220
|
+
lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
|
1007
1221
|
nil
|
1008
1222
|
end, start
|
1009
1223
|
]
|
@@ -1011,10 +1225,10 @@ end
|
|
1011
1225
|
|
1012
1226
|
#-----------------------------------
|
1013
1227
|
def here_header
|
1014
|
-
|
1228
|
+
read(2)=='<<' or raise "parser insanity"
|
1015
1229
|
|
1016
1230
|
dash=eat_next_if(?-)
|
1017
|
-
quote=eat_next_if(
|
1231
|
+
quote=eat_next_if( /['"`]/)
|
1018
1232
|
if quote
|
1019
1233
|
ender=til_charset(/[#{quote}]/)
|
1020
1234
|
(quote==getchar) or
|
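Note: here_header above reads the `<<` it already knows starts a here-document, then an optional `-` and an optional quote ('), ", or backquote) around the terminator. In source form the headers it has to recognize look like this (illustrative):

  a = <<EOF
plain terminator
EOF
  b = <<-INDENTED
  the '-' form lets the terminator line be indented
  INDENTED
  c = <<'RAW'
no #{interpolation} happens in a single-quoted here-doc
RAW
  p a, b, c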
@@ -1042,8 +1256,8 @@ end
|
|
1042
1256
|
#handle case of here header in a string inclusion, but
|
1043
1257
|
#here body outside it.
|
1044
1258
|
cnt=0
|
1045
|
-
1.upto @
|
1046
|
-
case @
|
1259
|
+
1.upto @parsestack.size do |i|
|
1260
|
+
case @parsestack[-i]
|
1047
1261
|
when AssignmentRhsContext,ParamListContextNoParen,TopLevelContext
|
1048
1262
|
else cnt+=1
|
1049
1263
|
end
|
@@ -1054,11 +1268,11 @@ end
|
|
1054
1268
|
end
|
1055
1269
|
|
1056
1270
|
tok=get1token
|
1057
|
-
assert(a.
|
1271
|
+
assert(a.equal?( @moretokens))
|
1058
1272
|
toks<<tok
|
1059
1273
|
EoiToken===tok and lexerror tok, "here body expected before eof"
|
1060
1274
|
end while res.unsafe_to_use
|
1061
|
-
assert(a.
|
1275
|
+
assert(a.equal?( @moretokens))
|
1062
1276
|
a[0,0]= toks #same as a=toks+a, but keeps a's id
|
1063
1277
|
}
|
1064
1278
|
|
@@ -1076,9 +1290,9 @@ end
|
|
1076
1290
|
if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
|
1077
1291
|
here_header
|
1078
1292
|
else
|
1079
|
-
operator_or_methname_token
|
1293
|
+
operator_or_methname_token read(2)
|
1080
1294
|
end
|
1081
|
-
when "<=>" then operator_or_methname_token
|
1295
|
+
when "<=>" then operator_or_methname_token read(3)
|
1082
1296
|
else quadriop(ch)
|
1083
1297
|
end
|
1084
1298
|
end
|
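Note: the `<` handler above has to decide whether `<<` begins a here-document or is the shift/append operator, and it also special-cases `<=>`. The ambiguity it resolves (illustrative):

  log = []
  log << "line"          # << as an operator: append
  msg = <<TEXT           # << as a here-doc start
hello
TEXT
  order = (1 <=> 2)      # <=> is always an operator / method name
  p log, msg, order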
@@ -1087,115 +1301,152 @@ end
|
|
1087
1301
|
def escnewline(ch)
|
1088
1302
|
assert ch == '\\'
|
1089
1303
|
|
1090
|
-
pos
|
1304
|
+
pos= input_position
|
1091
1305
|
result=getchar
|
1092
1306
|
if nl=readnl
|
1093
1307
|
result+=nl
|
1094
1308
|
else
|
1095
1309
|
error='illegal escape sequence'
|
1096
1310
|
end
|
1097
|
-
|
1098
|
-
|
1311
|
+
|
1312
|
+
@moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
|
1313
|
+
optional_here_bodies
|
1099
1314
|
|
1315
|
+
lexerror EscNlToken.new(@filename,ln-1,result,pos), error
|
1316
|
+
end
|
1317
|
+
|
1100
1318
|
#-----------------------------------
|
1101
|
-
def
|
1102
|
-
assert("\r\n"[nextchar.chr])
|
1319
|
+
def optional_here_bodies
|
1103
1320
|
|
1104
1321
|
#handle here bodies queued up by previous line
|
1105
1322
|
#(we should be more compatible with dos/mac style newlines...)
|
1106
|
-
|
1107
|
-
tofill.string.offset
|
1323
|
+
while tofill=@incomplete_here_tokens.shift
|
1324
|
+
tofill.string.offset= input_position
|
1108
1325
|
loop {
|
1109
|
-
assert("\r\n"[
|
1110
|
-
|
1111
|
-
#retr evrything til next nl
|
1112
|
-
line=all_quote(/^[\r\n]$/, tofill.quote, /^[\r\n]$/, :regex_esc_seq)
|
1113
|
-
#(you didn't know all_quote could take a regex, did you?)
|
1326
|
+
assert("\r\n"[prevchar])
|
1114
1327
|
|
1115
|
-
#
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1328
|
+
#here body terminator?
|
1329
|
+
oldpos= input_position
|
1330
|
+
if tofill.dash
|
1331
|
+
til_charset(/[^#{WHSP}]/o)
|
1332
|
+
end
|
1333
|
+
break if eof?
|
1334
|
+
break if read(tofill.ender.size)==tofill.ender and readnl
|
1335
|
+
input_position_set oldpos
|
1336
|
+
|
1337
|
+
if tofill.quote=="'"
|
1338
|
+
line=til_charset(/[\r\n]/)+readnl
|
1339
|
+
line.gsub! "\\\\", "\\"
|
1340
|
+
tofill.append line
|
1341
|
+
assert(line[-1..-1][/[\r\n]/])
|
1342
|
+
else
|
1343
|
+
|
1344
|
+
back1char #-1 to make newline char the next to read
|
1345
|
+
@linenum-=1
|
1346
|
+
|
1347
|
+
#retr evrything til next nl
|
1348
|
+
line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
|
1349
|
+
#(you didn't know all_quote could take a regex, did you?)
|
1350
|
+
|
1351
|
+
#get rid of fals that otherwise appear to be in the middle of
|
1352
|
+
#a string (and are emitted out of order)
|
1353
|
+
fal=@moretokens.pop
|
1354
|
+
assert FileAndLineToken===fal || fal.nil?
|
1355
|
+
|
1356
|
+
back1char
|
1357
|
+
@linenum-=1
|
1358
|
+
assert("\r\n"[nextchar.chr])
|
1359
|
+
tofill.append_token line
|
1360
|
+
tofill.append readnl
|
1361
|
+
end
|
1130
1362
|
}
|
1131
1363
|
|
1132
|
-
assert("\r\n"[
|
1364
|
+
assert(eof? || "\r\n"[prevchar])
|
1133
1365
|
tofill.unsafe_to_use=false
|
1366
|
+
tofill.line=@linenum-1
|
1134
1367
|
|
1135
|
-
|
1368
|
+
@moretokens.push \
|
1369
|
+
tofill.bodyclass.new(tofill),
|
1370
|
+
FileAndLineToken.new(@filename,@linenum,input_position)
|
1136
1371
|
end
|
1372
|
+
|
1373
|
+
end
|
1374
|
+
|
1375
|
+
#-----------------------------------
|
1376
|
+
def newline(ch)
|
1377
|
+
assert("\r\n"[nextchar.chr])
|
1378
|
+
|
1379
|
+
|
1137
1380
|
|
1138
1381
|
#ordinary newline handling (possibly implicitly escaped)
|
1139
1382
|
assert("\r\n"[nextchar.chr])
|
1383
|
+
assert !@parsestack.empty?
|
1140
1384
|
assert @moretokens.empty?
|
1141
1385
|
result=if NewlineToken===@last_operative_token or #hack
|
1142
1386
|
@last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
|
1143
1387
|
!after_nonid_op?{false}
|
1144
1388
|
then #hack-o-rama: probly cases left out above
|
1145
1389
|
a= abort_noparens!
|
1146
|
-
ExpectDoOrNlContext===@
|
1147
|
-
|
1390
|
+
ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
|
1391
|
+
assert !@parsestack.empty?
|
1392
|
+
@parsestack.last.see self,:semi
|
1148
1393
|
|
1149
1394
|
a << super(ch)
|
1150
1395
|
@moretokens.replace a+@moretokens
|
1151
1396
|
@moretokens.shift
|
1152
1397
|
else
|
1153
|
-
offset
|
1154
|
-
|
1155
|
-
|
1398
|
+
offset= input_position
|
1399
|
+
nl=readnl
|
1400
|
+
@moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
|
1401
|
+
EscNlToken.new(@filename,@linenum-1,nl,offset)
|
1156
1402
|
#WsToken.new ' ' #why? #should be "\\\n" ?
|
1157
1403
|
end
|
1158
1404
|
|
1405
|
+
optional_here_bodies
|
1406
|
+
|
1159
1407
|
start_of_line_directives
|
1160
1408
|
|
1161
1409
|
return result
|
1162
1410
|
end
|
1163
1411
|
|
1164
1412
|
#-----------------------------------
|
1165
|
-
EQBEGIN=%r/^=begin[
|
1413
|
+
EQBEGIN=%r/^=begin[ \t\v\r\n\f]$/
|
1166
1414
|
EQBEGINLENGTH=7
|
1167
1415
|
EQEND='=end'
|
1168
|
-
|
1416
|
+
EQENDLENGTH=4
|
1417
|
+
ENDMARKER=/^__END__[\r\n]?\Z/
|
1169
1418
|
ENDMARKERLENGTH=8
|
1170
1419
|
def start_of_line_directives
|
1171
1420
|
#handle =begin...=end (at start of a line)
|
1172
1421
|
while EQBEGIN===readahead(EQBEGINLENGTH)
|
1173
|
-
startpos
|
1174
|
-
more
|
1175
|
-
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1422
|
+
startpos= input_position
|
1423
|
+
more= read(EQBEGINLENGTH-1) #get =begin
|
1424
|
+
|
1425
|
+
begin
|
1426
|
+
eof? and raise "eof before =end"
|
1427
|
+
more<<til_charset(/[\r\n]/)
|
1428
|
+
more<<readnl
|
1429
|
+
end until readahead(EQENDLENGTH)==EQEND
|
1430
|
+
|
1182
1431
|
#read rest of line after =end
|
1183
|
-
more <<
|
1432
|
+
more << til_charset(/[\r\n]/)
|
1184
1433
|
assert((?\r===nextchar or ?\n===nextchar))
|
1185
1434
|
assert !(/[\r\n]/===more[-1,1])
|
1435
|
+
more<< readnl
|
1186
1436
|
|
1187
|
-
newls= more.scan(/\r\n?|\n\r?/)
|
1188
|
-
@linenum+= newls.size
|
1437
|
+
# newls= more.scan(/\r\n?|\n\r?/)
|
1438
|
+
# @linenum+= newls.size
|
1189
1439
|
|
1190
1440
|
#inject the fresh comment into future token results
|
1191
|
-
@moretokens.push IgnoreToken.new(more,startpos)
|
1441
|
+
@moretokens.push IgnoreToken.new(more,startpos),
|
1442
|
+
FileAndLineToken.new(@filename,@linenum,input_position)
|
1192
1443
|
end
|
1193
1444
|
|
1194
1445
|
#handle __END__
|
1195
1446
|
if ENDMARKER===readahead(ENDMARKERLENGTH)
|
1196
|
-
assert !(ImplicitContext===@
|
1197
|
-
@moretokens.unshift endoffile_detected(
|
1198
|
-
@file.
|
1447
|
+
assert !(ImplicitContext===@parsestack.last)
|
1448
|
+
@moretokens.unshift endoffile_detected(read(7))
|
1449
|
+
# input_position_set @file.size
|
1199
1450
|
end
|
1200
1451
|
end
|
1201
1452
|
|
@@ -1221,11 +1472,15 @@ end
|
|
1221
1472
|
#used to resolve the ambiguity of
|
1222
1473
|
# <<, %, ? in ruby
|
1223
1474
|
#returns whether current token is to be the start of a literal
|
1224
|
-
#/ is not handled right here if whitespace immediately follows the /
|
1225
1475
|
def quote_expected?(ch) #yukko hack
|
1476
|
+
if AssignmentContext===@parsestack.last
|
1477
|
+
@parsestack.pop
|
1478
|
+
return false
|
1479
|
+
end
|
1480
|
+
|
1226
1481
|
case ch[0]
|
1227
1482
|
when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
|
1228
|
-
when ?% then readahead(3)[/^%([a-
|
1483
|
+
when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
|
1229
1484
|
when ?< then !readahead(4)[/^<<-?['"`a-z0-9_]/i]
|
1230
1485
|
else raise 'unexpected ch (#{ch}) in quote_expected?'
|
1231
1486
|
# when ?+,?-,?&,?*,?~,?! then '*&='[readahead(2)[1..1]]
|
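Note: quote_expected? decides whether `%`, `?`, or `<` starts a literal rather than an operator by peeking at the following characters. For `%` the distinction is between the modulo/format operator and percent literals, for example (illustrative):

  x     = 10 % 3        # binary operator: modulo
  words = %w[a b c]     # percent literal: array of words
  re    = %r{ab+c}      # percent literal: regexp
  str   = %q(quoted)    # percent literal: single-quoted string
  p x, words, re, str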
@@ -1240,22 +1495,29 @@ end
|
|
1240
1495
|
end
|
1241
1496
|
|
1242
1497
|
#-----------------------------------
|
1498
|
+
#returns false if last token was a value, true if it was an operator.
|
1499
|
+
#returns what block yields if last token was a method name.
|
1243
1500
|
#used to resolve the ambiguity of
|
1244
|
-
# <<, %, /, ?, :, and newline in ruby
|
1501
|
+
# <<, %, /, ?, :, and newline (among others) in ruby
|
1245
1502
|
def after_nonid_op?
|
1246
1503
|
case @last_operative_token
|
1247
|
-
when MethNameToken,
|
1504
|
+
when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
|
1505
|
+
#VarNameToken should really be left out of this case...
|
1506
|
+
#should be in next branch instead
|
1507
|
+
#callers all check for last token being not a variable if they pass anything
|
1508
|
+
#but {false} in the block
|
1248
1509
|
return yield
|
1249
1510
|
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
|
1250
|
-
%r{^(
|
1251
|
-
|
1511
|
+
%r{^(
|
1512
|
+
class|module|end|self|true|false|nil|
|
1513
|
+
__FILE__|__LINE__|[\})\]]|alias|(un)?def|for
|
1252
1514
|
)$}x.token_pat
|
1253
|
-
#
|
1254
|
-
#maybe class/module shouldn't either?
|
1515
|
+
#dunno about def/undef
|
1516
|
+
#maybe class/module shouldn't be here either?
|
1255
1517
|
#for is also in NewlineToken branch, below.
|
1256
1518
|
#what about rescue?
|
1257
1519
|
return false
|
1258
|
-
when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS})$/o.token_pat
|
1520
|
+
when /^(#{RUBYOPERATORREX}|#{INNERBOUNDINGWORDS}|do)$/o.token_pat
|
1259
1521
|
#regexs above must match whole string
|
1260
1522
|
#assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
|
1261
1523
|
return true
|
@@ -1273,19 +1535,46 @@ end
|
|
1273
1535
|
end
|
1274
1536
|
end
|
1275
1537
|
|
1538
|
+
|
1539
|
+
|
1540
|
+
|
1541
|
+
#-----------------------------------
|
1542
|
+
#returns the last context on @parsestack which isn't an ImplicitContext
|
1543
|
+
def last_context_not_implicit
|
1544
|
+
@parsestack.reverse_each{|ctx|
|
1545
|
+
return ctx unless ImplicitContext===ctx
|
1546
|
+
}
|
1547
|
+
fail
|
1548
|
+
end
|
1549
|
+
|
1550
|
+
#-----------------------------------
|
1551
|
+
#a | has been seen. is it an operator? or a goalpost?
|
1552
|
+
#(goalpost == delimiter of block param list)
|
1553
|
+
#if it is a goalpost, end the BlockParamListLhsContext on
|
1554
|
+
#the context stack, as well as any implicit contexts on top of it.
|
1555
|
+
def conjunction_or_goalpost(ch)
|
1556
|
+
result=quadriop(ch)
|
1557
|
+
if result===/^|$/ and BlockParamListLhsContext===last_context_not_implicit
|
1558
|
+
@moretokens.push( *abort_noparens!("|"))
|
1559
|
+
assert(BlockParamListLhsContext===@parsestack.last)
|
1560
|
+
@parsestack.pop
|
1561
|
+
@moretokens.push KeywordToken.new("|", input_position-1)
|
1562
|
+
result=@moretokens.shift
|
1563
|
+
end
|
1564
|
+
result
|
1565
|
+
end
|
1566
|
+
|
1276
1567
|
#-----------------------------------
|
1277
1568
|
def quadriop(ch) #match /&&?=?/ (&, &&, &=, or &&=)
|
1278
1569
|
assert(%w[& * | < >].include?(ch))
|
1279
|
-
# '&*'[ch] and qe=quote_expected?(ch) #not needed?
|
1280
1570
|
result=getchar + (eat_next_if(ch)or'')
|
1281
1571
|
if eat_next_if(?=)
|
1282
1572
|
result << ?=
|
1283
|
-
# elsif qe and result[/^[&*]$/] #not needed?
|
1284
|
-
# @moretokens<<NoWsToken.new(@file.pos) #not needed?
|
1285
1573
|
end
|
1286
1574
|
return operator_or_methname_token(result)
|
1287
1575
|
end
|
1288
1576
|
|
1577
|
+
|
1289
1578
|
#-----------------------------------
|
1290
1579
|
def biop(ch) #match /%=?/ (% or %=)
|
1291
1580
|
assert(ch[/^[%^~]$/])
|
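Note: conjunction_or_goalpost, added above, decides whether a `|` is the binary or-operator or a goalpost, i.e. a delimiter of a block parameter list, by looking for a BlockParamListLhsContext beneath any implicit contexts. The two readings (illustrative):

  a, b = 1, 2
  bits = a | b              # '|' as the binary or-operator
  [1, 2].each { |n, *rest|  # '|' as goalposts around the block parameter list
    p n
  }
  p bits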
@@ -1295,18 +1584,18 @@ end
|
|
1295
1584
|
end
|
1296
1585
|
return operator_or_methname_token( result)
|
1297
1586
|
end
|
1298
|
-
|
1299
1587
|
#-----------------------------------
|
1300
|
-
def tilde(ch) #match
|
1588
|
+
def tilde(ch) #match ~
|
1301
1589
|
assert(ch=='~')
|
1302
1590
|
result=getchar
|
1303
|
-
# eat_next_if(?=) ?
|
1591
|
+
# eat_next_if(?=) ? #ack, spppft, I'm always getting this backwards
|
1304
1592
|
# result <<?= :
|
1305
1593
|
WHSPLF[nextchar.chr] ||
|
1306
|
-
@moretokens << NoWsToken.new(
|
1594
|
+
@moretokens << NoWsToken.new(input_position)
|
1307
1595
|
#why is the NoWsToken necessary at this point?
|
1308
|
-
|
1309
|
-
#result should distinguish unary ~
|
1596
|
+
result=operator_or_methname_token result
|
1597
|
+
result.unary=true #result should distinguish unary ~
|
1598
|
+
result
|
1310
1599
|
end
|
1311
1600
|
|
1312
1601
|
#-----------------------------------
|
@@ -1327,8 +1616,9 @@ end
|
|
1327
1616
|
else #unary operator
|
1328
1617
|
result=getchar
|
1329
1618
|
WHSPLF[nextchar.chr] or
|
1330
|
-
@moretokens << NoWsToken.new(
|
1331
|
-
|
1619
|
+
@moretokens << NoWsToken.new(input_position)
|
1620
|
+
result=(operator_or_methname_token result)
|
1621
|
+
result.unary=true
|
1332
1622
|
#todo: result should distinguish unary+binary +-
|
1333
1623
|
end
|
1334
1624
|
else #binary operator
|
@@ -1337,45 +1627,54 @@ end
|
|
1337
1627
|
if eat_next_if(?=)
|
1338
1628
|
result << ?=
|
1339
1629
|
end
|
1340
|
-
|
1630
|
+
result=(operator_or_methname_token result)
|
1341
1631
|
#todo: result should distinguish unary+binary +-
|
1342
1632
|
end
|
1633
|
+
result
|
1343
1634
|
end
|
1344
1635
|
|
1345
1636
|
#-----------------------------------
|
1346
1637
|
def equals(ch) #match /=(>|~|==?)?/ (= or == or =~ or === or =>)
|
1347
|
-
offset
|
1638
|
+
offset= input_position
|
1348
1639
|
str=getchar
|
1349
1640
|
assert str=='='
|
1350
|
-
c=(eat_next_if(
|
1641
|
+
c=(eat_next_if(/[~=>]/)or'')
|
1351
1642
|
str << c
|
1643
|
+
result= operator_or_methname_token( str,offset)
|
1352
1644
|
case c
|
1353
1645
|
when '=': str<< (eat_next_if(?=)or'')
|
1354
1646
|
|
1355
|
-
when '>':
|
1647
|
+
when '>':
|
1648
|
+
unless ParamListContextNoParen===@parsestack.last
|
1649
|
+
@moretokens.unshift result
|
1650
|
+
@moretokens.unshift( *abort_noparens!("=>"))
|
1651
|
+
result=@moretokens.shift
|
1652
|
+
end
|
1653
|
+
@parsestack.last.see self,:arrow
|
1356
1654
|
when '': #record local variable definitions
|
1357
1655
|
|
1358
|
-
@
|
1656
|
+
@parsestack.push AssignmentRhsContext.new(@linenum)
|
1359
1657
|
@moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
|
1360
1658
|
end
|
1361
|
-
return
|
1659
|
+
return result
|
1362
1660
|
end
|
1363
1661
|
|
1364
1662
|
#-----------------------------------
|
1365
1663
|
def exclam(ch) #match /![~=]?/ (! or != or !~)
|
1366
1664
|
assert nextchar==?!
|
1367
1665
|
result=getchar
|
1368
|
-
k=eat_next_if(
|
1666
|
+
k=eat_next_if(/[~=]/)
|
1369
1667
|
if k
|
1370
1668
|
result+=k
|
1371
1669
|
else
|
1372
1670
|
WHSPLF[nextchar.chr] or
|
1373
|
-
@moretokens << NoWsToken.new(
|
1671
|
+
@moretokens << NoWsToken.new(input_position)
|
1374
1672
|
end
|
1375
|
-
return KeywordToken.new(result)
|
1673
|
+
return KeywordToken.new(result, input_position-result.size)
|
1376
1674
|
#result should distinguish unary !
|
1377
1675
|
end
|
1378
1676
|
|
1677
|
+
|
1379
1678
|
#-----------------------------------
|
1380
1679
|
def dot(ch)
|
1381
1680
|
str=''
|
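Note: equals, above, now treats `=>` by context: inside a paren-less parameter list it is left alone, otherwise open implicit contexts are aborted first, and a plain `=` pushes an AssignmentRhsContext so local variables on the left can be recorded. The constructs involved look like this (illustrative; connect is a hypothetical stand-in method):

  def connect(name, opts); [name, opts]; end  # stand-in method, illustration only
  h = { 1 => "one" }                          # => inside a hash literal
  connect "db", :port => 5                    # => inside a paren-less argument list
  x = y = 2                                   # plain '=' starts an assignment rhs; x and y become locals
  p(x == y, "s" =~ /s/, String === "str")     # ==, =~, and === remain ordinary operators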
@@ -1391,7 +1690,6 @@ end
|
|
1391
1690
|
dot_rhs(result)
|
1392
1691
|
return result
|
1393
1692
|
end
|
1394
|
-
|
1395
1693
|
#-----------------------------------
|
1396
1694
|
def dot_rhs(prevtok)
|
1397
1695
|
safe_recurse { |a|
|
@@ -1403,20 +1701,17 @@ end
|
|
1403
1701
|
}
|
1404
1702
|
end
|
1405
1703
|
|
1406
|
-
#-----------------------------------
|
1407
|
-
def single_quote(ch=nil)
|
1408
|
-
double_quote(ch)
|
1409
|
-
end
|
1410
|
-
|
1411
1704
|
#-----------------------------------
|
1412
1705
|
def back_quote(ch=nil)
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1706
|
+
if @last_operative_token===/^(def|::|\.)$/
|
1707
|
+
oldpos= input_position
|
1708
|
+
MethNameToken.new(eat_next_if(?`), oldpos)
|
1709
|
+
else
|
1710
|
+
double_quote(ch)
|
1711
|
+
end
|
1418
1712
|
end
|
1419
1713
|
|
1714
|
+
if false
|
1420
1715
|
#-----------------------------------
|
1421
1716
|
def comment(str)
|
1422
1717
|
result=""
|
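Note: back_quote, rewritten above, treats a backquote as a method name when it directly follows `def`, `::`, or `.`, and as the start of a command string otherwise. The two uses (illustrative):

  listing = `echo hi`   # backquote string: runs a command and captures its output
  class FakeShell
    def `(cmd)          # backquote as a method name, legal right after 'def', '::', or '.'
      "pretended to run #{cmd}"
    end
  end
  p listing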
@@ -1441,27 +1736,30 @@ end
|
|
1441
1736
|
|
1442
1737
|
return IgnoreToken.new(result)
|
1443
1738
|
end
|
1444
|
-
|
1739
|
+
end
|
1445
1740
|
#-----------------------------------
|
1446
1741
|
def open_brace(ch)
|
1447
1742
|
assert((ch!='[' or !want_op_name))
|
1448
1743
|
assert(@moretokens.empty?)
|
1449
1744
|
lastchar=prevchar
|
1450
|
-
ch=eat_next_if(
|
1451
|
-
tokch=KeywordToken.new(ch
|
1745
|
+
ch=eat_next_if(/[({\[]/)or raise "lexer confusion"
|
1746
|
+
tokch=KeywordToken.new(ch, input_position-1)
|
1747
|
+
|
1452
1748
|
|
1453
1749
|
#maybe emitting of NoWsToken can be moved into var_or_meth_name ??
|
1454
1750
|
case tokch.ident
|
1455
1751
|
when '['
|
1456
|
-
#
|
1457
|
-
#
|
1458
|
-
|
1752
|
+
# in contexts expecting an (operator) method name, we
|
1753
|
+
# would want to match [] or []= at this point
|
1754
|
+
#but control never comes this way in those cases... goes
|
1755
|
+
#to custom parsers for alias, undef, and def in #parse_keywords
|
1756
|
+
tokch.set_infix! unless after_nonid_op?{WHSPLF[lastchar]}
|
1757
|
+
@parsestack.push ListImmedContext.new(ch,@linenum)
|
1459
1758
|
lasttok=last_operative_token
|
1460
1759
|
#could be: lasttok===/^[a-z_]/i
|
1461
|
-
if (VarNameToken===lasttok or MethNameToken===lasttok
|
1462
|
-
lasttok===FUNCLIKE_KEYWORDS) and !WHSPCHARS[lastchar]
|
1760
|
+
if (VarNameToken===lasttok or ImplicitParamListEndToken===lasttok or MethNameToken===lasttok) and !WHSPCHARS[lastchar]
|
1463
1761
|
@moretokens << (tokch)
|
1464
|
-
tokch= NoWsToken.new(
|
1762
|
+
tokch= NoWsToken.new(input_position-1)
|
1465
1763
|
end
|
1466
1764
|
when '('
|
1467
1765
|
lasttok=last_operative_token
|
@@ -1470,19 +1768,20 @@ end
|
|
1470
1768
|
lasttok===FUNCLIKE_KEYWORDS)
|
1471
1769
|
unless WHSPCHARS[lastchar]
|
1472
1770
|
@moretokens << tokch
|
1473
|
-
tokch= NoWsToken.new(
|
1771
|
+
tokch= NoWsToken.new(input_position-1)
|
1474
1772
|
end
|
1475
|
-
@
|
1773
|
+
@parsestack.push ParamListContext.new(@linenum)
|
1476
1774
|
else
|
1477
|
-
@
|
1775
|
+
@parsestack.push ParenContext.new(@linenum)
|
1478
1776
|
end
|
1479
1777
|
|
1480
1778
|
when '{'
|
1481
1779
|
#check if we are in a hash literal or string inclusion (#{}),
|
1482
1780
|
#in which case below would be bad.
|
1483
|
-
if after_nonid_op?{false}
|
1484
|
-
@
|
1781
|
+
if after_nonid_op?{false} or @last_operative_token.has_no_block?
|
1782
|
+
@parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
|
1485
1783
|
else
|
1784
|
+
tokch.set_infix!
|
1486
1785
|
=begin not needed now, i think
|
1487
1786
|
# 'need to find matching callsite context and end it if implicit'
|
1488
1787
|
lasttok=last_operative_token
|
@@ -1492,8 +1791,8 @@ end
|
|
1492
1791
|
end
|
1493
1792
|
=end
|
1494
1793
|
|
1495
|
-
|
1496
|
-
@
|
1794
|
+
localvars.start_block
|
1795
|
+
@parsestack.push BlockContext.new(@linenum)
|
1497
1796
|
block_param_list_lookahead
|
1498
1797
|
end
|
1499
1798
|
end
|
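Note: for `{`, open_brace uses after_nonid_op? (and now has_no_block? on the previous token) to decide between a hash literal and a block; a block also opens a new local-variable scope via localvars.start_block. The two cases (illustrative):

  h = { :a => 1 }                          # '{' after an operator: a hash literal
  doubled = [1, 2, 3].map { |n| n * 2 }    # '{' after a method call: a block with its own parameter scope
  p h, doubled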
@@ -1504,18 +1803,18 @@ end
|
|
1504
1803
|
def close_brace(ch)
|
1505
1804
|
ch==eat_next_if(/[)}\]]/) or raise "lexer confusion"
|
1506
1805
|
@moretokens.concat abort_noparens!(ch)
|
1507
|
-
@
|
1508
|
-
@
|
1509
|
-
if @
|
1806
|
+
@parsestack.last.see self,:semi #hack
|
1807
|
+
@moretokens<< kw=KeywordToken.new( ch, input_position-1)
|
1808
|
+
if @parsestack.empty?
|
1510
1809
|
lexerror kw,"unmatched brace: #{ch}"
|
1511
1810
|
return @moretokens.shift
|
1512
1811
|
end
|
1513
|
-
ctx=@
|
1812
|
+
ctx=@parsestack.pop
|
1514
1813
|
origch,line=ctx.starter,ctx.linenum
|
1515
1814
|
ch==PAIRS[origch] or
|
1516
1815
|
lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
|
1517
1816
|
"matching brace location", @filename, line
|
1518
|
-
BlockContext===ctx and
|
1817
|
+
BlockContext===ctx and localvars.end_block
|
1519
1818
|
if ParamListContext==ctx.class
|
1520
1819
|
assert ch==')'
|
1521
1820
|
#kw.set_callsite! #not needed?
|
@@ -1525,29 +1824,29 @@ end
|
|
1525
1824
|
|
1526
1825
|
#-----------------------------------
|
1527
1826
|
def eof(ch=nil)
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1827
|
+
#this must be the very last character...
|
1828
|
+
oldpos= input_position
|
1829
|
+
assert(?\0==getc)
|
1531
1830
|
|
1532
|
-
|
1831
|
+
result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
|
1533
1832
|
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1833
|
+
eof? or
|
1834
|
+
lexerror result,'nul character is not at the end of file'
|
1835
|
+
input_position_set @file.size
|
1836
|
+
return(endoffile_detected result)
|
1538
1837
|
end
|
1539
1838
|
|
1540
1839
|
#-----------------------------------
|
1541
1840
|
def endoffile_detected(s='')
|
1542
1841
|
@moretokens.push( *(abort_noparens!.push super(s)))
|
1543
1842
|
result= @moretokens.shift
|
1544
|
-
balanced_braces? or (lexerror result,"unbalanced braces at eof.
|
1843
|
+
balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
|
1545
1844
|
result
|
1546
1845
|
end
|
1547
1846
|
|
1548
1847
|
#-----------------------------------
|
1549
1848
|
def single_char_token(ch)
|
1550
|
-
KeywordToken.new super(ch),
|
1849
|
+
KeywordToken.new super(ch), input_position-1
|
1551
1850
|
end
|
1552
1851
|
|
1553
1852
|
#-----------------------------------
|
@@ -1557,13 +1856,13 @@ end
|
|
1557
1856
|
|
1558
1857
|
#-----------------------------------
|
1559
1858
|
def semicolon(ch)
|
1560
|
-
|
1859
|
+
assert @moretokens.empty?
|
1561
1860
|
@moretokens.push(*abort_noparens!)
|
1562
|
-
@
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1861
|
+
@parsestack.last.see self,:semi
|
1862
|
+
if ExpectDoOrNlContext===@parsestack.last #should be in context's see:semi handler
|
1863
|
+
@parsestack.pop
|
1864
|
+
assert @parsestack.last.starter[/^(while|until|for)$/]
|
1865
|
+
end
|
1567
1866
|
@moretokens.push single_char_token(ch)
|
1568
1867
|
return @moretokens.shift
|
1569
1868
|
end
|
@@ -1582,7 +1881,11 @@ end
|
|
1582
1881
|
|
1583
1882
|
#-----------------------------------
|
1584
1883
|
#tokenify_results_of :identifier
|
1585
|
-
save_offsets_in(*CHARMAPPINGS.values.uniq-[
|
1884
|
+
save_offsets_in(*CHARMAPPINGS.values.uniq-[
|
1885
|
+
:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
|
1886
|
+
|
1887
|
+
|
1888
|
+
])
|
1586
1889
|
#save_offsets_in :symbol
|
1587
1890
|
|
1588
1891
|
end
|