rubylexer 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +90 -0
- data/Manifest.txt +54 -3
- data/README.txt +4 -7
- data/Rakefile +3 -2
- data/lib/rubylexer.rb +856 -323
- data/lib/rubylexer/0.7.0.rb +11 -2
- data/lib/rubylexer/0.7.1.rb +2 -0
- data/lib/rubylexer/charhandler.rb +4 -4
- data/lib/rubylexer/context.rb +86 -9
- data/lib/rubylexer/rulexer.rb +455 -101
- data/lib/rubylexer/token.rb +166 -43
- data/lib/rubylexer/tokenprinter.rb +16 -8
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.vpj +98 -0
- data/test/code/all_the_gems.rb +33 -0
- data/test/code/all_the_raas.rb +226 -0
- data/test/code/all_the_rubies.rb +2 -0
- data/test/code/deletewarns.rb +19 -1
- data/test/code/dumptokens.rb +39 -8
- data/test/code/errscan +2 -0
- data/test/code/isolate_error.rb +72 -0
- data/test/code/lexloop +14 -0
- data/test/code/locatetest.rb +150 -8
- data/test/code/regression.rb +109 -0
- data/test/code/rubylexervsruby.rb +53 -15
- data/test/code/strgen.rb +138 -0
- data/test/code/tarball.rb +144 -0
- data/test/code/testcases.rb +11 -0
- data/test/code/tokentest.rb +115 -24
- data/test/data/__eof2.rb +1 -0
- data/test/data/__eof5.rb +2 -0
- data/test/data/__eof6.rb +2 -0
- data/test/data/cvtesc.rb +17 -0
- data/test/data/g.rb +6 -0
- data/test/data/hd0.rb +3 -0
- data/test/data/hdateof.rb +2 -0
- data/test/data/hdempty.rb +3 -0
- data/test/data/hdr.rb +9 -0
- data/test/data/hdr_dos.rb +13 -0
- data/test/data/hdr_dos2.rb +18 -0
- data/test/data/heart.rb +2 -0
- data/test/data/here_escnl.rb +25 -0
- data/test/data/here_escnl_dos.rb +20 -0
- data/test/data/here_squote.rb +3 -0
- data/test/data/heremonsters.rb +140 -0
- data/test/data/heremonsters.rb.broken +68 -0
- data/test/data/heremonsters.rb.broken.save +68 -0
- data/test/data/heremonsters_dos.rb +140 -0
- data/test/data/heremonsters_dos.rb.broken +68 -0
- data/test/data/illegal_oneliners.rb +1 -0
- data/test/data/illegal_stanzas.rb +0 -0
- data/test/data/make_ws_strdelim.rb +22 -0
- data/test/data/maven2_builer_test.rb +82 -0
- data/test/data/migration.rb +8944 -0
- data/test/data/modl.rb +6 -0
- data/test/data/modl_dos.rb +7 -0
- data/test/data/modl_fails.rb +10 -0
- data/test/data/multilinestring.rb +6 -0
- data/test/data/oneliners.rb +555 -0
- data/test/data/p-op.rb +2 -0
- data/test/data/p.rb +3 -1710
- data/test/data/s.rb +90 -21
- data/test/data/simple.rb +1 -0
- data/test/data/simple_dos.rb +1 -0
- data/test/data/stanzas.rb +1194 -0
- data/test/data/strdelim_crlf.rb +6 -0
- data/test/data/stuff.rb +6 -0
- data/test/data/stuff2.rb +5 -0
- data/test/data/stuff3.rb +6 -0
- data/test/data/stuff4.rb +6 -0
- data/test/data/tkweird.rb +20 -0
- data/test/data/unending_stuff.rb +5 -0
- data/test/data/whatnot.rb +8 -0
- data/test/data/ws_strdelim.rb +0 -0
- data/test/test.sh +239 -0
- data/testing.txt +39 -50
- metadata +110 -12
- data/test/code/dl_all_gems.rb +0 -43
- data/test/code/unpack_all_gems.rb +0 -15
- data/test/data/gemlist.txt +0 -280
data/History.txt
CHANGED
@@ -1,3 +1,93 @@
|
|
1
|
+
=== 0.7.1/10-29-2008
|
2
|
+
* 6 Major Enhancements:
|
3
|
+
* handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
|
4
|
+
* yet more hacks in aid of string inclusions
|
5
|
+
* backslashes in strings are no longer interpreted automatically when lexed
|
6
|
+
* here documents are completely rewritten in a tricky way that more closely mimics what MRI does
|
7
|
+
* many more flags for tokens to tell apart the various cases:
|
8
|
+
* the various different local variable types have to be detected.
|
9
|
+
* colons which operate like semicolons or thens are marked as such
|
10
|
+
* { } used in block now flagged as parsing like do and end
|
11
|
+
* commas now are marked with different types depending on how they're used
|
12
|
+
* @variables in methods need to be marked as such, so their parsetree can come out different.
|
13
|
+
* clearly mark backquoted strings
|
14
|
+
* further refinements of local variable detection and implicit paren placement near these cases:
|
15
|
+
* when ws between method name and parenthesis
|
16
|
+
* break/return/next
|
17
|
+
* ? : << / rescue do
|
18
|
+
|
19
|
+
* 5 Minor Enhancements
|
20
|
+
* colon or star in assignment make it a multi assignment
|
21
|
+
* presence of unary * or & in param list forces it to be a multi-param list
|
22
|
+
* errors in string inclusions should now be handled better
|
23
|
+
* string and stringlike tokens now can tell you the exact sequence of chars used to open and close the string.
|
24
|
+
* correctly handling more cases where return/break/next parses differently than a method (yuck!)
|
25
|
+
|
26
|
+
* 26 Bugfixes
|
27
|
+
* ~ operator can be followed with an @, like + and -
|
28
|
+
* ~ is overridable, however :: is not
|
29
|
+
* raise is not a keyword
|
30
|
+
* in addition to 0x00, 0x04 and 0x1a should be considered eof in ruby. why? idunno.
|
31
|
+
* setting PROGRESS env var will cause input file position to be printed to stderr periodically.
|
32
|
+
* defined? is not a funclike keyword... really more of a unary operator
|
33
|
+
* $- is a legitimate global variable.
|
34
|
+
* better parsing of lvalue list following for keyword.
|
35
|
+
* rescue is a variable define context only when right after => and before then (or disguises).
|
36
|
+
* better placement of implicit parens around def param list
|
37
|
+
* (global) variable aliasing now supported
|
38
|
+
* local vars in END block are NOT scoped to the block!
|
39
|
+
* local vars in def param lists aren't considered variables til after the initializer for that var
|
40
|
+
* end of def header is treated like ; even if none is present
|
41
|
+
* never put here document right after class keyword
|
42
|
+
* look for start of line directives at end of here document
|
43
|
+
* oops, mac newlines don't have to be supported
|
44
|
+
* dos newlines better tolerated around here documents
|
45
|
+
* less line number/offset confusion around here documents
|
46
|
+
* newline after (non-operator) rescue is hard (but not after INNERBOUNDINGWORDS)
|
47
|
+
* handling eof in more strange places
|
48
|
+
* always expect unary op after for
|
49
|
+
* unary ops should know about the before-but-not-after rule!
|
50
|
+
* newlines after = should be escaped
|
51
|
+
* \c? and \C-? are not interpreted the same as other ctrl chars
|
52
|
+
* \n\r and \r are not recognized as nl sequences
|
53
|
+
|
54
|
+
* 18 Internal Changes (not user visible)
|
55
|
+
* commas cause a :comma event on the parsestack
|
56
|
+
* some of the lists of types of operators are available now as arrays of strings instead of regexps
|
57
|
+
* single and double quote now have separate implementations again
|
58
|
+
* keep track of whether an implicit open or close paren has just been emitted
|
59
|
+
* put ws around << to keep slickedit happy
|
60
|
+
* the eof characters are also considered whitespace.
|
61
|
+
* identifier lexer now uses regexps more heavily
|
62
|
+
* method formal parameter list is not considered an lvalue context for commas.
|
63
|
+
* class and def now have their own parse contexts
|
64
|
+
* unary star causes a :splat event on the parsestack
|
65
|
+
* is_var_name now detects var tokens just from the token type, not looking at local vars table.
|
66
|
+
* a faster regexp-based implementation of string scanning
|
67
|
+
* moved yucky side effect out of quote_expected?
|
68
|
+
* these keywords: class module def for defined? no longer automatically create operator context
|
69
|
+
* a new context for BEGIN/END keywords
|
70
|
+
* a new context for param list of return/next/break
|
71
|
+
* new escape sequence processors for regexp and %W list
|
72
|
+
* numbers now scanned with a regexp
|
73
|
+
|
74
|
+
* 15 Enhancements and bug fixes to tests:
|
75
|
+
* just print a notice on errors which are also syntax errors for ruby
|
76
|
+
* a little cleanup of temp files
|
77
|
+
* rubylexervsruby and tokentest can take input from stdin
|
78
|
+
* unlexer improvements
|
79
|
+
* dumptokens now has a --silent cmdline option
|
80
|
+
* locatetest.rb is significantly enhanced
|
81
|
+
* --unified option to diff seems to work better than -u
|
82
|
+
* tokentest better verifies exact token contents...
|
83
|
+
* tokentest now uses open and close fields of strings to verify string bounds exactly
|
84
|
+
* CRLF in a string is always treated like just a LF. (CR is elided.)
|
85
|
+
* allow_ooo hacky flag marks tokens whose offset errors are to be ignored.
|
86
|
+
* all other offset errors have been downgraded to warnings.
|
87
|
+
* most of the offset problems I had been seeing have been fixed, tho
|
88
|
+
* offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
|
89
|
+
* tokentest has a --loop option, for load testing
|
90
|
+
|
1
91
|
=== 0.7.0/2-15-2008
|
2
92
|
* implicit tokens are now emitted at the right times (need more test code)
|
3
93
|
* local variables are now temporarily hidden by class, module, and def
|
data/Manifest.txt
CHANGED
@@ -19,7 +19,6 @@ lib/rubylexer/symboltable.rb
|
|
19
19
|
lib/rubylexer/charhandler.rb
|
20
20
|
lib/assert.rb
|
21
21
|
lib/rubylexer.rb
|
22
|
-
test/data/gemlist.txt
|
23
22
|
test/data/blockassigntest.rb
|
24
23
|
test/data/for.rb
|
25
24
|
test/data/chunky_bacon.rb
|
@@ -58,10 +57,62 @@ test/data/chunky_bacon2.rb
|
|
58
57
|
test/data/format.rb
|
59
58
|
test/code/locatetest.rb
|
60
59
|
test/code/rubylexervsruby.rb
|
61
|
-
test/code/dl_all_gems.rb
|
62
|
-
test/code/unpack_all_gems.rb
|
63
60
|
test/code/tokentest.rb
|
64
61
|
test/code/dumptokens.rb
|
65
62
|
test/code/torment
|
66
63
|
test/code/locatetest
|
67
64
|
test/code/deletewarns.rb
|
65
|
+
lib/rubylexer/0.7.1.rb
|
66
|
+
rubylexer.vpj
|
67
|
+
test/code/all_the_gems.rb
|
68
|
+
test/code/all_the_raas.rb
|
69
|
+
test/code/all_the_rubies.rb
|
70
|
+
test/code/errscan
|
71
|
+
test/code/isolate_error.rb
|
72
|
+
test/code/lexloop
|
73
|
+
test/code/regression.rb
|
74
|
+
test/code/strgen.rb
|
75
|
+
test/code/tarball.rb
|
76
|
+
test/code/testcases.rb
|
77
|
+
test/data/chunky.plain.rb
|
78
|
+
test/data/cvtesc.rb
|
79
|
+
test/data/__eof2.rb
|
80
|
+
test/data/__eof5.rb
|
81
|
+
test/data/__eof6.rb
|
82
|
+
test/data/hd0.rb
|
83
|
+
test/data/hdateof.rb
|
84
|
+
test/data/hdempty.rb
|
85
|
+
test/data/hdr_dos2.rb
|
86
|
+
test/data/hdr_dos.rb
|
87
|
+
test/data/hdr.rb
|
88
|
+
test/data/here_escnl_dos.rb
|
89
|
+
test/data/here_escnl.rb
|
90
|
+
test/data/heremonsters_dos.rb
|
91
|
+
test/data/heremonsters_dos.rb.broken
|
92
|
+
test/data/heremonsters.rb
|
93
|
+
test/data/heremonsters.rb.broken
|
94
|
+
test/data/heremonsters.rb.broken.save
|
95
|
+
test/data/here_squote.rb
|
96
|
+
test/data/illegal_oneliners.rb
|
97
|
+
test/data/illegal_stanzas.rb
|
98
|
+
test/data/make_ws_strdelim.rb
|
99
|
+
test/data/maven2_builer_test.rb
|
100
|
+
test/data/migration.rb
|
101
|
+
test/data/modl_dos.rb
|
102
|
+
test/data/modl_fails.rb
|
103
|
+
test/data/modl.rb
|
104
|
+
test/data/multilinestring.rb
|
105
|
+
test/data/oneliners.rb
|
106
|
+
test/data/simple_dos.rb
|
107
|
+
test/data/simple.rb
|
108
|
+
test/data/stanzas.rb
|
109
|
+
test/data/strdelim_crlf.rb
|
110
|
+
test/data/stuff2.rb
|
111
|
+
test/data/stuff3.rb
|
112
|
+
test/data/stuff4.rb
|
113
|
+
test/data/stuff.rb
|
114
|
+
test/data/tkweird.rb
|
115
|
+
test/data/unending_stuff.rb
|
116
|
+
test/data/whatnot.rb
|
117
|
+
test/data/ws_strdelim.rb
|
118
|
+
test/test.sh
|
data/README.txt
CHANGED
@@ -67,10 +67,7 @@ keywords, depending on context:
|
|
67
67
|
any overridable operator and most keywords can also be method names
|
68
68
|
|
69
69
|
== todo
|
70
|
-
test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
|
71
|
-
these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
|
72
70
|
test more ways: cvt source to dos or mac fmt before testing
|
73
|
-
test more ways: run unit tests after passing thru rubylexer (0.7)
|
74
71
|
test more ways: test require'd, load'd, or eval'd code as well (0.7)
|
75
72
|
lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
|
76
73
|
incremental lexing (ides want this (for performance))
|
@@ -78,12 +75,10 @@ put everything in a namespace
|
|
78
75
|
integrate w/ other tools...
|
79
76
|
html colorized output?
|
80
77
|
move more state onto @parsestack (ongoing)
|
81
|
-
the new cases in p.rb now compile, but won't run
|
82
78
|
expand on test documentation
|
83
79
|
use want_op_name more
|
84
80
|
return result as a half-parsed tree (with parentheses and the like matched)
|
85
81
|
emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
|
86
|
-
strings are still slow
|
87
82
|
emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
|
88
83
|
token pruning in dumptokens...
|
89
84
|
|
@@ -96,8 +91,10 @@ string tokenization sometimes a little different from ruby around newlines
|
|
96
91
|
string contents might not be correctly translated in a few cases (0.8?)
|
97
92
|
symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
|
98
93
|
'\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
|
99
|
-
windows
|
94
|
+
windows newline in source is likely to cause problems in obscure cases (need test case)
|
100
95
|
unterminated =begin is not an error (0.8)
|
101
96
|
ruby 1.9 completely unsupported (0.9)
|
102
97
|
character sets other than ascii are not supported at all (1.0)
|
103
|
-
|
98
|
+
regression test currently shows 14 errors with differences in exact token ordering
|
99
|
+
-around string inclusions. these errors are much less serious than they seem.
|
100
|
+
offset of AssignmentRhsListEndToken appears to be off by 1
|
data/Rakefile
CHANGED
@@ -13,12 +13,13 @@ require 'lib/rubylexer/version.rb'
|
|
13
13
|
hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
|
14
14
|
_.author = "Caleb Clausen"
|
15
15
|
_.email = "rubylexer-owner @at@ inforadical .dot. net"
|
16
|
-
_.url = "http://rubylexer.rubyforge.org/"
|
17
|
-
_.extra_deps
|
16
|
+
_.url = ["http://rubylexer.rubyforge.org/", "http://rubyforge.org/projects/rubylexer/"]
|
17
|
+
_.extra_deps << ['sequence', '>= 0.2.0']
|
18
18
|
_.test_globs=["test/{code/*,data/*rb*,results/}"]
|
19
19
|
_.description=desc
|
20
20
|
_.summary=desc[/\A[^.]+\./]
|
21
21
|
_.spec_extras={:bindir=>''}
|
22
|
+
_.rdoc_pattern=/\A(howtouse\.txt|testing\.txt|README\.txt|lib\/.*\.rb)\Z/
|
22
23
|
end
|
23
24
|
|
24
25
|
|
data/lib/rubylexer.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
=begin
|
1
|
+
=begin legal crap
|
2
2
|
rubylexer - a ruby lexer written in ruby
|
3
|
-
Copyright (C) 2004,2005 Caleb Clausen
|
3
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
4
4
|
|
5
5
|
This library is free software; you can redistribute it and/or
|
6
6
|
modify it under the terms of the GNU Lesser General Public
|
@@ -18,7 +18,6 @@
|
|
18
18
|
=end
|
19
19
|
|
20
20
|
|
21
|
-
|
22
21
|
require 'rubylexer/rulexer' #must be 1st!!!
|
23
22
|
require 'rubylexer/version'
|
24
23
|
require 'rubylexer/token'
|
@@ -32,9 +31,11 @@ require 'rubylexer/tokenprinter'
|
|
32
31
|
#-----------------------------------
|
33
32
|
class RubyLexer
|
34
33
|
include NestedContexts
|
34
|
+
|
35
|
+
|
35
36
|
|
36
37
|
RUBYSYMOPERATORREX=
|
37
|
-
%r{^([
|
38
|
+
%r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
|
38
39
|
# (nasty beastie, eh?)
|
39
40
|
#these are the overridable operators
|
40
41
|
#does not match flow-control operators like: || && ! or and if not
|
@@ -42,23 +43,25 @@ class RubyLexer
|
|
42
43
|
#or .. ... ?:
|
43
44
|
#for that use:
|
44
45
|
RUBYNONSYMOPERATORREX=
|
45
|
-
%r{^([
|
46
|
+
%r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
|
46
47
|
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
|
47
48
|
UNSYMOPS=/^[~!]$/ #always unary
|
48
49
|
UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
|
49
50
|
WHSPCHARS=WHSPLF+"\\#"
|
50
|
-
|
51
|
-
|
52
|
-
|
51
|
+
OPORBEGINWORDLIST=%w(if unless while until)
|
52
|
+
BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
|
53
|
+
OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
|
54
|
+
BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
|
55
|
+
FUNCLIKE_KEYWORDS=/^(break|next|redo|return|yield|retry|super|BEGIN|END)$/
|
53
56
|
VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
|
54
57
|
INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
|
55
58
|
BINOPWORDS="(and|or)"
|
56
|
-
NEVERSTARTPARAMLISTWORDS
|
59
|
+
NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
|
57
60
|
NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
|
58
61
|
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
|
59
62
|
|
60
63
|
RUBYKEYWORDS=%r{
|
61
|
-
^(alias|#{BINOPWORDS}|not|undef|end|
|
64
|
+
^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
|
62
65
|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
|
63
66
|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
|
64
67
|
)$
|
@@ -72,8 +75,9 @@ class RubyLexer
|
|
72
75
|
?A..?Z => :identifier,
|
73
76
|
?_ => :identifier,
|
74
77
|
?0..?9 => :number,
|
75
|
-
|
76
|
-
|
78
|
+
?" => :double_quote, #"
|
79
|
+
?' => :single_quote, #'
|
80
|
+
?` => :back_quote, #`
|
77
81
|
|
78
82
|
WHSP => :whitespace, #includes \r
|
79
83
|
?, => :comma,
|
@@ -99,7 +103,9 @@ class RubyLexer
|
|
99
103
|
#?\r => :newline, #implicitly escaped after op
|
100
104
|
|
101
105
|
?\\ => :escnewline,
|
102
|
-
?\
|
106
|
+
?\x00 => :eof,
|
107
|
+
?\x04 => :eof,
|
108
|
+
?\x1a => :eof,
|
103
109
|
|
104
110
|
"[({" => :open_brace,
|
105
111
|
"])}" => :close_brace,
|
@@ -108,41 +114,90 @@ class RubyLexer
|
|
108
114
|
?# => :comment
|
109
115
|
}
|
110
116
|
|
111
|
-
attr_reader :incomplete_here_tokens, :parsestack
|
117
|
+
attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
|
112
118
|
|
113
119
|
|
114
120
|
#-----------------------------------
|
115
|
-
def initialize(filename,file,linenum=1)
|
116
|
-
|
121
|
+
def initialize(filename,file,linenum=1,offset_adjust=0)
|
122
|
+
@offset_adjust=0 #set again in next line
|
123
|
+
super(filename,file, linenum,offset_adjust)
|
117
124
|
@start_linenum=linenum
|
118
125
|
@parsestack=[TopLevelContext.new]
|
119
|
-
@incomplete_here_tokens=[]
|
126
|
+
@incomplete_here_tokens=[] #not used anymore
|
127
|
+
@pending_here_bodies=[]
|
120
128
|
@localvars_stack=[SymbolTable.new]
|
121
129
|
@defining_lvar=nil
|
122
130
|
@in_def_name=false
|
131
|
+
@last_operative_token=nil
|
132
|
+
@last_token_maybe_implicit=nil
|
123
133
|
|
124
134
|
@toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
|
125
135
|
|
126
136
|
start_of_line_directives
|
137
|
+
progress_printer
|
138
|
+
end
|
139
|
+
|
140
|
+
def progress_printer
|
141
|
+
return unless ENV['RL_PROGRESS']
|
142
|
+
$stderr.puts 'printing progresses'
|
143
|
+
@progress_thread=Thread.new do
|
144
|
+
until EoiToken===@last_operative_token
|
145
|
+
sleep 10
|
146
|
+
$stderr.puts @file.pos
|
147
|
+
end
|
148
|
+
end
|
127
149
|
end
|
128
150
|
|
129
151
|
def localvars;
|
130
152
|
@localvars_stack.last
|
131
153
|
end
|
132
154
|
|
155
|
+
attr :localvars_stack
|
156
|
+
attr :offset_adjust
|
157
|
+
attr_writer :pending_here_bodies
|
158
|
+
|
159
|
+
#-----------------------------------
|
160
|
+
def set_last_token(tok)
|
161
|
+
@last_operative_token=@last_token_maybe_implicit=tok
|
162
|
+
end
|
163
|
+
|
133
164
|
#-----------------------------------
|
134
165
|
def get1token
|
135
166
|
result=super #most of the action's here
|
136
167
|
|
168
|
+
if ENV['PROGRESS']
|
169
|
+
@last_cp_pos||=0
|
170
|
+
@start_time||=Time.now
|
171
|
+
if result.offset-@last_cp_pos>100000
|
172
|
+
$stderr.puts "#{result.offset} #{Time.now-@start_time}"
|
173
|
+
@last_cp_pos=result.offset
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
137
177
|
#now cleanup and housekeeping
|
138
178
|
|
139
179
|
|
140
180
|
#check for bizarre token types
|
141
181
|
case result
|
182
|
+
when ImplicitParamListStartToken, ImplicitParamListEndToken
|
183
|
+
@last_token_maybe_implicit=result
|
184
|
+
result
|
142
185
|
when StillIgnoreToken#,nil
|
143
186
|
result
|
187
|
+
when StringToken
|
188
|
+
set_last_token result
|
189
|
+
assert !(IgnoreToken===@last_operative_token)
|
190
|
+
result.elems.map!{|frag|
|
191
|
+
if String===frag
|
192
|
+
result.translate_escapes(frag)
|
193
|
+
else
|
194
|
+
frag
|
195
|
+
end
|
196
|
+
} if AUTO_UNESCAPE_STRINGS
|
197
|
+
result
|
198
|
+
|
144
199
|
when Token#,String
|
145
|
-
|
200
|
+
set_last_token result
|
146
201
|
assert !(IgnoreToken===@last_operative_token)
|
147
202
|
result
|
148
203
|
else
|
@@ -150,6 +205,20 @@ class RubyLexer
|
|
150
205
|
end
|
151
206
|
end
|
152
207
|
|
208
|
+
#-----------------------------------
|
209
|
+
def eof?
|
210
|
+
super or EoiToken===@last_operative_token
|
211
|
+
end
|
212
|
+
|
213
|
+
#-----------------------------------
|
214
|
+
def input_position
|
215
|
+
super+@offset_adjust
|
216
|
+
end
|
217
|
+
|
218
|
+
#-----------------------------------
|
219
|
+
def input_position_raw
|
220
|
+
@file.pos
|
221
|
+
end
|
153
222
|
|
154
223
|
#-----------------------------------
|
155
224
|
def balanced_braces?
|
@@ -163,7 +232,7 @@ class RubyLexer
|
|
163
232
|
s=eat_next_if(?$) or return nil
|
164
233
|
|
165
234
|
if t=((identifier_as_string(?$) or special_global))
|
166
|
-
s<<t
|
235
|
+
s << t
|
167
236
|
else error= "missing $id name"
|
168
237
|
end
|
169
238
|
|
@@ -173,17 +242,27 @@ class RubyLexer
|
|
173
242
|
#-----------------------------------
|
174
243
|
def at_identifier(ch=nil)
|
175
244
|
result = (eat_next_if(?@) or return nil)
|
176
|
-
result << (eat_next_if(?@)or'')
|
245
|
+
result << (eat_next_if(?@) or '')
|
177
246
|
if t=identifier_as_string(?@)
|
178
|
-
result<<t
|
247
|
+
result << t
|
179
248
|
else error= "missing @id name"
|
180
249
|
end
|
181
|
-
|
250
|
+
result=VarNameToken.new(result)
|
251
|
+
result.in_def=true if inside_method_def?
|
252
|
+
return lexerror(result,error)
|
182
253
|
end
|
183
254
|
|
184
255
|
private
|
185
256
|
#-----------------------------------
|
186
|
-
def
|
257
|
+
def inside_method_def?
|
258
|
+
@parsestack.reverse_each{|ctx|
|
259
|
+
ctx.starter=='def' and ctx.state!=:saw_def and return true
|
260
|
+
}
|
261
|
+
return false
|
262
|
+
end
|
263
|
+
|
264
|
+
#-----------------------------------
|
265
|
+
def here_spread_over_ruby_code(rl,tok) #not used anymore
|
187
266
|
assert(!rl.incomplete_here_tokens.empty?)
|
188
267
|
@incomplete_here_tokens += rl.incomplete_here_tokens
|
189
268
|
end
|
@@ -207,10 +286,10 @@ private
|
|
207
286
|
end
|
208
287
|
|
209
288
|
#-----------------------------------
|
210
|
-
WSCHARSET=/[#\\\n\s\t\v\r\f]/
|
289
|
+
WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
|
211
290
|
def ignored_tokens(allow_eof=false,allow_eol=true)
|
212
291
|
result=[]
|
213
|
-
result
|
292
|
+
result << @moretokens.shift while StillIgnoreToken===@moretokens.first
|
214
293
|
@moretokens.empty? or return result
|
215
294
|
loop do
|
216
295
|
unless @moretokens.empty?
|
@@ -273,8 +352,8 @@ private
|
|
273
352
|
result = ((
|
274
353
|
#order matters here, but it shouldn't
|
275
354
|
#(but til_charset must be last)
|
276
|
-
|
277
|
-
|
355
|
+
eat_if(/-[a-z0-9_]/i,2) or
|
356
|
+
eat_next_if(/[!@&+`'=~\-\/\\,.;<>*"$?:]/) or
|
278
357
|
(?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
|
279
358
|
))
|
280
359
|
end
|
@@ -289,23 +368,26 @@ private
|
|
289
368
|
#just asserts because those contexts are never encountered.
|
290
369
|
#control goes through symbol(<...>,nil)
|
291
370
|
assert( /^[a-z_]$/i===context)
|
292
|
-
assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
|
371
|
+
assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
|
293
372
|
|
294
|
-
@moretokens.unshift(*parse_keywords(str,oldpos) do
|
373
|
+
@moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
|
295
374
|
#if not a keyword,
|
296
375
|
case str
|
297
376
|
when FUNCLIKE_KEYWORDS; #do nothing
|
298
377
|
when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
|
299
378
|
end
|
300
|
-
|
379
|
+
was_last=@last_operative_token
|
380
|
+
@last_operative_token=tok if tok
|
381
|
+
safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
|
301
382
|
end)
|
302
383
|
return @moretokens.shift
|
303
384
|
end
|
304
385
|
|
305
386
|
#-----------------------------------
|
387
|
+
IDENTREX={}
|
306
388
|
def identifier_as_string(context)
|
307
389
|
#must begin w/ letter or underscore
|
308
|
-
|
390
|
+
/[_a-z]/i===nextchar.chr or return
|
309
391
|
|
310
392
|
#equals, question mark, and exclamation mark
|
311
393
|
#might be allowed at the end in some contexts.
|
@@ -315,45 +397,16 @@ private
|
|
315
397
|
#i hope i've captured all right conditions....
|
316
398
|
#context should always be ?: right after def, ., and :: now
|
317
399
|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
str<<til_charset(/[^a-z0-9_]/i)
|
327
|
-
|
328
|
-
#look for ?, !, or =, if allowed
|
329
|
-
case b=getc
|
330
|
-
when nil #means we're at eof
|
331
|
-
#handling nil here prevents b from ever matching
|
332
|
-
#a nil value of maybe_qm, maybe_ex or maybe_eq
|
333
|
-
when maybe_qm
|
334
|
-
str << b
|
335
|
-
when maybe_ex
|
336
|
-
nc=(nextchar unless eof?)
|
337
|
-
#does ex appear to be part of a larger operator?
|
338
|
-
if nc==?= #or nc==?~
|
339
|
-
back1char
|
340
|
-
else
|
341
|
-
str << b
|
342
|
-
end
|
343
|
-
when maybe_eq
|
344
|
-
nc=(nextchar unless eof?)
|
345
|
-
#does eq appear to be part of a larger operator?
|
346
|
-
if nc==?= or nc==?~ or nc==?>
|
347
|
-
back1char
|
348
|
-
else
|
349
|
-
str << b
|
350
|
-
end
|
351
|
-
else
|
352
|
-
back1char
|
353
|
-
end
|
400
|
+
#= and ! only match if not part of a larger operator
|
401
|
+
trailers =
|
402
|
+
case context
|
403
|
+
when ?@,?$ then ""
|
404
|
+
# when ?: then "!(?![=])|\\?|=(?![=~>])"
|
405
|
+
else "!(?![=])|\\?"
|
406
|
+
end
|
407
|
+
@in_def_name||context==?: and trailers<<"|=(?![=~>])"
|
354
408
|
|
355
|
-
|
356
|
-
return str
|
409
|
+
@file.scan(IDENTREX[trailers]||=/^[_a-z][a-z0-9_]*(?:#{trailers})?/i)
|
357
410
|
end
|
358
411
|
|
359
412
|
#-----------------------------------
|
@@ -380,18 +433,26 @@ private
|
|
380
433
|
#a comma has been seen. are we in an
|
381
434
|
#lvalue list or some other construct that uses commas?
|
382
435
|
def comma_in_lvalue_list?
|
383
|
-
@parsestack.last.lhs=
|
436
|
+
@parsestack.last.lhs=
|
437
|
+
case l=@parsestack.last
|
438
|
+
when ListContext:
|
439
|
+
when DefContext: l.in_body
|
440
|
+
else true
|
441
|
+
end
|
384
442
|
end
|
385
443
|
|
386
444
|
#-----------------------------------
|
387
445
|
def in_lvar_define_state
|
388
446
|
#@defining_lvar is a hack
|
389
447
|
@defining_lvar or case ctx=@parsestack.last
|
390
|
-
when ForSMContext; ctx.state==:for
|
391
|
-
when RescueSMContext
|
448
|
+
#when ForSMContext; ctx.state==:for
|
449
|
+
when RescueSMContext
|
450
|
+
@last_operative_token.ident=="=>" and @file.match? /\A[\s\v]*([:;#\n]|then[^a-zA-Z0-9_])/m
|
392
451
|
#when BlockParamListLhsContext; true
|
393
452
|
end
|
394
453
|
end
|
454
|
+
|
455
|
+
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
|
395
456
|
|
396
457
|
#-----------------------------------
|
397
458
|
#determine if an alphabetic identifier refers to a variable
|
@@ -400,45 +461,50 @@ private
|
|
400
461
|
#if appropriate. adds tok to the
|
401
462
|
#local var table if its a local var being defined for the first time.
|
402
463
|
|
403
|
-
#
|
404
|
-
#
|
405
|
-
|
406
|
-
#
|
407
|
-
#
|
408
|
-
|
409
|
-
def var_or_meth_name(name,lasttok,pos)
|
464
|
+
#in general, operators in ruby are disambiguated by the before-but-not-after rule.
|
465
|
+
#an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
|
466
|
+
#whitespace before but not after the 'operator' indicates it is to be considered a
|
467
|
+
#value token instead. otherwise it is a binary operator. (unary (prefix) ops count
|
468
|
+
#as 'values' here.)
|
469
|
+
def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
|
410
470
|
#look for call site if not a keyword or keyword is function-like
|
411
471
|
#look for and ignore local variable names
|
412
472
|
|
413
473
|
assert String===name
|
414
474
|
|
475
|
+
was_in_lvar_define_state=in_lvar_define_state
|
415
476
|
#maybe_local really means 'maybe local or constant'
|
416
477
|
maybe_local=case name
|
417
|
-
when /[^a-z_0-9]$/i
|
418
|
-
when /^[a-z_]
|
419
|
-
|
478
|
+
when /[^a-z_0-9]$/i #do nothing
|
479
|
+
when /^[a-z_]/
|
480
|
+
(localvars===name or
|
481
|
+
VARLIKE_KEYWORDS===name or
|
482
|
+
was_in_lvar_define_state
|
483
|
+
) and not lasttok===/^(\.|::)$/
|
484
|
+
when /^[A-Z]/
|
485
|
+
is_const=true
|
486
|
+
not lasttok==='.' #this is the right algorithm for constants...
|
420
487
|
end
|
421
488
|
|
422
489
|
assert(@moretokens.empty?)
|
423
490
|
|
424
491
|
oldlast=@last_operative_token
|
425
492
|
|
426
|
-
tok
|
493
|
+
tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
|
427
494
|
|
428
495
|
oldpos= input_position
|
429
496
|
sawnl=false
|
430
497
|
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
|
431
498
|
if sawnl || eof?
|
432
|
-
if
|
433
|
-
if
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
lexerror tok,"not a valid variable name: #{name}"
|
439
|
-
end
|
440
|
-
return result.unshift(tok)
|
499
|
+
if was_in_lvar_define_state
|
500
|
+
if /^[a-z_][a-zA-Z_0-9]*$/===name
|
501
|
+
assert !(lasttok===/^(\.|::)$/)
|
502
|
+
localvars[name]=true
|
503
|
+
else
|
504
|
+
lexerror tok,"not a valid variable name: #{name}"
|
441
505
|
end
|
506
|
+
return result.unshift(tok)
|
507
|
+
elsif maybe_local
|
442
508
|
return result.unshift(tok) #if is_const
|
443
509
|
else
|
444
510
|
return result.unshift(
|
@@ -455,6 +521,8 @@ private
|
|
455
521
|
when ?=; not /^=[>=~]$/===readahead(2)
|
456
522
|
when ?,; comma_in_lvalue_list?
|
457
523
|
when ?); last_context_not_implicit.lhs
|
524
|
+
when ?i; /^in[^a-zA-Z_0-9]/===readahead(3) and
|
525
|
+
ForSMContext===last_context_not_implicit
|
458
526
|
when ?>,?<; /^(.)\1=$/===readahead(3)
|
459
527
|
when ?*,?&; /^(.)\1?=/===readahead(3)
|
460
528
|
when ?|; /^\|\|?=/===readahead(3) or
|
@@ -463,8 +531,8 @@ private
|
|
463
531
|
readahead(2)[1] != ?|
|
464
532
|
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
|
465
533
|
end
|
466
|
-
if (assignment_coming && !(lasttok===/^(\.|::)$/) or
|
467
|
-
tok=VarNameToken.new(name,pos)
|
534
|
+
if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
|
535
|
+
tok=assign_lvar_type! VarNameToken.new(name,pos)
|
468
536
|
if /[^a-z_0-9]$/i===name
|
469
537
|
lexerror tok,"not a valid variable name: #{name}"
|
470
538
|
elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
|
@@ -476,44 +544,106 @@ private
|
|
476
544
|
implicit_parens_to_emit=
|
477
545
|
if assignment_coming
|
478
546
|
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
|
479
|
-
|
547
|
+
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
|
480
548
|
else
|
481
549
|
case nc
|
482
550
|
when nil: 2
|
483
|
-
when ?!; readahead(2)
|
551
|
+
when ?!; /^![=~]$/===readahead(2) ? 2 : 1
|
552
|
+
when ?d;
|
553
|
+
if /^do([^a-zA-Z0-9_]|$)/===readahead(3)
|
554
|
+
if maybe_local and expecting_do?
|
555
|
+
ty=VarNameToken
|
556
|
+
0
|
557
|
+
else
|
558
|
+
maybe_local=false
|
559
|
+
2
|
560
|
+
end
|
561
|
+
else
|
562
|
+
1
|
563
|
+
end
|
484
564
|
when NEVERSTARTPARAMLISTFIRST
|
485
565
|
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
|
486
|
-
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
|
566
|
+
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1 #"
|
487
567
|
when ?{
|
488
568
|
maybe_local=false
|
569
|
+
1
|
570
|
+
=begin
|
489
571
|
x=2
|
490
572
|
x-=1 if /\A(return|break|next)\Z/===name and
|
491
573
|
!(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
|
492
574
|
x
|
575
|
+
=end
|
493
576
|
when ?(;
|
494
|
-
maybe_local=false
|
577
|
+
maybe_local=false
|
578
|
+
lastid=lasttok&&lasttok.ident
|
579
|
+
case lastid
|
580
|
+
when /\A[;(]|do\Z/: was_after_nonid_op=false
|
581
|
+
when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
|
582
|
+
when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
|
583
|
+
end if KeywordToken===lasttok
|
584
|
+
was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
|
585
|
+
want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
|
586
|
+
# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
|
587
|
+
# MethNameToken===lasttok or
|
588
|
+
# RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
|
589
|
+
# )
|
590
|
+
|
591
|
+
#look ahead for closing paren (after some whitespace...)
|
592
|
+
want_parens=false if @file.match? /\A.(?:\s|\v|\#.*\n)*\)/
|
593
|
+
# afterparen=@file.pos
|
594
|
+
# getchar
|
595
|
+
# ignored_tokens(true)
|
596
|
+
# want_parens=false if nextchar==?)
|
597
|
+
# @file.pos=afterparen
|
598
|
+
|
599
|
+
want_parens ? 1 : 0
|
495
600
|
when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
|
496
|
-
when ?+, ?-,
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
601
|
+
when ?+, ?-, ?%, ?/
|
602
|
+
if /^(return|break|next)$/===@last_operative_token.ident and not(
|
603
|
+
KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
|
604
|
+
)
|
605
|
+
1
|
606
|
+
else
|
607
|
+
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
|
608
|
+
end
|
609
|
+
when ?*, ?&
|
610
|
+
lasttok=@last_operative_token
|
611
|
+
if /^(return|break|next)$/===@last_operative_token.ident and not(
|
612
|
+
KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
|
613
|
+
)
|
614
|
+
1
|
615
|
+
else
|
616
|
+
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
|
617
|
+
end
|
618
|
+
when ?:
|
619
|
+
next2=readahead(2)
|
620
|
+
if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
|
621
|
+
$1 && !ws_toks.empty? ? 3 : 2
|
622
|
+
else
|
623
|
+
3
|
624
|
+
end
|
625
|
+
when ??; next3=readahead(3);
|
626
|
+
/^\?([#{WHSPLF}]|[a-z_][a-z_0-9])/io===next3 ? 2 : 3
|
627
|
+
# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
|
628
|
+
when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?["'`a-zA-Z_0-9]/]) ? 3 : 2
|
629
|
+
when ?[; ws_toks.empty?&&!(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 2 : 3
|
502
630
|
when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
|
503
631
|
else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
|
504
632
|
end
|
505
633
|
end
|
506
634
|
|
507
|
-
if is_const and implicit_parens_to_emit==3 then
|
635
|
+
if is_const and implicit_parens_to_emit==3 then #needed?
|
508
636
|
implicit_parens_to_emit=1
|
509
637
|
end
|
510
638
|
|
511
|
-
|
639
|
+
if maybe_local and implicit_parens_to_emit>=2
|
512
640
|
implicit_parens_to_emit=0
|
513
|
-
VarNameToken
|
641
|
+
ty=VarNameToken
|
514
642
|
else
|
515
|
-
MethNameToken
|
516
|
-
end
|
643
|
+
ty||=MethNameToken
|
644
|
+
end
|
645
|
+
tok=assign_lvar_type!(ty.new(name,pos))
|
646
|
+
|
517
647
|
|
518
648
|
case implicit_parens_to_emit
|
519
649
|
when 2;
|
@@ -523,8 +653,17 @@ private
|
|
523
653
|
arr,pass=*param_list_coming_with_2_or_more_params?
|
524
654
|
result.push( *arr )
|
525
655
|
unless pass
|
656
|
+
#only 1 param in list
|
526
657
|
result.unshift ImplicitParamListStartToken.new(oldpos)
|
527
|
-
|
658
|
+
last=result.last
|
659
|
+
last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
|
660
|
+
if /^(break|next|return)$/===name and
|
661
|
+
!(KeywordToken===lasttok and /^(.|::)$/===lasttok.ident)
|
662
|
+
ty=KWParamListContextNoParen
|
663
|
+
else
|
664
|
+
ty=ParamListContextNoParen
|
665
|
+
end
|
666
|
+
@parsestack.push ty.new(@linenum)
|
528
667
|
end
|
529
668
|
when 0; #do nothing
|
530
669
|
else raise 'invalid value of implicit_parens_to_emit'
|
@@ -547,11 +686,13 @@ private
|
|
547
686
|
result=[get1token]
|
548
687
|
pass=loop{
|
549
688
|
tok=get1token
|
550
|
-
result<<tok
|
689
|
+
result << tok
|
551
690
|
if @parsestack.size==basesize
|
552
691
|
break false
|
553
692
|
elsif ','==tok.to_s and @parsestack.size==basesize+1
|
554
693
|
break true
|
694
|
+
elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.unary and @parsestack.size==basesize+1
|
695
|
+
break true
|
555
696
|
elsif EoiToken===tok
|
556
697
|
lexerror tok, "unexpected eof in parameter list"
|
557
698
|
end
|
@@ -560,11 +701,13 @@ private
|
|
560
701
|
end
|
561
702
|
|
562
703
|
#-----------------------------------
|
563
|
-
CONTEXT2ENDTOK={
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
704
|
+
#Lookup table: context class => class of the end token paired with it.
#NOTE(review): presumably consulted by abort_noparens! when it unwinds
#@parsestack -- confirm against that method's body.
CONTEXT2ENDTOK={
  AssignmentRhsContext      => AssignmentRhsListEndToken,
  ParamListContextNoParen   => ImplicitParamListEndToken,
  KWParamListContextNoParen => ImplicitParamListEndToken,
  WhenParamListContext      => KwParamListEndToken,
  RescueSMContext           => KwParamListEndToken
}
|
568
711
|
def abort_noparens!(str='')
|
569
712
|
#assert @moretokens.empty?
|
570
713
|
result=[]
|
@@ -576,7 +719,63 @@ private
|
|
576
719
|
return result
|
577
720
|
end
|
578
721
|
|
579
|
-
|
722
|
+
#-----------------------------------
|
723
|
+
#Lookup table used by abort_noparens_for_rescue!:
#context class => class of the end token emitted when that context is closed.
#A context whose class is not listed here stops the unwinding.
CONTEXT2ENDTOK_FOR_RESCUE={
  AssignmentRhsContext      => AssignmentRhsListEndToken,
  ParamListContextNoParen   => ImplicitParamListEndToken,
  KWParamListContextNoParen => ImplicitParamListEndToken,
  WhenParamListContext      => KwParamListEndToken,
  RescueSMContext           => KwParamListEndToken
}
|
730
|
+
#Unwind @parsestack for a rescue seen in operator (infix) position.
#Walks down from the top of @parsestack while the top context's class is
#a key of CONTEXT2ENDTOK_FOR_RESCUE, collecting one end token per context
#closed.
#str:: text of the token just read; its length is subtracted from
#      input_position to compute the offset given to every end token.
#Returns the array of end tokens, in the order the contexts were closed.
def abort_noparens_for_rescue!(str='')
  #assert @moretokens.empty?
  closers=[]
  top=@parsestack.last
  while true
    endtok_class=CONTEXT2ENDTOK_FOR_RESCUE[top.class] or break

    #the rhs of a non-multiple assignment is left open (nothing emitted or popped)
    if AssignmentRhsContext===top
      break unless top.multi_assign?
    end

    #implicit param list sitting directly on an assignment rhs:
    #end both of them at once, then stop unwinding
    if ParamListContextNoParen===top and AssignmentRhsContext===@parsestack[-2]
      param_end=ImplicitParamListEndToken.new(input_position-str.length)
      rhs_end=AssignmentRhsListEndToken.new(input_position-str.length)
      closers.push param_end, rhs_end
      2.times{ @parsestack.pop }
      break
    end

    closers << endtok_class.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?

    #a RescueSMContext gets its end token but stays on the stack
    break if RescueSMContext===top #why is this here?

    @parsestack.pop
    top=@parsestack.last
  end
  closers
end
|
750
|
+
|
751
|
+
#-----------------------------------
|
752
|
+
#Lookup table used by abort_noparens_for_do! and expecting_do?:
#context class => class of the end token emitted when that context is closed.
#ExpectDoOrNlContext maps to the marker value 1 instead of a token class:
#abort_noparens_for_do! stops (without popping) when it meets that value,
#while expecting_do? only cares that an entry exists.
CONTEXT2ENDTOK_FOR_DO={
  AssignmentRhsContext    => AssignmentRhsListEndToken,
  ParamListContextNoParen => ImplicitParamListEndToken,
  ExpectDoOrNlContext     => 1,
  #WhenParamListContext=>KwParamListEndToken,
  #RescueSMContext=>KwParamListEndToken
}
|
759
|
+
#Unwind @parsestack when the keyword 'do' is seen.
#Pops contexts off @parsestack for as long as the top context's class has
#a token class registered in CONTEXT2ENDTOK_FOR_DO, emitting one end token
#per popped context. Stops at the first context that has no entry, or
#whose entry is the marker 1 (that context is left on the stack).
#str:: text of the token just read; its length is subtracted from
#      input_position to compute the offset given to every end token.
#Returns the array of end tokens, in the order the contexts were popped.
def abort_noparens_for_do!(str='')
  #assert @moretokens.empty?
  closers=[]
  while true
    endtok_class=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class] or break
    break if endtok_class==1
    closers.push endtok_class.new(input_position-str.length)
    @parsestack.pop
  end
  closers
end
|
769
|
+
|
770
|
+
#-----------------------------------
|
771
|
+
#True if the innermost context on @parsestack that is not an
#AssignmentRhsContext has an entry in CONTEXT2ENDTOK_FOR_DO.
#False if it has no entry, or if the stack holds nothing but
#AssignmentRhsContexts (or is empty).
def expecting_do?
  innermost=@parsestack.reverse.find{|ctx| !(AssignmentRhsContext===ctx) }
  return false unless innermost
  CONTEXT2ENDTOK_FOR_DO[innermost.class] ? true : false
end
|
778
|
+
|
580
779
|
#-----------------------------------
|
581
780
|
def abort_1_noparen!(offs=0)
|
582
781
|
assert @moretokens.empty?
|
@@ -585,12 +784,12 @@ if false #no longer used
|
|
585
784
|
@parsestack.pop
|
586
785
|
result << AssignmentRhsListEndToken.new(input_position-offs)
|
587
786
|
end
|
588
|
-
ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
|
787
|
+
if ParamListContextNoParen===@parsestack.last #or lexerror huh,'{} with no matching callsite'
|
589
788
|
@parsestack.pop
|
590
789
|
result << ImplicitParamListEndToken.new(input_position-offs)
|
790
|
+
end
|
591
791
|
return result
|
592
792
|
end
|
593
|
-
end
|
594
793
|
|
595
794
|
#-----------------------------------
|
596
795
|
#parse keywords now, to prevent confusion over bare symbols
|
@@ -598,6 +797,7 @@ end
|
|
598
797
|
#if arg is not a keyword, the block is called
|
599
798
|
def parse_keywords(str,offset)
|
600
799
|
assert @moretokens.empty?
|
800
|
+
assert !(KeywordToken===@last_operative_token and /A(.|::|def)\Z/===@last_operative_token.ident)
|
601
801
|
result=[KeywordToken.new(str,offset)]
|
602
802
|
|
603
803
|
case str
|
@@ -619,11 +819,15 @@ end
|
|
619
819
|
/^(do)$/===start and localvars.end_block
|
620
820
|
/^(class|module|def)$/===start and @localvars_stack.pop
|
621
821
|
|
622
|
-
when "
|
822
|
+
when "module"
|
623
823
|
result.first.has_end!
|
624
824
|
@parsestack.push WantsEndContext.new(str,@linenum)
|
625
825
|
@localvars_stack.push SymbolTable.new
|
626
826
|
|
827
|
+
when "class"
|
828
|
+
result.first.has_end!
|
829
|
+
@parsestack.push ClassContext.new(str,@linenum)
|
830
|
+
|
627
831
|
when "if","unless" #could be infix form without end
|
628
832
|
if after_nonid_op?{false} #prefix form
|
629
833
|
result.first.has_end!
|
@@ -653,10 +857,11 @@ end
|
|
653
857
|
#expect_do_or_end_or_nl! str #handled by ForSMContext now
|
654
858
|
@parsestack.push ForSMContext.new(@linenum)
|
655
859
|
when "do"
|
656
|
-
result.unshift(*
|
860
|
+
result.unshift(*abort_noparens_for_do!(str))
|
657
861
|
if ExpectDoOrNlContext===@parsestack.last
|
658
862
|
@parsestack.pop
|
659
863
|
assert WantsEndContext===@parsestack.last
|
864
|
+
result.last.as=";"
|
660
865
|
else
|
661
866
|
result.last.has_end!
|
662
867
|
@parsestack.push WantsEndContext.new(str,@linenum)
|
@@ -665,10 +870,10 @@ end
|
|
665
870
|
end
|
666
871
|
when "def"
|
667
872
|
result.first.has_end!
|
668
|
-
@parsestack.push
|
669
|
-
|
873
|
+
@parsestack.push ctx=DefContext.new(@linenum)
|
874
|
+
ctx.state=:saw_def
|
670
875
|
safe_recurse { |aa|
|
671
|
-
|
876
|
+
set_last_token KeywordToken.new "def" #hack
|
672
877
|
result.concat ignored_tokens
|
673
878
|
|
674
879
|
#read an expr like a.b.c or a::b::c
|
@@ -683,10 +888,11 @@ end
|
|
683
888
|
when/^\)$/.token_pat then parencount-=1
|
684
889
|
end
|
685
890
|
EoiToken===tok and lexerror tok, "eof in def header"
|
686
|
-
result<<tok
|
891
|
+
result << tok
|
687
892
|
end until parencount==0 #@parsestack.size==old_size
|
688
|
-
|
689
|
-
|
893
|
+
@localvars_stack.push SymbolTable.new
|
894
|
+
else #no parentheses, all tail
|
895
|
+
set_last_token KeywordToken.new "." #hack hack
|
690
896
|
tokindex=result.size
|
691
897
|
result << tok=symbol(false,false)
|
692
898
|
name=tok.to_s
|
@@ -700,25 +906,30 @@ end
|
|
700
906
|
when /^[a-z_]/; localvars===name
|
701
907
|
when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
|
702
908
|
end
|
703
|
-
|
704
|
-
|
705
|
-
|
909
|
+
result.push( *ignored_tokens(false,false) )
|
910
|
+
nc=nextchar
|
911
|
+
if !ty and maybe_local
|
706
912
|
if nc==?: || nc==?.
|
707
913
|
ty=VarNameToken
|
708
914
|
end
|
709
915
|
end
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
916
|
+
if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
|
917
|
+
ty=MethNameToken
|
918
|
+
if nc != ?(
|
919
|
+
endofs=tok.offset+tok.to_s.length
|
920
|
+
newtok=ImplicitParamListStartToken.new(endofs)
|
921
|
+
result.insert tokindex+1, newtok
|
922
|
+
end
|
715
923
|
end
|
716
924
|
|
717
925
|
assert result[tokindex].equal?(tok)
|
718
|
-
|
926
|
+
var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
|
927
|
+
@localvars_stack.push SymbolTable.new
|
928
|
+
var.in_def=true if inside_method_def? and var.respond_to? :in_def=
|
929
|
+
result[tokindex]=var
|
719
930
|
|
720
931
|
|
721
|
-
#if a.b.c.d is seen, a, b
|
932
|
+
#if a.b.c.d is seen, a, b and c
|
722
933
|
#should be considered maybe varname instead of methnames.
|
723
934
|
#the last (d in the example) is always considered a methname;
|
724
935
|
#it's what's being defined.
|
@@ -727,8 +938,7 @@ end
|
|
727
938
|
#a could even be a keyword (eg self or block_given?).
|
728
939
|
end
|
729
940
|
#read tail: .b.c.d etc
|
730
|
-
result.reverse_each{|res| break
|
731
|
-
###@last_operative_token=result.last #naive
|
941
|
+
result.reverse_each{|res| break set_last_token res unless StillIgnoreToken===res}
|
732
942
|
assert !(IgnoreToken===@last_operative_token)
|
733
943
|
state=:expect_op
|
734
944
|
@in_def_name=true
|
@@ -737,12 +947,22 @@ end
|
|
737
947
|
#look for start of parameter list
|
738
948
|
nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
|
739
949
|
if state==:expect_op and /^[a-z_(&*]/i===nc
|
740
|
-
|
950
|
+
ctx.state=:def_param_list
|
951
|
+
list,listend=def_param_list
|
952
|
+
result.concat list
|
953
|
+
end_index=result.index(listend)
|
954
|
+
ofs=listend.offset
|
955
|
+
if endofs
|
956
|
+
result.insert end_index,ImplicitParamListEndToken.new(ofs)
|
957
|
+
else
|
958
|
+
ofs+=listend.to_s.size
|
959
|
+
end
|
960
|
+
result.insert end_index+1,EndDefHeaderToken.new(ofs)
|
741
961
|
break
|
742
962
|
end
|
743
963
|
|
744
964
|
tok=get1token
|
745
|
-
result<<tok
|
965
|
+
result<< tok
|
746
966
|
case tok
|
747
967
|
when EoiToken
|
748
968
|
lexerror tok,'unexpected eof in def header'
|
@@ -752,9 +972,18 @@ end
|
|
752
972
|
state=:expect_op
|
753
973
|
when /^(\.|::)$/.token_pat
|
754
974
|
lexerror tok,'expected ident' unless state==:expect_op
|
975
|
+
if endofs
|
976
|
+
result.insert -2, ImplicitParamListEndToken.new(endofs)
|
977
|
+
endofs=nil
|
978
|
+
end
|
755
979
|
state=:expect_name
|
756
980
|
when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
|
981
|
+
ctx.state=:def_body
|
757
982
|
state==:expect_op or lexerror tok,'expected identifier'
|
983
|
+
if endofs
|
984
|
+
result.insert -2,ImplicitParamListEndToken.new(tok.offset)
|
985
|
+
end
|
986
|
+
result.insert -2, EndDefHeaderToken.new(tok.offset)
|
758
987
|
break
|
759
988
|
else
|
760
989
|
lexerror(tok, "bizarre token in def name: " +
|
@@ -765,24 +994,34 @@ end
|
|
765
994
|
}
|
766
995
|
when "alias"
|
767
996
|
safe_recurse { |a|
|
768
|
-
|
997
|
+
set_last_token KeywordToken.new "alias" #hack
|
769
998
|
result.concat ignored_tokens
|
770
999
|
res=symbol(eat_next_if(?:),false)
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
1000
|
+
unless res
|
1001
|
+
lexerror(result.first,"bad symbol in alias")
|
1002
|
+
else
|
1003
|
+
res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
|
1004
|
+
result<< res
|
1005
|
+
set_last_token KeywordToken.new "alias" #hack
|
1006
|
+
result.concat ignored_tokens
|
1007
|
+
res=symbol(eat_next_if(?:),false)
|
1008
|
+
unless res
|
1009
|
+
lexerror(result.first,"bad symbol in alias")
|
1010
|
+
else
|
1011
|
+
res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
|
1012
|
+
result<< res
|
1013
|
+
end
|
1014
|
+
end
|
776
1015
|
}
|
777
1016
|
when "undef"
|
778
1017
|
safe_recurse { |a|
|
779
1018
|
loop do
|
780
|
-
|
1019
|
+
set_last_token KeywordToken.new "," #hack
|
781
1020
|
result.concat ignored_tokens
|
782
1021
|
tok=symbol(eat_next_if(?:),false)
|
783
1022
|
tok or lexerror(result.first,"bad symbol in undef")
|
784
1023
|
result<< tok
|
785
|
-
|
1024
|
+
set_last_token tok
|
786
1025
|
assert !(IgnoreToken===@last_operative_token)
|
787
1026
|
|
788
1027
|
sawnl=false
|
@@ -809,13 +1048,13 @@ end
|
|
809
1048
|
unless after_nonid_op? {false}
|
810
1049
|
#rescue needs to be treated differently when in operator context...
|
811
1050
|
#i think no RescueSMContext should be pushed on the stack...
|
812
|
-
#plus, the rescue token should be marked as infix
|
813
|
-
result.
|
1051
|
+
result.first.set_infix! #plus, the rescue token should be marked as infix
|
1052
|
+
result.unshift(*abort_noparens_for_rescue!(str))
|
814
1053
|
else
|
815
1054
|
result.push KwParamListStartToken.new(offset+str.length)
|
816
1055
|
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
|
817
|
-
result.unshift(*abort_noparens!(str))
|
818
1056
|
@parsestack.push RescueSMContext.new(@linenum)
|
1057
|
+
result.unshift(*abort_noparens!(str))
|
819
1058
|
end
|
820
1059
|
|
821
1060
|
when "then"
|
@@ -831,16 +1070,43 @@ end
|
|
831
1070
|
result.unshift(*abort_noparens!(str))
|
832
1071
|
|
833
1072
|
when /\A(return|break|next)\Z/
|
834
|
-
|
835
|
-
|
1073
|
+
fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
|
1074
|
+
tok=KeywordToken.new(str,offset)
|
1075
|
+
result=yield tok
|
1076
|
+
result[0]=tok
|
1077
|
+
tok.has_no_block!
|
1078
|
+
|
1079
|
+
|
1080
|
+
when 'END'
|
1081
|
+
#END could be treated, lexically, just as if it is an
|
1082
|
+
#ordinary method, except that local vars created in
|
1083
|
+
#END blocks are visible to subsequent code. (Why??)
|
1084
|
+
#That difference forces a custom parsing.
|
1085
|
+
if @last_operative_token===/^(\.|::)$/
|
1086
|
+
result=yield nil #should pass a keyword token here
|
1087
|
+
else
|
1088
|
+
safe_recurse{
|
1089
|
+
old=result.first
|
1090
|
+
result=[
|
1091
|
+
MethNameToken.new(old.ident,old.offset),
|
1092
|
+
ImplicitParamListStartToken.new(input_position),
|
1093
|
+
ImplicitParamListEndToken.new(input_position),
|
1094
|
+
*ignored_tokens
|
1095
|
+
]
|
1096
|
+
getchar=='{' or lexerror(result.first,"expected { after #{str}")
|
1097
|
+
result.push KeywordToken.new('{',input_position-1)
|
1098
|
+
result.last.set_infix!
|
1099
|
+
@parsestack.push BeginEndContext.new(str,offset)
|
1100
|
+
}
|
1101
|
+
end
|
836
1102
|
|
837
1103
|
when FUNCLIKE_KEYWORDS
|
838
|
-
result=yield
|
1104
|
+
result=yield nil #should be a keyword token
|
839
1105
|
|
840
1106
|
when RUBYKEYWORDS
|
841
1107
|
#do nothing
|
842
1108
|
|
843
|
-
else result=yield
|
1109
|
+
else result=yield nil
|
844
1110
|
|
845
1111
|
end
|
846
1112
|
|
@@ -881,11 +1147,11 @@ end
|
|
881
1147
|
#-----------------------------------
|
882
1148
|
def block_param_list_lookahead
|
883
1149
|
safe_recurse{ |la|
|
884
|
-
|
1150
|
+
set_last_token KeywordToken.new ';'
|
885
1151
|
a=ignored_tokens
|
886
1152
|
|
887
1153
|
if eat_next_if(?|)
|
888
|
-
a<<KeywordToken.new("|", input_position-1)
|
1154
|
+
a<< KeywordToken.new("|", input_position-1)
|
889
1155
|
if true
|
890
1156
|
@parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
|
891
1157
|
nextchar==?| and a.push NoWsToken.new(input_position)
|
@@ -909,7 +1175,7 @@ else
|
|
909
1175
|
end
|
910
1176
|
|
911
1177
|
tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
|
912
|
-
a<<tok
|
1178
|
+
a<< tok
|
913
1179
|
end
|
914
1180
|
assert@defining_lvar || AssignmentRhsContext===@parsestack.last
|
915
1181
|
@defining_lvar=false
|
@@ -920,14 +1186,14 @@ else
|
|
920
1186
|
@parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
|
921
1187
|
@parsestack.pop
|
922
1188
|
|
923
|
-
a<<KeywordToken.new('|',tok.offset)
|
1189
|
+
a<< KeywordToken.new('|',tok.offset)
|
924
1190
|
@moretokens.empty? or
|
925
1191
|
fixme %#moretokens might be set from get1token call above...might be bad#
|
926
1192
|
end
|
927
1193
|
end
|
928
1194
|
end
|
929
1195
|
|
930
|
-
|
1196
|
+
set_last_token KeywordToken.new ';'
|
931
1197
|
#a.concat ignored_tokens
|
932
1198
|
|
933
1199
|
#assert @last_operative_token===';'
|
@@ -948,6 +1214,7 @@ end
|
|
948
1214
|
@in_def_name=false
|
949
1215
|
result=[]
|
950
1216
|
normal_comma_level=old_parsestack_size=@parsestack.size
|
1217
|
+
listend=nil
|
951
1218
|
safe_recurse { |a|
|
952
1219
|
assert(@moretokens.empty?)
|
953
1220
|
assert((not IgnoreToken===@moretokens[0]))
|
@@ -972,18 +1239,22 @@ end
|
|
972
1239
|
alias === call
|
973
1240
|
end
|
974
1241
|
|
975
|
-
|
1242
|
+
set_last_token KeywordToken.new ',' #hack
|
976
1243
|
#read local parameter names
|
1244
|
+
nextvar=nil
|
977
1245
|
loop do
|
978
1246
|
expect_name=(@last_operative_token===',' and
|
979
1247
|
normal_comma_level==@parsestack.size)
|
980
1248
|
expect_name and @defining_lvar||=true
|
981
1249
|
result << tok=get1token
|
982
|
-
lexerror
|
1250
|
+
break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
|
983
1251
|
|
984
1252
|
#break if at end of param list
|
985
|
-
|
986
|
-
|
1253
|
+
if endingblock===tok and old_parsestack_size>=@parsestack.size
|
1254
|
+
nextvar and localvars[nextvar]=true #add nextvar to local vars
|
1255
|
+
listend=tok
|
1256
|
+
break
|
1257
|
+
end
|
987
1258
|
|
988
1259
|
#next token is a local var name
|
989
1260
|
#(or the one after that if unary ops present)
|
@@ -992,33 +1263,40 @@ end
|
|
992
1263
|
case tok
|
993
1264
|
when IgnoreToken #, /^[A-Z]/ #do nothing
|
994
1265
|
when /^,$/.token_pat #hack
|
995
|
-
|
996
|
-
|
1266
|
+
|
997
1267
|
when VarNameToken
|
998
1268
|
assert@defining_lvar
|
999
1269
|
@defining_lvar=false
|
1000
1270
|
assert((not @last_operative_token===','))
|
1271
|
+
# assert !nextvar
|
1272
|
+
nextvar=tok.ident
|
1273
|
+
localvars[nextvar]=false #remove nextvar from list of local vars for now
|
1001
1274
|
when /^[&*]$/.token_pat #unary form...
|
1002
1275
|
#a NoWsToken is also expected... read it now
|
1003
1276
|
result.concat maybe_no_ws_token #not needed?
|
1004
|
-
|
1277
|
+
set_last_token KeywordToken.new ','
|
1005
1278
|
else
|
1006
1279
|
lexerror tok,"unfamiliar var name '#{tok}'"
|
1007
1280
|
end
|
1008
|
-
elsif /^,$/.token_pat===tok
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1281
|
+
elsif /^,$/.token_pat===tok
|
1282
|
+
if normal_comma_level+1==@parsestack.size and
|
1283
|
+
AssignmentRhsContext===@parsestack.last
|
1284
|
+
#seeing comma here should end implicit rhs started within the param list
|
1285
|
+
result << AssignmentRhsListEndToken.new(tok.offset)
|
1286
|
+
@parsestack.pop
|
1287
|
+
end
|
1288
|
+
if nextvar and normal_comma_level==@parsestack.size
|
1289
|
+
localvars[nextvar]=true #now, finally add nextvar back to local vars
|
1290
|
+
nextvar
|
1291
|
+
end
|
1014
1292
|
end
|
1015
1293
|
end
|
1016
1294
|
|
1017
1295
|
@defining_lvar=false
|
1018
|
-
|
1296
|
+
@parsestack.last.see self,:semi
|
1019
1297
|
|
1020
1298
|
assert(@parsestack.size <= old_parsestack_size)
|
1021
|
-
assert(endingblock[tok])
|
1299
|
+
assert(endingblock[tok] || ErrorToken===tok)
|
1022
1300
|
|
1023
1301
|
#hack: force next token to look like start of a
|
1024
1302
|
#new stmt, if the last ignored_tokens
|
@@ -1026,42 +1304,54 @@ end
|
|
1026
1304
|
#(just in case the next token parsed
|
1027
1305
|
#happens to call quote_expected? or after_nonid_op)
|
1028
1306
|
result.concat ignored_tokens
|
1029
|
-
if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
|
1030
|
-
!(NewlineToken===@last_operative_token) and
|
1031
|
-
!(/^(end|;)$/===@last_operative_token)
|
1032
|
-
|
1307
|
+
# if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
|
1308
|
+
# !(NewlineToken===@last_operative_token) and
|
1309
|
+
# !(/^(end|;)$/===@last_operative_token)
|
1310
|
+
#result<<EndDefHeaderToken.new(result.last.offset+result.last.to_s.size)
|
1311
|
+
set_last_token KeywordToken.new ';'
|
1033
1312
|
result<< get1token
|
1034
|
-
end
|
1313
|
+
# end
|
1035
1314
|
}
|
1036
1315
|
|
1037
|
-
return result
|
1316
|
+
return result,listend
|
1038
1317
|
end
|
1039
1318
|
|
1040
1319
|
|
1041
1320
|
#-----------------------------------
|
1042
1321
|
#handle % in ruby code. is it part of fancy quote or a modulo operator?
|
1043
1322
|
#Decide whether a % starts a fancy quote (%w[], %q{}, ...) or is an operator.
#ch:: the % character, as passed on to quote_expected?, fancy_quote and biop.
#Returns whatever fancy_quote or biop returns.
def percent(ch)
  #an AssignmentContext on top of the stack forces the operator reading;
  #it is consumed here either way
  forced_operator=AssignmentContext===@parsestack.last
  @parsestack.pop if forced_operator

  wants_quote=
    if forced_operator
      false
    else
      #a quote is also wanted right after the keywords return/next/break
      quote_expected?(ch) ||
        (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
    end

  if wants_quote
    fancy_quote ch
  else
    biop ch
  end
end
|
1050
1335
|
|
1051
1336
|
#-----------------------------------
|
1052
1337
|
#handle * & in ruby code. is unary or binary operator?
|
1053
1338
|
#Lex a * or & and decide whether it is the unary or the binary operator.
#ch:: '*' or '&'.
#The operator is unary if unary_op_expected?(ch) says so, or if the last
#operative token is one of the keywords return/next/break.
#For the unary form the token is flagged (unary=true); a NoWsToken is queued
#on @moretokens unless whitespace or end of input follows,
#comma_in_lvalue_list? is called, and for '*' the top context on
#@parsestack is notified with :splat.
#Returns the token produced by quadriop.
def star_or_amp(ch)
  assert('*&'[ch])
  want_unary=unary_op_expected?(ch) ||
    (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
  result=quadriop(ch)
  if want_unary
    #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
    assert OperatorToken===result
    result.unary=true #result should distinguish unary+binary *&

    #nextchar is nil at end of input; calling .chr on it used to raise
    #NoMethodError when the operator was the very last character.
    #End of input is treated like whitespace: no NoWsToken is queued.
    following=nextchar
    unless following.nil? or WHSPLF[following.chr]
      @moretokens << NoWsToken.new(input_position)
    end

    comma_in_lvalue_list?
    @parsestack.last.see self, :splat if ch=='*'
  end
  result
end
|
1066
1356
|
|
1067
1357
|
#-----------------------------------
|
@@ -1079,15 +1369,23 @@ end
|
|
1079
1369
|
#-----------------------------------
|
1080
1370
|
#Decide whether a / starts a regex literal or is the division operator.
#ch:: the / character, as passed on to regex.
#Returns the result of regex(ch) for a regex start, otherwise the token
#built by operator_or_methname_token from '/' or '/='.
def regex_or_div(ch)
  #space after slash always means / operator, rather than regex start
  #= after slash always means /= operator, rather than regex start
  if AssignmentContext===@parsestack.last
    #an AssignmentContext on top of the stack forces the operator reading;
    #it is consumed here
    @parsestack.pop
  else
    starts_regex=after_nonid_op?{
      !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
    }
    #a slash right after an open paren also starts a regex
    starts_regex||=(KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
    return regex(ch) if starts_regex
  end

  #/ is operator
  op=getchar
  op << '=' if eat_next_if(?=)
  operator_or_methname_token op
end
|
1092
1390
|
|
1093
1391
|
#-----------------------------------
|
@@ -1101,8 +1399,8 @@ end
|
|
1101
1399
|
s=tok.to_s
|
1102
1400
|
case s
|
1103
1401
|
when /[^a-z_0-9]$/i; false
|
1104
|
-
when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
|
1105
|
-
when /^[A-
|
1402
|
+
# when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
|
1403
|
+
when /^[A-Z_]/i; VarNameToken===tok
|
1106
1404
|
when /^[@$<]/; true
|
1107
1405
|
else raise "not var or method name: #{s}"
|
1108
1406
|
end
|
@@ -1139,18 +1437,22 @@ end
|
|
1139
1437
|
unless eat_next_if(?:)
|
1140
1438
|
#cancel implicit contexts...
|
1141
1439
|
@moretokens.push(*abort_noparens!(':'))
|
1440
|
+
@moretokens.push KeywordToken.new(':',startpos)
|
1142
1441
|
|
1143
|
-
|
1144
|
-
@parsestack.
|
1145
|
-
|
1146
|
-
TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
|
1147
|
-
|
1148
|
-
if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
|
1442
|
+
case @parsestack.last
|
1443
|
+
when TernaryContext: @parsestack.pop #should be in the context's see handler
|
1444
|
+
when ExpectDoOrNlContext: #should be in the context's see handler
|
1149
1445
|
@parsestack.pop
|
1150
1446
|
assert @parsestack.last.starter[/^(while|until|for)$/]
|
1447
|
+
@moretokens.last.as=";"
|
1448
|
+
when RescueSMContext:
|
1449
|
+
@moretokens.last.as=";"
|
1450
|
+
else @moretokens.last.as="then"
|
1151
1451
|
end
|
1152
1452
|
|
1153
|
-
|
1453
|
+
#end ternary context, if any
|
1454
|
+
@parsestack.last.see self,:colon
|
1455
|
+
|
1154
1456
|
return @moretokens.shift
|
1155
1457
|
end
|
1156
1458
|
|
@@ -1182,9 +1484,15 @@ end
|
|
1182
1484
|
opmatches=readahead(3)[RUBYSYMOPERATORREX]
|
1183
1485
|
result= opmatches ? read(opmatches.size) :
|
1184
1486
|
case nc=nextchar
|
1185
|
-
when ?"
|
1186
|
-
|
1187
|
-
|
1487
|
+
when ?" #"
|
1488
|
+
assert notbare
|
1489
|
+
open=':"'; close='"'
|
1490
|
+
double_quote('"')
|
1491
|
+
when ?' #'
|
1492
|
+
assert notbare
|
1493
|
+
open=":'"; close="'"
|
1494
|
+
single_quote("'")
|
1495
|
+
when ?` then read(1) #`
|
1188
1496
|
when ?@ then at_identifier.to_s
|
1189
1497
|
when ?$ then dollar_identifier.to_s
|
1190
1498
|
when ?_,?a..?z then identifier_as_string(?:)
|
@@ -1197,7 +1505,12 @@ end
|
|
1197
1505
|
result
|
1198
1506
|
else error= "unexpected char starting symbol: #{nc.chr}"
|
1199
1507
|
end
|
1200
|
-
|
1508
|
+
result= lexerror(klass.new(result,start,notbare ? ':' : ''),error)
|
1509
|
+
if open
|
1510
|
+
result.open=open
|
1511
|
+
result.close=close
|
1512
|
+
end
|
1513
|
+
return result
|
1201
1514
|
end
|
1202
1515
|
|
1203
1516
|
def merge_assignment_op_in_setter_callsites?
|
@@ -1211,12 +1524,12 @@ end
|
|
1211
1524
|
opmatches=readahead(3)[RUBYSYMOPERATORREX]
|
1212
1525
|
return [opmatches ? read(opmatches.size) :
|
1213
1526
|
case nc=nextchar
|
1214
|
-
when ?` then read(1)
|
1527
|
+
when ?` then read(1) #`
|
1215
1528
|
when ?_,?a..?z,?A..?Z then
|
1216
1529
|
context=merge_assignment_op_in_setter_callsites? ? ?: : nc
|
1217
1530
|
identifier_as_string(context)
|
1218
1531
|
else
|
1219
|
-
|
1532
|
+
set_last_token KeywordToken.new(';')
|
1220
1533
|
lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
|
1221
1534
|
nil
|
1222
1535
|
end, start
|
@@ -1233,20 +1546,63 @@ end
|
|
1233
1546
|
ender=til_charset(/[#{quote}]/)
|
1234
1547
|
(quote==getchar) or
|
1235
1548
|
return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
|
1549
|
+
quote_real=true
|
1236
1550
|
else
|
1237
1551
|
quote='"'
|
1238
1552
|
ender=til_charset(/[^a-zA-Z0-9_]/)
|
1239
1553
|
ender.length >= 1 or
|
1240
|
-
return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
|
1554
|
+
return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
|
1241
1555
|
end
|
1242
1556
|
|
1243
|
-
res= HerePlaceholderToken.new( dash, quote, ender )
|
1557
|
+
res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
|
1558
|
+
if true
|
1559
|
+
res.open=["<<",dash,quote,ender,quote].to_s
|
1560
|
+
procrastinated=til_charset(/[\n]/)#+readnl
|
1561
|
+
unless @base_file
|
1562
|
+
@base_file=@file
|
1563
|
+
@file=Sequence::List.new([@file])
|
1564
|
+
@file.pos=@base_file.pos
|
1565
|
+
end
|
1566
|
+
#actually delete procrastinated from input
|
1567
|
+
@file.delete(input_position_raw-procrastinated.size...input_position_raw)
|
1568
|
+
|
1569
|
+
nl=readnl or return lexerror(res, "here header without body (at eof)")
|
1570
|
+
|
1571
|
+
@moretokens<< res
|
1572
|
+
bodystart=input_position
|
1573
|
+
@offset_adjust = @min_offset_adjust+procrastinated.size
|
1574
|
+
#was: @offset_adjust += procrastinated.size
|
1575
|
+
body=here_body(res)
|
1576
|
+
res.close=body.close
|
1577
|
+
@offset_adjust = @min_offset_adjust
|
1578
|
+
#was: @offset_adjust -= procrastinated.size
|
1579
|
+
bodysize=input_position-bodystart
|
1580
|
+
|
1581
|
+
#one or two already read characters are overwritten here,
|
1582
|
+
#in order to keep offsets correct in the long term
|
1583
|
+
#(at present, offsets and line numbers between
|
1584
|
+
#here header and its body will be wrong. but they should re-sync thereafter.)
|
1585
|
+
newpos=input_position_raw-nl.size
|
1586
|
+
#unless procrastinated.empty?
|
1587
|
+
@file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
|
1588
|
+
#end
|
1589
|
+
input_position_set newpos
|
1590
|
+
|
1591
|
+
#line numbers would be wrong within the procrastinated section
|
1592
|
+
@linenum-=1
|
1593
|
+
|
1594
|
+
#be nice to get the here body token at the right place in input, too...
|
1595
|
+
@pending_here_bodies<< body
|
1596
|
+
@offset_adjust-=bodysize#+nl.size
|
1597
|
+
|
1598
|
+
return @moretokens.shift
|
1599
|
+
else
|
1244
1600
|
@incomplete_here_tokens.push res
|
1245
1601
|
|
1246
1602
|
#hack: normally this should just be in get1token
|
1247
1603
|
#this fixup is necessary because the call the get1token below
|
1248
1604
|
#makes a recursion.
|
1249
|
-
|
1605
|
+
set_last_token res
|
1250
1606
|
|
1251
1607
|
safe_recurse { |a|
|
1252
1608
|
assert(a.object_id==@moretokens.object_id)
|
@@ -1269,7 +1625,7 @@ end
|
|
1269
1625
|
|
1270
1626
|
tok=get1token
|
1271
1627
|
assert(a.equal?( @moretokens))
|
1272
|
-
toks<<tok
|
1628
|
+
toks<< tok
|
1273
1629
|
EoiToken===tok and lexerror tok, "here body expected before eof"
|
1274
1630
|
end while res.unsafe_to_use
|
1275
1631
|
assert(a.equal?( @moretokens))
|
@@ -1281,13 +1637,14 @@ end
|
|
1281
1637
|
#the action continues in newline, where
|
1282
1638
|
#the rest of the here token is read after a
|
1283
1639
|
#newline has been seen and res.affix is eventually called
|
1640
|
+
end
|
1284
1641
|
end
|
1285
1642
|
|
1286
1643
|
#-----------------------------------
|
1287
1644
|
def lessthan(ch) #match quadriop('<') or here doc or spaceship op
|
1288
1645
|
case readahead(3)
|
1289
|
-
when /^<<['"`\-a-z0-9_]$/i
|
1290
|
-
if quote_expected?(ch)
|
1646
|
+
when /^<<['"`\-a-z0-9_]$/i #'
|
1647
|
+
if quote_expected?(ch) and not @last_operative_token==='class'
|
1291
1648
|
here_header
|
1292
1649
|
else
|
1293
1650
|
operator_or_methname_token read(2)
|
@@ -1309,101 +1666,231 @@ end
|
|
1309
1666
|
error='illegal escape sequence'
|
1310
1667
|
end
|
1311
1668
|
|
1312
|
-
|
1313
|
-
|
1669
|
+
#optimization: when thru with regurgitated text from a here document,
|
1670
|
+
#revert back to original unadorned Sequence instead of staying in the List.
|
1671
|
+
if @base_file and indices=@file.instance_eval{@start_pos} and
|
1672
|
+
(indices[-2]..indices[-1])===@file.pos
|
1673
|
+
@base_file.pos=@file.pos
|
1674
|
+
@file=@base_file
|
1675
|
+
@base_file=nil
|
1676
|
+
result="\n"
|
1677
|
+
end
|
1678
|
+
|
1679
|
+
@offset_adjust=@min_offset_adjust
|
1680
|
+
@moretokens.push *optional_here_bodies
|
1681
|
+
ln=@linenum
|
1682
|
+
@moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
|
1683
|
+
FileAndLineToken.new(@filename,ln,input_position)
|
1684
|
+
|
1685
|
+
start_of_line_directives
|
1314
1686
|
|
1315
|
-
|
1687
|
+
return @moretokens.shift
|
1316
1688
|
end
|
1317
1689
|
|
1318
1690
|
#-----------------------------------
|
1319
1691
|
def optional_here_bodies
|
1320
|
-
|
1692
|
+
result=[]
|
1693
|
+
if true
|
1321
1694
|
#handle here bodies queued up by previous line
|
1322
|
-
|
1695
|
+
pos=input_position
|
1696
|
+
while body=@pending_here_bodies.shift
|
1697
|
+
#body.offset=pos
|
1698
|
+
result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
|
1699
|
+
result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
|
1700
|
+
result.push body
|
1701
|
+
#result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
|
1702
|
+
#result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
|
1703
|
+
body.headtok.line=@linenum-1
|
1704
|
+
end
|
1705
|
+
else
|
1706
|
+
#...(we should be more compatible with dos/mac style newlines...)
|
1323
1707
|
while tofill=@incomplete_here_tokens.shift
|
1708
|
+
result.push(
|
1709
|
+
here_body(tofill),
|
1710
|
+
FileAndLineToken.new(@filename,@linenum,input_position)
|
1711
|
+
)
|
1712
|
+
assert(eof? || "\r\n"[prevchar])
|
1713
|
+
tofill.line=@linenum-1
|
1714
|
+
end
|
1715
|
+
end
|
1716
|
+
return result
|
1717
|
+
end
|
1718
|
+
|
1719
|
+
#-----------------------------------
|
1720
|
+
def here_body(tofill)
|
1721
|
+
close="\n"
|
1324
1722
|
tofill.string.offset= input_position
|
1723
|
+
linecount=1 #for terminator
|
1724
|
+
assert("\n"==prevchar)
|
1325
1725
|
loop {
|
1326
|
-
assert("\
|
1726
|
+
assert("\n"==prevchar)
|
1327
1727
|
|
1328
1728
|
#here body terminator?
|
1329
|
-
oldpos=
|
1729
|
+
oldpos= input_position_raw
|
1330
1730
|
if tofill.dash
|
1331
|
-
til_charset(/[^#{WHSP}]/o)
|
1731
|
+
close+=til_charset(/[^#{WHSP}]/o)
|
1732
|
+
end
|
1733
|
+
break if eof? #this is an error, should be handled better
|
1734
|
+
if read(tofill.ender.size)==tofill.ender
|
1735
|
+
crs=til_charset(/[^\r]/)||''
|
1736
|
+
if nl=readnl
|
1737
|
+
close+=tofill.ender+crs+nl
|
1738
|
+
break
|
1739
|
+
end
|
1332
1740
|
end
|
1333
|
-
break if eof?
|
1334
|
-
break if read(tofill.ender.size)==tofill.ender and readnl
|
1335
1741
|
input_position_set oldpos
|
1336
1742
|
|
1743
|
+
assert("\n"==prevchar)
|
1744
|
+
|
1337
1745
|
if tofill.quote=="'"
|
1338
|
-
line=til_charset(/[\
|
1339
|
-
|
1746
|
+
line=til_charset(/[\n]/)
|
1747
|
+
unless nl=readnl
|
1748
|
+
assert eof?
|
1749
|
+
break #this is an error, should be handled better
|
1750
|
+
end
|
1751
|
+
line.chomp!("\r")
|
1752
|
+
line<< "\n"
|
1753
|
+
assert("\n"==prevchar)
|
1754
|
+
#line.gsub! "\\\\", "\\"
|
1340
1755
|
tofill.append line
|
1341
|
-
|
1756
|
+
tofill.string.bs_handler=:squote_heredoc_esc_seq
|
1757
|
+
linecount+=1
|
1758
|
+
assert("\n"==line[-1,1])
|
1759
|
+
assert("\n"==prevchar)
|
1342
1760
|
else
|
1343
1761
|
|
1762
|
+
assert("\n"==prevchar)
|
1763
|
+
|
1344
1764
|
back1char #-1 to make newline char the next to read
|
1345
1765
|
@linenum-=1
|
1346
1766
|
|
1767
|
+
assert /[\r\n]/===nextchar.chr
|
1768
|
+
|
1347
1769
|
#retr evrything til next nl
|
1770
|
+
if FASTER_STRING_ESCAPES
|
1771
|
+
line=all_quote("\r\n", tofill.quote, "\r\n")
|
1772
|
+
else
|
1348
1773
|
line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
|
1774
|
+
end
|
1775
|
+
linecount+=1
|
1349
1776
|
#(you didn't know all_quote could take a regex, did you?)
|
1350
1777
|
|
1778
|
+
assert("\n"==prevchar)
|
1779
|
+
|
1351
1780
|
#get rid of fals that otherwise appear to be in the middle of
|
1352
1781
|
#a string (and are emitted out of order)
|
1353
1782
|
fal=@moretokens.pop
|
1354
1783
|
assert FileAndLineToken===fal || fal.nil?
|
1355
1784
|
|
1785
|
+
assert line.bs_handler
|
1786
|
+
tofill.string.bs_handler||=line.bs_handler
|
1787
|
+
|
1788
|
+
tofill.append_token line
|
1789
|
+
tofill.string.elems<<'' unless String===tofill.string.elems.last
|
1790
|
+
|
1791
|
+
assert("\n"==prevchar)
|
1792
|
+
|
1356
1793
|
back1char
|
1357
1794
|
@linenum-=1
|
1358
1795
|
assert("\r\n"[nextchar.chr])
|
1359
|
-
tofill.append_token line
|
1360
1796
|
tofill.append readnl
|
1797
|
+
|
1798
|
+
assert("\n"==prevchar)
|
1361
1799
|
end
|
1800
|
+
|
1801
|
+
assert("\n"==prevchar)
|
1362
1802
|
}
|
1803
|
+
|
1363
1804
|
|
1364
|
-
|
1805
|
+
str=tofill.string
|
1806
|
+
str.bs_handler||=:dquote_esc_seq if str.elems.size==1 and str.elems.first==''
|
1365
1807
|
tofill.unsafe_to_use=false
|
1366
|
-
|
1367
|
-
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1808
|
+
assert str.bs_handler
|
1809
|
+
#?? or tofill.string.elems==[]
|
1810
|
+
|
1811
|
+
|
1812
|
+
tofill.string.instance_eval{@char="`"} if tofill.quote=="`"
|
1813
|
+
#special cased, but I think that's all that's necessary...
|
1814
|
+
|
1815
|
+
result=tofill.bodyclass.new(tofill,linecount)
|
1816
|
+
result.open=str.open=""
|
1817
|
+
tofill.close=close
|
1818
|
+
result.close=str.close=close[1..-1]
|
1819
|
+
result.offset=str.offset
|
1820
|
+
assert str.open
|
1821
|
+
assert str.close
|
1822
|
+
return result
|
1373
1823
|
end
|
1374
1824
|
|
1375
1825
|
#-----------------------------------
|
1376
1826
|
def newline(ch)
|
1377
1827
|
assert("\r\n"[nextchar.chr])
|
1378
1828
|
|
1379
|
-
|
1380
|
-
|
1381
1829
|
#ordinary newline handling (possibly implicitly escaped)
|
1382
1830
|
assert("\r\n"[nextchar.chr])
|
1383
1831
|
assert !@parsestack.empty?
|
1384
1832
|
assert @moretokens.empty?
|
1385
|
-
result=if NewlineToken===@last_operative_token or #hack
|
1386
|
-
@last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
|
1387
|
-
!after_nonid_op?{false}
|
1388
|
-
then #hack-o-rama: probly cases left out above
|
1389
|
-
a= abort_noparens!
|
1390
|
-
ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
|
1391
|
-
assert !@parsestack.empty?
|
1392
|
-
@parsestack.last.see self,:semi
|
1393
|
-
|
1394
|
-
a << super(ch)
|
1395
|
-
@moretokens.replace a+@moretokens
|
1396
|
-
@moretokens.shift
|
1397
|
-
else
|
1398
|
-
offset= input_position
|
1399
|
-
nl=readnl
|
1400
|
-
@moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
|
1401
|
-
EscNlToken.new(@filename,@linenum-1,nl,offset)
|
1402
|
-
#WsToken.new ' ' #why? #should be "\\\n" ?
|
1403
|
-
end
|
1404
1833
|
|
1405
|
-
|
1834
|
+
pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
|
1835
|
+
pre.allow_ooo_offset=true
|
1836
|
+
|
1837
|
+
if NewlineToken===@last_operative_token or #hack
|
1838
|
+
(KeywordToken===@last_operative_token and
|
1839
|
+
@last_operative_token.ident=="rescue" and
|
1840
|
+
!@last_operative_token.infix?) or
|
1841
|
+
#/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
|
1842
|
+
!after_nonid_op?{false}
|
1843
|
+
then #hack-o-rama: probly cases left out above
|
1844
|
+
@offset_adjust=@min_offset_adjust
|
1845
|
+
a= abort_noparens!
|
1846
|
+
ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
|
1847
|
+
assert !@parsestack.empty?
|
1848
|
+
@parsestack.last.see self,:semi
|
1849
|
+
|
1850
|
+
a << super(ch)
|
1851
|
+
@moretokens.replace a+@moretokens
|
1852
|
+
else
|
1853
|
+
@offset_adjust=@min_offset_adjust
|
1854
|
+
offset= input_position
|
1855
|
+
nl=readnl
|
1856
|
+
@moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
|
1857
|
+
FileAndLineToken.new(@filename,@linenum,input_position)
|
1858
|
+
end
|
1859
|
+
|
1860
|
+
#optimization: when thru with regurgitated text from a here document,
|
1861
|
+
#revert back to original unadorned Sequence instead of staying in the list.
|
1862
|
+
if @base_file and indices=@file.instance_eval{@start_pos} and
|
1863
|
+
(indices[-2]..indices[-1])===@file.pos and Sequence::SubSeq===@file.list.last
|
1864
|
+
@base_file.pos=@file.pos
|
1865
|
+
@file=@base_file
|
1866
|
+
@base_file=nil
|
1867
|
+
end
|
1868
|
+
|
1869
|
+
fal=@moretokens.last
|
1870
|
+
assert FileAndLineToken===fal
|
1871
|
+
|
1872
|
+
@offset_adjust=@min_offset_adjust
|
1873
|
+
|
1874
|
+
@moretokens.unshift(*optional_here_bodies)
|
1875
|
+
result=@moretokens.shift
|
1876
|
+
|
1877
|
+
#adjust line count in fal to account for newlines in here bodys
|
1878
|
+
i=@moretokens.size-1
|
1879
|
+
while(i>=0)
|
1880
|
+
#assert FileAndLineToken===@moretokens[i]
|
1881
|
+
i-=1 if FileAndLineToken===@moretokens[i]
|
1882
|
+
break unless HereBodyToken===@moretokens[i]
|
1883
|
+
pre_fal=true
|
1884
|
+
fal.line-=@moretokens[i].linecount
|
1406
1885
|
|
1886
|
+
i-=1
|
1887
|
+
end
|
1888
|
+
|
1889
|
+
if pre_fal
|
1890
|
+
@moretokens.unshift result
|
1891
|
+
pre.offset=result.offset
|
1892
|
+
result=pre
|
1893
|
+
end
|
1407
1894
|
start_of_line_directives
|
1408
1895
|
|
1409
1896
|
return result
|
@@ -1424,15 +1911,16 @@ end
|
|
1424
1911
|
|
1425
1912
|
begin
|
1426
1913
|
eof? and raise "eof before =end"
|
1427
|
-
more<<til_charset(/[\r\n]/)
|
1428
|
-
|
1914
|
+
more<< til_charset(/[\r\n]/)
|
1915
|
+
eof? and raise "eof before =end"
|
1916
|
+
more<< readnl
|
1429
1917
|
end until readahead(EQENDLENGTH)==EQEND
|
1430
1918
|
|
1431
1919
|
#read rest of line after =end
|
1432
1920
|
more << til_charset(/[\r\n]/)
|
1433
|
-
assert((?\r===nextchar or ?\n===nextchar))
|
1921
|
+
assert((eof? or ?\r===nextchar or ?\n===nextchar))
|
1434
1922
|
assert !(/[\r\n]/===more[-1,1])
|
1435
|
-
more<< readnl
|
1923
|
+
more<< readnl unless eof?
|
1436
1924
|
|
1437
1925
|
# newls= more.scan(/\r\n?|\n\r?/)
|
1438
1926
|
# @linenum+= newls.size
|
@@ -1445,7 +1933,7 @@ end
|
|
1445
1933
|
#handle __END__
|
1446
1934
|
if ENDMARKER===readahead(ENDMARKERLENGTH)
|
1447
1935
|
assert !(ImplicitContext===@parsestack.last)
|
1448
|
-
@moretokens.unshift endoffile_detected(read(
|
1936
|
+
@moretokens.unshift endoffile_detected(read(ENDMARKERLENGTH))
|
1449
1937
|
# input_position_set @file.size
|
1450
1938
|
end
|
1451
1939
|
end
|
@@ -1460,11 +1948,13 @@ end
|
|
1460
1948
|
def unary_op_expected?(ch) #yukko hack
|
1461
1949
|
'*&='[readahead(2)[1..1]] and return false
|
1462
1950
|
|
1951
|
+
return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
|
1952
|
+
|
1463
1953
|
after_nonid_op? {
|
1464
1954
|
#possible func-call as operator
|
1465
1955
|
|
1466
1956
|
not is_var_name? and
|
1467
|
-
WHSPLF[prevchar]
|
1957
|
+
WHSPLF[prevchar] and !WHSPLF[readahead(2)[1..1]]
|
1468
1958
|
}
|
1469
1959
|
end
|
1470
1960
|
|
@@ -1473,11 +1963,6 @@ end
|
|
1473
1963
|
# <<, %, ? in ruby
|
1474
1964
|
#returns whether current token is to be the start of a literal
|
1475
1965
|
def quote_expected?(ch) #yukko hack
|
1476
|
-
if AssignmentContext===@parsestack.last
|
1477
|
-
@parsestack.pop
|
1478
|
-
return false
|
1479
|
-
end
|
1480
|
-
|
1481
1966
|
case ch[0]
|
1482
1967
|
when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
|
1483
1968
|
when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
|
@@ -1500,17 +1985,23 @@ end
|
|
1500
1985
|
#used to resolve the ambiguity of
|
1501
1986
|
# <<, %, /, ?, :, and newline (among others) in ruby
|
1502
1987
|
def after_nonid_op?
|
1988
|
+
|
1989
|
+
#this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
|
1990
|
+
# if ImplicitParamListStartToken===@last_token_including_implicit
|
1991
|
+
# huh return true
|
1992
|
+
# end
|
1503
1993
|
case @last_operative_token
|
1504
|
-
when MethNameToken, FUNCLIKE_KEYWORDS.token_pat
|
1994
|
+
when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
|
1505
1995
|
#VarNameToken should really be left out of this case...
|
1506
1996
|
#should be in next branch instread
|
1507
1997
|
#callers all check for last token being not a variable if they pass anything
|
1508
|
-
#but {false} in the block
|
1998
|
+
#but {false} in the block
|
1999
|
+
#(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
|
1509
2000
|
return yield
|
1510
2001
|
when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
|
1511
2002
|
%r{^(
|
1512
|
-
|
1513
|
-
__FILE__|__LINE__|[\})\]]
|
2003
|
+
end|self|true|false|nil|
|
2004
|
+
__FILE__|__LINE__|[\})\]]
|
1514
2005
|
)$}x.token_pat
|
1515
2006
|
#dunno about def/undef
|
1516
2007
|
#maybe class/module shouldn't he here either?
|
@@ -1522,17 +2013,16 @@ end
|
|
1522
2013
|
#assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
|
1523
2014
|
return true
|
1524
2015
|
when NewlineToken, nil, #nil means we're still at beginning of file
|
1525
|
-
/^([({\[]|or|not|and|if|unless|then|elsif|else|
|
1526
|
-
while|until|begin|for|in|case|when|ensure)$
|
2016
|
+
/^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
|
2017
|
+
while|until|begin|for|in|case|when|ensure|defined\?)$
|
1527
2018
|
/x.token_pat
|
1528
2019
|
return true
|
1529
|
-
|
1530
|
-
|
2020
|
+
when KeywordToken
|
2021
|
+
return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
|
1531
2022
|
when IgnoreToken
|
1532
2023
|
raise "last_operative_token shouldn't be ignoreable"
|
1533
|
-
else
|
1534
|
-
raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
|
1535
2024
|
end
|
2025
|
+
raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
|
1536
2026
|
end
|
1537
2027
|
|
1538
2028
|
|
@@ -1577,10 +2067,10 @@ end
|
|
1577
2067
|
|
1578
2068
|
#-----------------------------------
|
1579
2069
|
def biop(ch) #match /%=?/ (% or %=)
|
1580
|
-
assert(ch[/^[
|
2070
|
+
assert(ch[/^[%^]$/])
|
1581
2071
|
result=getchar
|
1582
2072
|
if eat_next_if(?=)
|
1583
|
-
result
|
2073
|
+
result << ?=
|
1584
2074
|
end
|
1585
2075
|
return operator_or_methname_token( result)
|
1586
2076
|
end
|
@@ -1610,7 +2100,9 @@ end
|
|
1610
2100
|
#fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
|
1611
2101
|
def plusminus(ch)
|
1612
2102
|
assert(/^[+\-]$/===ch)
|
1613
|
-
if unary_op_expected?(ch)
|
2103
|
+
if unary_op_expected?(ch) or
|
2104
|
+
KeywordToken===@last_operative_token &&
|
2105
|
+
/^(return|break|next)$/===@last_operative_token.ident
|
1614
2106
|
if (?0..?9)===readahead(2)[1]
|
1615
2107
|
return number(ch)
|
1616
2108
|
else #unary operator
|
@@ -1619,7 +2111,6 @@ end
|
|
1619
2111
|
@moretokens << NoWsToken.new(input_position)
|
1620
2112
|
result=(operator_or_methname_token result)
|
1621
2113
|
result.unary=true
|
1622
|
-
#todo: result should distinguish unary+binary +-
|
1623
2114
|
end
|
1624
2115
|
else #binary operator
|
1625
2116
|
assert(! want_op_name)
|
@@ -1628,9 +2119,8 @@ end
|
|
1628
2119
|
result << ?=
|
1629
2120
|
end
|
1630
2121
|
result=(operator_or_methname_token result)
|
1631
|
-
#todo: result should distinguish unary+binary +-
|
1632
2122
|
end
|
1633
|
-
result
|
2123
|
+
return result
|
1634
2124
|
end
|
1635
2125
|
|
1636
2126
|
#-----------------------------------
|
@@ -1642,19 +2132,31 @@ end
|
|
1642
2132
|
str << c
|
1643
2133
|
result= operator_or_methname_token( str,offset)
|
1644
2134
|
case c
|
1645
|
-
when '=':
|
2135
|
+
when '=': #===,==
|
2136
|
+
str<< (eat_next_if(?=)or'')
|
1646
2137
|
|
1647
|
-
when '>':
|
2138
|
+
when '>': #=>
|
1648
2139
|
unless ParamListContextNoParen===@parsestack.last
|
1649
2140
|
@moretokens.unshift result
|
1650
2141
|
@moretokens.unshift( *abort_noparens!("=>"))
|
1651
2142
|
result=@moretokens.shift
|
1652
2143
|
end
|
1653
2144
|
@parsestack.last.see self,:arrow
|
1654
|
-
when '': #record local variable definitions
|
1655
|
-
|
2145
|
+
when '': #plain assignment: record local variable definitions
|
2146
|
+
last_context_not_implicit.lhs=false
|
2147
|
+
@moretokens.push *ignored_tokens(true).map{|x|
|
2148
|
+
NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
|
2149
|
+
}
|
1656
2150
|
@parsestack.push AssignmentRhsContext.new(@linenum)
|
1657
|
-
|
2151
|
+
if eat_next_if ?*
|
2152
|
+
tok=OperatorToken.new('*', input_position-1)
|
2153
|
+
tok.unary=true
|
2154
|
+
@moretokens.push tok
|
2155
|
+
WHSPLF[nextchar.chr] or
|
2156
|
+
@moretokens << NoWsToken.new(input_position)
|
2157
|
+
comma_in_lvalue_list? #is this needed?
|
2158
|
+
end
|
2159
|
+
@moretokens.push AssignmentRhsListStartToken.new( input_position)
|
1658
2160
|
end
|
1659
2161
|
return result
|
1660
2162
|
end
|
@@ -1666,6 +2168,7 @@ end
|
|
1666
2168
|
k=eat_next_if(/[~=]/)
|
1667
2169
|
if k
|
1668
2170
|
result+=k
|
2171
|
+
elsif eof?: #do nothing
|
1669
2172
|
else
|
1670
2173
|
WHSPLF[nextchar.chr] or
|
1671
2174
|
@moretokens << NoWsToken.new(input_position)
|
@@ -1693,10 +2196,11 @@ end
|
|
1693
2196
|
#-----------------------------------
|
1694
2197
|
def dot_rhs(prevtok)
|
1695
2198
|
safe_recurse { |a|
|
1696
|
-
|
2199
|
+
set_last_token prevtok
|
1697
2200
|
aa= ignored_tokens
|
2201
|
+
was=after_nonid_op?{true}
|
1698
2202
|
tok,pos=callsite_symbol(prevtok)
|
1699
|
-
tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
|
2203
|
+
tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
|
1700
2204
|
a.unshift(*aa)
|
1701
2205
|
}
|
1702
2206
|
end
|
@@ -1705,7 +2209,7 @@ end
|
|
1705
2209
|
def back_quote(ch=nil)
|
1706
2210
|
if @last_operative_token===/^(def|::|\.)$/
|
1707
2211
|
oldpos= input_position
|
1708
|
-
MethNameToken.new(eat_next_if(?`), oldpos)
|
2212
|
+
MethNameToken.new(eat_next_if(?`), oldpos) #`
|
1709
2213
|
else
|
1710
2214
|
double_quote(ch)
|
1711
2215
|
end
|
@@ -1716,7 +2220,7 @@ if false
|
|
1716
2220
|
def comment(str)
|
1717
2221
|
result=""
|
1718
2222
|
#loop{
|
1719
|
-
result<<super(nil).to_s
|
2223
|
+
result<< super(nil).to_s
|
1720
2224
|
|
1721
2225
|
if /^\#.*\#$/===result #if comment was ended by a crunch
|
1722
2226
|
|
@@ -1762,7 +2266,7 @@ end
|
|
1762
2266
|
tokch= NoWsToken.new(input_position-1)
|
1763
2267
|
end
|
1764
2268
|
when '('
|
1765
|
-
lasttok=last_operative_token
|
2269
|
+
lasttok=last_token_maybe_implicit #last_operative_token
|
1766
2270
|
#could be: lasttok===/^[a-z_]/i
|
1767
2271
|
if (VarNameToken===lasttok or MethNameToken===lasttok or
|
1768
2272
|
lasttok===FUNCLIKE_KEYWORDS)
|
@@ -1781,15 +2285,17 @@ end
|
|
1781
2285
|
if after_nonid_op?{false} or @last_operative_token.has_no_block?
|
1782
2286
|
@parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
|
1783
2287
|
else
|
2288
|
+
#abort_noparens!
|
1784
2289
|
tokch.set_infix!
|
1785
|
-
=
|
2290
|
+
tokch.as="do"
|
2291
|
+
#=begin not needed now, i think
|
1786
2292
|
# 'need to find matching callsite context and end it if implicit'
|
1787
2293
|
lasttok=last_operative_token
|
1788
|
-
|
2294
|
+
if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
|
1789
2295
|
@moretokens.push *(abort_1_noparen!(1).push tokch)
|
1790
2296
|
tokch=@moretokens.shift
|
1791
2297
|
end
|
1792
|
-
|
2298
|
+
#=end
|
1793
2299
|
|
1794
2300
|
localvars.start_block
|
1795
2301
|
@parsestack.push BlockContext.new(@linenum)
|
@@ -1811,13 +2317,18 @@ end
|
|
1811
2317
|
end
|
1812
2318
|
ctx=@parsestack.pop
|
1813
2319
|
origch,line=ctx.starter,ctx.linenum
|
1814
|
-
ch
|
2320
|
+
if ch!=PAIRS[origch]
|
2321
|
+
#kw.extend MismatchedBrace
|
1815
2322
|
lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
|
1816
2323
|
"matching brace location", @filename, line
|
1817
|
-
|
2324
|
+
end
|
2325
|
+
if BlockContext===ctx
|
2326
|
+
localvars.end_block
|
2327
|
+
@moretokens.last.as="end"
|
2328
|
+
end
|
1818
2329
|
if ParamListContext==ctx.class
|
1819
2330
|
assert ch==')'
|
1820
|
-
|
2331
|
+
kw.set_callsite! #not needed?
|
1821
2332
|
end
|
1822
2333
|
return @moretokens.shift
|
1823
2334
|
end
|
@@ -1826,19 +2337,24 @@ end
|
|
1826
2337
|
def eof(ch=nil)
|
1827
2338
|
#this must be the very last character...
|
1828
2339
|
oldpos= input_position
|
1829
|
-
assert(
|
2340
|
+
assert(/\A[\x0\x4\x1a]\Z/===nextchar.chr)
|
1830
2341
|
|
1831
|
-
result
|
2342
|
+
result=@file.read!
|
2343
|
+
# result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
|
1832
2344
|
|
1833
|
-
eof? or
|
1834
|
-
lexerror result,'nul character is not at the end of file'
|
1835
|
-
input_position_set @file.size
|
2345
|
+
# eof? or
|
2346
|
+
# lexerror result,'nul character is not at the end of file'
|
2347
|
+
# input_position_set @file.size
|
1836
2348
|
return(endoffile_detected result)
|
1837
2349
|
end
|
1838
2350
|
|
1839
2351
|
#-----------------------------------
|
1840
2352
|
def endoffile_detected(s='')
|
1841
2353
|
@moretokens.push( *(abort_noparens!.push super(s)))
|
2354
|
+
if @progress_thread
|
2355
|
+
@progress_thread.kill
|
2356
|
+
@progress_thread=nil
|
2357
|
+
end
|
1842
2358
|
result= @moretokens.shift
|
1843
2359
|
balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
|
1844
2360
|
result
|
@@ -1851,7 +2367,26 @@ end
|
|
1851
2367
|
|
1852
2368
|
#-----------------------------------
|
1853
2369
|
def comma(ch)
|
1854
|
-
single_char_token(ch)
|
2370
|
+
@moretokens.push token=single_char_token(ch)
|
2371
|
+
if AssignmentRhsContext===@parsestack[-1] and
|
2372
|
+
ParamListContext===@parsestack[-2] ||
|
2373
|
+
ParamListContextNoParen===@parsestack[-2] ||
|
2374
|
+
WhenParamListContext===@parsestack[-2] ||
|
2375
|
+
(RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
|
2376
|
+
(DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
|
2377
|
+
@parsestack.pop
|
2378
|
+
@moretokens.unshift AssignmentRhsListEndToken.new(input_position)
|
2379
|
+
end
|
2380
|
+
token.comma_type=
|
2381
|
+
case @parsestack[-1]
|
2382
|
+
when AssignmentRhsContext: :rhs
|
2383
|
+
when ParamListContext,ParamListContextNoParen: :call
|
2384
|
+
when ListImmedContext: :array
|
2385
|
+
else
|
2386
|
+
:lhs if comma_in_lvalue_list?
|
2387
|
+
end
|
2388
|
+
@parsestack.last.see self,:comma
|
2389
|
+
return @moretokens.shift
|
1855
2390
|
end
|
1856
2391
|
|
1857
2392
|
#-----------------------------------
|
@@ -1872,7 +2407,7 @@ end
|
|
1872
2407
|
assert RUBYOPERATORREX===s
|
1873
2408
|
if RUBYNONSYMOPERATORREX===s
|
1874
2409
|
KeywordToken
|
1875
|
-
elsif
|
2410
|
+
elsif want_op_name
|
1876
2411
|
MethNameToken
|
1877
2412
|
else
|
1878
2413
|
OperatorToken
|
@@ -1882,9 +2417,7 @@ end
|
|
1882
2417
|
#-----------------------------------
|
1883
2418
|
#tokenify_results_of :identifier
|
1884
2419
|
save_offsets_in(*CHARMAPPINGS.values.uniq-[
|
1885
|
-
:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
|
1886
|
-
|
1887
|
-
|
2420
|
+
:symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
|
1888
2421
|
])
|
1889
2422
|
#save_offsets_in :symbol
|
1890
2423
|
|