rubylexer 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +90 -0
- data/Manifest.txt +54 -3
- data/README.txt +4 -7
- data/Rakefile +3 -2
- data/lib/rubylexer.rb +856 -323
- data/lib/rubylexer/0.7.0.rb +11 -2
- data/lib/rubylexer/0.7.1.rb +2 -0
- data/lib/rubylexer/charhandler.rb +4 -4
- data/lib/rubylexer/context.rb +86 -9
- data/lib/rubylexer/rulexer.rb +455 -101
- data/lib/rubylexer/token.rb +166 -43
- data/lib/rubylexer/tokenprinter.rb +16 -8
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.vpj +98 -0
- data/test/code/all_the_gems.rb +33 -0
- data/test/code/all_the_raas.rb +226 -0
- data/test/code/all_the_rubies.rb +2 -0
- data/test/code/deletewarns.rb +19 -1
- data/test/code/dumptokens.rb +39 -8
- data/test/code/errscan +2 -0
- data/test/code/isolate_error.rb +72 -0
- data/test/code/lexloop +14 -0
- data/test/code/locatetest.rb +150 -8
- data/test/code/regression.rb +109 -0
- data/test/code/rubylexervsruby.rb +53 -15
- data/test/code/strgen.rb +138 -0
- data/test/code/tarball.rb +144 -0
- data/test/code/testcases.rb +11 -0
- data/test/code/tokentest.rb +115 -24
- data/test/data/__eof2.rb +1 -0
- data/test/data/__eof5.rb +2 -0
- data/test/data/__eof6.rb +2 -0
- data/test/data/cvtesc.rb +17 -0
- data/test/data/g.rb +6 -0
- data/test/data/hd0.rb +3 -0
- data/test/data/hdateof.rb +2 -0
- data/test/data/hdempty.rb +3 -0
- data/test/data/hdr.rb +9 -0
- data/test/data/hdr_dos.rb +13 -0
- data/test/data/hdr_dos2.rb +18 -0
- data/test/data/heart.rb +2 -0
- data/test/data/here_escnl.rb +25 -0
- data/test/data/here_escnl_dos.rb +20 -0
- data/test/data/here_squote.rb +3 -0
- data/test/data/heremonsters.rb +140 -0
- data/test/data/heremonsters.rb.broken +68 -0
- data/test/data/heremonsters.rb.broken.save +68 -0
- data/test/data/heremonsters_dos.rb +140 -0
- data/test/data/heremonsters_dos.rb.broken +68 -0
- data/test/data/illegal_oneliners.rb +1 -0
- data/test/data/illegal_stanzas.rb +0 -0
- data/test/data/make_ws_strdelim.rb +22 -0
- data/test/data/maven2_builer_test.rb +82 -0
- data/test/data/migration.rb +8944 -0
- data/test/data/modl.rb +6 -0
- data/test/data/modl_dos.rb +7 -0
- data/test/data/modl_fails.rb +10 -0
- data/test/data/multilinestring.rb +6 -0
- data/test/data/oneliners.rb +555 -0
- data/test/data/p-op.rb +2 -0
- data/test/data/p.rb +3 -1710
- data/test/data/s.rb +90 -21
- data/test/data/simple.rb +1 -0
- data/test/data/simple_dos.rb +1 -0
- data/test/data/stanzas.rb +1194 -0
- data/test/data/strdelim_crlf.rb +6 -0
- data/test/data/stuff.rb +6 -0
- data/test/data/stuff2.rb +5 -0
- data/test/data/stuff3.rb +6 -0
- data/test/data/stuff4.rb +6 -0
- data/test/data/tkweird.rb +20 -0
- data/test/data/unending_stuff.rb +5 -0
- data/test/data/whatnot.rb +8 -0
- data/test/data/ws_strdelim.rb +0 -0
- data/test/test.sh +239 -0
- data/testing.txt +39 -50
- metadata +110 -12
- data/test/code/dl_all_gems.rb +0 -43
- data/test/code/unpack_all_gems.rb +0 -15
- data/test/data/gemlist.txt +0 -280
data/History.txt
CHANGED
@@ -1,3 +1,93 @@
+=== 0.7.1/10-29-2008
+* 6 Major Enhancements:
+  * handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
+  * yet more hacks in aid of string inclusions
+  * backslashes in strings are no longer interpreted automatically when lexed
+  * here documents are completely rewritten in a tricky way that more closely mimics what MRI does
+  * many more flags for tokens to tell apart the various cases:
+    * the various different local variable types have to be detected.
+    * colons which operate like semicolons or thens are marked as such
+    * { } used in block now flagged as parsing like do and end
+    * commas now are marked with different types depending on how they're used
+    * @variables in methods need to be marked as such, so their parsetree can come out different.
+    * clearly mark backquoted strings
+  * further refinements of local variable detection and implicit paren placement near these cases:
+    * when ws between method name and parenthesis
+    * break/return/next
+    * ? : << / rescue do
+
+* 5 Minor Enhancements
+  * colon or star in assignment make it a multi assignment
+  * presence of unary * or & in param list forces it to be a multi-param list
+  * errors in string inclusions should now be handled better
+  * string and stringlike tokens now can tell you the exact sequence of chars used to open and close the string.
+  * correctly handling more cases where return/break/next parses different than a method (yuck!)
+
+* 26 Bugfixes
+  * ~ operator can be followed with an @, like + and -
+  * ~ is overridable, however :: is not
+  * raise is not a keyword
+  * in addition to 0x00, 0x04 and 0x1a should be considered eof in ruby. why? idunno.
+  * setting PROGRESS env var will cause input file position to be printed to stderr periodically.
+  * defined? is not a funclike keyword... really more of a unary operator
+  * $- is a legitimate global variable.
+  * better parsing of lvalue list following for keyword.
+  * rescue is a variable define context only when right after => and before then (or disguises).
+  * better placement of implicit parens around def param list
+  * (global) variable aliasing now supported
+  * local vars in END block are NOT scoped to the block!
+  * local vars in def param lists aren't considered variables til after the initializer for that var
+  * end of def header is treated like ; even if none is present
+  * never put here document right after class keyword
+  * look for start of line directives at end of here document
+  * oops, mac newlines don't have to be supported
+  * dos newlines better tolerated around here documents
+  * less line number/offset confusion around here documents
+  * newline after (non-operator) rescue is hard (but not after INNERBOUNDINGWORDS)
+  * handling eof in more strange places
+  * always expect unary op after for
+  * unary ops should know about the before-but-not-after rule!
+  * newlines after = should be escaped
+  * \c? and \C-? are not interpreted the same as other ctrl chars
+  * \n\r and \r are not recognized as nl sequences
+
+* 18 Internal Changes (not user visible)
+  * commas cause a :comma event on the parsestack
+  * some of the lists of types of operators are available now as arrays of strings instead of regexps
+  * single and double quote now have separate implementations again
+  * keep track of whether an implicit open or close paren has just been emitted
+  * put ws around << to keep slickedit happy
+  * the eof characters are also considered whitespace.
+  * identifier lexer now uses regexps more heavily
+  * method formal parameter list is not considered an lvalue context for commas.
+  * class and def now have their own parse contexts
+  * unary star causes a :splat event on the parsestack
+  * is_var_name now detects var tokens just from the token type, not looking at local vars table.
+  * a faster regexp-based implementation of string scanning
+  * moved yucky side effect out of quote_expected?
+  * these keywords: class module def for defined? no longer automatically create operator context
+  * a new context for BEGIN/END keywords
+  * a new context for param list of return/next/break
+  * new escape sequence processors for regexp and %W list
+  * numbers now scanned with a regexp
+
+* 15 Enhancements and bug fixes to tests:
+  * just print a notice on errors which are also syntax errors for ruby
+  * a little cleanup of temp files
+  * rubylexervsruby and tokentest can take input from stdin
+  * unlexer improvements
+  * dumptokens now has a --silent cmdline option
+  * locatetest.rb is significantly enhanced
+  * --unified option to diff seems to work better than -u
+  * tokentest better verifies exact token contents...
+  * tokentest now uses open and close fields of strings to verify string bounds exactly
+  * CRLF in a string is always treated like just a LF. (CR is elided.)
+  * allow_ooo hacky flag marks tokens whose offset errors are to be ignored.
+  * all other offset errors have been downgraded to warnings.
+  * most of the offset problem I had been seeing have been fixed, tho
+  * offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
+  * tokentest has a --loop option, for load testing
+
 === 0.7.0/2-15-2008
 * implicit tokens are now emitted at the right times (need more test code)
 * local variables are now temporarily hidden by class, module, and def
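For orientation, both versions are driven one token at a time. A minimal usage sketch of that loop, assuming the API visible in the data/lib/rubylexer.rb diff further down (RubyLexer.new(filename, io, ...), get1token, and a top-level EoiToken class — the exact require path and constant nesting may differ slightly in the released gem; test/code/dumptokens.rb does roughly this with more options):

    require 'rubylexer'

    File.open("some_file.rb") do |io|
      lexer = RubyLexer.new("some_file.rb", io)   # filename, input stream
      loop do
        tok = lexer.get1token                     # one token per call
        puts "#{tok.class}\t#{tok.to_s.inspect}"  # token class and its exact source text
        break if EoiToken === tok                 # an EoiToken marks end of input
      end
    end
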
data/Manifest.txt
CHANGED
@@ -19,7 +19,6 @@ lib/rubylexer/symboltable.rb
 lib/rubylexer/charhandler.rb
 lib/assert.rb
 lib/rubylexer.rb
-test/data/gemlist.txt
 test/data/blockassigntest.rb
 test/data/for.rb
 test/data/chunky_bacon.rb
@@ -58,10 +57,62 @@ test/data/chunky_bacon2.rb
 test/data/format.rb
 test/code/locatetest.rb
 test/code/rubylexervsruby.rb
-test/code/dl_all_gems.rb
-test/code/unpack_all_gems.rb
 test/code/tokentest.rb
 test/code/dumptokens.rb
 test/code/torment
 test/code/locatetest
 test/code/deletewarns.rb
+lib/rubylexer/0.7.1.rb
+rubylexer.vpj
+test/code/all_the_gems.rb
+test/code/all_the_raas.rb
+test/code/all_the_rubies.rb
+test/code/errscan
+test/code/isolate_error.rb
+test/code/lexloop
+test/code/regression.rb
+test/code/strgen.rb
+test/code/tarball.rb
+test/code/testcases.rb
+test/data/chunky.plain.rb
+test/data/cvtesc.rb
+test/data/__eof2.rb
+test/data/__eof5.rb
+test/data/__eof6.rb
+test/data/hd0.rb
+test/data/hdateof.rb
+test/data/hdempty.rb
+test/data/hdr_dos2.rb
+test/data/hdr_dos.rb
+test/data/hdr.rb
+test/data/here_escnl_dos.rb
+test/data/here_escnl.rb
+test/data/heremonsters_dos.rb
+test/data/heremonsters_dos.rb.broken
+test/data/heremonsters.rb
+test/data/heremonsters.rb.broken
+test/data/heremonsters.rb.broken.save
+test/data/here_squote.rb
+test/data/illegal_oneliners.rb
+test/data/illegal_stanzas.rb
+test/data/make_ws_strdelim.rb
+test/data/maven2_builer_test.rb
+test/data/migration.rb
+test/data/modl_dos.rb
+test/data/modl_fails.rb
+test/data/modl.rb
+test/data/multilinestring.rb
+test/data/oneliners.rb
+test/data/simple_dos.rb
+test/data/simple.rb
+test/data/stanzas.rb
+test/data/strdelim_crlf.rb
+test/data/stuff2.rb
+test/data/stuff3.rb
+test/data/stuff4.rb
+test/data/stuff.rb
+test/data/tkweird.rb
+test/data/unending_stuff.rb
+test/data/whatnot.rb
+test/data/ws_strdelim.rb
+test/test.sh
data/README.txt
CHANGED
@@ -67,10 +67,7 @@ keywords, depending on context:
 any overrideable operator and most keywords can also be method names
 
 == todo
-test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
-these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
 test more ways: cvt source to dos or mac fmt before testing
-test more ways: run unit tests after passing thru rubylexer (0.7)
 test more ways: test require'd, load'd, or eval'd code as well (0.7)
 lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
 incremental lexing (ides want this (for performance))
@@ -78,12 +75,10 @@ put everything in a namespace
 integrate w/ other tools...
 html colorized output?
 move more state onto @parsestack (ongoing)
-the new cases in p.rb now compile, but won't run
 expand on test documentation
 use want_op_name more
 return result as a half-parsed tree (with parentheses and the like matched)
 emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
-strings are still slow
 emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
 token pruning in dumptokens...
 
@@ -96,8 +91,10 @@ string tokenization sometimes a little different from ruby around newlines
 string contents might not be correctly translated in a few cases (0.8?)
 symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
 '\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
-windows
+windows newline in source is likely to cause problems in obscure cases (need test case)
 unterminated =begin is not an error (0.8)
 ruby 1.9 completely unsupported (0.9)
 character sets other than ascii are not supported at all (1.0)
-
+regression test currently shows 14 errors with differences in exact token ordering
+-around string inclusions. these errors are much less serious than they seem.
+offset of AssignmentRhsListEndToken appears to be off by 1
data/Rakefile
CHANGED
@@ -13,12 +13,13 @@ require 'lib/rubylexer/version.rb'
 hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
   _.author = "Caleb Clausen"
   _.email = "rubylexer-owner @at@ inforadical .dot. net"
-  _.url = "http://rubylexer.rubyforge.org/"
-  _.extra_deps
+  _.url = ["http://rubylexer.rubyforge.org/", "http://rubyforge.org/projects/rubylexer/"]
+  _.extra_deps << ['sequence', '>= 0.2.0']
   _.test_globs=["test/{code/*,data/*rb*,results/}"]
   _.description=desc
   _.summary=desc[/\A[^.]+\./]
   _.spec_extras={:bindir=>''}
+  _.rdoc_pattern=/\A(howtouse\.txt|testing\.txt|README\.txt|lib\/.*\.rb)\Z/
 end
 
 
data/lib/rubylexer.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
=begin
|
1
|
+
=begin legal crap
|
2
2
|
rubylexer - a ruby lexer written in ruby
|
3
|
-
Copyright (C) 2004,2005 Caleb Clausen
|
3
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
4
4
|
|
5
5
|
This library is free software; you can redistribute it and/or
|
6
6
|
modify it under the terms of the GNU Lesser General Public
|
@@ -18,7 +18,6 @@
|
|
18
18
|
=end
|
19
19
|
|
20
20
|
|
21
|
-
|
22
21
|
require 'rubylexer/rulexer' #must be 1st!!!
|
23
22
|
require 'rubylexer/version'
|
24
23
|
require 'rubylexer/token'
|
@@ -32,9 +31,11 @@ require 'rubylexer/tokenprinter'
|
|
32
31
|
#-----------------------------------
|
33
32
|
class RubyLexer
|
34
33
|
include NestedContexts
|
34
|
+
|
35
|
+
|
35
36
|
|
36
37
|
RUBYSYMOPERATORREX=
|
37
|
-
%r{^([
|
38
|
+
%r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
|
38
39
|
# (nasty beastie, eh?)
|
39
40
|
#these are the overridable operators
|
40
41
|
#does not match flow-control operators like: || && ! or and if not
|
@@ -42,23 +43,25 @@ class RubyLexer
|
|
42
43
|
#or .. ... ?:
|
43
44
|
#for that use:
|
44
45
|
RUBYNONSYMOPERATORREX=
|
45
|
-
%r{^([
|
46
|
+
%r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
|
46
47
|
RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
|
47
48
|
UNSYMOPS=/^[~!]$/ #always unary
|
48
49
|
UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
|
49
50
|
WHSPCHARS=WHSPLF+"\\#"
|
50
|
-
|
51
|
-
|
52
|
-
|
51
|
+
OPORBEGINWORDLIST=%w(if unless while until)
|
52
|
+
BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
|
53
|
+
OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
|
54
|
+
BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
|
55
|
+
FUNCLIKE_KEYWORDS=/^(break|next|redo|return|yield|retry|super|BEGIN|END)$/
|
53
56
|
VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
|
54
57
|
INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
|
55
58
|
BINOPWORDS="(and|or)"
|
56
|
-
NEVERSTARTPARAMLISTWORDS
|
59
|
+
NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
|
57
60
|
NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
|
58
61
|
NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
|
59
62
|
|
60
63
|
RUBYKEYWORDS=%r{
|
61
|
-
^(alias|#{BINOPWORDS}|not|undef|end|
|
64
|
+
^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
|
62
65
|
#{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
|
63
66
|
#{INNERBOUNDINGWORDS}|#{BEGINWORDS}
|
64
67
|
)$
|
@@ -72,8 +75,9 @@ class RubyLexer
|
|
72
75
|
?A..?Z => :identifier,
|
73
76
|
?_ => :identifier,
|
74
77
|
?0..?9 => :number,
|
75
|
-
|
76
|
-
|
78
|
+
?" => :double_quote, #"
|
79
|
+
?' => :single_quote, #'
|
80
|
+
?` => :back_quote, #`
|
77
81
|
|
78
82
|
WHSP => :whitespace, #includes \r
|
79
83
|
?, => :comma,
|
@@ -99,7 +103,9 @@ class RubyLexer
|
|
99
103
|
#?\r => :newline, #implicitly escaped after op
|
100
104
|
|
101
105
|
?\\ => :escnewline,
|
102
|
-
?\
|
106
|
+
?\x00 => :eof,
|
107
|
+
?\x04 => :eof,
|
108
|
+
?\x1a => :eof,
|
103
109
|
|
104
110
|
"[({" => :open_brace,
|
105
111
|
"])}" => :close_brace,
|
@@ -108,41 +114,90 @@ class RubyLexer
|
|
108
114
|
?# => :comment
|
109
115
|
}
|
110
116
|
|
111
|
-
attr_reader :incomplete_here_tokens, :parsestack
|
117
|
+
attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
|
112
118
|
|
113
119
|
|
114
120
|
#-----------------------------------
|
115
|
-
def initialize(filename,file,linenum=1)
|
116
|
-
|
121
|
+
def initialize(filename,file,linenum=1,offset_adjust=0)
|
122
|
+
@offset_adjust=0 #set again in next line
|
123
|
+
super(filename,file, linenum,offset_adjust)
|
117
124
|
@start_linenum=linenum
|
118
125
|
@parsestack=[TopLevelContext.new]
|
119
|
-
@incomplete_here_tokens=[]
|
126
|
+
@incomplete_here_tokens=[] #not used anymore
|
127
|
+
@pending_here_bodies=[]
|
120
128
|
@localvars_stack=[SymbolTable.new]
|
121
129
|
@defining_lvar=nil
|
122
130
|
@in_def_name=false
|
131
|
+
@last_operative_token=nil
|
132
|
+
@last_token_maybe_implicit=nil
|
123
133
|
|
124
134
|
@toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
|
125
135
|
|
126
136
|
start_of_line_directives
|
137
|
+
progress_printer
|
138
|
+
end
|
139
|
+
|
140
|
+
def progress_printer
|
141
|
+
return unless ENV['RL_PROGRESS']
|
142
|
+
$stderr.puts 'printing progresses'
|
143
|
+
@progress_thread=Thread.new do
|
144
|
+
until EoiToken===@last_operative_token
|
145
|
+
sleep 10
|
146
|
+
$stderr.puts @file.pos
|
147
|
+
end
|
148
|
+
end
|
127
149
|
end
|
128
150
|
|
129
151
|
def localvars;
|
130
152
|
@localvars_stack.last
|
131
153
|
end
|
132
154
|
|
155
|
+
attr :localvars_stack
|
156
|
+
attr :offset_adjust
|
157
|
+
attr_writer :pending_here_bodies
|
158
|
+
|
159
|
+
#-----------------------------------
|
160
|
+
def set_last_token(tok)
|
161
|
+
@last_operative_token=@last_token_maybe_implicit=tok
|
162
|
+
end
|
163
|
+
|
133
164
|
#-----------------------------------
|
134
165
|
def get1token
|
135
166
|
result=super #most of the action's here
|
136
167
|
|
168
|
+
if ENV['PROGRESS']
|
169
|
+
@last_cp_pos||=0
|
170
|
+
@start_time||=Time.now
|
171
|
+
if result.offset-@last_cp_pos>100000
|
172
|
+
$stderr.puts "#{result.offset} #{Time.now-@start_time}"
|
173
|
+
@last_cp_pos=result.offset
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
137
177
|
#now cleanup and housekeeping
|
138
178
|
|
139
179
|
|
140
180
|
#check for bizarre token types
|
141
181
|
case result
|
182
|
+
when ImplicitParamListStartToken, ImplicitParamListEndToken
|
183
|
+
@last_token_maybe_implicit=result
|
184
|
+
result
|
142
185
|
when StillIgnoreToken#,nil
|
143
186
|
result
|
187
|
+
when StringToken
|
188
|
+
set_last_token result
|
189
|
+
assert !(IgnoreToken===@last_operative_token)
|
190
|
+
result.elems.map!{|frag|
|
191
|
+
if String===frag
|
192
|
+
result.translate_escapes(frag)
|
193
|
+
else
|
194
|
+
frag
|
195
|
+
end
|
196
|
+
} if AUTO_UNESCAPE_STRINGS
|
197
|
+
result
|
198
|
+
|
144
199
|
when Token#,String
|
145
|
-
|
200
|
+
set_last_token result
|
146
201
|
assert !(IgnoreToken===@last_operative_token)
|
147
202
|
result
|
148
203
|
else
|
@@ -150,6 +205,20 @@ class RubyLexer
|
|
150
205
|
end
|
151
206
|
end
|
152
207
|
|
208
|
+
#-----------------------------------
|
209
|
+
def eof?
|
210
|
+
super or EoiToken===@last_operative_token
|
211
|
+
end
|
212
|
+
|
213
|
+
#-----------------------------------
|
214
|
+
def input_position
|
215
|
+
super+@offset_adjust
|
216
|
+
end
|
217
|
+
|
218
|
+
#-----------------------------------
|
219
|
+
def input_position_raw
|
220
|
+
@file.pos
|
221
|
+
end
|
153
222
|
|
154
223
|
#-----------------------------------
|
155
224
|
def balanced_braces?
|
@@ -163,7 +232,7 @@ class RubyLexer
|
|
163
232
|
s=eat_next_if(?$) or return nil
|
164
233
|
|
165
234
|
if t=((identifier_as_string(?$) or special_global))
|
166
|
-
s<<t
|
235
|
+
s << t
|
167
236
|
else error= "missing $id name"
|
168
237
|
end
|
169
238
|
|
@@ -173,17 +242,27 @@ class RubyLexer
|
|
173
242
|
#-----------------------------------
|
174
243
|
def at_identifier(ch=nil)
|
175
244
|
result = (eat_next_if(?@) or return nil)
|
176
|
-
result << (eat_next_if(?@)or'')
|
245
|
+
result << (eat_next_if(?@) or '')
|
177
246
|
if t=identifier_as_string(?@)
|
178
|
-
result<<t
|
247
|
+
result << t
|
179
248
|
else error= "missing @id name"
|
180
249
|
end
|
181
|
-
|
250
|
+
result=VarNameToken.new(result)
|
251
|
+
result.in_def=true if inside_method_def?
|
252
|
+
return lexerror(result,error)
|
182
253
|
end
|
183
254
|
|
184
255
|
private
|
185
256
|
#-----------------------------------
|
186
|
-
def
|
257
|
+
def inside_method_def?
|
258
|
+
@parsestack.reverse_each{|ctx|
|
259
|
+
ctx.starter=='def' and ctx.state!=:saw_def and return true
|
260
|
+
}
|
261
|
+
return false
|
262
|
+
end
|
263
|
+
|
264
|
+
#-----------------------------------
|
265
|
+
def here_spread_over_ruby_code(rl,tok) #not used anymore
|
187
266
|
assert(!rl.incomplete_here_tokens.empty?)
|
188
267
|
@incomplete_here_tokens += rl.incomplete_here_tokens
|
189
268
|
end
|
@@ -207,10 +286,10 @@ private
|
|
207
286
|
end
|
208
287
|
|
209
288
|
#-----------------------------------
|
210
|
-
WSCHARSET=/[#\\\n\s\t\v\r\f]/
|
289
|
+
WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
|
211
290
|
def ignored_tokens(allow_eof=false,allow_eol=true)
|
212
291
|
result=[]
|
213
|
-
result
|
292
|
+
result << @moretokens.shift while StillIgnoreToken===@moretokens.first
|
214
293
|
@moretokens.empty? or return result
|
215
294
|
loop do
|
216
295
|
unless @moretokens.empty?
|
@@ -273,8 +352,8 @@ private
|
|
273
352
|
result = ((
|
274
353
|
#order matters here, but it shouldn't
|
275
354
|
#(but til_charset must be last)
|
276
|
-
|
277
|
-
|
355
|
+
eat_if(/-[a-z0-9_]/i,2) or
|
356
|
+
eat_next_if(/[!@&+`'=~\-\/\\,.;<>*"$?:]/) or
|
278
357
|
(?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
|
279
358
|
))
|
280
359
|
end
|
@@ -289,23 +368,26 @@ private
|
|
289
368
|
#just asserts because those contexts are never encountered.
|
290
369
|
#control goes through symbol(<...>,nil)
|
291
370
|
assert( /^[a-z_]$/i===context)
|
292
|
-
assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
|
371
|
+
assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
|
293
372
|
|
294
|
-
@moretokens.unshift(*parse_keywords(str,oldpos) do
|
373
|
+
@moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
|
295
374
|
#if not a keyword,
|
296
375
|
case str
|
297
376
|
when FUNCLIKE_KEYWORDS; #do nothing
|
298
377
|
when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
|
299
378
|
end
|
300
|
-
|
379
|
+
was_last=@last_operative_token
|
380
|
+
@last_operative_token=tok if tok
|
381
|
+
safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
|
301
382
|
end)
|
302
383
|
return @moretokens.shift
|
303
384
|
end
|
304
385
|
|
305
386
|
#-----------------------------------
|
387
|
+
IDENTREX={}
|
306
388
|
def identifier_as_string(context)
|
307
389
|
#must begin w/ letter or underscore
|
308
|
-
|
390
|
+
/[_a-z]/i===nextchar.chr or return
|
309
391
|
|
310
392
|
#equals, question mark, and exclamation mark
|
311
393
|
#might be allowed at the end in some contexts.
|
@@ -315,45 +397,16 @@ private
|
|
315
397
|
#i hope i've captured all right conditions....
|
316
398
|
#context should always be ?: right after def, ., and :: now
|
317
399
|
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
str<<til_charset(/[^a-z0-9_]/i)
|
327
|
-
|
328
|
-
#look for ?, !, or =, if allowed
|
329
|
-
case b=getc
|
330
|
-
when nil #means we're at eof
|
331
|
-
#handling nil here prevents b from ever matching
|
332
|
-
#a nil value of maybe_qm, maybe_ex or maybe_eq
|
333
|
-
when maybe_qm
|
334
|
-
str << b
|
335
|
-
when maybe_ex
|
336
|
-
nc=(nextchar unless eof?)
|
337
|
-
#does ex appear to be part of a larger operator?
|
338
|
-
if nc==?= #or nc==?~
|
339
|
-
back1char
|
340
|
-
else
|
341
|
-
str << b
|
342
|
-
end
|
343
|
-
when maybe_eq
|
344
|
-
nc=(nextchar unless eof?)
|
345
|
-
#does eq appear to be part of a larger operator?
|
346
|
-
if nc==?= or nc==?~ or nc==?>
|
347
|
-
back1char
|
348
|
-
else
|
349
|
-
str << b
|
350
|
-
end
|
351
|
-
else
|
352
|
-
back1char
|
353
|
-
end
|
400
|
+
#= and ! only match if not part of a larger operator
|
401
|
+
trailers =
|
402
|
+
case context
|
403
|
+
when ?@,?$ then ""
|
404
|
+
# when ?: then "!(?![=])|\\?|=(?![=~>])"
|
405
|
+
else "!(?![=])|\\?"
|
406
|
+
end
|
407
|
+
@in_def_name||context==?: and trailers<<"|=(?![=~>])"
|
354
408
|
|
355
|
-
|
356
|
-
return str
|
409
|
+
@file.scan(IDENTREX[trailers]||=/^[_a-z][a-z0-9_]*(?:#{trailers})?/i)
|
357
410
|
end
|
358
411
|
|
359
412
|
#-----------------------------------
|
@@ -380,18 +433,26 @@ private
|
|
380
433
|
#a comma has been seen. are we in an
|
381
434
|
#lvalue list or some other construct that uses commas?
|
382
435
|
def comma_in_lvalue_list?
|
383
|
-
@parsestack.last.lhs=
|
436
|
+
@parsestack.last.lhs=
|
437
|
+
case l=@parsestack.last
|
438
|
+
when ListContext:
|
439
|
+
when DefContext: l.in_body
|
440
|
+
else true
|
441
|
+
end
|
384
442
|
end
|
385
443
|
|
386
444
|
#-----------------------------------
|
387
445
|
def in_lvar_define_state
|
388
446
|
#@defining_lvar is a hack
|
389
447
|
@defining_lvar or case ctx=@parsestack.last
|
390
|
-
when ForSMContext; ctx.state==:for
|
391
|
-
when RescueSMContext
|
448
|
+
#when ForSMContext; ctx.state==:for
|
449
|
+
when RescueSMContext
|
450
|
+
@last_operative_token.ident=="=>" and @file.match? /\A[\s\v]*([:;#\n]|then[^a-zA-Z0-9_])/m
|
392
451
|
#when BlockParamListLhsContext; true
|
393
452
|
end
|
394
453
|
end
|
454
|
+
|
455
|
+
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
|
395
456
|
|
396
457
|
#-----------------------------------
|
397
458
|
#determine if an alphabetic identifier refers to a variable
|
@@ -400,45 +461,50 @@ private
|
|
400
461
|
#if appropriate. adds tok to the
|
401
462
|
#local var table if its a local var being defined for the first time.
|
402
463
|
|
403
|
-
#
|
404
|
-
#
|
405
|
-
|
406
|
-
#
|
407
|
-
#
|
408
|
-
|
409
|
-
def var_or_meth_name(name,lasttok,pos)
|
464
|
+
#in general, operators in ruby are disambuated by the before-but-not-after rule.
|
465
|
+
#an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
|
466
|
+
#whitespace before but not after the 'operator' indicates it is to be considered a
|
467
|
+
#value token instead. otherwise it is a binary operator. (unary (prefix) ops count
|
468
|
+
#as 'values' here.)
|
469
|
+
def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
|
410
470
|
#look for call site if not a keyword or keyword is function-like
|
411
471
|
#look for and ignore local variable names
|
412
472
|
|
413
473
|
assert String===name
|
414
474
|
|
475
|
+
was_in_lvar_define_state=in_lvar_define_state
|
415
476
|
#maybe_local really means 'maybe local or constant'
|
416
477
|
maybe_local=case name
|
417
|
-
when /[^a-z_0-9]$/i
|
418
|
-
when /^[a-z_]
|
419
|
-
|
478
|
+
when /[^a-z_0-9]$/i #do nothing
|
479
|
+
when /^[a-z_]/
|
480
|
+
(localvars===name or
|
481
|
+
VARLIKE_KEYWORDS===name or
|
482
|
+
was_in_lvar_define_state
|
483
|
+
) and not lasttok===/^(\.|::)$/
|
484
|
+
when /^[A-Z]/
|
485
|
+
is_const=true
|
486
|
+
not lasttok==='.' #this is the right algorithm for constants...
|
420
487
|
end
|
421
488
|
|
422
489
|
assert(@moretokens.empty?)
|
423
490
|
|
424
491
|
oldlast=@last_operative_token
|
425
492
|
|
426
|
-
tok
|
493
|
+
tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
|
427
494
|
|
428
495
|
oldpos= input_position
|
429
496
|
sawnl=false
|
430
497
|
result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
|
431
498
|
if sawnl || eof?
|
432
|
-
if
|
433
|
-
if
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
lexerror tok,"not a valid variable name: #{name}"
|
439
|
-
end
|
440
|
-
return result.unshift(tok)
|
499
|
+
if was_in_lvar_define_state
|
500
|
+
if /^[a-z_][a-zA-Z_0-9]*$/===name
|
501
|
+
assert !(lasttok===/^(\.|::)$/)
|
502
|
+
localvars[name]=true
|
503
|
+
else
|
504
|
+
lexerror tok,"not a valid variable name: #{name}"
|
441
505
|
end
|
506
|
+
return result.unshift(tok)
|
507
|
+
elsif maybe_local
|
442
508
|
return result.unshift(tok) #if is_const
|
443
509
|
else
|
444
510
|
return result.unshift(
|
@@ -455,6 +521,8 @@ private
|
|
455
521
|
when ?=; not /^=[>=~]$/===readahead(2)
|
456
522
|
when ?,; comma_in_lvalue_list?
|
457
523
|
when ?); last_context_not_implicit.lhs
|
524
|
+
when ?i; /^in[^a-zA-Z_0-9]/===readahead(3) and
|
525
|
+
ForSMContext===last_context_not_implicit
|
458
526
|
when ?>,?<; /^(.)\1=$/===readahead(3)
|
459
527
|
when ?*,?&; /^(.)\1?=/===readahead(3)
|
460
528
|
when ?|; /^\|\|?=/===readahead(3) or
|
@@ -463,8 +531,8 @@ private
|
|
463
531
|
readahead(2)[1] != ?|
|
464
532
|
when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
|
465
533
|
end
|
466
|
-
if (assignment_coming && !(lasttok===/^(\.|::)$/) or
|
467
|
-
tok=VarNameToken.new(name,pos)
|
534
|
+
if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
|
535
|
+
tok=assign_lvar_type! VarNameToken.new(name,pos)
|
468
536
|
if /[^a-z_0-9]$/i===name
|
469
537
|
lexerror tok,"not a valid variable name: #{name}"
|
470
538
|
elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
|
@@ -476,44 +544,106 @@ private
|
|
476
544
|
implicit_parens_to_emit=
|
477
545
|
if assignment_coming
|
478
546
|
@parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
|
479
|
-
|
547
|
+
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
|
480
548
|
else
|
481
549
|
case nc
|
482
550
|
when nil: 2
|
483
|
-
when ?!; readahead(2)
|
551
|
+
when ?!; /^![=~]$/===readahead(2) ? 2 : 1
|
552
|
+
when ?d;
|
553
|
+
if /^do([^a-zA-Z0-9_]|$)/===readahead(3)
|
554
|
+
if maybe_local and expecting_do?
|
555
|
+
ty=VarNameToken
|
556
|
+
0
|
557
|
+
else
|
558
|
+
maybe_local=false
|
559
|
+
2
|
560
|
+
end
|
561
|
+
else
|
562
|
+
1
|
563
|
+
end
|
484
564
|
when NEVERSTARTPARAMLISTFIRST
|
485
565
|
(NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
|
486
|
-
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
|
566
|
+
when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1 #"
|
487
567
|
when ?{
|
488
568
|
maybe_local=false
|
569
|
+
1
|
570
|
+
=begin
|
489
571
|
x=2
|
490
572
|
x-=1 if /\A(return|break|next)\Z/===name and
|
491
573
|
!(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
|
492
574
|
x
|
575
|
+
=end
|
493
576
|
when ?(;
|
494
|
-
maybe_local=false
|
577
|
+
maybe_local=false
|
578
|
+
lastid=lasttok&&lasttok.ident
|
579
|
+
case lastid
|
580
|
+
when /\A[;(]|do\Z/: was_after_nonid_op=false
|
581
|
+
when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
|
582
|
+
when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
|
583
|
+
end if KeywordToken===lasttok
|
584
|
+
was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
|
585
|
+
want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
|
586
|
+
# /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
|
587
|
+
# MethNameToken===lasttok or
|
588
|
+
# RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
|
589
|
+
# )
|
590
|
+
|
591
|
+
#look ahead for closing paren (after some whitespace...)
|
592
|
+
want_parens=false if @file.match? /\A.(?:\s|\v|\#.*\n)*\)/
|
593
|
+
# afterparen=@file.pos
|
594
|
+
# getchar
|
595
|
+
# ignored_tokens(true)
|
596
|
+
# want_parens=false if nextchar==?)
|
597
|
+
# @file.pos=afterparen
|
598
|
+
|
599
|
+
want_parens ? 1 : 0
|
495
600
|
when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
|
496
|
-
when ?+, ?-,
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
601
|
+
when ?+, ?-, ?%, ?/
|
602
|
+
if /^(return|break|next)$/===@last_operative_token.ident and not(
|
603
|
+
KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
|
604
|
+
)
|
605
|
+
1
|
606
|
+
else
|
607
|
+
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
|
608
|
+
end
|
609
|
+
when ?*, ?&
|
610
|
+
lasttok=@last_operative_token
|
611
|
+
if /^(return|break|next)$/===@last_operative_token.ident and not(
|
612
|
+
KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
|
613
|
+
)
|
614
|
+
1
|
615
|
+
else
|
616
|
+
(ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
|
617
|
+
end
|
618
|
+
when ?:
|
619
|
+
next2=readahead(2)
|
620
|
+
if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
|
621
|
+
$1 && !ws_toks.empty? ? 3 : 2
|
622
|
+
else
|
623
|
+
3
|
624
|
+
end
|
625
|
+
when ??; next3=readahead(3);
|
626
|
+
/^\?([#{WHSPLF}]|[a-z_][a-z_0-9])/io===next3 ? 2 : 3
|
627
|
+
# when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
|
628
|
+
when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?["'`a-zA-Z_0-9]/]) ? 3 : 2
|
629
|
+
when ?[; ws_toks.empty?&&!(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 2 : 3
|
502
630
|
when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
|
503
631
|
else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
|
504
632
|
end
|
505
633
|
end
|
506
634
|
|
507
|
-
if is_const and implicit_parens_to_emit==3 then
|
635
|
+
if is_const and implicit_parens_to_emit==3 then #needed?
|
508
636
|
implicit_parens_to_emit=1
|
509
637
|
end
|
510
638
|
|
511
|
-
|
639
|
+
if maybe_local and implicit_parens_to_emit>=2
|
512
640
|
implicit_parens_to_emit=0
|
513
|
-
VarNameToken
|
641
|
+
ty=VarNameToken
|
514
642
|
else
|
515
|
-
MethNameToken
|
516
|
-
end
|
643
|
+
ty||=MethNameToken
|
644
|
+
end
|
645
|
+
tok=assign_lvar_type!(ty.new(name,pos))
|
646
|
+
|
517
647
|
|
518
648
|
case implicit_parens_to_emit
|
519
649
|
when 2;
|
@@ -523,8 +653,17 @@ private
|
|
523
653
|
arr,pass=*param_list_coming_with_2_or_more_params?
|
524
654
|
result.push( *arr )
|
525
655
|
unless pass
|
656
|
+
#only 1 param in list
|
526
657
|
result.unshift ImplicitParamListStartToken.new(oldpos)
|
527
|
-
|
658
|
+
last=result.last
|
659
|
+
last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
|
660
|
+
if /^(break|next|return)$/===name and
|
661
|
+
!(KeywordToken===lasttok and /^(.|::)$/===lasttok.ident)
|
662
|
+
ty=KWParamListContextNoParen
|
663
|
+
else
|
664
|
+
ty=ParamListContextNoParen
|
665
|
+
end
|
666
|
+
@parsestack.push ty.new(@linenum)
|
528
667
|
end
|
529
668
|
when 0; #do nothing
|
530
669
|
else raise 'invalid value of implicit_parens_to_emit'
|
@@ -547,11 +686,13 @@ private
|
|
547
686
|
result=[get1token]
|
548
687
|
pass=loop{
|
549
688
|
tok=get1token
|
550
|
-
result<<tok
|
689
|
+
result << tok
|
551
690
|
if @parsestack.size==basesize
|
552
691
|
break false
|
553
692
|
elsif ','==tok.to_s and @parsestack.size==basesize+1
|
554
693
|
break true
|
694
|
+
elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.unary and @parsestack.size==basesize+1
|
695
|
+
break true
|
555
696
|
elsif EoiToken===tok
|
556
697
|
lexerror tok, "unexpected eof in parameter list"
|
557
698
|
end
|
@@ -560,11 +701,13 @@ private
|
|
560
701
|
end
|
561
702
|
|
562
703
|
#-----------------------------------
|
563
|
-
CONTEXT2ENDTOK={
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
704
|
+
CONTEXT2ENDTOK={
|
705
|
+
AssignmentRhsContext=>AssignmentRhsListEndToken,
|
706
|
+
ParamListContextNoParen=>ImplicitParamListEndToken,
|
707
|
+
KWParamListContextNoParen=>ImplicitParamListEndToken,
|
708
|
+
WhenParamListContext=>KwParamListEndToken,
|
709
|
+
RescueSMContext=>KwParamListEndToken
|
710
|
+
}
|
568
711
|
def abort_noparens!(str='')
|
569
712
|
#assert @moretokens.empty?
|
570
713
|
result=[]
|
@@ -576,7 +719,63 @@ private
|
|
576
719
|
return result
|
577
720
|
end
|
578
721
|
|
579
|
-
|
722
|
+
#-----------------------------------
|
723
|
+
CONTEXT2ENDTOK_FOR_RESCUE={
|
724
|
+
AssignmentRhsContext=>AssignmentRhsListEndToken,
|
725
|
+
ParamListContextNoParen=>ImplicitParamListEndToken,
|
726
|
+
KWParamListContextNoParen=>ImplicitParamListEndToken,
|
727
|
+
WhenParamListContext=>KwParamListEndToken,
|
728
|
+
RescueSMContext=>KwParamListEndToken
|
729
|
+
}
|
730
|
+
def abort_noparens_for_rescue!(str='')
|
731
|
+
#assert @moretokens.empty?
|
732
|
+
result=[]
|
733
|
+
ctx=@parsestack.last
|
734
|
+
while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
|
735
|
+
break if AssignmentRhsContext===ctx && !ctx.multi_assign?
|
736
|
+
if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
|
737
|
+
result.push ImplicitParamListEndToken.new(input_position-str.length),
|
738
|
+
AssignmentRhsListEndToken.new(input_position-str.length)
|
739
|
+
@parsestack.pop
|
740
|
+
@parsestack.pop
|
741
|
+
break
|
742
|
+
end
|
743
|
+
result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
|
744
|
+
break if RescueSMContext===ctx #why is this here?
|
745
|
+
@parsestack.pop
|
746
|
+
ctx=@parsestack.last
|
747
|
+
end
|
748
|
+
return result
|
749
|
+
end
|
750
|
+
|
751
|
+
#-----------------------------------
|
752
|
+
CONTEXT2ENDTOK_FOR_DO={
|
753
|
+
AssignmentRhsContext=>AssignmentRhsListEndToken,
|
754
|
+
ParamListContextNoParen=>ImplicitParamListEndToken,
|
755
|
+
ExpectDoOrNlContext=>1,
|
756
|
+
#WhenParamListContext=>KwParamListEndToken,
|
757
|
+
#RescueSMContext=>KwParamListEndToken
|
758
|
+
}
|
759
|
+
def abort_noparens_for_do!(str='')
|
760
|
+
#assert @moretokens.empty?
|
761
|
+
result=[]
|
762
|
+
while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
|
763
|
+
break if klass==1
|
764
|
+
result << klass.new(input_position-str.length)
|
765
|
+
@parsestack.pop
|
766
|
+
end
|
767
|
+
return result
|
768
|
+
end
|
769
|
+
|
770
|
+
#-----------------------------------
|
771
|
+
def expecting_do?
|
772
|
+
@parsestack.reverse_each{|ctx|
|
773
|
+
next if AssignmentRhsContext===ctx
|
774
|
+
return !!CONTEXT2ENDTOK_FOR_DO[ctx.class]
|
775
|
+
}
|
776
|
+
return false
|
777
|
+
end
|
778
|
+
|
580
779
|
#-----------------------------------
|
581
780
|
def abort_1_noparen!(offs=0)
|
582
781
|
assert @moretokens.empty?
|
@@ -585,12 +784,12 @@ if false #no longer used
|
|
585
784
|
@parsestack.pop
|
586
785
|
result << AssignmentRhsListEndToken.new(input_position-offs)
|
587
786
|
end
|
588
|
-
ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
|
787
|
+
if ParamListContextNoParen===@parsestack.last #or lexerror huh,'{} with no matching callsite'
|
589
788
|
@parsestack.pop
|
590
789
|
result << ImplicitParamListEndToken.new(input_position-offs)
|
790
|
+
end
|
591
791
|
return result
|
592
792
|
end
|
593
|
-
end
|
594
793
|
|
595
794
|
#-----------------------------------
|
596
795
|
#parse keywords now, to prevent confusion over bare symbols
|
@@ -598,6 +797,7 @@ end
|
|
598
797
|
#if arg is not a keyword, the block is called
|
599
798
|
def parse_keywords(str,offset)
|
600
799
|
assert @moretokens.empty?
|
800
|
+
assert !(KeywordToken===@last_operative_token and /A(.|::|def)\Z/===@last_operative_token.ident)
|
601
801
|
result=[KeywordToken.new(str,offset)]
|
602
802
|
|
603
803
|
case str
|
@@ -619,11 +819,15 @@ end
|
|
619
819
|
/^(do)$/===start and localvars.end_block
|
620
820
|
/^(class|module|def)$/===start and @localvars_stack.pop
|
621
821
|
|
622
|
-
when "
|
822
|
+
when "module"
|
623
823
|
result.first.has_end!
|
624
824
|
@parsestack.push WantsEndContext.new(str,@linenum)
|
625
825
|
@localvars_stack.push SymbolTable.new
|
626
826
|
|
827
|
+
when "class"
|
828
|
+
result.first.has_end!
|
829
|
+
@parsestack.push ClassContext.new(str,@linenum)
|
830
|
+
|
627
831
|
when "if","unless" #could be infix form without end
|
628
832
|
if after_nonid_op?{false} #prefix form
|
629
833
|
result.first.has_end!
|
@@ -653,10 +857,11 @@ end
|
|
653
857
|
#expect_do_or_end_or_nl! str #handled by ForSMContext now
|
654
858
|
@parsestack.push ForSMContext.new(@linenum)
|
655
859
|
when "do"
|
656
|
-
result.unshift(*
|
860
|
+
result.unshift(*abort_noparens_for_do!(str))
|
657
861
|
if ExpectDoOrNlContext===@parsestack.last
|
658
862
|
@parsestack.pop
|
659
863
|
assert WantsEndContext===@parsestack.last
|
864
|
+
result.last.as=";"
|
660
865
|
else
|
661
866
|
result.last.has_end!
|
662
867
|
@parsestack.push WantsEndContext.new(str,@linenum)
|
@@ -665,10 +870,10 @@ end
|
|
665
870
|
end
|
666
871
|
when "def"
|
667
872
|
result.first.has_end!
|
668
|
-
@parsestack.push
|
669
|
-
|
873
|
+
@parsestack.push ctx=DefContext.new(@linenum)
|
874
|
+
ctx.state=:saw_def
|
670
875
|
safe_recurse { |aa|
|
671
|
-
|
876
|
+
set_last_token KeywordToken.new "def" #hack
|
672
877
|
result.concat ignored_tokens
|
673
878
|
|
674
879
|
#read an expr like a.b.c or a::b::c
|
@@ -683,10 +888,11 @@ end
|
|
683
888
|
when/^\)$/.token_pat then parencount-=1
|
684
889
|
end
|
685
890
|
EoiToken===tok and lexerror tok, "eof in def header"
|
686
|
-
result<<tok
|
891
|
+
result << tok
|
687
892
|
end until parencount==0 #@parsestack.size==old_size
|
688
|
-
|
689
|
-
|
893
|
+
@localvars_stack.push SymbolTable.new
|
894
|
+
else #no parentheses, all tail
|
895
|
+
set_last_token KeywordToken.new "." #hack hack
|
690
896
|
tokindex=result.size
|
691
897
|
result << tok=symbol(false,false)
|
692
898
|
name=tok.to_s
|
@@ -700,25 +906,30 @@ end
|
|
700
906
|
when /^[a-z_]/; localvars===name
|
701
907
|
when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
|
702
908
|
end
|
703
|
-
|
704
|
-
|
705
|
-
|
909
|
+
result.push( *ignored_tokens(false,false) )
|
910
|
+
nc=nextchar
|
911
|
+
if !ty and maybe_local
|
706
912
|
if nc==?: || nc==?.
|
707
913
|
ty=VarNameToken
|
708
914
|
end
|
709
915
|
end
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
916
|
+
if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
|
917
|
+
ty=MethNameToken
|
918
|
+
if nc != ?(
|
919
|
+
endofs=tok.offset+tok.to_s.length
|
920
|
+
newtok=ImplicitParamListStartToken.new(endofs)
|
921
|
+
result.insert tokindex+1, newtok
|
922
|
+
end
|
715
923
|
end
|
716
924
|
|
717
925
|
assert result[tokindex].equal?(tok)
|
718
|
-
|
926
|
+
var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
|
927
|
+
@localvars_stack.push SymbolTable.new
|
928
|
+
var.in_def=true if inside_method_def? and var.respond_to? :in_def=
|
929
|
+
result[tokindex]=var
|
719
930
|
|
720
931
|
|
721
|
-
#if a.b.c.d is seen, a, b
|
932
|
+
#if a.b.c.d is seen, a, b and c
|
722
933
|
#should be considered maybe varname instead of methnames.
|
723
934
|
#the last (d in the example) is always considered a methname;
|
724
935
|
#it's what's being defined.
|
@@ -727,8 +938,7 @@ end
|
|
727
938
|
#a could even be a keyword (eg self or block_given?).
|
728
939
|
end
|
729
940
|
#read tail: .b.c.d etc
|
730
|
-
result.reverse_each{|res| break
|
731
|
-
###@last_operative_token=result.last #naive
|
941
|
+
result.reverse_each{|res| break set_last_token res unless StillIgnoreToken===res}
|
732
942
|
assert !(IgnoreToken===@last_operative_token)
|
733
943
|
state=:expect_op
|
734
944
|
@in_def_name=true
|
@@ -737,12 +947,22 @@ end
|
|
737
947
|
#look for start of parameter list
|
738
948
|
nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
|
739
949
|
if state==:expect_op and /^[a-z_(&*]/i===nc
|
740
|
-
|
950
|
+
ctx.state=:def_param_list
|
951
|
+
list,listend=def_param_list
|
952
|
+
result.concat list
|
953
|
+
end_index=result.index(listend)
|
954
|
+
ofs=listend.offset
|
955
|
+
if endofs
|
956
|
+
result.insert end_index,ImplicitParamListEndToken.new(ofs)
|
957
|
+
else
|
958
|
+
ofs+=listend.to_s.size
|
959
|
+
end
|
960
|
+
result.insert end_index+1,EndDefHeaderToken.new(ofs)
|
741
961
|
break
|
742
962
|
end
|
743
963
|
|
744
964
|
tok=get1token
|
745
|
-
result<<tok
|
965
|
+
result<< tok
|
746
966
|
case tok
|
747
967
|
when EoiToken
|
748
968
|
lexerror tok,'unexpected eof in def header'
|
@@ -752,9 +972,18 @@ end
|
|
752
972
|
state=:expect_op
|
753
973
|
when /^(\.|::)$/.token_pat
|
754
974
|
lexerror tok,'expected ident' unless state==:expect_op
|
975
|
+
if endofs
|
976
|
+
result.insert -2, ImplicitParamListEndToken.new(endofs)
|
977
|
+
endofs=nil
|
978
|
+
end
|
755
979
|
state=:expect_name
|
756
980
|
when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
|
981
|
+
ctx.state=:def_body
|
757
982
|
state==:expect_op or lexerror tok,'expected identifier'
|
983
|
+
if endofs
|
984
|
+
result.insert -2,ImplicitParamListEndToken.new(tok.offset)
|
985
|
+
end
|
986
|
+
result.insert -2, EndDefHeaderToken.new(tok.offset)
|
758
987
|
break
|
759
988
|
else
|
760
989
|
lexerror(tok, "bizarre token in def name: " +
|
@@ -765,24 +994,34 @@ end
|
|
765
994
|
}
|
766
995
|
when "alias"
|
767
996
|
safe_recurse { |a|
|
768
|
-
|
997
|
+
set_last_token KeywordToken.new "alias" #hack
|
769
998
|
result.concat ignored_tokens
|
770
999
|
res=symbol(eat_next_if(?:),false)
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
1000
|
+
unless res
|
1001
|
+
lexerror(result.first,"bad symbol in alias")
|
1002
|
+
else
|
1003
|
+
res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
|
1004
|
+
result<< res
|
1005
|
+
set_last_token KeywordToken.new "alias" #hack
|
1006
|
+
result.concat ignored_tokens
|
1007
|
+
res=symbol(eat_next_if(?:),false)
|
1008
|
+
unless res
|
1009
|
+
lexerror(result.first,"bad symbol in alias")
|
1010
|
+
else
|
1011
|
+
res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
|
1012
|
+
result<< res
|
1013
|
+
end
|
1014
|
+
end
|
776
1015
|
}
|
777
1016
|
when "undef"
|
778
1017
|
safe_recurse { |a|
|
779
1018
|
loop do
|
780
|
-
|
1019
|
+
set_last_token KeywordToken.new "," #hack
|
781
1020
|
result.concat ignored_tokens
|
782
1021
|
tok=symbol(eat_next_if(?:),false)
|
783
1022
|
tok or lexerror(result.first,"bad symbol in undef")
|
784
1023
|
result<< tok
|
785
|
-
|
1024
|
+
set_last_token tok
|
786
1025
|
assert !(IgnoreToken===@last_operative_token)
|
787
1026
|
|
788
1027
|
sawnl=false
|
@@ -809,13 +1048,13 @@ end
|
|
809
1048
|
unless after_nonid_op? {false}
|
810
1049
|
#rescue needs to be treated differently when in operator context...
|
811
1050
|
#i think no RescueSMContext should be pushed on the stack...
|
812
|
-
#plus, the rescue token should be marked as infix
|
813
|
-
result.
|
1051
|
+
result.first.set_infix! #plus, the rescue token should be marked as infix
|
1052
|
+
result.unshift(*abort_noparens_for_rescue!(str))
|
814
1053
|
else
|
815
1054
|
result.push KwParamListStartToken.new(offset+str.length)
|
816
1055
|
#corresponding EndToken emitted by abort_noparens! on leaving rescue context
|
817
|
-
result.unshift(*abort_noparens!(str))
|
818
1056
|
@parsestack.push RescueSMContext.new(@linenum)
|
1057
|
+
result.unshift(*abort_noparens!(str))
|
819
1058
|
end
|
820
1059
|
|
821
1060
|
when "then"
|
@@ -831,16 +1070,43 @@ end
|
|
831
1070
|
result.unshift(*abort_noparens!(str))
|
832
1071
|
|
833
1072
|
when /\A(return|break|next)\Z/
|
834
|
-
|
835
|
-
|
1073
|
+
fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
|
1074
|
+
tok=KeywordToken.new(str,offset)
|
1075
|
+
result=yield tok
|
1076
|
+
result[0]=tok
|
1077
|
+
tok.has_no_block!
|
1078
|
+
|
1079
|
+
|
1080
|
+
when 'END'
|
1081
|
+
#END could be treated, lexically, just as if it is an
|
1082
|
+
#ordinary method, except that local vars created in
|
1083
|
+
#END blocks are visible to subsequent code. (Why??)
|
1084
|
+
#That difference forces a custom parsing.
|
1085
|
+
if @last_operative_token===/^(\.|::)$/
|
1086
|
+
result=yield nil #should pass a keyword token here
|
1087
|
+
else
|
1088
|
+
safe_recurse{
|
1089
|
+
old=result.first
|
1090
|
+
result=[
|
1091
|
+
MethNameToken.new(old.ident,old.offset),
|
1092
|
+
ImplicitParamListStartToken.new(input_position),
|
1093
|
+
ImplicitParamListEndToken.new(input_position),
|
1094
|
+
*ignored_tokens
|
1095
|
+
]
|
1096
|
+
getchar=='{' or lexerror(result.first,"expected { after #{str}")
|
1097
|
+
result.push KeywordToken.new('{',input_position-1)
|
1098
|
+
result.last.set_infix!
|
1099
|
+
@parsestack.push BeginEndContext.new(str,offset)
|
1100
|
+
}
|
1101
|
+
end
|
836
1102
|
|
837
1103
|
when FUNCLIKE_KEYWORDS
|
838
|
-
result=yield
|
1104
|
+
result=yield nil #should be a keyword token
|
839
1105
|
|
840
1106
|
when RUBYKEYWORDS
|
841
1107
|
#do nothing
|
842
1108
|
|
843
|
-
else result=yield
|
1109
|
+
else result=yield nil
|
844
1110
|
|
845
1111
|
end
|
846
1112
|
|
@@ -881,11 +1147,11 @@ end
|
|
881
1147
|
#-----------------------------------
|
882
1148
|
def block_param_list_lookahead
|
883
1149
|
safe_recurse{ |la|
|
884
|
-
|
1150
|
+
set_last_token KeywordToken.new ';'
|
885
1151
|
a=ignored_tokens
|
886
1152
|
|
887
1153
|
if eat_next_if(?|)
|
888
|
-
a<<KeywordToken.new("|", input_position-1)
|
1154
|
+
a<< KeywordToken.new("|", input_position-1)
|
889
1155
|
if true
|
890
1156
|
@parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
|
891
1157
|
nextchar==?| and a.push NoWsToken.new(input_position)
|
@@ -909,7 +1175,7 @@ else
|
|
909
1175
|
end
|
910
1176
|
|
911
1177
|
tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
|
912
|
-
a<<tok
|
1178
|
+
a<< tok
|
913
1179
|
end
|
914
1180
|
assert@defining_lvar || AssignmentRhsContext===@parsestack.last
|
915
1181
|
@defining_lvar=false
|
@@ -920,14 +1186,14 @@ else
|
|
920
1186
|
@parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
|
921
1187
|
@parsestack.pop
|
922
1188
|
|
923
|
-
a<<KeywordToken.new('|',tok.offset)
|
1189
|
+
a<< KeywordToken.new('|',tok.offset)
|
924
1190
|
@moretokens.empty? or
|
925
1191
|
fixme %#moretokens might be set from get1token call above...might be bad#
|
926
1192
|
end
|
927
1193
|
end
|
928
1194
|
end
|
929
1195
|
|
930
|
-
|
1196
|
+
set_last_token KeywordToken.new ';'
|
931
1197
|
#a.concat ignored_tokens
|
932
1198
|
|
933
1199
|
#assert @last_operative_token===';'
|
@@ -948,6 +1214,7 @@ end
|
|
948
1214
|
@in_def_name=false
|
949
1215
|
result=[]
|
950
1216
|
normal_comma_level=old_parsestack_size=@parsestack.size
|
1217
|
+
listend=nil
|
951
1218
|
safe_recurse { |a|
|
952
1219
|
assert(@moretokens.empty?)
|
953
1220
|
assert((not IgnoreToken===@moretokens[0]))
|
@@ -972,18 +1239,22 @@ end
|
|
972
1239
|
alias === call
|
973
1240
|
end
|
974
1241
|
|
975
|
-
|
1242
|
+
set_last_token KeywordToken.new ',' #hack
|
976
1243
|
#read local parameter names
|
1244
|
+
nextvar=nil
|
977
1245
|
loop do
|
978
1246
|
expect_name=(@last_operative_token===',' and
|
979
1247
|
normal_comma_level==@parsestack.size)
|
980
1248
|
expect_name and @defining_lvar||=true
|
981
1249
|
result << tok=get1token
|
982
|
-
lexerror
|
1250
|
+
break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
|
983
1251
|
|
984
1252
|
#break if at end of param list
|
985
|
-
|
986
|
-
|
1253
|
+
if endingblock===tok and old_parsestack_size>=@parsestack.size
|
1254
|
+
nextvar and localvars[nextvar]=true #add nextvar to local vars
|
1255
|
+
listend=tok
|
1256
|
+
break
|
1257
|
+
end
|
987
1258
|
|
988
1259
|
#next token is a local var name
|
989
1260
|
#(or the one after that if unary ops present)
|
@@ -992,33 +1263,40 @@ end
  case tok
  when IgnoreToken #, /^[A-Z]/ #do nothing
  when /^,$/.token_pat #hack
-
-
+
  when VarNameToken
  assert@defining_lvar
  @defining_lvar=false
  assert((not @last_operative_token===','))
+ # assert !nextvar
+ nextvar=tok.ident
+ localvars[nextvar]=false #remove nextvar from list of local vars for now
  when /^[&*]$/.token_pat #unary form...
  #a NoWsToken is also expected... read it now
  result.concat maybe_no_ws_token #not needed?
-
+ set_last_token KeywordToken.new ','
  else
  lexerror tok,"unfamiliar var name '#{tok}'"
  end
- elsif /^,$/.token_pat===tok
-
-
-
-
-
+ elsif /^,$/.token_pat===tok
+ if normal_comma_level+1==@parsestack.size and
+ AssignmentRhsContext===@parsestack.last
+ #seeing comma here should end implicit rhs started within the param list
+ result << AssignmentRhsListEndToken.new(tok.offset)
+ @parsestack.pop
+ end
+ if nextvar and normal_comma_level==@parsestack.size
+ localvars[nextvar]=true #now, finally add nextvar back to local vars
+ nextvar
+ end
  end
  end

  @defining_lvar=false
-
+ @parsestack.last.see self,:semi

  assert(@parsestack.size <= old_parsestack_size)
- assert(endingblock[tok])
+ assert(endingblock[tok] || ErrorToken===tok)

  #hack: force next token to look like start of a
  #new stmt, if the last ignored_tokens
@@ -1026,42 +1304,54 @@ end
  #(just in case the next token parsed
  #happens to call quote_expected? or after_nonid_op)
  result.concat ignored_tokens
- if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
- !(NewlineToken===@last_operative_token) and
- !(/^(end|;)$/===@last_operative_token)
-
+ # if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
+ # !(NewlineToken===@last_operative_token) and
+ # !(/^(end|;)$/===@last_operative_token)
+ #result<<EndDefHeaderToken.new(result.last.offset+result.last.to_s.size)
+ set_last_token KeywordToken.new ';'
  result<< get1token
- end
+ # end
  }

- return result
+ return result,listend
  end

  #-----------------------------------
  #handle % in ruby code. is it part of fancy quote or a modulo operator?
  def percent(ch)
-
+ if AssignmentContext===@parsestack.last
+ @parsestack.pop
+ op=true
+ end
+
+ if !op and quote_expected?(ch) ||
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
  fancy_quote ch
-
+ else
  biop ch
-
+ end
  end

  #-----------------------------------
  #handle * & in ruby code. is unary or binary operator?
  def star_or_amp(ch)
-
-
-
-
-
-
-
-
-
-
-
+ assert('*&'[ch])
+ want_unary=unary_op_expected?(ch) ||
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
+ result=quadriop(ch)
+ if want_unary
+ #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
+ assert OperatorToken===result
+ result.unary=true #result should distinguish unary+binary *&
+ WHSPLF[nextchar.chr] or
+ @moretokens << NoWsToken.new(input_position)
+ comma_in_lvalue_list?
+ if ch=='*'
+ @parsestack.last.see self, :splat
+ end
+ end
+ result
  end

  #-----------------------------------
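The percent and star_or_amp changes above are about one recurring question in this lexer: does the character start a literal or a unary form, or is it a binary operator? A rough illustration in plain Ruby (identifiers are placeholders, not taken from this diff):

    a = 10 % 3               # binary modulo: the previous token is a value
    w = %w[foo bar]          # %-literal: operand position, so % starts a word array
    f(*args)                 # unary splat: * occurs right after '('
    n = count * args.size    # binary multiply: * sits between two values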
@@ -1079,15 +1369,23 @@ end
  #-----------------------------------
  def regex_or_div(ch)
  #space after slash always means / operator, rather than regex start
-
-
-
-
-
-
-
-
-
+ #= after slash always means /= operator, rather than regex start
+ if AssignmentContext===@parsestack.last
+ @parsestack.pop
+ op=true
+ end
+
+ if !op and after_nonid_op?{
+ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
+ } || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
+ return regex(ch)
+ else #/ is operator
+ result=getchar
+ if eat_next_if(?=)
+ result << '='
+ end
+ return(operator_or_methname_token result)
+ end
  end

  #-----------------------------------
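The regex_or_div hunk above resolves the slash the same way: after a value it is division (or /=), in operand position it opens a regex. A minimal sketch with placeholder identifiers, not taken from the diff:

    rate = miles / hours     # previous token is a value, so '/' is division
    rate /= 2                # '/' followed by '=' is the /= operator
    if line =~ /^\s*#/       # operand position: '/' starts a regex literal
      puts 'comment'
    end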
@@ -1101,8 +1399,8 @@ end
  s=tok.to_s
  case s
  when /[^a-z_0-9]$/i; false
- when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
- when /^[A-
+ # when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
+ when /^[A-Z_]/i; VarNameToken===tok
  when /^[@$<]/; true
  else raise "not var or method name: #{s}"
  end
@@ -1139,18 +1437,22 @@ end
  unless eat_next_if(?:)
  #cancel implicit contexts...
  @moretokens.push(*abort_noparens!(':'))
+ @moretokens.push KeywordToken.new(':',startpos)

-
- @parsestack.
-
- TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
-
- if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
+ case @parsestack.last
+ when TernaryContext: @parsestack.pop #should be in the context's see handler
+ when ExpectDoOrNlContext: #should be in the context's see handler
  @parsestack.pop
  assert @parsestack.last.starter[/^(while|until|for)$/]
+ @moretokens.last.as=";"
+ when RescueSMContext:
+ @moretokens.last.as=";"
+ else @moretokens.last.as="then"
  end

-
+ #end ternary context, if any
+ @parsestack.last.see self,:colon
+
  return @moretokens.shift
  end

@@ -1182,9 +1484,15 @@ end
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
  result= opmatches ? read(opmatches.size) :
  case nc=nextchar
- when ?"
-
-
+ when ?" #"
+ assert notbare
+ open=':"'; close='"'
+ double_quote('"')
+ when ?' #'
+ assert notbare
+ open=":'"; close="'"
+ single_quote("'")
+ when ?` then read(1) #`
  when ?@ then at_identifier.to_s
  when ?$ then dollar_identifier.to_s
  when ?_,?a..?z then identifier_as_string(?:)
@@ -1197,7 +1505,12 @@ end
  result
  else error= "unexpected char starting symbol: #{nc.chr}"
  end
-
+ result= lexerror(klass.new(result,start,notbare ? ':' : ''),error)
+ if open
+ result.open=open
+ result.close=close
+ end
+ return result
  end

  def merge_assignment_op_in_setter_callsites?
@@ -1211,12 +1524,12 @@ end
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
  return [opmatches ? read(opmatches.size) :
  case nc=nextchar
- when ?` then read(1)
+ when ?` then read(1) #`
  when ?_,?a..?z,?A..?Z then
  context=merge_assignment_op_in_setter_callsites? ? ?: : nc
  identifier_as_string(context)
  else
-
+ set_last_token KeywordToken.new(';')
  lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
  nil
  end, start
@@ -1233,20 +1546,63 @@ end
  ender=til_charset(/[#{quote}]/)
  (quote==getchar) or
  return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
+ quote_real=true
  else
  quote='"'
  ender=til_charset(/[^a-zA-Z0-9_]/)
  ender.length >= 1 or
- return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
+ return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
  end

- res= HerePlaceholderToken.new( dash, quote, ender )
+ res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
+ if true
+ res.open=["<<",dash,quote,ender,quote].to_s
+ procrastinated=til_charset(/[\n]/)#+readnl
+ unless @base_file
+ @base_file=@file
+ @file=Sequence::List.new([@file])
+ @file.pos=@base_file.pos
+ end
+ #actually delete procrastinated from input
+ @file.delete(input_position_raw-procrastinated.size...input_position_raw)
+
+ nl=readnl or return lexerror(res, "here header without body (at eof)")
+
+ @moretokens<< res
+ bodystart=input_position
+ @offset_adjust = @min_offset_adjust+procrastinated.size
+ #was: @offset_adjust += procrastinated.size
+ body=here_body(res)
+ res.close=body.close
+ @offset_adjust = @min_offset_adjust
+ #was: @offset_adjust -= procrastinated.size
+ bodysize=input_position-bodystart
+
+ #one or two already read characters are overwritten here,
+ #in order to keep offsets correct in the long term
+ #(at present, offsets and line numbers between
+ #here header and its body will be wrong. but they should re-sync thereafter.)
+ newpos=input_position_raw-nl.size
+ #unless procrastinated.empty?
+ @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
+ #end
+ input_position_set newpos
+
+ #line numbers would be wrong within the procrastinated section
+ @linenum-=1
+
+ #be nice to get the here body token at the right place in input, too...
+ @pending_here_bodies<< body
+ @offset_adjust-=bodysize#+nl.size
+
+ return @moretokens.shift
+ else
  @incomplete_here_tokens.push res

  #hack: normally this should just be in get1token
  #this fixup is necessary because the call the get1token below
  #makes a recursion.
-
+ set_last_token res

  safe_recurse { |a|
  assert(a.object_id==@moretokens.object_id)
@@ -1269,7 +1625,7 @@ end

  tok=get1token
  assert(a.equal?( @moretokens))
- toks<<tok
+ toks<< tok
  EoiToken===tok and lexerror tok, "here body expected before eof"
  end while res.unsafe_to_use
  assert(a.equal?( @moretokens))
@@ -1281,13 +1637,14 @@ end
  #the action continues in newline, where
  #the rest of the here token is read after a
  #newline has been seen and res.affix is eventually called
+ end
  end

  #-----------------------------------
  def lessthan(ch) #match quadriop('<') or here doc or spaceship op
  case readahead(3)
- when /^<<['"`\-a-z0-9_]$/i
- if quote_expected?(ch)
+ when /^<<['"`\-a-z0-9_]$/i #'
+ if quote_expected?(ch) and not @last_operative_token==='class'
  here_header
  else
  operator_or_methname_token read(2)
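here_header and the lessthan tweak above cover the several here-document header forms Ruby accepts, while '<<' after a value (or after 'class') must still lex as an operator or singleton-class marker rather than a heredoc. Illustrative inputs only, not taken from the test data:

    plain = <<EOS          # bare terminator, interpolated body
    hello, world
    EOS
    raw = <<-'MARK'        # quoted terminator: no interpolation; '-' allows an indented MARK
      kept \n literally
      MARK
    log << entry           # '<<' right after a value is the append operator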
@@ -1309,101 +1666,231 @@ end
  error='illegal escape sequence'
  end

-
-
+ #optimization: when thru with regurgitated text from a here document,
+ #revert back to original unadorned Sequence instead of staying in the List.
+ if @base_file and indices=@file.instance_eval{@start_pos} and
+ (indices[-2]..indices[-1])===@file.pos
+ @base_file.pos=@file.pos
+ @file=@base_file
+ @base_file=nil
+ result="\n"
+ end
+
+ @offset_adjust=@min_offset_adjust
+ @moretokens.push *optional_here_bodies
+ ln=@linenum
+ @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
+ FileAndLineToken.new(@filename,ln,input_position)
+
+ start_of_line_directives

-
+ return @moretokens.shift
  end

  #-----------------------------------
  def optional_here_bodies
-
+ result=[]
+ if true
  #handle here bodies queued up by previous line
-
+ pos=input_position
+ while body=@pending_here_bodies.shift
+ #body.offset=pos
+ result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
+ result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
+ result.push body
+ #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
+ #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
+ body.headtok.line=@linenum-1
+ end
+ else
+ #...(we should be more compatible with dos/mac style newlines...)
  while tofill=@incomplete_here_tokens.shift
+ result.push(
+ here_body(tofill),
+ FileAndLineToken.new(@filename,@linenum,input_position)
+ )
+ assert(eof? || "\r\n"[prevchar])
+ tofill.line=@linenum-1
+ end
+ end
+ return result
+ end
+
+ #-----------------------------------
+ def here_body(tofill)
+ close="\n"
  tofill.string.offset= input_position
+ linecount=1 #for terminator
+ assert("\n"==prevchar)
  loop {
- assert("\
+ assert("\n"==prevchar)

  #here body terminator?
- oldpos=
+ oldpos= input_position_raw
  if tofill.dash
- til_charset(/[^#{WHSP}]/o)
+ close+=til_charset(/[^#{WHSP}]/o)
+ end
+ break if eof? #this is an error, should be handled better
+ if read(tofill.ender.size)==tofill.ender
+ crs=til_charset(/[^\r]/)||''
+ if nl=readnl
+ close+=tofill.ender+crs+nl
+ break
+ end
  end
- break if eof?
- break if read(tofill.ender.size)==tofill.ender and readnl
  input_position_set oldpos

+ assert("\n"==prevchar)
+
  if tofill.quote=="'"
- line=til_charset(/[\
-
+ line=til_charset(/[\n]/)
+ unless nl=readnl
+ assert eof?
+ break #this is an error, should be handled better
+ end
+ line.chomp!("\r")
+ line<< "\n"
+ assert("\n"==prevchar)
+ #line.gsub! "\\\\", "\\"
  tofill.append line
-
+ tofill.string.bs_handler=:squote_heredoc_esc_seq
+ linecount+=1
+ assert("\n"==line[-1,1])
+ assert("\n"==prevchar)
  else

+ assert("\n"==prevchar)
+
  back1char #-1 to make newline char the next to read
  @linenum-=1

+ assert /[\r\n]/===nextchar.chr
+
  #retr evrything til next nl
+ if FASTER_STRING_ESCAPES
+ line=all_quote("\r\n", tofill.quote, "\r\n")
+ else
  line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
+ end
+ linecount+=1
  #(you didn't know all_quote could take a regex, did you?)

+ assert("\n"==prevchar)
+
  #get rid of fals that otherwise appear to be in the middle of
  #a string (and are emitted out of order)
  fal=@moretokens.pop
  assert FileAndLineToken===fal || fal.nil?

+ assert line.bs_handler
+ tofill.string.bs_handler||=line.bs_handler
+
+ tofill.append_token line
+ tofill.string.elems<<'' unless String===tofill.string.elems.last
+
+ assert("\n"==prevchar)
+
  back1char
  @linenum-=1
  assert("\r\n"[nextchar.chr])
- tofill.append_token line
  tofill.append readnl
+
+ assert("\n"==prevchar)
  end
+
+ assert("\n"==prevchar)
  }
+

-
+ str=tofill.string
+ str.bs_handler||=:dquote_esc_seq if str.elems.size==1 and str.elems.first==''
  tofill.unsafe_to_use=false
-
-
-
-
-
-
-
+ assert str.bs_handler
+ #?? or tofill.string.elems==[]
+
+
+ tofill.string.instance_eval{@char="`"} if tofill.quote=="`"
+ #special cased, but I think that's all that's necessary...
+
+ result=tofill.bodyclass.new(tofill,linecount)
+ result.open=str.open=""
+ tofill.close=close
+ result.close=str.close=close[1..-1]
+ result.offset=str.offset
+ assert str.open
+ assert str.close
+ return result
  end

  #-----------------------------------
  def newline(ch)
  assert("\r\n"[nextchar.chr])

-
-
  #ordinary newline handling (possibly implicitly escaped)
  assert("\r\n"[nextchar.chr])
  assert !@parsestack.empty?
  assert @moretokens.empty?
- result=if NewlineToken===@last_operative_token or #hack
- @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
- !after_nonid_op?{false}
- then #hack-o-rama: probly cases left out above
- a= abort_noparens!
- ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
- assert !@parsestack.empty?
- @parsestack.last.see self,:semi
-
- a << super(ch)
- @moretokens.replace a+@moretokens
- @moretokens.shift
- else
- offset= input_position
- nl=readnl
- @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
- EscNlToken.new(@filename,@linenum-1,nl,offset)
- #WsToken.new ' ' #why? #should be "\\\n" ?
- end

-
+ pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
+ pre.allow_ooo_offset=true
+
+ if NewlineToken===@last_operative_token or #hack
+ (KeywordToken===@last_operative_token and
+ @last_operative_token.ident=="rescue" and
+ !@last_operative_token.infix?) or
+ #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
+ !after_nonid_op?{false}
+ then #hack-o-rama: probly cases left out above
+ @offset_adjust=@min_offset_adjust
+ a= abort_noparens!
+ ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
+ assert !@parsestack.empty?
+ @parsestack.last.see self,:semi
+
+ a << super(ch)
+ @moretokens.replace a+@moretokens
+ else
+ @offset_adjust=@min_offset_adjust
+ offset= input_position
+ nl=readnl
+ @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
+ FileAndLineToken.new(@filename,@linenum,input_position)
+ end
+
+ #optimization: when thru with regurgitated text from a here document,
+ #revert back to original unadorned Sequence instead of staying in the list.
+ if @base_file and indices=@file.instance_eval{@start_pos} and
+ (indices[-2]..indices[-1])===@file.pos and Sequence::SubSeq===@file.list.last
+ @base_file.pos=@file.pos
+ @file=@base_file
+ @base_file=nil
+ end
+
+ fal=@moretokens.last
+ assert FileAndLineToken===fal
+
+ @offset_adjust=@min_offset_adjust
+
+ @moretokens.unshift(*optional_here_bodies)
+ result=@moretokens.shift
+
+ #adjust line count in fal to account for newlines in here bodys
+ i=@moretokens.size-1
+ while(i>=0)
+ #assert FileAndLineToken===@moretokens[i]
+ i-=1 if FileAndLineToken===@moretokens[i]
+ break unless HereBodyToken===@moretokens[i]
+ pre_fal=true
+ fal.line-=@moretokens[i].linecount

+ i-=1
+ end
+
+ if pre_fal
+ @moretokens.unshift result
+ pre.offset=result.offset
+ result=pre
+ end
  start_of_line_directives

  return result
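Much of the bookkeeping above (the procrastinated text, @pending_here_bodies, and the EscNlToken/FileAndLineToken juggling) exists because a here-document body does not begin where its header appears: tokens after the header still belong to that line, and the body is consumed afterwards. A tiny example of the shape being handled (illustrative only):

    sql = <<QUERY + suffix   # '+ suffix' is lexed before the body below
    select * from things
    QUERY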
@@ -1424,15 +1911,16 @@ end

  begin
  eof? and raise "eof before =end"
- more<<til_charset(/[\r\n]/)
-
+ more<< til_charset(/[\r\n]/)
+ eof? and raise "eof before =end"
+ more<< readnl
  end until readahead(EQENDLENGTH)==EQEND

  #read rest of line after =end
  more << til_charset(/[\r\n]/)
- assert((?\r===nextchar or ?\n===nextchar))
+ assert((eof? or ?\r===nextchar or ?\n===nextchar))
  assert !(/[\r\n]/===more[-1,1])
- more<< readnl
+ more<< readnl unless eof?

  # newls= more.scan(/\r\n?|\n\r?/)
  # @linenum+= newls.size
@@ -1445,7 +1933,7 @@ end
  #handle __END__
  if ENDMARKER===readahead(ENDMARKERLENGTH)
  assert !(ImplicitContext===@parsestack.last)
- @moretokens.unshift endoffile_detected(read(
+ @moretokens.unshift endoffile_detected(read(ENDMARKERLENGTH))
  # input_position_set @file.size
  end
  end
@@ -1460,11 +1948,13 @@ end
  def unary_op_expected?(ch) #yukko hack
  '*&='[readahead(2)[1..1]] and return false

+ return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
+
  after_nonid_op? {
  #possible func-call as operator

  not is_var_name? and
- WHSPLF[prevchar]
+ WHSPLF[prevchar] and !WHSPLF[readahead(2)[1..1]]
  }
  end

@@ -1473,11 +1963,6 @@ end
  # <<, %, ? in ruby
  #returns whether current token is to be the start of a literal
  def quote_expected?(ch) #yukko hack
- if AssignmentContext===@parsestack.last
- @parsestack.pop
- return false
- end
-
  case ch[0]
  when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
  when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
@@ -1500,17 +1985,23 @@ end
  #used to resolve the ambiguity of
  # <<, %, /, ?, :, and newline (among others) in ruby
  def after_nonid_op?
+
+ #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
+ # if ImplicitParamListStartToken===@last_token_including_implicit
+ # huh return true
+ # end
  case @last_operative_token
- when MethNameToken, FUNCLIKE_KEYWORDS.token_pat
+ when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
  #VarNameToken should really be left out of this case...
  #should be in next branch instread
  #callers all check for last token being not a variable if they pass anything
- #but {false} in the block
+ #but {false} in the block
+ #(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
  return yield
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
  %r{^(
-
- __FILE__|__LINE__|[\})\]]
+ end|self|true|false|nil|
+ __FILE__|__LINE__|[\})\]]
  )$}x.token_pat
  #dunno about def/undef
  #maybe class/module shouldn't he here either?
@@ -1522,17 +2013,16 @@ end
  #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
  return true
  when NewlineToken, nil, #nil means we're still at beginning of file
- /^([({\[]|or|not|and|if|unless|then|elsif|else|
- while|until|begin|for|in|case|when|ensure)$
+ /^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
+ while|until|begin|for|in|case|when|ensure|defined\?)$
  /x.token_pat
  return true
-
-
+ when KeywordToken
+ return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
  when IgnoreToken
  raise "last_operative_token shouldn't be ignoreable"
- else
- raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
  end
+ raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
  end

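quote_expected? and after_nonid_op? above both key off the previous significant token, because the same character lexes differently in operator and operand position. For instance (placeholder names, 1.8-era syntax):

    x = y - 1             # '-' after a variable: binary minus
    puts -1               # '-' in operand position: unary minus on a literal
    flag = cond ? a : b   # '?' after a value: ternary operator
    ch = ?a               # '?' in operand position: character literal (Ruby 1.8)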
@@ -1577,10 +2067,10 @@ end

  #-----------------------------------
  def biop(ch) #match /%=?/ (% or %=)
- assert(ch[/^[
+ assert(ch[/^[%^]$/])
  result=getchar
  if eat_next_if(?=)
- result
+ result << ?=
  end
  return operator_or_methname_token( result)
  end
@@ -1610,7 +2100,9 @@ end
  #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
  def plusminus(ch)
  assert(/^[+\-]$/===ch)
- if unary_op_expected?(ch)
+ if unary_op_expected?(ch) or
+ KeywordToken===@last_operative_token &&
+ /^(return|break|next)$/===@last_operative_token.ident
  if (?0..?9)===readahead(2)[1]
  return number(ch)
  else #unary operator
@@ -1619,7 +2111,6 @@ end
  @moretokens << NoWsToken.new(input_position)
  result=(operator_or_methname_token result)
  result.unary=true
- #todo: result should distinguish unary+binary +-
  end
  else #binary operator
  assert(! want_op_name)
@@ -1628,9 +2119,8 @@ end
  result << ?=
  end
  result=(operator_or_methname_token result)
- #todo: result should distinguish unary+binary +-
  end
- result
+ return result
  end

  #-----------------------------------
@@ -1642,19 +2132,31 @@ end
  str << c
  result= operator_or_methname_token( str,offset)
  case c
- when '=':
+ when '=': #===,==
+ str<< (eat_next_if(?=)or'')

- when '>':
+ when '>': #=>
  unless ParamListContextNoParen===@parsestack.last
  @moretokens.unshift result
  @moretokens.unshift( *abort_noparens!("=>"))
  result=@moretokens.shift
  end
  @parsestack.last.see self,:arrow
- when '': #record local variable definitions
-
+ when '': #plain assignment: record local variable definitions
+ last_context_not_implicit.lhs=false
+ @moretokens.push *ignored_tokens(true).map{|x|
+ NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
+ }
  @parsestack.push AssignmentRhsContext.new(@linenum)
-
+ if eat_next_if ?*
+ tok=OperatorToken.new('*', input_position-1)
+ tok.unary=true
+ @moretokens.push tok
+ WHSPLF[nextchar.chr] or
+ @moretokens << NoWsToken.new(input_position)
+ comma_in_lvalue_list? #is this needed?
+ end
+ @moretokens.push AssignmentRhsListStartToken.new( input_position)
  end
  return result
  end
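The '=' branch above opens an AssignmentRhsContext and special-cases a splat immediately after the equals sign; the comma handler elsewhere in this diff later closes that context. The kind of source being tracked looks like this (illustrative):

    first, *rest = 1, 2, 3    # commas on both sides of a multiple assignment
    args = *values            # '*' right after '=' is a unary splat on the rhs
    h = { 1 => 'one' }        # '=>' instead goes through the arrow branch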
@@ -1666,6 +2168,7 @@ end
  k=eat_next_if(/[~=]/)
  if k
  result+=k
+ elsif eof?: #do nothing
  else
  WHSPLF[nextchar.chr] or
  @moretokens << NoWsToken.new(input_position)
@@ -1693,10 +2196,11 @@ end
  #-----------------------------------
  def dot_rhs(prevtok)
  safe_recurse { |a|
-
+ set_last_token prevtok
  aa= ignored_tokens
+ was=after_nonid_op?{true}
  tok,pos=callsite_symbol(prevtok)
- tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
+ tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
  a.unshift(*aa)
  }
  end
@@ -1705,7 +2209,7 @@ end
  def back_quote(ch=nil)
  if @last_operative_token===/^(def|::|\.)$/
  oldpos= input_position
- MethNameToken.new(eat_next_if(?`), oldpos)
+ MethNameToken.new(eat_next_if(?`), oldpos) #`
  else
  double_quote(ch)
  end
@@ -1716,7 +2220,7 @@ if false
  def comment(str)
  result=""
  #loop{
- result<<super(nil).to_s
+ result<< super(nil).to_s

  if /^\#.*\#$/===result #if comment was ended by a crunch

@@ -1762,7 +2266,7 @@ end
  tokch= NoWsToken.new(input_position-1)
  end
  when '('
- lasttok=last_operative_token
+ lasttok=last_token_maybe_implicit #last_operative_token
  #could be: lasttok===/^[a-z_]/i
  if (VarNameToken===lasttok or MethNameToken===lasttok or
  lasttok===FUNCLIKE_KEYWORDS)
@@ -1781,15 +2285,17 @@ end
  if after_nonid_op?{false} or @last_operative_token.has_no_block?
  @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
  else
+ #abort_noparens!
  tokch.set_infix!
- =
+ tokch.as="do"
+ #=begin not needed now, i think
  # 'need to find matching callsite context and end it if implicit'
  lasttok=last_operative_token
-
+ if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
  @moretokens.push *(abort_1_noparen!(1).push tokch)
  tokch=@moretokens.shift
  end
-
+ #=end

  localvars.start_block
  @parsestack.push BlockContext.new(@linenum)
@@ -1811,13 +2317,18 @@ end
  end
  ctx=@parsestack.pop
  origch,line=ctx.starter,ctx.linenum
- ch
+ if ch!=PAIRS[origch]
+ #kw.extend MismatchedBrace
  lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
  "matching brace location", @filename, line
-
+ end
+ if BlockContext===ctx
+ localvars.end_block
+ @moretokens.last.as="end"
+ end
  if ParamListContext==ctx.class
  assert ch==')'
-
+ kw.set_callsite! #not needed?
  end
  return @moretokens.shift
  end
@@ -1826,19 +2337,24 @@ end
  def eof(ch=nil)
  #this must be the very last character...
  oldpos= input_position
- assert(
+ assert(/\A[\x0\x4\x1a]\Z/===nextchar.chr)

- result
+ result=@file.read!
+ # result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"

- eof? or
- lexerror result,'nul character is not at the end of file'
- input_position_set @file.size
+ # eof? or
+ # lexerror result,'nul character is not at the end of file'
+ # input_position_set @file.size
  return(endoffile_detected result)
  end

  #-----------------------------------
  def endoffile_detected(s='')
  @moretokens.push( *(abort_noparens!.push super(s)))
+ if @progress_thread
+ @progress_thread.kill
+ @progress_thread=nil
+ end
  result= @moretokens.shift
  balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
  result
@@ -1851,7 +2367,26 @@ end

  #-----------------------------------
  def comma(ch)
- single_char_token(ch)
+ @moretokens.push token=single_char_token(ch)
+ if AssignmentRhsContext===@parsestack[-1] and
+ ParamListContext===@parsestack[-2] ||
+ ParamListContextNoParen===@parsestack[-2] ||
+ WhenParamListContext===@parsestack[-2] ||
+ (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
+ (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
+ @parsestack.pop
+ @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
+ end
+ token.comma_type=
+ case @parsestack[-1]
+ when AssignmentRhsContext: :rhs
+ when ParamListContext,ParamListContextNoParen: :call
+ when ListImmedContext: :array
+ else
+ :lhs if comma_in_lvalue_list?
+ end
+ @parsestack.last.see self,:comma
+ return @moretokens.shift
  end

  #-----------------------------------
@@ -1872,7 +2407,7 @@ end
  assert RUBYOPERATORREX===s
  if RUBYNONSYMOPERATORREX===s
  KeywordToken
- elsif
+ elsif want_op_name
  MethNameToken
  else
  OperatorToken
@@ -1882,9 +2417,7 @@ end
  #-----------------------------------
  #tokenify_results_of :identifier
  save_offsets_in(*CHARMAPPINGS.values.uniq-[
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
-
-
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
  ])
  #save_offsets_in :symbol
