rubylexer 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. data/History.txt +90 -0
  2. data/Manifest.txt +54 -3
  3. data/README.txt +4 -7
  4. data/Rakefile +3 -2
  5. data/lib/rubylexer.rb +856 -323
  6. data/lib/rubylexer/0.7.0.rb +11 -2
  7. data/lib/rubylexer/0.7.1.rb +2 -0
  8. data/lib/rubylexer/charhandler.rb +4 -4
  9. data/lib/rubylexer/context.rb +86 -9
  10. data/lib/rubylexer/rulexer.rb +455 -101
  11. data/lib/rubylexer/token.rb +166 -43
  12. data/lib/rubylexer/tokenprinter.rb +16 -8
  13. data/lib/rubylexer/version.rb +1 -1
  14. data/rubylexer.vpj +98 -0
  15. data/test/code/all_the_gems.rb +33 -0
  16. data/test/code/all_the_raas.rb +226 -0
  17. data/test/code/all_the_rubies.rb +2 -0
  18. data/test/code/deletewarns.rb +19 -1
  19. data/test/code/dumptokens.rb +39 -8
  20. data/test/code/errscan +2 -0
  21. data/test/code/isolate_error.rb +72 -0
  22. data/test/code/lexloop +14 -0
  23. data/test/code/locatetest.rb +150 -8
  24. data/test/code/regression.rb +109 -0
  25. data/test/code/rubylexervsruby.rb +53 -15
  26. data/test/code/strgen.rb +138 -0
  27. data/test/code/tarball.rb +144 -0
  28. data/test/code/testcases.rb +11 -0
  29. data/test/code/tokentest.rb +115 -24
  30. data/test/data/__eof2.rb +1 -0
  31. data/test/data/__eof5.rb +2 -0
  32. data/test/data/__eof6.rb +2 -0
  33. data/test/data/cvtesc.rb +17 -0
  34. data/test/data/g.rb +6 -0
  35. data/test/data/hd0.rb +3 -0
  36. data/test/data/hdateof.rb +2 -0
  37. data/test/data/hdempty.rb +3 -0
  38. data/test/data/hdr.rb +9 -0
  39. data/test/data/hdr_dos.rb +13 -0
  40. data/test/data/hdr_dos2.rb +18 -0
  41. data/test/data/heart.rb +2 -0
  42. data/test/data/here_escnl.rb +25 -0
  43. data/test/data/here_escnl_dos.rb +20 -0
  44. data/test/data/here_squote.rb +3 -0
  45. data/test/data/heremonsters.rb +140 -0
  46. data/test/data/heremonsters.rb.broken +68 -0
  47. data/test/data/heremonsters.rb.broken.save +68 -0
  48. data/test/data/heremonsters_dos.rb +140 -0
  49. data/test/data/heremonsters_dos.rb.broken +68 -0
  50. data/test/data/illegal_oneliners.rb +1 -0
  51. data/test/data/illegal_stanzas.rb +0 -0
  52. data/test/data/make_ws_strdelim.rb +22 -0
  53. data/test/data/maven2_builer_test.rb +82 -0
  54. data/test/data/migration.rb +8944 -0
  55. data/test/data/modl.rb +6 -0
  56. data/test/data/modl_dos.rb +7 -0
  57. data/test/data/modl_fails.rb +10 -0
  58. data/test/data/multilinestring.rb +6 -0
  59. data/test/data/oneliners.rb +555 -0
  60. data/test/data/p-op.rb +2 -0
  61. data/test/data/p.rb +3 -1710
  62. data/test/data/s.rb +90 -21
  63. data/test/data/simple.rb +1 -0
  64. data/test/data/simple_dos.rb +1 -0
  65. data/test/data/stanzas.rb +1194 -0
  66. data/test/data/strdelim_crlf.rb +6 -0
  67. data/test/data/stuff.rb +6 -0
  68. data/test/data/stuff2.rb +5 -0
  69. data/test/data/stuff3.rb +6 -0
  70. data/test/data/stuff4.rb +6 -0
  71. data/test/data/tkweird.rb +20 -0
  72. data/test/data/unending_stuff.rb +5 -0
  73. data/test/data/whatnot.rb +8 -0
  74. data/test/data/ws_strdelim.rb +0 -0
  75. data/test/test.sh +239 -0
  76. data/testing.txt +39 -50
  77. metadata +110 -12
  78. data/test/code/dl_all_gems.rb +0 -43
  79. data/test/code/unpack_all_gems.rb +0 -15
  80. data/test/data/gemlist.txt +0 -280
data/History.txt CHANGED
@@ -1,3 +1,93 @@
1
+ === 0.7.1/10-29-2008
2
+ * 6 Major Enhancements:
3
+ * handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
4
+ * yet more hacks in aid of string inclusions
5
+ * backslashes in strings are no longer interpreted automatically when lexed
6
+ * here documents are completely rewritten in a tricky way that more closely mimics what MRI does
7
+ * many more flags for tokens to tell apart the various cases:
8
+ * the various different local variable types have to be detected.
9
+ * colons which operate like semicolons or thens are marked as such
10
+ * { } used in block now flagged as parsing like do and end
11
+ * commas now are marked with different types depending on how they're used
12
+ * @variables in methods need to be marked as such, so their parsetree can come out different.
13
+ * clearly mark backquoted strings
14
+ * further refinements of local variable detection and implicit paren placement near these cases:
15
+ * when ws between method name and parenthesis
16
+ * break/return/next
17
+ * ? : << / rescue do
18
+
19
+ * 5 Minor Enhancements
20
+ * colon or star in assignment make it a multi assignment
21
+ * presence of unary * or & in param list forces it to be a multi-param list
22
+ * errors in string inclusions should now be handled better
23
+ * string and stringlike tokens now can tell you the exact sequence of chars used to open and close the string.
24
+ * correctly handling more cases where return/break/next parse differently than a method (yuck!)
25
+
26
+ * 26 Bugfixes
27
+ * ~ operator can be followed with an @, like + and -
28
+ * ~ is overridable, however :: is not
29
+ * raise is not a keyword
30
+ * in addition to 0x00, 0x04 and 0x1a should be considered eof in ruby. why? idunno.
31
+ * setting PROGRESS env var will cause input file position to be printed to stderr periodically.
32
+ * defined? is not a funclike keyword... really more of a unary operator
33
+ * $- is a legitimate global variable.
34
+ * better parsing of lvalue list following for keyword.
35
+ * rescue is a variable define context only when right after => and before then (or disguises).
36
+ * better placement of implicit parens around def param list
37
+ * (global) variable aliasing now supported
38
+ * local vars in END block are NOT scoped to the block!
39
+ * local vars in def param lists aren't considered variables til after the initializer for that var
40
+ * end of def header is treated like ; even if none is present
41
+ * never put here document right after class keyword
42
+ * look for start of line directives at end of here document
43
+ * oops, mac newlines don't have to be supported
44
+ * dos newlines better tolerated around here documents
45
+ * less line number/offset confusion around here documents
46
+ * newline after (non-operator) rescue is hard (but not after INNERBOUNDINGWORDS)
47
+ * handling eof in more strange places
48
+ * always expect unary op after for
49
+ * unary ops should know about the before-but-not-after rule!
50
+ * newlines after = should be escaped
51
+ * \c? and \C-? are not interpreted the same as other ctrl chars
52
+ * \n\r and \r are not recognized as nl sequences
53
+
54
+ * 18 Internal Changes (not user visible)
55
+ * commas cause a :comma event on the parsestack
56
+ * some of the lists of types of operators are available now as arrays of strings instead of regexps
57
+ * single and double quote now have separate implementations again
58
+ * keep track of whether an implicit open or close paren has just been emitted
59
+ * put ws around << to keep slickedit happy
60
+ * the eof characters are also considered whitespace.
61
+ * identifier lexer now uses regexps more heavily
62
+ * method formal parameter list is not considered an lvalue context for commas.
63
+ * class and def now have their own parse contexts
64
+ * unary star causes a :splat event on the parsestack
65
+ * is_var_name now detects var tokens just from the token type, not looking at local vars table.
66
+ * a faster regexp-based implementation of string scanning
67
+ * moved yucky side effect out of quote_expected?
68
+ * these keywords: class module def for defined? no longer automatically create operator context
69
+ * a new context for BEGIN/END keywords
70
+ * a new context for param list of return/next/break
71
+ * new escape sequence processors for regexp and %W list
72
+ * numbers now scanned with a regexp
73
+
74
+ * 15 Enhancements and bug fixes to tests:
75
+ * just print a notice on errors which are also syntax errors for ruby
76
+ * a little cleanup of temp files
77
+ * rubylexervsruby and tokentest can take input from stdin
78
+ * unlexer improvements
79
+ * dumptokens now has a --silent cmdline option
80
+ * locatetest.rb is significantly enhanced
81
+ * --unified option to diff seems to work better than -u
82
+ * tokentest better verifies exact token contents...
83
+ * tokentest now uses open and close fields of strings to verify string bounds exactly
84
+ * CRLF in a string is always treated like just a LF. (CR is elided.)
85
+ * allow_ooo hacky flag marks tokens whose offset errors are to be ignored.
86
+ * all other offset errors have been downgraded to warnings.
87
+ * most of the offset problems I had been seeing have been fixed, though
88
+ * offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
89
+ * tokentest has a --loop option, for load testing
90
+
1
91
  === 0.7.0/2-15-2008
2
92
  * implicit tokens are now emitted at the right times (need more test code)
3
93
  * local variables are now temporarily hidden by class, module, and def
data/Manifest.txt CHANGED
@@ -19,7 +19,6 @@ lib/rubylexer/symboltable.rb
19
19
  lib/rubylexer/charhandler.rb
20
20
  lib/assert.rb
21
21
  lib/rubylexer.rb
22
- test/data/gemlist.txt
23
22
  test/data/blockassigntest.rb
24
23
  test/data/for.rb
25
24
  test/data/chunky_bacon.rb
@@ -58,10 +57,62 @@ test/data/chunky_bacon2.rb
58
57
  test/data/format.rb
59
58
  test/code/locatetest.rb
60
59
  test/code/rubylexervsruby.rb
61
- test/code/dl_all_gems.rb
62
- test/code/unpack_all_gems.rb
63
60
  test/code/tokentest.rb
64
61
  test/code/dumptokens.rb
65
62
  test/code/torment
66
63
  test/code/locatetest
67
64
  test/code/deletewarns.rb
65
+ lib/rubylexer/0.7.1.rb
66
+ rubylexer.vpj
67
+ test/code/all_the_gems.rb
68
+ test/code/all_the_raas.rb
69
+ test/code/all_the_rubies.rb
70
+ test/code/errscan
71
+ test/code/isolate_error.rb
72
+ test/code/lexloop
73
+ test/code/regression.rb
74
+ test/code/strgen.rb
75
+ test/code/tarball.rb
76
+ test/code/testcases.rb
77
+ test/data/chunky.plain.rb
78
+ test/data/cvtesc.rb
79
+ test/data/__eof2.rb
80
+ test/data/__eof5.rb
81
+ test/data/__eof6.rb
82
+ test/data/hd0.rb
83
+ test/data/hdateof.rb
84
+ test/data/hdempty.rb
85
+ test/data/hdr_dos2.rb
86
+ test/data/hdr_dos.rb
87
+ test/data/hdr.rb
88
+ test/data/here_escnl_dos.rb
89
+ test/data/here_escnl.rb
90
+ test/data/heremonsters_dos.rb
91
+ test/data/heremonsters_dos.rb.broken
92
+ test/data/heremonsters.rb
93
+ test/data/heremonsters.rb.broken
94
+ test/data/heremonsters.rb.broken.save
95
+ test/data/here_squote.rb
96
+ test/data/illegal_oneliners.rb
97
+ test/data/illegal_stanzas.rb
98
+ test/data/make_ws_strdelim.rb
99
+ test/data/maven2_builer_test.rb
100
+ test/data/migration.rb
101
+ test/data/modl_dos.rb
102
+ test/data/modl_fails.rb
103
+ test/data/modl.rb
104
+ test/data/multilinestring.rb
105
+ test/data/oneliners.rb
106
+ test/data/simple_dos.rb
107
+ test/data/simple.rb
108
+ test/data/stanzas.rb
109
+ test/data/strdelim_crlf.rb
110
+ test/data/stuff2.rb
111
+ test/data/stuff3.rb
112
+ test/data/stuff4.rb
113
+ test/data/stuff.rb
114
+ test/data/tkweird.rb
115
+ test/data/unending_stuff.rb
116
+ test/data/whatnot.rb
117
+ test/data/ws_strdelim.rb
118
+ test/test.sh
data/README.txt CHANGED
@@ -67,10 +67,7 @@ keywords, depending on context:
67
67
  any overrideable operator and most keywords can also be method names
68
68
 
69
69
  == todo
70
- test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
71
- these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
72
70
  test more ways: cvt source to dos or mac fmt before testing
73
- test more ways: run unit tests after passing thru rubylexer (0.7)
74
71
  test more ways: test require'd, load'd, or eval'd code as well (0.7)
75
72
  lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
76
73
  incremental lexing (ides want this (for performance))
@@ -78,12 +75,10 @@ put everything in a namespace
78
75
  integrate w/ other tools...
79
76
  html colorized output?
80
77
  move more state onto @parsestack (ongoing)
81
- the new cases in p.rb now compile, but won't run
82
78
  expand on test documentation
83
79
  use want_op_name more
84
80
  return result as a half-parsed tree (with parentheses and the like matched)
85
81
  emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
86
- strings are still slow
87
82
  emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
88
83
  token pruning in dumptokens...
89
84
 
@@ -96,8 +91,10 @@ string tokenization sometimes a little different from ruby around newlines
96
91
  string contents might not be correctly translated in a few cases (0.8?)
97
92
  symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
98
93
  '\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
99
- windows or mac newline in source are likely to cause problems in obscure cases (need test case)
94
+ windows newline in source is likely to cause problems in obscure cases (need test case)
100
95
  unterminated =begin is not an error (0.8)
101
96
  ruby 1.9 completely unsupported (0.9)
102
97
  character sets other than ascii are not supported at all (1.0)
103
-
98
+ regression test currently shows 14 errors with differences in exact token ordering
99
+ -around string inclusions. these errors are much less serious than they seem.
100
+ offset of AssignmentRhsListEndToken appears to be off by 1
data/Rakefile CHANGED
@@ -13,12 +13,13 @@ require 'lib/rubylexer/version.rb'
13
13
  hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
14
14
  _.author = "Caleb Clausen"
15
15
  _.email = "rubylexer-owner @at@ inforadical .dot. net"
16
- _.url = "http://rubylexer.rubyforge.org/"
17
- _.extra_deps = ["sequence"]
16
+ _.url = ["http://rubylexer.rubyforge.org/", "http://rubyforge.org/projects/rubylexer/"]
17
+ _.extra_deps << ['sequence', '>= 0.2.0']
18
18
  _.test_globs=["test/{code/*,data/*rb*,results/}"]
19
19
  _.description=desc
20
20
  _.summary=desc[/\A[^.]+\./]
21
21
  _.spec_extras={:bindir=>''}
22
+ _.rdoc_pattern=/\A(howtouse\.txt|testing\.txt|README\.txt|lib\/.*\.rb)\Z/
22
23
  end
23
24
 
24
25
 
data/lib/rubylexer.rb CHANGED
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -18,7 +18,6 @@
18
18
  =end
19
19
 
20
20
 
21
-
22
21
  require 'rubylexer/rulexer' #must be 1st!!!
23
22
  require 'rubylexer/version'
24
23
  require 'rubylexer/token'
@@ -32,9 +31,11 @@ require 'rubylexer/tokenprinter'
32
31
  #-----------------------------------
33
32
  class RubyLexer
34
33
  include NestedContexts
34
+
35
+
35
36
 
36
37
  RUBYSYMOPERATORREX=
37
- %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
38
+ %r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
38
39
  # (nasty beastie, eh?)
39
40
  #these are the overridable operators
40
41
  #does not match flow-control operators like: || && ! or and if not
@@ -42,23 +43,25 @@ class RubyLexer
42
43
  #or .. ... ?:
43
44
  #for that use:
44
45
  RUBYNONSYMOPERATORREX=
45
- %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
46
+ %r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
46
47
  RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
47
48
  UNSYMOPS=/^[~!]$/ #always unary
48
49
  UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
49
50
  WHSPCHARS=WHSPLF+"\\#"
50
- OPORBEGINWORDS="(if|unless|while|until)"
51
- BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
52
- FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
51
+ OPORBEGINWORDLIST=%w(if unless while until)
52
+ BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
53
+ OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
54
+ BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
55
+ FUNCLIKE_KEYWORDS=/^(break|next|redo|return|yield|retry|super|BEGIN|END)$/
53
56
  VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
54
57
  INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
55
58
  BINOPWORDS="(and|or)"
56
- NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
59
+ NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
57
60
  NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
58
61
  NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
59
62
 
60
63
  RUBYKEYWORDS=%r{
61
- ^(alias|#{BINOPWORDS}|not|undef|end|
64
+ ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
62
65
  #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
63
66
  #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
64
67
  )$
@@ -72,8 +75,9 @@ class RubyLexer
72
75
  ?A..?Z => :identifier,
73
76
  ?_ => :identifier,
74
77
  ?0..?9 => :number,
75
- %{"'} => :double_quote,
76
- ?` => :back_quote,
78
+ ?" => :double_quote, #"
79
+ ?' => :single_quote, #'
80
+ ?` => :back_quote, #`
77
81
 
78
82
  WHSP => :whitespace, #includes \r
79
83
  ?, => :comma,
@@ -99,7 +103,9 @@ class RubyLexer
99
103
  #?\r => :newline, #implicitly escaped after op
100
104
 
101
105
  ?\\ => :escnewline,
102
- ?\0 => :eof,
106
+ ?\x00 => :eof,
107
+ ?\x04 => :eof,
108
+ ?\x1a => :eof,
103
109
 
104
110
  "[({" => :open_brace,
105
111
  "])}" => :close_brace,
@@ -108,41 +114,90 @@ class RubyLexer
108
114
  ?# => :comment
109
115
  }
110
116
 
111
- attr_reader :incomplete_here_tokens, :parsestack
117
+ attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
112
118
 
113
119
 
114
120
  #-----------------------------------
115
- def initialize(filename,file,linenum=1)
116
- super(filename,file, linenum)
121
+ def initialize(filename,file,linenum=1,offset_adjust=0)
122
+ @offset_adjust=0 #set again in next line
123
+ super(filename,file, linenum,offset_adjust)
117
124
  @start_linenum=linenum
118
125
  @parsestack=[TopLevelContext.new]
119
- @incomplete_here_tokens=[]
126
+ @incomplete_here_tokens=[] #not used anymore
127
+ @pending_here_bodies=[]
120
128
  @localvars_stack=[SymbolTable.new]
121
129
  @defining_lvar=nil
122
130
  @in_def_name=false
131
+ @last_operative_token=nil
132
+ @last_token_maybe_implicit=nil
123
133
 
124
134
  @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
125
135
 
126
136
  start_of_line_directives
137
+ progress_printer
138
+ end
139
+
140
+ def progress_printer
141
+ return unless ENV['RL_PROGRESS']
142
+ $stderr.puts 'printing progresses'
143
+ @progress_thread=Thread.new do
144
+ until EoiToken===@last_operative_token
145
+ sleep 10
146
+ $stderr.puts @file.pos
147
+ end
148
+ end
127
149
  end
128
150
 
129
151
  def localvars;
130
152
  @localvars_stack.last
131
153
  end
132
154
 
155
+ attr :localvars_stack
156
+ attr :offset_adjust
157
+ attr_writer :pending_here_bodies
158
+
159
+ #-----------------------------------
160
+ def set_last_token(tok)
161
+ @last_operative_token=@last_token_maybe_implicit=tok
162
+ end
163
+
133
164
  #-----------------------------------
134
165
  def get1token
135
166
  result=super #most of the action's here
136
167
 
168
+ if ENV['PROGRESS']
169
+ @last_cp_pos||=0
170
+ @start_time||=Time.now
171
+ if result.offset-@last_cp_pos>100000
172
+ $stderr.puts "#{result.offset} #{Time.now-@start_time}"
173
+ @last_cp_pos=result.offset
174
+ end
175
+ end
176
+
137
177
  #now cleanup and housekeeping
138
178
 
139
179
 
140
180
  #check for bizarre token types
141
181
  case result
182
+ when ImplicitParamListStartToken, ImplicitParamListEndToken
183
+ @last_token_maybe_implicit=result
184
+ result
142
185
  when StillIgnoreToken#,nil
143
186
  result
187
+ when StringToken
188
+ set_last_token result
189
+ assert !(IgnoreToken===@last_operative_token)
190
+ result.elems.map!{|frag|
191
+ if String===frag
192
+ result.translate_escapes(frag)
193
+ else
194
+ frag
195
+ end
196
+ } if AUTO_UNESCAPE_STRINGS
197
+ result
198
+
144
199
  when Token#,String
145
- @last_operative_token=result
200
+ set_last_token result
146
201
  assert !(IgnoreToken===@last_operative_token)
147
202
  result
148
203
  else
@@ -150,6 +205,20 @@ class RubyLexer
150
205
  end
151
206
  end
152
207
 
208
+ #-----------------------------------
209
+ def eof?
210
+ super or EoiToken===@last_operative_token
211
+ end
212
+
213
+ #-----------------------------------
214
+ def input_position
215
+ super+@offset_adjust
216
+ end
217
+
218
+ #-----------------------------------
219
+ def input_position_raw
220
+ @file.pos
221
+ end
153
222
 
154
223
  #-----------------------------------
155
224
  def balanced_braces?
@@ -163,7 +232,7 @@ class RubyLexer
163
232
  s=eat_next_if(?$) or return nil
164
233
 
165
234
  if t=((identifier_as_string(?$) or special_global))
166
- s<<t
235
+ s << t
167
236
  else error= "missing $id name"
168
237
  end
169
238
 
@@ -173,17 +242,27 @@ class RubyLexer
173
242
  #-----------------------------------
174
243
  def at_identifier(ch=nil)
175
244
  result = (eat_next_if(?@) or return nil)
176
- result << (eat_next_if(?@)or'')
245
+ result << (eat_next_if(?@) or '')
177
246
  if t=identifier_as_string(?@)
178
- result<<t
247
+ result << t
179
248
  else error= "missing @id name"
180
249
  end
181
- return lexerror(VarNameToken.new(result),error)
250
+ result=VarNameToken.new(result)
251
+ result.in_def=true if inside_method_def?
252
+ return lexerror(result,error)
182
253
  end
183
254
 
184
255
  private
185
256
  #-----------------------------------
186
- def here_spread_over_ruby_code(rl,tok)
257
+ def inside_method_def?
258
+ @parsestack.reverse_each{|ctx|
259
+ ctx.starter=='def' and ctx.state!=:saw_def and return true
260
+ }
261
+ return false
262
+ end
263
+
264
+ #-----------------------------------
265
+ def here_spread_over_ruby_code(rl,tok) #not used anymore
187
266
  assert(!rl.incomplete_here_tokens.empty?)
188
267
  @incomplete_here_tokens += rl.incomplete_here_tokens
189
268
  end
@@ -207,10 +286,10 @@ private
207
286
  end
208
287
 
209
288
  #-----------------------------------
210
- WSCHARSET=/[#\\\n\s\t\v\r\f]/
289
+ WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
211
290
  def ignored_tokens(allow_eof=false,allow_eol=true)
212
291
  result=[]
213
- result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
292
+ result << @moretokens.shift while StillIgnoreToken===@moretokens.first
214
293
  @moretokens.empty? or return result
215
294
  loop do
216
295
  unless @moretokens.empty?
@@ -273,8 +352,8 @@ private
273
352
  result = ((
274
353
  #order matters here, but it shouldn't
275
354
  #(but til_charset must be last)
276
- eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
277
- (eat_next_if('-') and ("-"+getchar)) or
355
+ eat_if(/-[a-z0-9_]/i,2) or
356
+ eat_next_if(/[!@&+`'=~\-\/\\,.;<>*"$?:]/) or
278
357
  (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
279
358
  ))
280
359
  end
@@ -289,23 +368,26 @@ private
289
368
  #just asserts because those contexts are never encountered.
290
369
  #control goes through symbol(<...>,nil)
291
370
  assert( /^[a-z_]$/i===context)
292
- assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
371
+ assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
293
372
 
294
- @moretokens.unshift(*parse_keywords(str,oldpos) do
373
+ @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
295
374
  #if not a keyword,
296
375
  case str
297
376
  when FUNCLIKE_KEYWORDS; #do nothing
298
377
  when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
299
378
  end
300
- safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
379
+ was_last=@last_operative_token
380
+ @last_operative_token=tok if tok
381
+ safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
301
382
  end)
302
383
  return @moretokens.shift
303
384
  end
304
385
 
305
386
  #-----------------------------------
387
+ IDENTREX={}
306
388
  def identifier_as_string(context)
307
389
  #must begin w/ letter or underscore
308
- str=eat_next_if(/[_a-z]/i) or return nil
390
+ /[_a-z]/i===nextchar.chr or return
309
391
 
310
392
  #equals, question mark, and exclamation mark
311
393
  #might be allowed at the end in some contexts.
@@ -315,45 +397,16 @@ private
315
397
  #i hope i've captured all right conditions....
316
398
  #context should always be ?: right after def, ., and :: now
317
399
 
318
- maybe_eq,maybe_qm,maybe_ex = case context
319
- when ?@,?$ then [nil,nil,nil]
320
- when ?: then [?=, ??, ?!]
321
- else [nil,??, ?!]
322
- end
323
-
324
- @in_def_name and maybe_eq= ?=
325
-
326
- str<<til_charset(/[^a-z0-9_]/i)
327
-
328
- #look for ?, !, or =, if allowed
329
- case b=getc
330
- when nil #means we're at eof
331
- #handling nil here prevents b from ever matching
332
- #a nil value of maybe_qm, maybe_ex or maybe_eq
333
- when maybe_qm
334
- str << b
335
- when maybe_ex
336
- nc=(nextchar unless eof?)
337
- #does ex appear to be part of a larger operator?
338
- if nc==?= #or nc==?~
339
- back1char
340
- else
341
- str << b
342
- end
343
- when maybe_eq
344
- nc=(nextchar unless eof?)
345
- #does eq appear to be part of a larger operator?
346
- if nc==?= or nc==?~ or nc==?>
347
- back1char
348
- else
349
- str << b
350
- end
351
- else
352
- back1char
353
- end
400
+ #= and ! only match if not part of a larger operator
401
+ trailers =
402
+ case context
403
+ when ?@,?$ then ""
404
+ # when ?: then "!(?![=])|\\?|=(?![=~>])"
405
+ else "!(?![=])|\\?"
406
+ end
407
+ @in_def_name||context==?: and trailers<<"|=(?![=~>])"
354
408
 
355
-
356
- return str
409
+ @file.scan(IDENTREX[trailers]||=/^[_a-z][a-z0-9_]*(?:#{trailers})?/i)
357
410
  end
358
411
 
359
412
  #-----------------------------------
@@ -380,18 +433,26 @@ private
380
433
  #a comma has been seen. are we in an
381
434
  #lvalue list or some other construct that uses commas?
382
435
  def comma_in_lvalue_list?
383
- @parsestack.last.lhs= (not ListContext===@parsestack.last)
436
+ @parsestack.last.lhs=
437
+ case l=@parsestack.last
438
+ when ListContext:
439
+ when DefContext: l.in_body
440
+ else true
441
+ end
384
442
  end
385
443
 
386
444
  #-----------------------------------
387
445
  def in_lvar_define_state
388
446
  #@defining_lvar is a hack
389
447
  @defining_lvar or case ctx=@parsestack.last
390
- when ForSMContext; ctx.state==:for
391
- when RescueSMContext; ctx.state==:arrow
448
+ #when ForSMContext; ctx.state==:for
449
+ when RescueSMContext
450
+ @last_operative_token.ident=="=>" and @file.match? /\A[\s\v]*([:;#\n]|then[^a-zA-Z0-9_])/m
392
451
  #when BlockParamListLhsContext; true
393
452
  end
394
453
  end
454
+
455
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
395
456
 
396
457
  #-----------------------------------
397
458
  #determine if an alphabetic identifier refers to a variable
@@ -400,45 +461,50 @@ private
400
461
  #if appropriate. adds tok to the
401
462
  #local var table if its a local var being defined for the first time.
402
463
 
403
- #note: what we here call variables (rather, constants) following ::
404
- #might actually be methods at runtime, but that's immaterial to tokenization.
405
-
406
- #note: this routine should determine the correct token type for name and
407
- #create the appropriate token. currently this is not done because callers
408
- #typically have done it (perhaps incorrectly) already.
409
- def var_or_meth_name(name,lasttok,pos)
464
+ #in general, operators in ruby are disambuated by the before-but-not-after rule.
465
+ #an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
466
+ #whitespace before but not after the 'operator' indicates it is to be considered a
467
+ #value token instead. otherwise it is a binary operator. (unary (prefix) ops count
468
+ #as 'values' here.)
469
+ def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
410
470
  #look for call site if not a keyword or keyword is function-like
411
471
  #look for and ignore local variable names
412
472
 
413
473
  assert String===name
414
474
 
475
+ was_in_lvar_define_state=in_lvar_define_state
415
476
  #maybe_local really means 'maybe local or constant'
416
477
  maybe_local=case name
417
- when /[^a-z_0-9]$/i; #do nothing
418
- when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
419
- when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
478
+ when /[^a-z_0-9]$/i #do nothing
479
+ when /^[a-z_]/
480
+ (localvars===name or
481
+ VARLIKE_KEYWORDS===name or
482
+ was_in_lvar_define_state
483
+ ) and not lasttok===/^(\.|::)$/
484
+ when /^[A-Z]/
485
+ is_const=true
486
+ not lasttok==='.' #this is the right algorithm for constants...
420
487
  end
421
488
 
422
489
  assert(@moretokens.empty?)
423
490
 
424
491
  oldlast=@last_operative_token
425
492
 
426
- tok=@last_operative_token=VarNameToken.new(name,pos)
493
+ tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
427
494
 
428
495
  oldpos= input_position
429
496
  sawnl=false
430
497
  result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
431
498
  if sawnl || eof?
432
- if maybe_local then
433
- if in_lvar_define_state
434
- if /^[a-z_][a-zA-Z_0-9]*$/===name
435
- assert !(lasttok===/^(\.|::)$/)
436
- localvars[name]=true
437
- else
438
- lexerror tok,"not a valid variable name: #{name}"
439
- end
440
- return result.unshift(tok)
499
+ if was_in_lvar_define_state
500
+ if /^[a-z_][a-zA-Z_0-9]*$/===name
501
+ assert !(lasttok===/^(\.|::)$/)
502
+ localvars[name]=true
503
+ else
504
+ lexerror tok,"not a valid variable name: #{name}"
441
505
  end
506
+ return result.unshift(tok)
507
+ elsif maybe_local
442
508
  return result.unshift(tok) #if is_const
443
509
  else
444
510
  return result.unshift(
@@ -455,6 +521,8 @@ private
455
521
  when ?=; not /^=[>=~]$/===readahead(2)
456
522
  when ?,; comma_in_lvalue_list?
457
523
  when ?); last_context_not_implicit.lhs
524
+ when ?i; /^in[^a-zA-Z_0-9]/===readahead(3) and
525
+ ForSMContext===last_context_not_implicit
458
526
  when ?>,?<; /^(.)\1=$/===readahead(3)
459
527
  when ?*,?&; /^(.)\1?=/===readahead(3)
460
528
  when ?|; /^\|\|?=/===readahead(3) or
@@ -463,8 +531,8 @@ private
463
531
  readahead(2)[1] != ?|
464
532
  when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
465
533
  end
466
- if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
467
- tok=VarNameToken.new(name,pos)
534
+ if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
535
+ tok=assign_lvar_type! VarNameToken.new(name,pos)
468
536
  if /[^a-z_0-9]$/i===name
469
537
  lexerror tok,"not a valid variable name: #{name}"
470
538
  elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
@@ -476,44 +544,106 @@ private
476
544
  implicit_parens_to_emit=
477
545
  if assignment_coming
478
546
  @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
479
- 0
547
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
480
548
  else
481
549
  case nc
482
550
  when nil: 2
483
- when ?!; readahead(2)=='!=' ? 2 : 1
551
+ when ?!; /^![=~]$/===readahead(2) ? 2 : 1
552
+ when ?d;
553
+ if /^do([^a-zA-Z0-9_]|$)/===readahead(3)
554
+ if maybe_local and expecting_do?
555
+ ty=VarNameToken
556
+ 0
557
+ else
558
+ maybe_local=false
559
+ 2
560
+ end
561
+ else
562
+ 1
563
+ end
484
564
  when NEVERSTARTPARAMLISTFIRST
485
565
  (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
486
- when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
566
+ when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1 #"
487
567
  when ?{
488
568
  maybe_local=false
569
+ 1
570
+ =begin
489
571
  x=2
490
572
  x-=1 if /\A(return|break|next)\Z/===name and
491
573
  !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
492
574
  x
575
+ =end
493
576
  when ?(;
494
- maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
577
+ maybe_local=false
578
+ lastid=lasttok&&lasttok.ident
579
+ case lastid
580
+ when /\A[;(]|do\Z/: was_after_nonid_op=false
581
+ when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
582
+ when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
583
+ end if KeywordToken===lasttok
584
+ was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
585
+ want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
586
+ # /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
587
+ # MethNameToken===lasttok or
588
+ # RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
589
+ # )
590
+
591
+ #look ahead for closing paren (after some whitespace...)
592
+ want_parens=false if @file.match? /\A.(?:\s|\v|\#.*\n)*\)/
593
+ # afterparen=@file.pos
594
+ # getchar
595
+ # ignored_tokens(true)
596
+ # want_parens=false if nextchar==?)
597
+ # @file.pos=afterparen
598
+
599
+ want_parens ? 1 : 0
495
600
  when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
496
- when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
497
- when ?:,??; next2=readahead(2);
498
- WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
499
- # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
500
- when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
501
- when ?[; ws_toks.empty? ? 2 : 3
601
+ when ?+, ?-, ?%, ?/
602
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
603
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
604
+ )
605
+ 1
606
+ else
607
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
608
+ end
609
+ when ?*, ?&
610
+ lasttok=@last_operative_token
611
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
612
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
613
+ )
614
+ 1
615
+ else
616
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
617
+ end
618
+ when ?:
619
+ next2=readahead(2)
620
+ if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
621
+ $1 && !ws_toks.empty? ? 3 : 2
622
+ else
623
+ 3
624
+ end
625
+ when ??; next3=readahead(3);
626
+ /^\?([#{WHSPLF}]|[a-z_][a-z_0-9])/io===next3 ? 2 : 3
627
+ # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
628
+ when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?["'`a-zA-Z_0-9]/]) ? 3 : 2
629
+ when ?[; ws_toks.empty?&&!(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 2 : 3
502
630
  when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
503
631
  else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
504
632
  end
505
633
  end
506
634
 
507
- if is_const and implicit_parens_to_emit==3 then
635
+ if is_const and implicit_parens_to_emit==3 then #needed?
508
636
  implicit_parens_to_emit=1
509
637
  end
510
638
 
511
- tok=if maybe_local and implicit_parens_to_emit>=2
639
+ if maybe_local and implicit_parens_to_emit>=2
512
640
  implicit_parens_to_emit=0
513
- VarNameToken
641
+ ty=VarNameToken
514
642
  else
515
- MethNameToken
516
- end.new(name,pos)
643
+ ty||=MethNameToken
644
+ end
645
+ tok=assign_lvar_type!(ty.new(name,pos))
646
+
517
647
 
518
648
  case implicit_parens_to_emit
519
649
  when 2;
@@ -523,8 +653,17 @@ private
523
653
  arr,pass=*param_list_coming_with_2_or_more_params?
524
654
  result.push( *arr )
525
655
  unless pass
656
+ #only 1 param in list
526
657
  result.unshift ImplicitParamListStartToken.new(oldpos)
527
- @parsestack.push ParamListContextNoParen.new(@linenum)
658
+ last=result.last
659
+ last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
660
+ if /^(break|next|return)$/===name and
661
+ !(KeywordToken===lasttok and /^(.|::)$/===lasttok.ident)
662
+ ty=KWParamListContextNoParen
663
+ else
664
+ ty=ParamListContextNoParen
665
+ end
666
+ @parsestack.push ty.new(@linenum)
528
667
  end
529
668
  when 0; #do nothing
530
669
  else raise 'invalid value of implicit_parens_to_emit'
@@ -547,11 +686,13 @@ private
547
686
  result=[get1token]
548
687
  pass=loop{
549
688
  tok=get1token
550
- result<<tok
689
+ result << tok
551
690
  if @parsestack.size==basesize
552
691
  break false
553
692
  elsif ','==tok.to_s and @parsestack.size==basesize+1
554
693
  break true
694
+ elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.unary and @parsestack.size==basesize+1
695
+ break true
555
696
  elsif EoiToken===tok
556
697
  lexerror tok, "unexpected eof in parameter list"
557
698
  end
@@ -560,11 +701,13 @@ private
560
701
  end
561
702
 
562
703
  #-----------------------------------
563
- CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
564
- ParamListContextNoParen=>ImplicitParamListEndToken,
565
- WhenParamListContext=>KwParamListEndToken,
566
- RescueSMContext=>KwParamListEndToken
567
- }
704
+ CONTEXT2ENDTOK={
705
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
706
+ ParamListContextNoParen=>ImplicitParamListEndToken,
707
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
708
+ WhenParamListContext=>KwParamListEndToken,
709
+ RescueSMContext=>KwParamListEndToken
710
+ }
568
711
  def abort_noparens!(str='')
569
712
  #assert @moretokens.empty?
570
713
  result=[]
@@ -576,7 +719,63 @@ private
576
719
  return result
577
720
  end
578
721
 
579
- if false #no longer used
722
+ #-----------------------------------
723
+ CONTEXT2ENDTOK_FOR_RESCUE={
724
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
725
+ ParamListContextNoParen=>ImplicitParamListEndToken,
726
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
727
+ WhenParamListContext=>KwParamListEndToken,
728
+ RescueSMContext=>KwParamListEndToken
729
+ }
730
+ def abort_noparens_for_rescue!(str='')
731
+ #assert @moretokens.empty?
732
+ result=[]
733
+ ctx=@parsestack.last
734
+ while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
735
+ break if AssignmentRhsContext===ctx && !ctx.multi_assign?
736
+ if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
737
+ result.push ImplicitParamListEndToken.new(input_position-str.length),
738
+ AssignmentRhsListEndToken.new(input_position-str.length)
739
+ @parsestack.pop
740
+ @parsestack.pop
741
+ break
742
+ end
743
+ result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
744
+ break if RescueSMContext===ctx #why is this here?
745
+ @parsestack.pop
746
+ ctx=@parsestack.last
747
+ end
748
+ return result
749
+ end
750
+
751
+ #-----------------------------------
752
+ CONTEXT2ENDTOK_FOR_DO={
753
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
754
+ ParamListContextNoParen=>ImplicitParamListEndToken,
755
+ ExpectDoOrNlContext=>1,
756
+ #WhenParamListContext=>KwParamListEndToken,
757
+ #RescueSMContext=>KwParamListEndToken
758
+ }
759
+ def abort_noparens_for_do!(str='')
760
+ #assert @moretokens.empty?
761
+ result=[]
762
+ while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
763
+ break if klass==1
764
+ result << klass.new(input_position-str.length)
765
+ @parsestack.pop
766
+ end
767
+ return result
768
+ end
769
+
770
+ #-----------------------------------
771
+ def expecting_do?
772
+ @parsestack.reverse_each{|ctx|
773
+ next if AssignmentRhsContext===ctx
774
+ return !!CONTEXT2ENDTOK_FOR_DO[ctx.class]
775
+ }
776
+ return false
777
+ end
778
+
580
779
  #-----------------------------------
581
780
  def abort_1_noparen!(offs=0)
582
781
  assert @moretokens.empty?
@@ -585,12 +784,12 @@ if false #no longer used
585
784
  @parsestack.pop
586
785
  result << AssignmentRhsListEndToken.new(input_position-offs)
587
786
  end
588
- ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
787
+ if ParamListContextNoParen===@parsestack.last #or lexerror huh,'{} with no matching callsite'
589
788
  @parsestack.pop
590
789
  result << ImplicitParamListEndToken.new(input_position-offs)
790
+ end
591
791
  return result
592
792
  end
593
- end
594
793
 
595
794
  #-----------------------------------
596
795
  #parse keywords now, to prevent confusion over bare symbols
@@ -598,6 +797,7 @@ end
598
797
  #if arg is not a keyword, the block is called
599
798
  def parse_keywords(str,offset)
600
799
  assert @moretokens.empty?
800
+ assert !(KeywordToken===@last_operative_token and /A(.|::|def)\Z/===@last_operative_token.ident)
601
801
  result=[KeywordToken.new(str,offset)]
602
802
 
603
803
  case str
@@ -619,11 +819,15 @@ end
619
819
  /^(do)$/===start and localvars.end_block
620
820
  /^(class|module|def)$/===start and @localvars_stack.pop
621
821
 
622
- when "class","module"
822
+ when "module"
623
823
  result.first.has_end!
624
824
  @parsestack.push WantsEndContext.new(str,@linenum)
625
825
  @localvars_stack.push SymbolTable.new
626
826
 
827
+ when "class"
828
+ result.first.has_end!
829
+ @parsestack.push ClassContext.new(str,@linenum)
830
+
627
831
  when "if","unless" #could be infix form without end
628
832
  if after_nonid_op?{false} #prefix form
629
833
  result.first.has_end!
@@ -653,10 +857,11 @@ end
653
857
  #expect_do_or_end_or_nl! str #handled by ForSMContext now
654
858
  @parsestack.push ForSMContext.new(@linenum)
655
859
  when "do"
656
- result.unshift(*abort_noparens!(str))
860
+ result.unshift(*abort_noparens_for_do!(str))
657
861
  if ExpectDoOrNlContext===@parsestack.last
658
862
  @parsestack.pop
659
863
  assert WantsEndContext===@parsestack.last
864
+ result.last.as=";"
660
865
  else
661
866
  result.last.has_end!
662
867
  @parsestack.push WantsEndContext.new(str,@linenum)
@@ -665,10 +870,10 @@ end
665
870
  end
666
871
  when "def"
667
872
  result.first.has_end!
668
- @parsestack.push WantsEndContext.new("def",@linenum)
669
- @localvars_stack.push SymbolTable.new
873
+ @parsestack.push ctx=DefContext.new(@linenum)
874
+ ctx.state=:saw_def
670
875
  safe_recurse { |aa|
671
- @last_operative_token=KeywordToken.new "def" #hack
876
+ set_last_token KeywordToken.new "def" #hack
672
877
  result.concat ignored_tokens
673
878
 
674
879
  #read an expr like a.b.c or a::b::c
@@ -683,10 +888,11 @@ end
683
888
  when/^\)$/.token_pat then parencount-=1
684
889
  end
685
890
  EoiToken===tok and lexerror tok, "eof in def header"
686
- result<<tok
891
+ result << tok
687
892
  end until parencount==0 #@parsestack.size==old_size
688
- else #no parentheses, all tail
689
- @last_operative_token=KeywordToken.new "." #hack hack
893
+ @localvars_stack.push SymbolTable.new
894
+ else #no parentheses, all tail
895
+ set_last_token KeywordToken.new "." #hack hack
690
896
  tokindex=result.size
691
897
  result << tok=symbol(false,false)
692
898
  name=tok.to_s
@@ -700,25 +906,30 @@ end
700
906
  when /^[a-z_]/; localvars===name
701
907
  when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
702
908
  end
703
- if !ty and maybe_local
704
- result.push( *ignored_tokens(false,false) )
705
- nc=nextchar
909
+ result.push( *ignored_tokens(false,false) )
910
+ nc=nextchar
911
+ if !ty and maybe_local
706
912
  if nc==?: || nc==?.
707
913
  ty=VarNameToken
708
914
  end
709
915
  end
710
- unless ty
711
- ty=MethNameToken
712
- endofs=tok.offset+tok.to_s.length
713
- result[tokindex+1...tokindex+1]=
714
- [ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
916
+ if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
917
+ ty=MethNameToken
918
+ if nc != ?(
919
+ endofs=tok.offset+tok.to_s.length
920
+ newtok=ImplicitParamListStartToken.new(endofs)
921
+ result.insert tokindex+1, newtok
922
+ end
715
923
  end
716
924
 
717
925
  assert result[tokindex].equal?(tok)
718
- result[tokindex]=ty.new(tok.to_s,tok.offset)
926
+ var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
927
+ @localvars_stack.push SymbolTable.new
928
+ var.in_def=true if inside_method_def? and var.respond_to? :in_def=
929
+ result[tokindex]=var
719
930
 
720
931
 
721
- #if a.b.c.d is seen, a, b, and c
932
+ #if a.b.c.d is seen, a, b and c
722
933
  #should be considered maybe varname instead of methnames.
723
934
  #the last (d in the example) is always considered a methname;
724
935
  #it's what's being defined.
@@ -727,8 +938,7 @@ end
727
938
  #a could even be a keyword (eg self or block_given?).
728
939
  end
729
940
  #read tail: .b.c.d etc
730
- result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
731
- ###@last_operative_token=result.last #naive
941
+ result.reverse_each{|res| break set_last_token res unless StillIgnoreToken===res}
732
942
  assert !(IgnoreToken===@last_operative_token)
733
943
  state=:expect_op
734
944
  @in_def_name=true
@@ -737,12 +947,22 @@ end
737
947
  #look for start of parameter list
738
948
  nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
739
949
  if state==:expect_op and /^[a-z_(&*]/i===nc
740
- result.concat def_param_list
950
+ ctx.state=:def_param_list
951
+ list,listend=def_param_list
952
+ result.concat list
953
+ end_index=result.index(listend)
954
+ ofs=listend.offset
955
+ if endofs
956
+ result.insert end_index,ImplicitParamListEndToken.new(ofs)
957
+ else
958
+ ofs+=listend.to_s.size
959
+ end
960
+ result.insert end_index+1,EndDefHeaderToken.new(ofs)
741
961
  break
742
962
  end
743
963
 
744
964
  tok=get1token
745
- result<<tok
965
+ result<< tok
746
966
  case tok
747
967
  when EoiToken
748
968
  lexerror tok,'unexpected eof in def header'
@@ -752,9 +972,18 @@ end
752
972
  state=:expect_op
753
973
  when /^(\.|::)$/.token_pat
754
974
  lexerror tok,'expected ident' unless state==:expect_op
975
+ if endofs
976
+ result.insert -2, ImplicitParamListEndToken.new(endofs)
977
+ endofs=nil
978
+ end
755
979
  state=:expect_name
756
980
  when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
981
+ ctx.state=:def_body
757
982
  state==:expect_op or lexerror tok,'expected identifier'
983
+ if endofs
984
+ result.insert -2,ImplicitParamListEndToken.new(tok.offset)
985
+ end
986
+ result.insert -2, EndDefHeaderToken.new(tok.offset)
758
987
  break
759
988
  else
760
989
  lexerror(tok, "bizarre token in def name: " +
@@ -765,24 +994,34 @@ end
765
994
  }
766
995
  when "alias"
767
996
  safe_recurse { |a|
768
- @last_operative_token=KeywordToken.new "alias" #hack
997
+ set_last_token KeywordToken.new "alias" #hack
769
998
  result.concat ignored_tokens
770
999
  res=symbol(eat_next_if(?:),false)
771
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
772
- @last_operative_token=KeywordToken.new "alias" #hack
773
- result.concat ignored_tokens
774
- res=symbol(eat_next_if(?:),false)
775
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
1000
+ unless res
1001
+ lexerror(result.first,"bad symbol in alias")
1002
+ else
1003
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
1004
+ result<< res
1005
+ set_last_token KeywordToken.new "alias" #hack
1006
+ result.concat ignored_tokens
1007
+ res=symbol(eat_next_if(?:),false)
1008
+ unless res
1009
+ lexerror(result.first,"bad symbol in alias")
1010
+ else
1011
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
1012
+ result<< res
1013
+ end
1014
+ end
776
1015
  }
777
1016
  when "undef"
778
1017
  safe_recurse { |a|
779
1018
  loop do
780
- @last_operative_token=KeywordToken.new "," #hack
1019
+ set_last_token KeywordToken.new "," #hack
781
1020
  result.concat ignored_tokens
782
1021
  tok=symbol(eat_next_if(?:),false)
783
1022
  tok or lexerror(result.first,"bad symbol in undef")
784
1023
  result<< tok
785
- @last_operative_token=tok
1024
+ set_last_token tok
786
1025
  assert !(IgnoreToken===@last_operative_token)
787
1026
 
788
1027
  sawnl=false
@@ -809,13 +1048,13 @@ end
809
1048
  unless after_nonid_op? {false}
810
1049
  #rescue needs to be treated differently when in operator context...
811
1050
  #i think no RescueSMContext should be pushed on the stack...
812
- #plus, the rescue token should be marked as infix
813
- result.first.set_infix!
1051
+ result.first.set_infix! #plus, the rescue token should be marked as infix
1052
+ result.unshift(*abort_noparens_for_rescue!(str))
814
1053
  else
815
1054
  result.push KwParamListStartToken.new(offset+str.length)
816
1055
  #corresponding EndToken emitted by abort_noparens! on leaving rescue context
817
- result.unshift(*abort_noparens!(str))
818
1056
  @parsestack.push RescueSMContext.new(@linenum)
1057
+ result.unshift(*abort_noparens!(str))
819
1058
  end
820
1059
 
821
1060
  when "then"
@@ -831,16 +1070,43 @@ end
831
1070
  result.unshift(*abort_noparens!(str))
832
1071
 
833
1072
  when /\A(return|break|next)\Z/
834
- result=yield
835
- result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
1073
+ fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
1074
+ tok=KeywordToken.new(str,offset)
1075
+ result=yield tok
1076
+ result[0]=tok
1077
+ tok.has_no_block!
1078
+
1079
+
1080
+ when 'END'
1081
+ #END could be treated, lexically, just as if it is an
1082
+ #ordinary method, except that local vars created in
1083
+ #END blocks are visible to subsequent code. (Why??)
1084
+ #That difference forces a custom parsing.
1085
+ if @last_operative_token===/^(\.|::)$/
1086
+ result=yield nil #should pass a keyword token here
1087
+ else
1088
+ safe_recurse{
1089
+ old=result.first
1090
+ result=[
1091
+ MethNameToken.new(old.ident,old.offset),
1092
+ ImplicitParamListStartToken.new(input_position),
1093
+ ImplicitParamListEndToken.new(input_position),
1094
+ *ignored_tokens
1095
+ ]
1096
+ getchar=='{' or lexerror(result.first,"expected { after #{str}")
1097
+ result.push KeywordToken.new('{',input_position-1)
1098
+ result.last.set_infix!
1099
+ @parsestack.push BeginEndContext.new(str,offset)
1100
+ }
1101
+ end
836
1102
 
837
1103
  when FUNCLIKE_KEYWORDS
838
- result=yield
1104
+ result=yield nil #should be a keyword token
839
1105
 
840
1106
  when RUBYKEYWORDS
841
1107
  #do nothing
842
1108
 
843
- else result=yield
1109
+ else result=yield nil
844
1110
 
845
1111
  end
846
1112
 
@@ -881,11 +1147,11 @@ end
881
1147
  #-----------------------------------
882
1148
  def block_param_list_lookahead
883
1149
  safe_recurse{ |la|
884
- @last_operative_token=KeywordToken.new ';'
1150
+ set_last_token KeywordToken.new ';'
885
1151
  a=ignored_tokens
886
1152
 
887
1153
  if eat_next_if(?|)
888
- a<<KeywordToken.new("|", input_position-1)
1154
+ a<< KeywordToken.new("|", input_position-1)
889
1155
  if true
890
1156
  @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
891
1157
  nextchar==?| and a.push NoWsToken.new(input_position)
@@ -909,7 +1175,7 @@ else
909
1175
  end
910
1176
 
911
1177
  tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
912
- a<<tok
1178
+ a<< tok
913
1179
  end
914
1180
  assert@defining_lvar || AssignmentRhsContext===@parsestack.last
915
1181
  @defining_lvar=false
@@ -920,14 +1186,14 @@ else
920
1186
  @parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
921
1187
  @parsestack.pop
922
1188
 
923
- a<<KeywordToken.new('|',tok.offset)
1189
+ a<< KeywordToken.new('|',tok.offset)
924
1190
  @moretokens.empty? or
925
1191
  fixme %#moretokens might be set from get1token call above...might be bad#
926
1192
  end
927
1193
  end
928
1194
  end
929
1195
 
930
- @last_operative_token=KeywordToken.new ';'
1196
+ set_last_token KeywordToken.new ';'
931
1197
  #a.concat ignored_tokens
932
1198
 
933
1199
  #assert @last_operative_token===';'
@@ -948,6 +1214,7 @@ end
948
1214
  @in_def_name=false
949
1215
  result=[]
950
1216
  normal_comma_level=old_parsestack_size=@parsestack.size
1217
+ listend=nil
951
1218
  safe_recurse { |a|
952
1219
  assert(@moretokens.empty?)
953
1220
  assert((not IgnoreToken===@moretokens[0]))
@@ -972,18 +1239,22 @@ end
972
1239
  alias === call
973
1240
  end
974
1241
 
975
- @last_operative_token=KeywordToken.new ',' #hack
1242
+ set_last_token KeywordToken.new ',' #hack
976
1243
  #read local parameter names
1244
+ nextvar=nil
977
1245
  loop do
978
1246
  expect_name=(@last_operative_token===',' and
979
1247
  normal_comma_level==@parsestack.size)
980
1248
  expect_name and @defining_lvar||=true
981
1249
  result << tok=get1token
982
- lexerror tok, "unexpected eof in def header" if EoiToken===tok
1250
+ break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
983
1251
 
984
1252
  #break if at end of param list
985
- endingblock===tok and
986
- old_parsestack_size>=@parsestack.size and break
1253
+ if endingblock===tok and old_parsestack_size>=@parsestack.size
1254
+ nextvar and localvars[nextvar]=true #add nextvar to local vars
1255
+ listend=tok
1256
+ break
1257
+ end
987
1258
 
988
1259
  #next token is a local var name
989
1260
  #(or the one after that if unary ops present)
@@ -992,33 +1263,40 @@ end
992
1263
  case tok
993
1264
  when IgnoreToken #, /^[A-Z]/ #do nothing
994
1265
  when /^,$/.token_pat #hack
995
-
996
-
1266
+
997
1267
  when VarNameToken
998
1268
  assert@defining_lvar
999
1269
  @defining_lvar=false
1000
1270
  assert((not @last_operative_token===','))
1271
+ # assert !nextvar
1272
+ nextvar=tok.ident
1273
+ localvars[nextvar]=false #remove nextvar from list of local vars for now
1001
1274
  when /^[&*]$/.token_pat #unary form...
1002
1275
  #a NoWsToken is also expected... read it now
1003
1276
  result.concat maybe_no_ws_token #not needed?
1004
- @last_operative_token=KeywordToken.new ','
1277
+ set_last_token KeywordToken.new ','
1005
1278
  else
1006
1279
  lexerror tok,"unfamiliar var name '#{tok}'"
1007
1280
  end
1008
- elsif /^,$/.token_pat===tok and
1009
- normal_comma_level+1==@parsestack.size and
1010
- AssignmentRhsContext===@parsestack.last
1011
- #seeing comma here should end implicit rhs started within the param list
1012
- result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
1013
- @parsestack.pop
1281
+ elsif /^,$/.token_pat===tok
1282
+ if normal_comma_level+1==@parsestack.size and
1283
+ AssignmentRhsContext===@parsestack.last
1284
+ #seeing comma here should end implicit rhs started within the param list
1285
+ result << AssignmentRhsListEndToken.new(tok.offset)
1286
+ @parsestack.pop
1287
+ end
1288
+ if nextvar and normal_comma_level==@parsestack.size
1289
+ localvars[nextvar]=true #now, finally add nextvar back to local vars
1290
+ nextvar
1291
+ end
1014
1292
  end
1015
1293
  end
1016
1294
 
1017
1295
  @defining_lvar=false
1018
-
1296
+ @parsestack.last.see self,:semi
1019
1297
 
1020
1298
  assert(@parsestack.size <= old_parsestack_size)
1021
- assert(endingblock[tok])
1299
+ assert(endingblock[tok] || ErrorToken===tok)
1022
1300
 
1023
1301
  #hack: force next token to look like start of a
1024
1302
  #new stmt, if the last ignored_tokens
@@ -1026,42 +1304,54 @@ end
1026
1304
  #(just in case the next token parsed
1027
1305
  #happens to call quote_expected? or after_nonid_op)
1028
1306
  result.concat ignored_tokens
1029
- if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
1030
- !(NewlineToken===@last_operative_token) and
1031
- !(/^(end|;)$/===@last_operative_token)
1032
- @last_operative_token=KeywordToken.new ';'
1307
+ # if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
1308
+ # !(NewlineToken===@last_operative_token) and
1309
+ # !(/^(end|;)$/===@last_operative_token)
1310
+ #result<<EndDefHeaderToken.new(result.last.offset+result.last.to_s.size)
1311
+ set_last_token KeywordToken.new ';'
1033
1312
  result<< get1token
1034
- end
1313
+ # end
1035
1314
  }
1036
1315
 
1037
- return result
1316
+ return result,listend
1038
1317
  end
1039
1318
 
1040
1319
 
1041
1320
  #-----------------------------------
1042
1321
  #handle % in ruby code. is it part of fancy quote or a modulo operator?
1043
1322
  def percent(ch)
1044
- if quote_expected? ch
1323
+ if AssignmentContext===@parsestack.last
1324
+ @parsestack.pop
1325
+ op=true
1326
+ end
1327
+
1328
+ if !op and quote_expected?(ch) ||
1329
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
1045
1330
  fancy_quote ch
1046
- else
1331
+ else
1047
1332
  biop ch
1048
- end
1333
+ end
1049
1334
  end
1050
1335
 
1051
1336
  #-----------------------------------
1052
1337
  #handle * & in ruby code. is unary or binary operator?
1053
1338
  def star_or_amp(ch)
1054
- assert('*&'[ch])
1055
- want_unary=unary_op_expected? ch
1056
- result=(quadriop ch)
1057
- if want_unary
1058
- #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
1059
- assert OperatorToken===result
1060
- result.unary=true #result should distinguish unary+binary *&
1061
- WHSPLF[nextchar.chr] or
1062
- @moretokens << NoWsToken.new(input_position)
1063
- end
1064
- result
1339
+ assert('*&'[ch])
1340
+ want_unary=unary_op_expected?(ch) ||
1341
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
1342
+ result=quadriop(ch)
1343
+ if want_unary
1344
+ #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
1345
+ assert OperatorToken===result
1346
+ result.unary=true #result should distinguish unary+binary *&
1347
+ WHSPLF[nextchar.chr] or
1348
+ @moretokens << NoWsToken.new(input_position)
1349
+ comma_in_lvalue_list?
1350
+ if ch=='*'
1351
+ @parsestack.last.see self, :splat
1352
+ end
1353
+ end
1354
+ result
1065
1355
  end
1066
1356
 
1067
1357
  #-----------------------------------
@@ -1079,15 +1369,23 @@ end
1079
1369
  #-----------------------------------
1080
1370
  def regex_or_div(ch)
1081
1371
  #space after slash always means / operator, rather than regex start
1082
- if after_nonid_op?{ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}] }
1083
- return regex(ch)
1084
- else #/ is operator
1085
- result=getchar
1086
- if eat_next_if(?=)
1087
- result << '='
1088
- end
1089
- return(operator_or_methname_token result)
1090
- end
1372
+ #= after slash always means /= operator, rather than regex start
1373
+ if AssignmentContext===@parsestack.last
1374
+ @parsestack.pop
1375
+ op=true
1376
+ end
1377
+
1378
+ if !op and after_nonid_op?{
1379
+ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
1380
+ } || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
1381
+ return regex(ch)
1382
+ else #/ is operator
1383
+ result=getchar
1384
+ if eat_next_if(?=)
1385
+ result << '='
1386
+ end
1387
+ return(operator_or_methname_token result)
1388
+ end
1091
1389
  end
1092
1390
 
1093
1391
  #-----------------------------------
@@ -1101,8 +1399,8 @@ end
1101
1399
  s=tok.to_s
1102
1400
  case s
1103
1401
  when /[^a-z_0-9]$/i; false
1104
- when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1105
- when /^[A-Z]/; VarNameToken===tok
1402
+ # when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1403
+ when /^[A-Z_]/i; VarNameToken===tok
1106
1404
  when /^[@$<]/; true
1107
1405
  else raise "not var or method name: #{s}"
1108
1406
  end
@@ -1139,18 +1437,22 @@ end
1139
1437
  unless eat_next_if(?:)
1140
1438
  #cancel implicit contexts...
1141
1439
  @moretokens.push(*abort_noparens!(':'))
1440
+ @moretokens.push KeywordToken.new(':',startpos)
1142
1441
 
1143
- #end ternary context, if any
1144
- @parsestack.last.see self,:colon
1145
-
1146
- TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
1147
-
1148
- if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
1442
+ case @parsestack.last
1443
+ when TernaryContext: @parsestack.pop #should be in the context's see handler
1444
+ when ExpectDoOrNlContext: #should be in the context's see handler
1149
1445
  @parsestack.pop
1150
1446
  assert @parsestack.last.starter[/^(while|until|for)$/]
1447
+ @moretokens.last.as=";"
1448
+ when RescueSMContext:
1449
+ @moretokens.last.as=";"
1450
+ else @moretokens.last.as="then"
1151
1451
  end
1152
1452
 
1153
- @moretokens.push KeywordToken.new(':',startpos)
1453
+ #end ternary context, if any
1454
+ @parsestack.last.see self,:colon
1455
+
1154
1456
  return @moretokens.shift
1155
1457
  end
1156
1458
 
@@ -1182,9 +1484,15 @@ end
1182
1484
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1183
1485
  result= opmatches ? read(opmatches.size) :
1184
1486
  case nc=nextchar
1185
- when ?" then assert notbare;double_quote('"')
1186
- when ?' then assert notbare;double_quote("'")
1187
- when ?` then read(1)
1487
+ when ?" #"
1488
+ assert notbare
1489
+ open=':"'; close='"'
1490
+ double_quote('"')
1491
+ when ?' #'
1492
+ assert notbare
1493
+ open=":'"; close="'"
1494
+ single_quote("'")
1495
+ when ?` then read(1) #`
1188
1496
  when ?@ then at_identifier.to_s
1189
1497
  when ?$ then dollar_identifier.to_s
1190
1498
  when ?_,?a..?z then identifier_as_string(?:)
@@ -1197,7 +1505,12 @@ end
1197
1505
  result
1198
1506
  else error= "unexpected char starting symbol: #{nc.chr}"
1199
1507
  end
1200
- return lexerror(klass.new(result,start),error)
1508
+ result= lexerror(klass.new(result,start,notbare ? ':' : ''),error)
1509
+ if open
1510
+ result.open=open
1511
+ result.close=close
1512
+ end
1513
+ return result
1201
1514
  end
1202
1515
 
1203
1516
  def merge_assignment_op_in_setter_callsites?
@@ -1211,12 +1524,12 @@ end
1211
1524
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1212
1525
  return [opmatches ? read(opmatches.size) :
1213
1526
  case nc=nextchar
1214
- when ?` then read(1)
1527
+ when ?` then read(1) #`
1215
1528
  when ?_,?a..?z,?A..?Z then
1216
1529
  context=merge_assignment_op_in_setter_callsites? ? ?: : nc
1217
1530
  identifier_as_string(context)
1218
1531
  else
1219
- @last_operative_token=KeywordToken.new(';')
1532
+ set_last_token KeywordToken.new(';')
1220
1533
  lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
1221
1534
  nil
1222
1535
  end, start
@@ -1233,20 +1546,63 @@ end
1233
1546
  ender=til_charset(/[#{quote}]/)
1234
1547
  (quote==getchar) or
1235
1548
  return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
1549
+ quote_real=true
1236
1550
  else
1237
1551
  quote='"'
1238
1552
  ender=til_charset(/[^a-zA-Z0-9_]/)
1239
1553
  ender.length >= 1 or
1240
- return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
1554
+ return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
1241
1555
  end
1242
1556
 
1243
- res= HerePlaceholderToken.new( dash, quote, ender )
1557
+ res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
1558
+ if true
1559
+ res.open=["<<",dash,quote,ender,quote].to_s
1560
+ procrastinated=til_charset(/[\n]/)#+readnl
1561
+ unless @base_file
1562
+ @base_file=@file
1563
+ @file=Sequence::List.new([@file])
1564
+ @file.pos=@base_file.pos
1565
+ end
1566
+ #actually delete procrastinated from input
1567
+ @file.delete(input_position_raw-procrastinated.size...input_position_raw)
1568
+
1569
+ nl=readnl or return lexerror(res, "here header without body (at eof)")
1570
+
1571
+ @moretokens<< res
1572
+ bodystart=input_position
1573
+ @offset_adjust = @min_offset_adjust+procrastinated.size
1574
+ #was: @offset_adjust += procrastinated.size
1575
+ body=here_body(res)
1576
+ res.close=body.close
1577
+ @offset_adjust = @min_offset_adjust
1578
+ #was: @offset_adjust -= procrastinated.size
1579
+ bodysize=input_position-bodystart
1580
+
1581
+ #one or two already read characters are overwritten here,
1582
+ #in order to keep offsets correct in the long term
1583
+ #(at present, offsets and line numbers between
1584
+ #here header and its body will be wrong. but they should re-sync thereafter.)
1585
+ newpos=input_position_raw-nl.size
1586
+ #unless procrastinated.empty?
1587
+ @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
1588
+ #end
1589
+ input_position_set newpos
1590
+
1591
+ #line numbers would be wrong within the procrastinated section
1592
+ @linenum-=1
1593
+
1594
+ #be nice to get the here body token at the right place in input, too...
1595
+ @pending_here_bodies<< body
1596
+ @offset_adjust-=bodysize#+nl.size
1597
+
1598
+ return @moretokens.shift
1599
+ else
1244
1600
  @incomplete_here_tokens.push res
1245
1601
 
1246
1602
  #hack: normally this should just be in get1token
1247
1603
  #this fixup is necessary because the call the get1token below
1248
1604
  #makes a recursion.
1249
- @last_operative_token=res
1605
+ set_last_token res
1250
1606
 
1251
1607
  safe_recurse { |a|
1252
1608
  assert(a.object_id==@moretokens.object_id)
@@ -1269,7 +1625,7 @@ end
1269
1625
 
1270
1626
  tok=get1token
1271
1627
  assert(a.equal?( @moretokens))
1272
- toks<<tok
1628
+ toks<< tok
1273
1629
  EoiToken===tok and lexerror tok, "here body expected before eof"
1274
1630
  end while res.unsafe_to_use
1275
1631
  assert(a.equal?( @moretokens))
@@ -1281,13 +1637,14 @@ end
1281
1637
  #the action continues in newline, where
1282
1638
  #the rest of the here token is read after a
1283
1639
  #newline has been seen and res.affix is eventually called
1640
+ end
1284
1641
  end
1285
1642
 
1286
1643
  #-----------------------------------
1287
1644
  def lessthan(ch) #match quadriop('<') or here doc or spaceship op
1288
1645
  case readahead(3)
1289
- when /^<<['"`\-a-z0-9_]$/i
1290
- if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
1646
+ when /^<<['"`\-a-z0-9_]$/i #'
1647
+ if quote_expected?(ch) and not @last_operative_token==='class'
1291
1648
  here_header
1292
1649
  else
1293
1650
  operator_or_methname_token read(2)
@@ -1309,101 +1666,231 @@ end
1309
1666
  error='illegal escape sequence'
1310
1667
  end
1311
1668
 
1312
- @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
1313
- optional_here_bodies
1669
+ #optimization: when thru with regurgitated text from a here document,
1670
+ #revert back to original unadorned Sequence instead of staying in the List.
1671
+ if @base_file and indices=@file.instance_eval{@start_pos} and
1672
+ (indices[-2]..indices[-1])===@file.pos
1673
+ @base_file.pos=@file.pos
1674
+ @file=@base_file
1675
+ @base_file=nil
1676
+ result="\n"
1677
+ end
1678
+
1679
+ @offset_adjust=@min_offset_adjust
1680
+ @moretokens.push *optional_here_bodies
1681
+ ln=@linenum
1682
+ @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
1683
+ FileAndLineToken.new(@filename,ln,input_position)
1684
+
1685
+ start_of_line_directives
1314
1686
 
1315
- lexerror EscNlToken.new(@filename,ln-1,result,pos), error
1687
+ return @moretokens.shift
1316
1688
  end
1317
1689
 
1318
1690
  #-----------------------------------
1319
1691
  def optional_here_bodies
1320
-
1692
+ result=[]
1693
+ if true
1321
1694
  #handle here bodies queued up by previous line
1322
- #(we should be more compatible with dos/mac style newlines...)
1695
+ pos=input_position
1696
+ while body=@pending_here_bodies.shift
1697
+ #body.offset=pos
1698
+ result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
1699
+ result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
1700
+ result.push body
1701
+ #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
1702
+ #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
1703
+ body.headtok.line=@linenum-1
1704
+ end
1705
+ else
1706
+ #...(we should be more compatible with dos/mac style newlines...)
1323
1707
  while tofill=@incomplete_here_tokens.shift
1708
+ result.push(
1709
+ here_body(tofill),
1710
+ FileAndLineToken.new(@filename,@linenum,input_position)
1711
+ )
1712
+ assert(eof? || "\r\n"[prevchar])
1713
+ tofill.line=@linenum-1
1714
+ end
1715
+ end
1716
+ return result
1717
+ end
1718
+
1719
+ #-----------------------------------
1720
+ def here_body(tofill)
1721
+ close="\n"
1324
1722
  tofill.string.offset= input_position
1723
+ linecount=1 #for terminator
1724
+ assert("\n"==prevchar)
1325
1725
  loop {
1326
- assert("\r\n"[prevchar])
1726
+ assert("\n"==prevchar)
1327
1727
 
1328
1728
  #here body terminator?
1329
- oldpos= input_position
1729
+ oldpos= input_position_raw
1330
1730
  if tofill.dash
1331
- til_charset(/[^#{WHSP}]/o)
1731
+ close+=til_charset(/[^#{WHSP}]/o)
1732
+ end
1733
+ break if eof? #this is an error, should be handled better
1734
+ if read(tofill.ender.size)==tofill.ender
1735
+ crs=til_charset(/[^\r]/)||''
1736
+ if nl=readnl
1737
+ close+=tofill.ender+crs+nl
1738
+ break
1739
+ end
1332
1740
  end
1333
- break if eof?
1334
- break if read(tofill.ender.size)==tofill.ender and readnl
1335
1741
  input_position_set oldpos
1336
1742
 
1743
+ assert("\n"==prevchar)
1744
+
1337
1745
  if tofill.quote=="'"
1338
- line=til_charset(/[\r\n]/)+readnl
1339
- line.gsub! "\\\\", "\\"
1746
+ line=til_charset(/[\n]/)
1747
+ unless nl=readnl
1748
+ assert eof?
1749
+ break #this is an error, should be handled better
1750
+ end
1751
+ line.chomp!("\r")
1752
+ line<< "\n"
1753
+ assert("\n"==prevchar)
1754
+ #line.gsub! "\\\\", "\\"
1340
1755
  tofill.append line
1341
- assert(line[-1..-1][/[\r\n]/])
1756
+ tofill.string.bs_handler=:squote_heredoc_esc_seq
1757
+ linecount+=1
1758
+ assert("\n"==line[-1,1])
1759
+ assert("\n"==prevchar)
1342
1760
  else
1343
1761
 
1762
+ assert("\n"==prevchar)
1763
+
1344
1764
  back1char #-1 to make newline char the next to read
1345
1765
  @linenum-=1
1346
1766
 
1767
+ assert /[\r\n]/===nextchar.chr
1768
+
1347
1769
  #retr evrything til next nl
1770
+ if FASTER_STRING_ESCAPES
1771
+ line=all_quote("\r\n", tofill.quote, "\r\n")
1772
+ else
1348
1773
  line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
1774
+ end
1775
+ linecount+=1
1349
1776
  #(you didn't know all_quote could take a regex, did you?)
1350
1777
 
1778
+ assert("\n"==prevchar)
1779
+
1351
1780
  #get rid of fals that otherwise appear to be in the middle of
1352
1781
  #a string (and are emitted out of order)
1353
1782
  fal=@moretokens.pop
1354
1783
  assert FileAndLineToken===fal || fal.nil?
1355
1784
 
1785
+ assert line.bs_handler
1786
+ tofill.string.bs_handler||=line.bs_handler
1787
+
1788
+ tofill.append_token line
1789
+ tofill.string.elems<<'' unless String===tofill.string.elems.last
1790
+
1791
+ assert("\n"==prevchar)
1792
+
1356
1793
  back1char
1357
1794
  @linenum-=1
1358
1795
  assert("\r\n"[nextchar.chr])
1359
- tofill.append_token line
1360
1796
  tofill.append readnl
1797
+
1798
+ assert("\n"==prevchar)
1361
1799
  end
1800
+
1801
+ assert("\n"==prevchar)
1362
1802
  }
1803
+
1363
1804
 
1364
- assert(eof? || "\r\n"[prevchar])
1805
+ str=tofill.string
1806
+ str.bs_handler||=:dquote_esc_seq if str.elems.size==1 and str.elems.first==''
1365
1807
  tofill.unsafe_to_use=false
1366
- tofill.line=@linenum-1
1367
-
1368
- @moretokens.push \
1369
- tofill.bodyclass.new(tofill),
1370
- FileAndLineToken.new(@filename,@linenum,input_position)
1371
- end
1372
-
1808
+ assert str.bs_handler
1809
+ #?? or tofill.string.elems==[]
1810
+
1811
+
1812
+ tofill.string.instance_eval{@char="`"} if tofill.quote=="`"
1813
+ #special cased, but I think that's all that's necessary...
1814
+
1815
+ result=tofill.bodyclass.new(tofill,linecount)
1816
+ result.open=str.open=""
1817
+ tofill.close=close
1818
+ result.close=str.close=close[1..-1]
1819
+ result.offset=str.offset
1820
+ assert str.open
1821
+ assert str.close
1822
+ return result
1373
1823
  end
1374
1824
 
1375
1825
  #-----------------------------------
1376
1826
  def newline(ch)
1377
1827
  assert("\r\n"[nextchar.chr])
1378
1828
 
1379
-
1380
-
1381
1829
  #ordinary newline handling (possibly implicitly escaped)
1382
1830
  assert("\r\n"[nextchar.chr])
1383
1831
  assert !@parsestack.empty?
1384
1832
  assert @moretokens.empty?
1385
- result=if NewlineToken===@last_operative_token or #hack
1386
- @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1387
- !after_nonid_op?{false}
1388
- then #hack-o-rama: probly cases left out above
1389
- a= abort_noparens!
1390
- ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1391
- assert !@parsestack.empty?
1392
- @parsestack.last.see self,:semi
1393
-
1394
- a << super(ch)
1395
- @moretokens.replace a+@moretokens
1396
- @moretokens.shift
1397
- else
1398
- offset= input_position
1399
- nl=readnl
1400
- @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
1401
- EscNlToken.new(@filename,@linenum-1,nl,offset)
1402
- #WsToken.new ' ' #why? #should be "\\\n" ?
1403
- end
1404
1833
 
1405
- optional_here_bodies
1834
+ pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
1835
+ pre.allow_ooo_offset=true
1836
+
1837
+ if NewlineToken===@last_operative_token or #hack
1838
+ (KeywordToken===@last_operative_token and
1839
+ @last_operative_token.ident=="rescue" and
1840
+ !@last_operative_token.infix?) or
1841
+ #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1842
+ !after_nonid_op?{false}
1843
+ then #hack-o-rama: probly cases left out above
1844
+ @offset_adjust=@min_offset_adjust
1845
+ a= abort_noparens!
1846
+ ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1847
+ assert !@parsestack.empty?
1848
+ @parsestack.last.see self,:semi
1849
+
1850
+ a << super(ch)
1851
+ @moretokens.replace a+@moretokens
1852
+ else
1853
+ @offset_adjust=@min_offset_adjust
1854
+ offset= input_position
1855
+ nl=readnl
1856
+ @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
1857
+ FileAndLineToken.new(@filename,@linenum,input_position)
1858
+ end
1859
+
1860
+ #optimization: when thru with regurgitated text from a here document,
1861
+ #revert back to original unadorned Sequence instead of staying in the list.
1862
+ if @base_file and indices=@file.instance_eval{@start_pos} and
1863
+ (indices[-2]..indices[-1])===@file.pos and Sequence::SubSeq===@file.list.last
1864
+ @base_file.pos=@file.pos
1865
+ @file=@base_file
1866
+ @base_file=nil
1867
+ end
1868
+
1869
+ fal=@moretokens.last
1870
+ assert FileAndLineToken===fal
1871
+
1872
+ @offset_adjust=@min_offset_adjust
1873
+
1874
+ @moretokens.unshift(*optional_here_bodies)
1875
+ result=@moretokens.shift
1876
+
1877
+ #adjust line count in fal to account for newlines in here bodys
1878
+ i=@moretokens.size-1
1879
+ while(i>=0)
1880
+ #assert FileAndLineToken===@moretokens[i]
1881
+ i-=1 if FileAndLineToken===@moretokens[i]
1882
+ break unless HereBodyToken===@moretokens[i]
1883
+ pre_fal=true
1884
+ fal.line-=@moretokens[i].linecount
1406
1885
 
1886
+ i-=1
1887
+ end
1888
+
1889
+ if pre_fal
1890
+ @moretokens.unshift result
1891
+ pre.offset=result.offset
1892
+ result=pre
1893
+ end
1407
1894
  start_of_line_directives
1408
1895
 
1409
1896
  return result
@@ -1424,15 +1911,16 @@ end
1424
1911
 
1425
1912
  begin
1426
1913
  eof? and raise "eof before =end"
1427
- more<<til_charset(/[\r\n]/)
1428
- more<<readnl
1914
+ more<< til_charset(/[\r\n]/)
1915
+ eof? and raise "eof before =end"
1916
+ more<< readnl
1429
1917
  end until readahead(EQENDLENGTH)==EQEND
1430
1918
 
1431
1919
  #read rest of line after =end
1432
1920
  more << til_charset(/[\r\n]/)
1433
- assert((?\r===nextchar or ?\n===nextchar))
1921
+ assert((eof? or ?\r===nextchar or ?\n===nextchar))
1434
1922
  assert !(/[\r\n]/===more[-1,1])
1435
- more<< readnl
1923
+ more<< readnl unless eof?
1436
1924
 
1437
1925
  # newls= more.scan(/\r\n?|\n\r?/)
1438
1926
  # @linenum+= newls.size
@@ -1445,7 +1933,7 @@ end
1445
1933
  #handle __END__
1446
1934
  if ENDMARKER===readahead(ENDMARKERLENGTH)
1447
1935
  assert !(ImplicitContext===@parsestack.last)
1448
- @moretokens.unshift endoffile_detected(read(7))
1936
+ @moretokens.unshift endoffile_detected(read(ENDMARKERLENGTH))
1449
1937
  # input_position_set @file.size
1450
1938
  end
1451
1939
  end
@@ -1460,11 +1948,13 @@ end
1460
1948
  def unary_op_expected?(ch) #yukko hack
1461
1949
  '*&='[readahead(2)[1..1]] and return false
1462
1950
 
1951
+ return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
1952
+
1463
1953
  after_nonid_op? {
1464
1954
  #possible func-call as operator
1465
1955
 
1466
1956
  not is_var_name? and
1467
- WHSPLF[prevchar]
1957
+ WHSPLF[prevchar] and !WHSPLF[readahead(2)[1..1]]
1468
1958
  }
1469
1959
  end
1470
1960
 
@@ -1473,11 +1963,6 @@ end
1473
1963
  # <<, %, ? in ruby
1474
1964
  #returns whether current token is to be the start of a literal
1475
1965
  def quote_expected?(ch) #yukko hack
1476
- if AssignmentContext===@parsestack.last
1477
- @parsestack.pop
1478
- return false
1479
- end
1480
-
1481
1966
  case ch[0]
1482
1967
  when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
1483
1968
  when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
@@ -1500,17 +1985,23 @@ end
1500
1985
  #used to resolve the ambiguity of
1501
1986
  # <<, %, /, ?, :, and newline (among others) in ruby
1502
1987
  def after_nonid_op?
1988
+
1989
+ #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
1990
+ # if ImplicitParamListStartToken===@last_token_including_implicit
1991
+ # huh return true
1992
+ # end
1503
1993
  case @last_operative_token
1504
- when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
1994
+ when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
1505
1995
  #VarNameToken should really be left out of this case...
1506
1996
  #should be in next branch instread
1507
1997
  #callers all check for last token being not a variable if they pass anything
1508
- #but {false} in the block
1998
+ #but {false} in the block
1999
+ #(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
1509
2000
  return yield
1510
2001
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
1511
2002
  %r{^(
1512
- class|module|end|self|true|false|nil|
1513
- __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
2003
+ end|self|true|false|nil|
2004
+ __FILE__|__LINE__|[\})\]]
1514
2005
  )$}x.token_pat
1515
2006
  #dunno about def/undef
1516
2007
  #maybe class/module shouldn't he here either?
@@ -1522,17 +2013,16 @@ end
1522
2013
  #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
1523
2014
  return true
1524
2015
  when NewlineToken, nil, #nil means we're still at beginning of file
1525
- /^([({\[]|or|not|and|if|unless|then|elsif|else|
1526
- while|until|begin|for|in|case|when|ensure)$
2016
+ /^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
2017
+ while|until|begin|for|in|case|when|ensure|defined\?)$
1527
2018
  /x.token_pat
1528
2019
  return true
1529
- #when KeywordToken
1530
- # return true
2020
+ when KeywordToken
2021
+ return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
1531
2022
  when IgnoreToken
1532
2023
  raise "last_operative_token shouldn't be ignoreable"
1533
- else
1534
- raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
1535
2024
  end
2025
+ raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
1536
2026
  end
1537
2027
 
1538
2028
 
@@ -1577,10 +2067,10 @@ end
1577
2067
 
1578
2068
  #-----------------------------------
1579
2069
  def biop(ch) #match /%=?/ (% or %=)
1580
- assert(ch[/^[%^~]$/])
2070
+ assert(ch[/^[%^]$/])
1581
2071
  result=getchar
1582
2072
  if eat_next_if(?=)
1583
- result <<?=
2073
+ result << ?=
1584
2074
  end
1585
2075
  return operator_or_methname_token( result)
1586
2076
  end
@@ -1610,7 +2100,9 @@ end
1610
2100
  #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
1611
2101
  def plusminus(ch)
1612
2102
  assert(/^[+\-]$/===ch)
1613
- if unary_op_expected?(ch)
2103
+ if unary_op_expected?(ch) or
2104
+ KeywordToken===@last_operative_token &&
2105
+ /^(return|break|next)$/===@last_operative_token.ident
1614
2106
  if (?0..?9)===readahead(2)[1]
1615
2107
  return number(ch)
1616
2108
  else #unary operator
@@ -1619,7 +2111,6 @@ end
1619
2111
  @moretokens << NoWsToken.new(input_position)
1620
2112
  result=(operator_or_methname_token result)
1621
2113
  result.unary=true
1622
- #todo: result should distinguish unary+binary +-
1623
2114
  end
1624
2115
  else #binary operator
1625
2116
  assert(! want_op_name)
@@ -1628,9 +2119,8 @@ end
1628
2119
  result << ?=
1629
2120
  end
1630
2121
  result=(operator_or_methname_token result)
1631
- #todo: result should distinguish unary+binary +-
1632
2122
  end
1633
- result
2123
+ return result
1634
2124
  end
1635
2125
 
1636
2126
  #-----------------------------------
@@ -1642,19 +2132,31 @@ end
1642
2132
  str << c
1643
2133
  result= operator_or_methname_token( str,offset)
1644
2134
  case c
1645
- when '=': str<< (eat_next_if(?=)or'')
2135
+ when '=': #===,==
2136
+ str<< (eat_next_if(?=)or'')
1646
2137
 
1647
- when '>':
2138
+ when '>': #=>
1648
2139
  unless ParamListContextNoParen===@parsestack.last
1649
2140
  @moretokens.unshift result
1650
2141
  @moretokens.unshift( *abort_noparens!("=>"))
1651
2142
  result=@moretokens.shift
1652
2143
  end
1653
2144
  @parsestack.last.see self,:arrow
1654
- when '': #record local variable definitions
1655
-
2145
+ when '': #plain assignment: record local variable definitions
2146
+ last_context_not_implicit.lhs=false
2147
+ @moretokens.push *ignored_tokens(true).map{|x|
2148
+ NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
2149
+ }
1656
2150
  @parsestack.push AssignmentRhsContext.new(@linenum)
1657
- @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
2151
+ if eat_next_if ?*
2152
+ tok=OperatorToken.new('*', input_position-1)
2153
+ tok.unary=true
2154
+ @moretokens.push tok
2155
+ WHSPLF[nextchar.chr] or
2156
+ @moretokens << NoWsToken.new(input_position)
2157
+ comma_in_lvalue_list? #is this needed?
2158
+ end
2159
+ @moretokens.push AssignmentRhsListStartToken.new( input_position)
1658
2160
  end
1659
2161
  return result
1660
2162
  end
@@ -1666,6 +2168,7 @@ end
1666
2168
  k=eat_next_if(/[~=]/)
1667
2169
  if k
1668
2170
  result+=k
2171
+ elsif eof?: #do nothing
1669
2172
  else
1670
2173
  WHSPLF[nextchar.chr] or
1671
2174
  @moretokens << NoWsToken.new(input_position)
@@ -1693,10 +2196,11 @@ end
1693
2196
  #-----------------------------------
1694
2197
  def dot_rhs(prevtok)
1695
2198
  safe_recurse { |a|
1696
- @last_operative_token=prevtok
2199
+ set_last_token prevtok
1697
2200
  aa= ignored_tokens
2201
+ was=after_nonid_op?{true}
1698
2202
  tok,pos=callsite_symbol(prevtok)
1699
- tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
2203
+ tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
1700
2204
  a.unshift(*aa)
1701
2205
  }
1702
2206
  end
@@ -1705,7 +2209,7 @@ end
1705
2209
  def back_quote(ch=nil)
1706
2210
  if @last_operative_token===/^(def|::|\.)$/
1707
2211
  oldpos= input_position
1708
- MethNameToken.new(eat_next_if(?`), oldpos)
2212
+ MethNameToken.new(eat_next_if(?`), oldpos) #`
1709
2213
  else
1710
2214
  double_quote(ch)
1711
2215
  end
@@ -1716,7 +2220,7 @@ if false
1716
2220
  def comment(str)
1717
2221
  result=""
1718
2222
  #loop{
1719
- result<<super(nil).to_s
2223
+ result<< super(nil).to_s
1720
2224
 
1721
2225
  if /^\#.*\#$/===result #if comment was ended by a crunch
1722
2226
 
@@ -1762,7 +2266,7 @@ end
1762
2266
  tokch= NoWsToken.new(input_position-1)
1763
2267
  end
1764
2268
  when '('
1765
- lasttok=last_operative_token
2269
+ lasttok=last_token_maybe_implicit #last_operative_token
1766
2270
  #could be: lasttok===/^[a-z_]/i
1767
2271
  if (VarNameToken===lasttok or MethNameToken===lasttok or
1768
2272
  lasttok===FUNCLIKE_KEYWORDS)
@@ -1781,15 +2285,17 @@ end
1781
2285
  if after_nonid_op?{false} or @last_operative_token.has_no_block?
1782
2286
  @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
1783
2287
  else
2288
+ #abort_noparens!
1784
2289
  tokch.set_infix!
1785
- =begin not needed now, i think
2290
+ tokch.as="do"
2291
+ #=begin not needed now, i think
1786
2292
  # 'need to find matching callsite context and end it if implicit'
1787
2293
  lasttok=last_operative_token
1788
- unless lasttok===')' and lasttok.callsite?
2294
+ if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
1789
2295
  @moretokens.push *(abort_1_noparen!(1).push tokch)
1790
2296
  tokch=@moretokens.shift
1791
2297
  end
1792
- =end
2298
+ #=end
1793
2299
 
1794
2300
  localvars.start_block
1795
2301
  @parsestack.push BlockContext.new(@linenum)
@@ -1811,13 +2317,18 @@ end
1811
2317
  end
1812
2318
  ctx=@parsestack.pop
1813
2319
  origch,line=ctx.starter,ctx.linenum
1814
- ch==PAIRS[origch] or
2320
+ if ch!=PAIRS[origch]
2321
+ #kw.extend MismatchedBrace
1815
2322
  lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
1816
2323
  "matching brace location", @filename, line
1817
- BlockContext===ctx and localvars.end_block
2324
+ end
2325
+ if BlockContext===ctx
2326
+ localvars.end_block
2327
+ @moretokens.last.as="end"
2328
+ end
1818
2329
  if ParamListContext==ctx.class
1819
2330
  assert ch==')'
1820
- #kw.set_callsite! #not needed?
2331
+ kw.set_callsite! #not needed?
1821
2332
  end
1822
2333
  return @moretokens.shift
1823
2334
  end
@@ -1826,19 +2337,24 @@ end
1826
2337
  def eof(ch=nil)
1827
2338
  #this must be the very last character...
1828
2339
  oldpos= input_position
1829
- assert(?\0==getc)
2340
+ assert(/\A[\x0\x4\x1a]\Z/===nextchar.chr)
1830
2341
 
1831
- result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
2342
+ result=@file.read!
2343
+ # result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1832
2344
 
1833
- eof? or
1834
- lexerror result,'nul character is not at the end of file'
1835
- input_position_set @file.size
2345
+ # eof? or
2346
+ # lexerror result,'nul character is not at the end of file'
2347
+ # input_position_set @file.size
1836
2348
  return(endoffile_detected result)
1837
2349
  end
1838
2350
 
1839
2351
  #-----------------------------------
1840
2352
  def endoffile_detected(s='')
1841
2353
  @moretokens.push( *(abort_noparens!.push super(s)))
2354
+ if @progress_thread
2355
+ @progress_thread.kill
2356
+ @progress_thread=nil
2357
+ end
1842
2358
  result= @moretokens.shift
1843
2359
  balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
1844
2360
  result
@@ -1851,7 +2367,26 @@ end
1851
2367
 
1852
2368
  #-----------------------------------
1853
2369
  def comma(ch)
1854
- single_char_token(ch)
2370
+ @moretokens.push token=single_char_token(ch)
2371
+ if AssignmentRhsContext===@parsestack[-1] and
2372
+ ParamListContext===@parsestack[-2] ||
2373
+ ParamListContextNoParen===@parsestack[-2] ||
2374
+ WhenParamListContext===@parsestack[-2] ||
2375
+ (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
2376
+ (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
2377
+ @parsestack.pop
2378
+ @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
2379
+ end
2380
+ token.comma_type=
2381
+ case @parsestack[-1]
2382
+ when AssignmentRhsContext: :rhs
2383
+ when ParamListContext,ParamListContextNoParen: :call
2384
+ when ListImmedContext: :array
2385
+ else
2386
+ :lhs if comma_in_lvalue_list?
2387
+ end
2388
+ @parsestack.last.see self,:comma
2389
+ return @moretokens.shift
1855
2390
  end
1856
2391
 
1857
2392
  #-----------------------------------
@@ -1872,7 +2407,7 @@ end
1872
2407
  assert RUBYOPERATORREX===s
1873
2408
  if RUBYNONSYMOPERATORREX===s
1874
2409
  KeywordToken
1875
- elsif @last_operative_token===/^(\.|::|def|undef|alias|defined\?)$/
2410
+ elsif want_op_name
1876
2411
  MethNameToken
1877
2412
  else
1878
2413
  OperatorToken
@@ -1882,9 +2417,7 @@ end
1882
2417
  #-----------------------------------
1883
2418
  #tokenify_results_of :identifier
1884
2419
  save_offsets_in(*CHARMAPPINGS.values.uniq-[
1885
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1886
-
1887
-
2420
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1888
2421
  ])
1889
2422
  #save_offsets_in :symbol
1890
2423