rubylexer 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/History.txt +90 -0
  2. data/Manifest.txt +54 -3
  3. data/README.txt +4 -7
  4. data/Rakefile +3 -2
  5. data/lib/rubylexer.rb +856 -323
  6. data/lib/rubylexer/0.7.0.rb +11 -2
  7. data/lib/rubylexer/0.7.1.rb +2 -0
  8. data/lib/rubylexer/charhandler.rb +4 -4
  9. data/lib/rubylexer/context.rb +86 -9
  10. data/lib/rubylexer/rulexer.rb +455 -101
  11. data/lib/rubylexer/token.rb +166 -43
  12. data/lib/rubylexer/tokenprinter.rb +16 -8
  13. data/lib/rubylexer/version.rb +1 -1
  14. data/rubylexer.vpj +98 -0
  15. data/test/code/all_the_gems.rb +33 -0
  16. data/test/code/all_the_raas.rb +226 -0
  17. data/test/code/all_the_rubies.rb +2 -0
  18. data/test/code/deletewarns.rb +19 -1
  19. data/test/code/dumptokens.rb +39 -8
  20. data/test/code/errscan +2 -0
  21. data/test/code/isolate_error.rb +72 -0
  22. data/test/code/lexloop +14 -0
  23. data/test/code/locatetest.rb +150 -8
  24. data/test/code/regression.rb +109 -0
  25. data/test/code/rubylexervsruby.rb +53 -15
  26. data/test/code/strgen.rb +138 -0
  27. data/test/code/tarball.rb +144 -0
  28. data/test/code/testcases.rb +11 -0
  29. data/test/code/tokentest.rb +115 -24
  30. data/test/data/__eof2.rb +1 -0
  31. data/test/data/__eof5.rb +2 -0
  32. data/test/data/__eof6.rb +2 -0
  33. data/test/data/cvtesc.rb +17 -0
  34. data/test/data/g.rb +6 -0
  35. data/test/data/hd0.rb +3 -0
  36. data/test/data/hdateof.rb +2 -0
  37. data/test/data/hdempty.rb +3 -0
  38. data/test/data/hdr.rb +9 -0
  39. data/test/data/hdr_dos.rb +13 -0
  40. data/test/data/hdr_dos2.rb +18 -0
  41. data/test/data/heart.rb +2 -0
  42. data/test/data/here_escnl.rb +25 -0
  43. data/test/data/here_escnl_dos.rb +20 -0
  44. data/test/data/here_squote.rb +3 -0
  45. data/test/data/heremonsters.rb +140 -0
  46. data/test/data/heremonsters.rb.broken +68 -0
  47. data/test/data/heremonsters.rb.broken.save +68 -0
  48. data/test/data/heremonsters_dos.rb +140 -0
  49. data/test/data/heremonsters_dos.rb.broken +68 -0
  50. data/test/data/illegal_oneliners.rb +1 -0
  51. data/test/data/illegal_stanzas.rb +0 -0
  52. data/test/data/make_ws_strdelim.rb +22 -0
  53. data/test/data/maven2_builer_test.rb +82 -0
  54. data/test/data/migration.rb +8944 -0
  55. data/test/data/modl.rb +6 -0
  56. data/test/data/modl_dos.rb +7 -0
  57. data/test/data/modl_fails.rb +10 -0
  58. data/test/data/multilinestring.rb +6 -0
  59. data/test/data/oneliners.rb +555 -0
  60. data/test/data/p-op.rb +2 -0
  61. data/test/data/p.rb +3 -1710
  62. data/test/data/s.rb +90 -21
  63. data/test/data/simple.rb +1 -0
  64. data/test/data/simple_dos.rb +1 -0
  65. data/test/data/stanzas.rb +1194 -0
  66. data/test/data/strdelim_crlf.rb +6 -0
  67. data/test/data/stuff.rb +6 -0
  68. data/test/data/stuff2.rb +5 -0
  69. data/test/data/stuff3.rb +6 -0
  70. data/test/data/stuff4.rb +6 -0
  71. data/test/data/tkweird.rb +20 -0
  72. data/test/data/unending_stuff.rb +5 -0
  73. data/test/data/whatnot.rb +8 -0
  74. data/test/data/ws_strdelim.rb +0 -0
  75. data/test/test.sh +239 -0
  76. data/testing.txt +39 -50
  77. metadata +110 -12
  78. data/test/code/dl_all_gems.rb +0 -43
  79. data/test/code/unpack_all_gems.rb +0 -15
  80. data/test/data/gemlist.txt +0 -280
data/History.txt CHANGED
@@ -1,3 +1,93 @@
1
+ === 0.7.1/10-29-2008
2
+ * 6 Major Enhancements:
3
+ * handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
4
+ * yet more hacks in aid of string inclusions
5
+ * backslashes in strings are no longer interpreted automatically when lexed
6
+ * here documents are completely rewritten in a tricky way that more closely mimics what MRI does
7
+ * many more flags for tokens to tell apart the various cases:
8
+ * the various different local variable types have to be detected.
9
+ * colons which operate like semicolons or thens are marked as such
10
+ * { } used in block now flagged as parsing like do and end
11
+ * commas now are marked with different types depending on how they're used
12
+ * @variables in methods need to be marked as such, so their parsetree can come out different.
13
+ * clearly mark backquoted strings
14
+ * further refinements of local variable detection and implicit paren placement near these cases:
15
+ * when ws between method name and parenthesis
16
+ * break/return/next
17
+ * ? : << / rescue do
18
+
19
+ * 5 Minor Enhancements
20
+ * colon or star in assignment make it a multi assignment
21
+ * presence of unary * or & in param list forces it to be a multi-param list
22
+ * errors in string inclusions should now be handled better
23
+ * string and stringlike tokens now can tell you the exact sequence of chars used to open and close the string.
24
+ * correctly handling more cases where return/break/next parse differently than a method (yuck!)
25
+
26
+ * 26 Bugfixes
27
+ * ~ operator can be followed with an @, like + and -
28
+ * ~ is overridable, however :: is not
29
+ * raise is not a keyword
30
+ * in addition to 0x00, 0x04 and 0x1a should be considered eof in ruby. why? idunno.
31
+ * setting PROGRESS env var will cause input file position to be printed to stderr periodically.
32
+ * defined? is not a funclike keyword... really more of a unary operator
33
+ * $- is a legitimate global variable.
34
+ * better parsing of lvalue list following for keyword.
35
+ * rescue is a variable define context only when right after => and before then (or disguises).
36
+ * better placement of implicit parens around def param list
37
+ * (global) variable aliasing now supported
38
+ * local vars in END block are NOT scoped to the block!
39
+ * local vars in def param lists aren't considered variables til after the initializer for that var
40
+ * end of def header is treated like ; even if none is present
41
+ * never put here document right after class keyword
42
+ * look for start of line directives at end of here document
43
+ * oops, mac newlines don't have to be supported
44
+ * dos newlines better tolerated around here documents
45
+ * less line number/offset confusion around here documents
46
+ * newline after (non-operator) rescue is hard (but not after INNERBOUNDINGWORDS)
47
+ * handling eof in more strange places
48
+ * always expect unary op after for
49
+ * unary ops should know about the before-but-not-after rule!
50
+ * newlines after = should be escaped
51
+ * \c? and \C-? are not interpreted the same as other ctrl chars
52
+ * \n\r and \r are not recognized as nl sequences
53
+
54
+ * 18 Internal Changes (not user visible)
55
+ * commas cause a :comma event on the parsestack
56
+ * some of the lists of types of operators are available now as arrays of strings instead of regexps
57
+ * single and double quote now have separate implementations again
58
+ * keep track of whether an implicit open or close paren has just been emitted
59
+ * put ws around << to keep slickedit happy
60
+ * the eof characters are also considered whitespace.
61
+ * identifier lexer now uses regexps more heavily
62
+ * method formal parameter list is not considered an lvalue context for commas.
63
+ * class and def now have their own parse contexts
64
+ * unary star causes a :splat event on the parsestack
65
+ * is_var_name now detects var tokens just from the token type, not looking at local vars table.
66
+ * a faster regexp-based implementation of string scanning
67
+ * moved yucky side effect out of quote_expected?
68
+ * these keywords: class module def for defined? no longer automatically create operator context
69
+ * a new context for BEGIN/END keywords
70
+ * a new context for param list of return/next/break
71
+ * new escape sequence processors for regexp and %W list
72
+ * numbers now scanned with a regexp
73
+
74
+ * 15 Enhancements and bug fixes to tests:
75
+ * just print a notice on errors which are also syntax errors for ruby
76
+ * a little cleanup of temp files
77
+ * rubylexervsruby and tokentest can take input from stdin
78
+ * unlexer improvements
79
+ * dumptokens now has a --silent cmdline option
80
+ * locatetest.rb is significantly enhanced
81
+ * --unified option to diff seems to work better than -u
82
+ * tokentest better verifies exact token contents...
83
+ * tokentest now uses open and close fields of strings to verify string bounds exactly
84
+ * CRLF in a string is always treated like just a LF. (CR is elided.)
85
+ * allow_ooo hacky flag marks tokens whose offset errors are to be ignored.
86
+ * all other offset errors have been downgraded to warnings.
87
+ * most of the offset problems I had been seeing have been fixed, tho
88
+ * offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
89
+ * tokentest has a --loop option, for load testing
90
+
1
91
  === 0.7.0/2-15-2008
2
92
  * implicit tokens are now emitted at the right times (need more test code)
3
93
  * local variables are now temporarily hidden by class, module, and def
data/Manifest.txt CHANGED
@@ -19,7 +19,6 @@ lib/rubylexer/symboltable.rb
19
19
  lib/rubylexer/charhandler.rb
20
20
  lib/assert.rb
21
21
  lib/rubylexer.rb
22
- test/data/gemlist.txt
23
22
  test/data/blockassigntest.rb
24
23
  test/data/for.rb
25
24
  test/data/chunky_bacon.rb
@@ -58,10 +57,62 @@ test/data/chunky_bacon2.rb
58
57
  test/data/format.rb
59
58
  test/code/locatetest.rb
60
59
  test/code/rubylexervsruby.rb
61
- test/code/dl_all_gems.rb
62
- test/code/unpack_all_gems.rb
63
60
  test/code/tokentest.rb
64
61
  test/code/dumptokens.rb
65
62
  test/code/torment
66
63
  test/code/locatetest
67
64
  test/code/deletewarns.rb
65
+ lib/rubylexer/0.7.1.rb
66
+ rubylexer.vpj
67
+ test/code/all_the_gems.rb
68
+ test/code/all_the_raas.rb
69
+ test/code/all_the_rubies.rb
70
+ test/code/errscan
71
+ test/code/isolate_error.rb
72
+ test/code/lexloop
73
+ test/code/regression.rb
74
+ test/code/strgen.rb
75
+ test/code/tarball.rb
76
+ test/code/testcases.rb
77
+ test/data/chunky.plain.rb
78
+ test/data/cvtesc.rb
79
+ test/data/__eof2.rb
80
+ test/data/__eof5.rb
81
+ test/data/__eof6.rb
82
+ test/data/hd0.rb
83
+ test/data/hdateof.rb
84
+ test/data/hdempty.rb
85
+ test/data/hdr_dos2.rb
86
+ test/data/hdr_dos.rb
87
+ test/data/hdr.rb
88
+ test/data/here_escnl_dos.rb
89
+ test/data/here_escnl.rb
90
+ test/data/heremonsters_dos.rb
91
+ test/data/heremonsters_dos.rb.broken
92
+ test/data/heremonsters.rb
93
+ test/data/heremonsters.rb.broken
94
+ test/data/heremonsters.rb.broken.save
95
+ test/data/here_squote.rb
96
+ test/data/illegal_oneliners.rb
97
+ test/data/illegal_stanzas.rb
98
+ test/data/make_ws_strdelim.rb
99
+ test/data/maven2_builer_test.rb
100
+ test/data/migration.rb
101
+ test/data/modl_dos.rb
102
+ test/data/modl_fails.rb
103
+ test/data/modl.rb
104
+ test/data/multilinestring.rb
105
+ test/data/oneliners.rb
106
+ test/data/simple_dos.rb
107
+ test/data/simple.rb
108
+ test/data/stanzas.rb
109
+ test/data/strdelim_crlf.rb
110
+ test/data/stuff2.rb
111
+ test/data/stuff3.rb
112
+ test/data/stuff4.rb
113
+ test/data/stuff.rb
114
+ test/data/tkweird.rb
115
+ test/data/unending_stuff.rb
116
+ test/data/whatnot.rb
117
+ test/data/ws_strdelim.rb
118
+ test/test.sh
data/README.txt CHANGED
@@ -67,10 +67,7 @@ keywords, depending on context:
67
67
  any overrideable operator and most keywords can also be method names
68
68
 
69
69
  == todo
70
- test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
71
- these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
72
70
  test more ways: cvt source to dos or mac fmt before testing
73
- test more ways: run unit tests after passing thru rubylexer (0.7)
74
71
  test more ways: test require'd, load'd, or eval'd code as well (0.7)
75
72
  lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
76
73
  incremental lexing (ides want this (for performance))
@@ -78,12 +75,10 @@ put everything in a namespace
78
75
  integrate w/ other tools...
79
76
  html colorized output?
80
77
  move more state onto @parsestack (ongoing)
81
- the new cases in p.rb now compile, but won't run
82
78
  expand on test documentation
83
79
  use want_op_name more
84
80
  return result as a half-parsed tree (with parentheses and the like matched)
85
81
  emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
86
- strings are still slow
87
82
  emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
88
83
  token pruning in dumptokens...
89
84
 
@@ -96,8 +91,10 @@ string tokenization sometimes a little different from ruby around newlines
96
91
  string contents might not be correctly translated in a few cases (0.8?)
97
92
  symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
98
93
  '\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
99
- windows or mac newline in source are likely to cause problems in obscure cases (need test case)
94
+ windows newline in source is likely to cause problems in obscure cases (need test case)
100
95
  unterminated =begin is not an error (0.8)
101
96
  ruby 1.9 completely unsupported (0.9)
102
97
  character sets other than ascii are not supported at all (1.0)
103
-
98
+ regression test currently shows 14 errors with differences in exact token ordering
99
+ -around string inclusions. these errors are much less serious than they seem.
100
+ offset of AssignmentRhsListEndToken appears to be off by 1
data/Rakefile CHANGED
@@ -13,12 +13,13 @@ require 'lib/rubylexer/version.rb'
13
13
  hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
14
14
  _.author = "Caleb Clausen"
15
15
  _.email = "rubylexer-owner @at@ inforadical .dot. net"
16
- _.url = "http://rubylexer.rubyforge.org/"
17
- _.extra_deps = ["sequence"]
16
+ _.url = ["http://rubylexer.rubyforge.org/", "http://rubyforge.org/projects/rubylexer/"]
17
+ _.extra_deps << ['sequence', '>= 0.2.0']
18
18
  _.test_globs=["test/{code/*,data/*rb*,results/}"]
19
19
  _.description=desc
20
20
  _.summary=desc[/\A[^.]+\./]
21
21
  _.spec_extras={:bindir=>''}
22
+ _.rdoc_pattern=/\A(howtouse\.txt|testing\.txt|README\.txt|lib\/.*\.rb)\Z/
22
23
  end
23
24
 
24
25
 
data/lib/rubylexer.rb CHANGED
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -18,7 +18,6 @@
18
18
  =end
19
19
 
20
20
 
21
-
22
21
  require 'rubylexer/rulexer' #must be 1st!!!
23
22
  require 'rubylexer/version'
24
23
  require 'rubylexer/token'
@@ -32,9 +31,11 @@ require 'rubylexer/tokenprinter'
32
31
  #-----------------------------------
33
32
  class RubyLexer
34
33
  include NestedContexts
34
+
35
+
35
36
 
36
37
  RUBYSYMOPERATORREX=
37
- %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
38
+ %r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
38
39
  # (nasty beastie, eh?)
39
40
  #these are the overridable operators
40
41
  #does not match flow-control operators like: || && ! or and if not
@@ -42,23 +43,25 @@ class RubyLexer
42
43
  #or .. ... ?:
43
44
  #for that use:
44
45
  RUBYNONSYMOPERATORREX=
45
- %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
46
+ %r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
46
47
  RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
47
48
  UNSYMOPS=/^[~!]$/ #always unary
48
49
  UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
49
50
  WHSPCHARS=WHSPLF+"\\#"
50
- OPORBEGINWORDS="(if|unless|while|until)"
51
- BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
52
- FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
51
+ OPORBEGINWORDLIST=%w(if unless while until)
52
+ BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
53
+ OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
54
+ BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
55
+ FUNCLIKE_KEYWORDS=/^(break|next|redo|return|yield|retry|super|BEGIN|END)$/
53
56
  VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
54
57
  INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
55
58
  BINOPWORDS="(and|or)"
56
- NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
59
+ NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
57
60
  NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
58
61
  NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
59
62
 
60
63
  RUBYKEYWORDS=%r{
61
- ^(alias|#{BINOPWORDS}|not|undef|end|
64
+ ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
62
65
  #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
63
66
  #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
64
67
  )$
@@ -72,8 +75,9 @@ class RubyLexer
72
75
  ?A..?Z => :identifier,
73
76
  ?_ => :identifier,
74
77
  ?0..?9 => :number,
75
- %{"'} => :double_quote,
76
- ?` => :back_quote,
78
+ ?" => :double_quote, #"
79
+ ?' => :single_quote, #'
80
+ ?` => :back_quote, #`
77
81
 
78
82
  WHSP => :whitespace, #includes \r
79
83
  ?, => :comma,
@@ -99,7 +103,9 @@ class RubyLexer
99
103
  #?\r => :newline, #implicitly escaped after op
100
104
 
101
105
  ?\\ => :escnewline,
102
- ?\0 => :eof,
106
+ ?\x00 => :eof,
107
+ ?\x04 => :eof,
108
+ ?\x1a => :eof,
103
109
 
104
110
  "[({" => :open_brace,
105
111
  "])}" => :close_brace,
@@ -108,41 +114,90 @@ class RubyLexer
108
114
  ?# => :comment
109
115
  }
110
116
 
111
- attr_reader :incomplete_here_tokens, :parsestack
117
+ attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
112
118
 
113
119
 
114
120
  #-----------------------------------
115
- def initialize(filename,file,linenum=1)
116
- super(filename,file, linenum)
121
+ def initialize(filename,file,linenum=1,offset_adjust=0)
122
+ @offset_adjust=0 #set again in next line
123
+ super(filename,file, linenum,offset_adjust)
117
124
  @start_linenum=linenum
118
125
  @parsestack=[TopLevelContext.new]
119
- @incomplete_here_tokens=[]
126
+ @incomplete_here_tokens=[] #not used anymore
127
+ @pending_here_bodies=[]
120
128
  @localvars_stack=[SymbolTable.new]
121
129
  @defining_lvar=nil
122
130
  @in_def_name=false
131
+ @last_operative_token=nil
132
+ @last_token_maybe_implicit=nil
123
133
 
124
134
  @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
125
135
 
126
136
  start_of_line_directives
137
+ progress_printer
138
+ end
139
+
140
+ def progress_printer
141
+ return unless ENV['RL_PROGRESS']
142
+ $stderr.puts 'printing progresses'
143
+ @progress_thread=Thread.new do
144
+ until EoiToken===@last_operative_token
145
+ sleep 10
146
+ $stderr.puts @file.pos
147
+ end
148
+ end
127
149
  end
128
150
 
129
151
  def localvars;
130
152
  @localvars_stack.last
131
153
  end
132
154
 
155
+ attr :localvars_stack
156
+ attr :offset_adjust
157
+ attr_writer :pending_here_bodies
158
+
159
+ #-----------------------------------
160
+ def set_last_token(tok)
161
+ @last_operative_token=@last_token_maybe_implicit=tok
162
+ end
163
+
133
164
  #-----------------------------------
134
165
  def get1token
135
166
  result=super #most of the action's here
136
167
 
168
+ if ENV['PROGRESS']
169
+ @last_cp_pos||=0
170
+ @start_time||=Time.now
171
+ if result.offset-@last_cp_pos>100000
172
+ $stderr.puts "#{result.offset} #{Time.now-@start_time}"
173
+ @last_cp_pos=result.offset
174
+ end
175
+ end
176
+
137
177
  #now cleanup and housekeeping
138
178
 
139
179
 
140
180
  #check for bizarre token types
141
181
  case result
182
+ when ImplicitParamListStartToken, ImplicitParamListEndToken
183
+ @last_token_maybe_implicit=result
184
+ result
142
185
  when StillIgnoreToken#,nil
143
186
  result
187
+ when StringToken
188
+ set_last_token result
189
+ assert !(IgnoreToken===@last_operative_token)
190
+ result.elems.map!{|frag|
191
+ if String===frag
192
+ result.translate_escapes(frag)
193
+ else
194
+ frag
195
+ end
196
+ } if AUTO_UNESCAPE_STRINGS
197
+ result
198
+
144
199
  when Token#,String
145
- @last_operative_token=result
200
+ set_last_token result
146
201
  assert !(IgnoreToken===@last_operative_token)
147
202
  result
148
203
  else
@@ -150,6 +205,20 @@ class RubyLexer
150
205
  end
151
206
  end
152
207
 
208
+ #-----------------------------------
209
+ def eof?
210
+ super or EoiToken===@last_operative_token
211
+ end
212
+
213
+ #-----------------------------------
214
+ def input_position
215
+ super+@offset_adjust
216
+ end
217
+
218
+ #-----------------------------------
219
+ def input_position_raw
220
+ @file.pos
221
+ end
153
222
 
154
223
  #-----------------------------------
155
224
  def balanced_braces?
@@ -163,7 +232,7 @@ class RubyLexer
163
232
  s=eat_next_if(?$) or return nil
164
233
 
165
234
  if t=((identifier_as_string(?$) or special_global))
166
- s<<t
235
+ s << t
167
236
  else error= "missing $id name"
168
237
  end
169
238
 
@@ -173,17 +242,27 @@ class RubyLexer
173
242
  #-----------------------------------
174
243
  def at_identifier(ch=nil)
175
244
  result = (eat_next_if(?@) or return nil)
176
- result << (eat_next_if(?@)or'')
245
+ result << (eat_next_if(?@) or '')
177
246
  if t=identifier_as_string(?@)
178
- result<<t
247
+ result << t
179
248
  else error= "missing @id name"
180
249
  end
181
- return lexerror(VarNameToken.new(result),error)
250
+ result=VarNameToken.new(result)
251
+ result.in_def=true if inside_method_def?
252
+ return lexerror(result,error)
182
253
  end
183
254
 
184
255
  private
185
256
  #-----------------------------------
186
- def here_spread_over_ruby_code(rl,tok)
257
+ def inside_method_def?
258
+ @parsestack.reverse_each{|ctx|
259
+ ctx.starter=='def' and ctx.state!=:saw_def and return true
260
+ }
261
+ return false
262
+ end
263
+
264
+ #-----------------------------------
265
+ def here_spread_over_ruby_code(rl,tok) #not used anymore
187
266
  assert(!rl.incomplete_here_tokens.empty?)
188
267
  @incomplete_here_tokens += rl.incomplete_here_tokens
189
268
  end
@@ -207,10 +286,10 @@ private
207
286
  end
208
287
 
209
288
  #-----------------------------------
210
- WSCHARSET=/[#\\\n\s\t\v\r\f]/
289
+ WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
211
290
  def ignored_tokens(allow_eof=false,allow_eol=true)
212
291
  result=[]
213
- result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
292
+ result << @moretokens.shift while StillIgnoreToken===@moretokens.first
214
293
  @moretokens.empty? or return result
215
294
  loop do
216
295
  unless @moretokens.empty?
@@ -273,8 +352,8 @@ private
273
352
  result = ((
274
353
  #order matters here, but it shouldn't
275
354
  #(but til_charset must be last)
276
- eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
277
- (eat_next_if('-') and ("-"+getchar)) or
355
+ eat_if(/-[a-z0-9_]/i,2) or
356
+ eat_next_if(/[!@&+`'=~\-\/\\,.;<>*"$?:]/) or
278
357
  (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
279
358
  ))
280
359
  end
@@ -289,23 +368,26 @@ private
289
368
  #just asserts because those contexts are never encountered.
290
369
  #control goes through symbol(<...>,nil)
291
370
  assert( /^[a-z_]$/i===context)
292
- assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
371
+ assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
293
372
 
294
- @moretokens.unshift(*parse_keywords(str,oldpos) do
373
+ @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
295
374
  #if not a keyword,
296
375
  case str
297
376
  when FUNCLIKE_KEYWORDS; #do nothing
298
377
  when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
299
378
  end
300
- safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
379
+ was_last=@last_operative_token
380
+ @last_operative_token=tok if tok
381
+ safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
301
382
  end)
302
383
  return @moretokens.shift
303
384
  end
304
385
 
305
386
  #-----------------------------------
387
+ IDENTREX={}
306
388
  def identifier_as_string(context)
307
389
  #must begin w/ letter or underscore
308
- str=eat_next_if(/[_a-z]/i) or return nil
390
+ /[_a-z]/i===nextchar.chr or return
309
391
 
310
392
  #equals, question mark, and exclamation mark
311
393
  #might be allowed at the end in some contexts.
@@ -315,45 +397,16 @@ private
315
397
  #i hope i've captured all right conditions....
316
398
  #context should always be ?: right after def, ., and :: now
317
399
 
318
- maybe_eq,maybe_qm,maybe_ex = case context
319
- when ?@,?$ then [nil,nil,nil]
320
- when ?: then [?=, ??, ?!]
321
- else [nil,??, ?!]
322
- end
323
-
324
- @in_def_name and maybe_eq= ?=
325
-
326
- str<<til_charset(/[^a-z0-9_]/i)
327
-
328
- #look for ?, !, or =, if allowed
329
- case b=getc
330
- when nil #means we're at eof
331
- #handling nil here prevents b from ever matching
332
- #a nil value of maybe_qm, maybe_ex or maybe_eq
333
- when maybe_qm
334
- str << b
335
- when maybe_ex
336
- nc=(nextchar unless eof?)
337
- #does ex appear to be part of a larger operator?
338
- if nc==?= #or nc==?~
339
- back1char
340
- else
341
- str << b
342
- end
343
- when maybe_eq
344
- nc=(nextchar unless eof?)
345
- #does eq appear to be part of a larger operator?
346
- if nc==?= or nc==?~ or nc==?>
347
- back1char
348
- else
349
- str << b
350
- end
351
- else
352
- back1char
353
- end
400
+ #= and ! only match if not part of a larger operator
401
+ trailers =
402
+ case context
403
+ when ?@,?$ then ""
404
+ # when ?: then "!(?![=])|\\?|=(?![=~>])"
405
+ else "!(?![=])|\\?"
406
+ end
407
+ @in_def_name||context==?: and trailers<<"|=(?![=~>])"
354
408
 
355
-
356
- return str
409
+ @file.scan(IDENTREX[trailers]||=/^[_a-z][a-z0-9_]*(?:#{trailers})?/i)
357
410
  end
358
411
 
359
412
  #-----------------------------------
@@ -380,18 +433,26 @@ private
380
433
  #a comma has been seen. are we in an
381
434
  #lvalue list or some other construct that uses commas?
382
435
  def comma_in_lvalue_list?
383
- @parsestack.last.lhs= (not ListContext===@parsestack.last)
436
+ @parsestack.last.lhs=
437
+ case l=@parsestack.last
438
+ when ListContext:
439
+ when DefContext: l.in_body
440
+ else true
441
+ end
384
442
  end
385
443
 
386
444
  #-----------------------------------
387
445
  def in_lvar_define_state
388
446
  #@defining_lvar is a hack
389
447
  @defining_lvar or case ctx=@parsestack.last
390
- when ForSMContext; ctx.state==:for
391
- when RescueSMContext; ctx.state==:arrow
448
+ #when ForSMContext; ctx.state==:for
449
+ when RescueSMContext
450
+ @last_operative_token.ident=="=>" and @file.match? /\A[\s\v]*([:;#\n]|then[^a-zA-Z0-9_])/m
392
451
  #when BlockParamListLhsContext; true
393
452
  end
394
453
  end
454
+
455
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
395
456
 
396
457
  #-----------------------------------
397
458
  #determine if an alphabetic identifier refers to a variable
@@ -400,45 +461,50 @@ private
400
461
  #if appropriate. adds tok to the
401
462
  #local var table if its a local var being defined for the first time.
402
463
 
403
- #note: what we here call variables (rather, constants) following ::
404
- #might actually be methods at runtime, but that's immaterial to tokenization.
405
-
406
- #note: this routine should determine the correct token type for name and
407
- #create the appropriate token. currently this is not done because callers
408
- #typically have done it (perhaps incorrectly) already.
409
- def var_or_meth_name(name,lasttok,pos)
464
+ #in general, operators in ruby are disambuated by the before-but-not-after rule.
465
+ #an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
466
+ #whitespace before but not after the 'operator' indicates it is to be considered a
467
+ #value token instead. otherwise it is a binary operator. (unary (prefix) ops count
468
+ #as 'values' here.)
469
+ def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
410
470
  #look for call site if not a keyword or keyword is function-like
411
471
  #look for and ignore local variable names
412
472
 
413
473
  assert String===name
414
474
 
475
+ was_in_lvar_define_state=in_lvar_define_state
415
476
  #maybe_local really means 'maybe local or constant'
416
477
  maybe_local=case name
417
- when /[^a-z_0-9]$/i; #do nothing
418
- when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
419
- when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
478
+ when /[^a-z_0-9]$/i #do nothing
479
+ when /^[a-z_]/
480
+ (localvars===name or
481
+ VARLIKE_KEYWORDS===name or
482
+ was_in_lvar_define_state
483
+ ) and not lasttok===/^(\.|::)$/
484
+ when /^[A-Z]/
485
+ is_const=true
486
+ not lasttok==='.' #this is the right algorithm for constants...
420
487
  end
421
488
 
422
489
  assert(@moretokens.empty?)
423
490
 
424
491
  oldlast=@last_operative_token
425
492
 
426
- tok=@last_operative_token=VarNameToken.new(name,pos)
493
+ tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
427
494
 
428
495
  oldpos= input_position
429
496
  sawnl=false
430
497
  result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
431
498
  if sawnl || eof?
432
- if maybe_local then
433
- if in_lvar_define_state
434
- if /^[a-z_][a-zA-Z_0-9]*$/===name
435
- assert !(lasttok===/^(\.|::)$/)
436
- localvars[name]=true
437
- else
438
- lexerror tok,"not a valid variable name: #{name}"
439
- end
440
- return result.unshift(tok)
499
+ if was_in_lvar_define_state
500
+ if /^[a-z_][a-zA-Z_0-9]*$/===name
501
+ assert !(lasttok===/^(\.|::)$/)
502
+ localvars[name]=true
503
+ else
504
+ lexerror tok,"not a valid variable name: #{name}"
441
505
  end
506
+ return result.unshift(tok)
507
+ elsif maybe_local
442
508
  return result.unshift(tok) #if is_const
443
509
  else
444
510
  return result.unshift(
@@ -455,6 +521,8 @@ private
455
521
  when ?=; not /^=[>=~]$/===readahead(2)
456
522
  when ?,; comma_in_lvalue_list?
457
523
  when ?); last_context_not_implicit.lhs
524
+ when ?i; /^in[^a-zA-Z_0-9]/===readahead(3) and
525
+ ForSMContext===last_context_not_implicit
458
526
  when ?>,?<; /^(.)\1=$/===readahead(3)
459
527
  when ?*,?&; /^(.)\1?=/===readahead(3)
460
528
  when ?|; /^\|\|?=/===readahead(3) or
@@ -463,8 +531,8 @@ private
463
531
  readahead(2)[1] != ?|
464
532
  when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
465
533
  end
466
- if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
467
- tok=VarNameToken.new(name,pos)
534
+ if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
535
+ tok=assign_lvar_type! VarNameToken.new(name,pos)
468
536
  if /[^a-z_0-9]$/i===name
469
537
  lexerror tok,"not a valid variable name: #{name}"
470
538
  elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
@@ -476,44 +544,106 @@ private
476
544
  implicit_parens_to_emit=
477
545
  if assignment_coming
478
546
  @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
479
- 0
547
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
480
548
  else
481
549
  case nc
482
550
  when nil: 2
483
- when ?!; readahead(2)=='!=' ? 2 : 1
551
+ when ?!; /^![=~]$/===readahead(2) ? 2 : 1
552
+ when ?d;
553
+ if /^do([^a-zA-Z0-9_]|$)/===readahead(3)
554
+ if maybe_local and expecting_do?
555
+ ty=VarNameToken
556
+ 0
557
+ else
558
+ maybe_local=false
559
+ 2
560
+ end
561
+ else
562
+ 1
563
+ end
484
564
  when NEVERSTARTPARAMLISTFIRST
485
565
  (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
486
- when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
566
+ when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1 #"
487
567
  when ?{
488
568
  maybe_local=false
569
+ 1
570
+ =begin
489
571
  x=2
490
572
  x-=1 if /\A(return|break|next)\Z/===name and
491
573
  !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
492
574
  x
575
+ =end
493
576
  when ?(;
494
- maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
577
+ maybe_local=false
578
+ lastid=lasttok&&lasttok.ident
579
+ case lastid
580
+ when /\A[;(]|do\Z/: was_after_nonid_op=false
581
+ when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
582
+ when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
583
+ end if KeywordToken===lasttok
584
+ was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
585
+ want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
586
+ # /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
587
+ # MethNameToken===lasttok or
588
+ # RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
589
+ # )
590
+
591
+ #look ahead for closing paren (after some whitespace...)
592
+ want_parens=false if @file.match? /\A.(?:\s|\v|\#.*\n)*\)/
593
+ # afterparen=@file.pos
594
+ # getchar
595
+ # ignored_tokens(true)
596
+ # want_parens=false if nextchar==?)
597
+ # @file.pos=afterparen
598
+
599
+ want_parens ? 1 : 0
495
600
  when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
496
- when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
497
- when ?:,??; next2=readahead(2);
498
- WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
499
- # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
500
- when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
501
- when ?[; ws_toks.empty? ? 2 : 3
601
+ when ?+, ?-, ?%, ?/
602
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
603
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
604
+ )
605
+ 1
606
+ else
607
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
608
+ end
609
+ when ?*, ?&
610
+ lasttok=@last_operative_token
611
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
612
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
613
+ )
614
+ 1
615
+ else
616
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
617
+ end
618
+ when ?:
619
+ next2=readahead(2)
620
+ if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
621
+ $1 && !ws_toks.empty? ? 3 : 2
622
+ else
623
+ 3
624
+ end
625
+ when ??; next3=readahead(3);
626
+ /^\?([#{WHSPLF}]|[a-z_][a-z_0-9])/io===next3 ? 2 : 3
627
+ # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
628
+ when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?["'`a-zA-Z_0-9]/]) ? 3 : 2
629
+ when ?[; ws_toks.empty?&&!(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 2 : 3
502
630
  when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
503
631
  else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
504
632
  end
505
633
  end
506
634
 
507
- if is_const and implicit_parens_to_emit==3 then
635
+ if is_const and implicit_parens_to_emit==3 then #needed?
508
636
  implicit_parens_to_emit=1
509
637
  end
510
638
 
511
- tok=if maybe_local and implicit_parens_to_emit>=2
639
+ if maybe_local and implicit_parens_to_emit>=2
512
640
  implicit_parens_to_emit=0
513
- VarNameToken
641
+ ty=VarNameToken
514
642
  else
515
- MethNameToken
516
- end.new(name,pos)
643
+ ty||=MethNameToken
644
+ end
645
+ tok=assign_lvar_type!(ty.new(name,pos))
646
+
517
647
 
518
648
  case implicit_parens_to_emit
519
649
  when 2;
@@ -523,8 +653,17 @@ private
523
653
  arr,pass=*param_list_coming_with_2_or_more_params?
524
654
  result.push( *arr )
525
655
  unless pass
656
+ #only 1 param in list
526
657
  result.unshift ImplicitParamListStartToken.new(oldpos)
527
- @parsestack.push ParamListContextNoParen.new(@linenum)
658
+ last=result.last
659
+ last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
660
+ if /^(break|next|return)$/===name and
661
+ !(KeywordToken===lasttok and /^(.|::)$/===lasttok.ident)
662
+ ty=KWParamListContextNoParen
663
+ else
664
+ ty=ParamListContextNoParen
665
+ end
666
+ @parsestack.push ty.new(@linenum)
528
667
  end
529
668
  when 0; #do nothing
530
669
  else raise 'invalid value of implicit_parens_to_emit'
@@ -547,11 +686,13 @@ private
547
686
  result=[get1token]
548
687
  pass=loop{
549
688
  tok=get1token
550
- result<<tok
689
+ result << tok
551
690
  if @parsestack.size==basesize
552
691
  break false
553
692
  elsif ','==tok.to_s and @parsestack.size==basesize+1
554
693
  break true
694
+ elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.unary and @parsestack.size==basesize+1
695
+ break true
555
696
  elsif EoiToken===tok
556
697
  lexerror tok, "unexpected eof in parameter list"
557
698
  end
@@ -560,11 +701,13 @@ private
560
701
  end
561
702
 
562
703
  #-----------------------------------
563
- CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
564
- ParamListContextNoParen=>ImplicitParamListEndToken,
565
- WhenParamListContext=>KwParamListEndToken,
566
- RescueSMContext=>KwParamListEndToken
567
- }
704
+ CONTEXT2ENDTOK={
705
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
706
+ ParamListContextNoParen=>ImplicitParamListEndToken,
707
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
708
+ WhenParamListContext=>KwParamListEndToken,
709
+ RescueSMContext=>KwParamListEndToken
710
+ }
568
711
  def abort_noparens!(str='')
569
712
  #assert @moretokens.empty?
570
713
  result=[]
@@ -576,7 +719,63 @@ private
576
719
  return result
577
720
  end
578
721
 
579
- if false #no longer used
722
+ #-----------------------------------
723
+ CONTEXT2ENDTOK_FOR_RESCUE={
724
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
725
+ ParamListContextNoParen=>ImplicitParamListEndToken,
726
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
727
+ WhenParamListContext=>KwParamListEndToken,
728
+ RescueSMContext=>KwParamListEndToken
729
+ }
730
+ def abort_noparens_for_rescue!(str='')
731
+ #assert @moretokens.empty?
732
+ result=[]
733
+ ctx=@parsestack.last
734
+ while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
735
+ break if AssignmentRhsContext===ctx && !ctx.multi_assign?
736
+ if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
737
+ result.push ImplicitParamListEndToken.new(input_position-str.length),
738
+ AssignmentRhsListEndToken.new(input_position-str.length)
739
+ @parsestack.pop
740
+ @parsestack.pop
741
+ break
742
+ end
743
+ result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
744
+ break if RescueSMContext===ctx #why is this here?
745
+ @parsestack.pop
746
+ ctx=@parsestack.last
747
+ end
748
+ return result
749
+ end
750
+
751
+ #-----------------------------------
752
+ CONTEXT2ENDTOK_FOR_DO={
753
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
754
+ ParamListContextNoParen=>ImplicitParamListEndToken,
755
+ ExpectDoOrNlContext=>1,
756
+ #WhenParamListContext=>KwParamListEndToken,
757
+ #RescueSMContext=>KwParamListEndToken
758
+ }
759
+ def abort_noparens_for_do!(str='')
760
+ #assert @moretokens.empty?
761
+ result=[]
762
+ while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
763
+ break if klass==1
764
+ result << klass.new(input_position-str.length)
765
+ @parsestack.pop
766
+ end
767
+ return result
768
+ end
769
+
770
+ #-----------------------------------
771
+ def expecting_do?
772
+ @parsestack.reverse_each{|ctx|
773
+ next if AssignmentRhsContext===ctx
774
+ return !!CONTEXT2ENDTOK_FOR_DO[ctx.class]
775
+ }
776
+ return false
777
+ end
778
+
580
779
  #-----------------------------------
581
780
  def abort_1_noparen!(offs=0)
582
781
  assert @moretokens.empty?
@@ -585,12 +784,12 @@ if false #no longer used
585
784
  @parsestack.pop
586
785
  result << AssignmentRhsListEndToken.new(input_position-offs)
587
786
  end
588
- ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
787
+ if ParamListContextNoParen===@parsestack.last #or lexerror huh,'{} with no matching callsite'
589
788
  @parsestack.pop
590
789
  result << ImplicitParamListEndToken.new(input_position-offs)
790
+ end
591
791
  return result
592
792
  end
593
- end
594
793
 
595
794
  #-----------------------------------
596
795
  #parse keywords now, to prevent confusion over bare symbols
@@ -598,6 +797,7 @@ end
598
797
  #if arg is not a keyword, the block is called
599
798
  def parse_keywords(str,offset)
600
799
  assert @moretokens.empty?
800
+ assert !(KeywordToken===@last_operative_token and /A(.|::|def)\Z/===@last_operative_token.ident)
601
801
  result=[KeywordToken.new(str,offset)]
602
802
 
603
803
  case str
@@ -619,11 +819,15 @@ end
619
819
  /^(do)$/===start and localvars.end_block
620
820
  /^(class|module|def)$/===start and @localvars_stack.pop
621
821
 
622
- when "class","module"
822
+ when "module"
623
823
  result.first.has_end!
624
824
  @parsestack.push WantsEndContext.new(str,@linenum)
625
825
  @localvars_stack.push SymbolTable.new
626
826
 
827
+ when "class"
828
+ result.first.has_end!
829
+ @parsestack.push ClassContext.new(str,@linenum)
830
+
627
831
  when "if","unless" #could be infix form without end
628
832
  if after_nonid_op?{false} #prefix form
629
833
  result.first.has_end!
@@ -653,10 +857,11 @@ end
653
857
  #expect_do_or_end_or_nl! str #handled by ForSMContext now
654
858
  @parsestack.push ForSMContext.new(@linenum)
655
859
  when "do"
656
- result.unshift(*abort_noparens!(str))
860
+ result.unshift(*abort_noparens_for_do!(str))
657
861
  if ExpectDoOrNlContext===@parsestack.last
658
862
  @parsestack.pop
659
863
  assert WantsEndContext===@parsestack.last
864
+ result.last.as=";"
660
865
  else
661
866
  result.last.has_end!
662
867
  @parsestack.push WantsEndContext.new(str,@linenum)
@@ -665,10 +870,10 @@ end
665
870
  end
666
871
  when "def"
667
872
  result.first.has_end!
668
- @parsestack.push WantsEndContext.new("def",@linenum)
669
- @localvars_stack.push SymbolTable.new
873
+ @parsestack.push ctx=DefContext.new(@linenum)
874
+ ctx.state=:saw_def
670
875
  safe_recurse { |aa|
671
- @last_operative_token=KeywordToken.new "def" #hack
876
+ set_last_token KeywordToken.new "def" #hack
672
877
  result.concat ignored_tokens
673
878
 
674
879
  #read an expr like a.b.c or a::b::c
@@ -683,10 +888,11 @@ end
683
888
  when/^\)$/.token_pat then parencount-=1
684
889
  end
685
890
  EoiToken===tok and lexerror tok, "eof in def header"
686
- result<<tok
891
+ result << tok
687
892
  end until parencount==0 #@parsestack.size==old_size
688
- else #no parentheses, all tail
689
- @last_operative_token=KeywordToken.new "." #hack hack
893
+ @localvars_stack.push SymbolTable.new
894
+ else #no parentheses, all tail
895
+ set_last_token KeywordToken.new "." #hack hack
690
896
  tokindex=result.size
691
897
  result << tok=symbol(false,false)
692
898
  name=tok.to_s
@@ -700,25 +906,30 @@ end
700
906
  when /^[a-z_]/; localvars===name
701
907
  when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
702
908
  end
703
- if !ty and maybe_local
704
- result.push( *ignored_tokens(false,false) )
705
- nc=nextchar
909
+ result.push( *ignored_tokens(false,false) )
910
+ nc=nextchar
911
+ if !ty and maybe_local
706
912
  if nc==?: || nc==?.
707
913
  ty=VarNameToken
708
914
  end
709
915
  end
710
- unless ty
711
- ty=MethNameToken
712
- endofs=tok.offset+tok.to_s.length
713
- result[tokindex+1...tokindex+1]=
714
- [ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
916
+ if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
917
+ ty=MethNameToken
918
+ if nc != ?(
919
+ endofs=tok.offset+tok.to_s.length
920
+ newtok=ImplicitParamListStartToken.new(endofs)
921
+ result.insert tokindex+1, newtok
922
+ end
715
923
  end
716
924
 
717
925
  assert result[tokindex].equal?(tok)
718
- result[tokindex]=ty.new(tok.to_s,tok.offset)
926
+ var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
927
+ @localvars_stack.push SymbolTable.new
928
+ var.in_def=true if inside_method_def? and var.respond_to? :in_def=
929
+ result[tokindex]=var
719
930
 
720
931
 
721
- #if a.b.c.d is seen, a, b, and c
932
+ #if a.b.c.d is seen, a, b and c
722
933
  #should be considered maybe varname instead of methnames.
723
934
  #the last (d in the example) is always considered a methname;
724
935
  #it's what's being defined.
@@ -727,8 +938,7 @@ end
727
938
  #a could even be a keyword (eg self or block_given?).
728
939
  end
729
940
  #read tail: .b.c.d etc
730
- result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
731
- ###@last_operative_token=result.last #naive
941
+ result.reverse_each{|res| break set_last_token res unless StillIgnoreToken===res}
732
942
  assert !(IgnoreToken===@last_operative_token)
733
943
  state=:expect_op
734
944
  @in_def_name=true
@@ -737,12 +947,22 @@ end
737
947
  #look for start of parameter list
738
948
  nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
739
949
  if state==:expect_op and /^[a-z_(&*]/i===nc
740
- result.concat def_param_list
950
+ ctx.state=:def_param_list
951
+ list,listend=def_param_list
952
+ result.concat list
953
+ end_index=result.index(listend)
954
+ ofs=listend.offset
955
+ if endofs
956
+ result.insert end_index,ImplicitParamListEndToken.new(ofs)
957
+ else
958
+ ofs+=listend.to_s.size
959
+ end
960
+ result.insert end_index+1,EndDefHeaderToken.new(ofs)
741
961
  break
742
962
  end
743
963
 
744
964
  tok=get1token
745
- result<<tok
965
+ result<< tok
746
966
  case tok
747
967
  when EoiToken
748
968
  lexerror tok,'unexpected eof in def header'
@@ -752,9 +972,18 @@ end
752
972
  state=:expect_op
753
973
  when /^(\.|::)$/.token_pat
754
974
  lexerror tok,'expected ident' unless state==:expect_op
975
+ if endofs
976
+ result.insert -2, ImplicitParamListEndToken.new(endofs)
977
+ endofs=nil
978
+ end
755
979
  state=:expect_name
756
980
  when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
981
+ ctx.state=:def_body
757
982
  state==:expect_op or lexerror tok,'expected identifier'
983
+ if endofs
984
+ result.insert -2,ImplicitParamListEndToken.new(tok.offset)
985
+ end
986
+ result.insert -2, EndDefHeaderToken.new(tok.offset)
758
987
  break
759
988
  else
760
989
  lexerror(tok, "bizarre token in def name: " +
@@ -765,24 +994,34 @@ end
765
994
  }
766
995
  when "alias"
767
996
  safe_recurse { |a|
768
- @last_operative_token=KeywordToken.new "alias" #hack
997
+ set_last_token KeywordToken.new "alias" #hack
769
998
  result.concat ignored_tokens
770
999
  res=symbol(eat_next_if(?:),false)
771
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
772
- @last_operative_token=KeywordToken.new "alias" #hack
773
- result.concat ignored_tokens
774
- res=symbol(eat_next_if(?:),false)
775
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
1000
+ unless res
1001
+ lexerror(result.first,"bad symbol in alias")
1002
+ else
1003
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
1004
+ result<< res
1005
+ set_last_token KeywordToken.new "alias" #hack
1006
+ result.concat ignored_tokens
1007
+ res=symbol(eat_next_if(?:),false)
1008
+ unless res
1009
+ lexerror(result.first,"bad symbol in alias")
1010
+ else
1011
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
1012
+ result<< res
1013
+ end
1014
+ end
776
1015
  }
777
1016
  when "undef"
778
1017
  safe_recurse { |a|
779
1018
  loop do
780
- @last_operative_token=KeywordToken.new "," #hack
1019
+ set_last_token KeywordToken.new "," #hack
781
1020
  result.concat ignored_tokens
782
1021
  tok=symbol(eat_next_if(?:),false)
783
1022
  tok or lexerror(result.first,"bad symbol in undef")
784
1023
  result<< tok
785
- @last_operative_token=tok
1024
+ set_last_token tok
786
1025
  assert !(IgnoreToken===@last_operative_token)
787
1026
 
788
1027
  sawnl=false
@@ -809,13 +1048,13 @@ end
809
1048
  unless after_nonid_op? {false}
810
1049
  #rescue needs to be treated differently when in operator context...
811
1050
  #i think no RescueSMContext should be pushed on the stack...
812
- #plus, the rescue token should be marked as infix
813
- result.first.set_infix!
1051
+ result.first.set_infix! #plus, the rescue token should be marked as infix
1052
+ result.unshift(*abort_noparens_for_rescue!(str))
814
1053
  else
815
1054
  result.push KwParamListStartToken.new(offset+str.length)
816
1055
  #corresponding EndToken emitted by abort_noparens! on leaving rescue context
817
- result.unshift(*abort_noparens!(str))
818
1056
  @parsestack.push RescueSMContext.new(@linenum)
1057
+ result.unshift(*abort_noparens!(str))
819
1058
  end
820
1059
 
821
1060
  when "then"
@@ -831,16 +1070,43 @@ end
831
1070
  result.unshift(*abort_noparens!(str))
832
1071
 
833
1072
  when /\A(return|break|next)\Z/
834
- result=yield
835
- result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
1073
+ fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
1074
+ tok=KeywordToken.new(str,offset)
1075
+ result=yield tok
1076
+ result[0]=tok
1077
+ tok.has_no_block!
1078
+
1079
+
1080
+ when 'END'
1081
+ #END could be treated, lexically, just as if it is an
1082
+ #ordinary method, except that local vars created in
1083
+ #END blocks are visible to subsequent code. (Why??)
1084
+ #That difference forces a custom parsing.
1085
+ if @last_operative_token===/^(\.|::)$/
1086
+ result=yield nil #should pass a keyword token here
1087
+ else
1088
+ safe_recurse{
1089
+ old=result.first
1090
+ result=[
1091
+ MethNameToken.new(old.ident,old.offset),
1092
+ ImplicitParamListStartToken.new(input_position),
1093
+ ImplicitParamListEndToken.new(input_position),
1094
+ *ignored_tokens
1095
+ ]
1096
+ getchar=='{' or lexerror(result.first,"expected { after #{str}")
1097
+ result.push KeywordToken.new('{',input_position-1)
1098
+ result.last.set_infix!
1099
+ @parsestack.push BeginEndContext.new(str,offset)
1100
+ }
1101
+ end
836
1102
 
837
1103
  when FUNCLIKE_KEYWORDS
838
- result=yield
1104
+ result=yield nil #should be a keyword token
839
1105
 
840
1106
  when RUBYKEYWORDS
841
1107
  #do nothing
842
1108
 
843
- else result=yield
1109
+ else result=yield nil
844
1110
 
845
1111
  end
846
1112
 
@@ -881,11 +1147,11 @@ end
881
1147
  #-----------------------------------
882
1148
  def block_param_list_lookahead
883
1149
  safe_recurse{ |la|
884
- @last_operative_token=KeywordToken.new ';'
1150
+ set_last_token KeywordToken.new ';'
885
1151
  a=ignored_tokens
886
1152
 
887
1153
  if eat_next_if(?|)
888
- a<<KeywordToken.new("|", input_position-1)
1154
+ a<< KeywordToken.new("|", input_position-1)
889
1155
  if true
890
1156
  @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
891
1157
  nextchar==?| and a.push NoWsToken.new(input_position)
@@ -909,7 +1175,7 @@ else
909
1175
  end
910
1176
 
911
1177
  tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
912
- a<<tok
1178
+ a<< tok
913
1179
  end
914
1180
  assert@defining_lvar || AssignmentRhsContext===@parsestack.last
915
1181
  @defining_lvar=false
@@ -920,14 +1186,14 @@ else
920
1186
  @parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
921
1187
  @parsestack.pop
922
1188
 
923
- a<<KeywordToken.new('|',tok.offset)
1189
+ a<< KeywordToken.new('|',tok.offset)
924
1190
  @moretokens.empty? or
925
1191
  fixme %#moretokens might be set from get1token call above...might be bad#
926
1192
  end
927
1193
  end
928
1194
  end
929
1195
 
930
- @last_operative_token=KeywordToken.new ';'
1196
+ set_last_token KeywordToken.new ';'
931
1197
  #a.concat ignored_tokens
932
1198
 
933
1199
  #assert @last_operative_token===';'
@@ -948,6 +1214,7 @@ end
948
1214
  @in_def_name=false
949
1215
  result=[]
950
1216
  normal_comma_level=old_parsestack_size=@parsestack.size
1217
+ listend=nil
951
1218
  safe_recurse { |a|
952
1219
  assert(@moretokens.empty?)
953
1220
  assert((not IgnoreToken===@moretokens[0]))
@@ -972,18 +1239,22 @@ end
972
1239
  alias === call
973
1240
  end
974
1241
 
975
- @last_operative_token=KeywordToken.new ',' #hack
1242
+ set_last_token KeywordToken.new ',' #hack
976
1243
  #read local parameter names
1244
+ nextvar=nil
977
1245
  loop do
978
1246
  expect_name=(@last_operative_token===',' and
979
1247
  normal_comma_level==@parsestack.size)
980
1248
  expect_name and @defining_lvar||=true
981
1249
  result << tok=get1token
982
- lexerror tok, "unexpected eof in def header" if EoiToken===tok
1250
+ break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
983
1251
 
984
1252
  #break if at end of param list
985
- endingblock===tok and
986
- old_parsestack_size>=@parsestack.size and break
1253
+ if endingblock===tok and old_parsestack_size>=@parsestack.size
1254
+ nextvar and localvars[nextvar]=true #add nextvar to local vars
1255
+ listend=tok
1256
+ break
1257
+ end
987
1258
 
988
1259
  #next token is a local var name
989
1260
  #(or the one after that if unary ops present)
@@ -992,33 +1263,40 @@ end
992
1263
  case tok
993
1264
  when IgnoreToken #, /^[A-Z]/ #do nothing
994
1265
  when /^,$/.token_pat #hack
995
-
996
-
1266
+
997
1267
  when VarNameToken
998
1268
  assert@defining_lvar
999
1269
  @defining_lvar=false
1000
1270
  assert((not @last_operative_token===','))
1271
+ # assert !nextvar
1272
+ nextvar=tok.ident
1273
+ localvars[nextvar]=false #remove nextvar from list of local vars for now
1001
1274
  when /^[&*]$/.token_pat #unary form...
1002
1275
  #a NoWsToken is also expected... read it now
1003
1276
  result.concat maybe_no_ws_token #not needed?
1004
- @last_operative_token=KeywordToken.new ','
1277
+ set_last_token KeywordToken.new ','
1005
1278
  else
1006
1279
  lexerror tok,"unfamiliar var name '#{tok}'"
1007
1280
  end
1008
- elsif /^,$/.token_pat===tok and
1009
- normal_comma_level+1==@parsestack.size and
1010
- AssignmentRhsContext===@parsestack.last
1011
- #seeing comma here should end implicit rhs started within the param list
1012
- result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
1013
- @parsestack.pop
1281
+ elsif /^,$/.token_pat===tok
1282
+ if normal_comma_level+1==@parsestack.size and
1283
+ AssignmentRhsContext===@parsestack.last
1284
+ #seeing comma here should end implicit rhs started within the param list
1285
+ result << AssignmentRhsListEndToken.new(tok.offset)
1286
+ @parsestack.pop
1287
+ end
1288
+ if nextvar and normal_comma_level==@parsestack.size
1289
+ localvars[nextvar]=true #now, finally add nextvar back to local vars
1290
+ nextvar
1291
+ end
1014
1292
  end
1015
1293
  end
1016
1294
 
1017
1295
  @defining_lvar=false
1018
-
1296
+ @parsestack.last.see self,:semi
1019
1297
 
1020
1298
  assert(@parsestack.size <= old_parsestack_size)
1021
- assert(endingblock[tok])
1299
+ assert(endingblock[tok] || ErrorToken===tok)
1022
1300
 
1023
1301
  #hack: force next token to look like start of a
1024
1302
  #new stmt, if the last ignored_tokens
@@ -1026,42 +1304,54 @@ end
1026
1304
  #(just in case the next token parsed
1027
1305
  #happens to call quote_expected? or after_nonid_op)
1028
1306
  result.concat ignored_tokens
1029
- if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
1030
- !(NewlineToken===@last_operative_token) and
1031
- !(/^(end|;)$/===@last_operative_token)
1032
- @last_operative_token=KeywordToken.new ';'
1307
+ # if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
1308
+ # !(NewlineToken===@last_operative_token) and
1309
+ # !(/^(end|;)$/===@last_operative_token)
1310
+ #result<<EndDefHeaderToken.new(result.last.offset+result.last.to_s.size)
1311
+ set_last_token KeywordToken.new ';'
1033
1312
  result<< get1token
1034
- end
1313
+ # end
1035
1314
  }
1036
1315
 
1037
- return result
1316
+ return result,listend
1038
1317
  end
1039
1318
 
1040
1319
 
1041
1320
  #-----------------------------------
1042
1321
  #handle % in ruby code. is it part of fancy quote or a modulo operator?
1043
1322
  def percent(ch)
1044
- if quote_expected? ch
1323
+ if AssignmentContext===@parsestack.last
1324
+ @parsestack.pop
1325
+ op=true
1326
+ end
1327
+
1328
+ if !op and quote_expected?(ch) ||
1329
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
1045
1330
  fancy_quote ch
1046
- else
1331
+ else
1047
1332
  biop ch
1048
- end
1333
+ end
1049
1334
  end
1050
1335
 
1051
1336
  #-----------------------------------
1052
1337
  #handle * & in ruby code. is unary or binary operator?
1053
1338
  def star_or_amp(ch)
1054
- assert('*&'[ch])
1055
- want_unary=unary_op_expected? ch
1056
- result=(quadriop ch)
1057
- if want_unary
1058
- #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
1059
- assert OperatorToken===result
1060
- result.unary=true #result should distinguish unary+binary *&
1061
- WHSPLF[nextchar.chr] or
1062
- @moretokens << NoWsToken.new(input_position)
1063
- end
1064
- result
1339
+ assert('*&'[ch])
1340
+ want_unary=unary_op_expected?(ch) ||
1341
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
1342
+ result=quadriop(ch)
1343
+ if want_unary
1344
+ #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
1345
+ assert OperatorToken===result
1346
+ result.unary=true #result should distinguish unary+binary *&
1347
+ WHSPLF[nextchar.chr] or
1348
+ @moretokens << NoWsToken.new(input_position)
1349
+ comma_in_lvalue_list?
1350
+ if ch=='*'
1351
+ @parsestack.last.see self, :splat
1352
+ end
1353
+ end
1354
+ result
1065
1355
  end
1066
1356
 
1067
1357
  #-----------------------------------
@@ -1079,15 +1369,23 @@ end
1079
1369
  #-----------------------------------
1080
1370
  def regex_or_div(ch)
1081
1371
  #space after slash always means / operator, rather than regex start
1082
- if after_nonid_op?{ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}] }
1083
- return regex(ch)
1084
- else #/ is operator
1085
- result=getchar
1086
- if eat_next_if(?=)
1087
- result << '='
1088
- end
1089
- return(operator_or_methname_token result)
1090
- end
1372
+ #= after slash always means /= operator, rather than regex start
1373
+ if AssignmentContext===@parsestack.last
1374
+ @parsestack.pop
1375
+ op=true
1376
+ end
1377
+
1378
+ if !op and after_nonid_op?{
1379
+ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
1380
+ } || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
1381
+ return regex(ch)
1382
+ else #/ is operator
1383
+ result=getchar
1384
+ if eat_next_if(?=)
1385
+ result << '='
1386
+ end
1387
+ return(operator_or_methname_token result)
1388
+ end
1091
1389
  end
1092
1390
 
1093
1391
  #-----------------------------------
@@ -1101,8 +1399,8 @@ end
1101
1399
  s=tok.to_s
1102
1400
  case s
1103
1401
  when /[^a-z_0-9]$/i; false
1104
- when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1105
- when /^[A-Z]/; VarNameToken===tok
1402
+ # when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1403
+ when /^[A-Z_]/i; VarNameToken===tok
1106
1404
  when /^[@$<]/; true
1107
1405
  else raise "not var or method name: #{s}"
1108
1406
  end
@@ -1139,18 +1437,22 @@ end
1139
1437
  unless eat_next_if(?:)
1140
1438
  #cancel implicit contexts...
1141
1439
  @moretokens.push(*abort_noparens!(':'))
1440
+ @moretokens.push KeywordToken.new(':',startpos)
1142
1441
 
1143
- #end ternary context, if any
1144
- @parsestack.last.see self,:colon
1145
-
1146
- TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
1147
-
1148
- if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
1442
+ case @parsestack.last
1443
+ when TernaryContext: @parsestack.pop #should be in the context's see handler
1444
+ when ExpectDoOrNlContext: #should be in the context's see handler
1149
1445
  @parsestack.pop
1150
1446
  assert @parsestack.last.starter[/^(while|until|for)$/]
1447
+ @moretokens.last.as=";"
1448
+ when RescueSMContext:
1449
+ @moretokens.last.as=";"
1450
+ else @moretokens.last.as="then"
1151
1451
  end
1152
1452
 
1153
- @moretokens.push KeywordToken.new(':',startpos)
1453
+ #end ternary context, if any
1454
+ @parsestack.last.see self,:colon
1455
+
1154
1456
  return @moretokens.shift
1155
1457
  end
1156
1458
 
@@ -1182,9 +1484,15 @@ end
1182
1484
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1183
1485
  result= opmatches ? read(opmatches.size) :
1184
1486
  case nc=nextchar
1185
- when ?" then assert notbare;double_quote('"')
1186
- when ?' then assert notbare;double_quote("'")
1187
- when ?` then read(1)
1487
+ when ?" #"
1488
+ assert notbare
1489
+ open=':"'; close='"'
1490
+ double_quote('"')
1491
+ when ?' #'
1492
+ assert notbare
1493
+ open=":'"; close="'"
1494
+ single_quote("'")
1495
+ when ?` then read(1) #`
1188
1496
  when ?@ then at_identifier.to_s
1189
1497
  when ?$ then dollar_identifier.to_s
1190
1498
  when ?_,?a..?z then identifier_as_string(?:)
@@ -1197,7 +1505,12 @@ end
1197
1505
  result
1198
1506
  else error= "unexpected char starting symbol: #{nc.chr}"
1199
1507
  end
1200
- return lexerror(klass.new(result,start),error)
1508
+ result= lexerror(klass.new(result,start,notbare ? ':' : ''),error)
1509
+ if open
1510
+ result.open=open
1511
+ result.close=close
1512
+ end
1513
+ return result
1201
1514
  end
1202
1515
 
1203
1516
  def merge_assignment_op_in_setter_callsites?
@@ -1211,12 +1524,12 @@ end
1211
1524
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1212
1525
  return [opmatches ? read(opmatches.size) :
1213
1526
  case nc=nextchar
1214
- when ?` then read(1)
1527
+ when ?` then read(1) #`
1215
1528
  when ?_,?a..?z,?A..?Z then
1216
1529
  context=merge_assignment_op_in_setter_callsites? ? ?: : nc
1217
1530
  identifier_as_string(context)
1218
1531
  else
1219
- @last_operative_token=KeywordToken.new(';')
1532
+ set_last_token KeywordToken.new(';')
1220
1533
  lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
1221
1534
  nil
1222
1535
  end, start
@@ -1233,20 +1546,63 @@ end
1233
1546
  ender=til_charset(/[#{quote}]/)
1234
1547
  (quote==getchar) or
1235
1548
  return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
1549
+ quote_real=true
1236
1550
  else
1237
1551
  quote='"'
1238
1552
  ender=til_charset(/[^a-zA-Z0-9_]/)
1239
1553
  ender.length >= 1 or
1240
- return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
1554
+ return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
1241
1555
  end
1242
1556
 
1243
- res= HerePlaceholderToken.new( dash, quote, ender )
1557
+ res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
1558
+ if true
1559
+ res.open=["<<",dash,quote,ender,quote].to_s
1560
+ procrastinated=til_charset(/[\n]/)#+readnl
1561
+ unless @base_file
1562
+ @base_file=@file
1563
+ @file=Sequence::List.new([@file])
1564
+ @file.pos=@base_file.pos
1565
+ end
1566
+ #actually delete procrastinated from input
1567
+ @file.delete(input_position_raw-procrastinated.size...input_position_raw)
1568
+
1569
+ nl=readnl or return lexerror(res, "here header without body (at eof)")
1570
+
1571
+ @moretokens<< res
1572
+ bodystart=input_position
1573
+ @offset_adjust = @min_offset_adjust+procrastinated.size
1574
+ #was: @offset_adjust += procrastinated.size
1575
+ body=here_body(res)
1576
+ res.close=body.close
1577
+ @offset_adjust = @min_offset_adjust
1578
+ #was: @offset_adjust -= procrastinated.size
1579
+ bodysize=input_position-bodystart
1580
+
1581
+ #one or two already read characters are overwritten here,
1582
+ #in order to keep offsets correct in the long term
1583
+ #(at present, offsets and line numbers between
1584
+ #here header and its body will be wrong. but they should re-sync thereafter.)
1585
+ newpos=input_position_raw-nl.size
1586
+ #unless procrastinated.empty?
1587
+ @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
1588
+ #end
1589
+ input_position_set newpos
1590
+
1591
+ #line numbers would be wrong within the procrastinated section
1592
+ @linenum-=1
1593
+
1594
+ #be nice to get the here body token at the right place in input, too...
1595
+ @pending_here_bodies<< body
1596
+ @offset_adjust-=bodysize#+nl.size
1597
+
1598
+ return @moretokens.shift
1599
+ else
1244
1600
  @incomplete_here_tokens.push res
1245
1601
 
1246
1602
  #hack: normally this should just be in get1token
1247
1603
  #this fixup is necessary because the call the get1token below
1248
1604
  #makes a recursion.
1249
- @last_operative_token=res
1605
+ set_last_token res
1250
1606
 
1251
1607
  safe_recurse { |a|
1252
1608
  assert(a.object_id==@moretokens.object_id)
@@ -1269,7 +1625,7 @@ end
1269
1625
 
1270
1626
  tok=get1token
1271
1627
  assert(a.equal?( @moretokens))
1272
- toks<<tok
1628
+ toks<< tok
1273
1629
  EoiToken===tok and lexerror tok, "here body expected before eof"
1274
1630
  end while res.unsafe_to_use
1275
1631
  assert(a.equal?( @moretokens))
@@ -1281,13 +1637,14 @@ end
1281
1637
  #the action continues in newline, where
1282
1638
  #the rest of the here token is read after a
1283
1639
  #newline has been seen and res.affix is eventually called
1640
+ end
1284
1641
  end
1285
1642
 
1286
1643
  #-----------------------------------
1287
1644
  def lessthan(ch) #match quadriop('<') or here doc or spaceship op
1288
1645
  case readahead(3)
1289
- when /^<<['"`\-a-z0-9_]$/i
1290
- if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
1646
+ when /^<<['"`\-a-z0-9_]$/i #'
1647
+ if quote_expected?(ch) and not @last_operative_token==='class'
1291
1648
  here_header
1292
1649
  else
1293
1650
  operator_or_methname_token read(2)
@@ -1309,101 +1666,231 @@ end
1309
1666
  error='illegal escape sequence'
1310
1667
  end
1311
1668
 
1312
- @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
1313
- optional_here_bodies
1669
+ #optimization: when thru with regurgitated text from a here document,
1670
+ #revert back to original unadorned Sequence instead of staying in the List.
1671
+ if @base_file and indices=@file.instance_eval{@start_pos} and
1672
+ (indices[-2]..indices[-1])===@file.pos
1673
+ @base_file.pos=@file.pos
1674
+ @file=@base_file
1675
+ @base_file=nil
1676
+ result="\n"
1677
+ end
1678
+
1679
+ @offset_adjust=@min_offset_adjust
1680
+ @moretokens.push *optional_here_bodies
1681
+ ln=@linenum
1682
+ @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
1683
+ FileAndLineToken.new(@filename,ln,input_position)
1684
+
1685
+ start_of_line_directives
1314
1686
 
1315
- lexerror EscNlToken.new(@filename,ln-1,result,pos), error
1687
+ return @moretokens.shift
1316
1688
  end
1317
1689
 
1318
1690
  #-----------------------------------
1319
1691
  def optional_here_bodies
1320
-
1692
+ result=[]
1693
+ if true
1321
1694
  #handle here bodies queued up by previous line
1322
- #(we should be more compatible with dos/mac style newlines...)
1695
+ pos=input_position
1696
+ while body=@pending_here_bodies.shift
1697
+ #body.offset=pos
1698
+ result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
1699
+ result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
1700
+ result.push body
1701
+ #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
1702
+ #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
1703
+ body.headtok.line=@linenum-1
1704
+ end
1705
+ else
1706
+ #...(we should be more compatible with dos/mac style newlines...)
1323
1707
  while tofill=@incomplete_here_tokens.shift
1708
+ result.push(
1709
+ here_body(tofill),
1710
+ FileAndLineToken.new(@filename,@linenum,input_position)
1711
+ )
1712
+ assert(eof? || "\r\n"[prevchar])
1713
+ tofill.line=@linenum-1
1714
+ end
1715
+ end
1716
+ return result
1717
+ end
1718
+
1719
+ #-----------------------------------
1720
+ def here_body(tofill)
1721
+ close="\n"
1324
1722
  tofill.string.offset= input_position
1723
+ linecount=1 #for terminator
1724
+ assert("\n"==prevchar)
1325
1725
  loop {
1326
- assert("\r\n"[prevchar])
1726
+ assert("\n"==prevchar)
1327
1727
 
1328
1728
  #here body terminator?
1329
- oldpos= input_position
1729
+ oldpos= input_position_raw
1330
1730
  if tofill.dash
1331
- til_charset(/[^#{WHSP}]/o)
1731
+ close+=til_charset(/[^#{WHSP}]/o)
1732
+ end
1733
+ break if eof? #this is an error, should be handled better
1734
+ if read(tofill.ender.size)==tofill.ender
1735
+ crs=til_charset(/[^\r]/)||''
1736
+ if nl=readnl
1737
+ close+=tofill.ender+crs+nl
1738
+ break
1739
+ end
1332
1740
  end
1333
- break if eof?
1334
- break if read(tofill.ender.size)==tofill.ender and readnl
1335
1741
  input_position_set oldpos
1336
1742
 
1743
+ assert("\n"==prevchar)
1744
+
1337
1745
  if tofill.quote=="'"
1338
- line=til_charset(/[\r\n]/)+readnl
1339
- line.gsub! "\\\\", "\\"
1746
+ line=til_charset(/[\n]/)
1747
+ unless nl=readnl
1748
+ assert eof?
1749
+ break #this is an error, should be handled better
1750
+ end
1751
+ line.chomp!("\r")
1752
+ line<< "\n"
1753
+ assert("\n"==prevchar)
1754
+ #line.gsub! "\\\\", "\\"
1340
1755
  tofill.append line
1341
- assert(line[-1..-1][/[\r\n]/])
1756
+ tofill.string.bs_handler=:squote_heredoc_esc_seq
1757
+ linecount+=1
1758
+ assert("\n"==line[-1,1])
1759
+ assert("\n"==prevchar)
1342
1760
  else
1343
1761
 
1762
+ assert("\n"==prevchar)
1763
+
1344
1764
  back1char #-1 to make newline char the next to read
1345
1765
  @linenum-=1
1346
1766
 
1767
+ assert /[\r\n]/===nextchar.chr
1768
+
1347
1769
  #retr evrything til next nl
1770
+ if FASTER_STRING_ESCAPES
1771
+ line=all_quote("\r\n", tofill.quote, "\r\n")
1772
+ else
1348
1773
  line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
1774
+ end
1775
+ linecount+=1
1349
1776
  #(you didn't know all_quote could take a regex, did you?)
1350
1777
 
1778
+ assert("\n"==prevchar)
1779
+
1351
1780
  #get rid of fals that otherwise appear to be in the middle of
1352
1781
  #a string (and are emitted out of order)
1353
1782
  fal=@moretokens.pop
1354
1783
  assert FileAndLineToken===fal || fal.nil?
1355
1784
 
1785
+ assert line.bs_handler
1786
+ tofill.string.bs_handler||=line.bs_handler
1787
+
1788
+ tofill.append_token line
1789
+ tofill.string.elems<<'' unless String===tofill.string.elems.last
1790
+
1791
+ assert("\n"==prevchar)
1792
+
1356
1793
  back1char
1357
1794
  @linenum-=1
1358
1795
  assert("\r\n"[nextchar.chr])
1359
- tofill.append_token line
1360
1796
  tofill.append readnl
1797
+
1798
+ assert("\n"==prevchar)
1361
1799
  end
1800
+
1801
+ assert("\n"==prevchar)
1362
1802
  }
1803
+
1363
1804
 
1364
- assert(eof? || "\r\n"[prevchar])
1805
+ str=tofill.string
1806
+ str.bs_handler||=:dquote_esc_seq if str.elems.size==1 and str.elems.first==''
1365
1807
  tofill.unsafe_to_use=false
1366
- tofill.line=@linenum-1
1367
-
1368
- @moretokens.push \
1369
- tofill.bodyclass.new(tofill),
1370
- FileAndLineToken.new(@filename,@linenum,input_position)
1371
- end
1372
-
1808
+ assert str.bs_handler
1809
+ #?? or tofill.string.elems==[]
1810
+
1811
+
1812
+ tofill.string.instance_eval{@char="`"} if tofill.quote=="`"
1813
+ #special cased, but I think that's all that's necessary...
1814
+
1815
+ result=tofill.bodyclass.new(tofill,linecount)
1816
+ result.open=str.open=""
1817
+ tofill.close=close
1818
+ result.close=str.close=close[1..-1]
1819
+ result.offset=str.offset
1820
+ assert str.open
1821
+ assert str.close
1822
+ return result
1373
1823
  end
1374
1824
 
1375
1825
  #-----------------------------------
1376
1826
  def newline(ch)
1377
1827
  assert("\r\n"[nextchar.chr])
1378
1828
 
1379
-
1380
-
1381
1829
  #ordinary newline handling (possibly implicitly escaped)
1382
1830
  assert("\r\n"[nextchar.chr])
1383
1831
  assert !@parsestack.empty?
1384
1832
  assert @moretokens.empty?
1385
- result=if NewlineToken===@last_operative_token or #hack
1386
- @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1387
- !after_nonid_op?{false}
1388
- then #hack-o-rama: probly cases left out above
1389
- a= abort_noparens!
1390
- ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1391
- assert !@parsestack.empty?
1392
- @parsestack.last.see self,:semi
1393
-
1394
- a << super(ch)
1395
- @moretokens.replace a+@moretokens
1396
- @moretokens.shift
1397
- else
1398
- offset= input_position
1399
- nl=readnl
1400
- @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
1401
- EscNlToken.new(@filename,@linenum-1,nl,offset)
1402
- #WsToken.new ' ' #why? #should be "\\\n" ?
1403
- end
1404
1833
 
1405
- optional_here_bodies
1834
+ pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
1835
+ pre.allow_ooo_offset=true
1836
+
1837
+ if NewlineToken===@last_operative_token or #hack
1838
+ (KeywordToken===@last_operative_token and
1839
+ @last_operative_token.ident=="rescue" and
1840
+ !@last_operative_token.infix?) or
1841
+ #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1842
+ !after_nonid_op?{false}
1843
+ then #hack-o-rama: probly cases left out above
1844
+ @offset_adjust=@min_offset_adjust
1845
+ a= abort_noparens!
1846
+ ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1847
+ assert !@parsestack.empty?
1848
+ @parsestack.last.see self,:semi
1849
+
1850
+ a << super(ch)
1851
+ @moretokens.replace a+@moretokens
1852
+ else
1853
+ @offset_adjust=@min_offset_adjust
1854
+ offset= input_position
1855
+ nl=readnl
1856
+ @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
1857
+ FileAndLineToken.new(@filename,@linenum,input_position)
1858
+ end
1859
+
1860
+ #optimization: when thru with regurgitated text from a here document,
1861
+ #revert back to original unadorned Sequence instead of staying in the list.
1862
+ if @base_file and indices=@file.instance_eval{@start_pos} and
1863
+ (indices[-2]..indices[-1])===@file.pos and Sequence::SubSeq===@file.list.last
1864
+ @base_file.pos=@file.pos
1865
+ @file=@base_file
1866
+ @base_file=nil
1867
+ end
1868
+
1869
+ fal=@moretokens.last
1870
+ assert FileAndLineToken===fal
1871
+
1872
+ @offset_adjust=@min_offset_adjust
1873
+
1874
+ @moretokens.unshift(*optional_here_bodies)
1875
+ result=@moretokens.shift
1876
+
1877
+ #adjust line count in fal to account for newlines in here bodys
1878
+ i=@moretokens.size-1
1879
+ while(i>=0)
1880
+ #assert FileAndLineToken===@moretokens[i]
1881
+ i-=1 if FileAndLineToken===@moretokens[i]
1882
+ break unless HereBodyToken===@moretokens[i]
1883
+ pre_fal=true
1884
+ fal.line-=@moretokens[i].linecount
1406
1885
 
1886
+ i-=1
1887
+ end
1888
+
1889
+ if pre_fal
1890
+ @moretokens.unshift result
1891
+ pre.offset=result.offset
1892
+ result=pre
1893
+ end
1407
1894
  start_of_line_directives
1408
1895
 
1409
1896
  return result
@@ -1424,15 +1911,16 @@ end
1424
1911
 
1425
1912
  begin
1426
1913
  eof? and raise "eof before =end"
1427
- more<<til_charset(/[\r\n]/)
1428
- more<<readnl
1914
+ more<< til_charset(/[\r\n]/)
1915
+ eof? and raise "eof before =end"
1916
+ more<< readnl
1429
1917
  end until readahead(EQENDLENGTH)==EQEND
1430
1918
 
1431
1919
  #read rest of line after =end
1432
1920
  more << til_charset(/[\r\n]/)
1433
- assert((?\r===nextchar or ?\n===nextchar))
1921
+ assert((eof? or ?\r===nextchar or ?\n===nextchar))
1434
1922
  assert !(/[\r\n]/===more[-1,1])
1435
- more<< readnl
1923
+ more<< readnl unless eof?
1436
1924
 
1437
1925
  # newls= more.scan(/\r\n?|\n\r?/)
1438
1926
  # @linenum+= newls.size
@@ -1445,7 +1933,7 @@ end
1445
1933
  #handle __END__
1446
1934
  if ENDMARKER===readahead(ENDMARKERLENGTH)
1447
1935
  assert !(ImplicitContext===@parsestack.last)
1448
- @moretokens.unshift endoffile_detected(read(7))
1936
+ @moretokens.unshift endoffile_detected(read(ENDMARKERLENGTH))
1449
1937
  # input_position_set @file.size
1450
1938
  end
1451
1939
  end
@@ -1460,11 +1948,13 @@ end
1460
1948
  def unary_op_expected?(ch) #yukko hack
1461
1949
  '*&='[readahead(2)[1..1]] and return false
1462
1950
 
1951
+ return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
1952
+
1463
1953
  after_nonid_op? {
1464
1954
  #possible func-call as operator
1465
1955
 
1466
1956
  not is_var_name? and
1467
- WHSPLF[prevchar]
1957
+ WHSPLF[prevchar] and !WHSPLF[readahead(2)[1..1]]
1468
1958
  }
1469
1959
  end
1470
1960
 
@@ -1473,11 +1963,6 @@ end
1473
1963
  # <<, %, ? in ruby
1474
1964
  #returns whether current token is to be the start of a literal
1475
1965
  def quote_expected?(ch) #yukko hack
1476
- if AssignmentContext===@parsestack.last
1477
- @parsestack.pop
1478
- return false
1479
- end
1480
-
1481
1966
  case ch[0]
1482
1967
  when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
1483
1968
  when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
@@ -1500,17 +1985,23 @@ end
1500
1985
  #used to resolve the ambiguity of
1501
1986
  # <<, %, /, ?, :, and newline (among others) in ruby
1502
1987
  def after_nonid_op?
1988
+
1989
+ #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
1990
+ # if ImplicitParamListStartToken===@last_token_including_implicit
1991
+ # huh return true
1992
+ # end
1503
1993
  case @last_operative_token
1504
- when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
1994
+ when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
1505
1995
  #VarNameToken should really be left out of this case...
1506
1996
  #should be in next branch instread
1507
1997
  #callers all check for last token being not a variable if they pass anything
1508
- #but {false} in the block
1998
+ #but {false} in the block
1999
+ #(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
1509
2000
  return yield
1510
2001
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
1511
2002
  %r{^(
1512
- class|module|end|self|true|false|nil|
1513
- __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
2003
+ end|self|true|false|nil|
2004
+ __FILE__|__LINE__|[\})\]]
1514
2005
  )$}x.token_pat
1515
2006
  #dunno about def/undef
1516
2007
  #maybe class/module shouldn't he here either?
@@ -1522,17 +2013,16 @@ end
1522
2013
  #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
1523
2014
  return true
1524
2015
  when NewlineToken, nil, #nil means we're still at beginning of file
1525
- /^([({\[]|or|not|and|if|unless|then|elsif|else|
1526
- while|until|begin|for|in|case|when|ensure)$
2016
+ /^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
2017
+ while|until|begin|for|in|case|when|ensure|defined\?)$
1527
2018
  /x.token_pat
1528
2019
  return true
1529
- #when KeywordToken
1530
- # return true
2020
+ when KeywordToken
2021
+ return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
1531
2022
  when IgnoreToken
1532
2023
  raise "last_operative_token shouldn't be ignoreable"
1533
- else
1534
- raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
1535
2024
  end
2025
+ raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
1536
2026
  end
1537
2027
 
1538
2028
 
@@ -1577,10 +2067,10 @@ end
1577
2067
 
1578
2068
  #-----------------------------------
1579
2069
  def biop(ch) #match /%=?/ (% or %=)
1580
- assert(ch[/^[%^~]$/])
2070
+ assert(ch[/^[%^]$/])
1581
2071
  result=getchar
1582
2072
  if eat_next_if(?=)
1583
- result <<?=
2073
+ result << ?=
1584
2074
  end
1585
2075
  return operator_or_methname_token( result)
1586
2076
  end
@@ -1610,7 +2100,9 @@ end
1610
2100
  #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
1611
2101
  def plusminus(ch)
1612
2102
  assert(/^[+\-]$/===ch)
1613
- if unary_op_expected?(ch)
2103
+ if unary_op_expected?(ch) or
2104
+ KeywordToken===@last_operative_token &&
2105
+ /^(return|break|next)$/===@last_operative_token.ident
1614
2106
  if (?0..?9)===readahead(2)[1]
1615
2107
  return number(ch)
1616
2108
  else #unary operator
@@ -1619,7 +2111,6 @@ end
1619
2111
  @moretokens << NoWsToken.new(input_position)
1620
2112
  result=(operator_or_methname_token result)
1621
2113
  result.unary=true
1622
- #todo: result should distinguish unary+binary +-
1623
2114
  end
1624
2115
  else #binary operator
1625
2116
  assert(! want_op_name)
@@ -1628,9 +2119,8 @@ end
1628
2119
  result << ?=
1629
2120
  end
1630
2121
  result=(operator_or_methname_token result)
1631
- #todo: result should distinguish unary+binary +-
1632
2122
  end
1633
- result
2123
+ return result
1634
2124
  end
1635
2125
 
1636
2126
  #-----------------------------------
@@ -1642,19 +2132,31 @@ end
1642
2132
  str << c
1643
2133
  result= operator_or_methname_token( str,offset)
1644
2134
  case c
1645
- when '=': str<< (eat_next_if(?=)or'')
2135
+ when '=': #===,==
2136
+ str<< (eat_next_if(?=)or'')
1646
2137
 
1647
- when '>':
2138
+ when '>': #=>
1648
2139
  unless ParamListContextNoParen===@parsestack.last
1649
2140
  @moretokens.unshift result
1650
2141
  @moretokens.unshift( *abort_noparens!("=>"))
1651
2142
  result=@moretokens.shift
1652
2143
  end
1653
2144
  @parsestack.last.see self,:arrow
1654
- when '': #record local variable definitions
1655
-
2145
+ when '': #plain assignment: record local variable definitions
2146
+ last_context_not_implicit.lhs=false
2147
+ @moretokens.push *ignored_tokens(true).map{|x|
2148
+ NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
2149
+ }
1656
2150
  @parsestack.push AssignmentRhsContext.new(@linenum)
1657
- @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
2151
+ if eat_next_if ?*
2152
+ tok=OperatorToken.new('*', input_position-1)
2153
+ tok.unary=true
2154
+ @moretokens.push tok
2155
+ WHSPLF[nextchar.chr] or
2156
+ @moretokens << NoWsToken.new(input_position)
2157
+ comma_in_lvalue_list? #is this needed?
2158
+ end
2159
+ @moretokens.push AssignmentRhsListStartToken.new( input_position)
1658
2160
  end
1659
2161
  return result
1660
2162
  end
@@ -1666,6 +2168,7 @@ end
1666
2168
  k=eat_next_if(/[~=]/)
1667
2169
  if k
1668
2170
  result+=k
2171
+ elsif eof?: #do nothing
1669
2172
  else
1670
2173
  WHSPLF[nextchar.chr] or
1671
2174
  @moretokens << NoWsToken.new(input_position)
@@ -1693,10 +2196,11 @@ end
1693
2196
  #-----------------------------------
1694
2197
  def dot_rhs(prevtok)
1695
2198
  safe_recurse { |a|
1696
- @last_operative_token=prevtok
2199
+ set_last_token prevtok
1697
2200
  aa= ignored_tokens
2201
+ was=after_nonid_op?{true}
1698
2202
  tok,pos=callsite_symbol(prevtok)
1699
- tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
2203
+ tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
1700
2204
  a.unshift(*aa)
1701
2205
  }
1702
2206
  end
@@ -1705,7 +2209,7 @@ end
1705
2209
  def back_quote(ch=nil)
1706
2210
  if @last_operative_token===/^(def|::|\.)$/
1707
2211
  oldpos= input_position
1708
- MethNameToken.new(eat_next_if(?`), oldpos)
2212
+ MethNameToken.new(eat_next_if(?`), oldpos) #`
1709
2213
  else
1710
2214
  double_quote(ch)
1711
2215
  end
@@ -1716,7 +2220,7 @@ if false
1716
2220
  def comment(str)
1717
2221
  result=""
1718
2222
  #loop{
1719
- result<<super(nil).to_s
2223
+ result<< super(nil).to_s
1720
2224
 
1721
2225
  if /^\#.*\#$/===result #if comment was ended by a crunch
1722
2226
 
@@ -1762,7 +2266,7 @@ end
1762
2266
  tokch= NoWsToken.new(input_position-1)
1763
2267
  end
1764
2268
  when '('
1765
- lasttok=last_operative_token
2269
+ lasttok=last_token_maybe_implicit #last_operative_token
1766
2270
  #could be: lasttok===/^[a-z_]/i
1767
2271
  if (VarNameToken===lasttok or MethNameToken===lasttok or
1768
2272
  lasttok===FUNCLIKE_KEYWORDS)
@@ -1781,15 +2285,17 @@ end
1781
2285
  if after_nonid_op?{false} or @last_operative_token.has_no_block?
1782
2286
  @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
1783
2287
  else
2288
+ #abort_noparens!
1784
2289
  tokch.set_infix!
1785
- =begin not needed now, i think
2290
+ tokch.as="do"
2291
+ #=begin not needed now, i think
1786
2292
  # 'need to find matching callsite context and end it if implicit'
1787
2293
  lasttok=last_operative_token
1788
- unless lasttok===')' and lasttok.callsite?
2294
+ if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
1789
2295
  @moretokens.push *(abort_1_noparen!(1).push tokch)
1790
2296
  tokch=@moretokens.shift
1791
2297
  end
1792
- =end
2298
+ #=end
1793
2299
 
1794
2300
  localvars.start_block
1795
2301
  @parsestack.push BlockContext.new(@linenum)
@@ -1811,13 +2317,18 @@ end
1811
2317
  end
1812
2318
  ctx=@parsestack.pop
1813
2319
  origch,line=ctx.starter,ctx.linenum
1814
- ch==PAIRS[origch] or
2320
+ if ch!=PAIRS[origch]
2321
+ #kw.extend MismatchedBrace
1815
2322
  lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
1816
2323
  "matching brace location", @filename, line
1817
- BlockContext===ctx and localvars.end_block
2324
+ end
2325
+ if BlockContext===ctx
2326
+ localvars.end_block
2327
+ @moretokens.last.as="end"
2328
+ end
1818
2329
  if ParamListContext==ctx.class
1819
2330
  assert ch==')'
1820
- #kw.set_callsite! #not needed?
2331
+ kw.set_callsite! #not needed?
1821
2332
  end
1822
2333
  return @moretokens.shift
1823
2334
  end
@@ -1826,19 +2337,24 @@ end
1826
2337
  def eof(ch=nil)
1827
2338
  #this must be the very last character...
1828
2339
  oldpos= input_position
1829
- assert(?\0==getc)
2340
+ assert(/\A[\x0\x4\x1a]\Z/===nextchar.chr)
1830
2341
 
1831
- result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
2342
+ result=@file.read!
2343
+ # result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1832
2344
 
1833
- eof? or
1834
- lexerror result,'nul character is not at the end of file'
1835
- input_position_set @file.size
2345
+ # eof? or
2346
+ # lexerror result,'nul character is not at the end of file'
2347
+ # input_position_set @file.size
1836
2348
  return(endoffile_detected result)
1837
2349
  end
1838
2350
 
1839
2351
  #-----------------------------------
1840
2352
  def endoffile_detected(s='')
1841
2353
  @moretokens.push( *(abort_noparens!.push super(s)))
2354
+ if @progress_thread
2355
+ @progress_thread.kill
2356
+ @progress_thread=nil
2357
+ end
1842
2358
  result= @moretokens.shift
1843
2359
  balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
1844
2360
  result
@@ -1851,7 +2367,26 @@ end
1851
2367
 
1852
2368
  #-----------------------------------
1853
2369
  def comma(ch)
1854
- single_char_token(ch)
2370
+ @moretokens.push token=single_char_token(ch)
2371
+ if AssignmentRhsContext===@parsestack[-1] and
2372
+ ParamListContext===@parsestack[-2] ||
2373
+ ParamListContextNoParen===@parsestack[-2] ||
2374
+ WhenParamListContext===@parsestack[-2] ||
2375
+ (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
2376
+ (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
2377
+ @parsestack.pop
2378
+ @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
2379
+ end
2380
+ token.comma_type=
2381
+ case @parsestack[-1]
2382
+ when AssignmentRhsContext: :rhs
2383
+ when ParamListContext,ParamListContextNoParen: :call
2384
+ when ListImmedContext: :array
2385
+ else
2386
+ :lhs if comma_in_lvalue_list?
2387
+ end
2388
+ @parsestack.last.see self,:comma
2389
+ return @moretokens.shift
1855
2390
  end
1856
2391
 
1857
2392
  #-----------------------------------
@@ -1872,7 +2407,7 @@ end
1872
2407
  assert RUBYOPERATORREX===s
1873
2408
  if RUBYNONSYMOPERATORREX===s
1874
2409
  KeywordToken
1875
- elsif @last_operative_token===/^(\.|::|def|undef|alias|defined\?)$/
2410
+ elsif want_op_name
1876
2411
  MethNameToken
1877
2412
  else
1878
2413
  OperatorToken
@@ -1882,9 +2417,7 @@ end
1882
2417
  #-----------------------------------
1883
2418
  #tokenify_results_of :identifier
1884
2419
  save_offsets_in(*CHARMAPPINGS.values.uniq-[
1885
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1886
-
1887
-
2420
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1888
2421
  ])
1889
2422
  #save_offsets_in :symbol
1890
2423