RubyGems - rubylexer - Versions diffs - 0.7.0 → 0.7.1 - Mend

rubylexer 0.7.0 → 0.7.1

Files changed (80) hide show

data/History.txt +90 -0
data/Manifest.txt +54 -3
data/README.txt +4 -7
data/Rakefile +3 -2
data/lib/rubylexer.rb +856 -323
data/lib/rubylexer/0.7.0.rb +11 -2
data/lib/rubylexer/0.7.1.rb +2 -0
data/lib/rubylexer/charhandler.rb +4 -4
data/lib/rubylexer/context.rb +86 -9
data/lib/rubylexer/rulexer.rb +455 -101
data/lib/rubylexer/token.rb +166 -43
data/lib/rubylexer/tokenprinter.rb +16 -8
data/lib/rubylexer/version.rb +1 -1
data/rubylexer.vpj +98 -0
data/test/code/all_the_gems.rb +33 -0
data/test/code/all_the_raas.rb +226 -0
data/test/code/all_the_rubies.rb +2 -0
data/test/code/deletewarns.rb +19 -1
data/test/code/dumptokens.rb +39 -8
data/test/code/errscan +2 -0
data/test/code/isolate_error.rb +72 -0
data/test/code/lexloop +14 -0
data/test/code/locatetest.rb +150 -8
data/test/code/regression.rb +109 -0
data/test/code/rubylexervsruby.rb +53 -15
data/test/code/strgen.rb +138 -0
data/test/code/tarball.rb +144 -0
data/test/code/testcases.rb +11 -0
data/test/code/tokentest.rb +115 -24
data/test/data/__eof2.rb +1 -0
data/test/data/__eof5.rb +2 -0
data/test/data/__eof6.rb +2 -0
data/test/data/cvtesc.rb +17 -0
data/test/data/g.rb +6 -0
data/test/data/hd0.rb +3 -0
data/test/data/hdateof.rb +2 -0
data/test/data/hdempty.rb +3 -0
data/test/data/hdr.rb +9 -0
data/test/data/hdr_dos.rb +13 -0
data/test/data/hdr_dos2.rb +18 -0
data/test/data/heart.rb +2 -0
data/test/data/here_escnl.rb +25 -0
data/test/data/here_escnl_dos.rb +20 -0
data/test/data/here_squote.rb +3 -0
data/test/data/heremonsters.rb +140 -0
data/test/data/heremonsters.rb.broken +68 -0
data/test/data/heremonsters.rb.broken.save +68 -0
data/test/data/heremonsters_dos.rb +140 -0
data/test/data/heremonsters_dos.rb.broken +68 -0
data/test/data/illegal_oneliners.rb +1 -0
data/test/data/illegal_stanzas.rb +0 -0
data/test/data/make_ws_strdelim.rb +22 -0
data/test/data/maven2_builer_test.rb +82 -0
data/test/data/migration.rb +8944 -0
data/test/data/modl.rb +6 -0
data/test/data/modl_dos.rb +7 -0
data/test/data/modl_fails.rb +10 -0
data/test/data/multilinestring.rb +6 -0
data/test/data/oneliners.rb +555 -0
data/test/data/p-op.rb +2 -0
data/test/data/p.rb +3 -1710
data/test/data/s.rb +90 -21
data/test/data/simple.rb +1 -0
data/test/data/simple_dos.rb +1 -0
data/test/data/stanzas.rb +1194 -0
data/test/data/strdelim_crlf.rb +6 -0
data/test/data/stuff.rb +6 -0
data/test/data/stuff2.rb +5 -0
data/test/data/stuff3.rb +6 -0
data/test/data/stuff4.rb +6 -0
data/test/data/tkweird.rb +20 -0
data/test/data/unending_stuff.rb +5 -0
data/test/data/whatnot.rb +8 -0
data/test/data/ws_strdelim.rb +0 -0
data/test/test.sh +239 -0
data/testing.txt +39 -50
metadata +110 -12
data/test/code/dl_all_gems.rb +0 -43
data/test/code/unpack_all_gems.rb +0 -15
data/test/data/gemlist.txt +0 -280

data/History.txt CHANGED Viewed

@@ -1,3 +1,93 @@
+=== 0.7.1/10-29-2008
+* 6 Major Enhancements:
+  * handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
+  * yet more hacks in aid of string inclusions
+  * backslashes in strings are no longer interpreted automatically when lexed
+  * here documents are completely rewritten in a tricky way that more closely mimics what MRI does
+  * many more flags for tokens to tell apart the various cases:
+    * the various different local variable types have to be detected.
+    * colons which operate like semicolons or thens are marked as such
+    * { } used in block now flagged as parsing like do and end
+    * commas now are marked with different types depending on how they're used
+    * @variables in methods need to be marked as such, so their parsetree can come out different.
+    * clearly mark backquoted strings
+  * further refinements of local variable detection and implicit paren placement near these cases:
+    * when ws between method name and parenthesis
+    * break/return/next
+    * ? : << / rescue do
+* 5 Minor Enhancements
+  * colon or star in assignment make it a multi assignment
+  * presence of unary * or & in param list forces it to be a multi-param list
+  * errors in string inclusions should now be handled better
+  * string and stringlike tokens now can tell you the exact sequence of chars used to open and close the string.
+  * correctly handling more cases where return/break/next parses different than a method (yuck!)
+* 26 Bugfixes
+  * ~ operator can be followed with an @, like + and -
+  * ~ is overridable, however :: is not
+  * raise is not a keyword
+  * in addition to 0x00, 0x04 and 0x1a should be considered eof in ruby. why? idunno.
+  * setting PROGRESS env var will cause input file position to be printed to stderr periodically.
+  * defined? is not a funclike keyword... really more of a unary operator
+  * $- is a legitimate global variable.
+  * better parsing of lvalue list following for keyword.
+  * rescue is a variable define context only when right after => and before then (or disguises).
+  * better placement of implicit parens around def param list
+  * (global) variable aliasing now supported
+  * local vars in END block are NOT scoped to the block!
+  * local vars in def param lists aren't considered variables til after the initializer for that var
+  * end of def header is treated like ; even if none is present
+  * never put here document right after class keyword
+  * look for start of line directives at end of here document
+  * oops, mac newlines don't have to be supported
+  * dos newlines better tolerated around here documents
+  * less line number/offset confusion around here documents
+  * newline after (non-operator) rescue is hard (but not after INNERBOUNDINGWORDS)
+  * handling eof in more strange places
+  * always expect unary op after for
+  * unary ops should know about the before-but-not-after rule!
+  * newlines after = should be escaped
+  * \c? and \C-? are not interpreted the same as other ctrl chars
+  * \n\r  and \r are not recognized as nl sequences
+* 18 Internal Changes (not user visible)
+  * commas cause a :comma event on the parsestack
+  * some of the lists of types of operators are available now as arrays of strings instead of regexps
+  * single and double quote now have separate implementations again
+  * keep track of whether an implicit open or close paren has just been emitted
+  * put ws around << to keep slickedit happy
+  * the eof characters are also considered whitespace.
+  * identifier lexer now uses regexps more heavily
+  * method formal parameter list is not considered an lvalue context for commas.
+  * class and def now have their own parse contexts
+  * unary star causes a :splat event on the parsestack
+  * is_var_name now detects var tokens just from the token type, not looking at local vars table.
+  * a faster regexp-based implementation of string scanning
+  * moved yucky side effect out of quote_expected?
+  * these keywords: class module def for defined?  no longer automatically create operator context
+  * a new context for BEGIN/END keywords
+  * a new context for param list of return/next/break
+  * new escape sequence processors for regexp and %W list
+  * numbers now scanned with a regexp
+* 15 Enhancements and bug fixes to tests:
+  * just print a notice on errors which are also syntax errors for ruby
+  * a little cleanup of temp files
+  * rubylexervsruby and tokentest can take input from stdin
+  * unlexer improvements
+  * dumptokens now has a --silent cmdline option
+  * locatetest.rb is significantly enhanced
+  * --unified option to diff seems to work better than -u
+  * tokentest better verifies exact token contents...
+  * tokentest now uses open and close fields of strings to verify string bounds exactly
+  * CRLF in a string is always treated like just a LF. (CR is elided.)
+  * allow_ooo hacky flag marks tokens whose offset errors are to be ignored.
+  * all other offset errors have been downgraded to warnings.
+  * most of the offset problems I had been seeing have been fixed, tho
+  * offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
+  * tokentest has a --loop option, for load testing
 === 0.7.0/2-15-2008
 * implicit tokens are now emitted at the right times (need more test code)
 * local variables are now temporarily hidden by class, module, and def

data/Manifest.txt CHANGED Viewed

@@ -19,7 +19,6 @@ lib/rubylexer/symboltable.rb
 lib/rubylexer/charhandler.rb
 lib/assert.rb
 lib/rubylexer.rb
-test/data/gemlist.txt
 test/data/blockassigntest.rb
 test/data/for.rb
 test/data/chunky_bacon.rb
@@ -58,10 +57,62 @@ test/data/chunky_bacon2.rb
 test/data/format.rb
 test/code/locatetest.rb
 test/code/rubylexervsruby.rb
-test/code/dl_all_gems.rb
-test/code/unpack_all_gems.rb
 test/code/tokentest.rb
 test/code/dumptokens.rb
 test/code/torment
 test/code/locatetest
 test/code/deletewarns.rb
+lib/rubylexer/0.7.1.rb
+rubylexer.vpj
+test/code/all_the_gems.rb
+test/code/all_the_raas.rb
+test/code/all_the_rubies.rb
+test/code/errscan
+test/code/isolate_error.rb
+test/code/lexloop
+test/code/regression.rb
+test/code/strgen.rb
+test/code/tarball.rb
+test/code/testcases.rb
+test/data/chunky.plain.rb
+test/data/cvtesc.rb
+test/data/__eof2.rb
+test/data/__eof5.rb
+test/data/__eof6.rb
+test/data/hd0.rb
+test/data/hdateof.rb
+test/data/hdempty.rb
+test/data/hdr_dos2.rb
+test/data/hdr_dos.rb
+test/data/hdr.rb
+test/data/here_escnl_dos.rb
+test/data/here_escnl.rb
+test/data/heremonsters_dos.rb
+test/data/heremonsters_dos.rb.broken
+test/data/heremonsters.rb
+test/data/heremonsters.rb.broken
+test/data/heremonsters.rb.broken.save
+test/data/here_squote.rb
+test/data/illegal_oneliners.rb
+test/data/illegal_stanzas.rb
+test/data/make_ws_strdelim.rb
+test/data/maven2_builer_test.rb
+test/data/migration.rb
+test/data/modl_dos.rb
+test/data/modl_fails.rb
+test/data/modl.rb
+test/data/multilinestring.rb
+test/data/oneliners.rb
+test/data/simple_dos.rb
+test/data/simple.rb
+test/data/stanzas.rb
+test/data/strdelim_crlf.rb
+test/data/stuff2.rb
+test/data/stuff3.rb
+test/data/stuff4.rb
+test/data/stuff.rb
+test/data/tkweird.rb
+test/data/unending_stuff.rb
+test/data/whatnot.rb
+test/data/ws_strdelim.rb
+test/test.sh

data/README.txt CHANGED Viewed

@@ -67,10 +67,7 @@ keywords, depending on context:
   any overrideable operator and most keywords can also be method names
 == todo
-test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
-these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
 test more ways: cvt source to dos or mac fmt before testing
-test more ways: run unit tests after passing thru rubylexer (0.7)
 test more ways: test require'd, load'd, or eval'd code as well (0.7)
 lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
 incremental lexing (ides want this (for performance))
@@ -78,12 +75,10 @@ put everything in a namespace
 integrate w/ other tools...
 html colorized output?
 move more state onto @parsestack (ongoing)
-the new cases in p.rb now compile, but won't run
 expand on test documentation
 use want_op_name more
 return result as a half-parsed tree (with parentheses and the like matched)
 emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
-strings are still slow
 emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
 token pruning in dumptokens...
@@ -96,8 +91,10 @@ string tokenization sometimes a little different from ruby around newlines
 string contents might not be correctly translated in a few cases (0.8?)
 symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
 '\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
-windows or mac newline in source are likely to cause problems in obscure cases (need test case)
+windows newline in source is likely to cause problems in obscure cases (need test case)
 unterminated =begin is not an error (0.8)
 ruby 1.9 completely unsupported (0.9)
 character sets other than ascii are not supported at all (1.0)
+regression test currently shows 14 errors with differences in exact token ordering
+-around string inclusions. these errors are much less serious than they seem.
+offset of AssignmentRhsListEndToken appears to be off by 1

data/Rakefile CHANGED Viewed

@@ -13,12 +13,13 @@ require 'lib/rubylexer/version.rb'
    hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
      _.author = "Caleb Clausen"
      _.email = "rubylexer-owner @at@ inforadical .dot. net"
-     _.url = "http://rubylexer.rubyforge.org/"
-     _.extra_deps = ["sequence"]
+     _.url = ["http://rubylexer.rubyforge.org/", "http://rubyforge.org/projects/rubylexer/"]
+     _.extra_deps << ['sequence', '>= 0.2.0']
      _.test_globs=["test/{code/*,data/*rb*,results/}"]
      _.description=desc
      _.summary=desc[/\A[^.]+\./]
      _.spec_extras={:bindir=>''}
+     _.rdoc_pattern=/\A(howtouse\.txt|testing\.txt|README\.txt|lib\/.*\.rb)\Z/
    end

data/lib/rubylexer.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-=begin copyright
+=begin legal crap
     rubylexer - a ruby lexer written in ruby
-    Copyright (C) 2004,2005  Caleb Clausen
+    Copyright (C) 2004,2005,2008  Caleb Clausen
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
@@ -18,7 +18,6 @@
 =end
 require 'rubylexer/rulexer' #must be 1st!!!
 require 'rubylexer/version'
 require 'rubylexer/token'
@@ -32,9 +31,11 @@ require 'rubylexer/tokenprinter'
 #-----------------------------------
 class RubyLexer
   include NestedContexts
    RUBYSYMOPERATORREX=
-      %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
+      %r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
       # (nasty beastie, eh?)
       #these are the overridable operators
       #does not match flow-control operators like: || && ! or and if not
@@ -42,23 +43,25 @@ class RubyLexer
       #or .. ... ?:
       #for that use:
    RUBYNONSYMOPERATORREX=
-      %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
+      %r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
    RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
    UNSYMOPS=/^[~!]$/ #always unary
    UBSYMOPS=/^([*&+-]|::)$/  #ops that could be unary or binary
    WHSPCHARS=WHSPLF+"\\#"
-   OPORBEGINWORDS="(if|unless|while|until)"
-   BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
-   FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
+   OPORBEGINWORDLIST=%w(if unless while until)
+   BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
+   OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
+   BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
+   FUNCLIKE_KEYWORDS=/^(break|next|redo|return|yield|retry|super|BEGIN|END)$/
    VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
    INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
    BINOPWORDS="(and|or)"
-   NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
+   NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
    NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu']  #chars that begin NEVERSTARTPARAMLIST
    NEVERSTARTPARAMLISTMAXLEN=7     #max len of a NEVERSTARTPARAMLIST
    RUBYKEYWORDS=%r{
-     ^(alias|#{BINOPWORDS}|not|undef|end|
+     ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
        #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
        #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
      )$
@@ -72,8 +75,9 @@ class RubyLexer
          ?A..?Z => :identifier,
          ?_     => :identifier,
          ?0..?9 => :number,
-         %{"'} => :double_quote,
-         ?` => :back_quote,
+         ?" => :double_quote,        #"
+         ?' => :single_quote,        #'
+         ?` => :back_quote,          #`
          WHSP => :whitespace, #includes \r
          ?, => :comma,
@@ -99,7 +103,9 @@ class RubyLexer
          #?\r => :newline, #implicitly escaped after op
          ?\\ => :escnewline,
-         ?\0 => :eof,
+         ?\x00 => :eof,
+         ?\x04 => :eof,
+         ?\x1a => :eof,
          "[({" => :open_brace,
          "])}" => :close_brace,
@@ -108,41 +114,90 @@ class RubyLexer
          ?# => :comment
    }
-   attr_reader :incomplete_here_tokens, :parsestack
+   attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
    #-----------------------------------
-   def initialize(filename,file,linenum=1)
-      super(filename,file, linenum)
+   def initialize(filename,file,linenum=1,offset_adjust=0)
+      @offset_adjust=0 #set again in next line
+      super(filename,file, linenum,offset_adjust)
       @start_linenum=linenum
       @parsestack=[TopLevelContext.new]
-      @incomplete_here_tokens=[]
+      @incomplete_here_tokens=[] #not used anymore
+      @pending_here_bodies=[]
       @localvars_stack=[SymbolTable.new]
       @defining_lvar=nil
       @in_def_name=false
+      @last_operative_token=nil
+      @last_token_maybe_implicit=nil
       @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
       start_of_line_directives
+      progress_printer
+   end
+   def progress_printer
+     return unless ENV['RL_PROGRESS']
+     $stderr.puts 'printing progresses'
+     @progress_thread=Thread.new do
+       until EoiToken===@last_operative_token
+         sleep 10
+         $stderr.puts @file.pos
+       end
+     end
    end
    def localvars;
      @localvars_stack.last
    end
+   attr :localvars_stack
+   attr :offset_adjust
+   attr_writer :pending_here_bodies
+   #-----------------------------------
+   def set_last_token(tok)
+     @last_operative_token=@last_token_maybe_implicit=tok
+   end
    #-----------------------------------
    def get1token
       result=super  #most of the action's here
+      if ENV['PROGRESS']
+      @last_cp_pos||=0
+      @start_time||=Time.now
+      if result.offset-@last_cp_pos>100000
+        $stderr.puts "#{result.offset} #{Time.now-@start_time}"
+        @last_cp_pos=result.offset
+      end
+      end
       #now cleanup and housekeeping
       #check for bizarre token types
       case result
+      when ImplicitParamListStartToken, ImplicitParamListEndToken
+          @last_token_maybe_implicit=result
+          result
       when StillIgnoreToken#,nil
           result
+      when StringToken
+          set_last_token result
+          assert !(IgnoreToken===@last_operative_token)
+          result.elems.map!{|frag|
+            if String===frag
+              result.translate_escapes(frag)
+            else
+              frag
+            end
+          } if AUTO_UNESCAPE_STRINGS
+          result
       when Token#,String
-          @last_operative_token=result
+          set_last_token result
           assert !(IgnoreToken===@last_operative_token)
           result
       else
@@ -150,6 +205,20 @@ class RubyLexer
       end
    end
+   #-----------------------------------
+   def eof?
+     super or EoiToken===@last_operative_token
+   end
+   #-----------------------------------
+   def input_position
+     super+@offset_adjust
+   end
+   #-----------------------------------
+   def input_position_raw
+     @file.pos
+   end
    #-----------------------------------
    def balanced_braces?
@@ -163,7 +232,7 @@ class RubyLexer
       s=eat_next_if(?$) or return nil
       if t=((identifier_as_string(?$) or special_global))
-        s<<t
+        s << t
       else error= "missing $id name"
       end
@@ -173,17 +242,27 @@ class RubyLexer
    #-----------------------------------
    def at_identifier(ch=nil)
       result =  (eat_next_if(?@) or return nil)
-      result << (eat_next_if(?@)or'')
+      result << (eat_next_if(?@) or '')
       if t=identifier_as_string(?@)
-        result<<t
+        result << t
       else error= "missing @id name"
       end
-      return lexerror(VarNameToken.new(result),error)
+      result=VarNameToken.new(result)
+      result.in_def=true if inside_method_def?
+      return lexerror(result,error)
    end
 private
    #-----------------------------------
-   def here_spread_over_ruby_code(rl,tok)
+   def inside_method_def?
+     @parsestack.reverse_each{|ctx|
+       ctx.starter=='def' and ctx.state!=:saw_def and return true
+     }
+     return false
+   end
+   #-----------------------------------
+   def here_spread_over_ruby_code(rl,tok) #not used anymore
      assert(!rl.incomplete_here_tokens.empty?)
      @incomplete_here_tokens += rl.incomplete_here_tokens
    end
@@ -207,10 +286,10 @@ private
   end
   #-----------------------------------
-  WSCHARSET=/[#\\\n\s\t\v\r\f]/
+  WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
   def ignored_tokens(allow_eof=false,allow_eol=true)
     result=[]
-    result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
+    result << @moretokens.shift while StillIgnoreToken===@moretokens.first
     @moretokens.empty? or return result
     loop do
       unless @moretokens.empty?
@@ -273,8 +352,8 @@ private
       result = ((
       #order matters here, but it shouldn't
       #(but til_charset must be last)
-         eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
-         (eat_next_if('-') and ("-"+getchar)) or
+         eat_if(/-[a-z0-9_]/i,2) or
+         eat_next_if(/[!@&+`'=~\-\/\\,.;<>*"$?:]/) or
          (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
       ))
    end
@@ -289,23 +368,26 @@ private
       #just asserts because those contexts are never encountered.
       #control goes through symbol(<...>,nil)
       assert( /^[a-z_]$/i===context)
-      assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
+      assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
-      @moretokens.unshift(*parse_keywords(str,oldpos) do
+      @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
         #if not a keyword,
         case str
           when FUNCLIKE_KEYWORDS; #do nothing
           when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
         end
-        safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
+        was_last=@last_operative_token
+        @last_operative_token=tok if tok
+        safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
       end)
       return @moretokens.shift
    end
    #-----------------------------------
+   IDENTREX={}
    def identifier_as_string(context)
       #must begin w/ letter or underscore
-      str=eat_next_if(/[_a-z]/i) or return nil
+      /[_a-z]/i===nextchar.chr or return
       #equals, question mark, and exclamation mark
       #might be allowed at the end in some contexts.
@@ -315,45 +397,16 @@ private
       #i hope i've captured all right conditions....
       #context should always be ?: right after def, ., and :: now
-      maybe_eq,maybe_qm,maybe_ex = case context
-         when ?@,?$ then [nil,nil,nil]
-         when ?:    then [?=, ??, ?!]
-         else            [nil,??, ?!]
-      end
-      @in_def_name and maybe_eq= ?=
-      str<<til_charset(/[^a-z0-9_]/i)
-      #look for ?, !, or =, if allowed
-      case b=getc
-      when nil #means we're at eof
-         #handling nil here prevents b from ever matching
-         #a nil value of maybe_qm, maybe_ex or maybe_eq
-      when maybe_qm
-         str << b
-      when maybe_ex
-         nc=(nextchar unless eof?)
-         #does ex appear to be part of a larger operator?
-         if nc==?= #or nc==?~
-           back1char
-         else
-           str << b
-         end
-      when maybe_eq
-         nc=(nextchar unless eof?)
-         #does eq appear to be part of a larger operator?
-         if nc==?= or nc==?~ or nc==?>
-           back1char
-         else
-           str << b
-         end
-      else
-         back1char
-      end
+      #= and ! only match if not part of a larger operator
+      trailers =
+        case context
+         when ?@,?$ then ""
+#         when ?:    then "!(?![=])|\\?|=(?![=~>])"
+         else            "!(?![=])|\\?"
+        end
+      @in_def_name||context==?: and trailers<<"|=(?![=~>])"
-      return str
+      @file.scan(IDENTREX[trailers]||=/^[_a-z][a-z0-9_]*(?:#{trailers})?/i)
    end
   #-----------------------------------
@@ -380,18 +433,26 @@ private
    #a comma has been seen. are we in an
    #lvalue list or some other construct that uses commas?
    def comma_in_lvalue_list?
-     @parsestack.last.lhs= (not ListContext===@parsestack.last)
+     @parsestack.last.lhs=
+       case l=@parsestack.last
+       when ListContext:
+       when DefContext: l.in_body
+       else true
+       end
    end
    #-----------------------------------
    def in_lvar_define_state
      #@defining_lvar is a hack
      @defining_lvar or case ctx=@parsestack.last
-       when ForSMContext; ctx.state==:for
-       when RescueSMContext; ctx.state==:arrow
+       #when ForSMContext; ctx.state==:for
+       when RescueSMContext
+         @last_operative_token.ident=="=>" and @file.match? /\A[\s\v]*([:;#\n]|then[^a-zA-Z0-9_])/m
        #when BlockParamListLhsContext; true
      end
    end
+   IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
    #-----------------------------------
    #determine if an alphabetic identifier refers to a variable
@@ -400,45 +461,50 @@ private
    #if appropriate. adds tok to the
    #local var table if its a local var being defined for the first time.
-   #note: what we here call variables (rather, constants) following ::
-   #might actually be methods at runtime, but that's immaterial to tokenization.
-   #note: this routine should determine the correct token type for name and
-   #create the appropriate token. currently this is not done because callers
-   #typically have done it (perhaps incorrectly) already.
-   def var_or_meth_name(name,lasttok,pos)
+   #in general, operators in ruby are disambiguated by the before-but-not-after rule.
+   #an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
+   #whitespace before but not after the 'operator' indicates it is to be considered a
+   #value token instead. otherwise it is a binary operator. (unary (prefix) ops count
+   #as 'values' here.)
+   def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
      #look for call site if not a keyword or keyword is function-like
      #look for and ignore local variable names
      assert String===name
+     was_in_lvar_define_state=in_lvar_define_state
      #maybe_local really means 'maybe local or constant'
      maybe_local=case name
-       when /[^a-z_0-9]$/i; #do nothing
-       when /^[a-z_]/;  (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
-       when /^[A-Z]/; is_const=true;not lasttok==='.'  #this is the right algorithm for constants...
+       when /[^a-z_0-9]$/i #do nothing
+       when /^[a-z_]/
+         (localvars===name or
+          VARLIKE_KEYWORDS===name or
+          was_in_lvar_define_state
+         ) and not lasttok===/^(\.|::)$/
+       when /^[A-Z]/
+         is_const=true
+         not lasttok==='.'  #this is the right algorithm for constants...
      end
      assert(@moretokens.empty?)
      oldlast=@last_operative_token
-     tok=@last_operative_token=VarNameToken.new(name,pos)
+     tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
      oldpos= input_position
      sawnl=false
      result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
      if sawnl || eof?
-         if  maybe_local then
-           if in_lvar_define_state
-             if /^[a-z_][a-zA-Z_0-9]*$/===name
-               assert !(lasttok===/^(\.|::)$/)
-               localvars[name]=true
-             else
-               lexerror tok,"not a valid variable name: #{name}"
-             end
-             return result.unshift(tok)
+         if was_in_lvar_define_state
+           if /^[a-z_][a-zA-Z_0-9]*$/===name
+             assert !(lasttok===/^(\.|::)$/)
+             localvars[name]=true
+           else
+             lexerror tok,"not a valid variable name: #{name}"
            end
+           return result.unshift(tok)
+         elsif maybe_local
            return result.unshift(tok) #if is_const
          else
            return result.unshift(
@@ -455,6 +521,8 @@ private
        when ?=;  not /^=[>=~]$/===readahead(2)
        when ?,; comma_in_lvalue_list?
        when ?); last_context_not_implicit.lhs
+       when ?i; /^in[^a-zA-Z_0-9]/===readahead(3) and
+                  ForSMContext===last_context_not_implicit
        when ?>,?<; /^(.)\1=$/===readahead(3)
        when ?*,?&; /^(.)\1?=/===readahead(3)
        when ?|; /^\|\|?=/===readahead(3) or
@@ -463,8 +531,8 @@ private
                 readahead(2)[1] != ?|
        when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
      end
-     if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
-        tok=VarNameToken.new(name,pos)
+     if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
+        tok=assign_lvar_type! VarNameToken.new(name,pos)
         if /[^a-z_0-9]$/i===name
           lexerror tok,"not a valid variable name: #{name}"
         elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
@@ -476,44 +544,106 @@ private
      implicit_parens_to_emit=
      if assignment_coming
        @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
-       0
+       IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
      else
      case nc
        when nil: 2
-       when ?!; readahead(2)=='!=' ? 2 : 1
+       when ?!; /^![=~]$/===readahead(2) ? 2 : 1
+       when ?d;
+         if /^do([^a-zA-Z0-9_]|$)/===readahead(3)
+           if maybe_local and expecting_do?
+             ty=VarNameToken
+             0
+           else
+             maybe_local=false
+             2
+           end
+         else
+           1
+         end
        when NEVERSTARTPARAMLISTFIRST
          (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
-       when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
+       when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1 #"
        when ?{
          maybe_local=false
+         1
+=begin
          x=2
          x-=1 if /\A(return|break|next)\Z/===name and
                  !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
          x
+=end
        when ?(;
-         maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
+         maybe_local=false
+         lastid=lasttok&&lasttok.ident
+         case lastid
+           when /\A[;(]|do\Z/: was_after_nonid_op=false
+           when '|':  was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
+           when '{': was_after_nonid_op=false if  BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
+         end if KeywordToken===lasttok
+         was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
+         want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
+#                      /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
+#                      MethNameToken===lasttok or
+#                      RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
+#                     )
+         #look ahead for closing paren (after some whitespace...)
+         want_parens=false if @file.match? /\A.(?:\s|\v|\#.*\n)*\)/
+#         afterparen=@file.pos
+#         getchar
+#         ignored_tokens(true)
+#         want_parens=false if nextchar==?)
+#         @file.pos=afterparen
+         want_parens ? 1 : 0
        when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
-       when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
-       when ?:,??; next2=readahead(2);
-                   WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
-#       when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
-       when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
-       when ?[; ws_toks.empty? ? 2 : 3
+       when ?+, ?-, ?%, ?/
+         if /^(return|break|next)$/===@last_operative_token.ident and not(
+              KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
+            )
+           1
+         else
+           (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
+         end
+       when ?*, ?&
+         lasttok=@last_operative_token
+         if /^(return|break|next)$/===@last_operative_token.ident and not(
+              KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
+            )
+           1
+         else
+           (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
+         end
+       when ?:
+         next2=readahead(2)
+         if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
+           $1 && !ws_toks.empty?   ? 3 : 2
+         else
+           3
+         end
+       when ??; next3=readahead(3);
+                   /^\?([#{WHSPLF}]|[a-z_][a-z_0-9])/io===next3 ? 2 : 3
+#       when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
+       when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?["'`a-zA-Z_0-9]/]) ? 3 : 2
+       when ?[; ws_toks.empty?&&!(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 2 : 3
        when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
        else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
      end
      end
-     if is_const and implicit_parens_to_emit==3 then
+     if is_const and implicit_parens_to_emit==3 then #needed?
        implicit_parens_to_emit=1
      end
-     tok=if maybe_local and implicit_parens_to_emit>=2
+     if maybe_local and implicit_parens_to_emit>=2
        implicit_parens_to_emit=0
-       VarNameToken
+       ty=VarNameToken
      else
-       MethNameToken
-     end.new(name,pos)
+       ty||=MethNameToken
+     end
+     tok=assign_lvar_type!(ty.new(name,pos))
      case implicit_parens_to_emit
      when 2;
@@ -523,8 +653,17 @@ private
        arr,pass=*param_list_coming_with_2_or_more_params?
        result.push( *arr )
        unless pass
+         #only 1 param in list
          result.unshift ImplicitParamListStartToken.new(oldpos)
-         @parsestack.push ParamListContextNoParen.new(@linenum)
+         last=result.last
+         last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
+         if /^(break|next|return)$/===name and
+            !(KeywordToken===lasttok and /^(.|::)$/===lasttok.ident)
+           ty=KWParamListContextNoParen
+         else
+           ty=ParamListContextNoParen
+         end
+         @parsestack.push ty.new(@linenum)
        end
      when 0; #do nothing
      else raise 'invalid value of implicit_parens_to_emit'
@@ -547,11 +686,13 @@ private
        result=[get1token]
        pass=loop{
          tok=get1token
-         result<<tok
+         result << tok
          if @parsestack.size==basesize
            break false
          elsif ','==tok.to_s and @parsestack.size==basesize+1
            break true
+         elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.unary and @parsestack.size==basesize+1
+           break true
          elsif EoiToken===tok
            lexerror tok, "unexpected eof in parameter list"
          end
@@ -560,11 +701,13 @@ private
    end
   #-----------------------------------
-  CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
-                  ParamListContextNoParen=>ImplicitParamListEndToken,
-                  WhenParamListContext=>KwParamListEndToken,
-                  RescueSMContext=>KwParamListEndToken
-                 }
+  CONTEXT2ENDTOK={ #implicit context class => class of the end token that closes it
+    AssignmentRhsContext=>AssignmentRhsListEndToken,
+    ParamListContextNoParen=>ImplicitParamListEndToken,
+    KWParamListContextNoParen=>ImplicitParamListEndToken, #context pushed instead of ParamListContextNoParen for break/next/return param lists (subject to the lasttok check at the push site)
+    WhenParamListContext=>KwParamListEndToken,
+    RescueSMContext=>KwParamListEndToken
+  }
   def abort_noparens!(str='')
     #assert @moretokens.empty?
     result=[]
@@ -576,7 +719,63 @@ private
     return result
   end
-if false #no longer used
+  #-----------------------------------
+  CONTEXT2ENDTOK_FOR_RESCUE={ #context class => end-token class; lookup table for abort_noparens_for_rescue!
+    AssignmentRhsContext=>AssignmentRhsListEndToken,
+    ParamListContextNoParen=>ImplicitParamListEndToken,
+    KWParamListContextNoParen=>ImplicitParamListEndToken,
+    WhenParamListContext=>KwParamListEndToken,
+    RescueSMContext=>KwParamListEndToken
+  }
+  def abort_noparens_for_rescue!(str='') #close implicit contexts atop @parsestack (used for infix rescue); returns the end tokens. str is the text just read, its length backs up the token offset
+    #assert @moretokens.empty?
+    result=[] #end tokens, in the order the contexts are closed
+    ctx=@parsestack.last
+    while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class] #stop at the first context with no table entry
+      break if AssignmentRhsContext===ctx && !ctx.multi_assign? #a non-multi-assign rhs stays open
+      if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2] #param list directly inside an assignment rhs: close both and stop
+        result.push ImplicitParamListEndToken.new(input_position-str.length),
+                    AssignmentRhsListEndToken.new(input_position-str.length)
+          @parsestack.pop
+          @parsestack.pop
+        break
+      end
+      result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
+      break if RescueSMContext===ctx #why is this here?
+      @parsestack.pop
+      ctx=@parsestack.last
+    end
+    return result
+  end
+  #-----------------------------------
+  CONTEXT2ENDTOK_FOR_DO={ #context class => end-token class; lookup table for abort_noparens_for_do! and expecting_do?
+    AssignmentRhsContext=>AssignmentRhsListEndToken,
+    ParamListContextNoParen=>ImplicitParamListEndToken,
+    ExpectDoOrNlContext=>1, #sentinel, not a token class: abort_noparens_for_do! stops here without emitting anything
+    #WhenParamListContext=>KwParamListEndToken,
+    #RescueSMContext=>KwParamListEndToken
+  }
+  def abort_noparens_for_do!(str='') #close implicit contexts atop @parsestack when "do" is seen; returns the end tokens. str is the text just read, its length backs up the token offset
+    #assert @moretokens.empty?
+    result=[]
+    while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class] #stop at any context not in the table
+      break if klass==1 #ExpectDoOrNlContext: left on the stack, no token emitted
+      result << klass.new(input_position-str.length)
+      @parsestack.pop
+    end
+    return result
+  end
+  #-----------------------------------
+  def expecting_do? #true if the topmost context that is not an AssignmentRhsContext has an entry in CONTEXT2ENDTOK_FOR_DO
+    @parsestack.reverse_each{|ctx|
+      next if AssignmentRhsContext===ctx #look beneath assignment rhs contexts
+      return !!CONTEXT2ENDTOK_FOR_DO[ctx.class] #decided by the first other context found
+    }
+    return false #stack was empty or held only AssignmentRhsContexts
+  end
    #-----------------------------------
    def abort_1_noparen!(offs=0)
      assert @moretokens.empty?
@@ -585,12 +784,12 @@ if false #no longer used
        @parsestack.pop
        result << AssignmentRhsListEndToken.new(input_position-offs)
      end
-     ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
+     if ParamListContextNoParen===@parsestack.last #or lexerror huh,'{} with no matching callsite'
        @parsestack.pop
        result << ImplicitParamListEndToken.new(input_position-offs)
+     end
      return result
    end
-end
    #-----------------------------------
    #parse keywords now, to prevent confusion over bare symbols
@@ -598,6 +797,7 @@ end
    #if arg is not a keyword, the block is called
    def parse_keywords(str,offset)
       assert @moretokens.empty?
+      assert !(KeywordToken===@last_operative_token and /A(.|::|def)\Z/===@last_operative_token.ident)
       result=[KeywordToken.new(str,offset)]
       case str
@@ -619,11 +819,15 @@ end
          /^(do)$/===start and localvars.end_block
          /^(class|module|def)$/===start and @localvars_stack.pop
-      when "class","module"
+      when "module"
          result.first.has_end!
          @parsestack.push WantsEndContext.new(str,@linenum)
          @localvars_stack.push SymbolTable.new
+      when "class"
+         result.first.has_end!
+         @parsestack.push ClassContext.new(str,@linenum)
       when "if","unless" #could be infix form without end
          if after_nonid_op?{false} #prefix form
             result.first.has_end!
@@ -653,10 +857,11 @@ end
          #expect_do_or_end_or_nl! str #handled by ForSMContext now
          @parsestack.push ForSMContext.new(@linenum)
       when "do"
-         result.unshift(*abort_noparens!(str))
+         result.unshift(*abort_noparens_for_do!(str))
          if ExpectDoOrNlContext===@parsestack.last
             @parsestack.pop
             assert WantsEndContext===@parsestack.last
+            result.last.as=";"
          else
             result.last.has_end!
             @parsestack.push WantsEndContext.new(str,@linenum)
@@ -665,10 +870,10 @@ end
          end
       when "def"
          result.first.has_end!
-         @parsestack.push WantsEndContext.new("def",@linenum)
-         @localvars_stack.push SymbolTable.new
+         @parsestack.push ctx=DefContext.new(@linenum)
+         ctx.state=:saw_def
          safe_recurse { |aa|
-            @last_operative_token=KeywordToken.new "def" #hack
+            set_last_token KeywordToken.new "def" #hack
             result.concat ignored_tokens
             #read an expr like a.b.c or a::b::c
@@ -683,10 +888,11 @@ end
                 when/^\)$/.token_pat then parencount-=1
                 end
                 EoiToken===tok and lexerror tok, "eof in def header"
-                result<<tok
+                result << tok
               end until  parencount==0 #@parsestack.size==old_size
-            else #no parentheses, all tail
-              @last_operative_token=KeywordToken.new "." #hack hack
+              @localvars_stack.push SymbolTable.new
+           else #no parentheses, all tail
+              set_last_token KeywordToken.new "." #hack hack
               tokindex=result.size
               result << tok=symbol(false,false)
               name=tok.to_s
@@ -700,25 +906,30 @@ end
                 when /^[a-z_]/;  localvars===name
                 when /^[A-Z]/; is_const=true  #this is the right algorithm for constants...
               end
-              if !ty and maybe_local
-                result.push(  *ignored_tokens(false,false)  )
-                nc=nextchar
+              result.push(  *ignored_tokens(false,false)  )
+              nc=nextchar
+              if !ty and maybe_local
                 if nc==?: || nc==?.
                   ty=VarNameToken
                 end
               end
-              unless ty
-                ty=MethNameToken
-                endofs=tok.offset+tok.to_s.length
-                result[tokindex+1...tokindex+1]=
-                  [ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
+              if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
+                   ty=MethNameToken
+                   if nc != ?(
+                     endofs=tok.offset+tok.to_s.length
+                     newtok=ImplicitParamListStartToken.new(endofs)
+                     result.insert tokindex+1, newtok
+                   end
               end
               assert result[tokindex].equal?(tok)
-              result[tokindex]=ty.new(tok.to_s,tok.offset)
+              var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
+              @localvars_stack.push SymbolTable.new
+              var.in_def=true if inside_method_def? and var.respond_to? :in_def=
+              result[tokindex]=var
-              #if a.b.c.d is seen, a, b, and c
+              #if a.b.c.d is seen, a, b and c
               #should be considered maybe varname instead of methnames.
               #the last (d in the example) is always considered a methname;
               #it's what's being defined.
@@ -727,8 +938,7 @@ end
               #a could even be a keyword (eg self or block_given?).
             end
             #read tail: .b.c.d etc
-            result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
-            ###@last_operative_token=result.last #naive
+            result.reverse_each{|res| break set_last_token res unless StillIgnoreToken===res}
             assert !(IgnoreToken===@last_operative_token)
             state=:expect_op
             @in_def_name=true
@@ -737,12 +947,22 @@ end
                #look for start of parameter list
                nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
                if state==:expect_op and /^[a-z_(&*]/i===nc
-                  result.concat def_param_list
+                  ctx.state=:def_param_list
+                  list,listend=def_param_list
+                  result.concat list
+                  end_index=result.index(listend)
+                  ofs=listend.offset
+                  if endofs
+                    result.insert end_index,ImplicitParamListEndToken.new(ofs)
+                  else
+                    ofs+=listend.to_s.size
+                  end
+                  result.insert end_index+1,EndDefHeaderToken.new(ofs)
                   break
                end
                tok=get1token
-               result<<tok
+               result<< tok
                case tok
                when EoiToken
                   lexerror tok,'unexpected eof in def header'
@@ -752,9 +972,18 @@ end
                   state=:expect_op
                when /^(\.|::)$/.token_pat
                   lexerror tok,'expected ident' unless state==:expect_op
+                  if endofs
+                    result.insert -2, ImplicitParamListEndToken.new(endofs)
+                    endofs=nil
+                  end
                   state=:expect_name
                when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
+                  ctx.state=:def_body
                   state==:expect_op or lexerror tok,'expected identifier'
+                  if endofs
+                    result.insert -2,ImplicitParamListEndToken.new(tok.offset)
+                  end
+                  result.insert -2, EndDefHeaderToken.new(tok.offset)
                   break
                else
                   lexerror(tok, "bizarre token in def name: " +
@@ -765,24 +994,34 @@ end
          }
       when "alias"
          safe_recurse { |a|
-            @last_operative_token=KeywordToken.new "alias" #hack
+            set_last_token KeywordToken.new "alias" #hack
             result.concat ignored_tokens
             res=symbol(eat_next_if(?:),false)
-            res ? result<<res : lexerror(result.first,"bad symbol in alias")
-            @last_operative_token=KeywordToken.new "alias" #hack
-            result.concat ignored_tokens
-            res=symbol(eat_next_if(?:),false)
-            res ? result<<res : lexerror(result.first,"bad symbol in alias")
+            unless res
+              lexerror(result.first,"bad symbol in alias")
+            else
+              res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
+              result<< res
+              set_last_token KeywordToken.new "alias" #hack
+              result.concat ignored_tokens
+              res=symbol(eat_next_if(?:),false)
+              unless res
+                lexerror(result.first,"bad symbol in alias")
+              else
+                res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
+                result<< res
+              end
+            end
          }
       when "undef"
          safe_recurse { |a|
             loop do
-               @last_operative_token=KeywordToken.new "," #hack
+               set_last_token KeywordToken.new "," #hack
                result.concat ignored_tokens
                tok=symbol(eat_next_if(?:),false)
                tok or lexerror(result.first,"bad symbol in undef")
                result<< tok
-               @last_operative_token=tok
+               set_last_token tok
                assert !(IgnoreToken===@last_operative_token)
                sawnl=false
@@ -809,13 +1048,13 @@ end
          unless after_nonid_op? {false}
            #rescue needs to be treated differently when in operator context...
            #i think no RescueSMContext should be pushed on the stack...
-           #plus, the rescue token should be marked as infix
-           result.first.set_infix!
+           result.first.set_infix!            #plus, the rescue token should be marked as infix
+           result.unshift(*abort_noparens_for_rescue!(str))
          else
            result.push KwParamListStartToken.new(offset+str.length)
            #corresponding EndToken emitted by abort_noparens! on leaving rescue context
-           result.unshift(*abort_noparens!(str))
            @parsestack.push RescueSMContext.new(@linenum)
+           result.unshift(*abort_noparens!(str))
          end
       when "then"
@@ -831,16 +1070,43 @@ end
          result.unshift(*abort_noparens!(str))
       when /\A(return|break|next)\Z/
-         result=yield
-         result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
+         fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
+         tok=KeywordToken.new(str,offset)
+         result=yield tok
+         result[0]=tok
+         tok.has_no_block!
+      when 'END'
+         #END could be treated, lexically, just as if it is an
+         #ordinary method, except that local vars created in
+         #END blocks are visible to subsequent code. (Why??)
+         #That difference forces a custom parsing.
+         if @last_operative_token===/^(\.|::)$/
+           result=yield nil #should pass a keyword token here
+         else
+           safe_recurse{
+             old=result.first
+             result=[
+               MethNameToken.new(old.ident,old.offset),
+               ImplicitParamListStartToken.new(input_position),
+               ImplicitParamListEndToken.new(input_position),
+               *ignored_tokens
+             ]
+             getchar=='{' or lexerror(result.first,"expected { after #{str}")
+             result.push KeywordToken.new('{',input_position-1)
+             result.last.set_infix!
+             @parsestack.push BeginEndContext.new(str,offset)
+           }
+         end
       when FUNCLIKE_KEYWORDS
-         result=yield
+         result=yield nil #should be a keyword token
       when RUBYKEYWORDS
         #do nothing
-      else result=yield
+      else result=yield nil
       end
@@ -881,11 +1147,11 @@ end
    #-----------------------------------
    def block_param_list_lookahead
       safe_recurse{ |la|
-         @last_operative_token=KeywordToken.new  ';'
+         set_last_token KeywordToken.new  ';'
          a=ignored_tokens
          if eat_next_if(?|)
-           a<<KeywordToken.new("|", input_position-1)
+           a<< KeywordToken.new("|", input_position-1)
 if true
            @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
            nextchar==?| and a.push NoWsToken.new(input_position)
@@ -909,7 +1175,7 @@ else
                end
                tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
-               a<<tok
+               a<< tok
              end
              assert@defining_lvar || AssignmentRhsContext===@parsestack.last
              @defining_lvar=false
@@ -920,14 +1186,14 @@ else
              @parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
              @parsestack.pop
-             a<<KeywordToken.new('|',tok.offset)
+             a<< KeywordToken.new('|',tok.offset)
              @moretokens.empty? or
                fixme %#moretokens might be set from get1token call above...might be bad#
 end
            end
          end
-         @last_operative_token=KeywordToken.new ';'
+         set_last_token KeywordToken.new ';'
          #a.concat ignored_tokens
          #assert @last_operative_token===';'
@@ -948,6 +1214,7 @@ end
       @in_def_name=false
       result=[]
       normal_comma_level=old_parsestack_size=@parsestack.size
+      listend=nil
       safe_recurse { |a|
          assert(@moretokens.empty?)
          assert((not IgnoreToken===@moretokens[0]))
@@ -972,18 +1239,22 @@ end
             alias === call
          end
-         @last_operative_token=KeywordToken.new ',' #hack
+         set_last_token KeywordToken.new ',' #hack
          #read local parameter names
+         nextvar=nil
          loop do
             expect_name=(@last_operative_token===',' and
                          normal_comma_level==@parsestack.size)
             expect_name and @defining_lvar||=true
             result << tok=get1token
-            lexerror tok, "unexpected eof in def header" if EoiToken===tok
+            break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
             #break if at end of param list
-               endingblock===tok and
-                 old_parsestack_size>=@parsestack.size and break
+            if endingblock===tok and old_parsestack_size>=@parsestack.size
+              nextvar and localvars[nextvar]=true #add nextvar to local vars
+              listend=tok
+              break
+            end
             #next token is a local var name
             #(or the one after that if unary ops present)
@@ -992,33 +1263,40 @@ end
               case tok
                 when IgnoreToken #, /^[A-Z]/ #do nothing
                 when /^,$/.token_pat #hack
                 when VarNameToken
                   assert@defining_lvar
                   @defining_lvar=false
                   assert((not @last_operative_token===','))
+#                  assert !nextvar
+                  nextvar=tok.ident
+                  localvars[nextvar]=false #remove nextvar from list of local vars for now
                 when /^[&*]$/.token_pat #unary form...
                   #a NoWsToken is also expected... read it now
                   result.concat maybe_no_ws_token #not needed?
-                  @last_operative_token=KeywordToken.new ','
+                  set_last_token KeywordToken.new ','
                 else
                   lexerror tok,"unfamiliar var name '#{tok}'"
               end
-            elsif /^,$/.token_pat===tok and
-                  normal_comma_level+1==@parsestack.size and
-                  AssignmentRhsContext===@parsestack.last
-              #seeing comma here should end implicit rhs started within the param list
-              result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
-              @parsestack.pop
+            elsif /^,$/.token_pat===tok
+              if normal_comma_level+1==@parsestack.size and
+                 AssignmentRhsContext===@parsestack.last
+                #seeing comma here should end implicit rhs started within the param list
+                result << AssignmentRhsListEndToken.new(tok.offset)
+                @parsestack.pop
+              end
+              if nextvar and normal_comma_level==@parsestack.size
+                localvars[nextvar]=true #now, finally add nextvar back to local vars
+                nextvar
+              end
             end
          end
          @defining_lvar=false
+         @parsestack.last.see self,:semi
          assert(@parsestack.size <= old_parsestack_size)
-         assert(endingblock[tok])
+         assert(endingblock[tok] || ErrorToken===tok)
          #hack: force next token to look like start of a
          #new stmt, if the last ignored_tokens
@@ -1026,42 +1304,54 @@ end
          #(just in case the next token parsed
          #happens to call quote_expected? or after_nonid_op)
          result.concat ignored_tokens
-         if  nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
-             !(NewlineToken===@last_operative_token) and
-             !(/^(end|;)$/===@last_operative_token)
-           @last_operative_token=KeywordToken.new ';'
+#         if  !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
+#             !(NewlineToken===@last_operative_token) and
+#             !(/^(end|;)$/===@last_operative_token)
+           #result<<EndDefHeaderToken.new(result.last.offset+result.last.to_s.size)
+           set_last_token KeywordToken.new ';'
            result<< get1token
-         end
+#         end
       }
-      return result
+      return result,listend
    end
    #-----------------------------------
    #handle % in ruby code. is it part of fancy quote or a modulo operator?
   def percent(ch)
-      if quote_expected? ch
+     if AssignmentContext===@parsestack.last #an AssignmentContext on top is popped and forces the operator reading below
+       @parsestack.pop
+       op=true
+     end
+     if !op and quote_expected?(ch)  || #parses as: !op and (quote_expected?(ch) || last token is a return/next/break keyword)
+       (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
         fancy_quote ch
-      else
+     else
         biop ch
-      end
+     end
   end
    #-----------------------------------
    #handle * & in ruby code. is unary or binary operator?
   def star_or_amp(ch)
-      assert('*&'[ch])
-      want_unary=unary_op_expected? ch
-      result=(quadriop ch)
-      if want_unary
-         #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
-         assert OperatorToken===result
-         result.unary=true         #result should distinguish unary+binary *&
-         WHSPLF[nextchar.chr] or
-           @moretokens << NoWsToken.new(input_position)
-      end
-      result
+     assert('*&'[ch])
+     want_unary=unary_op_expected?(ch) || #also treated as unary directly after a return/next/break keyword token
+       (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
+     result=quadriop(ch)
+     if want_unary
+       #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
+       assert OperatorToken===result
+       result.unary=true         #result should distinguish unary+binary *&
+       WHSPLF[nextchar.chr] or #queue a NoWsToken when the next char is not found in WHSPLF
+         @moretokens << NoWsToken.new(input_position)
+       comma_in_lvalue_list? #NOTE(review): return value unused, presumably called for a side effect - confirm
+       if ch=='*' #report a unary * to the context on top of the stack
+         @parsestack.last.see self, :splat
+       end
+     end
+     result
   end
    #-----------------------------------
@@ -1079,15 +1369,23 @@ end
    #-----------------------------------
   def regex_or_div(ch)
   #space after slash always means / operator, rather than regex start
-      if after_nonid_op?{ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}] }
-        return regex(ch)
-      else #/ is operator
-        result=getchar
-        if eat_next_if(?=)
-          result << '='
-        end
-        return(operator_or_methname_token result)
-      end
+   #= after slash always means /= operator, rather than regex start
+     if AssignmentContext===@parsestack.last #an AssignmentContext on top is popped and forces the operator branch below
+       @parsestack.pop
+       op=true
+     end
+     if !op and after_nonid_op?{ #parses as: !op and (after_nonid_op?{...} || last token is a "(" keyword)
+          !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
+        } || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
+       return regex(ch)
+     else #/ is operator
+       result=getchar
+       if eat_next_if(?=) #fold a following = into the operator, giving /=
+         result << '='
+       end
+       return(operator_or_methname_token result)
+     end
   end
    #-----------------------------------
@@ -1101,8 +1399,8 @@ end
      s=tok.to_s
      case s
      when /[^a-z_0-9]$/i; false
-     when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
-     when /^[A-Z]/; VarNameToken===tok
+#     when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
+     when /^[A-Z_]/i; VarNameToken===tok
      when /^[@$<]/; true
      else raise "not var or method name: #{s}"
      end
@@ -1139,18 +1437,22 @@ end
       unless eat_next_if(?:)
         #cancel implicit contexts...
         @moretokens.push(*abort_noparens!(':'))
+        @moretokens.push KeywordToken.new(':',startpos)
-        #end ternary context, if any
-        @parsestack.last.see self,:colon
-        TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
-        if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
+        case @parsestack.last
+        when TernaryContext: @parsestack.pop #should be in the context's see handler
+        when ExpectDoOrNlContext: #should be in the context's see handler
           @parsestack.pop
           assert @parsestack.last.starter[/^(while|until|for)$/]
+          @moretokens.last.as=";"
+        when RescueSMContext:
+          @moretokens.last.as=";"
+        else @moretokens.last.as="then"
         end
-        @moretokens.push KeywordToken.new(':',startpos)
+        #end ternary context, if any
+        @parsestack.last.see self,:colon
         return @moretokens.shift
       end
@@ -1182,9 +1484,15 @@ end
      opmatches=readahead(3)[RUBYSYMOPERATORREX]
      result= opmatches ? read(opmatches.size) :
        case nc=nextchar
-         when ?" then assert notbare;double_quote('"')
-         when ?' then assert notbare;double_quote("'")
-         when ?` then read(1)
+         when ?" #"
+           assert notbare
+           open=':"'; close='"'
+           double_quote('"')
+         when ?' #'
+           assert notbare
+           open=":'"; close="'"
+           single_quote("'")
+         when ?` then read(1) #`
          when ?@ then at_identifier.to_s
          when ?$ then dollar_identifier.to_s
          when ?_,?a..?z then identifier_as_string(?:)
@@ -1197,7 +1505,12 @@ end
            result
          else error= "unexpected char starting symbol: #{nc.chr}"
        end
-     return lexerror(klass.new(result,start),error)
+     result= lexerror(klass.new(result,start,notbare ?  ':' : ''),error)
+     if open
+       result.open=open
+       result.close=close
+     end
+     return result
    end
    def merge_assignment_op_in_setter_callsites?
@@ -1211,12 +1524,12 @@ end
      opmatches=readahead(3)[RUBYSYMOPERATORREX]
      return [opmatches ? read(opmatches.size) :
        case nc=nextchar
-         when ?` then read(1)
+         when ?` then read(1) #`
          when ?_,?a..?z,?A..?Z then
            context=merge_assignment_op_in_setter_callsites? ? ?: : nc
            identifier_as_string(context)
          else
-           @last_operative_token=KeywordToken.new(';')
+           set_last_token KeywordToken.new(';')
            lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
            nil
        end, start
@@ -1233,20 +1546,63 @@ end
         ender=til_charset(/[#{quote}]/)
         (quote==getchar) or
           return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
+        quote_real=true
       else
         quote='"'
         ender=til_charset(/[^a-zA-Z0-9_]/)
         ender.length >= 1  or
-          return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
+          return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
       end
-      res= HerePlaceholderToken.new( dash, quote, ender )
+      res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
+if true
+      res.open=["<<",dash,quote,ender,quote].to_s
+      procrastinated=til_charset(/[\n]/)#+readnl
+      unless @base_file
+        @base_file=@file
+        @file=Sequence::List.new([@file])
+        @file.pos=@base_file.pos
+      end
+      #actually delete procrastinated from input
+      @file.delete(input_position_raw-procrastinated.size...input_position_raw)
+      nl=readnl or return lexerror(res, "here header without body (at eof)")
+      @moretokens<< res
+      bodystart=input_position
+      @offset_adjust = @min_offset_adjust+procrastinated.size
+      #was: @offset_adjust += procrastinated.size
+      body=here_body(res)
+      res.close=body.close
+      @offset_adjust = @min_offset_adjust
+      #was: @offset_adjust -= procrastinated.size
+      bodysize=input_position-bodystart
+      #one or two already read characters are overwritten here,
+      #in order to keep offsets correct in the long term
+      #(at present, offsets and line numbers between
+      #here header and its body will be wrong. but they should re-sync thereafter.)
+      newpos=input_position_raw-nl.size
+      #unless procrastinated.empty?
+        @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
+      #end
+      input_position_set newpos
+      #line numbers would be wrong within the procrastinated section
+      @linenum-=1
+      #be nice to get the here body token at the right place in input, too...
+      @pending_here_bodies<< body
+      @offset_adjust-=bodysize#+nl.size
+      return @moretokens.shift
+else
       @incomplete_here_tokens.push res
       #hack: normally this should just be in get1token
       #this fixup is necessary because the call the get1token below
       #makes a recursion.
-      @last_operative_token=res
+      set_last_token res
       safe_recurse { |a|
          assert(a.object_id==@moretokens.object_id)
@@ -1269,7 +1625,7 @@ end
            tok=get1token
            assert(a.equal?( @moretokens))
-           toks<<tok
+           toks<< tok
            EoiToken===tok and lexerror tok, "here body expected before eof"
          end while res.unsafe_to_use
          assert(a.equal?( @moretokens))
@@ -1281,13 +1637,14 @@ end
       #the action continues in newline, where
       #the rest of the here token is read after a
       #newline has been seen and res.affix is eventually called
+end
    end
    #-----------------------------------
    def lessthan(ch) #match quadriop('<') or here doc or spaceship op
       case readahead(3)
-        when /^<<['"`\-a-z0-9_]$/i
-           if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
+        when /^<<['"`\-a-z0-9_]$/i #'
+           if quote_expected?(ch) and not @last_operative_token==='class'
               here_header
            else
               operator_or_methname_token read(2)
@@ -1309,101 +1666,231 @@ end
         error='illegal escape sequence'
       end
-      @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
-      optional_here_bodies
+      #optimization: when thru with regurgitated text from a here document,
+      #revert back to original unadorned Sequence instead of staying in the List.
+      if @base_file and indices=@file.instance_eval{@start_pos} and
+         (indices[-2]..indices[-1])===@file.pos
+        @base_file.pos=@file.pos
+        @file=@base_file
+        @base_file=nil
+        result="\n"
+      end
+      @offset_adjust=@min_offset_adjust
+      @moretokens.push *optional_here_bodies
+      ln=@linenum
+      @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
+                       FileAndLineToken.new(@filename,ln,input_position)
+      start_of_line_directives
-      lexerror EscNlToken.new(@filename,ln-1,result,pos), error
+      return @moretokens.shift
    end
    #-----------------------------------
    def optional_here_bodies
+     result=[]
+if true
       #handle here bodies queued up by previous line
-      #(we should be more compatible with dos/mac style newlines...)
+      pos=input_position
+      while body=@pending_here_bodies.shift
+        #body.offset=pos
+        result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
+        result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
+        result.push body
+        #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
+        #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
+        body.headtok.line=@linenum-1
+      end
+else
+      #...(we should be more compatible with dos/mac style newlines...)
       while tofill=@incomplete_here_tokens.shift
+        result.push(
+          here_body(tofill),
+          FileAndLineToken.new(@filename,@linenum,input_position)
+        )
+        assert(eof?  || "\r\n"[prevchar])
+        tofill.line=@linenum-1
+      end
+end
+     return result
+   end
+   #-----------------------------------
+   def here_body(tofill)
+         close="\n"
          tofill.string.offset= input_position
+         linecount=1 #for terminator
+         assert("\n"==prevchar)
          loop {
-            assert("\r\n"[prevchar])
+            assert("\n"==prevchar)
             #here body terminator?
-            oldpos= input_position
+            oldpos= input_position_raw
             if tofill.dash
-              til_charset(/[^#{WHSP}]/o)
+              close+=til_charset(/[^#{WHSP}]/o)
+            end
+            break if eof? #this is an error, should be handled better
+            if read(tofill.ender.size)==tofill.ender
+              crs=til_charset(/[^\r]/)||''
+              if nl=readnl
+                close+=tofill.ender+crs+nl
+                break
+              end
             end
-            break if eof?
-            break if read(tofill.ender.size)==tofill.ender and readnl
             input_position_set oldpos
+            assert("\n"==prevchar)
             if tofill.quote=="'"
-              line=til_charset(/[\r\n]/)+readnl
-              line.gsub! "\\\\", "\\"
+              line=til_charset(/[\n]/)
+              unless nl=readnl
+                assert eof?
+                break  #this is an error, should be handled better
+              end
+              line.chomp!("\r")
+              line<< "\n"
+              assert("\n"==prevchar)
+              #line.gsub! "\\\\", "\\"
               tofill.append line
-              assert(line[-1..-1][/[\r\n]/])
+              tofill.string.bs_handler=:squote_heredoc_esc_seq
+              linecount+=1
+              assert("\n"==line[-1,1])
+              assert("\n"==prevchar)
             else
+              assert("\n"==prevchar)
               back1char  #-1 to make newline char the next to read
               @linenum-=1
+              assert /[\r\n]/===nextchar.chr
               #retrieve everything until the next newline
+if FASTER_STRING_ESCAPES
+              line=all_quote("\r\n", tofill.quote, "\r\n")
+else
               line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
+end
+              linecount+=1
               #(you didn't know all_quote could take a regex, did you?)
+              assert("\n"==prevchar)
               #get rid of fals that otherwise appear to be in the middle of
               #a string (and are emitted out of order)
               fal=@moretokens.pop
               assert FileAndLineToken===fal || fal.nil?
+              assert line.bs_handler
+              tofill.string.bs_handler||=line.bs_handler
+              tofill.append_token line
+              tofill.string.elems<<'' unless String===tofill.string.elems.last
+              assert("\n"==prevchar)
               back1char
               @linenum-=1
               assert("\r\n"[nextchar.chr])
-              tofill.append_token line
               tofill.append readnl
+              assert("\n"==prevchar)
             end
+            assert("\n"==prevchar)
          }
-         assert(eof?  || "\r\n"[prevchar])
+         str=tofill.string
+         str.bs_handler||=:dquote_esc_seq if str.elems.size==1 and str.elems.first==''
          tofill.unsafe_to_use=false
-         tofill.line=@linenum-1
-         @moretokens.push \
-           tofill.bodyclass.new(tofill),
-           FileAndLineToken.new(@filename,@linenum,input_position)
-      end
+         assert str.bs_handler
+           #?? or tofill.string.elems==[]
+        tofill.string.instance_eval{@char="`"} if tofill.quote=="`"
+        #special cased, but I think that's all that's necessary...
+        result=tofill.bodyclass.new(tofill,linecount)
+        result.open=str.open=""
+        tofill.close=close
+        result.close=str.close=close[1..-1]
+        result.offset=str.offset
+        assert str.open
+        assert str.close
+        return result
    end
    #-----------------------------------
    def newline(ch)
       assert("\r\n"[nextchar.chr])
       #ordinary newline handling (possibly implicitly escaped)
       assert("\r\n"[nextchar.chr])
                    assert !@parsestack.empty?
       assert @moretokens.empty?
-      result=if NewlineToken===@last_operative_token or #hack
-                @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
-                !after_nonid_op?{false}
-             then   #hack-o-rama: probly cases left out above
-                   a= abort_noparens!
-                   ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
-                   assert !@parsestack.empty?
-                   @parsestack.last.see self,:semi
-                   a << super(ch)
-                   @moretokens.replace a+@moretokens
-                   @moretokens.shift
-             else
-                   offset= input_position
-                   nl=readnl
-                   @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
-                   EscNlToken.new(@filename,@linenum-1,nl,offset)
-                   #WsToken.new ' ' #why?  #should be "\\\n" ?
-             end
-      optional_here_bodies
+      pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
+      pre.allow_ooo_offset=true
+      if NewlineToken===@last_operative_token or #hack
+         (KeywordToken===@last_operative_token and
+          @last_operative_token.ident=="rescue" and
+          !@last_operative_token.infix?)  or
+         #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
+         !after_nonid_op?{false}
+      then   #hack-o-rama: probly cases left out above
+        @offset_adjust=@min_offset_adjust
+        a= abort_noparens!
+        ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
+        assert !@parsestack.empty?
+        @parsestack.last.see self,:semi
+        a << super(ch)
+        @moretokens.replace a+@moretokens
+      else
+        @offset_adjust=@min_offset_adjust
+        offset= input_position
+        nl=readnl
+        @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
+           FileAndLineToken.new(@filename,@linenum,input_position)
+      end
+      #optimization: when thru with regurgitated text from a here document,
+      #revert back to original unadorned Sequence instead of staying in the list.
+      if @base_file and indices=@file.instance_eval{@start_pos} and
+         (indices[-2]..indices[-1])===@file.pos and Sequence::SubSeq===@file.list.last
+        @base_file.pos=@file.pos
+        @file=@base_file
+        @base_file=nil
+      end
+      fal=@moretokens.last
+      assert FileAndLineToken===fal
+      @offset_adjust=@min_offset_adjust
+      @moretokens.unshift(*optional_here_bodies)
+      result=@moretokens.shift
+      #adjust line count in fal to account for newlines in here bodies
+      i=@moretokens.size-1
+      while(i>=0)
+        #assert FileAndLineToken===@moretokens[i]
+        i-=1 if FileAndLineToken===@moretokens[i]
+        break unless HereBodyToken===@moretokens[i]
+        pre_fal=true
+        fal.line-=@moretokens[i].linecount
+        i-=1
+      end
+      if pre_fal
+        @moretokens.unshift result
+        pre.offset=result.offset
+        result=pre
+      end
       start_of_line_directives
       return result
@@ -1424,15 +1911,16 @@ end
          begin
            eof? and raise "eof before =end"
-           more<<til_charset(/[\r\n]/)
-           more<<readnl
+           more<< til_charset(/[\r\n]/)
+           eof? and raise "eof before =end"
+           more<< readnl
          end until readahead(EQENDLENGTH)==EQEND
          #read rest of line after =end
          more << til_charset(/[\r\n]/)
-         assert((?\r===nextchar or ?\n===nextchar))
+         assert((eof? or ?\r===nextchar or ?\n===nextchar))
          assert !(/[\r\n]/===more[-1,1])
-         more<< readnl
+         more<< readnl unless eof?
 #         newls= more.scan(/\r\n?|\n\r?/)
 #         @linenum+= newls.size
@@ -1445,7 +1933,7 @@ end
       #handle __END__
       if ENDMARKER===readahead(ENDMARKERLENGTH)
          assert !(ImplicitContext===@parsestack.last)
-         @moretokens.unshift endoffile_detected(read(7))
+         @moretokens.unshift endoffile_detected(read(ENDMARKERLENGTH))
 #         input_position_set @file.size
       end
    end
@@ -1460,11 +1948,13 @@ end
   def unary_op_expected?(ch) #yukko hack
     '*&='[readahead(2)[1..1]] and return false
+    return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
     after_nonid_op? {
       #possible func-call as operator
       not is_var_name? and
-        WHSPLF[prevchar]
+        WHSPLF[prevchar] and !WHSPLF[readahead(2)[1..1]]
     }
   end
@@ -1473,11 +1963,6 @@ end
    # <<, %, ? in ruby
    #returns whether current token is to be the start of a literal
    def quote_expected?(ch) #yukko hack
-     if AssignmentContext===@parsestack.last
-       @parsestack.pop
-       return false
-     end
      case ch[0]
           when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
           when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
@@ -1500,17 +1985,23 @@ end
    #used to resolve the ambiguity of
    # <<, %, /, ?, :, and newline (among others) in ruby
    def after_nonid_op?
+    #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
+#      if ImplicitParamListStartToken===@last_token_including_implicit
+#        huh return true
+#      end
       case @last_operative_token
-         when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
+         when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
          #VarNameToken should really be left out of this case...
          #should be in next branch instead
          #callers all check for last token being not a variable if they pass anything
-         #but {false} in the block
+         #but {false} in the block
+         #(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
             return yield
          when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
               %r{^(
-                class|module|end|self|true|false|nil|
-                __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
+                end|self|true|false|nil|
+                __FILE__|__LINE__|[\})\]]
               )$}x.token_pat
             #dunno about def/undef
             #maybe class/module shouldn't be here either?
@@ -1522,17 +2013,16 @@ end
             #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
             return true
          when NewlineToken, nil,   #nil means we're still at beginning of file
-              /^([({\[]|or|not|and|if|unless|then|elsif|else|
-                 while|until|begin|for|in|case|when|ensure)$
+              /^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
+                 while|until|begin|for|in|case|when|ensure|defined\?)$
               /x.token_pat
             return true
-         #when KeywordToken
-         #   return true
+         when KeywordToken
+            return true if /^(alias|undef)$/===@last_operative_token.ident  #is this ever actually true???
          when IgnoreToken
             raise "last_operative_token shouldn't be ignoreable"
-         else
-            raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
       end
+      raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
    end
@@ -1577,10 +2067,10 @@ end
    #-----------------------------------
    def biop(ch) #match /%=?/ (% or %=)
-      assert(ch[/^[%^~]$/])
+      assert(ch[/^[%^]$/])
       result=getchar
       if eat_next_if(?=)
-         result <<?=
+         result << ?=
       end
       return operator_or_methname_token( result)
    end
@@ -1610,7 +2100,9 @@ end
    #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
    def plusminus(ch)
       assert(/^[+\-]$/===ch)
-      if unary_op_expected?(ch)
+      if unary_op_expected?(ch) or
+         KeywordToken===@last_operative_token &&
+         /^(return|break|next)$/===@last_operative_token.ident
         if (?0..?9)===readahead(2)[1]
           return number(ch)
         else #unary operator
@@ -1619,7 +2111,6 @@ end
             @moretokens << NoWsToken.new(input_position)
           result=(operator_or_methname_token result)
           result.unary=true
-          #todo: result should distinguish unary+binary +-
         end
       else #binary operator
          assert(! want_op_name)
@@ -1628,9 +2119,8 @@ end
             result << ?=
          end
          result=(operator_or_methname_token result)
-         #todo: result should distinguish unary+binary +-
       end
-      result
+      return result
    end
    #-----------------------------------
@@ -1642,19 +2132,31 @@ end
       str << c
       result= operator_or_methname_token( str,offset)
       case c
-      when '=': str<< (eat_next_if(?=)or'')
+      when '=': #===,==
+        str<< (eat_next_if(?=)or'')
-      when '>':
+      when '>': #=>
         unless ParamListContextNoParen===@parsestack.last
           @moretokens.unshift result
           @moretokens.unshift( *abort_noparens!("=>"))
           result=@moretokens.shift
         end
         @parsestack.last.see self,:arrow
-      when '': #record local variable definitions
+      when '': #plain assignment: record local variable definitions
+        last_context_not_implicit.lhs=false
+        @moretokens.push *ignored_tokens(true).map{|x|
+          NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
+        }
         @parsestack.push AssignmentRhsContext.new(@linenum)
-        @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
+        if eat_next_if ?*
+          tok=OperatorToken.new('*', input_position-1)
+          tok.unary=true
+          @moretokens.push tok
+          WHSPLF[nextchar.chr] or
+            @moretokens << NoWsToken.new(input_position)
+          comma_in_lvalue_list? #is this needed?
+        end
+        @moretokens.push AssignmentRhsListStartToken.new( input_position)
       end
       return result
    end
@@ -1666,6 +2168,7 @@ end
       k=eat_next_if(/[~=]/)
       if k
         result+=k
+      elsif eof?: #do nothing
       else
         WHSPLF[nextchar.chr] or
           @moretokens << NoWsToken.new(input_position)
@@ -1693,10 +2196,11 @@ end
    #-----------------------------------
    def dot_rhs(prevtok)
       safe_recurse { |a|
-         @last_operative_token=prevtok
+         set_last_token prevtok
          aa= ignored_tokens
+         was=after_nonid_op?{true}
          tok,pos=callsite_symbol(prevtok)
-         tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
+         tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
          a.unshift(*aa)
       }
    end
@@ -1705,7 +2209,7 @@ end
   def back_quote(ch=nil)
     if @last_operative_token===/^(def|::|\.)$/
       oldpos= input_position
-      MethNameToken.new(eat_next_if(?`), oldpos)
+      MethNameToken.new(eat_next_if(?`), oldpos) #`
     else
       double_quote(ch)
     end
@@ -1716,7 +2220,7 @@ if false
    def comment(str)
      result=""
      #loop{
-       result<<super(nil).to_s
+       result<< super(nil).to_s
        if /^\#.*\#$/===result #if comment was ended by a crunch
@@ -1762,7 +2266,7 @@ end
                tokch= NoWsToken.new(input_position-1)
         end
       when '('
-        lasttok=last_operative_token
+        lasttok=last_token_maybe_implicit #last_operative_token
         #could be: lasttok===/^[a-z_]/i
         if (VarNameToken===lasttok or MethNameToken===lasttok or
             lasttok===FUNCLIKE_KEYWORDS)
@@ -1781,15 +2285,17 @@ end
       if after_nonid_op?{false} or @last_operative_token.has_no_block?
         @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
       else
+        #abort_noparens!
         tokch.set_infix!
-=begin not needed now, i think
+        tokch.as="do"
+#=begin not needed now, i think
         # 'need to find matching callsite context and end it if implicit'
         lasttok=last_operative_token
-        unless lasttok===')' and lasttok.callsite?
+        if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
           @moretokens.push *(abort_1_noparen!(1).push tokch)
           tokch=@moretokens.shift
         end
-=end
+#=end
         localvars.start_block
         @parsestack.push BlockContext.new(@linenum)
@@ -1811,13 +2317,18 @@ end
       end
       ctx=@parsestack.pop
       origch,line=ctx.starter,ctx.linenum
-      ch==PAIRS[origch] or
+      if ch!=PAIRS[origch]
+        #kw.extend MismatchedBrace
         lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
                  "matching brace location", @filename, line
-      BlockContext===ctx and localvars.end_block
+      end
+      if BlockContext===ctx
+        localvars.end_block
+        @moretokens.last.as="end"
+      end
       if ParamListContext==ctx.class
         assert ch==')'
-        #kw.set_callsite! #not needed?
+        kw.set_callsite! #not needed?
       end
       return @moretokens.shift
    end
@@ -1826,19 +2337,24 @@ end
    def eof(ch=nil)
      #this must be the very last character...
      oldpos= input_position
-     assert(?\0==getc)
+     assert(/\A[\x0\x4\x1a]\Z/===nextchar.chr)
-     result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
+     result=@file.read!
+#     result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
-     eof? or
-        lexerror result,'nul character is not at the end of file'
-     input_position_set @file.size
+#     eof? or
+#        lexerror result,'nul character is not at the end of file'
+#     input_position_set @file.size
      return(endoffile_detected result)
    end
    #-----------------------------------
    def endoffile_detected(s='')
      @moretokens.push( *(abort_noparens!.push super(s)))
+     if @progress_thread
+       @progress_thread.kill
+       @progress_thread=nil
+     end
      result= @moretokens.shift
      balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
      result
@@ -1851,7 +2367,26 @@ end
   #-----------------------------------
   def comma(ch)
-    single_char_token(ch)
+    @moretokens.push token=single_char_token(ch)
+    if AssignmentRhsContext===@parsestack[-1] and
+       ParamListContext===@parsestack[-2] ||
+       ParamListContextNoParen===@parsestack[-2] ||
+       WhenParamListContext===@parsestack[-2] ||
+       (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
+       (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
+         @parsestack.pop
+         @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
+    end
+    token.comma_type=
+    case @parsestack[-1]
+    when AssignmentRhsContext: :rhs
+    when ParamListContext,ParamListContextNoParen: :call
+    when ListImmedContext: :array
+    else
+      :lhs if comma_in_lvalue_list?
+    end
+    @parsestack.last.see self,:comma
+    return @moretokens.shift
   end
   #-----------------------------------
@@ -1872,7 +2407,7 @@ end
     assert RUBYOPERATORREX===s
     if RUBYNONSYMOPERATORREX===s
       KeywordToken
-    elsif @last_operative_token===/^(\.|::|def|undef|alias|defined\?)$/
+    elsif want_op_name
       MethNameToken
     else
       OperatorToken
@@ -1882,9 +2417,7 @@ end
   #-----------------------------------
   #tokenify_results_of  :identifier
   save_offsets_in(*CHARMAPPINGS.values.uniq-[
-    :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
+    :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
   ])
   #save_offsets_in :symbol