rubylexer 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. data/History.txt +90 -0
  2. data/Manifest.txt +54 -3
  3. data/README.txt +4 -7
  4. data/Rakefile +3 -2
  5. data/lib/rubylexer.rb +856 -323
  6. data/lib/rubylexer/0.7.0.rb +11 -2
  7. data/lib/rubylexer/0.7.1.rb +2 -0
  8. data/lib/rubylexer/charhandler.rb +4 -4
  9. data/lib/rubylexer/context.rb +86 -9
  10. data/lib/rubylexer/rulexer.rb +455 -101
  11. data/lib/rubylexer/token.rb +166 -43
  12. data/lib/rubylexer/tokenprinter.rb +16 -8
  13. data/lib/rubylexer/version.rb +1 -1
  14. data/rubylexer.vpj +98 -0
  15. data/test/code/all_the_gems.rb +33 -0
  16. data/test/code/all_the_raas.rb +226 -0
  17. data/test/code/all_the_rubies.rb +2 -0
  18. data/test/code/deletewarns.rb +19 -1
  19. data/test/code/dumptokens.rb +39 -8
  20. data/test/code/errscan +2 -0
  21. data/test/code/isolate_error.rb +72 -0
  22. data/test/code/lexloop +14 -0
  23. data/test/code/locatetest.rb +150 -8
  24. data/test/code/regression.rb +109 -0
  25. data/test/code/rubylexervsruby.rb +53 -15
  26. data/test/code/strgen.rb +138 -0
  27. data/test/code/tarball.rb +144 -0
  28. data/test/code/testcases.rb +11 -0
  29. data/test/code/tokentest.rb +115 -24
  30. data/test/data/__eof2.rb +1 -0
  31. data/test/data/__eof5.rb +2 -0
  32. data/test/data/__eof6.rb +2 -0
  33. data/test/data/cvtesc.rb +17 -0
  34. data/test/data/g.rb +6 -0
  35. data/test/data/hd0.rb +3 -0
  36. data/test/data/hdateof.rb +2 -0
  37. data/test/data/hdempty.rb +3 -0
  38. data/test/data/hdr.rb +9 -0
  39. data/test/data/hdr_dos.rb +13 -0
  40. data/test/data/hdr_dos2.rb +18 -0
  41. data/test/data/heart.rb +2 -0
  42. data/test/data/here_escnl.rb +25 -0
  43. data/test/data/here_escnl_dos.rb +20 -0
  44. data/test/data/here_squote.rb +3 -0
  45. data/test/data/heremonsters.rb +140 -0
  46. data/test/data/heremonsters.rb.broken +68 -0
  47. data/test/data/heremonsters.rb.broken.save +68 -0
  48. data/test/data/heremonsters_dos.rb +140 -0
  49. data/test/data/heremonsters_dos.rb.broken +68 -0
  50. data/test/data/illegal_oneliners.rb +1 -0
  51. data/test/data/illegal_stanzas.rb +0 -0
  52. data/test/data/make_ws_strdelim.rb +22 -0
  53. data/test/data/maven2_builer_test.rb +82 -0
  54. data/test/data/migration.rb +8944 -0
  55. data/test/data/modl.rb +6 -0
  56. data/test/data/modl_dos.rb +7 -0
  57. data/test/data/modl_fails.rb +10 -0
  58. data/test/data/multilinestring.rb +6 -0
  59. data/test/data/oneliners.rb +555 -0
  60. data/test/data/p-op.rb +2 -0
  61. data/test/data/p.rb +3 -1710
  62. data/test/data/s.rb +90 -21
  63. data/test/data/simple.rb +1 -0
  64. data/test/data/simple_dos.rb +1 -0
  65. data/test/data/stanzas.rb +1194 -0
  66. data/test/data/strdelim_crlf.rb +6 -0
  67. data/test/data/stuff.rb +6 -0
  68. data/test/data/stuff2.rb +5 -0
  69. data/test/data/stuff3.rb +6 -0
  70. data/test/data/stuff4.rb +6 -0
  71. data/test/data/tkweird.rb +20 -0
  72. data/test/data/unending_stuff.rb +5 -0
  73. data/test/data/whatnot.rb +8 -0
  74. data/test/data/ws_strdelim.rb +0 -0
  75. data/test/test.sh +239 -0
  76. data/testing.txt +39 -50
  77. metadata +110 -12
  78. data/test/code/dl_all_gems.rb +0 -43
  79. data/test/code/unpack_all_gems.rb +0 -15
  80. data/test/data/gemlist.txt +0 -280
data/History.txt CHANGED
@@ -1,3 +1,93 @@
1
+ === 0.7.1/10-29-2008
2
+ * 6 Major Enhancements:
3
+ * handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
4
+ * yet more hacks in aid of string inclusions
5
+ * backslashes in strings are no longer interpreted automatically when lexed
6
+ * here documents are completely rewritten in a tricky way that more closely mimics what MRI does
7
+ * many more flags for tokens to tell apart the various cases:
8
+ * the various different local variable types have to be detected.
9
+ * colons which operate like semicolons or thens are marked as such
10
+ * { } used in block now flagged as parsing like do and end
11
+ * commas now are marked with different types depending on how they're used
12
+ * @variables in methods need to be marked as such, so their parsetree can come out different.
13
+ * clearly mark backquoted strings
14
+ * further refinements of local variable detection and implicit paren placement near these cases:
15
+ * when ws between method name and parenthesis
16
+ * break/return/next
17
+ * ? : << / rescue do
18
+
19
+ * 5 Minor Enhancements
20
+ * colon or star in assignment make it a multi assignment
21
+ * presence of unary * or & in param list forces it to be a multi-param list
22
+ * errors in string inclusions should now be handled better
23
+ * string and stringlike tokens now can tell you the exact sequence of chars used to open and close the string.
24
+ * correctly handling more cases where return/break/next parse differently than a method (yuck!)
25
+
26
+ * 26 Bugfixes
27
+ * ~ operator can be followed with an @, like + and -
28
+ * ~ is overridable, however :: is not
29
+ * raise is not a keyword
30
+ * in addition to 0x00, 0x04 and 0x1a should be considered eof in ruby. why? idunno.
31
+ * setting PROGRESS env var will cause input file position to be printed to stderr periodically.
32
+ * defined? is not a funclike keyword... really more of a unary operator
33
+ * $- is a legitimate global variable.
34
+ * better parsing of lvalue list following for keyword.
35
+ * rescue is a variable define context only when right after => and before then (or disguises).
36
+ * better placement of implicit parens around def param list
37
+ * (global) variable aliasing now supported
38
+ * local vars in END block are NOT scoped to the block!
39
+ * local vars in def param lists aren't considered variables til after the initializer for that var
40
+ * end of def header is treated like ; even if none is present
41
+ * never put here document right after class keyword
42
+ * look for start of line directives at end of here document
43
+ * oops, mac newlines don't have to be supported
44
+ * dos newlines better tolerated around here documents
45
+ * less line number/offset confusion around here documents
46
+ * newline after (non-operator) rescue is hard (but not after INNERBOUNDINGWORDS)
47
+ * handling eof in more strange places
48
+ * always expect unary op after for
49
+ * unary ops should know about the before-but-not-after rule!
50
+ * newlines after = should be escaped
51
+ * \c? and \C-? are not interpreted the same as other ctrl chars
52
+ * \n\r and \r are not recognized as nl sequences
53
+
54
+ * 18 Internal Changes (not user visible)
55
+ * commas cause a :comma event on the parsestack
56
+ * some of the lists of types of operators are available now as arrays of strings instead of regexps
57
+ * single and double quote now have separate implementations again
58
+ * keep track of whether an implicit open or close paren has just been emitted
59
+ * put ws around << to keep slickedit happy
60
+ * the eof characters are also considered whitespace.
61
+ * identifier lexer now uses regexps more heavily
62
+ * method formal parameter list is not considered an lvalue context for commas.
63
+ * class and def now have their own parse contexts
64
+ * unary star causes a :splat event on the parsestack
65
+ * is_var_name now detects var tokens just from the token type, not looking at local vars table.
66
+ * a faster regexp-based implementation of string scanning
67
+ * moved yucky side effect out of quote_expected?
68
+ * these keywords: class module def for defined? no longer automatically create operator context
69
+ * a new context for BEGIN/END keywords
70
+ * a new context for param list of return/next/break
71
+ * new escape sequence processors for regexp and %W list
72
+ * numbers now scanned with a regexp
73
+
74
+ * 15 Enhancements and bug fixes to tests:
75
+ * just print a notice on errors which are also syntax errors for ruby
76
+ * a little cleanup of temp files
77
+ * rubylexervsruby and tokentest can take input from stdin
78
+ * unlexer improvements
79
+ * dumptokens now has a --silent cmdline option
80
+ * locatetest.rb is significantly enhanced
81
+ * --unified option to diff seems to work better than -u
82
+ * tokentest better verifies exact token contents...
83
+ * tokentest now uses open and close fields of strings to verify string bounds exactly
84
+ * CRLF in a string is always treated like just a LF. (CR is elided.)
85
+ * allow_ooo hacky flag marks tokens whose offset errors are to be ignored.
86
+ * all other offset errors have been downgraded to warnings.
87
+ * most of the offset problems I had been seeing have been fixed, though
88
+ * offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
89
+ * tokentest has a --loop option, for load testing
90
+
1
91
  === 0.7.0/2-15-2008
2
92
  * implicit tokens are now emitted at the right times (need more test code)
3
93
  * local variables are now temporarily hidden by class, module, and def
data/Manifest.txt CHANGED
@@ -19,7 +19,6 @@ lib/rubylexer/symboltable.rb
19
19
  lib/rubylexer/charhandler.rb
20
20
  lib/assert.rb
21
21
  lib/rubylexer.rb
22
- test/data/gemlist.txt
23
22
  test/data/blockassigntest.rb
24
23
  test/data/for.rb
25
24
  test/data/chunky_bacon.rb
@@ -58,10 +57,62 @@ test/data/chunky_bacon2.rb
58
57
  test/data/format.rb
59
58
  test/code/locatetest.rb
60
59
  test/code/rubylexervsruby.rb
61
- test/code/dl_all_gems.rb
62
- test/code/unpack_all_gems.rb
63
60
  test/code/tokentest.rb
64
61
  test/code/dumptokens.rb
65
62
  test/code/torment
66
63
  test/code/locatetest
67
64
  test/code/deletewarns.rb
65
+ lib/rubylexer/0.7.1.rb
66
+ rubylexer.vpj
67
+ test/code/all_the_gems.rb
68
+ test/code/all_the_raas.rb
69
+ test/code/all_the_rubies.rb
70
+ test/code/errscan
71
+ test/code/isolate_error.rb
72
+ test/code/lexloop
73
+ test/code/regression.rb
74
+ test/code/strgen.rb
75
+ test/code/tarball.rb
76
+ test/code/testcases.rb
77
+ test/data/chunky.plain.rb
78
+ test/data/cvtesc.rb
79
+ test/data/__eof2.rb
80
+ test/data/__eof5.rb
81
+ test/data/__eof6.rb
82
+ test/data/hd0.rb
83
+ test/data/hdateof.rb
84
+ test/data/hdempty.rb
85
+ test/data/hdr_dos2.rb
86
+ test/data/hdr_dos.rb
87
+ test/data/hdr.rb
88
+ test/data/here_escnl_dos.rb
89
+ test/data/here_escnl.rb
90
+ test/data/heremonsters_dos.rb
91
+ test/data/heremonsters_dos.rb.broken
92
+ test/data/heremonsters.rb
93
+ test/data/heremonsters.rb.broken
94
+ test/data/heremonsters.rb.broken.save
95
+ test/data/here_squote.rb
96
+ test/data/illegal_oneliners.rb
97
+ test/data/illegal_stanzas.rb
98
+ test/data/make_ws_strdelim.rb
99
+ test/data/maven2_builer_test.rb
100
+ test/data/migration.rb
101
+ test/data/modl_dos.rb
102
+ test/data/modl_fails.rb
103
+ test/data/modl.rb
104
+ test/data/multilinestring.rb
105
+ test/data/oneliners.rb
106
+ test/data/simple_dos.rb
107
+ test/data/simple.rb
108
+ test/data/stanzas.rb
109
+ test/data/strdelim_crlf.rb
110
+ test/data/stuff2.rb
111
+ test/data/stuff3.rb
112
+ test/data/stuff4.rb
113
+ test/data/stuff.rb
114
+ test/data/tkweird.rb
115
+ test/data/unending_stuff.rb
116
+ test/data/whatnot.rb
117
+ test/data/ws_strdelim.rb
118
+ test/test.sh
data/README.txt CHANGED
@@ -67,10 +67,7 @@ keywords, depending on context:
67
67
  any overrideable operator and most keywords can also be method names
68
68
 
69
69
  == todo
70
- test w/ more code (rubygems, rpa, obfuscated ruby contest, rubicon, others?)
71
- these 5 should be my standard test suite: p.rb, (matz') test.rb, tk.rb, obfuscated ruby contest, rubicon
72
70
  test more ways: cvt source to dos or mac fmt before testing
73
- test more ways: run unit tests after passing thru rubylexer (0.7)
74
71
  test more ways: test require'd, load'd, or eval'd code as well (0.7)
75
72
  lex code a line (or chunk) at a time and save state for next line (irb wants this) (0.8)
76
73
  incremental lexing (ides want this (for performance))
@@ -78,12 +75,10 @@ put everything in a namespace
78
75
  integrate w/ other tools...
79
76
  html colorized output?
80
77
  move more state onto @parsestack (ongoing)
81
- the new cases in p.rb now compile, but won't run
82
78
  expand on test documentation
83
79
  use want_op_name more
84
80
  return result as a half-parsed tree (with parentheses and the like matched)
85
81
  emit advisory tokens when see beginword, then (or equivalent), or end... what else does florian want?
86
- strings are still slow
87
82
  emit advisory tokens when local var defined/goes out of scope (or hidden/unhidden?)
88
83
  token pruning in dumptokens...
89
84
 
@@ -96,8 +91,10 @@ string tokenization sometimes a little different from ruby around newlines
96
91
  string contents might not be correctly translated in a few cases (0.8?)
97
92
  symbols which contain string interpolations are flattened into one token. eg :"foo#{bar}" (0.8)
98
93
  '\r' whitespace sometimes seen in dos-formatted output.. shouldn't be (eg pre.rb) (0.7)
99
- windows or mac newline in source are likely to cause problems in obscure cases (need test case)
94
+ windows newline in source is likely to cause problems in obscure cases (need test case)
100
95
  unterminated =begin is not an error (0.8)
101
96
  ruby 1.9 completely unsupported (0.9)
102
97
  character sets other than ascii are not supported at all (1.0)
103
-
98
+ regression test currently shows 14 errors with differences in exact token ordering
99
+ -around string inclusions. these errors are much less serious than they seem.
100
+ offset of AssignmentRhsListEndToken appears to be off by 1
data/Rakefile CHANGED
@@ -13,12 +13,13 @@ require 'lib/rubylexer/version.rb'
13
13
  hoe=Hoe.new("rubylexer", RubyLexer::VERSION) do |_|
14
14
  _.author = "Caleb Clausen"
15
15
  _.email = "rubylexer-owner @at@ inforadical .dot. net"
16
- _.url = "http://rubylexer.rubyforge.org/"
17
- _.extra_deps = ["sequence"]
16
+ _.url = ["http://rubylexer.rubyforge.org/", "http://rubyforge.org/projects/rubylexer/"]
17
+ _.extra_deps << ['sequence', '>= 0.2.0']
18
18
  _.test_globs=["test/{code/*,data/*rb*,results/}"]
19
19
  _.description=desc
20
20
  _.summary=desc[/\A[^.]+\./]
21
21
  _.spec_extras={:bindir=>''}
22
+ _.rdoc_pattern=/\A(howtouse\.txt|testing\.txt|README\.txt|lib\/.*\.rb)\Z/
22
23
  end
23
24
 
24
25
 
data/lib/rubylexer.rb CHANGED
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -18,7 +18,6 @@
18
18
  =end
19
19
 
20
20
 
21
-
22
21
  require 'rubylexer/rulexer' #must be 1st!!!
23
22
  require 'rubylexer/version'
24
23
  require 'rubylexer/token'
@@ -32,9 +31,11 @@ require 'rubylexer/tokenprinter'
32
31
  #-----------------------------------
33
32
  class RubyLexer
34
33
  include NestedContexts
34
+
35
+
35
36
 
36
37
  RUBYSYMOPERATORREX=
37
- %r{^([&|^/%~]|=(==?|~)|>[=>]?|<(<|=>?)?|[+\-]@?|\*\*?|\[\]=?)}
38
+ %r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
38
39
  # (nasty beastie, eh?)
39
40
  #these are the overridable operators
40
41
  #does not match flow-control operators like: || && ! or and if not
@@ -42,23 +43,25 @@ class RubyLexer
42
43
  #or .. ... ?:
43
44
  #for that use:
44
45
  RUBYNONSYMOPERATORREX=
45
- %r{^([%^~/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|=>?|![=~]?)$}
46
+ %r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
46
47
  RUBYOPERATORREX=/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
47
48
  UNSYMOPS=/^[~!]$/ #always unary
48
49
  UBSYMOPS=/^([*&+-]|::)$/ #ops that could be unary or binary
49
50
  WHSPCHARS=WHSPLF+"\\#"
50
- OPORBEGINWORDS="(if|unless|while|until)"
51
- BEGINWORDS=/^(def|class|module|begin|for|case|do|#{OPORBEGINWORDS})$/o
52
- FUNCLIKE_KEYWORDS=/^(break|next|redo|return|raise|yield|defined\?|retry|super|BEGIN|END)$/
51
+ OPORBEGINWORDLIST=%w(if unless while until)
52
+ BEGINWORDLIST=%w(def class module begin for case do)+OPORBEGINWORDLIST
53
+ OPORBEGINWORDS="(#{OPORBEGINWORDLIST.join '|'})"
54
+ BEGINWORDS=/^(#{BEGINWORDLIST.join '|'})$/o
55
+ FUNCLIKE_KEYWORDS=/^(break|next|redo|return|yield|retry|super|BEGIN|END)$/
53
56
  VARLIKE_KEYWORDS=/^(__FILE__|__LINE__|false|nil|self|true)$/
54
57
  INNERBOUNDINGWORDS="(else|elsif|ensure|in|then|rescue|when)"
55
58
  BINOPWORDS="(and|or)"
56
- NEVERSTARTPARAMLISTWORDS=/^(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
59
+ NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)([^a-zA-Z0-9_!?=]|\Z)/o
57
60
  NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
58
61
  NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
59
62
 
60
63
  RUBYKEYWORDS=%r{
61
- ^(alias|#{BINOPWORDS}|not|undef|end|
64
+ ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
62
65
  #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
63
66
  #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
64
67
  )$
@@ -72,8 +75,9 @@ class RubyLexer
72
75
  ?A..?Z => :identifier,
73
76
  ?_ => :identifier,
74
77
  ?0..?9 => :number,
75
- %{"'} => :double_quote,
76
- ?` => :back_quote,
78
+ ?" => :double_quote, #"
79
+ ?' => :single_quote, #'
80
+ ?` => :back_quote, #`
77
81
 
78
82
  WHSP => :whitespace, #includes \r
79
83
  ?, => :comma,
@@ -99,7 +103,9 @@ class RubyLexer
99
103
  #?\r => :newline, #implicitly escaped after op
100
104
 
101
105
  ?\\ => :escnewline,
102
- ?\0 => :eof,
106
+ ?\x00 => :eof,
107
+ ?\x04 => :eof,
108
+ ?\x1a => :eof,
103
109
 
104
110
  "[({" => :open_brace,
105
111
  "])}" => :close_brace,
@@ -108,41 +114,90 @@ class RubyLexer
108
114
  ?# => :comment
109
115
  }
110
116
 
111
- attr_reader :incomplete_here_tokens, :parsestack
117
+ attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
112
118
 
113
119
 
114
120
  #-----------------------------------
115
- def initialize(filename,file,linenum=1)
116
- super(filename,file, linenum)
121
+ def initialize(filename,file,linenum=1,offset_adjust=0)
122
+ @offset_adjust=0 #set again in next line
123
+ super(filename,file, linenum,offset_adjust)
117
124
  @start_linenum=linenum
118
125
  @parsestack=[TopLevelContext.new]
119
- @incomplete_here_tokens=[]
126
+ @incomplete_here_tokens=[] #not used anymore
127
+ @pending_here_bodies=[]
120
128
  @localvars_stack=[SymbolTable.new]
121
129
  @defining_lvar=nil
122
130
  @in_def_name=false
131
+ @last_operative_token=nil
132
+ @last_token_maybe_implicit=nil
123
133
 
124
134
  @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
125
135
 
126
136
  start_of_line_directives
137
+ progress_printer
138
+ end
139
+
140
+ def progress_printer
141
+ return unless ENV['RL_PROGRESS']
142
+ $stderr.puts 'printing progresses'
143
+ @progress_thread=Thread.new do
144
+ until EoiToken===@last_operative_token
145
+ sleep 10
146
+ $stderr.puts @file.pos
147
+ end
148
+ end
127
149
  end
128
150
 
129
151
  def localvars;
130
152
  @localvars_stack.last
131
153
  end
132
154
 
155
+ attr :localvars_stack
156
+ attr :offset_adjust
157
+ attr_writer :pending_here_bodies
158
+
159
+ #-----------------------------------
160
+ def set_last_token(tok)
161
+ @last_operative_token=@last_token_maybe_implicit=tok
162
+ end
163
+
133
164
  #-----------------------------------
134
165
  def get1token
135
166
  result=super #most of the action's here
136
167
 
168
+ if ENV['PROGRESS']
169
+ @last_cp_pos||=0
170
+ @start_time||=Time.now
171
+ if result.offset-@last_cp_pos>100000
172
+ $stderr.puts "#{result.offset} #{Time.now-@start_time}"
173
+ @last_cp_pos=result.offset
174
+ end
175
+ end
176
+
137
177
  #now cleanup and housekeeping
138
178
 
139
179
 
140
180
  #check for bizarre token types
141
181
  case result
182
+ when ImplicitParamListStartToken, ImplicitParamListEndToken
183
+ @last_token_maybe_implicit=result
184
+ result
142
185
  when StillIgnoreToken#,nil
143
186
  result
187
+ when StringToken
188
+ set_last_token result
189
+ assert !(IgnoreToken===@last_operative_token)
190
+ result.elems.map!{|frag|
191
+ if String===frag
192
+ result.translate_escapes(frag)
193
+ else
194
+ frag
195
+ end
196
+ } if AUTO_UNESCAPE_STRINGS
197
+ result
198
+
144
199
  when Token#,String
145
- @last_operative_token=result
200
+ set_last_token result
146
201
  assert !(IgnoreToken===@last_operative_token)
147
202
  result
148
203
  else
@@ -150,6 +205,20 @@ class RubyLexer
150
205
  end
151
206
  end
152
207
 
208
+ #-----------------------------------
209
+ def eof?
210
+ super or EoiToken===@last_operative_token
211
+ end
212
+
213
+ #-----------------------------------
214
+ def input_position
215
+ super+@offset_adjust
216
+ end
217
+
218
+ #-----------------------------------
219
+ def input_position_raw
220
+ @file.pos
221
+ end
153
222
 
154
223
  #-----------------------------------
155
224
  def balanced_braces?
@@ -163,7 +232,7 @@ class RubyLexer
163
232
  s=eat_next_if(?$) or return nil
164
233
 
165
234
  if t=((identifier_as_string(?$) or special_global))
166
- s<<t
235
+ s << t
167
236
  else error= "missing $id name"
168
237
  end
169
238
 
@@ -173,17 +242,27 @@ class RubyLexer
173
242
  #-----------------------------------
174
243
  def at_identifier(ch=nil)
175
244
  result = (eat_next_if(?@) or return nil)
176
- result << (eat_next_if(?@)or'')
245
+ result << (eat_next_if(?@) or '')
177
246
  if t=identifier_as_string(?@)
178
- result<<t
247
+ result << t
179
248
  else error= "missing @id name"
180
249
  end
181
- return lexerror(VarNameToken.new(result),error)
250
+ result=VarNameToken.new(result)
251
+ result.in_def=true if inside_method_def?
252
+ return lexerror(result,error)
182
253
  end
183
254
 
184
255
  private
185
256
  #-----------------------------------
186
- def here_spread_over_ruby_code(rl,tok)
257
+ def inside_method_def?
258
+ @parsestack.reverse_each{|ctx|
259
+ ctx.starter=='def' and ctx.state!=:saw_def and return true
260
+ }
261
+ return false
262
+ end
263
+
264
+ #-----------------------------------
265
+ def here_spread_over_ruby_code(rl,tok) #not used anymore
187
266
  assert(!rl.incomplete_here_tokens.empty?)
188
267
  @incomplete_here_tokens += rl.incomplete_here_tokens
189
268
  end
@@ -207,10 +286,10 @@ private
207
286
  end
208
287
 
209
288
  #-----------------------------------
210
- WSCHARSET=/[#\\\n\s\t\v\r\f]/
289
+ WSCHARSET=/[#\\\n\s\t\v\r\f\x00\x04\x1a]/
211
290
  def ignored_tokens(allow_eof=false,allow_eol=true)
212
291
  result=[]
213
- result<<@moretokens.shift while StillIgnoreToken===@moretokens.first
292
+ result << @moretokens.shift while StillIgnoreToken===@moretokens.first
214
293
  @moretokens.empty? or return result
215
294
  loop do
216
295
  unless @moretokens.empty?
@@ -273,8 +352,8 @@ private
273
352
  result = ((
274
353
  #order matters here, but it shouldn't
275
354
  #(but til_charset must be last)
276
- eat_next_if(/[!@&+`'=~\/\\,.;<>*"$?:]/) or
277
- (eat_next_if('-') and ("-"+getchar)) or
355
+ eat_if(/-[a-z0-9_]/i,2) or
356
+ eat_next_if(/[!@&+`'=~\-\/\\,.;<>*"$?:]/) or
278
357
  (?0..?9)===nextchar ? til_charset(/[^\d]/) : nil
279
358
  ))
280
359
  end
@@ -289,23 +368,26 @@ private
289
368
  #just asserts because those contexts are never encountered.
290
369
  #control goes through symbol(<...>,nil)
291
370
  assert( /^[a-z_]$/i===context)
292
- assert !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
371
+ assert MethNameToken===@last_operative_token || !(@last_operative_token===/^(\.|::|(un)?def|alias)$/)
293
372
 
294
- @moretokens.unshift(*parse_keywords(str,oldpos) do
373
+ @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
295
374
  #if not a keyword,
296
375
  case str
297
376
  when FUNCLIKE_KEYWORDS; #do nothing
298
377
  when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
299
378
  end
300
- safe_recurse { |a| var_or_meth_name(str,@last_operative_token,oldpos) }
379
+ was_last=@last_operative_token
380
+ @last_operative_token=tok if tok
381
+ safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
301
382
  end)
302
383
  return @moretokens.shift
303
384
  end
304
385
 
305
386
  #-----------------------------------
387
+ IDENTREX={}
306
388
  def identifier_as_string(context)
307
389
  #must begin w/ letter or underscore
308
- str=eat_next_if(/[_a-z]/i) or return nil
390
+ /[_a-z]/i===nextchar.chr or return
309
391
 
310
392
  #equals, question mark, and exclamation mark
311
393
  #might be allowed at the end in some contexts.
@@ -315,45 +397,16 @@ private
315
397
  #i hope i've captured all right conditions....
316
398
  #context should always be ?: right after def, ., and :: now
317
399
 
318
- maybe_eq,maybe_qm,maybe_ex = case context
319
- when ?@,?$ then [nil,nil,nil]
320
- when ?: then [?=, ??, ?!]
321
- else [nil,??, ?!]
322
- end
323
-
324
- @in_def_name and maybe_eq= ?=
325
-
326
- str<<til_charset(/[^a-z0-9_]/i)
327
-
328
- #look for ?, !, or =, if allowed
329
- case b=getc
330
- when nil #means we're at eof
331
- #handling nil here prevents b from ever matching
332
- #a nil value of maybe_qm, maybe_ex or maybe_eq
333
- when maybe_qm
334
- str << b
335
- when maybe_ex
336
- nc=(nextchar unless eof?)
337
- #does ex appear to be part of a larger operator?
338
- if nc==?= #or nc==?~
339
- back1char
340
- else
341
- str << b
342
- end
343
- when maybe_eq
344
- nc=(nextchar unless eof?)
345
- #does eq appear to be part of a larger operator?
346
- if nc==?= or nc==?~ or nc==?>
347
- back1char
348
- else
349
- str << b
350
- end
351
- else
352
- back1char
353
- end
400
+ #= and ! only match if not part of a larger operator
401
+ trailers =
402
+ case context
403
+ when ?@,?$ then ""
404
+ # when ?: then "!(?![=])|\\?|=(?![=~>])"
405
+ else "!(?![=])|\\?"
406
+ end
407
+ @in_def_name||context==?: and trailers<<"|=(?![=~>])"
354
408
 
355
-
356
- return str
409
+ @file.scan(IDENTREX[trailers]||=/^[_a-z][a-z0-9_]*(?:#{trailers})?/i)
357
410
  end
358
411
 
359
412
  #-----------------------------------
@@ -380,18 +433,26 @@ private
380
433
  #a comma has been seen. are we in an
381
434
  #lvalue list or some other construct that uses commas?
382
435
  def comma_in_lvalue_list?
383
- @parsestack.last.lhs= (not ListContext===@parsestack.last)
436
+ @parsestack.last.lhs=
437
+ case l=@parsestack.last
438
+ when ListContext:
439
+ when DefContext: l.in_body
440
+ else true
441
+ end
384
442
  end
385
443
 
386
444
  #-----------------------------------
387
445
  def in_lvar_define_state
388
446
  #@defining_lvar is a hack
389
447
  @defining_lvar or case ctx=@parsestack.last
390
- when ForSMContext; ctx.state==:for
391
- when RescueSMContext; ctx.state==:arrow
448
+ #when ForSMContext; ctx.state==:for
449
+ when RescueSMContext
450
+ @last_operative_token.ident=="=>" and @file.match? /\A[\s\v]*([:;#\n]|then[^a-zA-Z0-9_])/m
392
451
  #when BlockParamListLhsContext; true
393
452
  end
394
453
  end
454
+
455
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=2
395
456
 
396
457
  #-----------------------------------
397
458
  #determine if an alphabetic identifier refers to a variable
@@ -400,45 +461,50 @@ private
400
461
  #if appropriate. adds tok to the
401
462
  #local var table if its a local var being defined for the first time.
402
463
 
403
- #note: what we here call variables (rather, constants) following ::
404
- #might actually be methods at runtime, but that's immaterial to tokenization.
405
-
406
- #note: this routine should determine the correct token type for name and
407
- #create the appropriate token. currently this is not done because callers
408
- #typically have done it (perhaps incorrectly) already.
409
- def var_or_meth_name(name,lasttok,pos)
464
+ #in general, operators in ruby are disambuated by the before-but-not-after rule.
465
+ #an otherwise ambiguous operator is disambiguated by the surrounding whitespace:
466
+ #whitespace before but not after the 'operator' indicates it is to be considered a
467
+ #value token instead. otherwise it is a binary operator. (unary (prefix) ops count
468
+ #as 'values' here.)
469
+ def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
410
470
  #look for call site if not a keyword or keyword is function-like
411
471
  #look for and ignore local variable names
412
472
 
413
473
  assert String===name
414
474
 
475
+ was_in_lvar_define_state=in_lvar_define_state
415
476
  #maybe_local really means 'maybe local or constant'
416
477
  maybe_local=case name
417
- when /[^a-z_0-9]$/i; #do nothing
418
- when /^[a-z_]/; (localvars===name or VARLIKE_KEYWORDS===name or in_lvar_define_state) and not lasttok===/^(\.|::)$/
419
- when /^[A-Z]/; is_const=true;not lasttok==='.' #this is the right algorithm for constants...
478
+ when /[^a-z_0-9]$/i #do nothing
479
+ when /^[a-z_]/
480
+ (localvars===name or
481
+ VARLIKE_KEYWORDS===name or
482
+ was_in_lvar_define_state
483
+ ) and not lasttok===/^(\.|::)$/
484
+ when /^[A-Z]/
485
+ is_const=true
486
+ not lasttok==='.' #this is the right algorithm for constants...
420
487
  end
421
488
 
422
489
  assert(@moretokens.empty?)
423
490
 
424
491
  oldlast=@last_operative_token
425
492
 
426
- tok=@last_operative_token=VarNameToken.new(name,pos)
493
+ tok=set_last_token assign_lvar_type!(VarNameToken.new(name,pos))
427
494
 
428
495
  oldpos= input_position
429
496
  sawnl=false
430
497
  result=ws_toks=ignored_tokens(true) {|nl| sawnl=true }
431
498
  if sawnl || eof?
432
- if maybe_local then
433
- if in_lvar_define_state
434
- if /^[a-z_][a-zA-Z_0-9]*$/===name
435
- assert !(lasttok===/^(\.|::)$/)
436
- localvars[name]=true
437
- else
438
- lexerror tok,"not a valid variable name: #{name}"
439
- end
440
- return result.unshift(tok)
499
+ if was_in_lvar_define_state
500
+ if /^[a-z_][a-zA-Z_0-9]*$/===name
501
+ assert !(lasttok===/^(\.|::)$/)
502
+ localvars[name]=true
503
+ else
504
+ lexerror tok,"not a valid variable name: #{name}"
441
505
  end
506
+ return result.unshift(tok)
507
+ elsif maybe_local
442
508
  return result.unshift(tok) #if is_const
443
509
  else
444
510
  return result.unshift(
@@ -455,6 +521,8 @@ private
455
521
  when ?=; not /^=[>=~]$/===readahead(2)
456
522
  when ?,; comma_in_lvalue_list?
457
523
  when ?); last_context_not_implicit.lhs
524
+ when ?i; /^in[^a-zA-Z_0-9]/===readahead(3) and
525
+ ForSMContext===last_context_not_implicit
458
526
  when ?>,?<; /^(.)\1=$/===readahead(3)
459
527
  when ?*,?&; /^(.)\1?=/===readahead(3)
460
528
  when ?|; /^\|\|?=/===readahead(3) or
@@ -463,8 +531,8 @@ private
463
531
  readahead(2)[1] != ?|
464
532
  when ?%,?/,?-,?+,?^; readahead(2)[1]== ?=
465
533
  end
466
- if (assignment_coming && !(lasttok===/^(\.|::)$/) or in_lvar_define_state)
467
- tok=VarNameToken.new(name,pos)
534
+ if (assignment_coming && !(lasttok===/^(\.|::)$/) or was_in_lvar_define_state)
535
+ tok=assign_lvar_type! VarNameToken.new(name,pos)
468
536
  if /[^a-z_0-9]$/i===name
469
537
  lexerror tok,"not a valid variable name: #{name}"
470
538
  elsif /^[a-z_]/===name and !(lasttok===/^(\.|::)$/)
@@ -476,44 +544,106 @@ private
476
544
  implicit_parens_to_emit=
477
545
  if assignment_coming
478
546
  @parsestack.push AssignmentContext.new(nil) if nc==?% or nc==?/
479
- 0
547
+ IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
480
548
  else
481
549
  case nc
482
550
  when nil: 2
483
- when ?!; readahead(2)=='!=' ? 2 : 1
551
+ when ?!; /^![=~]$/===readahead(2) ? 2 : 1
552
+ when ?d;
553
+ if /^do([^a-zA-Z0-9_]|$)/===readahead(3)
554
+ if maybe_local and expecting_do?
555
+ ty=VarNameToken
556
+ 0
557
+ else
558
+ maybe_local=false
559
+ 2
560
+ end
561
+ else
562
+ 1
563
+ end
484
564
  when NEVERSTARTPARAMLISTFIRST
485
565
  (NEVERSTARTPARAMLISTWORDS===readahead(NEVERSTARTPARAMLISTMAXLEN)) ? 2 : 1
486
- when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1
566
+ when ?",?',?`,?a..?z,?A..?Z,?0..?9,?_,?@,?$,?~; 1 #"
487
567
  when ?{
488
568
  maybe_local=false
569
+ 1
570
+ =begin
489
571
  x=2
490
572
  x-=1 if /\A(return|break|next)\Z/===name and
491
573
  !(KeywordToken===oldlast and oldlast===/\A(\.|::)\Z/)
492
574
  x
575
+ =end
493
576
  when ?(;
494
- maybe_local=false; !(ws_toks.empty? or lasttok===/^(\.|::)$/)? 1 : 0
577
+ maybe_local=false
578
+ lastid=lasttok&&lasttok.ident
579
+ case lastid
580
+ when /\A[;(]|do\Z/: was_after_nonid_op=false
581
+ when '|': was_after_nonid_op=false unless BlockParamListLhsContext===@parsestack.last
582
+ when '{': was_after_nonid_op=false if BlockContext===@parsestack.last or BeginEndContext===@parsestack.last
583
+ end if KeywordToken===lasttok
584
+ was_after_nonid_op=false if NewlineToken===lasttok or lasttok.nil?
585
+ want_parens=!(ws_toks.empty? or was_after_nonid_op) #or
586
+ # /^(::|rescue|yield|else|case|when|if|unless|until|while|and|or|&&|\|\||[?:]|\.\.?\.?|=>)$/===lastid or
587
+ # MethNameToken===lasttok or
588
+ # RUBYNONSYMOPERATORREX===lastid && /=$/===lastid && '!='!=lastid
589
+ # )
590
+
591
+ #look ahead for closing paren (after some whitespace...)
592
+ want_parens=false if @file.match? /\A.(?:\s|\v|\#.*\n)*\)/
593
+ # afterparen=@file.pos
594
+ # getchar
595
+ # ignored_tokens(true)
596
+ # want_parens=false if nextchar==?)
597
+ # @file.pos=afterparen
598
+
599
+ want_parens ? 1 : 0
495
600
  when ?},?],?),?;,?^, ?|, ?>, ?,, ?., ?=; 2
496
- when ?+, ?-, ?*, ?&, ?%, ?/; (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
497
- when ?:,??; next2=readahead(2);
498
- WHSPLF[next2[1].chr] || next2=='::' ? 2 : 3
499
- # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/]) ? 2 : 3
500
- when ?<; (ws_toks.empty? || readahead(3)[/^<<["'`a-zA-Z_0-9-]/]) ? 3 : 2
501
- when ?[; ws_toks.empty? ? 2 : 3
601
+ when ?+, ?-, ?%, ?/
602
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
603
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
604
+ )
605
+ 1
606
+ else
607
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
608
+ end
609
+ when ?*, ?&
610
+ lasttok=@last_operative_token
611
+ if /^(return|break|next)$/===@last_operative_token.ident and not(
612
+ KeywordToken===lasttok and /^(.|::)$/===lasttok.ident
613
+ )
614
+ 1
615
+ else
616
+ (ws_toks.empty? || readahead(2)[/^.[#{WHSPLF}*&]/o]) ? 2 : 3
617
+ end
618
+ when ?:
619
+ next2=readahead(2)
620
+ if /^:(?:[#{WHSPLF}]|(:))$/o===next2 then
621
+ $1 && !ws_toks.empty? ? 3 : 2
622
+ else
623
+ 3
624
+ end
625
+ when ??; next3=readahead(3);
626
+ /^\?([#{WHSPLF}]|[a-z_][a-z_0-9])/io===next3 ? 2 : 3
627
+ # when ?:,??; (readahead(2)[/^.[#{WHSPLF}]/o]) ? 2 : 3
628
+ when ?<; (!ws_toks.empty? && readahead(4)[/^<<-?["'`a-zA-Z_0-9]/]) ? 3 : 2
629
+ when ?[; ws_toks.empty?&&!(KeywordToken===oldlast and /^(return|break|next)$/===oldlast.ident) ? 2 : 3
502
630
  when ?\\, ?\s, ?\t, ?\n, ?\r, ?\v, ?#; raise 'failure'
503
631
  else raise "unknown char after ident: #{nc=nextchar ? nc.chr : "<<EOF>>"}"
504
632
  end
505
633
  end
506
634
 
507
- if is_const and implicit_parens_to_emit==3 then
635
+ if is_const and implicit_parens_to_emit==3 then #needed?
508
636
  implicit_parens_to_emit=1
509
637
  end
510
638
 
511
- tok=if maybe_local and implicit_parens_to_emit>=2
639
+ if maybe_local and implicit_parens_to_emit>=2
512
640
  implicit_parens_to_emit=0
513
- VarNameToken
641
+ ty=VarNameToken
514
642
  else
515
- MethNameToken
516
- end.new(name,pos)
643
+ ty||=MethNameToken
644
+ end
645
+ tok=assign_lvar_type!(ty.new(name,pos))
646
+
517
647
 
518
648
  case implicit_parens_to_emit
519
649
  when 2;
@@ -523,8 +653,17 @@ private
523
653
  arr,pass=*param_list_coming_with_2_or_more_params?
524
654
  result.push( *arr )
525
655
  unless pass
656
+ #only 1 param in list
526
657
  result.unshift ImplicitParamListStartToken.new(oldpos)
527
- @parsestack.push ParamListContextNoParen.new(@linenum)
658
+ last=result.last
659
+ last.set_callsite! false if last.respond_to? :callsite? and last.callsite? #KeywordToken===last and last.ident==')'
660
+ if /^(break|next|return)$/===name and
661
+ !(KeywordToken===lasttok and /^(.|::)$/===lasttok.ident)
662
+ ty=KWParamListContextNoParen
663
+ else
664
+ ty=ParamListContextNoParen
665
+ end
666
+ @parsestack.push ty.new(@linenum)
528
667
  end
529
668
  when 0; #do nothing
530
669
  else raise 'invalid value of implicit_parens_to_emit'
@@ -547,11 +686,13 @@ private
547
686
  result=[get1token]
548
687
  pass=loop{
549
688
  tok=get1token
550
- result<<tok
689
+ result << tok
551
690
  if @parsestack.size==basesize
552
691
  break false
553
692
  elsif ','==tok.to_s and @parsestack.size==basesize+1
554
693
  break true
694
+ elsif OperatorToken===tok and /^[&*]$/===tok.ident and tok.unary and @parsestack.size==basesize+1
695
+ break true
555
696
  elsif EoiToken===tok
556
697
  lexerror tok, "unexpected eof in parameter list"
557
698
  end
@@ -560,11 +701,13 @@ private
560
701
  end
561
702
 
562
703
  #-----------------------------------
563
- CONTEXT2ENDTOK={AssignmentRhsContext=>AssignmentRhsListEndToken,
564
- ParamListContextNoParen=>ImplicitParamListEndToken,
565
- WhenParamListContext=>KwParamListEndToken,
566
- RescueSMContext=>KwParamListEndToken
567
- }
704
+ CONTEXT2ENDTOK={
705
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
706
+ ParamListContextNoParen=>ImplicitParamListEndToken,
707
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
708
+ WhenParamListContext=>KwParamListEndToken,
709
+ RescueSMContext=>KwParamListEndToken
710
+ }
568
711
  def abort_noparens!(str='')
569
712
  #assert @moretokens.empty?
570
713
  result=[]
@@ -576,7 +719,63 @@ private
576
719
  return result
577
720
  end
578
721
 
579
- if false #no longer used
722
+ #-----------------------------------
723
+ CONTEXT2ENDTOK_FOR_RESCUE={
724
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
725
+ ParamListContextNoParen=>ImplicitParamListEndToken,
726
+ KWParamListContextNoParen=>ImplicitParamListEndToken,
727
+ WhenParamListContext=>KwParamListEndToken,
728
+ RescueSMContext=>KwParamListEndToken
729
+ }
730
+ def abort_noparens_for_rescue!(str='')
731
+ #assert @moretokens.empty?
732
+ result=[]
733
+ ctx=@parsestack.last
734
+ while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
735
+ break if AssignmentRhsContext===ctx && !ctx.multi_assign?
736
+ if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
737
+ result.push ImplicitParamListEndToken.new(input_position-str.length),
738
+ AssignmentRhsListEndToken.new(input_position-str.length)
739
+ @parsestack.pop
740
+ @parsestack.pop
741
+ break
742
+ end
743
+ result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
744
+ break if RescueSMContext===ctx #why is this here?
745
+ @parsestack.pop
746
+ ctx=@parsestack.last
747
+ end
748
+ return result
749
+ end
750
+
751
+ #-----------------------------------
752
+ CONTEXT2ENDTOK_FOR_DO={
753
+ AssignmentRhsContext=>AssignmentRhsListEndToken,
754
+ ParamListContextNoParen=>ImplicitParamListEndToken,
755
+ ExpectDoOrNlContext=>1,
756
+ #WhenParamListContext=>KwParamListEndToken,
757
+ #RescueSMContext=>KwParamListEndToken
758
+ }
759
+ def abort_noparens_for_do!(str='')
760
+ #assert @moretokens.empty?
761
+ result=[]
762
+ while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
763
+ break if klass==1
764
+ result << klass.new(input_position-str.length)
765
+ @parsestack.pop
766
+ end
767
+ return result
768
+ end
769
+
770
+ #-----------------------------------
771
+ def expecting_do?
772
+ @parsestack.reverse_each{|ctx|
773
+ next if AssignmentRhsContext===ctx
774
+ return !!CONTEXT2ENDTOK_FOR_DO[ctx.class]
775
+ }
776
+ return false
777
+ end
778
+
580
779
  #-----------------------------------
581
780
  def abort_1_noparen!(offs=0)
582
781
  assert @moretokens.empty?
@@ -585,12 +784,12 @@ if false #no longer used
585
784
  @parsestack.pop
586
785
  result << AssignmentRhsListEndToken.new(input_position-offs)
587
786
  end
588
- ParamListContextNoParen===@parsestack.last or lexerror huh,'{} with no matching callsite'
787
+ if ParamListContextNoParen===@parsestack.last #or lexerror huh,'{} with no matching callsite'
589
788
  @parsestack.pop
590
789
  result << ImplicitParamListEndToken.new(input_position-offs)
790
+ end
591
791
  return result
592
792
  end
593
- end
594
793
 
595
794
  #-----------------------------------
596
795
  #parse keywords now, to prevent confusion over bare symbols
@@ -598,6 +797,7 @@ end
598
797
  #if arg is not a keyword, the block is called
599
798
  def parse_keywords(str,offset)
600
799
  assert @moretokens.empty?
800
+ assert !(KeywordToken===@last_operative_token and /A(.|::|def)\Z/===@last_operative_token.ident)
601
801
  result=[KeywordToken.new(str,offset)]
602
802
 
603
803
  case str
@@ -619,11 +819,15 @@ end
619
819
  /^(do)$/===start and localvars.end_block
620
820
  /^(class|module|def)$/===start and @localvars_stack.pop
621
821
 
622
- when "class","module"
822
+ when "module"
623
823
  result.first.has_end!
624
824
  @parsestack.push WantsEndContext.new(str,@linenum)
625
825
  @localvars_stack.push SymbolTable.new
626
826
 
827
+ when "class"
828
+ result.first.has_end!
829
+ @parsestack.push ClassContext.new(str,@linenum)
830
+
627
831
  when "if","unless" #could be infix form without end
628
832
  if after_nonid_op?{false} #prefix form
629
833
  result.first.has_end!
@@ -653,10 +857,11 @@ end
653
857
  #expect_do_or_end_or_nl! str #handled by ForSMContext now
654
858
  @parsestack.push ForSMContext.new(@linenum)
655
859
  when "do"
656
- result.unshift(*abort_noparens!(str))
860
+ result.unshift(*abort_noparens_for_do!(str))
657
861
  if ExpectDoOrNlContext===@parsestack.last
658
862
  @parsestack.pop
659
863
  assert WantsEndContext===@parsestack.last
864
+ result.last.as=";"
660
865
  else
661
866
  result.last.has_end!
662
867
  @parsestack.push WantsEndContext.new(str,@linenum)
@@ -665,10 +870,10 @@ end
665
870
  end
666
871
  when "def"
667
872
  result.first.has_end!
668
- @parsestack.push WantsEndContext.new("def",@linenum)
669
- @localvars_stack.push SymbolTable.new
873
+ @parsestack.push ctx=DefContext.new(@linenum)
874
+ ctx.state=:saw_def
670
875
  safe_recurse { |aa|
671
- @last_operative_token=KeywordToken.new "def" #hack
876
+ set_last_token KeywordToken.new "def" #hack
672
877
  result.concat ignored_tokens
673
878
 
674
879
  #read an expr like a.b.c or a::b::c
@@ -683,10 +888,11 @@ end
683
888
  when/^\)$/.token_pat then parencount-=1
684
889
  end
685
890
  EoiToken===tok and lexerror tok, "eof in def header"
686
- result<<tok
891
+ result << tok
687
892
  end until parencount==0 #@parsestack.size==old_size
688
- else #no parentheses, all tail
689
- @last_operative_token=KeywordToken.new "." #hack hack
893
+ @localvars_stack.push SymbolTable.new
894
+ else #no parentheses, all tail
895
+ set_last_token KeywordToken.new "." #hack hack
690
896
  tokindex=result.size
691
897
  result << tok=symbol(false,false)
692
898
  name=tok.to_s
@@ -700,25 +906,30 @@ end
700
906
  when /^[a-z_]/; localvars===name
701
907
  when /^[A-Z]/; is_const=true #this is the right algorithm for constants...
702
908
  end
703
- if !ty and maybe_local
704
- result.push( *ignored_tokens(false,false) )
705
- nc=nextchar
909
+ result.push( *ignored_tokens(false,false) )
910
+ nc=nextchar
911
+ if !ty and maybe_local
706
912
  if nc==?: || nc==?.
707
913
  ty=VarNameToken
708
914
  end
709
915
  end
710
- unless ty
711
- ty=MethNameToken
712
- endofs=tok.offset+tok.to_s.length
713
- result[tokindex+1...tokindex+1]=
714
- [ImplicitParamListStartToken.new(endofs),ImplicitParamListEndToken.new(endofs)]
916
+ if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
917
+ ty=MethNameToken
918
+ if nc != ?(
919
+ endofs=tok.offset+tok.to_s.length
920
+ newtok=ImplicitParamListStartToken.new(endofs)
921
+ result.insert tokindex+1, newtok
922
+ end
715
923
  end
716
924
 
717
925
  assert result[tokindex].equal?(tok)
718
- result[tokindex]=ty.new(tok.to_s,tok.offset)
926
+ var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
927
+ @localvars_stack.push SymbolTable.new
928
+ var.in_def=true if inside_method_def? and var.respond_to? :in_def=
929
+ result[tokindex]=var
719
930
 
720
931
 
721
- #if a.b.c.d is seen, a, b, and c
932
+ #if a.b.c.d is seen, a, b and c
722
933
  #should be considered maybe varname instead of methnames.
723
934
  #the last (d in the example) is always considered a methname;
724
935
  #it's what's being defined.
@@ -727,8 +938,7 @@ end
727
938
  #a could even be a keyword (eg self or block_given?).
728
939
  end
729
940
  #read tail: .b.c.d etc
730
- result.reverse_each{|res| break @last_operative_token=res unless StillIgnoreToken===res}
731
- ###@last_operative_token=result.last #naive
941
+ result.reverse_each{|res| break set_last_token res unless StillIgnoreToken===res}
732
942
  assert !(IgnoreToken===@last_operative_token)
733
943
  state=:expect_op
734
944
  @in_def_name=true
@@ -737,12 +947,22 @@ end
737
947
  #look for start of parameter list
738
948
  nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
739
949
  if state==:expect_op and /^[a-z_(&*]/i===nc
740
- result.concat def_param_list
950
+ ctx.state=:def_param_list
951
+ list,listend=def_param_list
952
+ result.concat list
953
+ end_index=result.index(listend)
954
+ ofs=listend.offset
955
+ if endofs
956
+ result.insert end_index,ImplicitParamListEndToken.new(ofs)
957
+ else
958
+ ofs+=listend.to_s.size
959
+ end
960
+ result.insert end_index+1,EndDefHeaderToken.new(ofs)
741
961
  break
742
962
  end
743
963
 
744
964
  tok=get1token
745
- result<<tok
965
+ result<< tok
746
966
  case tok
747
967
  when EoiToken
748
968
  lexerror tok,'unexpected eof in def header'
@@ -752,9 +972,18 @@ end
752
972
  state=:expect_op
753
973
  when /^(\.|::)$/.token_pat
754
974
  lexerror tok,'expected ident' unless state==:expect_op
975
+ if endofs
976
+ result.insert -2, ImplicitParamListEndToken.new(endofs)
977
+ endofs=nil
978
+ end
755
979
  state=:expect_name
756
980
  when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
981
+ ctx.state=:def_body
757
982
  state==:expect_op or lexerror tok,'expected identifier'
983
+ if endofs
984
+ result.insert -2,ImplicitParamListEndToken.new(tok.offset)
985
+ end
986
+ result.insert -2, EndDefHeaderToken.new(tok.offset)
758
987
  break
759
988
  else
760
989
  lexerror(tok, "bizarre token in def name: " +
@@ -765,24 +994,34 @@ end
765
994
  }
766
995
  when "alias"
767
996
  safe_recurse { |a|
768
- @last_operative_token=KeywordToken.new "alias" #hack
997
+ set_last_token KeywordToken.new "alias" #hack
769
998
  result.concat ignored_tokens
770
999
  res=symbol(eat_next_if(?:),false)
771
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
772
- @last_operative_token=KeywordToken.new "alias" #hack
773
- result.concat ignored_tokens
774
- res=symbol(eat_next_if(?:),false)
775
- res ? result<<res : lexerror(result.first,"bad symbol in alias")
1000
+ unless res
1001
+ lexerror(result.first,"bad symbol in alias")
1002
+ else
1003
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
1004
+ result<< res
1005
+ set_last_token KeywordToken.new "alias" #hack
1006
+ result.concat ignored_tokens
1007
+ res=symbol(eat_next_if(?:),false)
1008
+ unless res
1009
+ lexerror(result.first,"bad symbol in alias")
1010
+ else
1011
+ res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
1012
+ result<< res
1013
+ end
1014
+ end
776
1015
  }
777
1016
  when "undef"
778
1017
  safe_recurse { |a|
779
1018
  loop do
780
- @last_operative_token=KeywordToken.new "," #hack
1019
+ set_last_token KeywordToken.new "," #hack
781
1020
  result.concat ignored_tokens
782
1021
  tok=symbol(eat_next_if(?:),false)
783
1022
  tok or lexerror(result.first,"bad symbol in undef")
784
1023
  result<< tok
785
- @last_operative_token=tok
1024
+ set_last_token tok
786
1025
  assert !(IgnoreToken===@last_operative_token)
787
1026
 
788
1027
  sawnl=false
@@ -809,13 +1048,13 @@ end
809
1048
  unless after_nonid_op? {false}
810
1049
  #rescue needs to be treated differently when in operator context...
811
1050
  #i think no RescueSMContext should be pushed on the stack...
812
- #plus, the rescue token should be marked as infix
813
- result.first.set_infix!
1051
+ result.first.set_infix! #plus, the rescue token should be marked as infix
1052
+ result.unshift(*abort_noparens_for_rescue!(str))
814
1053
  else
815
1054
  result.push KwParamListStartToken.new(offset+str.length)
816
1055
  #corresponding EndToken emitted by abort_noparens! on leaving rescue context
817
- result.unshift(*abort_noparens!(str))
818
1056
  @parsestack.push RescueSMContext.new(@linenum)
1057
+ result.unshift(*abort_noparens!(str))
819
1058
  end
820
1059
 
821
1060
  when "then"
@@ -831,16 +1070,43 @@ end
831
1070
  result.unshift(*abort_noparens!(str))
832
1071
 
833
1072
  when /\A(return|break|next)\Z/
834
- result=yield
835
- result.first.has_no_block! unless KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
1073
+ fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
1074
+ tok=KeywordToken.new(str,offset)
1075
+ result=yield tok
1076
+ result[0]=tok
1077
+ tok.has_no_block!
1078
+
1079
+
1080
+ when 'END'
1081
+ #END could be treated, lexically, just as if it is an
1082
+ #ordinary method, except that local vars created in
1083
+ #END blocks are visible to subsequent code. (Why??)
1084
+ #That difference forces a custom parsing.
1085
+ if @last_operative_token===/^(\.|::)$/
1086
+ result=yield nil #should pass a keyword token here
1087
+ else
1088
+ safe_recurse{
1089
+ old=result.first
1090
+ result=[
1091
+ MethNameToken.new(old.ident,old.offset),
1092
+ ImplicitParamListStartToken.new(input_position),
1093
+ ImplicitParamListEndToken.new(input_position),
1094
+ *ignored_tokens
1095
+ ]
1096
+ getchar=='{' or lexerror(result.first,"expected { after #{str}")
1097
+ result.push KeywordToken.new('{',input_position-1)
1098
+ result.last.set_infix!
1099
+ @parsestack.push BeginEndContext.new(str,offset)
1100
+ }
1101
+ end
836
1102
 
837
1103
  when FUNCLIKE_KEYWORDS
838
- result=yield
1104
+ result=yield nil #should be a keyword token
839
1105
 
840
1106
  when RUBYKEYWORDS
841
1107
  #do nothing
842
1108
 
843
- else result=yield
1109
+ else result=yield nil
844
1110
 
845
1111
  end
846
1112
 
@@ -881,11 +1147,11 @@ end
881
1147
  #-----------------------------------
882
1148
  def block_param_list_lookahead
883
1149
  safe_recurse{ |la|
884
- @last_operative_token=KeywordToken.new ';'
1150
+ set_last_token KeywordToken.new ';'
885
1151
  a=ignored_tokens
886
1152
 
887
1153
  if eat_next_if(?|)
888
- a<<KeywordToken.new("|", input_position-1)
1154
+ a<< KeywordToken.new("|", input_position-1)
889
1155
  if true
890
1156
  @parsestack.push mycontext=BlockParamListLhsContext.new(@linenum)
891
1157
  nextchar==?| and a.push NoWsToken.new(input_position)
@@ -909,7 +1175,7 @@ else
909
1175
  end
910
1176
 
911
1177
  tok==='|' and parsestack_lastnonassign_is?(mycontext) and break
912
- a<<tok
1178
+ a<< tok
913
1179
  end
914
1180
  assert@defining_lvar || AssignmentRhsContext===@parsestack.last
915
1181
  @defining_lvar=false
@@ -920,14 +1186,14 @@ else
920
1186
  @parsestack.last.object_id==mycontext.object_id or raise 'expected my BlockParamListLhsContext atop @parsestack'
921
1187
  @parsestack.pop
922
1188
 
923
- a<<KeywordToken.new('|',tok.offset)
1189
+ a<< KeywordToken.new('|',tok.offset)
924
1190
  @moretokens.empty? or
925
1191
  fixme %#moretokens might be set from get1token call above...might be bad#
926
1192
  end
927
1193
  end
928
1194
  end
929
1195
 
930
- @last_operative_token=KeywordToken.new ';'
1196
+ set_last_token KeywordToken.new ';'
931
1197
  #a.concat ignored_tokens
932
1198
 
933
1199
  #assert @last_operative_token===';'
@@ -948,6 +1214,7 @@ end
948
1214
  @in_def_name=false
949
1215
  result=[]
950
1216
  normal_comma_level=old_parsestack_size=@parsestack.size
1217
+ listend=nil
951
1218
  safe_recurse { |a|
952
1219
  assert(@moretokens.empty?)
953
1220
  assert((not IgnoreToken===@moretokens[0]))
@@ -972,18 +1239,22 @@ end
972
1239
  alias === call
973
1240
  end
974
1241
 
975
- @last_operative_token=KeywordToken.new ',' #hack
1242
+ set_last_token KeywordToken.new ',' #hack
976
1243
  #read local parameter names
1244
+ nextvar=nil
977
1245
  loop do
978
1246
  expect_name=(@last_operative_token===',' and
979
1247
  normal_comma_level==@parsestack.size)
980
1248
  expect_name and @defining_lvar||=true
981
1249
  result << tok=get1token
982
- lexerror tok, "unexpected eof in def header" if EoiToken===tok
1250
+ break lexerror(tok, "unexpected eof in def header") if EoiToken===tok
983
1251
 
984
1252
  #break if at end of param list
985
- endingblock===tok and
986
- old_parsestack_size>=@parsestack.size and break
1253
+ if endingblock===tok and old_parsestack_size>=@parsestack.size
1254
+ nextvar and localvars[nextvar]=true #add nextvar to local vars
1255
+ listend=tok
1256
+ break
1257
+ end
987
1258
 
988
1259
  #next token is a local var name
989
1260
  #(or the one after that if unary ops present)
@@ -992,33 +1263,40 @@ end
992
1263
  case tok
993
1264
  when IgnoreToken #, /^[A-Z]/ #do nothing
994
1265
  when /^,$/.token_pat #hack
995
-
996
-
1266
+
997
1267
  when VarNameToken
998
1268
  assert@defining_lvar
999
1269
  @defining_lvar=false
1000
1270
  assert((not @last_operative_token===','))
1271
+ # assert !nextvar
1272
+ nextvar=tok.ident
1273
+ localvars[nextvar]=false #remove nextvar from list of local vars for now
1001
1274
  when /^[&*]$/.token_pat #unary form...
1002
1275
  #a NoWsToken is also expected... read it now
1003
1276
  result.concat maybe_no_ws_token #not needed?
1004
- @last_operative_token=KeywordToken.new ','
1277
+ set_last_token KeywordToken.new ','
1005
1278
  else
1006
1279
  lexerror tok,"unfamiliar var name '#{tok}'"
1007
1280
  end
1008
- elsif /^,$/.token_pat===tok and
1009
- normal_comma_level+1==@parsestack.size and
1010
- AssignmentRhsContext===@parsestack.last
1011
- #seeing comma here should end implicit rhs started within the param list
1012
- result[-1,0]=AssignmentRhsListEndToken.new(tok.offset)
1013
- @parsestack.pop
1281
+ elsif /^,$/.token_pat===tok
1282
+ if normal_comma_level+1==@parsestack.size and
1283
+ AssignmentRhsContext===@parsestack.last
1284
+ #seeing comma here should end implicit rhs started within the param list
1285
+ result << AssignmentRhsListEndToken.new(tok.offset)
1286
+ @parsestack.pop
1287
+ end
1288
+ if nextvar and normal_comma_level==@parsestack.size
1289
+ localvars[nextvar]=true #now, finally add nextvar back to local vars
1290
+ nextvar
1291
+ end
1014
1292
  end
1015
1293
  end
1016
1294
 
1017
1295
  @defining_lvar=false
1018
-
1296
+ @parsestack.last.see self,:semi
1019
1297
 
1020
1298
  assert(@parsestack.size <= old_parsestack_size)
1021
- assert(endingblock[tok])
1299
+ assert(endingblock[tok] || ErrorToken===tok)
1022
1300
 
1023
1301
  #hack: force next token to look like start of a
1024
1302
  #new stmt, if the last ignored_tokens
@@ -1026,42 +1304,54 @@ end
1026
1304
  #(just in case the next token parsed
1027
1305
  #happens to call quote_expected? or after_nonid_op)
1028
1306
  result.concat ignored_tokens
1029
- if nextchar.chr[/[iuw\/<|>+\-*&%?:]/] and
1030
- !(NewlineToken===@last_operative_token) and
1031
- !(/^(end|;)$/===@last_operative_token)
1032
- @last_operative_token=KeywordToken.new ';'
1307
+ # if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
1308
+ # !(NewlineToken===@last_operative_token) and
1309
+ # !(/^(end|;)$/===@last_operative_token)
1310
+ #result<<EndDefHeaderToken.new(result.last.offset+result.last.to_s.size)
1311
+ set_last_token KeywordToken.new ';'
1033
1312
  result<< get1token
1034
- end
1313
+ # end
1035
1314
  }
1036
1315
 
1037
- return result
1316
+ return result,listend
1038
1317
  end
1039
1318
 
1040
1319
 
1041
1320
  #-----------------------------------
1042
1321
  #handle % in ruby code. is it part of fancy quote or a modulo operator?
1043
1322
  def percent(ch)
1044
- if quote_expected? ch
1323
+ if AssignmentContext===@parsestack.last
1324
+ @parsestack.pop
1325
+ op=true
1326
+ end
1327
+
1328
+ if !op and quote_expected?(ch) ||
1329
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
1045
1330
  fancy_quote ch
1046
- else
1331
+ else
1047
1332
  biop ch
1048
- end
1333
+ end
1049
1334
  end
1050
1335
 
1051
1336
  #-----------------------------------
1052
1337
  #handle * & in ruby code. is unary or binary operator?
1053
1338
  def star_or_amp(ch)
1054
- assert('*&'[ch])
1055
- want_unary=unary_op_expected? ch
1056
- result=(quadriop ch)
1057
- if want_unary
1058
- #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
1059
- assert OperatorToken===result
1060
- result.unary=true #result should distinguish unary+binary *&
1061
- WHSPLF[nextchar.chr] or
1062
- @moretokens << NoWsToken.new(input_position)
1063
- end
1064
- result
1339
+ assert('*&'[ch])
1340
+ want_unary=unary_op_expected?(ch) ||
1341
+ (@last_operative_token===/^(return|next|break)$/ and KeywordToken===@last_operative_token)
1342
+ result=quadriop(ch)
1343
+ if want_unary
1344
+ #readahead(2)[1..1][/[\s\v#\\]/] or #not needed?
1345
+ assert OperatorToken===result
1346
+ result.unary=true #result should distinguish unary+binary *&
1347
+ WHSPLF[nextchar.chr] or
1348
+ @moretokens << NoWsToken.new(input_position)
1349
+ comma_in_lvalue_list?
1350
+ if ch=='*'
1351
+ @parsestack.last.see self, :splat
1352
+ end
1353
+ end
1354
+ result
1065
1355
  end
1066
1356
 
1067
1357
  #-----------------------------------
@@ -1079,15 +1369,23 @@ end
1079
1369
  #-----------------------------------
1080
1370
  def regex_or_div(ch)
1081
1371
  #space after slash always means / operator, rather than regex start
1082
- if after_nonid_op?{ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/\s}] }
1083
- return regex(ch)
1084
- else #/ is operator
1085
- result=getchar
1086
- if eat_next_if(?=)
1087
- result << '='
1088
- end
1089
- return(operator_or_methname_token result)
1090
- end
1372
+ #= after slash always means /= operator, rather than regex start
1373
+ if AssignmentContext===@parsestack.last
1374
+ @parsestack.pop
1375
+ op=true
1376
+ end
1377
+
1378
+ if !op and after_nonid_op?{
1379
+ !is_var_name? and WHSPLF[prevchar] and !readahead(2)[%r{^/[\s\v=]}]
1380
+ } || (KeywordToken===@last_token_maybe_implicit and @last_token_maybe_implicit.ident=="(")
1381
+ return regex(ch)
1382
+ else #/ is operator
1383
+ result=getchar
1384
+ if eat_next_if(?=)
1385
+ result << '='
1386
+ end
1387
+ return(operator_or_methname_token result)
1388
+ end
1091
1389
  end
1092
1390
 
1093
1391
  #-----------------------------------
@@ -1101,8 +1399,8 @@ end
1101
1399
  s=tok.to_s
1102
1400
  case s
1103
1401
  when /[^a-z_0-9]$/i; false
1104
- when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1105
- when /^[A-Z]/; VarNameToken===tok
1402
+ # when /^[a-z_]/; localvars===s or VARLIKE_KEYWORDS===s
1403
+ when /^[A-Z_]/i; VarNameToken===tok
1106
1404
  when /^[@$<]/; true
1107
1405
  else raise "not var or method name: #{s}"
1108
1406
  end
@@ -1139,18 +1437,22 @@ end
1139
1437
  unless eat_next_if(?:)
1140
1438
  #cancel implicit contexts...
1141
1439
  @moretokens.push(*abort_noparens!(':'))
1440
+ @moretokens.push KeywordToken.new(':',startpos)
1142
1441
 
1143
- #end ternary context, if any
1144
- @parsestack.last.see self,:colon
1145
-
1146
- TernaryContext===@parsestack.last and @parsestack.pop #should be in the context's see handler
1147
-
1148
- if ExpectDoOrNlContext===@parsestack.last #should be in the context's see handler
1442
+ case @parsestack.last
1443
+ when TernaryContext: @parsestack.pop #should be in the context's see handler
1444
+ when ExpectDoOrNlContext: #should be in the context's see handler
1149
1445
  @parsestack.pop
1150
1446
  assert @parsestack.last.starter[/^(while|until|for)$/]
1447
+ @moretokens.last.as=";"
1448
+ when RescueSMContext:
1449
+ @moretokens.last.as=";"
1450
+ else @moretokens.last.as="then"
1151
1451
  end
1152
1452
 
1153
- @moretokens.push KeywordToken.new(':',startpos)
1453
+ #end ternary context, if any
1454
+ @parsestack.last.see self,:colon
1455
+
1154
1456
  return @moretokens.shift
1155
1457
  end
1156
1458
 
@@ -1182,9 +1484,15 @@ end
1182
1484
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1183
1485
  result= opmatches ? read(opmatches.size) :
1184
1486
  case nc=nextchar
1185
- when ?" then assert notbare;double_quote('"')
1186
- when ?' then assert notbare;double_quote("'")
1187
- when ?` then read(1)
1487
+ when ?" #"
1488
+ assert notbare
1489
+ open=':"'; close='"'
1490
+ double_quote('"')
1491
+ when ?' #'
1492
+ assert notbare
1493
+ open=":'"; close="'"
1494
+ single_quote("'")
1495
+ when ?` then read(1) #`
1188
1496
  when ?@ then at_identifier.to_s
1189
1497
  when ?$ then dollar_identifier.to_s
1190
1498
  when ?_,?a..?z then identifier_as_string(?:)
@@ -1197,7 +1505,12 @@ end
1197
1505
  result
1198
1506
  else error= "unexpected char starting symbol: #{nc.chr}"
1199
1507
  end
1200
- return lexerror(klass.new(result,start),error)
1508
+ result= lexerror(klass.new(result,start,notbare ? ':' : ''),error)
1509
+ if open
1510
+ result.open=open
1511
+ result.close=close
1512
+ end
1513
+ return result
1201
1514
  end
1202
1515
 
1203
1516
  def merge_assignment_op_in_setter_callsites?
@@ -1211,12 +1524,12 @@ end
1211
1524
  opmatches=readahead(3)[RUBYSYMOPERATORREX]
1212
1525
  return [opmatches ? read(opmatches.size) :
1213
1526
  case nc=nextchar
1214
- when ?` then read(1)
1527
+ when ?` then read(1) #`
1215
1528
  when ?_,?a..?z,?A..?Z then
1216
1529
  context=merge_assignment_op_in_setter_callsites? ? ?: : nc
1217
1530
  identifier_as_string(context)
1218
1531
  else
1219
- @last_operative_token=KeywordToken.new(';')
1532
+ set_last_token KeywordToken.new(';')
1220
1533
  lexerror(tok_to_errify,"unexpected char starting callsite symbol: #{nc.chr}, tok=#{tok_to_errify.inspect}")
1221
1534
  nil
1222
1535
  end, start
@@ -1233,20 +1546,63 @@ end
1233
1546
  ender=til_charset(/[#{quote}]/)
1234
1547
  (quote==getchar) or
1235
1548
  return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "mismatched quotes in here doc")
1549
+ quote_real=true
1236
1550
  else
1237
1551
  quote='"'
1238
1552
  ender=til_charset(/[^a-zA-Z0-9_]/)
1239
1553
  ender.length >= 1 or
1240
- return lexerror(HerePlaceholderToken.new( dash, quote, ender ), "invalid here header")
1554
+ return lexerror(HerePlaceholderToken.new( dash, quote, ender, nil ), "invalid here header")
1241
1555
  end
1242
1556
 
1243
- res= HerePlaceholderToken.new( dash, quote, ender )
1557
+ res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
1558
+ if true
1559
+ res.open=["<<",dash,quote,ender,quote].to_s
1560
+ procrastinated=til_charset(/[\n]/)#+readnl
1561
+ unless @base_file
1562
+ @base_file=@file
1563
+ @file=Sequence::List.new([@file])
1564
+ @file.pos=@base_file.pos
1565
+ end
1566
+ #actually delete procrastinated from input
1567
+ @file.delete(input_position_raw-procrastinated.size...input_position_raw)
1568
+
1569
+ nl=readnl or return lexerror(res, "here header without body (at eof)")
1570
+
1571
+ @moretokens<< res
1572
+ bodystart=input_position
1573
+ @offset_adjust = @min_offset_adjust+procrastinated.size
1574
+ #was: @offset_adjust += procrastinated.size
1575
+ body=here_body(res)
1576
+ res.close=body.close
1577
+ @offset_adjust = @min_offset_adjust
1578
+ #was: @offset_adjust -= procrastinated.size
1579
+ bodysize=input_position-bodystart
1580
+
1581
+ #one or two already read characters are overwritten here,
1582
+ #in order to keep offsets correct in the long term
1583
+ #(at present, offsets and line numbers between
1584
+ #here header and its body will be wrong. but they should re-sync thereafter.)
1585
+ newpos=input_position_raw-nl.size
1586
+ #unless procrastinated.empty?
1587
+ @file.modify(newpos,nl.size,procrastinated+nl) #vomit procrastinated text back onto input
1588
+ #end
1589
+ input_position_set newpos
1590
+
1591
+ #line numbers would be wrong within the procrastinated section
1592
+ @linenum-=1
1593
+
1594
+ #be nice to get the here body token at the right place in input, too...
1595
+ @pending_here_bodies<< body
1596
+ @offset_adjust-=bodysize#+nl.size
1597
+
1598
+ return @moretokens.shift
1599
+ else
1244
1600
  @incomplete_here_tokens.push res
1245
1601
 
1246
1602
  #hack: normally this should just be in get1token
1247
1603
  #this fixup is necessary because the call the get1token below
1248
1604
  #makes a recursion.
1249
- @last_operative_token=res
1605
+ set_last_token res
1250
1606
 
1251
1607
  safe_recurse { |a|
1252
1608
  assert(a.object_id==@moretokens.object_id)
@@ -1269,7 +1625,7 @@ end
1269
1625
 
1270
1626
  tok=get1token
1271
1627
  assert(a.equal?( @moretokens))
1272
- toks<<tok
1628
+ toks<< tok
1273
1629
  EoiToken===tok and lexerror tok, "here body expected before eof"
1274
1630
  end while res.unsafe_to_use
1275
1631
  assert(a.equal?( @moretokens))
@@ -1281,13 +1637,14 @@ end
1281
1637
  #the action continues in newline, where
1282
1638
  #the rest of the here token is read after a
1283
1639
  #newline has been seen and res.affix is eventually called
1640
+ end
1284
1641
  end
1285
1642
 
1286
1643
  #-----------------------------------
1287
1644
  def lessthan(ch) #match quadriop('<') or here doc or spaceship op
1288
1645
  case readahead(3)
1289
- when /^<<['"`\-a-z0-9_]$/i
1290
- if quote_expected?(ch) #and @last_operative_token!='class' #not needed?
1646
+ when /^<<['"`\-a-z0-9_]$/i #'
1647
+ if quote_expected?(ch) and not @last_operative_token==='class'
1291
1648
  here_header
1292
1649
  else
1293
1650
  operator_or_methname_token read(2)
@@ -1309,101 +1666,231 @@ end
1309
1666
  error='illegal escape sequence'
1310
1667
  end
1311
1668
 
1312
- @moretokens.unshift FileAndLineToken.new(@filename,ln=@linenum,input_position)
1313
- optional_here_bodies
1669
+ #optimization: when thru with regurgitated text from a here document,
1670
+ #revert back to original unadorned Sequence instead of staying in the List.
1671
+ if @base_file and indices=@file.instance_eval{@start_pos} and
1672
+ (indices[-2]..indices[-1])===@file.pos
1673
+ @base_file.pos=@file.pos
1674
+ @file=@base_file
1675
+ @base_file=nil
1676
+ result="\n"
1677
+ end
1678
+
1679
+ @offset_adjust=@min_offset_adjust
1680
+ @moretokens.push *optional_here_bodies
1681
+ ln=@linenum
1682
+ @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
1683
+ FileAndLineToken.new(@filename,ln,input_position)
1684
+
1685
+ start_of_line_directives
1314
1686
 
1315
- lexerror EscNlToken.new(@filename,ln-1,result,pos), error
1687
+ return @moretokens.shift
1316
1688
  end
1317
1689
 
1318
1690
  #-----------------------------------
1319
1691
  def optional_here_bodies
1320
-
1692
+ result=[]
1693
+ if true
1321
1694
  #handle here bodies queued up by previous line
1322
- #(we should be more compatible with dos/mac style newlines...)
1695
+ pos=input_position
1696
+ while body=@pending_here_bodies.shift
1697
+ #body.offset=pos
1698
+ result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
1699
+ result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
1700
+ result.push body
1701
+ #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
1702
+ #result.push FileAndLineToken.new(@filename,@linenum,pos) #position and line num are off
1703
+ body.headtok.line=@linenum-1
1704
+ end
1705
+ else
1706
+ #...(we should be more compatible with dos/mac style newlines...)
1323
1707
  while tofill=@incomplete_here_tokens.shift
1708
+ result.push(
1709
+ here_body(tofill),
1710
+ FileAndLineToken.new(@filename,@linenum,input_position)
1711
+ )
1712
+ assert(eof? || "\r\n"[prevchar])
1713
+ tofill.line=@linenum-1
1714
+ end
1715
+ end
1716
+ return result
1717
+ end
1718
+
1719
+ #-----------------------------------
1720
+ def here_body(tofill)
1721
+ close="\n"
1324
1722
  tofill.string.offset= input_position
1723
+ linecount=1 #for terminator
1724
+ assert("\n"==prevchar)
1325
1725
  loop {
1326
- assert("\r\n"[prevchar])
1726
+ assert("\n"==prevchar)
1327
1727
 
1328
1728
  #here body terminator?
1329
- oldpos= input_position
1729
+ oldpos= input_position_raw
1330
1730
  if tofill.dash
1331
- til_charset(/[^#{WHSP}]/o)
1731
+ close+=til_charset(/[^#{WHSP}]/o)
1732
+ end
1733
+ break if eof? #this is an error, should be handled better
1734
+ if read(tofill.ender.size)==tofill.ender
1735
+ crs=til_charset(/[^\r]/)||''
1736
+ if nl=readnl
1737
+ close+=tofill.ender+crs+nl
1738
+ break
1739
+ end
1332
1740
  end
1333
- break if eof?
1334
- break if read(tofill.ender.size)==tofill.ender and readnl
1335
1741
  input_position_set oldpos
1336
1742
 
1743
+ assert("\n"==prevchar)
1744
+
1337
1745
  if tofill.quote=="'"
1338
- line=til_charset(/[\r\n]/)+readnl
1339
- line.gsub! "\\\\", "\\"
1746
+ line=til_charset(/[\n]/)
1747
+ unless nl=readnl
1748
+ assert eof?
1749
+ break #this is an error, should be handled better
1750
+ end
1751
+ line.chomp!("\r")
1752
+ line<< "\n"
1753
+ assert("\n"==prevchar)
1754
+ #line.gsub! "\\\\", "\\"
1340
1755
  tofill.append line
1341
- assert(line[-1..-1][/[\r\n]/])
1756
+ tofill.string.bs_handler=:squote_heredoc_esc_seq
1757
+ linecount+=1
1758
+ assert("\n"==line[-1,1])
1759
+ assert("\n"==prevchar)
1342
1760
  else
1343
1761
 
1762
+ assert("\n"==prevchar)
1763
+
1344
1764
  back1char #-1 to make newline char the next to read
1345
1765
  @linenum-=1
1346
1766
 
1767
+ assert /[\r\n]/===nextchar.chr
1768
+
1347
1769
  #retr evrything til next nl
1770
+ if FASTER_STRING_ESCAPES
1771
+ line=all_quote("\r\n", tofill.quote, "\r\n")
1772
+ else
1348
1773
  line=all_quote(INET_NL_REX, tofill.quote, INET_NL_REX)
1774
+ end
1775
+ linecount+=1
1349
1776
  #(you didn't know all_quote could take a regex, did you?)
1350
1777
 
1778
+ assert("\n"==prevchar)
1779
+
1351
1780
  #get rid of fals that otherwise appear to be in the middle of
1352
1781
  #a string (and are emitted out of order)
1353
1782
  fal=@moretokens.pop
1354
1783
  assert FileAndLineToken===fal || fal.nil?
1355
1784
 
1785
+ assert line.bs_handler
1786
+ tofill.string.bs_handler||=line.bs_handler
1787
+
1788
+ tofill.append_token line
1789
+ tofill.string.elems<<'' unless String===tofill.string.elems.last
1790
+
1791
+ assert("\n"==prevchar)
1792
+
1356
1793
  back1char
1357
1794
  @linenum-=1
1358
1795
  assert("\r\n"[nextchar.chr])
1359
- tofill.append_token line
1360
1796
  tofill.append readnl
1797
+
1798
+ assert("\n"==prevchar)
1361
1799
  end
1800
+
1801
+ assert("\n"==prevchar)
1362
1802
  }
1803
+
1363
1804
 
1364
- assert(eof? || "\r\n"[prevchar])
1805
+ str=tofill.string
1806
+ str.bs_handler||=:dquote_esc_seq if str.elems.size==1 and str.elems.first==''
1365
1807
  tofill.unsafe_to_use=false
1366
- tofill.line=@linenum-1
1367
-
1368
- @moretokens.push \
1369
- tofill.bodyclass.new(tofill),
1370
- FileAndLineToken.new(@filename,@linenum,input_position)
1371
- end
1372
-
1808
+ assert str.bs_handler
1809
+ #?? or tofill.string.elems==[]
1810
+
1811
+
1812
+ tofill.string.instance_eval{@char="`"} if tofill.quote=="`"
1813
+ #special cased, but I think that's all that's necessary...
1814
+
1815
+ result=tofill.bodyclass.new(tofill,linecount)
1816
+ result.open=str.open=""
1817
+ tofill.close=close
1818
+ result.close=str.close=close[1..-1]
1819
+ result.offset=str.offset
1820
+ assert str.open
1821
+ assert str.close
1822
+ return result
1373
1823
  end
1374
1824
 
1375
1825
  #-----------------------------------
1376
1826
  def newline(ch)
1377
1827
  assert("\r\n"[nextchar.chr])
1378
1828
 
1379
-
1380
-
1381
1829
  #ordinary newline handling (possibly implicitly escaped)
1382
1830
  assert("\r\n"[nextchar.chr])
1383
1831
  assert !@parsestack.empty?
1384
1832
  assert @moretokens.empty?
1385
- result=if NewlineToken===@last_operative_token or #hack
1386
- @last_operative_token===/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1387
- !after_nonid_op?{false}
1388
- then #hack-o-rama: probly cases left out above
1389
- a= abort_noparens!
1390
- ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1391
- assert !@parsestack.empty?
1392
- @parsestack.last.see self,:semi
1393
-
1394
- a << super(ch)
1395
- @moretokens.replace a+@moretokens
1396
- @moretokens.shift
1397
- else
1398
- offset= input_position
1399
- nl=readnl
1400
- @moretokens << FileAndLineToken.new(@filename,@linenum,input_position)
1401
- EscNlToken.new(@filename,@linenum-1,nl,offset)
1402
- #WsToken.new ' ' #why? #should be "\\\n" ?
1403
- end
1404
1833
 
1405
- optional_here_bodies
1834
+ pre=FileAndLineToken.new(@filename,@linenum+1,input_position)
1835
+ pre.allow_ooo_offset=true
1836
+
1837
+ if NewlineToken===@last_operative_token or #hack
1838
+ (KeywordToken===@last_operative_token and
1839
+ @last_operative_token.ident=="rescue" and
1840
+ !@last_operative_token.infix?) or
1841
+ #/^(;|begin|do|#{INNERBOUNDINGWORDS})$/ or #hack
1842
+ !after_nonid_op?{false}
1843
+ then #hack-o-rama: probly cases left out above
1844
+ @offset_adjust=@min_offset_adjust
1845
+ a= abort_noparens!
1846
+ ExpectDoOrNlContext===@parsestack.last and @parsestack.pop
1847
+ assert !@parsestack.empty?
1848
+ @parsestack.last.see self,:semi
1849
+
1850
+ a << super(ch)
1851
+ @moretokens.replace a+@moretokens
1852
+ else
1853
+ @offset_adjust=@min_offset_adjust
1854
+ offset= input_position
1855
+ nl=readnl
1856
+ @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
1857
+ FileAndLineToken.new(@filename,@linenum,input_position)
1858
+ end
1859
+
1860
+ #optimization: when thru with regurgitated text from a here document,
1861
+ #revert back to original unadorned Sequence instead of staying in the list.
1862
+ if @base_file and indices=@file.instance_eval{@start_pos} and
1863
+ (indices[-2]..indices[-1])===@file.pos and Sequence::SubSeq===@file.list.last
1864
+ @base_file.pos=@file.pos
1865
+ @file=@base_file
1866
+ @base_file=nil
1867
+ end
1868
+
1869
+ fal=@moretokens.last
1870
+ assert FileAndLineToken===fal
1871
+
1872
+ @offset_adjust=@min_offset_adjust
1873
+
1874
+ @moretokens.unshift(*optional_here_bodies)
1875
+ result=@moretokens.shift
1876
+
1877
+ #adjust line count in fal to account for newlines in here bodys
1878
+ i=@moretokens.size-1
1879
+ while(i>=0)
1880
+ #assert FileAndLineToken===@moretokens[i]
1881
+ i-=1 if FileAndLineToken===@moretokens[i]
1882
+ break unless HereBodyToken===@moretokens[i]
1883
+ pre_fal=true
1884
+ fal.line-=@moretokens[i].linecount
1406
1885
 
1886
+ i-=1
1887
+ end
1888
+
1889
+ if pre_fal
1890
+ @moretokens.unshift result
1891
+ pre.offset=result.offset
1892
+ result=pre
1893
+ end
1407
1894
  start_of_line_directives
1408
1895
 
1409
1896
  return result
@@ -1424,15 +1911,16 @@ end
1424
1911
 
1425
1912
  begin
1426
1913
  eof? and raise "eof before =end"
1427
- more<<til_charset(/[\r\n]/)
1428
- more<<readnl
1914
+ more<< til_charset(/[\r\n]/)
1915
+ eof? and raise "eof before =end"
1916
+ more<< readnl
1429
1917
  end until readahead(EQENDLENGTH)==EQEND
1430
1918
 
1431
1919
  #read rest of line after =end
1432
1920
  more << til_charset(/[\r\n]/)
1433
- assert((?\r===nextchar or ?\n===nextchar))
1921
+ assert((eof? or ?\r===nextchar or ?\n===nextchar))
1434
1922
  assert !(/[\r\n]/===more[-1,1])
1435
- more<< readnl
1923
+ more<< readnl unless eof?
1436
1924
 
1437
1925
  # newls= more.scan(/\r\n?|\n\r?/)
1438
1926
  # @linenum+= newls.size
@@ -1445,7 +1933,7 @@ end
1445
1933
  #handle __END__
1446
1934
  if ENDMARKER===readahead(ENDMARKERLENGTH)
1447
1935
  assert !(ImplicitContext===@parsestack.last)
1448
- @moretokens.unshift endoffile_detected(read(7))
1936
+ @moretokens.unshift endoffile_detected(read(ENDMARKERLENGTH))
1449
1937
  # input_position_set @file.size
1450
1938
  end
1451
1939
  end
@@ -1460,11 +1948,13 @@ end
1460
1948
  def unary_op_expected?(ch) #yukko hack
1461
1949
  '*&='[readahead(2)[1..1]] and return false
1462
1950
 
1951
+ return true if KeywordToken===@last_operative_token and @last_operative_token==='for'
1952
+
1463
1953
  after_nonid_op? {
1464
1954
  #possible func-call as operator
1465
1955
 
1466
1956
  not is_var_name? and
1467
- WHSPLF[prevchar]
1957
+ WHSPLF[prevchar] and !WHSPLF[readahead(2)[1..1]]
1468
1958
  }
1469
1959
  end
1470
1960
 
@@ -1473,11 +1963,6 @@ end
1473
1963
  # <<, %, ? in ruby
1474
1964
  #returns whether current token is to be the start of a literal
1475
1965
  def quote_expected?(ch) #yukko hack
1476
- if AssignmentContext===@parsestack.last
1477
- @parsestack.pop
1478
- return false
1479
- end
1480
-
1481
1966
  case ch[0]
1482
1967
  when ?? then readahead(2)[/^\?[#{WHSPLF}]$/o] #not needed?
1483
1968
  when ?% then readahead(3)[/^%([a-pt-vyzA-PR-VX-Z]|[QqrswWx][a-zA-Z0-9])/]
@@ -1500,17 +1985,23 @@ end
1500
1985
  #used to resolve the ambiguity of
1501
1986
  # <<, %, /, ?, :, and newline (among others) in ruby
1502
1987
  def after_nonid_op?
1988
+
1989
+ #this is how it should be, I think, and then no handlers for methnametoken and FUNCLIKE_KEYWORDS are needed
1990
+ # if ImplicitParamListStartToken===@last_token_including_implicit
1991
+ # huh return true
1992
+ # end
1503
1993
  case @last_operative_token
1504
- when MethNameToken, FUNCLIKE_KEYWORDS.token_pat ,VarNameToken
1994
+ when VarNameToken , MethNameToken, FUNCLIKE_KEYWORDS.token_pat
1505
1995
  #VarNameToken should really be left out of this case...
1506
1996
  #should be in next branch instread
1507
1997
  #callers all check for last token being not a variable if they pass anything
1508
- #but {false} in the block
1998
+ #but {false} in the block
1999
+ #(hmmm... some now have true or other non-varname checks in them... could these be bugs?)
1509
2000
  return yield
1510
2001
  when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
1511
2002
  %r{^(
1512
- class|module|end|self|true|false|nil|
1513
- __FILE__|__LINE__|[\})\]]|alias|(un)?def|for
2003
+ end|self|true|false|nil|
2004
+ __FILE__|__LINE__|[\})\]]
1514
2005
  )$}x.token_pat
1515
2006
  #dunno about def/undef
1516
2007
  #maybe class/module shouldn't he here either?
@@ -1522,17 +2013,16 @@ end
1522
2013
  #assert(@last_operative_token==$&) #disabled 'cause $& is now always nil :(
1523
2014
  return true
1524
2015
  when NewlineToken, nil, #nil means we're still at beginning of file
1525
- /^([({\[]|or|not|and|if|unless|then|elsif|else|
1526
- while|until|begin|for|in|case|when|ensure)$
2016
+ /^([({\[]|or|not|and|if|unless|then|elsif|else|class|module|def|
2017
+ while|until|begin|for|in|case|when|ensure|defined\?)$
1527
2018
  /x.token_pat
1528
2019
  return true
1529
- #when KeywordToken
1530
- # return true
2020
+ when KeywordToken
2021
+ return true if /^(alias|undef)$/===@last_operative_token.ident #is this ever actually true???
1531
2022
  when IgnoreToken
1532
2023
  raise "last_operative_token shouldn't be ignoreable"
1533
- else
1534
- raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
1535
2024
  end
2025
+ raise "after_nonid_op? after #{@last_operative_token}:#{@last_operative_token.class} -- now what"
1536
2026
  end
1537
2027
 
1538
2028
 
@@ -1577,10 +2067,10 @@ end
1577
2067
 
1578
2068
  #-----------------------------------
1579
2069
  def biop(ch) #match /%=?/ (% or %=)
1580
- assert(ch[/^[%^~]$/])
2070
+ assert(ch[/^[%^]$/])
1581
2071
  result=getchar
1582
2072
  if eat_next_if(?=)
1583
- result <<?=
2073
+ result << ?=
1584
2074
  end
1585
2075
  return operator_or_methname_token( result)
1586
2076
  end
@@ -1610,7 +2100,9 @@ end
1610
2100
  #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
1611
2101
  def plusminus(ch)
1612
2102
  assert(/^[+\-]$/===ch)
1613
- if unary_op_expected?(ch)
2103
+ if unary_op_expected?(ch) or
2104
+ KeywordToken===@last_operative_token &&
2105
+ /^(return|break|next)$/===@last_operative_token.ident
1614
2106
  if (?0..?9)===readahead(2)[1]
1615
2107
  return number(ch)
1616
2108
  else #unary operator
@@ -1619,7 +2111,6 @@ end
1619
2111
  @moretokens << NoWsToken.new(input_position)
1620
2112
  result=(operator_or_methname_token result)
1621
2113
  result.unary=true
1622
- #todo: result should distinguish unary+binary +-
1623
2114
  end
1624
2115
  else #binary operator
1625
2116
  assert(! want_op_name)
@@ -1628,9 +2119,8 @@ end
1628
2119
  result << ?=
1629
2120
  end
1630
2121
  result=(operator_or_methname_token result)
1631
- #todo: result should distinguish unary+binary +-
1632
2122
  end
1633
- result
2123
+ return result
1634
2124
  end
1635
2125
 
1636
2126
  #-----------------------------------
@@ -1642,19 +2132,31 @@ end
1642
2132
  str << c
1643
2133
  result= operator_or_methname_token( str,offset)
1644
2134
  case c
1645
- when '=': str<< (eat_next_if(?=)or'')
2135
+ when '=': #===,==
2136
+ str<< (eat_next_if(?=)or'')
1646
2137
 
1647
- when '>':
2138
+ when '>': #=>
1648
2139
  unless ParamListContextNoParen===@parsestack.last
1649
2140
  @moretokens.unshift result
1650
2141
  @moretokens.unshift( *abort_noparens!("=>"))
1651
2142
  result=@moretokens.shift
1652
2143
  end
1653
2144
  @parsestack.last.see self,:arrow
1654
- when '': #record local variable definitions
1655
-
2145
+ when '': #plain assignment: record local variable definitions
2146
+ last_context_not_implicit.lhs=false
2147
+ @moretokens.push *ignored_tokens(true).map{|x|
2148
+ NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
2149
+ }
1656
2150
  @parsestack.push AssignmentRhsContext.new(@linenum)
1657
- @moretokens.unshift AssignmentRhsListStartToken.new( offset+1)
2151
+ if eat_next_if ?*
2152
+ tok=OperatorToken.new('*', input_position-1)
2153
+ tok.unary=true
2154
+ @moretokens.push tok
2155
+ WHSPLF[nextchar.chr] or
2156
+ @moretokens << NoWsToken.new(input_position)
2157
+ comma_in_lvalue_list? #is this needed?
2158
+ end
2159
+ @moretokens.push AssignmentRhsListStartToken.new( input_position)
1658
2160
  end
1659
2161
  return result
1660
2162
  end
@@ -1666,6 +2168,7 @@ end
1666
2168
  k=eat_next_if(/[~=]/)
1667
2169
  if k
1668
2170
  result+=k
2171
+ elsif eof?: #do nothing
1669
2172
  else
1670
2173
  WHSPLF[nextchar.chr] or
1671
2174
  @moretokens << NoWsToken.new(input_position)
@@ -1693,10 +2196,11 @@ end
1693
2196
  #-----------------------------------
1694
2197
  def dot_rhs(prevtok)
1695
2198
  safe_recurse { |a|
1696
- @last_operative_token=prevtok
2199
+ set_last_token prevtok
1697
2200
  aa= ignored_tokens
2201
+ was=after_nonid_op?{true}
1698
2202
  tok,pos=callsite_symbol(prevtok)
1699
- tok and aa.push(*var_or_meth_name(tok,prevtok,pos))
2203
+ tok and aa.push(*var_or_meth_name(tok,prevtok,pos,was))
1700
2204
  a.unshift(*aa)
1701
2205
  }
1702
2206
  end
@@ -1705,7 +2209,7 @@ end
1705
2209
  def back_quote(ch=nil)
1706
2210
  if @last_operative_token===/^(def|::|\.)$/
1707
2211
  oldpos= input_position
1708
- MethNameToken.new(eat_next_if(?`), oldpos)
2212
+ MethNameToken.new(eat_next_if(?`), oldpos) #`
1709
2213
  else
1710
2214
  double_quote(ch)
1711
2215
  end
@@ -1716,7 +2220,7 @@ if false
1716
2220
  def comment(str)
1717
2221
  result=""
1718
2222
  #loop{
1719
- result<<super(nil).to_s
2223
+ result<< super(nil).to_s
1720
2224
 
1721
2225
  if /^\#.*\#$/===result #if comment was ended by a crunch
1722
2226
 
@@ -1762,7 +2266,7 @@ end
1762
2266
  tokch= NoWsToken.new(input_position-1)
1763
2267
  end
1764
2268
  when '('
1765
- lasttok=last_operative_token
2269
+ lasttok=last_token_maybe_implicit #last_operative_token
1766
2270
  #could be: lasttok===/^[a-z_]/i
1767
2271
  if (VarNameToken===lasttok or MethNameToken===lasttok or
1768
2272
  lasttok===FUNCLIKE_KEYWORDS)
@@ -1781,15 +2285,17 @@ end
1781
2285
  if after_nonid_op?{false} or @last_operative_token.has_no_block?
1782
2286
  @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
1783
2287
  else
2288
+ #abort_noparens!
1784
2289
  tokch.set_infix!
1785
- =begin not needed now, i think
2290
+ tokch.as="do"
2291
+ #=begin not needed now, i think
1786
2292
  # 'need to find matching callsite context and end it if implicit'
1787
2293
  lasttok=last_operative_token
1788
- unless lasttok===')' and lasttok.callsite?
2294
+ if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
1789
2295
  @moretokens.push *(abort_1_noparen!(1).push tokch)
1790
2296
  tokch=@moretokens.shift
1791
2297
  end
1792
- =end
2298
+ #=end
1793
2299
 
1794
2300
  localvars.start_block
1795
2301
  @parsestack.push BlockContext.new(@linenum)
@@ -1811,13 +2317,18 @@ end
1811
2317
  end
1812
2318
  ctx=@parsestack.pop
1813
2319
  origch,line=ctx.starter,ctx.linenum
1814
- ch==PAIRS[origch] or
2320
+ if ch!=PAIRS[origch]
2321
+ #kw.extend MismatchedBrace
1815
2322
  lexerror kw,"mismatched braces: #{origch}#{ch}\n" +
1816
2323
  "matching brace location", @filename, line
1817
- BlockContext===ctx and localvars.end_block
2324
+ end
2325
+ if BlockContext===ctx
2326
+ localvars.end_block
2327
+ @moretokens.last.as="end"
2328
+ end
1818
2329
  if ParamListContext==ctx.class
1819
2330
  assert ch==')'
1820
- #kw.set_callsite! #not needed?
2331
+ kw.set_callsite! #not needed?
1821
2332
  end
1822
2333
  return @moretokens.shift
1823
2334
  end
@@ -1826,19 +2337,24 @@ end
1826
2337
  def eof(ch=nil)
1827
2338
  #this must be the very last character...
1828
2339
  oldpos= input_position
1829
- assert(?\0==getc)
2340
+ assert(/\A[\x0\x4\x1a]\Z/===nextchar.chr)
1830
2341
 
1831
- result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
2342
+ result=@file.read!
2343
+ # result= "\0#{ignored_tokens(true).delete_if{|t|FileAndLineToken===t}}"
1832
2344
 
1833
- eof? or
1834
- lexerror result,'nul character is not at the end of file'
1835
- input_position_set @file.size
2345
+ # eof? or
2346
+ # lexerror result,'nul character is not at the end of file'
2347
+ # input_position_set @file.size
1836
2348
  return(endoffile_detected result)
1837
2349
  end
1838
2350
 
1839
2351
  #-----------------------------------
1840
2352
  def endoffile_detected(s='')
1841
2353
  @moretokens.push( *(abort_noparens!.push super(s)))
2354
+ if @progress_thread
2355
+ @progress_thread.kill
2356
+ @progress_thread=nil
2357
+ end
1842
2358
  result= @moretokens.shift
1843
2359
  balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
1844
2360
  result
@@ -1851,7 +2367,26 @@ end
1851
2367
 
1852
2368
  #-----------------------------------
1853
2369
  def comma(ch)
1854
- single_char_token(ch)
2370
+ @moretokens.push token=single_char_token(ch)
2371
+ if AssignmentRhsContext===@parsestack[-1] and
2372
+ ParamListContext===@parsestack[-2] ||
2373
+ ParamListContextNoParen===@parsestack[-2] ||
2374
+ WhenParamListContext===@parsestack[-2] ||
2375
+ (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
2376
+ (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
2377
+ @parsestack.pop
2378
+ @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
2379
+ end
2380
+ token.comma_type=
2381
+ case @parsestack[-1]
2382
+ when AssignmentRhsContext: :rhs
2383
+ when ParamListContext,ParamListContextNoParen: :call
2384
+ when ListImmedContext: :array
2385
+ else
2386
+ :lhs if comma_in_lvalue_list?
2387
+ end
2388
+ @parsestack.last.see self,:comma
2389
+ return @moretokens.shift
1855
2390
  end
1856
2391
 
1857
2392
  #-----------------------------------
@@ -1872,7 +2407,7 @@ end
1872
2407
  assert RUBYOPERATORREX===s
1873
2408
  if RUBYNONSYMOPERATORREX===s
1874
2409
  KeywordToken
1875
- elsif @last_operative_token===/^(\.|::|def|undef|alias|defined\?)$/
2410
+ elsif want_op_name
1876
2411
  MethNameToken
1877
2412
  else
1878
2413
  OperatorToken
@@ -1882,9 +2417,7 @@ end
1882
2417
  #-----------------------------------
1883
2418
  #tokenify_results_of :identifier
1884
2419
  save_offsets_in(*CHARMAPPINGS.values.uniq-[
1885
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1886
-
1887
-
2420
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote
1888
2421
  ])
1889
2422
  #save_offsets_in :symbol
1890
2423