rubylexer 0.7.6 → 0.7.7

@@ -0,0 +1,4 @@
+ README.txt
+ howtouse.txt
+ testing.txt
+ lib
@@ -1,4 +1,51 @@
- === 0.7.6/7-01-2009
+ === 0.7.7/21dec2009
+ * 5 Major Enhancements:
+ * Got rid of the hacky RuLexer ancestor class. woo-hoo!
+ * Input charsets other than 7bit ascii now supported
+ * binary (8bit ascii), utf8, and euc now work; sjis does not
+ * __FILE__ and __LINE__ now have their correct values attached to them
+ * Build scripts completely rewritten; hoe is now gone!
+ * Improved ruby 1.9 compatibility (see below)
+
+ * 4 Major Bugfixes:
+ * Module names may begin with arbitrary expressions
+ * but such expressions are always ended by ::
+ * even if inside a implicit parens context
+ * and positions of whitespace tokens in module header are better tracked
+ * Finally learned to disable rdoc for files it dislikes (thanks, Roger!)
+ * Rescue in rhs context is always ternary now
+ * (this is incorrect if in a multiassign, but redparse will handle that)
+ * Parsing of do, comma, and unary star on assign rhs is better now
+
+ * 2 Minor Enhancements:
+ * Colorizer example
+ * Changes to token classes to incorporate modifications made by redparse
+
+ * 7 Minor Bugfixes:
+ * Newline after = is now soft
+ * Fixed type of local var if inside class/method inside def
+ * Fixed parsing of shebang line if no args
+ * Fixed incorrect offsets in a few obscure cases
+ * Don't treat \r as newline in =begin..=end
+ * Cleaned up test data
+ * Fixed mistypings of local vars in string inclusions
+
+ * Improved 1.9 compatibility:
+ * code works under 1.9 interpreter
+ * stabby blocks
+ * __ENCODING__ keyword
+ * tolerate ternary : at beginning of line
+ * character constants are string, not integer, literals
+ * new \u escape sequence allowed in double-quotish strings
+ * allow nested () in def param list
+ * not is a funclike keyword
+ * parens allowed as method name; alias for #call
+ * block private locals declared after ; inside block param
+ * !, !=, and !~ are methods in 1.9
+ * local variables declared by named backreferences in regexps
+ * tests for many 1.9 features
+
+ === 0.7.6/1jul2009
  * 5 Bugfixes:
  * don't treat <, <=, <=> as starting variables (only << for here header)
  * space between break/return/next and following open paren is ignored
@@ -11,11 +58,11 @@
  * dot at beginning of line
  * !, !=, !~ are now valid method/symbol names
 
- === 0.7.5/5-23-2009
+ === 0.7.5/23may2009
  * 1 Bugfix:
  * fixed problem with parsing shebang lines
 
- === 0.7.4/5-20-2009
+ === 0.7.4/20may2009
  * 2 Major Enhancements:
  * preliminary support for ruby 1.9
  * utf8 inputs should now work... more or less
@@ -31,7 +78,7 @@
  * added tag field to Token; I hope many flags can be coalesced into tag.
  * note line that all strings (and here docs) start and end on
 
- === 0.7.3/4-19-2009
+ === 0.7.3/19apr2009
  * 9 Bugfixes:
  * remember whether comma was seen in paren context
  * reducing the warning load
@@ -56,7 +103,7 @@
  * various other little helper methods needed by redparse in Tokens
  * hack Rakefile so 'rake test' will stay in 1 process (keeps netbeans happy)
 
- === 0.7.2/10-12-2008
+ === 0.7.2/12oct2008
  * 12 Minor Enhancements:
  * a new context for then kw expected
  * disable all backtracking when scanning string interiors
@@ -71,7 +118,7 @@
  * trying to make 'rake test' work right
  * certain other changes of no importance whatsoever
 
- === 0.7.1/8-29-2008
+ === 0.7.1/28aug2008
  * 6 Major Enhancements:
  * handling of empty string fragments now more closely mirrors ruby; this resolves many warnings
  * yet more hacks in aid of string inclusions
@@ -161,7 +208,7 @@
  * offset problems in here head and body, symbol and fal tokens are always ignored (a hack)
  * tokentest has a --loop option, for load testing
 
- === 0.7.0/2-15-2008
+ === 0.7.0/15feb2008
  * implicit tokens are now emitted at the right times (need more test code)
  * local variables are now temporarily hidden by class, module, and def
  * line numbers should always be correct now (=begin...=end caused this) (??)
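
For a concrete sense of the 0.7.7 items above, the snippet below is ordinary Ruby 1.9 source of the kind the new lexer is expected to tokenize. It is illustrative only; the names are not taken from the gem's tests.

    double = ->(x) { x * 2 }        # stabby block
    double.(20)                     # parens as method name; alias for #call
    ->(a; tmp) { tmp = a.to_s }     # block-private local declared after ; in the param list
    s = "caf\u00e9"                 # \u escape in a double-quotish string
    c = ?A                          # character constant is a string, not an integer, literal
    /(?<word>\w+)/ =~ s and word    # named backreference in a regexp declares a local
    puts __ENCODING__               # __ENCODING__ keyword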
@@ -0,0 +1,43 @@
+ name=RubyLexer
+ lname=rubylexer
+ gemname=rubylexer
+
+ #everything after this line is generic
+
+ version=$(shell ruby -r ./lib/$(lname)/version.rb -e "puts $(name)::VERSION")
+ filelist=$(shell git ls-files)
+
+ .PHONY: all test docs gem tar pkg email
+ all: test
+
+ test:
+ ruby -Ilib test/test_all.rb
+
+ docs:
+ rdoc lib/*
+
+ pkg: gem tar
+
+ gem:
+ gem build $(lname).gemspec
+
+ tar:
+ tar cf - $(filelist) | ( mkdir $(gemname)-$(version); cd $(gemname)-$(version); tar xf - )
+ tar czf $(gemname)-$(version).tar.gz $(gemname)-$(version)
+ rm -rf $(gemname)-$(version)
+
+ email: README.txt History.txt
+ ruby -e ' \
+ require "rubygems"; \
+ load "./$(lname).gemspec"; \
+ spec= Gem::Specification.list.find{|x| x.name=="$(gemname)"}; \
+ puts "\
+ Subject: [ANN] $(name) #{spec.version} Released \
+ \n\n$(name) version #{spec.version} has been released! \n\n\
+ #{Array(spec.homepage).map{|url| " * #{url}\n" }} \
+ \n\
+ #{$(name)::Description} \
+ \n\nChanges:\n\n \
+ #{$(name)::Latest_changes} \
+ "\
+ '
@@ -0,0 +1,3 @@
+ assert.rb
+ rubylexer
+ rubylexer.rb
@@ -109,9 +109,6 @@ class RubyLexer
 #?\r => :newline, #implicitly escaped after op
 
 ?\\ => :escnewline,
- ?\x00 => :eof,
- ?\x04 => :eof,
- ?\x1a => :eof,
 
 "[({" => :open_brace,
 "])}" => :close_brace,
@@ -119,7 +116,15 @@
 
 ?# => :comment,
 
- NONASCII => :identifier,
+ ?\x00 => :eof,
+ ?\x04 => :eof,
+ ?\x1a => :eof,
+
+ ?\x01..?\x03 => :illegal_char,
+ ?\x05..?\x08 => :illegal_char,
+ ?\x0E..?\x19 => :illegal_char,
+ ?\x1b..?\x1F => :illegal_char,
+ ?\x7F => :illegal_char,
 }
 
 attr_reader :incomplete_here_tokens, :parsestack, :last_token_maybe_implicit
@@ -138,10 +143,14 @@
 def #{n}; #{n}; end
 def self.#{n}; @@#{n}; end
 "
- }.to_s
+ }.join
 
 NEVERSTARTPARAMLISTWORDS=/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
- NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
+ if ?A.is_a? String #ruby >= 1.9
+ NEVERSTARTPARAMLISTFIRST=/[aoeitrwu]/
+ else
+ NEVERSTARTPARAMLISTFIRST=CharSet['aoeitrwu'] #chars that begin NEVERSTARTPARAMLIST
+ end
 NEVERSTARTPARAMLISTMAXLEN=7 #max len of a NEVERSTARTPARAMLIST
 
 =begin
@@ -149,13 +158,13 @@
 utf8=String::PATTERN_UTF8 #or euc, or sjis...
 LCLETTER_U="(?>[a-z_]|#{utf8})"
 LETTER_U="(?>[A-Za-z_]|#{utf8})"
- IDENTCHAR_U="(?>[A-Za-z_0-9]|#{utf8})"
+ LETTER_DIGIT_U="(?>[A-Za-z_0-9]|#{utf8})"
 =end
 
 #-----------------------------------
- def initialize(filename,file,linenum=1,offset_adjust=0,options={:rubyversion=>1.8})
+ def initialize(filename,file,linenum=1,offset_adjust=0,options={})
 @offset_adjust=0 #set again in next line
- super(filename,file, linenum,offset_adjust)
+ rulexer_initialize(filename,file, linenum,offset_adjust)
 @start_linenum=linenum
 @parsestack=[TopLevelContext.new]
 @incomplete_here_tokens=[] #not used anymore
@@ -168,16 +177,17 @@
 @enable_macro=nil
 @base_file=nil
 @progress_thread=nil
- @rubyversion=options[:rubyversion]
+ @rubyversion=options[:rubyversion]||1.8
 @encoding=options[:encoding]||:detect
 @method_operators=if @rubyversion>=1.9
- /#{RUBYSYMOPERATORREX}|\A![=~]?\Z/o
+ /#{RUBYSYMOPERATORREX}|\A![=~@]?/o
 else
 RUBYSYMOPERATORREX
 end
 
- @toptable=CharHandler.new(self, :illegal_char, CHARMAPPINGS)
+ @toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)
 
+ extend RubyLexer1_9 if @rubyversion>=1.9
 read_leading_encoding
 start_of_line_directives
 progress_printer
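
A minimal sketch of the revised constructor: the positional arguments are unchanged, but the target ruby version and input encoding now arrive through the options hash, defaulting to 1.8 and :detect, and :rubyversion=>1.9 also makes the instance extend RubyLexer1_9. Passing the source as a plain string, and the file name used here, are assumptions of this sketch.

    require 'rubylexer'

    src = "p( ->(x){ x + 1 }.(41) )\n"   # assumed: a string is acceptable as the source argument
    lexer = RubyLexer.new("example.rb", src, 1, 0,
                          :rubyversion=>1.9, :encoding=>:utf8)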
@@ -203,11 +213,11 @@
 def read_leading_encoding
 return unless @encoding==:detect
 @encoding=:ascii
- @encoding=:utf8 if @file.skip( /\xEF\xBB\xBF/ ) #bom
+ @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" ) #bom
 if @file.skip( /\A#!/ )
 loop do
 til_charset( /[\s\v]/ )
- break if @file.match( / ([^-\s\v]|--[\s\v])/,4 )
+ break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
 if @file.skip( /.-K(.)/ )
 case $1
 when 'u'; @encoding=:utf8
@@ -243,8 +253,9 @@
 @localvars_stack.last
 end
 
+ attr_accessor :localvars_stack
+
 attr_accessor :in_def
- attr :localvars_stack
 attr :offset_adjust
 attr_writer :pending_here_bodies
 attr :rubyversion
@@ -256,7 +267,7 @@
 
 #-----------------------------------
 def get1token
- result=super #most of the action's here
+ result=rulexer_get1token #most of the action's here
 
 if ENV['PROGRESS']
 @last_cp_pos||=0
@@ -300,12 +311,12 @@
 
 #-----------------------------------
 def eof?
- super or EoiToken===@last_operative_token
+ rulexer_eof? or EoiToken===@last_operative_token
 end
 
 #-----------------------------------
 def input_position
- super+@offset_adjust
+ rulexer_input_position+@offset_adjust
 end
 
 #-----------------------------------
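
Since the super calls above are now explicit rulexer_* calls, the public surface is unchanged; below is a hedged sketch of a driver loop using only get1token and eof?, both shown in this diff (source-as-string is again an assumption).

    require 'rubylexer'

    lexer = RubyLexer.new("example.rb", "a = 1 + 2\n", 1, 0)
    until lexer.eof?
      tok = lexer.get1token                     # EoiToken arrives last; eof? then turns true
      puts "#{tok.class}: #{tok.to_s.inspect}"
    end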
@@ -351,6 +362,7 @@ private
 return true if (defined? @in_def) and @in_def
 @parsestack.reverse_each{|ctx|
 ctx.starter=='def' and ctx.state!=:saw_def and return true
+ ctx.starter=='class' || ctx.starter=='module' and return false
 }
 return false
 end
@@ -389,7 +401,7 @@ private
 unless @moretokens.empty?
 case @moretokens.first
 when StillIgnoreToken
- when NewlineToken: allow_eol or break
+ when NewlineToken; allow_eol or break
 else break
 end
 else
@@ -467,12 +479,9 @@ private
 if @parsestack.last.wantarrow and @rubyversion>=1.9 and @file.skip ":"
 @moretokens.push SymbolToken.new(str,oldpos), KeywordToken.new("=>",input_position-1)
 else
- @moretokens.unshift(*parse_keywords(str,oldpos) do |tok|
- #if not a keyword, decide if it should be var or method
- case str
- when FUNCLIKE_KEYWORDS; except=tok
- when VARLIKE_KEYWORDS,RUBYKEYWORDS; raise "shouldnt see keywords here, now"
- end
+ @moretokens.unshift(*parse_keywords(str,oldpos) do |tok,except|
+ #most callers of this block pass nothing(==nil) for except. only _keyword_funclike passes a true val
+
 was_last=@last_operative_token
 @last_operative_token=tok if tok
 normally=safe_recurse { |a| var_or_meth_name(str,was_last,oldpos,after_nonid_op?{true}) }
@@ -547,6 +556,7 @@
 #@defining_lvar is a hack
 @defining_lvar or case ctx=@parsestack.last
 #when ForSMContext; ctx.state==:for
+ when UnparenedParamListLhsContext; /^(->|,|;)$/===lasttok.ident
 when RescueSMContext
 lasttok.ident=="=>" and @file.match?( /\A[\s\v]*([:;#\n]|then(?!#@@LETTER_DIGIT))/om )
 #when BlockParamListLhsContext; true
@@ -567,6 +577,7 @@
 #whitespace before but not after the 'operator' indicates it is to be considered a
 #value token instead. otherwise it is a binary operator. (unary (prefix) ops count
 #as 'values' here.)
+ #this is by far the ugliest method in RubyLexer.
 def var_or_meth_name(name,lasttok,pos,was_after_nonid_op)
 #look for call site if not a keyword or keyword is function-like
 #look for and ignore local variable names
@@ -579,7 +590,7 @@
 when /(?!#@@LETTER_DIGIT).$/o #do nothing
 when /^#@@LCLETTER/o
 (localvars===name or
- VARLIKE_KEYWORDS===name or
+ #VARLIKE_KEYWORDS===name or
 was_in_lvar_define_state
 ) and not lasttok===/^(\.|::)$/
 when /^#@@UCLETTER/o
@@ -617,8 +628,9 @@
 #if next op is assignment (or comma in lvalue list)
 #then omit implicit parens
 assignment_coming=case nc=nextchar
- when ?=; not /^=[>=~]$/===readahead(2)
+ when ?=; not( /^=[>=~]$/===readahead(2) )
 when ?,; comma_in_lvalue_list?
+ when (?; if @rubyversion>=1.9); ParenedParamListLhsContext===@parsestack.last
 when ?); last_context_not_implicit.lhs
 when ?i; /^in(?!#@@LETTER_DIGIT)/o===readahead(3) and
 ForSMContext===last_context_not_implicit
@@ -645,7 +657,7 @@
 IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT
 else
 case nc
- when nil: 2
+ when nil; 2
 when ?!; /^![=~]$/===readahead(2) ? 2 : 1
 when ?d;
 if /^do((?!#@@LETTER_DIGIT)|$)/o===readahead(3)
@@ -761,7 +773,7 @@
 !(KeywordToken===lasttok and /^(\.|::)$/===lasttok.ident)
 #only 1 param in list
 result.unshift ImplicitParamListStartToken.new(oldpos)
- @parsestack.push ParamListContextNoParen.new(@linenum)
+ @parsestack.push KWParamListContextNoParen.new(@linenum)
 else
 arr,pass=*param_list_coming_with_2_or_more_params?
 result.push( *arr )
@@ -846,14 +858,14 @@
 result=[]
 ctx=@parsestack.last
 while klass=CONTEXT2ENDTOK_FOR_RESCUE[ctx.class]
- break if AssignmentRhsContext===ctx && !ctx.multi_assign?
- if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
- result.push ImplicitParamListEndToken.new(input_position-str.length),
- AssignmentRhsListEndToken.new(input_position-str.length)
- @parsestack.pop
- @parsestack.pop
- break
- end
+ # break if AssignmentRhsContext===ctx && !ctx.multi_assign?
+ # if ParamListContextNoParen===ctx && AssignmentRhsContext===@parsestack[-2]
+ # result.push ImplicitParamListEndToken.new(input_position-str.length),
+ # AssignmentRhsListEndToken.new(input_position-str.length)
+ # @parsestack.pop
+ # @parsestack.pop
+ # break
+ # end
 result << klass.new(input_position-str.length) #unless AssignmentRhsContext===ctx and !ctx.multi_assign?
 break if RescueSMContext===ctx #why is this here?
 @parsestack.pop
@@ -866,6 +878,7 @@
 CONTEXT2ENDTOK_FOR_DO={
 AssignmentRhsContext=>AssignmentRhsListEndToken,
 ParamListContextNoParen=>ImplicitParamListEndToken,
+ UnparenedParamListLhsContext=>KwParamListEndToken,
 ExpectDoOrNlContext=>1,
 #WhenParamListContext=>KwParamListEndToken,
 #RescueSMContext=>KwParamListEndToken
@@ -874,6 +887,17 @@
 #assert @moretokens.empty?
 result=[]
 while klass=CONTEXT2ENDTOK_FOR_DO[@parsestack.last.class]
+ if klass==AssignmentRhsListEndToken
+ i=@parsestack.size
+ end_the_assign=false
+ while AssignmentRhsContext===@parsestack[i-=1]
+ if CONTEXT2ENDTOK_FOR_DO[@parsestack[i-1].class] and
+ @parsestack[i-1].class!=AssignmentRhsContext
+ break end_the_assign=true
+ end
+ end
+ break unless end_the_assign
+ end
 break if klass==1
 result << klass.new(input_position-str.length)
 @parsestack.pop
@@ -917,19 +941,27 @@
 
 #-----------------------------------
 @@SPACES=/[\ \t\v\f\v]/
- @@WSTOK=/\r?\n|\r*#@@SPACES+(?:#@@SPACES|\r(?!\n))*|\#[^\n]*\n|\\\r?\n|
- ^=begin(?:[\s].*)?\n(?:(?!=end).*\n)*=end[\s\n].*\n/x
- @@WSTOKS=/(?!=begin)#@@WSTOK+/o
- def divide_ws(ws,offset)
+ @@WSTOK=/(?>
+ (?>\r?)\n|
+ (?>\r*)(?>#@@SPACES+)(?>(?:#@@SPACES|\r(?!\n))*)|
+ \#(?>[^\n]*)\n|
+ \\(?>\r?)\n|
+ ^=begin(?>(?>#@@SPACES.*)?)\n
+ (?>(?:(?!=end)(?>.*)\n))*
+ =end(?>(?>#@@SPACES.*)?)\n
+ )/x
+ @@WSTOKS=/(?!=begin)(?>#@@WSTOK+)/o
+ def divide_ws(ws0,offset)
 result=[]
- ws.scan(/\G#@@WSTOK/o){|ws|
+ ws0.scan(/\G#@@WSTOK/o){|ws|
 incr= $~.begin(0)
- klass=case ws
- when /\A[\#=]/; CommentToken
- when /\n\Z/; EscNlToken
- else WsToken
+ tok=case ws
+ when /\A[\#=]/; IgnoreToken.new(ws,offset+incr)
+ when /\n\Z/; EscNlToken.new(ws,offset+incr,@filename,@linenum)
+ else WsToken.new(ws,offset+incr)
 end
- result << klass.new(ws,offset+incr)
+ result << tok
+ @linenum+=ws.count "\n"
 }
 result.each_with_index{|ws,i|
 if WsToken===ws
@@ -939,7 +971,22 @@
 return result
 end
 
-
+ #-----------------------------------
+ #lex tokens until a predefined end token is found.
+ #returns a list of tokens seen.
+ def read_arbitrary_expression(&endcondition)
+ result=[]
+ oldsize=@parsestack.size
+ safe_recurse{
+ tok=nil
+ until endcondition[tok,@parsestack[oldsize+1..-1]||[]] and @parsestack.size==oldsize
+ tok=get1token
+ result<<tok
+ EoiToken===tok and break lexerror( tok, "unexpected eof" )
+ end
+ }
+ result
+ end
 
 #-----------------------------------
 #parse keywords now, to prevent confusion over bare symbols
@@ -950,7 +997,7 @@
 assert !(KeywordToken===@last_operative_token and /A(\.|::|def)\Z/===@last_operative_token.ident)
 result=[KeywordToken.new(str,offset)]
 
- m="keyword_#{str}"
+ m=:"keyword_#{str}"
 respond_to?(m) ? (send m,str,offset,result,&block) : block[MethNameToken.new(str)]
 end
 public #these have to be public so respond_to? can see them (sigh)
@@ -977,29 +1024,39 @@
 def keyword_module(str,offset,result)
 result.first.has_end!
 @parsestack.push WantsEndContext.new(str,@linenum)
- @localvars_stack.push SymbolTable.new
 offset=input_position
- @file.scan(/\A(#@@WSTOKS)?(::)?/o)
- md=@file.last_match
- all,ws,dc=*md
- fail if all.empty?
- @moretokens.concat divide_ws(ws,offset) if ws
- @moretokens.push KeywordToken.new('::',offset+md.end(0)-2) if dc
- loop do
- offset=input_position
- @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(::)?/o)
+ assert @moretokens.empty?
+ tokens=[]
+ if @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(?=[#{WHSP}]+(?:[^(])|[#;\n]|::)/o)
 md=@file.last_match
- all,ws,name,dc=*md
- if ws
- @moretokens.concat divide_ws(ws,offset)
- incr=ws.size
+ all,ws,name=*md
+ tokens.concat divide_ws(ws,md.begin(1)) if ws
+ tokens.push VarNameToken.new(name,md.begin(2))
+ end
+ tokens.push( *read_arbitrary_expression{|tok,extra_contexts|
+ #@file.check /\A(\n|;|::|end(?!#@@LETTER_DIGIT)|(#@@UCLETTER#@@LETTER_DIGIT*)(?!(#@@WSTOKS)?::))/o
+ @file.check( /\A(\n|;|end(?!#@@LETTER_DIGIT))/o ) or
+ @file.check("::") && extra_contexts.all?{|ctx| ImplicitParamListContext===ctx } &&
+ @moretokens.push(*abort_noparens!)
+ } ) if !name #or @file.check /#@@WSTOKS?::/o
+ @moretokens[0,0]=tokens
+ @localvars_stack.push SymbolTable.new
+ while @file.check( /\A::/ )
+ #VarNameToken===@moretokens.last or
+ #KeywordToken===@moretokens.last && @moretokens.last.ident=="::"
+ @file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
+ md=@file.last_match
+ all,ws1,dc,ws2,name=*md
+ if ws1
+ @moretokens.concat divide_ws(ws1,md.begin(1))
+ incr=ws1.size
 else
 incr=0
 end
- @moretokens.push VarNameToken.new(name,offset+incr)
- break unless dc
- @moretokens.push NoWsToken.new(offset+md.end(0)-2)
- @moretokens.push KeywordToken.new('::',offset+md.end(0)-2)
+ @moretokens.push NoWsToken.new(md.begin(2)) if dc
+ @moretokens.push KeywordToken.new('::',md.begin(2)) if dc
+ @moretokens.concat divide_ws(ws2,md.begin(3)) if ws2
+ @moretokens.push VarNameToken.new(name,md.begin(4))
 end
 @moretokens.push EndHeaderToken.new(input_position)
 return result
@@ -1071,8 +1128,7 @@
 else
 result.last.has_end!
 if BlockContext===ctx and ctx.wanting_stabby_block_body
- ctx.wanting_stabby_block_body=false
- ctx.starter,ctx.ender="do","end"
+ @parsestack[-1]= WantsEndContext.new(str,@linenum)
 else
 @parsestack.push WantsEndContext.new(str,@linenum)
 localvars.start_block
@@ -1107,8 +1163,8 @@
 result << tok
 end until parencount==0 #@parsestack.size==old_size
 @localvars_stack.push SymbolTable.new
- else #no parentheses, all tail
- set_last_token KeywordToken.new(".") #hack hack
+ else #no parentheses, all tail
+ set_last_token KeywordToken.new(".") #hack hack
 tokindex=result.size
 result << tok=symbol(false,false)
 name=tok.to_s
@@ -1118,7 +1174,7 @@
 maybe_local=case name
 when /(?!#@@LETTER_DIGIT).$/o; #do nothing
 when /^[@$]/; true
- when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS; ty=KeywordToken
+ when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
 when /^#@@LCLETTER/o; localvars===name
 when /^#@@UCLETTER/o; is_const=true #this is the right algorithm for constants...
 end
@@ -1164,6 +1220,7 @@
 nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
 if state==:expect_op and /^(?:#@@LETTER|[(&*])/o===nc
 ctx.state=:def_param_list
+ ctx.has_parens= '('==nc
 list,listend=def_param_list
 result.concat list
 end_index=result.index(listend)
@@ -1271,10 +1328,19 @@
 
 def keyword_rescue(str,offset,result)
 unless after_nonid_op? {false}
+ result.replace []
 #rescue needs to be treated differently when in operator context...
 #i think no RescueSMContext should be pushed on the stack...
- result.first.set_infix! #plus, the rescue token should be marked as infix
- result.unshift(*abort_noparens_for_rescue!(str))
+ tok=OperatorToken.new(str,offset)
+ tok.unary=false #plus, the rescue token should be marked as infix
+ if AssignmentRhsContext===@parsestack.last
+ tok.as="rescue3"
+ @parsestack.pop #end rhs context
+ result.push AssignmentRhsListEndToken.new(offset) #end rhs token
+ else
+ result.concat abort_noparens_for_rescue!(str)
+ end
+ result.push tok
 else
 result.push KwParamListStartToken.new(offset+str.length)
 #corresponding EndToken emitted by abort_noparens! on leaving rescue context
@@ -1349,12 +1415,31 @@
 return result
 end
 
+ def keyword___FILE__(str,offset,result)
+ result.last.value=@filename
+ return result
+ end
+
+ def keyword___LINE__(str,offset,result)
+ result.last.value=@linenum
+ return result
+ end
+
+ module RubyLexer1_9
+ def keyword___ENCODING__(str,offset,result)
+ #result.last.value=huh
+ return result
+ end
+
+ def keyword_not(*args,&block) _keyword_funclike(*args,&block) end
+ end
 
 def _keyword_funclike(str,offset,result)
 if @last_operative_token===/^(\.|::)$/
 result=yield MethNameToken.new(str) #should pass a methname token here
 else
- result=yield KeywordToken.new(str)
+ tok=KeywordToken.new(str)
+ result=yield tok,tok
 end
 return result
 end
@@ -1366,10 +1451,12 @@
 #do nothing
 return result
 end
- for kw in VARLIKE_KEYWORDLIST+["defined?", "not"] do
+ for kw in VARLIKE_KEYWORDLIST-["__FILE__","__LINE__"]+["defined?", "not"] do
 alias_method "keyword_#{kw}".to_sym, :_keyword_varlike
 end
 
+
+
 private
 
 #-----------------------------------
@@ -1453,6 +1540,7 @@ end
 elsif starter==?(
 ctx_type=UnparenedParamListLhsContext #hacky... should be a param?
 @parsestack.push ctx_type.new(@linenum)
+ a<<KwParamListStartToken.new( input_position )
 end
 
 set_last_token KeywordToken.new( ';' )
@@ -1493,16 +1581,45 @@ end
 #parsestack was changed by get1token above...
 normal_comma_level+=1
 assert(normal_comma_level==@parsestack.size)
- endingblock=proc{|tok| tok===')' }
+ endingblock=proc{|tok2| tok2===')' }
 else
- endingblock=proc{|tok| tok===';' or NewlineToken===tok}
+ endingblock=proc{|tok2| tok2===';' or NewlineToken===tok2}
 end
 class << endingblock
 alias === call
 end
 
+ listend=method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+
+ @defining_lvar=false
+ @parsestack.last.see self,:semi
+
+ assert(@parsestack.size <= old_parsestack_size)
+
+ #hack: force next token to look like start of a
+ #new stmt, if the last ignored_tokens
+ #call above did not find a newline
+ #(just in case the next token parsed
+ #happens to call quote_expected? or after_nonid_op)
+ result.concat ignored_tokens
+ # if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
+ # !(NewlineToken===@last_operative_token) and
+ # !(/^(end|;)$/===@last_operative_token)
+ #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
+ set_last_token KeywordToken.new( ';' )
+ result<< get1token
+ # end
+ }
+
+ return result,listend
+ end
+
+
+ #-----------------------------------
+ #read local parameter names in method definition
+ def method_parameters(result,normal_comma_level,endingblock,old_parsestack_size)
+ listend=nil
 set_last_token KeywordToken.new( ',' )#hack
- #read local parameter names
 nextvar=nil
 loop do
 expect_name=(@last_operative_token===',' and
@@ -1533,7 +1650,7 @@ end
 # assert !nextvar
 nextvar=tok.ident
 localvars[nextvar]=false #remove nextvar from list of local vars for now
- when /^[&*]$/.token_pat #unary form...
+ when /^[&*(]$/.token_pat #unary form...
 #a NoWsToken is also expected... read it now
 result.concat maybe_no_ws_token #not needed?
 set_last_token KeywordToken.new( ',' )
@@ -1553,32 +1670,9 @@ end
 end
 end
 end
-
- @defining_lvar=false
- @parsestack.last.see self,:semi
-
- assert(@parsestack.size <= old_parsestack_size)
- assert(endingblock[tok] || ErrorToken===tok)
-
- #hack: force next token to look like start of a
- #new stmt, if the last ignored_tokens
- #call above did not find a newline
- #(just in case the next token parsed
- #happens to call quote_expected? or after_nonid_op)
- result.concat ignored_tokens
- # if !eof? and nextchar.chr[/[iuw\/<|>+\-*&%?:({]/] and
- # !(NewlineToken===@last_operative_token) and
- # !(/^(end|;)$/===@last_operative_token)
- #result<<EndHeaderToken.new(result.last.offset+result.last.to_s.size)
- set_last_token KeywordToken.new( ';' )
- result<< get1token
- # end
- }
-
- return result,listend
+ return listend
 end
 
-
 #-----------------------------------
 #handle % in ruby code. is it part of fancy quote or a modulo operator?
 def percent(ch)
@@ -1630,7 +1724,13 @@ end
 def char_literal_or_op(ch)
 if colon_quote_expected? ch
 getchar
- NumberToken.new getchar_maybe_escape
+ if @rubyversion >= 1.9
+ StringToken.new getchar_maybe_escape
+ else
+ ch=getchar_maybe_escape[0]
+ ch=ch.ord if ch.respond_to? :ord
+ NumberToken.new ch
+ end
 else
 @parsestack.push TernaryContext.new(@linenum)
 KeywordToken.new getchar #operator
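
The branch above picks the token class for ?x literals from @rubyversion, mirroring how the literal itself changed meaning between the two rubies; that language-level difference is all the line below shows.

    c = ?A   # 1.8: 65, lexed here as a NumberToken; 1.9: "A", lexed as a StringToken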
@@ -1825,7 +1925,7 @@ end
 context=merge_assignment_op_in_setter_callsites? ? ?: : nc
 return [identifier_as_string(context), start]
 when ?(
- return [nil,start] if @enable_macro
+ return [nil,start] if @enable_macro or @rubyversion>=1.9
 end
 
 set_last_token KeywordToken.new(';')
@@ -1853,7 +1953,7 @@ end
 
 res= HerePlaceholderToken.new( dash, quote, ender, quote_real )
 if true
- res.open=["<<",dash,quote,ender,quote].to_s
+ res.open=["<<",dash,quote,ender,quote].join
 procrastinated=til_charset(/[\n]/)#+readnl
 unless @base_file
 @base_file=@file
@@ -1979,7 +2079,7 @@ end
 @offset_adjust=@min_offset_adjust
 @moretokens.push( *optional_here_bodies )
 ln=@linenum
- @moretokens.push lexerror(EscNlToken.new(@filename,ln-1,result,input_position-result.size), error),
+ @moretokens.push lexerror(EscNlToken.new(result,input_position-result.size,@filename,ln-1), error),
 FileAndLineToken.new(@filename,ln,input_position)
 
 start_of_line_directives
@@ -1995,7 +2095,7 @@ if true
 pos=input_position
 while body=@pending_here_bodies.shift
 #body.offset=pos
- result.push EscNlToken.new(@filename,nil,"\n",body.offset-1)
+ result.push EscNlToken.new("\n",body.offset-1,@filename,nil)
 result.push FileAndLineToken.new(@filename,body.ident.line,body.offset)
 result.push body
 #result.push NoWsToken.new @pending_here_bodies.empty? ? input_position : @pending_here_bodies.first
@@ -2146,25 +2246,25 @@ end
 !@last_operative_token.infix?) ||
 !after_nonid_op?{false}
 
- hard=false if @rubyversion>=1.9 and @file.check /\A\n(?:#@@WSTOKS)?\.[^.]/o
+ hard=false if @rubyversion>=1.9 and @file.check( /\A\n(?:#@@WSTOKS)?[.:][^.:]/o )
 
 if hard
 @offset_adjust=@min_offset_adjust
 a= abort_noparens!
 case @parsestack.last #these should be in the see:semi handler
- when ExpectDoOrNlContext: @parsestack.pop
- when ExpectThenOrNlContext: @parsestack.pop
+ when ExpectDoOrNlContext; @parsestack.pop
+ when ExpectThenOrNlContext; @parsestack.pop
 end
 assert !@parsestack.empty?
 @parsestack.last.see self,:semi
 
- a << super(ch)
+ a << rulexer_newline(ch)
 @moretokens.replace a+@moretokens
 else
 @offset_adjust=@min_offset_adjust
 offset= input_position
 nl=readnl
- @moretokens.push EscNlToken.new(@filename,@linenum-1,nl,offset),
+ @moretokens.push EscNlToken.new(nl,offset,@filename,@linenum-1),
 FileAndLineToken.new(@filename,@linenum,input_position)
 end
 
@@ -2222,15 +2322,15 @@ end
 
 begin
 eof? and raise "eof before =end"
- more<< til_charset(/[\r\n]/)
+ more<< til_charset(/\n/)
 eof? and raise "eof before =end"
 more<< readnl
 end until readahead(EQENDLENGTH)==EQEND
 
 #read rest of line after =end
- more << til_charset(/[\r\n]/)
- assert((eof? or ?\r===nextchar or ?\n===nextchar))
- assert !(/[\r\n]/===more[-1,1])
+ more << til_charset(/\n/)
+ assert((eof? or ?\n===nextchar))
+ assert !(/\n/===more[-1,1])
 more<< readnl unless eof?
 
 # newls= more.scan(/\r\n?|\n\r?/)
@@ -2311,8 +2411,8 @@ end
 return yield
 when StringToken, SymbolToken, NumberToken, HerePlaceholderToken,
 %r{^(
- end|self|true|false|nil|
- __FILE__|__LINE__|[\})\]]
+ end|self|true|false|nil|->|
+ __FILE__|__LINE__|__ENCODING__|[\})\]]
 )$}x.token_pat
 #dunno about def/undef
 #maybe class/module shouldn't he here either?
@@ -2399,7 +2499,7 @@ end
 result= operator_or_methname_token( result)
 result.offset=oldpos
 return result
- end
+ end
 
 #-----------------------------------
 def tilde(ch) #match ~
@@ -2426,20 +2526,22 @@ end
 #could be beginning of number, too
 #fixme: handle +@ and -@ here as well... (currently, this is done in symbol()?)
 def plusminus(ch)
+ pos=input_position
 assert(/^[+\-]$/===ch)
 if unary_op_expected?(ch) or
 KeywordToken===@last_operative_token &&
 /^(return|break|next)$/===@last_operative_token.ident
 if (?0..?9)===readahead(2)[1]
- return number(ch)
+ result= number(ch)
 elsif @rubyversion>=1.9 and '->' == readahead(2) #stabby proc
+ @file.pos+=2
 #push down block context
 localvars.start_block
 @parsestack.push ctx=BlockContext.new(@linenum)
 ctx.wanting_stabby_block_body=true
 #read optional proc params
 block_param_list_lookahead ?(, ParenedParamListLhsContext
-
+ result=KeywordToken.new('->',pos)
 
 else #unary operator
 result=getchar
@@ -2456,6 +2558,7 @@ end
 end
 result=(operator_or_methname_token result)
 end
+ result.offset=pos
 return result
 end
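
For orientation, the kind of source the new '->' branch in plusminus is aimed at: per the hunks above, the lexer now consumes the two characters itself, pushes a BlockContext awaiting the stabby body, and emits a '->' KeywordToken carrying the saved offset. The input below is illustrative only.

    inc = ->(x) { x + 1 }   # '->' handled in plusminus; params and body via the block contexts
    inc.(1)                 #=> 2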
@@ -2485,14 +2588,16 @@ end
 #ruby delays adding lvars from regexps to known lvars table
 #for several tokens in some cases. not sure why or if on purpose
 #i'm just going to add them right away
- localvars.concat last.lvars
+ last.lvars.each{|lvar| localvars[lvar]=true }
 end
 when '' #plain assignment: record local variable definitions
 last_context_not_implicit.lhs=false
+ @last_operative_token=result
 @moretokens.push( *ignored_tokens(true).map{|x|
- NewlineToken===x ? EscNlToken.new(@filename,@linenum,x.ident,x.offset) : x
+ NewlineToken===x ? EscNlToken.new(x.ident,x.offset,@filename,@linenum) : x
 } )
 @parsestack.push AssignmentRhsContext.new(@linenum)
+ @moretokens.push AssignmentRhsListStartToken.new( input_position)
 if eat_next_if ?*
 tok=OperatorToken.new('*', input_position-1)
 tok.tag=:unary
@@ -2501,7 +2606,6 @@ end
 @moretokens << NoWsToken.new(input_position)
 comma_in_lvalue_list? #is this needed?
 end
- @moretokens.push AssignmentRhsListStartToken.new( input_position)
 end
 return result
 end
@@ -2513,13 +2617,15 @@ end
 k=eat_next_if(/[~=]/)
 if k
 result+=k
- elsif eof?: #do nothing
+ elsif eof? or WHSPLF[nextchar.chr] #do nothing
 else
- WHSPLF[nextchar.chr] or
- @moretokens << NoWsToken.new(input_position)
+ @moretokens << NoWsToken.new(input_position)
 end
- return KeywordToken.new(result, input_position-result.size)
- #result should distinguish unary !
+ ty= @rubyversion>=1.9 ? OperatorToken : KeywordToken
+ result=ty.new(result, input_position-result.size)
+ result.unary=!k #result should distinguish unary !
+
+ return result
 end
 
 
@@ -2565,7 +2671,7 @@ if false
 def comment(str)
 result=""
 #loop{
- result<< super(nil).to_s
+ result<< rulexer_comment(nil).to_s
 
 if /^\#.*\#$/===result #if comment was ended by a crunch
 
@@ -2645,20 +2751,41 @@ end
 when '{'
 #check if we are in a hash literal or string inclusion (#{}),
 #in which case below would be bad.
- if after_nonid_op?{false} or @last_operative_token.has_no_block?
+ if !(UnparenedParamListLhsContext===@parsestack.last) and
+ after_nonid_op?{false} || @last_operative_token.has_no_block?
 @parsestack.push ListImmedContext.new(ch,@linenum) #that is, a hash
 else
 #abort_noparens!
 tokch.set_infix!
 tokch.as="do"
- #=begin not needed now, i think
+
+ #if (perhaps deep) inside a stabby block param list context, end it
+ if @rubyversion>=1.9
+ stabby_params_just_ended=false
+ (@parsestack.size-1).downto(1){|i|
+ case @parsestack[i]
+ when ParamListContextNoParen,AssignmentRhsContext
+ #do nothing yet... see if inside a UnparenedParamListLhsContext
+ when UnparenedParamListLhsContext #stabby proc
+ @moretokens<<tokch
+ (@parsestack.size-1).downto(i){|j|
+ @moretokens.unshift @parsestack[j].endtoken(input_position-1)
+ }
+ @parsestack[i..-1]=[]
+ tokch=@moretokens.shift
+ stabby_params_just_ended=true
+ break
+ else break
+ end
+ }
+ end
+
 # 'need to find matching callsite context and end it if implicit'
 lasttok=last_operative_token
- if !(lasttok===')' and lasttok.callsite?) #or ParamListContextNoParen===parsestack.last
+ if !(lasttok===')' and lasttok.callsite?) and !stabby_params_just_ended #or ParamListContextNoParen===parsestack.last
 @moretokens.push( *(abort_1_noparen!(1).push tokch) )
 tokch=@moretokens.shift
 end
- #=end
 
 if BlockContext===@parsestack.last and @parsestack.last.wanting_stabby_block_body
 @parsestack.last.wanting_stabby_block_body=false
@@ -2719,7 +2846,7 @@ end
 
 #-----------------------------------
 def endoffile_detected(s='')
- @moretokens.push( *(abort_noparens!.push super(s)))
+ @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
 if @progress_thread
 @progress_thread.kill
 @progress_thread=nil
@@ -2731,32 +2858,37 @@ end
 
 #-----------------------------------
 def single_char_token(ch)
- KeywordToken.new super(ch), input_position-1
+ KeywordToken.new rulexer_single_char_token(ch), input_position-1
 end
 
 #-----------------------------------
 def comma(ch)
 @moretokens.push token=single_char_token(ch)
 
- #if assignment rhs seen inside method param list, when param list, array or hash literal,
- # rescue where comma is expected, or method def param list
- # then end the assignment rhs now
- #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|
+ case @parsestack[-1]
+ when AssignmentRhsContext;
+ token.tag=:rhs
+ #if assignment rhs seen inside method param list, when param list,
+ # array or hash literal, rescue where comma is expected, method def param list,
+ # or another right hand side
+ # then end the assignment rhs now
+ #+[OBS,ParamListContext|ParamListContextNoParen|WhenParamListContext|ListImmedContext|AssignmentRhsContext|
 # (RescueSMContext&-{:state=>:rescue})|(DefContext&-{:in_body=>FalseClass|nil}),
 # AssignmentRhsContext
 #]===@parsestack
- if AssignmentRhsContext===@parsestack[-1] and
- ParamListContext===@parsestack[-2] ||
- ParamListContextNoParen===@parsestack[-2] ||
- WhenParamListContext===@parsestack[-2] ||
- ListImmedContext===@parsestack[-2] ||
- (RescueSMContext===@parsestack[-2] && @parsestack[-2].state==:rescue) ||
- (DefContext===@parsestack[-2] && !@parsestack[-2].in_body)
+ while AssignmentRhsContext===@parsestack[-1]
+ pop=
+ case @parsestack[-2]
+ when ParamListContext,ParamListContextNoParen,WhenParamListContext,
+ ListImmedContext,AssignmentRhsContext; true
+ when RescueSMContext; @parsestack[-2].state==:rescue
+ when DefContext; !@parsestack[-2].in_body and !@parsestack[-2].has_parens?
+ else false
+ end
+ break unless pop
 @parsestack.pop
- @moretokens.unshift AssignmentRhsListEndToken.new(input_position)
- end
- case @parsestack[-1]
- when AssignmentRhsContext; token.tag=:rhs
+ @moretokens.unshift AssignmentRhsListEndToken.new(input_position-1)
+ end
 when ParamListContext,ParamListContextNoParen; #:call
 when ListImmedContext; #:array
 when BlockParamListLhsContext,UnparenedParamListLhsContext; #:block or stabby proc
@@ -2800,7 +2932,7 @@ end
 #-----------------------------------
 #tokenify_results_of :identifier
 save_offsets_in(*CHARMAPPINGS.values.uniq-[
- :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret
+ :symbol_or_op,:open_brace,:whitespace,:exclam,:backquote,:caret,:plusminus
 ])
 #save_offsets_in :symbol