RubyGems - rubylexer - Versions diffs - 0.7.0 → 0.7.1 - Mend

rubylexer 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80)

data/History.txt +90 -0
data/Manifest.txt +54 -3
data/README.txt +4 -7
data/Rakefile +3 -2
data/lib/rubylexer.rb +856 -323
data/lib/rubylexer/0.7.0.rb +11 -2
data/lib/rubylexer/0.7.1.rb +2 -0
data/lib/rubylexer/charhandler.rb +4 -4
data/lib/rubylexer/context.rb +86 -9
data/lib/rubylexer/rulexer.rb +455 -101
data/lib/rubylexer/token.rb +166 -43
data/lib/rubylexer/tokenprinter.rb +16 -8
data/lib/rubylexer/version.rb +1 -1
data/rubylexer.vpj +98 -0
data/test/code/all_the_gems.rb +33 -0
data/test/code/all_the_raas.rb +226 -0
data/test/code/all_the_rubies.rb +2 -0
data/test/code/deletewarns.rb +19 -1
data/test/code/dumptokens.rb +39 -8
data/test/code/errscan +2 -0
data/test/code/isolate_error.rb +72 -0
data/test/code/lexloop +14 -0
data/test/code/locatetest.rb +150 -8
data/test/code/regression.rb +109 -0
data/test/code/rubylexervsruby.rb +53 -15
data/test/code/strgen.rb +138 -0
data/test/code/tarball.rb +144 -0
data/test/code/testcases.rb +11 -0
data/test/code/tokentest.rb +115 -24
data/test/data/__eof2.rb +1 -0
data/test/data/__eof5.rb +2 -0
data/test/data/__eof6.rb +2 -0
data/test/data/cvtesc.rb +17 -0
data/test/data/g.rb +6 -0
data/test/data/hd0.rb +3 -0
data/test/data/hdateof.rb +2 -0
data/test/data/hdempty.rb +3 -0
data/test/data/hdr.rb +9 -0
data/test/data/hdr_dos.rb +13 -0
data/test/data/hdr_dos2.rb +18 -0
data/test/data/heart.rb +2 -0
data/test/data/here_escnl.rb +25 -0
data/test/data/here_escnl_dos.rb +20 -0
data/test/data/here_squote.rb +3 -0
data/test/data/heremonsters.rb +140 -0
data/test/data/heremonsters.rb.broken +68 -0
data/test/data/heremonsters.rb.broken.save +68 -0
data/test/data/heremonsters_dos.rb +140 -0
data/test/data/heremonsters_dos.rb.broken +68 -0
data/test/data/illegal_oneliners.rb +1 -0
data/test/data/illegal_stanzas.rb +0 -0
data/test/data/make_ws_strdelim.rb +22 -0
data/test/data/maven2_builer_test.rb +82 -0
data/test/data/migration.rb +8944 -0
data/test/data/modl.rb +6 -0
data/test/data/modl_dos.rb +7 -0
data/test/data/modl_fails.rb +10 -0
data/test/data/multilinestring.rb +6 -0
data/test/data/oneliners.rb +555 -0
data/test/data/p-op.rb +2 -0
data/test/data/p.rb +3 -1710
data/test/data/s.rb +90 -21
data/test/data/simple.rb +1 -0
data/test/data/simple_dos.rb +1 -0
data/test/data/stanzas.rb +1194 -0
data/test/data/strdelim_crlf.rb +6 -0
data/test/data/stuff.rb +6 -0
data/test/data/stuff2.rb +5 -0
data/test/data/stuff3.rb +6 -0
data/test/data/stuff4.rb +6 -0
data/test/data/tkweird.rb +20 -0
data/test/data/unending_stuff.rb +5 -0
data/test/data/whatnot.rb +8 -0
data/test/data/ws_strdelim.rb +0 -0
data/test/test.sh +239 -0
data/testing.txt +39 -50
metadata +110 -12
data/test/code/dl_all_gems.rb +0 -43
data/test/code/unpack_all_gems.rb +0 -15
data/test/data/gemlist.txt +0 -280

data/lib/rubylexer/0.7.0.rb CHANGED Viewed

@@ -1,2 +1,11 @@
-require 'rubylexer'
-#nothing else (yet)
+require 'rubylexer/0.7.1'
+class RubyLexer
+  IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT=0
+  DECIMAL_INT_INTERP=:to_i
+  ARBITRARY_INT_INTERP=:oct
+  AUTO_UNESCAPE_STRINGS=true
+end

data/lib/rubylexer/0.7.1.rb ADDED Viewed

@@ -0,0 +1,2 @@
+require 'rubylexer'
+#nothing else (yet)

data/lib/rubylexer/charhandler.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-=begin copyright
+=begin legal crap
     rubylexer - a ruby lexer written in ruby
-    Copyright (C) 2004,2005  Caleb Clausen
+    Copyright (C) 2004,2005,2008  Caleb Clausen
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
@@ -59,8 +59,8 @@ class CharHandler
     assert !frozen?
     @table[b]=action
-    @matcher<<?\\ if CHARSETSPECIALS===b
-    @matcher<<b
+    @matcher << ?\\ if CHARSETSPECIALS===b
+    @matcher << b
   end
   private :[]=

data/lib/rubylexer/context.rb CHANGED Viewed

@@ -1,3 +1,23 @@
+=begin legal crap
+    rubylexer - a ruby lexer written in ruby
+    Copyright (C) 2008  Caleb Clausen
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
 class RubyLexer
 module NestedContexts
   class NestedContext
@@ -18,6 +38,8 @@ module NestedContexts
     def lhs=*x; end #do nothing
   end
+  #contexts which expect to see commas,
+  #(other than assignment lhs, which has no context)
   class ListContext < NestedContext
   end
@@ -41,6 +63,12 @@ module NestedContexts
     end
   end
+  class BeginEndContext  < NestedContext
+    def initialize(str,linenum)
+      super('{','}',linenum)
+    end
+  end
 #  class BlockParamListContext  < ListContext
 #    def initialize(linenum)
 #      super('|','|',linenum)
@@ -67,7 +95,7 @@ module NestedContexts
     def starter; '|' end
     def ender; '|' end
   end
   class ImplicitContext < ListContext
   end
@@ -78,6 +106,9 @@ module NestedContexts
     def lhs; false end
   end
+  class KWParamListContextNoParen < ParamListContextNoParen
+  end
   class WhenParamListContext < ImplicitContext
     def initialize(starter,linenum)
       super(starter,nil,linenum)
@@ -94,16 +125,54 @@ module NestedContexts
     def initialize(linenum)
       super(nil,nil,linenum)
     end
+    def see lxr,msg
+      case msg
+      when :semi; lxr.parsestack.pop
+      when :comma,:splat; @multi=true
+      end
+    end
+    def multi_assign?; @multi end
   end
   class WantsEndContext < NestedContext
     def initialize(starter,linenum)
       super(starter,'end',linenum)
     end
+    attr_accessor :state
     def see lxr,msg
-      msg==:rescue ? lxr.parsestack.push_rescue_sm : super
+      msg==:rescue and lxr.parsestack.push_rescue_sm
+    end
+  end
+  class ClassContext < WantsEndContext
+    def see(lxr,msg)
+      if msg==:semi and @state!=:semi
+        lxr.localvars_stack.push SymbolTable.new
+        @state=:semi
+      else
+        super
+      end
+    end
+  end
+  class DefContext < WantsEndContext
+    def initialize(linenum)
+      super('def', linenum)
+      @in_body=false
+    end
+    def see(lxr,msg)
+      if msg==:semi and @state!=:semi
+        @in_body=true
+        @state=:semi
+      else
+        super
+      end
     end
+    attr :in_body
   end
   class StringContext < NestedContext #not used yet
@@ -125,13 +194,19 @@ module NestedContexts
   end
-  class RescueSMContext < NestedContext
+  class RescueSMContext < ListContext
     #normal progression: rescue => arrow => then
     EVENTS=[:rescue,:arrow,:then,:semi,:colon]
-    LEGAL_SUCCESSORS={nil=> [:rescue], :rescue => [:arrow,:then,:semi,:colon],:arrow => [:then,:semi,:colon],:then => [nil]}
-    #note on :semi and :colon events: in arrow state (and only then),
+    LEGAL_SUCCESSORS={
+      nil=> [:rescue],
+      :rescue => [:arrow,:then,:semi,:colon],
+      :arrow => [:then,:semi,:colon],
+      :then => []
+    }
+    #note on :semi and :colon events:
     #      (unescaped) newline, semicolon, and (unaccompanied) colon
-    #      also trigger the :then event. otherwise, they are ignored.
+    #      also trigger the :then event. they are ignored if in :then
+    #      state already.
     attr :state
     def initialize linenum
@@ -153,6 +228,7 @@ module NestedContexts
         msg=:then
         self.equal? stack.pop or raise 'syntax error: then not expected at this time'
                   #pop self off owning context stack
+      when :comma, :splat: return
       else super
       end
       LEGAL_SUCCESSORS[@state].include? msg or raise "rescue syntax error: #{msg} unexpected in #@state"
@@ -161,10 +237,10 @@ module NestedContexts
   end
-  class ForSMContext < NestedContext
+  class ForSMContext < ImplicitLhsContext
     #normal progression: for => in
     EVENTS=[:for,:in]
-    LEGAL_SUCCESSORS={nil=> :for, :for => :in,:in => nil}
+    LEGAL_SUCCESSORS={nil=> [:for], :for => [:in],:in => []}
     #note on :semi and :colon events: in :in state (and only then),
     #      (unescaped) newline, semicolon, and (unaccompanied) colon
     #      also trigger the :then event. otherwise, they are ignored.
@@ -185,9 +261,10 @@ module NestedContexts
       when :in:  self.equal? stack.pop or raise 'syntax error: in not expected at this time'
                  stack.push ExpectDoOrNlContext.new("for",/(do|;|:|\n)/,@linenum)
                  #pop self off owning context stack and push ExpectDoOrNlContext
+      when :comma, :splat: return
       else super
       end
-      LEGAL_SUCCESSORS[@state] == msg or raise "for syntax error: #{msg} unexpected in #@state"
+      LEGAL_SUCCESSORS[@state].include? msg or raise "for syntax error: #{msg} unexpected in #@state"
       @state=msg
     end
   end

data/lib/rubylexer/rulexer.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-=begin copyright
+=begin legal crap
     rubylexer - a ruby lexer written in ruby
-    Copyright (C) 2004,2005  Caleb Clausen
+    Copyright (C) 2004,2005,2008  Caleb Clausen
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
@@ -17,6 +17,8 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 =end
+#warn "hacking $LOAD_PATH to find latest sequence"
+#$:<<"../sequence/lib"
 require "assert"
@@ -31,6 +33,7 @@ require 'rubygems'
 #require 'sequence'
 require 'sequence/indexed'
 require 'sequence/file'
+require 'sequence/list'
 #-----------------------------------
 assert !defined? ::RubyLexer
 $RuLexer=Class.new{}
@@ -40,6 +43,9 @@ end
 $RuLexer=nil
 #------------------------------------
 class RubyLexer
+  FASTER_STRING_ESCAPES=true
+  warn "FASTER_STRING_ESCAPES is off" unless FASTER_STRING_ESCAPES
+  AUTO_UNESCAPE_STRINGS=false
   class RuLexer
    WHSP=" \t\r\v\f"
    WHSPLF=WHSP+"\n"
@@ -49,20 +55,22 @@ class RubyLexer
    PAIRS={ '{'=>'}', '['=>']', '('=>')', '<'=>'>'}
-   attr_reader :linenum,:last_operative_token
+   attr_reader :linenum,:last_operative_token,:original_file,:filename
+   attr_accessor :file #hack
    #-----------------------------------
-   def initialize(filename, file, line)
+   def initialize(filename, file, line, offset_adjust=0)
       @filename=filename
 #      String===file && file=IOext::FakeFile.new(file)
       file.binmode if File===file
       @original_file=file
       @file=file.to_sequence
+      @file.pos=@original_file.pos if @original_file.respond_to? :pos
       @linenum=line
       @toptable=nil   #descendants must fill this out
+      @min_offset_adjust=@offset_adjust=offset_adjust
       @moretokens=[ RubyLexer::FileAndLineToken.new(@filename, @linenum, input_position) ]
-      @last_operative_token=nil
       @endsets={}
    end
@@ -95,6 +103,14 @@ class RubyLexer
      end until tok.is_a? EoiToken
    end
+   #-----------------------------------
+#   def offset_adjust; 0 end
+   #-----------------------------------
+#   def offset_adjust_set! offset_adjust
+#     @offset_adjust=offset_adjust
+#   end
    include Enumerable
 private
@@ -121,8 +137,8 @@ private
    #-----------------------------------
    def regex(ch=nil)
-      result=RenderExactlyStringToken.new('/').
-         append_token double_quote("/")
+      result=RenderExactlyStringToken.new('/').append_token str=double_quote("/")
+      result.open=result.close="/"
       result.line=@linenum
       return result
    end
@@ -142,17 +158,20 @@ private
       assert ch=='%'
       oldpos= input_position
       eat_next_if(ch) or raise "fancy_quote, no "+ch
+      strlex=:double_quote
+      open="%"
       ch=getchar
+      open+=ch
       #ch.tr!('qwQWrx','"["{/`')
       type=case ch
-         when 'q' then "'"
+         when 'q' then strlex=:single_quote; "'"
          when 'w' then "[" #word array
          when 'Q' then '"' #regular string
          when 'W' then '{' #dquotish word array
          when 'r' then '/' #regex
          when 'x' then '`' #exec it
-         when 's' then '"' #symbol
+         when 's' then strlex=:single_quote; "'" #symbol
          #other letters, nums are illegal here
          when /^[a-z0-9]$/oi
             error= "unrecognized %string type: "+ch; '"'
@@ -160,33 +179,191 @@ private
             result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
             result.line=@linenum
             return result
-         else back1char; '"' #no letter means string too
+         else open.chop!; back1char; '"' #no letter means string too
       end
+if FASTER_STRING_ESCAPES
+      beg= readahead(2)=="\r\n" ? "\r\n" : nextchar.chr
+      assert /[\r\n]/===nextchar.chr if beg=="\r\n"
+else
       beg=nextchar.chr
       if /^[\r\n]$/===beg  then
            beg=INET_NL_REX
       end
-      result=double_quote(beg, type, (PAIRS[beg] or beg))
+end
+      result=send(strlex, beg, type, close=(PAIRS[beg] or beg))
       case ch
-      when /^[Wwr]$/;
+      when /^[Wwr]$/:
+        str=result
         result=RenderExactlyStringToken.new(type).append_token(result)
+        result.open=str.open; result.close=str.close
         result.line=@linenum
-      when 's'; result=SymbolToken.new(result.to_s)
+      when 's':
+        result.open=open+beg
+        result.close=close
+        result=SymbolToken.new result,nil,"%s"
       end
+      result.open=open+beg
+      result.close=close
       result.offset=oldpos
       return lexerror(result,error)
    end
    #-----------------------------------
-   #this method is now misnamed, since it handles single quotes as well
    def double_quote(nester, type=nester, delimiter=nester)
-      all_quote(nester,type,delimiter)
+      result=all_quote(nester,type,delimiter)
+      result.open=nester
+      result.close=delimiter
+      return result
    end
    #-----------------------------------
+   def single_quote(nester, type=nester, delimiter=nester)
+     result=all_quote nester, type, delimiter
+#     result.elems.first.gsub! /\\\\/, '\\'
+     result.open=result.close="'"
+     return result
+   end
+   #-----------------------------------
+   INTERIOR_REX_CACHE={}
+   EVEN_BS_S=/
+     ($|
+      [^\\c-]|
+      ($|[^\\])(c|[CM]-)|
+      ($|[^CM])-
+     )
+     (\\(?:c|[CM]-)?\\)*
+   /x
+   ILLEGAL_ESCAPED=/#{EVEN_BS_S}(\\([CM][^-]|x[^a-fA-F0-9]))/o #whaddaya do with this?
+   ILLEGAL_CRUNCH=/#{EVEN_BS_S}(\#@[^a-zA-Z_]|\#$[^a-zA-Z_0-9\-!@&+`'=~\/\\,.;<>*"$?:;])/o #and this?
    def all_quote(nester, type, delimiter, bs_handler=nil)
+if FASTER_STRING_ESCAPES
+      #string must start with nester
+      if nester=="\r\n" #treat dos nl like unix
+        nester=delimiter="\n"
+        readnl
+      else
+        eat_next_if(nester[0])
+      end or return nil
+      special_char= nester.dup
+      special_char<< (delimiter) if nester!=delimiter
+      if "'["[type]
+        single_quotish=true
+        special=/\\./m
+      else
+        crunch=/#(?=[^{$@])/
+        escaped=/\\([^xcCM0-7]|(c|[CM].)([^\\]|(?=\\))|x.[0-9a-fA-F]?|[0-7]{1,3})/m
+        special=
+          case delimiter
+          when '\\': crunch
+          when '#': escaped
+          else /#{escaped}|#{crunch}/o
+          end
+        special_char<< maybe_crunch="#"
+      end
+      normal="[^#{Regexp.quote '\\'+special_char}]"
+      interior=INTERIOR_REX_CACHE[special_char]||=/#{normal}*(#{special}+#{normal}*)*/
+      #backslash is just scanned thru, not interpreted
+      #... that will change token format
+      #, which will make lots of downstream headaches.
+      str=StringToken.new type
+      str.bs_handler ||= case type
+        when '/' then :regex_esc_seq
+        when '{' then :Wquote_esc_seq
+        when '"','`',':' then :dquote_esc_seq
+        when "'"     then :squote_esc_seq
+        when "["     then :wquote_esc_seq
+        else raise "unknown quote type: #{type}"
+      end
+      old_linenum=@linenum
+      nestlevel=1
+      loop{
+         str.append(@file.scan( interior ))
+         #scan could stop at any character if at the end of its buffer.
+         b=getchar
+         case b
+            when delimiter
+               assert nestlevel>0
+               if (nestlevel-=1)==0
+                  case str.elems.last
+                  #if last str data fragment was empty and
+                  #followed an inclusion, delete it
+                  #unless there was an escnl between inclusion and string end
+                  when ''
+                    str.elems.size>1 and
+                    if /\\\r?\n(.|\r?\n)\Z/===@file.readbehind(5)
+                      #do nothing
+                    else
+                      str.elems.pop
+                    end
+                  when /\r\Z/      #if delim is \n, trailing (literal) \r is chopped
+                    str.elems.last.chomp! "\r" if delimiter=="\n"
+                  end
+                  str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
+                  nlcount=0
+                  str.elems.each{|frag|
+                    next unless String===frag
+                    #dos nls turn into unix nls in string literals
+                    nlcount+=frag.count("\n")
+                    frag.gsub!(/\r\n/, "\n")
+                  }
+                  nlcount+=1 if delimiter=="\n"
+                  str.line=@linenum+=nlcount
+                  if nlcount>0
+                    #emit eol marker later if line has changed
+                    @moretokens << FileAndLineToken.new(
+                      @filename,@linenum,input_position
+                    )
+                    @pending_here_bodies.each{|body|
+                      body.allow_ooo_offset=true
+                    } unless delimiter=="\n"
+                  end
+                  str.open=nester
+                  str.close=delimiter
+                  return str
+               end
+               assert nestlevel>0
+            when nester
+               #this branch ignored if nester==delimiter
+               assert(nester!=delimiter)
+               nestlevel+=1
+            when nil then raise "nil char from each_byte?" #never happens
+            when maybe_crunch
+               nc=nextchar.chr
+               nc[/^[{@$]$/] and b=ruby_code(nc)
+            when "\\"
+               back1char
+               next
+            when ""  #eof
+               lexerror str, "unterminated #{delimiter}-string at eof"
+               break
+         end
+         #shouldn't tolerate ILLEGAL_ESCAPED in str (unless single quotish)....
+         lexerror str, "illegal escape sequence" if !("['"[type]) and ILLEGAL_ESCAPED===b
+         str.append b
+      }
+      assert eof?
+      str.line=@linenum
+      str
+else
       endset="\r\n\\\\"
       #string must start with nester
@@ -199,7 +376,8 @@ private
       end or return nil
       bs_handler ||= case type
-        when '/','{' then :regex_esc_seq
+        when '/' then :regex_esc_seq
+        when '{' then :Wquote_esc_seq
         when '"','`',':' then :dquote_esc_seq
         when "'"     then :squote_esc_seq
         when "["     then :wquote_esc_seq
@@ -212,6 +390,7 @@ private
       endset<<maybe_crunch="#" unless "'["[type]
       endset=
         @endsets[endset] ||= /[#{endset}]/
+      false&& last_escnl_elem_idx=nil
       loop{
          str.append(til_charset( endset ))
          b=getchar
@@ -221,14 +400,34 @@ private
          end
          case b
             when delimiter
+               assert nestlevel>0
                if (nestlevel-=1)==0
+                  #if last str data fragment was empty and
+                  #followed an inclusion, delete it
+                  #unless there was an escnl between inclusion and string end
+                  if str.elems.last=='' and str.elems.size>1
+                    if /\\\r?\n(.|\r?\n)\Z/===@file.readbehind(5)
+                      #do nothing
+                    else
+                      str.elems.pop
+                    end
+                  end
                   str.modifiers=til_charset(/[^eioumnsx]/) if '/'==type
-                  #emit eol marker later if line has changed
                   str.line=@linenum
-                  @linenum != old_linenum and @moretokens <<
-                     FileAndLineToken.new(@filename,@linenum, input_position)
+                  if @linenum != old_linenum
+                    #emit eol marker later if line has changed
+                    @moretokens << FileAndLineToken.new(
+                      @filename,@linenum,input_position
+                    )
+                    @pending_here_bodies.each{|body|
+                      body.allow_ooo_offset=true
+                    } unless nester==INET_NL_REX
+                  end
                   return str
                end
+               assert nestlevel>0
             when nester
                #this branch ignored if nester==delimiter
                assert(nester!=delimiter)
@@ -248,11 +447,13 @@ private
                break
          end
          str.append b
       }
       assert eof?
       str.line=@linenum
       str
+end
    end
    #-----------------------------------
@@ -268,26 +469,17 @@ private
          when '#' then '#'
          when /^[#{ESCAPECHRS}]$/o
             k.tr(ESCAPECHRS,ESCAPESEQS)
-=begin not needed anymore
-         when "a"  then "\a"
-         when "b"  then "\b"
-         when "e"  then "\e"
-         when "f"  then "\f"
-         when "n"  then "\n"
-         when "r"  then "\r"
-         when "s"  then "\ "
-         when "t"  then "\t"
-         when "v"  then "\v"
-=end
          when "M"
             eat_next_if(?-) or raise 'bad \\M sequence'
             (getchar_maybe_escape | 0x80).chr
          when "C"
             eat_next_if(?-) or raise 'bad \\C sequence'
+            nextchar==?? and getchar and return "\177" #wtf?
             (getchar_maybe_escape & 0x9F).chr
          when "c"
+            nextchar==?? and getchar and return "\177" #wtf?
             (getchar_maybe_escape & 0x9F).chr
          when /^[0-7]$/
@@ -306,31 +498,33 @@ private
             str.hex.chr
          else
-            '\\'+k
+            k
       end
    end
    #-----------------------------------
    def regex_esc_seq(ch,nester,delimiter)
       assert ch == '\\'
-      c=getchar
-      return case c
-         when "\n"
-            @linenum+=1
-            ''#ch+c
-         when nester,delimiter  ,"/"
-            c
-         #when "c"
-         #   ch + c + getchar
-         #when "M","C"
-         #   eat_next_if(?-) or
-         #         lexerror "illegal \\#{c}- esc sequence"
-         #   ch + c + '-' + (eat_next_if(/[^\\]/)or'')
-         #   #if this \M- or \C- sequence is continued by
-         #   #another backslash, we'll just leave the
-         #   #backslash on the input, to be read by the next pass
-         else
-            ch+c
+      ch=getchar
+      if ch=="\n"
+        @linenum+=1
+        return ''
+      end
+      '\\'+ch
+   end
+   #-----------------------------------
+   def Wquote_esc_seq(ch,nester,delimiter)
+      assert ch == '\\'
+      case ch=getchar
+      when "\n": @linenum+=1; ch
+      when nester,delimiter: ch
+      when /[\s\v\\]/: ch
+      else
+        back1char
+        result=dquote_esc_seq('\\',nester,delimiter)
+        #/\s|\v/===result and result="\\"+result
+        result
       end
    end
@@ -340,16 +534,16 @@ private
       #get the escaped character
       escchar=getchar
-      return (case escchar
-         #all \ sequences but \delimiter, \nester
-         #are passed thru unchanged; actual
+      case escchar
+         #all \ sequences
+         #are unescaped; actual
          #newlines are counted but not changed
-         when delimiter,nester
-              ''
-         when "\n"
-              @linenum+=1; "\\"
-         else '\\'
-      end+escchar)
+         when delimiter,nester,'\\': escchar
+#         when delimiter,nester: escchar
+         when "\n": @linenum+=1; escchar
+         when /[\s\v]/: escchar
+         else       "\\"+escchar
+      end
    end
    #-----------------------------------
@@ -358,52 +552,173 @@ private
       #get the escaped character
       escchar=getchar
-      return (case escchar
-         #all \ sequences but \delimiter, \nester and \\
-         #are passed thru unchanged; actual
+      case escchar
+         #all \ sequences
+         #are unescaped; actual
+         #newlines are counted but not changed
+         when delimiter,nester,'\\': escchar
+#         when delimiter,nester: escchar
+         when "\n": @linenum+=1; "\\"+escchar
+         else       "\\"+escchar
+      end
+   end
+   #-----------------------------------
+   def squote_heredoc_esc_seq(ch,nester,delimiter)
+      assert(ch=='\\')
+      #get the escaped character
+      escchar=getchar
+      case escchar
+         #all \ sequences
+         #are unescaped; actual
          #newlines are counted but not changed
-         when delimiter,nester,'\\'
-              ''
-         when "\n"
-              @linenum+=1; "\\"
-         else '\\'
-      end+escchar)
+         when delimiter,nester: escchar
+#         when delimiter,nester: escchar
+         when "\n": @linenum+=1; "\\"+escchar
+         else       "\\"+escchar
+      end
+   end
+=begin
+   #-----------------------------------
+   def squote_esc_seq(ch,nester,delimiter)
+      assert(ch=='\\')
+      #get the escaped character
+      escchar=getchar
+      escchar=="\n" and @linenum+=1
+      escchar="\\"+escchar unless escchar[/['\\]/]
+      return escchar
    end
+=end
+#   alias squote_esc_seq	wquote_esc_seq
+  module RecursiveRubyLexer
+    def initial_nonblock_levels
+      @localvars_stack.size==1 ? 2 : 1
+    end
+  end
+  def initial_nonblock_levels; 1 end
+  def first_current_level
+    result=@localvars_stack.last.__locals_lists.size-initial_nonblock_levels
+    result=[initial_nonblock_levels,result].max
+    result
+  end
+  def merge_levels levels, nil_empty_class
+    case (levels.size rescue 0)
+    when 0: {} unless nil_empty_class
+    when 1: levels.first.dup
+    else levels.inject{|a,b| a.merge b}
+    end
+  end
+  def decompose_lvars(nil_empty_class=false)
+    levels=
+      @localvars_stack.last.__locals_lists
+    nonblocky=merge_levels levels[0...initial_nonblock_levels], nil_empty_class
+    blocky=merge_levels levels[initial_nonblock_levels...first_current_level], nil_empty_class
+    current=merge_levels levels[first_current_level..-1], nil_empty_class
+    return nonblocky,blocky,current
+  end
+  def new_lvar_type
+    size=@localvars_stack.last.__locals_lists.size
+    return :local if size<=initial_nonblock_levels
+    return :block if size<first_current_level
+    return :current
+  end
+  def lvar_type(name)
+    nonblocky,blocky,current=decompose_lvars
+    nonblocky[name] and return :local
+    blocky[name] and return :block
+    current[name] and return :current
+    return new_lvar_type
+  end
+  def assign_lvar_type!(vartok)
+    vartok.respond_to? :lvar_type= and
+      vartok.lvar_type=lvar_type(vartok.ident)
+    return vartok
+  end
    #-----------------------------------
    def ruby_code(ch='{')
       assert ch[/^[{(@$]$/]
       klass= RubyLexer===self ? self.class : RubyLexer
-      rl=klass.new(@filename,@file,@linenum)
+      rl=klass.new(@filename,@file,@linenum,offset_adjust())
+      rl.extend RecursiveRubyLexer
+#      rl.offset_adjust_set! offset_adjust()
+      assert offset_adjust()==rl.offset_adjust()
       #pass current local vars into new parser
-      localvars.names.each{|varname|
+      #must pass the lists of nonblock, parentblock and currentblock vars separately
+      #then a table increment after each
+      nonblocky,blocky,current=decompose_lvars(true)
+      nonblocky.keys.each{|varname|
         rl.localvars[varname]=true
       }
-      rl.localvars.start_block
+      rl.localvars.start_block
+      #incremental table, tells us what :local vars are defined in the str inclusion
+      if blocky
+        rl.localvars.start_block
+        blocky.keys.each{|varname|
+          rl.localvars[varname]=true
+        }
+        rl.localvars.start_block
+        #incremental table, tells us what :block vars are defined in the str inclusion
+      end
+      if current
+        rl.localvars.start_block
+        current.keys.each{|varname|
+          rl.localvars[varname]=true
+        }
+        rl.localvars.start_block
+        #incremental table, tells us what :current vars are defined in the str inclusion
+      end
+      rl.pending_here_bodies=@pending_here_bodies
       case ch
       when '@'
          tokens=[rl.at_identifier]
       when '$'
          tokens=[rl.dollar_identifier]
-      when '{','('
+      when '{'#,'('
          tokens=[]
          loop {
             tok=rl.get1token
-            EoiToken===tok and lexerror tok,"unterminated string inclusion"
             tokens << tok
-            break if tok===PAIRS[ch] and rl.no_more? and rl.balanced_braces?
+            if EoiToken===tok
+              lexerror tok,"unterminated string inclusion"
+              break
+            end
+            if tok==='}'
+              if ErrorToken===tok #mismatched?
+                parsestack[1..-1].reverse_each{|ctx|
+                  tok.error<< "\nno end found for #{ctx.class}"
+                }
+                break
+              end
+              break if rl.no_more? and rl.balanced_braces?
+            end
          }
       else
          raise 'hell'
       end
+=begin
       if @linenum != rl.linenum
         last=tokens.pop
         fal=FileAndLineToken.new(@filename,@linenum, last.offset)
         tokens.push fal,last
       end
+=end
       #need to verify that rl's @moretokens, @incomplete_here_tokens are empty
       rl.incomplete_here_tokens.empty? or
@@ -411,6 +726,13 @@ private
       rl.no_more? or
         raise 'uh-oh, ruby tokens were lexed past end of ruby code'
+      #assert offset_adjust()==rl.offset_adjust() #|| rl.offset_adjust().zero?
+      @offset_adjust=rl.offset_adjust
+      #input_position_set rl.input_position_raw
+      @file=rl.file
+#      @pending_here_bodies=rl.pending_here_bodies
       #local vars defined in inclusion get propagated to outer parser
       newvars=rl.localvars.__locals_lists[1..-1].map{|bag| bag.keys }.flatten
       newvars.each{|newvar| localvars[newvar]=true }
@@ -431,36 +753,53 @@ private
 #   OCTCHARS=?0..?7
 #   DECCHARS=?0..?9
 #   HEXCHARS=CharSet[?0..?9, ?A..?F, ?a..?f]
-   BINCHARS=/[^01_]/
-   OCTCHARS=/[^0-7_]/
-   DECCHARS=/[^0-9_]/
-   HEXCHARS=/[^0-9a-f_]/i
+   BINCHARS=/[01_]+/
+   OCTCHARS=/[0-7_]+/
+   allowed=/[0-9_]/
+   DECCHARS=/^#{allowed}*(\.(?!_)#{allowed}+)?([eE](?!_)(?:[+-])?#{allowed}+)?/
+   HEXCHARS=/[0-9a-f_]+/i
+   DECIMAL_INT_INTERP=:to_s
+   ARBITRARY_INT_INTERP=:to_s
+   NUMREXCACHE={}
    #0-9
    #-----------------------------------
    def number(str)
       return nil unless /^[0-9+\-]$/===str
-      interp=:to_i
+      interp=DECIMAL_INT_INTERP
       str=  (eat_next_if(/[+\-]/)or'')
       str<< (eat_next_if(?0)or'')
-      if str[-1] == ?0 and !eof? and !nextchar.chr[/[.eE]/]
-         typechar=eat_next_if(/[BOXD]/i)||'o'
-         str << typechar
-         interp=:oct
-         unallowed=case typechar
-           when 'b','B'; BINCHARS
-           when 'x','X'; HEXCHARS
-           when 'o','O'; OCTCHARS
-           when 'd','D'; interp=:to_i; DECCHARS
-           else raise  :impossible
-         end
+      if str[-1] == ?0 and !eof?
+        if nextchar.chr[/[bodx]/i]
+          typechar=eat_next_if(/[bodx]/i)
+          str << typechar
+          interp=ARBITRARY_INT_INTERP
+          allowed=case typechar
+            when 'b','B'; BINCHARS
+            when 'x','X'; HEXCHARS
+            when 'o','O'; OCTCHARS
+            when 'd','D'; DECCHARS
+            else raise  :impossible
+          end
+        elsif /[.e]/i===nextchar.chr
+          interp=ARBITRARY_INT_INTERP
+          allowed=DECCHARS
+        else
+          interp=ARBITRARY_INT_INTERP
+          allowed=OCTCHARS
+        end
       else
-         interp=:to_i
-         unallowed =DECCHARS
+         interp=DECIMAL_INT_INTERP
+         allowed =DECCHARS
       end
+      #allowed = NUMREXCACHE[allowed] ||= /^#{allowed}*(\.(?!_)#{allowed}+)?([eE](?!_)(?:[+-])?#{allowed}+)?/
+      str<<(@file.scan(allowed)||'')
+      interp=:to_s if $1 or $2
+      return NumberToken.new(str.send(interp))
       addl_dig_seqs= (typechar)? 0 : 2      #den 210
       error=nil
@@ -528,11 +867,11 @@ end
    #-----------------------------------
    INET_NL_REX=/^(\r\n?|\n\r?)/
    def readnl
-      #compatible with dos/mac style newlines...
+      #compatible with dos style newlines...
       eof? and return ''
-      nl=readahead(2)[INET_NL_REX]
+      nl=readahead(2)[/\A\r?\n/]
       nl or return nil
       assert((1..2)===nl.length)
       @linenum+=1
@@ -542,7 +881,8 @@ end
    #-----------------------------------
    def newline(ch)
       offset= input_position
-      nl=readnl
+      nl=read 1
+      @linenum+=1
       @moretokens << FileAndLineToken.new( @filename, @linenum, input_position )
       return NewlineToken.new( nl,offset)
    end
@@ -563,7 +903,7 @@ protected
 #  delegate_to :@file, :eat_next_if,:prevchar,:nextchar,:getchar,:getc,:back1char
   require 'forwardable'
   extend Forwardable
-  def_delegators :@file, :readahead,:readback, :read, :eof?
+  def_delegators :@file, :readahead, :readback, :read, :eof?
   def til_charset cs,len=16; @file.read_til_charset cs,len end
   def getc; @file.read1 end
@@ -571,14 +911,28 @@ protected
   def back1char; @file.move( -1 )end
   def prevchar; @file.readbehind 1 end
   def nextchar; @file.readahead1 end
-  def eat_next_if(ch);
-    saw=getchar
+  #-----------------------------------
+  def eat_next_if(ch)
+    saw=getc or return
     if Integer===ch
-      ch==saw[0]
+      ch==saw
     else
-      ch===saw
+      ch===saw.chr
     end or (back1char; return)
-    return saw
+    return saw.chr
+  end
+  #-----------------------------------
+  def eat_if(pat,count)
+    oldpos=@file.pos
+    saw=read count
+    if pat===saw
+      return saw
+    else
+      @file.pos=oldpos
+      return nil
+    end
   end
   #-----------------------------------