rubylexer 0.7.6 → 0.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +4 -0
- data/History.txt +54 -7
- data/Makefile +43 -0
- data/lib/.document +3 -0
- data/lib/rubylexer.rb +286 -154
- data/lib/rubylexer/.document +9 -0
- data/lib/rubylexer/charhandler.rb +25 -19
- data/lib/rubylexer/context.rb +17 -4
- data/lib/rubylexer/rubycode.rb +1 -1
- data/lib/rubylexer/rulexer.rb +120 -95
- data/lib/rubylexer/symboltable.rb +22 -1
- data/lib/rubylexer/test/oneliners.rb +20 -0
- data/lib/rubylexer/test/oneliners_1.9.rb +146 -0
- data/lib/rubylexer/test/testcases.rb +6 -2
- data/lib/rubylexer/token.rb +22 -6
- data/lib/rubylexer/tokenprinter.rb +6 -6
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.gemspec +40 -0
- data/test/code/coloruby.rb +154 -0
- data/test/code/dumptokens.rb +10 -5
- data/test/code/regression.rb +31 -17
- data/test/code/rubylexervsruby.rb +1 -1
- data/test/code/test_1.9.rb +31 -0
- data/test/code/tokentest.rb +6 -6
- data/test/data/{hdr_dos2.rb → hdr_dos2.rb.broken} +0 -0
- data/test/data/{heremonsters.rb.broken → heremonsters_broken.rb} +0 -0
- data/test/data/{heremonsters_dos.rb.broken → heremonsters_dos_broken.rb} +0 -0
- data/test/test_all.rb +2 -0
- metadata +94 -98
- data/Rakefile +0 -37
| @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            =begin | 
| 1 | 
            +
            =begin
         | 
| 2 2 | 
             
                rubylexer - a ruby lexer written in ruby
         | 
| 3 3 | 
             
                Copyright (C) 2004,2005,2008  Caleb Clausen
         | 
| 4 4 |  | 
| @@ -22,23 +22,29 @@ class RubyLexer | |
| 22 22 | 
             
            #------------------------------------
         | 
| 23 23 | 
             
            class CharHandler
         | 
| 24 24 | 
             
              #-----------------------------------
         | 
| 25 | 
            -
               | 
| 25 | 
            +
              if ?A.is_a? String #ruby >= 1.9
         | 
| 26 | 
            +
                CHARSETSPECIALS=/[\[\]\\\-]/
         | 
| 27 | 
            +
              else
         | 
| 28 | 
            +
                CHARSETSPECIALS=CharSet[?[ ,?] ,?\\ ,?-]
         | 
| 29 | 
            +
              end
         | 
| 26 30 | 
             
              def initialize(receiver,default,hash) 
         | 
| 27 31 | 
             
                @default=default 
         | 
| 28 32 | 
             
                @receiver=receiver
         | 
| 29 | 
            -
             # | 
| 30 | 
            -
             | 
| 33 | 
            +
                if ?A.is_a? String #ruby >= 1.9
         | 
| 34 | 
            +
                  @table={}
         | 
| 35 | 
            +
                else
         | 
| 36 | 
            +
                  @table=Array.new(0)
         | 
| 37 | 
            +
                end
         | 
| 31 38 | 
             
                @matcher='^[^'
         | 
| 32 39 |  | 
| 33 40 | 
             
                hash.each_pair {|pattern,action|
         | 
| 34 41 | 
             
                  case pattern
         | 
| 35 42 | 
             
                  when Range
         | 
| 36 43 | 
             
                    pattern.each { |c|
         | 
| 37 | 
            -
                      c.kind_of? String and c=c[0] #cvt to integer  #still needed?
         | 
| 38 44 | 
             
                      self[c]=action
         | 
| 39 45 | 
             
                    }
         | 
| 40 46 | 
             
                  when String
         | 
| 41 | 
            -
                    pattern | 
| 47 | 
            +
                    CharHandler.each_char(pattern) {|b| self[b]=action }
         | 
| 42 48 | 
             
                  when Fixnum
         | 
| 43 49 | 
             
                    self[pattern]=action
         | 
| 44 50 | 
             
                  else
         | 
| @@ -47,15 +53,26 @@ class CharHandler | |
| 47 53 | 
             
                }
         | 
| 48 54 |  | 
| 49 55 | 
             
                @matcher += ']$'
         | 
| 50 | 
            -
                @matcher=Regexp.new(@matcher)
         | 
| 56 | 
            +
                @matcher=Regexp.new(@matcher,0,'n')
         | 
| 51 57 |  | 
| 52 58 | 
             
                freeze
         | 
| 53 59 | 
             
              end
         | 
| 54 60 |  | 
| 61 | 
            +
              #-----------------------------------
         | 
| 62 | 
            +
              if String===?a 
         | 
| 63 | 
            +
                def self.each_char(str,&block)
         | 
| 64 | 
            +
                  str.each_char(&block)
         | 
| 65 | 
            +
                end
         | 
| 66 | 
            +
              else
         | 
| 67 | 
            +
                def self.each_char(str,&block)
         | 
| 68 | 
            +
                  str.each_byte(&block)
         | 
| 69 | 
            +
                end
         | 
| 70 | 
            +
              end
         | 
| 71 | 
            +
             | 
| 55 72 | 
             
              #-----------------------------------
         | 
| 56 73 | 
             
              def []=(b,action)  #for use in initialize only
         | 
| 57 74 | 
             
                assert b >= ?\x00
         | 
| 58 | 
            -
                assert b <= ?\ | 
| 75 | 
            +
                assert b <= ?\x7F
         | 
| 59 76 | 
             
                assert !frozen?
         | 
| 60 77 |  | 
| 61 78 | 
             
                @table[b]=action
         | 
| @@ -69,17 +86,6 @@ class CharHandler | |
| 69 86 | 
             
                @receiver.send((@table[b] or @default), b.chr, *args)
         | 
| 70 87 | 
             
              end
         | 
| 71 88 |  | 
| 72 | 
            -
              #-----------------------------------
         | 
| 73 | 
            -
              def eat_file(file,blocksize,*args)
         | 
| 74 | 
            -
                begin
         | 
| 75 | 
            -
                  chars=file.read(blocksize)
         | 
| 76 | 
            -
                  md=@matcher.match(chars)
         | 
| 77 | 
            -
                  mychar=md[0][0]
         | 
| 78 | 
            -
                  #get file back in the right pos
         | 
| 79 | 
            -
                  file.pos+=md.offset(0)[0] - chars.length
         | 
| 80 | 
            -
                  @receiver.send(@default,md[0])
         | 
| 81 | 
            -
                end until go(mychar,*args)
         | 
| 82 | 
            -
              end
         | 
| 83 89 | 
             
            end
         | 
| 84 90 | 
             
            end
         | 
| 85 91 |  | 
    
        data/lib/rubylexer/context.rb
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            =begin  | 
| 1 | 
            +
            =begin 
         | 
| 2 2 | 
             
                rubylexer - a ruby lexer written in ruby
         | 
| 3 3 | 
             
                Copyright (C) 2008  Caleb Clausen
         | 
| 4 4 |  | 
| @@ -74,6 +74,7 @@ module NestedContexts | |
| 74 74 | 
             
                  super('{','}',linenum)
         | 
| 75 75 | 
             
                end
         | 
| 76 76 | 
             
                attr_accessor :wanting_stabby_block_body
         | 
| 77 | 
            +
                #attr_writer :starter,:ender
         | 
| 77 78 | 
             
              end
         | 
| 78 79 |  | 
| 79 80 | 
             
              class BeginEndContext  < NestedContext
         | 
| @@ -118,6 +119,9 @@ module NestedContexts | |
| 118 119 | 
             
              class UnparenedParamListLhsContext < ImplicitLhsContext
         | 
| 119 120 | 
             
                def starter; huh end  #" " ???
         | 
| 120 121 | 
             
                def ender; huh end    #; or \n when from method def, { or do when from stabby block
         | 
| 122 | 
            +
                def endtoken offset
         | 
| 123 | 
            +
                  KwParamListEndToken.new offset
         | 
| 124 | 
            +
                end
         | 
| 121 125 | 
             
              end
         | 
| 122 126 |  | 
| 123 127 | 
             
              class ImplicitContext < ListContext
         | 
| @@ -129,6 +133,9 @@ module NestedContexts | |
| 129 133 | 
             
                end
         | 
| 130 134 | 
             
                def lhs; false end
         | 
| 131 135 | 
             
                def wantarrow; true end
         | 
| 136 | 
            +
                def endtoken offset
         | 
| 137 | 
            +
                  ImplicitParamListEndToken.new offset
         | 
| 138 | 
            +
                end
         | 
| 132 139 | 
             
              end
         | 
| 133 140 |  | 
| 134 141 | 
             
              class KWParamListContextNoParen < ParamListContextNoParen
         | 
| @@ -159,6 +166,9 @@ module NestedContexts | |
| 159 166 | 
             
                def multi_assign?
         | 
| 160 167 | 
             
                  @multi if defined? @multi 
         | 
| 161 168 | 
             
                end
         | 
| 169 | 
            +
                def endtoken offset
         | 
| 170 | 
            +
                  AssignmentRhsEndToken.new offset
         | 
| 171 | 
            +
                end
         | 
| 162 172 | 
             
              end
         | 
| 163 173 |  | 
| 164 174 | 
             
              class WantsEndContext < NestedContext
         | 
| @@ -201,6 +211,9 @@ module NestedContexts | |
| 201 211 | 
             
                end
         | 
| 202 212 |  | 
| 203 213 | 
             
                attr :in_body
         | 
| 214 | 
            +
             | 
| 215 | 
            +
                attr_writer :has_parens
         | 
| 216 | 
            +
                def has_parens?; @has_parens end
         | 
| 204 217 | 
             
              end
         | 
| 205 218 |  | 
| 206 219 | 
             
              class StringContext < NestedContext #not used yet
         | 
| @@ -283,13 +296,13 @@ module NestedContexts | |
| 283 296 | 
             
                  stack=lxr.parsestack
         | 
| 284 297 | 
             
                  assert msg!=:for
         | 
| 285 298 | 
             
                  case msg
         | 
| 286 | 
            -
                  when :for | 
| 299 | 
            +
                  when :for; WantsEndContext===stack.last or raise 'syntax error: for not expected at this time'
         | 
| 287 300 | 
             
                             #local var defined in this state
         | 
| 288 301 | 
             
                             #never actually used?
         | 
| 289 | 
            -
                  when :in | 
| 302 | 
            +
                  when :in;  self.equal? stack.pop or raise 'syntax error: in not expected at this time'
         | 
| 290 303 | 
             
                             stack.push ExpectDoOrNlContext.new("for",/(do|;|:|\n)/,@linenum) 
         | 
| 291 304 | 
             
                             #pop self off owning context stack and push ExpectDoOrNlContext
         | 
| 292 | 
            -
                  when :comma, :splat | 
| 305 | 
            +
                  when :comma, :splat; return
         | 
| 293 306 | 
             
                  else super
         | 
| 294 307 | 
             
                  end
         | 
| 295 308 | 
             
                  LEGAL_SUCCESSORS[@state].include? msg or raise "for syntax error: #{msg} unexpected in #@state"
         | 
    
        data/lib/rubylexer/rubycode.rb
    CHANGED
    
    
    
        data/lib/rubylexer/rulexer.rb
    CHANGED
    
    | @@ -38,17 +38,10 @@ require 'sequence/file' | |
| 38 38 | 
             
            require 'sequence/list'
         | 
| 39 39 | 
             
            #-----------------------------------
         | 
| 40 40 | 
             
            assert !defined? ::RubyLexer
         | 
| 41 | 
            -
            $RuLexer=Class.new{}
         | 
| 42 | 
            -
            class RubyLexer < $RuLexer
         | 
| 43 | 
            -
              RuLexer=$RuLexer
         | 
| 44 | 
            -
            end
         | 
| 45 | 
            -
            $RuLexer=nil
         | 
| 46 | 
            -
            #------------------------------------
         | 
| 47 41 | 
             
            class RubyLexer
         | 
| 48 42 | 
             
              FASTER_STRING_ESCAPES=true
         | 
| 49 43 | 
             
              warn "FASTER_STRING_ESCAPES is off" unless FASTER_STRING_ESCAPES
         | 
| 50 44 | 
             
              AUTO_UNESCAPE_STRINGS=false
         | 
| 51 | 
            -
              class RuLexer
         | 
| 52 45 | 
             
               WHSP=" \t\r\v\f"
         | 
| 53 46 | 
             
               WHSPLF=WHSP+"\n"
         | 
| 54 47 | 
             
               #maybe \r should be in WHSPLF instead
         | 
| @@ -75,11 +68,13 @@ class RubyLexer | |
| 75 68 | 
             
                  @moretokens=[ RubyLexer::FileAndLineToken.new(@filename, @linenum, input_position) ]
         | 
| 76 69 | 
             
                  @endsets={}
         | 
| 77 70 | 
             
               end
         | 
| 71 | 
            +
               alias rulexer_initialize initialize
         | 
| 78 72 |  | 
| 79 73 | 
             
               #-----------------------------------
         | 
| 80 74 | 
             
               def endoffile_detected s=''
         | 
| 81 75 | 
             
                 EoiToken.new(s,@original_file, input_position-s.size)
         | 
| 82 76 | 
             
               end
         | 
| 77 | 
            +
               alias rulexer_endoffile_detected endoffile_detected
         | 
| 83 78 |  | 
| 84 79 | 
             
               #-----------------------------------
         | 
| 85 80 | 
             
               def get1token
         | 
| @@ -92,6 +87,7 @@ class RubyLexer | |
| 92 87 |  | 
| 93 88 | 
             
                  @toptable.go( nextchar )
         | 
| 94 89 | 
             
               end
         | 
| 90 | 
            +
               alias rulexer_get1token get1token
         | 
| 95 91 |  | 
| 96 92 | 
             
               #-----------------------------------
         | 
| 97 93 | 
             
               def no_more?
         | 
| @@ -143,40 +139,41 @@ private | |
| 143 139 | 
             
                  if @rubyversion>=1.9
         | 
| 144 140 | 
             
                    named_brs=[]
         | 
| 145 141 | 
             
                    if result.elems.size==1 and String===result.elems.first
         | 
| 142 | 
            +
                        elem=result.elems.first
         | 
| 146 143 | 
             
                        index=0
         | 
| 147 | 
            -
                         | 
| 148 | 
            -
             | 
| 149 | 
            -
                           | 
| 150 | 
            -
                           | 
| 151 | 
            -
                          when "(?<"; huh
         | 
| 144 | 
            +
                        while index=elem.index(/(#{EVEN_BS_S})( \(\?[<'] | \(\?\# | \[ )/xo,index)
         | 
| 145 | 
            +
                          index+=$1.size
         | 
| 146 | 
            +
                          case $2
         | 
| 147 | 
            +
                          when "(?<"
         | 
| 152 148 | 
             
                            index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)>/o,index)
         | 
| 153 | 
            -
                             | 
| 154 | 
            -
                            index | 
| 149 | 
            +
                            break lexerror(result, "malformed named backreference") unless index
         | 
| 150 | 
            +
                            index+=$&.size
         | 
| 155 151 | 
             
                            named_brs<<$1
         | 
| 156 | 
            -
                          when "(?'" | 
| 152 | 
            +
                          when "(?'"
         | 
| 157 153 | 
             
                            index=elem.index(/\G...(#{LCLETTER}#{LETTER_DIGIT}+)'/o,index)
         | 
| 158 | 
            -
                             | 
| 159 | 
            -
                            index | 
| 154 | 
            +
                            break lexerror(result, "malformed named backreference") unless index
         | 
| 155 | 
            +
                            index+=$&.size
         | 
| 160 156 | 
             
                            named_brs<<$1
         | 
| 161 | 
            -
                          when "(?#" | 
| 157 | 
            +
                          when "(?#"
         | 
| 162 158 | 
             
                            index+=3
         | 
| 163 | 
            -
                            index=elem.index(/#{EVEN_BS_S}\) | 
| 164 | 
            -
                             | 
| 165 | 
            -
                            index | 
| 166 | 
            -
                          when "[" | 
| 159 | 
            +
                            index=elem.index(/#{EVEN_BS_S}\)/o,index)
         | 
| 160 | 
            +
                            break lexerror(result, "unterminated regexp comment") unless index
         | 
| 161 | 
            +
                            index+=$&.size
         | 
| 162 | 
            +
                          when "["
         | 
| 167 163 | 
             
                            index+=1
         | 
| 168 164 | 
             
                            paren_ctr=1
         | 
| 169 165 | 
             
                            loop do
         | 
| 170 166 | 
             
                              index=elem.index(/#{EVEN_BS_S}(&&\[\^|\])/o,index)
         | 
| 171 | 
            -
                               | 
| 167 | 
            +
                              break lexerror(result, "unterminated character class") unless index
         | 
| 172 168 | 
             
                              index+=$&.size
         | 
| 173 | 
            -
                               | 
| 174 | 
            -
                                paren_ctr+=1
         | 
| 175 | 
            -
                              else 
         | 
| 169 | 
            +
                              if $1==']'
         | 
| 176 170 | 
             
                                paren_ctr-=1
         | 
| 177 171 | 
             
                                break if paren_ctr==0 
         | 
| 172 | 
            +
                              else 
         | 
| 173 | 
            +
                                paren_ctr+=1
         | 
| 178 174 | 
             
                              end
         | 
| 179 175 | 
             
                            end
         | 
| 176 | 
            +
                            break unless index
         | 
| 180 177 |  | 
| 181 178 | 
             
                          end
         | 
| 182 179 | 
             
                        end
         | 
| @@ -190,6 +187,7 @@ private | |
| 190 187 |  | 
| 191 188 | 
             
               #-----------------------------------
         | 
| 192 189 | 
             
               def single_char_token(str)  getchar   end
         | 
| 190 | 
            +
               alias rulexer_single_char_token single_char_token
         | 
| 193 191 |  | 
| 194 192 | 
             
               #-----------------------------------
         | 
| 195 193 | 
             
               def illegal_char(ch)
         | 
| @@ -239,12 +237,12 @@ else | |
| 239 237 | 
             
            end
         | 
| 240 238 | 
             
                  result=send(strlex, beg, type, close=(PAIRS[beg] or beg))
         | 
| 241 239 | 
             
                  case ch
         | 
| 242 | 
            -
                  when /^[Wwr] | 
| 240 | 
            +
                  when /^[Wwr]$/
         | 
| 243 241 | 
             
                    str=result
         | 
| 244 242 | 
             
                    result=RenderExactlyStringToken.new(type).append_token(result)
         | 
| 245 243 | 
             
                    result.open=str.open; result.close=str.close
         | 
| 246 244 | 
             
                    result.line=@linenum
         | 
| 247 | 
            -
                  when 's' | 
| 245 | 
            +
                  when 's'
         | 
| 248 246 | 
             
                    result.open=open+beg
         | 
| 249 247 | 
             
                    result.close=close
         | 
| 250 248 | 
             
                    result=SymbolToken.new result,nil,"%s"
         | 
| @@ -274,12 +272,12 @@ end | |
| 274 272 | 
             
               #-----------------------------------
         | 
| 275 273 | 
             
               INTERIOR_REX_CACHE={}
         | 
| 276 274 | 
             
               EVEN_BS_S=/
         | 
| 277 | 
            -
                 ( | 
| 275 | 
            +
                 (?:\G|
         | 
| 278 276 | 
             
                  [^\\c-]|
         | 
| 279 | 
            -
                  ( | 
| 280 | 
            -
                  ( | 
| 277 | 
            +
                  (?:\G|[^\\])(?:c|[CM]-)|
         | 
| 278 | 
            +
                  (?:\G|[^CM])-
         | 
| 281 279 | 
             
                 )
         | 
| 282 | 
            -
                 ( | 
| 280 | 
            +
                 (?:\\(?:c|[CM]-)?){2}*
         | 
| 283 281 | 
             
               /x
         | 
| 284 282 | 
             
               ILLEGAL_ESCAPED=/#{EVEN_BS_S}(\\([CM][^-]|x[^a-fA-F0-9]))/o #whaddaya do with this?
         | 
| 285 283 | 
             
               def all_quote(nester, type, delimiter, bs_handler=nil)
         | 
| @@ -298,12 +296,12 @@ if FASTER_STRING_ESCAPES | |
| 298 296 | 
             
                    single_quotish=true
         | 
| 299 297 | 
             
                    special=/\\./m
         | 
| 300 298 | 
             
                  else
         | 
| 301 | 
            -
                    crunch | 
| 299 | 
            +
                    crunch=/\#(?=[^{$@])/
         | 
| 302 300 | 
             
                    escaped=/\\(?>[^xcCM0-7]|(?>c|[CM].)(?>[^\\]|(?=\\))|(?>x.[0-9a-fA-F]?)|(?>[0-7]{1,3}))/m
         | 
| 303 301 | 
             
                    special=
         | 
| 304 302 | 
             
                      case delimiter
         | 
| 305 | 
            -
                      when '\\' | 
| 306 | 
            -
                      when '#' | 
| 303 | 
            +
                      when '\\'; crunch
         | 
| 304 | 
            +
                      when '#'; escaped
         | 
| 307 305 | 
             
                      else /#{escaped}|#{crunch}/o
         | 
| 308 306 | 
             
                      end
         | 
| 309 307 | 
             
                    special_char<< maybe_crunch="#"
         | 
| @@ -318,8 +316,8 @@ if FASTER_STRING_ESCAPES | |
| 318 316 | 
             
                  str=StringToken.new type
         | 
| 319 317 | 
             
                  str.bs_handler ||= case type
         | 
| 320 318 | 
             
                    when '/' then :regex_esc_seq
         | 
| 321 | 
            -
                    when '{' then :Wquote_esc_seq
         | 
| 322 | 
            -
                    when '"','`',':' then :dquote_esc_seq
         | 
| 319 | 
            +
                    when '{' then @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
         | 
| 320 | 
            +
                    when '"','`',':' then @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
         | 
| 323 321 | 
             
                    when "'"     then :squote_esc_seq
         | 
| 324 322 | 
             
                    when "["     then :wquote_esc_seq
         | 
| 325 323 | 
             
                    else raise "unknown quote type: #{type}"
         | 
| @@ -431,8 +429,8 @@ else | |
| 431 429 |  | 
| 432 430 | 
             
                  bs_handler ||= case type
         | 
| 433 431 | 
             
                    when '/' then :regex_esc_seq
         | 
| 434 | 
            -
                    when '{' then :Wquote_esc_seq
         | 
| 435 | 
            -
                    when '"','`',':' then :dquote_esc_seq
         | 
| 432 | 
            +
                    when '{' then @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
         | 
| 433 | 
            +
                    when '"','`',':' then @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
         | 
| 436 434 | 
             
                    when "'"     then :squote_esc_seq
         | 
| 437 435 | 
             
                    when "["     then :wquote_esc_seq
         | 
| 438 436 | 
             
                    else raise "unknown quote type: #{type}"
         | 
| @@ -525,16 +523,25 @@ end | |
| 525 523 | 
             
                        k.tr(ESCAPECHRS,ESCAPESEQS)
         | 
| 526 524 | 
             
                     when "M"
         | 
| 527 525 | 
             
                        eat_next_if(?-) or raise 'bad \\M sequence'
         | 
| 528 | 
            -
                         | 
| 526 | 
            +
                        ch=getchar_maybe_escape[0]
         | 
| 527 | 
            +
                        ch=ch.ord if ch.respond_to? :ord
         | 
| 528 | 
            +
                        ch>=0xFF and raise 'bad \\M sequence'
         | 
| 529 | 
            +
                        (ch | 0x80).chr
         | 
| 529 530 |  | 
| 530 531 | 
             
                     when "C"
         | 
| 531 532 | 
             
                        eat_next_if(?-) or raise 'bad \\C sequence'
         | 
| 532 533 | 
             
                        nextchar==?? and getchar and return "\177" #wtf?
         | 
| 533 | 
            -
                         | 
| 534 | 
            +
                        ch=getchar_maybe_escape[0]
         | 
| 535 | 
            +
                        ch=ch.ord if ch.respond_to? :ord
         | 
| 536 | 
            +
                        ch>=0xFF and raise 'bad \\M sequence'
         | 
| 537 | 
            +
                        (ch & 0x9F).chr
         | 
| 534 538 |  | 
| 535 539 | 
             
                     when "c"
         | 
| 536 540 | 
             
                        nextchar==?? and getchar and return "\177" #wtf?
         | 
| 537 | 
            -
                         | 
| 541 | 
            +
                        ch=getchar_maybe_escape[0]
         | 
| 542 | 
            +
                        ch=ch.ord if ch.respond_to? :ord
         | 
| 543 | 
            +
                        ch>=0xFF and raise 'bad \\M sequence'
         | 
| 544 | 
            +
                        (ch & 0x9F).chr
         | 
| 538 545 |  | 
| 539 546 | 
             
                     when /^[0-7]$/
         | 
| 540 547 | 
             
                        str=k
         | 
| @@ -556,6 +563,33 @@ end | |
| 556 563 | 
             
                  end
         | 
| 557 564 | 
             
               end
         | 
| 558 565 |  | 
| 566 | 
            +
               #-----------------------------------
         | 
| 567 | 
            +
               def dquote19_esc_seq(ch,nester,delimiter)
         | 
| 568 | 
            +
                  assert ch == '\\'
         | 
| 569 | 
            +
                  case ch=getchar
         | 
| 570 | 
            +
                  when 'u'
         | 
| 571 | 
            +
                    case ch=getchar
         | 
| 572 | 
            +
                    when /[a-f0-9]/i
         | 
| 573 | 
            +
                      u=read(4)
         | 
| 574 | 
            +
                      raise "bad unicode escape" unless /[0-9a-f]{4}/i===u
         | 
| 575 | 
            +
                      [u.hex].pack "U"
         | 
| 576 | 
            +
                    when '{'
         | 
| 577 | 
            +
                      result=[]
         | 
| 578 | 
            +
                      until eat_next_if '}'
         | 
| 579 | 
            +
                        u=@file.scan(/\A[0-9a-f]{1,6}[ \t]?/i,7)
         | 
| 580 | 
            +
                        result<<u.hex
         | 
| 581 | 
            +
                      end
         | 
| 582 | 
            +
                      result=result.pack "U*"
         | 
| 583 | 
            +
                    else raise "bad unicode escape"
         | 
| 584 | 
            +
                    end 
         | 
| 585 | 
            +
                  else 
         | 
| 586 | 
            +
                    back1char
         | 
| 587 | 
            +
                    result=dquote_esc_seq('\\',nester,delimiter)
         | 
| 588 | 
            +
                    #/\s|\v/===result and result="\\"+result
         | 
| 589 | 
            +
                    result
         | 
| 590 | 
            +
                  end
         | 
| 591 | 
            +
               end
         | 
| 592 | 
            +
             | 
| 559 593 | 
             
               #-----------------------------------
         | 
| 560 594 | 
             
               def regex_esc_seq(ch,nester,delimiter)
         | 
| 561 595 | 
             
                  assert ch == '\\'
         | 
| @@ -571,9 +605,9 @@ end | |
| 571 605 | 
             
               def Wquote_esc_seq(ch,nester,delimiter)
         | 
| 572 606 | 
             
                  assert ch == '\\'
         | 
| 573 607 | 
             
                  case ch=getchar
         | 
| 574 | 
            -
                  when "\n" | 
| 575 | 
            -
                  when nester,delimiter | 
| 576 | 
            -
                  when /[\s\v\\] | 
| 608 | 
            +
                  when "\n"; @linenum+=1; ch
         | 
| 609 | 
            +
                  when nester,delimiter; ch
         | 
| 610 | 
            +
                  when /[\s\v\\]/; ch
         | 
| 577 611 | 
             
                  else 
         | 
| 578 612 | 
             
                    back1char
         | 
| 579 613 | 
             
                    result=dquote_esc_seq('\\',nester,delimiter)
         | 
| @@ -582,6 +616,21 @@ end | |
| 582 616 | 
             
                  end
         | 
| 583 617 | 
             
               end
         | 
| 584 618 |  | 
| 619 | 
            +
               #-----------------------------------
         | 
| 620 | 
            +
               def Wquote19_esc_seq(ch,nester,delimiter)
         | 
| 621 | 
            +
                  assert ch == '\\'
         | 
| 622 | 
            +
                  case ch=getchar
         | 
| 623 | 
            +
                  when "\n"; @linenum+=1; ch
         | 
| 624 | 
            +
                  when nester,delimiter; ch
         | 
| 625 | 
            +
                  when /[\s\v\\]/; ch
         | 
| 626 | 
            +
                  else 
         | 
| 627 | 
            +
                    back1char
         | 
| 628 | 
            +
                    result=dquote19_esc_seq('\\',nester,delimiter)
         | 
| 629 | 
            +
                    #/\s|\v/===result and result="\\"+result
         | 
| 630 | 
            +
                    result
         | 
| 631 | 
            +
                  end
         | 
| 632 | 
            +
               end
         | 
| 633 | 
            +
             | 
| 585 634 | 
             
               #-----------------------------------
         | 
| 586 635 | 
             
               def wquote_esc_seq(ch,nester,delimiter)
         | 
| 587 636 | 
             
                  assert(ch=='\\')
         | 
| @@ -592,10 +641,10 @@ end | |
| 592 641 | 
             
                     #all \ sequences 
         | 
| 593 642 | 
             
                     #are unescaped; actual
         | 
| 594 643 | 
             
                     #newlines are counted but not changed
         | 
| 595 | 
            -
                     when delimiter,nester,'\\' | 
| 596 | 
            -
            #         when delimiter,nester | 
| 597 | 
            -
                     when "\n" | 
| 598 | 
            -
                     when /[\s\v] | 
| 644 | 
            +
                     when delimiter,nester,'\\'; escchar
         | 
| 645 | 
            +
            #         when delimiter,nester; escchar
         | 
| 646 | 
            +
                     when "\n"; @linenum+=1; escchar
         | 
| 647 | 
            +
                     when /[\s\v]/; escchar
         | 
| 599 648 | 
             
                     else       "\\"+escchar
         | 
| 600 649 | 
             
                  end
         | 
| 601 650 | 
             
               end
         | 
| @@ -610,9 +659,9 @@ end | |
| 610 659 | 
             
                     #all \ sequences 
         | 
| 611 660 | 
             
                     #are unescaped; actual
         | 
| 612 661 | 
             
                     #newlines are counted but not changed
         | 
| 613 | 
            -
                     when delimiter,nester,'\\' | 
| 614 | 
            -
            #         when delimiter,nester | 
| 615 | 
            -
                     when "\n" | 
| 662 | 
            +
                     when delimiter,nester,'\\'; escchar
         | 
| 663 | 
            +
            #         when delimiter,nester; escchar
         | 
| 664 | 
            +
                     when "\n"; @linenum+=1; "\\"+escchar
         | 
| 616 665 | 
             
                     else       "\\"+escchar
         | 
| 617 666 | 
             
                  end
         | 
| 618 667 | 
             
               end
         | 
| @@ -627,9 +676,9 @@ end | |
| 627 676 | 
             
                     #all \ sequences 
         | 
| 628 677 | 
             
                     #are unescaped; actual
         | 
| 629 678 | 
             
                     #newlines are counted but not changed
         | 
| 630 | 
            -
                     when delimiter,nester | 
| 631 | 
            -
            #         when delimiter,nester | 
| 632 | 
            -
                     when "\n" | 
| 679 | 
            +
                     when delimiter,nester; escchar
         | 
| 680 | 
            +
            #         when delimiter,nester; escchar
         | 
| 681 | 
            +
                     when "\n"; @linenum+=1; "\\"+escchar
         | 
| 633 682 | 
             
                     else       "\\"+escchar
         | 
| 634 683 | 
             
                  end
         | 
| 635 684 | 
             
               end
         | 
| @@ -649,9 +698,11 @@ end | |
| 649 698 | 
             
            #   alias squote_esc_seq	wquote_esc_seq
         | 
| 650 699 |  | 
| 651 700 | 
             
              module RecursiveRubyLexer
         | 
| 701 | 
            +
            =begin
         | 
| 652 702 | 
             
                def initial_nonblock_levels
         | 
| 653 703 | 
             
                  @localvars_stack.size==1 ? 2 : 1
         | 
| 654 704 | 
             
                end
         | 
| 705 | 
            +
            =end
         | 
| 655 706 | 
             
              end
         | 
| 656 707 |  | 
| 657 708 | 
             
              def initial_nonblock_levels; 1 end
         | 
| @@ -663,8 +714,8 @@ end | |
| 663 714 |  | 
| 664 715 | 
             
              def merge_levels levels, nil_empty_class
         | 
| 665 716 | 
             
                case (levels.size rescue 0)
         | 
| 666 | 
            -
                when 0 | 
| 667 | 
            -
                when 1 | 
| 717 | 
            +
                when 0; {} unless nil_empty_class
         | 
| 718 | 
            +
                when 1; levels.first.dup
         | 
| 668 719 | 
             
                else levels.inject{|a,b| a.merge b} 
         | 
| 669 720 | 
             
                end
         | 
| 670 721 | 
             
              end
         | 
| @@ -713,30 +764,7 @@ end | |
| 713 764 | 
             
                  #pass current local vars into new parser
         | 
| 714 765 | 
             
                  #must pass the lists of nonblock, parentblock and currentblock vars separately
         | 
| 715 766 | 
             
                  #then a table increment after each
         | 
| 716 | 
            -
                   | 
| 717 | 
            -
                  nonblocky.keys.each{|varname|
         | 
| 718 | 
            -
                    rl.localvars[varname]=true
         | 
| 719 | 
            -
                  }
         | 
| 720 | 
            -
                  rl.localvars.start_block  
         | 
| 721 | 
            -
                  #incremental table, tells us what :local vars are defined in the str inclusion
         | 
| 722 | 
            -
             | 
| 723 | 
            -
                  if blocky
         | 
| 724 | 
            -
                    rl.localvars.start_block  
         | 
| 725 | 
            -
                    blocky.keys.each{|varname|
         | 
| 726 | 
            -
                      rl.localvars[varname]=true
         | 
| 727 | 
            -
                    }
         | 
| 728 | 
            -
                    rl.localvars.start_block
         | 
| 729 | 
            -
                    #incremental table, tells us what :block vars are defined in the str inclusion
         | 
| 730 | 
            -
                  end
         | 
| 731 | 
            -
             | 
| 732 | 
            -
                  if current
         | 
| 733 | 
            -
                    rl.localvars.start_block  
         | 
| 734 | 
            -
                    current.keys.each{|varname|
         | 
| 735 | 
            -
                      rl.localvars[varname]=true
         | 
| 736 | 
            -
                    }
         | 
| 737 | 
            -
                    rl.localvars.start_block
         | 
| 738 | 
            -
                    #incremental table, tells us what :current vars are defined in the str inclusion
         | 
| 739 | 
            -
                  end
         | 
| 767 | 
            +
                  rl.localvars_stack=@localvars_stack.map{|lvs| lvs.deep_copy}
         | 
| 740 768 |  | 
| 741 769 | 
             
                  rl.pending_here_bodies=@pending_here_bodies
         | 
| 742 770 |  | 
| @@ -790,19 +818,13 @@ end | |
| 790 818 | 
             
            #      @pending_here_bodies=rl.pending_here_bodies      
         | 
| 791 819 |  | 
| 792 820 | 
             
                  #local vars defined in inclusion get propagated to outer parser
         | 
| 793 | 
            -
                   | 
| 794 | 
            -
                  newvars.each{|newvar| localvars[newvar]=true }
         | 
| 821 | 
            +
                  @localvars_stack=rl.localvars_stack
         | 
| 795 822 |  | 
| 796 823 | 
             
                  result=RubyCode.new(tokens,@filename,@linenum)
         | 
| 797 824 | 
             
                  @linenum=rl.linenum
         | 
| 798 825 | 
             
                  return result
         | 
| 799 826 | 
             
               end
         | 
| 800 827 |  | 
| 801 | 
            -
               #-----------------------------------
         | 
| 802 | 
            -
               def here_spread_over_ruby_code(rl,tok)
         | 
| 803 | 
            -
                 lexerror tok, 'here body outside string inclusion'
         | 
| 804 | 
            -
               end
         | 
| 805 | 
            -
               
         | 
| 806 828 |  | 
| 807 829 | 
             
               #-----------------------------------
         | 
| 808 830 | 
             
            #   BINCHARS=?0..?1
         | 
| @@ -913,6 +935,7 @@ else | |
| 913 935 | 
             
                  IgnoreToken.new(til_charset(/[\r\n]/))
         | 
| 914 936 | 
             
               end
         | 
| 915 937 | 
             
            end
         | 
| 938 | 
            +
              alias rulexer_comment comment
         | 
| 916 939 |  | 
| 917 940 | 
             
               #-----------------------------------
         | 
| 918 941 | 
             
               def whitespace(ch)
         | 
| @@ -944,16 +967,17 @@ end | |
| 944 967 | 
             
                  @moretokens << FileAndLineToken.new( @filename, @linenum, offset+1 )
         | 
| 945 968 | 
             
                  return NewlineToken.new("\n",offset)
         | 
| 946 969 | 
             
               end
         | 
| 947 | 
            -
             | 
| 970 | 
            +
               alias rulexer_newline newline
         | 
| 948 971 |  | 
| 949 972 | 
             
               #-----------------------------------
         | 
| 950 973 | 
             
               def getchar_maybe_escape
         | 
| 951 974 | 
             
                  eof? and raise "unterminated dq string"
         | 
| 952 | 
            -
                  c=getc
         | 
| 953 | 
            -
             | 
| 954 | 
            -
                  c == ?\\ and
         | 
| 955 | 
            -
                     (c = (dquote_esc_seq('\\')[-1] or ?\n))
         | 
| 975 | 
            +
                  c=getc.chr
         | 
| 956 976 |  | 
| 977 | 
            +
                  if c == "\\"
         | 
| 978 | 
            +
                     c = @rubyversion >= 1.9 ? dquote19_esc_seq('\\') : dquote_esc_seq('\\')
         | 
| 979 | 
            +
                     c = "\n" if c.empty?
         | 
| 980 | 
            +
                  end
         | 
| 957 981 | 
             
                  return c
         | 
| 958 982 | 
             
               end
         | 
| 959 983 |  | 
| @@ -962,6 +986,7 @@ protected | |
| 962 986 | 
             
              require 'forwardable'
         | 
| 963 987 | 
             
              extend Forwardable
         | 
| 964 988 | 
             
              def_delegators :@file, :readahead, :readback, :read, :eof?
         | 
| 989 | 
            +
              alias rulexer_eof? eof?
         | 
| 965 990 |  | 
| 966 991 | 
             
              def til_charset cs,len=16; @file.read_til_charset cs,len end
         | 
| 967 992 | 
             
              def getc; @file.read1 end
         | 
| @@ -995,13 +1020,14 @@ protected | |
| 995 1020 |  | 
| 996 1021 | 
             
              #-----------------------------------
         | 
| 997 1022 | 
             
              def input_position; @file.pos end
         | 
| 1023 | 
            +
              alias rulexer_input_position input_position
         | 
| 998 1024 |  | 
| 999 1025 | 
             
              #-----------------------------------
         | 
| 1000 1026 | 
             
              def input_position_set x; @file.pos=x end
         | 
| 1001 1027 |  | 
| 1002 1028 | 
             
              #-----------------------------------
         | 
| 1003 1029 | 
             
              def self.save_offsets_in(*funcnames)
         | 
| 1004 | 
            -
                eval funcnames.collect{|fn| <<-endeval }. | 
| 1030 | 
            +
                eval funcnames.collect{|fn| <<-endeval }.join
         | 
| 1005 1031 | 
             
                  class ::#{self}
         | 
| 1006 1032 | 
             
                    alias #{fn}__no_offset #{fn}   #rename old ver of fn
         | 
| 1007 1033 | 
             
                    def #{fn}(*args)               #create new version
         | 
| @@ -1015,7 +1041,6 @@ protected | |
| 1015 1041 | 
             
                endeval
         | 
| 1016 1042 | 
             
              end
         | 
| 1017 1043 |  | 
| 1018 | 
            -
              end
         | 
| 1019 1044 |  | 
| 1020 1045 | 
             
            end
         | 
| 1021 1046 |  |