RubyGems - coderay - Versions diffs - 1.0.0 → 1.0.0.598.pre - Mend

coderay 1.0.0 → 1.0.0.598.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

data/FOLDERS +49 -0
data/Rakefile +6 -5
data/bin/coderay +74 -190
data/bin/coderay_stylesheet +4 -0
data/{README_INDEX.rdoc → lib/README} +20 -10
data/lib/coderay.rb +60 -62
data/lib/coderay/duo.rb +55 -2
data/lib/coderay/encoder.rb +39 -52
data/lib/coderay/encoders/_map.rb +7 -11
data/lib/coderay/encoders/comment_filter.rb +61 -0
data/lib/coderay/encoders/count.rb +26 -11
data/lib/coderay/encoders/debug.rb +60 -11
data/lib/coderay/encoders/div.rb +8 -9
data/lib/coderay/encoders/filter.rb +52 -12
data/lib/coderay/encoders/html.rb +113 -106
data/lib/coderay/encoders/html/css.rb +7 -2
data/lib/coderay/encoders/html/numbering.rb +27 -24
data/lib/coderay/encoders/html/output.rb +58 -15
data/lib/coderay/encoders/json.rb +44 -37
data/lib/coderay/encoders/lines_of_code.rb +56 -9
data/lib/coderay/encoders/null.rb +13 -6
data/lib/coderay/encoders/page.rb +8 -8
data/lib/coderay/encoders/span.rb +9 -10
data/lib/coderay/encoders/statistic.rb +114 -51
data/lib/coderay/encoders/terminal.rb +10 -7
data/lib/coderay/encoders/text.rb +36 -17
data/lib/coderay/encoders/token_kind_filter.rb +58 -1
data/lib/coderay/encoders/xml.rb +11 -13
data/lib/coderay/encoders/yaml.rb +14 -16
data/lib/coderay/for_redcloth.rb +1 -1
data/lib/coderay/helpers/file_type.rb +240 -125
data/lib/coderay/helpers/gzip_simple.rb +123 -0
data/lib/coderay/helpers/plugin.rb +307 -241
data/lib/coderay/helpers/word_list.rb +126 -65
data/lib/coderay/scanner.rb +103 -153
data/lib/coderay/scanners/_map.rb +16 -18
data/lib/coderay/scanners/c.rb +13 -13
data/lib/coderay/scanners/cpp.rb +6 -6
data/lib/coderay/scanners/css.rb +48 -47
data/lib/coderay/scanners/debug.rb +55 -9
data/lib/coderay/scanners/delphi.rb +4 -4
data/lib/coderay/scanners/diff.rb +25 -43
data/lib/coderay/scanners/groovy.rb +2 -2
data/lib/coderay/scanners/html.rb +30 -107
data/lib/coderay/scanners/java.rb +5 -6
data/lib/coderay/scanners/java/builtin_types.rb +0 -2
data/lib/coderay/scanners/java_script.rb +6 -6
data/lib/coderay/scanners/json.rb +6 -7
data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
data/lib/coderay/scanners/php.rb +12 -13
data/lib/coderay/scanners/plaintext.rb +26 -0
data/lib/coderay/scanners/python.rb +4 -4
data/lib/coderay/scanners/{erb.rb → rhtml.rb} +11 -19
data/lib/coderay/scanners/ruby.rb +208 -219
data/lib/coderay/scanners/ruby/patterns.rb +85 -18
data/lib/coderay/scanners/scheme.rb +136 -0
data/lib/coderay/scanners/sql.rb +22 -29
data/lib/coderay/scanners/yaml.rb +10 -11
data/lib/coderay/styles/_map.rb +2 -2
data/lib/coderay/styles/alpha.rb +104 -102
data/lib/coderay/styles/cycnus.rb +143 -0
data/lib/coderay/styles/murphy.rb +123 -0
data/lib/coderay/token_kinds.rb +86 -87
data/lib/coderay/tokens.rb +169 -26
data/test/functional/basic.rb +14 -200
data/test/functional/examples.rb +14 -20
data/test/functional/for_redcloth.rb +8 -15
data/test/functional/load_plugin_scanner.rb +11 -0
data/test/functional/suite.rb +6 -9
data/test/functional/vhdl.rb +126 -0
data/test/functional/word_list.rb +79 -0
metadata +129 -107
data/lib/coderay/helpers/gzip.rb +0 -41
data/lib/coderay/scanners/clojure.rb +0 -217
data/lib/coderay/scanners/haml.rb +0 -168
data/lib/coderay/scanners/ruby/string_state.rb +0 -71
data/lib/coderay/scanners/text.rb +0 -26
data/lib/coderay/tokens_proxy.rb +0 -55
data/lib/coderay/version.rb +0 -3

data/lib/coderay/scanners/json.rb CHANGED Viewed

@@ -13,11 +13,10 @@ module Scanners
     ]  # :nodoc:
     ESCAPE = / [bfnrt\\"\/] /x  # :nodoc:
-    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc:
+    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} /x  # :nodoc:
   protected
-    # See http://json.org/ for a definition of the JSON lexic/grammar.
     def scan_tokens encoder, options
       state = :initial
@@ -45,14 +44,14 @@ module Scanners
             when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
             end
           elsif match = scan(/ true | false | null /x)
-            encoder.text_token match, :value
+             encoder.text_token match, :value
           elsif match = scan(/ -? (?: 0 | [1-9]\d* ) /x)
+            kind = :integer
             if scan(/ \.\d+ (?:[eE][-+]?\d+)? | [eE][-+]? \d+ /x)
               match << matched
-              encoder.text_token match, :float
-            else
-              encoder.text_token match, :integer
+              kind = :float
             end
+            encoder.text_token match, kind
           else
             encoder.text_token getch, :error
           end
@@ -77,7 +76,7 @@ module Scanners
           end
         else
-          raise_inspect 'Unknown state: %p' % [state], encoder
+          raise_inspect 'Unknown state', encoder
         end
       end

data/lib/coderay/scanners/nitro_xhtml.rb ADDED Viewed

@@ -0,0 +1,136 @@
+module CodeRay
+module Scanners
+  load :html
+  load :ruby
+  # Nitro XHTML Scanner
+  #
+  # Alias: +nitro+
+  class NitroXHTML < Scanner
+    register_for :nitro_xhtml
+    file_extension :xhtml
+    title 'Nitro XHTML'
+    KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
+    NITRO_RUBY_BLOCK = /
+      <\?r
+      (?>
+        [^\?]*
+        (?> \?(?!>) [^\?]* )*
+      )
+      (?: \?> )?
+    |
+      <ruby>
+      (?>
+        [^<]*
+        (?> <(?!\/ruby>) [^<]* )*
+      )
+      (?: <\/ruby> )?
+    |
+      <%
+      (?>
+        [^%]*
+        (?> %(?!>) [^%]* )*
+      )
+      (?: %> )?
+    /mx  # :nodoc:
+    NITRO_VALUE_BLOCK = /
+      \#
+      (?:
+        \{
+        [^{}]*
+        (?>
+          \{ [^}]* \}
+          (?> [^{}]* )
+        )*
+        \}?
+      | \| [^|]* \|?
+      | \( [^)]* \)?
+      | \[ [^\]]* \]?
+      | \\ [^\\]* \\?
+      )
+    /x  # :nodoc:
+    NITRO_ENTITY = /
+      % (?: \#\d+ | \w+ ) ;
+    /  # :nodoc:
+    START_OF_RUBY = /
+      (?=[<\#%])
+      < (?: \?r | % | ruby> )
+    | \# [{(|]
+    | % (?: \#\d+ | \w+ ) ;
+    /x  # :nodoc:
+    CLOSING_PAREN = Hash.new { |h, p| h[p] = p }  # :nodoc:
+    CLOSING_PAREN.update( {
+      '(' => ')',
+      '[' => ']',
+      '{' => '}',
+    } )
+  protected
+    def setup
+      @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
+      @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
+    end
+    def reset_instance
+      super
+      @html_scanner.reset
+    end
+    def scan_tokens encoder, options
+      until eos?
+        if (match = scan_until(/(?=#{START_OF_RUBY})/o) || match = scan_until(/\z/)) and not match.empty?
+          @html_scanner.tokenize match
+        elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
+          start_tag = match[0,2]
+          delimiter = CLOSING_PAREN[start_tag[1,1]]
+          end_tag = match[-1,1] == delimiter ? delimiter : ''
+          encoder.begin_group :inline
+          encoder.text_token start_tag, :inline_delimiter
+          code = match[start_tag.size .. -1 - end_tag.size]
+          @ruby_scanner.tokenize code, :tokens => encoder
+          encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
+          encoder.end_group :inline
+        elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
+          start_tag = '<?r'
+          end_tag = match[-2,2] == '?>' ? '?>' : ''
+          encoder.begin_group :inline
+          encoder.text_token start_tag, :inline_delimiter
+          code = match[start_tag.size .. -(end_tag.size)-1]
+          @ruby_scanner.tokenize code, :tokens => encoder
+          encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
+          encoder.end_group :inline
+        elsif entity = scan(/#{NITRO_ENTITY}/o)
+          encoder.text_token entity, :entity
+        elsif scan(/%/)
+          encoder.text_token matched, :error
+        else
+          raise_inspect 'else-case reached!', encoder
+        end
+      end
+      encoder
+    end
+  end
+end
+end

data/lib/coderay/scanners/php.rb CHANGED Viewed

@@ -10,7 +10,6 @@ module Scanners
     register_for :php
     file_extension 'php'
-    encoding 'BINARY'
     KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
@@ -181,14 +180,14 @@ module Scanners
         $argc $argv
       ]
-      IDENT_KIND = WordList::CaseIgnoring.new(:ident).
-        add(KEYWORDS, :keyword).
-        add(TYPES, :predefined_type).
-        add(LANGUAGE_CONSTRUCTS, :keyword).
+      IDENT_KIND = CaseIgnoringWordList.new(:ident).
+        add(KEYWORDS, :reserved).
+        add(TYPES, :pre_type).
+        add(LANGUAGE_CONSTRUCTS, :reserved).
         add(BUILTIN_FUNCTIONS, :predefined).
-        add(CLASSES, :predefined_constant).
+        add(CLASSES, :pre_constant).
         add(EXCEPTIONS, :exception).
-        add(CONSTANTS, :predefined_constant)
+        add(CONSTANTS, :pre_constant)
       VARIABLE_KIND = WordList.new(:local_variable).
         add(PREDEFINED, :predefined)
@@ -234,8 +233,8 @@ module Scanners
     def scan_tokens encoder, options
       if check(RE::PHP_START) ||  # starts with <?
-       (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
-       check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
+       (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
+       exist?(RE::HTML_INDICATOR) ||
        check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
         # is HTML with embedded PHP, so start with HTML
         states = [:initial]
@@ -261,7 +260,7 @@ module Scanners
             label_expected = true
             states << :php
           else
-            match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
+            match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
             @html_scanner.tokenize match unless match.empty?
           end
@@ -281,7 +280,7 @@ module Scanners
               label_expected = false
               if kind == :ident && match =~ /^[A-Z]/
                 kind = :constant
-              elsif kind == :keyword
+              elsif kind == :reserved
                 case match
                 when 'class'
                   states << :class_expected
@@ -354,7 +353,7 @@ module Scanners
           elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
             encoder.begin_group :string
-            # warn 'heredoc in heredoc?' if heredoc_delimiter
+            warn 'heredoc in heredoc?' if heredoc_delimiter
             heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
             encoder.text_token match, :delimiter
             states.push self[3] ? :sqstring : :dqstring
@@ -462,7 +461,7 @@ module Scanners
               states.push :php
               encoder.text_token match, :delimiter
             else
-              encoder.text_token match, :content
+              encoder.text_token match, :string
             end
           elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
             encoder.text_token match, :local_variable

data/lib/coderay/scanners/plaintext.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module CodeRay
+module Scanners
+  # Scanner for plain text.
+  #
+  # Yields just one token of the kind :plain.
+  #
+  # Alias: +plain+
+  class Plaintext < Scanner
+    register_for :plaintext, :plain
+    title 'Plain text'
+    KINDS_NOT_LOC = [:plain]  # :nodoc:
+  protected
+    def scan_tokens encoder, options
+      encoder.text_token string, :plain
+      encoder
+    end
+  end
+end
+end

data/lib/coderay/scanners/python.rb CHANGED Viewed

@@ -58,7 +58,7 @@ module Scanners
       add(KEYWORDS, :keyword).
       add(OLD_KEYWORDS, :old_keyword).
       add(PREDEFINED_METHODS_AND_TYPES, :predefined).
-      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
+      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
       add(PREDEFINED_EXCEPTIONS, :exception)  # :nodoc:
     NAME = / [^\W\d] \w* /x  # :nodoc:
@@ -107,7 +107,7 @@ module Scanners
       string_raw = false
       string_type = nil
       docstring_coming = match?(/#{DOCSTRING_COMING}/o)
-      last_token_dot = false
+      import_clause = class_name_follows = last_token_dot = false
       unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
       from_import_state = []
@@ -194,7 +194,7 @@ module Scanners
             encoder.text_token match, :hex
           elsif match = scan(/0[bB][01]+[lL]?/)
-            encoder.text_token match, :binary
+            encoder.text_token match, :bin
           elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
             if scan(/[jJ]/)
@@ -205,7 +205,7 @@ module Scanners
             end
           elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
-            encoder.text_token match, :octal
+            encoder.text_token match, :oct
           elsif match = scan(/\d+([lL])?/)
             if self[1] == nil && scan(/[jJ]/)

data/lib/coderay/scanners/{erb.rb → rhtml.rb} RENAMED Viewed

@@ -5,23 +5,23 @@ module Scanners
   load :ruby
   # Scanner for HTML ERB templates.
-  class ERB < Scanner
+  class RHTML < Scanner
-    register_for :erb
+    register_for :rhtml
     title 'HTML ERB Template'
     KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
     ERB_RUBY_BLOCK = /
-      (<%(?!%)[-=\#]?)
-      ((?>
+      <%(?!%)[=-]?
+      (?>
         [^\-%]*    # normal*
         (?>        # special
           (?: %(?!>) | -(?!%>) )
           [^\-%]*  # normal*
         )*
-      ))
-      ((?: -?%> )?)
+      )
+      (?: -?%> )?
     /x  # :nodoc:
     START_OF_ERB = /
@@ -44,29 +44,21 @@ module Scanners
       until eos?
-        if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_rest) and not match.empty?
+        if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty?
           @html_scanner.tokenize match, :tokens => encoder
         elsif match = scan(/#{ERB_RUBY_BLOCK}/o)
-          start_tag = self[1]
-          code = self[2]
-          end_tag = self[3]
+          start_tag = match[/\A<%[-=]?/]
+          end_tag = match[/-?%?>?\z/]
           encoder.begin_group :inline
           encoder.text_token start_tag, :inline_delimiter
-          if start_tag == '<%#'
-            encoder.text_token code, :comment
-          else
-            @ruby_scanner.tokenize code, :tokens => encoder
-          end unless code.empty?
+          code = match[start_tag.size .. -1 - end_tag.size]
+          @ruby_scanner.tokenize code
           encoder.text_token end_tag, :inline_delimiter unless end_tag.empty?
           encoder.end_group :inline
         else
           raise_inspect 'else-case reached!', encoder
         end
       end

data/lib/coderay/scanners/ruby.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module CodeRay
 module Scanners
   # This scanner is really complex, since Ruby _is_ a complex language!
   #
   # It tries to highlight 100% of all common code,
@@ -8,16 +8,22 @@ module Scanners
   #
   # It is optimized for HTML highlighting, and is not very useful for
   # parsing or pretty printing.
+  #
+  # For now, I think it's better than the scanners in VIM or Syntax, or
+  # any highlighter I was able to find, except Caleb's RubyLexer.
+  #
+  # I hope it's also better than the rdoc/irb lexer.
+  #
+  # Alias: +irb+
   class Ruby < Scanner
     register_for :ruby
     file_extension 'rb'
+    helper :patterns
-    autoload :Patterns,    'coderay/scanners/ruby/patterns'
-    autoload :StringState, 'coderay/scanners/ruby/string_state'
-    def interpreted_string_state
-      StringState.new :string, true, '"'
+    unless defined? EncodingError
+      EncodingError = Class.new Exception  # :nodoc:
     end
   protected
@@ -27,10 +33,11 @@ module Scanners
     end
     def scan_tokens encoder, options
-      state, heredocs = options[:state] || @state
-      heredocs = heredocs.dup if heredocs.is_a?(Array)
-      if state && state.instance_of?(StringState)
+      patterns = Patterns  # avoid constant lookup
+      state = @state
+      if state.instance_of? patterns::StringState
         encoder.begin_group state.type
       end
@@ -39,89 +46,165 @@ module Scanners
       method_call_expected = false
       value_expected = true
+      heredocs = nil
       inline_block_stack = nil
       inline_block_curly_depth = 0
-      if heredocs
-        state = heredocs.shift
-        encoder.begin_group state.type
-        heredocs = nil if heredocs.empty?
-      end
       # def_object_stack = nil
       # def_object_paren_depth = 0
-      patterns = Patterns  # avoid constant lookup
       unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
       until eos?
-        if state.instance_of? ::Symbol
-          if match = scan(/[ \t\f\v]+/)
-            encoder.text_token match, :space
-          elsif match = scan(/\n/)
-            if heredocs
-              unscan  # heredoc scanning needs \n at start
-              state = heredocs.shift
-              encoder.begin_group state.type
-              heredocs = nil if heredocs.empty?
+        if state.instance_of? patterns::StringState
+          match = scan_until(state.pattern) || scan_until(/\z/)
+          encoder.text_token match, :content unless match.empty?
+          break if eos?
+          if state.heredoc and self[1]  # end of heredoc
+            match = getch.to_s
+            match << scan_until(/$/) unless eos?
+            encoder.text_token match, :delimiter
+            encoder.end_group state.type
+            state = state.next_state
+            next
+          end
+          case match = getch
+          when state.delim
+            if state.paren_depth
+              state.paren_depth -= 1
+              if state.paren_depth > 0
+                encoder.text_token match, :nesting_delimiter
+                next
+              end
+            end
+            encoder.text_token match, :delimiter
+            if state.type == :regexp and not eos?
+              modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
+              encoder.text_token modifiers, :modifier unless modifiers.empty?
+            end
+            encoder.end_group state.type
+            value_expected = false
+            state = state.next_state
+          when '\\'
+            if state.interpreted
+              if esc = scan(/ #{patterns::ESCAPE} /ox)
+                encoder.text_token match + esc, :char
+              else
+                encoder.text_token match, :error
+              end
             else
-              state = :initial if state == :undef_comma_expected
-              encoder.text_token match, :space
+              case m = getch
+              when state.delim, '\\'
+                encoder.text_token match + m, :char
+              when nil
+                encoder.text_token match, :content
+              else
+                encoder.text_token match + m, :content
+              end
+            end
+          when '#'
+            case peek(1)
+            when '{'
+              inline_block_stack ||= []
+              inline_block_stack << [state, inline_block_curly_depth, heredocs]
               value_expected = true
+              state = :initial
+              inline_block_curly_depth = 1
+              encoder.begin_group :inline
+              encoder.text_token match + getch, :inline_delimiter
+            when '$', '@'
+              encoder.text_token match, :escape
+              last_state = state
+              state = :initial
+            else
+              raise_inspect 'else-case # reached; #%p not handled' %
+                [peek(1)], encoder
             end
+          when state.opening_paren
+            state.paren_depth += 1
+            encoder.text_token match, :nesting_delimiter
+          when /#{patterns::REGEXP_SYMBOLS}/ox
+            encoder.text_token match, :function
+          else
+            raise_inspect 'else-case " reached; %p not handled, state = %p' %
+              [match, state], encoder
+          end
+        else
+          if match = scan(/[ \t\f]+/)
+            match << scan(/\s*/) unless eos? || heredocs
+            value_expected = true if match.index(?\n)
+            encoder.text_token match, :space
-          elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
-            encoder.text_token match, self[1] ? :doctype : :comment
-          elsif match = scan(/\\\n/)
+          elsif match = scan(/\\?\n/)
+            if match == "\n"
+              value_expected = true
+              state = :initial if state == :undef_comma_expected
+            end
             if heredocs
               unscan  # heredoc scanning needs \n at start
-              encoder.text_token scan(/\\/), :space
               state = heredocs.shift
               encoder.begin_group state.type
               heredocs = nil if heredocs.empty?
+              next
             else
-              encoder.text_token match, :space
+              match << scan(/\s*/) unless eos?
             end
+            encoder.text_token match, :space
+          elsif bol? && match = scan(/\#!.*/)
+            encoder.text_token match, :doctype
+          elsif match = scan(/\#.*/) or
+             (bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o))
+            encoder.text_token match, :comment
           elsif state == :initial
             # IDENTS #
-            if !method_call_expected &&
+            if !method_call_expected and
                match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                       /#{patterns::METHOD_NAME}/o)
               value_expected = false
               kind = patterns::IDENT_KIND[match]
               if kind == :ident
-                if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
+                if match[/^[A-Z]/] && !match[/[!?]$/] && !match?(/\(/)
                   kind = :constant
                 end
-              elsif kind == :keyword
+              elsif kind == :reserved
                 state = patterns::KEYWORD_NEW_STATE[match]
                 value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
               end
               value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
               encoder.text_token match, kind
-            elsif method_call_expected &&
+            elsif method_call_expected and
                match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                       /#{patterns::METHOD_AFTER_DOT}/o)
-              if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
+              if method_call_expected == '::' && match[/^[A-Z]/] && !match?(/\(/)
                 encoder.text_token match, :constant
               else
                 encoder.text_token match, :ident
               end
               method_call_expected = false
               value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
             # OPERATORS #
-            elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
+            elsif not method_call_expected and match = scan(/ \.\.\.? | (\.|::) | [,\(\)\[\]\{\}] | ==?=? /x)
+              value_expected = match !~ / [.\)\]\}] /x || match =~ /\A\.\./
               method_call_expected = self[1]
-              value_expected = !method_call_expected && self[2]
               if inline_block_stack
                 case match
                 when '{'
@@ -139,109 +222,111 @@ module Scanners
                 end
               end
               encoder.text_token match, :operator
-            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
-                                         /#{patterns::SYMBOL}/o)
-              case delim = match[1]
-              when ?', ?"
-                encoder.begin_group :symbol
-                encoder.text_token ':', :symbol
-                match = delim.chr
-                encoder.text_token match, :delimiter
-                state = self.class::StringState.new :symbol, delim == ?", match
-              else
-                encoder.text_token match, :symbol
-                value_expected = false
-              end
-            elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
+            elsif match = scan(/ ['"] /mx)
               encoder.begin_group :string
-              if match.size == 1
-                encoder.text_token match, :delimiter
-                state = self.class::StringState.new :string, match == '"', match  # important for streaming
-              else
-                encoder.text_token match[0,1], :delimiter
-                encoder.text_token match[1..-2], :content if match.size > 2
-                encoder.text_token match[-1,1], :delimiter
-                encoder.end_group :string
-                value_expected = false
-              end
+              encoder.text_token match, :delimiter
+              state = patterns::StringState.new :string, match == '"', match  # important for streaming
             elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                          /#{patterns::INSTANCE_VARIABLE}/o)
               value_expected = false
               encoder.text_token match, :instance_variable
-            elsif value_expected && match = scan(/\//)
+            elsif value_expected and match = scan(/\//)
               encoder.begin_group :regexp
               encoder.text_token match, :delimiter
-              state = self.class::StringState.new :regexp, true, '/'
+              interpreted = true
+              state = patterns::StringState.new :regexp, interpreted, '/'
             elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
               if method_call_expected
                 encoder.text_token match, :error
                 method_call_expected = false
               else
-                encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
+                encoder.text_token match, self[1] ? :float : :integer
               end
               value_expected = false
-            elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
+            elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
+                                         /#{patterns::SYMBOL}/o)
+              case delim = match[1]
+              when ?', ?"
+                encoder.begin_group :symbol
+                encoder.text_token ':', :symbol
+                match = delim.chr
+                encoder.text_token match, :delimiter
+                state = patterns::StringState.new :symbol, delim == ?", match
+              else
+                encoder.text_token match, :symbol
+                value_expected = false
+              end
+            elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
               value_expected = true
               encoder.text_token match, :operator
-            elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
+            elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
+              indented = self[1] == '-'
               quote = self[3]
               delim = self[quote ? 4 : 2]
               kind = patterns::QUOTE_TO_TYPE[quote]
               encoder.begin_group kind
               encoder.text_token match, :delimiter
               encoder.end_group kind
+              heredoc = patterns::StringState.new kind, quote != '\'',
+                delim, (indented ? :indented : :linestart )
               heredocs ||= []  # create heredocs if empty
-              heredocs << self.class::StringState.new(kind, quote != "'", delim,
-                self[1] == '-' ? :indented : :linestart)
+              heredocs << heredoc
               value_expected = false
-            elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
-              kind = patterns::FANCY_STRING_KIND[self[1]]
+            elsif value_expected and match = scan(/#{patterns::FANCY_START}/o)
+              kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
+                raise_inspect 'Unknown fancy string: %%%p' % k, encoder
+              end
               encoder.begin_group kind
-              state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
+              state = patterns::StringState.new kind, interpreted, self[2]
               encoder.text_token match, :delimiter
-            elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
+            elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
               value_expected = false
               encoder.text_token match, :integer
-            elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
+            elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
               value_expected = true
               encoder.text_token match, :operator
             elsif match = scan(/`/)
-              encoder.begin_group :shell
-              encoder.text_token match, :delimiter
-              state = self.class::StringState.new :shell, true, match
+              if method_call_expected
+                encoder.text_token match, :operator
+                value_expected = true
+              else
+                encoder.begin_group :shell
+                encoder.text_token match, :delimiter
+                state = patterns::StringState.new :shell, true, match
+              end
             elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                          /#{patterns::GLOBAL_VARIABLE}/o)
               encoder.text_token match, :global_variable
               value_expected = false
             elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                          /#{patterns::CLASS_VARIABLE}/o)
               encoder.text_token match, :class_variable
               value_expected = false
             elsif match = scan(/\\\z/)
               encoder.text_token match, :space
             else
               if method_call_expected
                 method_call_expected = false
                 next
               end
-              unless unicode
+              if !unicode
                 # check for unicode
-                $DEBUG_BEFORE, $DEBUG = $DEBUG, false
+                debug, $DEBUG = $DEBUG, false
                 begin
                   if check(/./mu).size > 1
                     # seems like we should try again with unicode
@@ -250,7 +335,7 @@ module Scanners
                 rescue
                   # bad unicode char; use getch
                 ensure
-                  $DEBUG = $DEBUG_BEFORE
+                  $DEBUG = debug
                 end
                 next if unicode
               end
@@ -263,7 +348,7 @@ module Scanners
               state = last_state
               last_state = nil
             end
           elsif state == :def_expected
             if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                       /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
@@ -273,7 +358,7 @@ module Scanners
               last_state = :dot_expected
               state = :initial
             end
           elsif state == :dot_expected
             if match = scan(/\.|::/)
               # invalid definition
@@ -282,7 +367,7 @@ module Scanners
             else
               state = :initial
             end
           elsif state == :module_expected
             if match = scan(/<</)
               encoder.text_token match, :operator
@@ -293,7 +378,7 @@ module Scanners
                 encoder.text_token match, :class
               end
             end
           elsif state == :undef_expected
             state = :undef_comma_expected
             if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
@@ -306,7 +391,7 @@ module Scanners
                 encoder.text_token ':', :symbol
                 match = delim.chr
                 encoder.text_token match, :delimiter
-                state = self.class::StringState.new :symbol, delim == ?", match
+                state = patterns::StringState.new :symbol, delim == ?", match
                 state.next_state = :undef_comma_expected
               else
                 encoder.text_token match, :symbol
@@ -314,7 +399,7 @@ module Scanners
             else
               state = :initial
             end
           elsif state == :undef_comma_expected
             if match = scan(/,/)
               encoder.text_token match, :operator
@@ -322,7 +407,7 @@ module Scanners
             else
               state = :initial
             end
           elsif state == :alias_expected
             match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                    /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
@@ -333,129 +418,33 @@ module Scanners
               encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
             end
             state = :initial
           else
-            #:nocov:
             raise_inspect 'Unknown state: %p' % [state], encoder
-            #:nocov:
-          end
-        else  # StringState
-          match = scan_until(state.pattern) || scan_rest
-          unless match.empty?
-            encoder.text_token match, :content
-            break if eos?
-          end
-          if state.heredoc && self[1]  # end of heredoc
-            match = getch
-            match << scan_until(/$/) unless eos?
-            encoder.text_token match, :delimiter unless match.empty?
-            encoder.end_group state.type
-            state = state.next_state
-            next
-          end
-          case match = getch
-          when state.delim
-            if state.paren_depth
-              state.paren_depth -= 1
-              if state.paren_depth > 0
-                encoder.text_token match, :content
-                next
-              end
-            end
-            encoder.text_token match, :delimiter
-            if state.type == :regexp && !eos?
-              match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
-              encoder.text_token match, :modifier unless match.empty?
-            end
-            encoder.end_group state.type
-            value_expected = false
-            state = state.next_state
-          when '\\'
-            if state.interpreted
-              if esc = scan(/#{patterns::ESCAPE}/o)
-                encoder.text_token match + esc, :char
-              else
-                encoder.text_token match, :error
-              end
-            else
-              case esc = getch
-              when nil
-                encoder.text_token match, :content
-              when state.delim, '\\'
-                encoder.text_token match + esc, :char
-              else
-                encoder.text_token match + esc, :content
-              end
-            end
-          when '#'
-            case peek(1)
-            when '{'
-              inline_block_stack ||= []
-              inline_block_stack << [state, inline_block_curly_depth, heredocs]
-              value_expected = true
-              state = :initial
-              inline_block_curly_depth = 1
-              encoder.begin_group :inline
-              encoder.text_token match + getch, :inline_delimiter
-            when '$', '@'
-              encoder.text_token match, :escape
-              last_state = state
-              state = :initial
-            else
-              #:nocov:
-              raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
-              #:nocov:
-            end
-          when state.opening_paren
-            state.paren_depth += 1
-            encoder.text_token match, :content
-          else
-            #:nocov
-            raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
-            #:nocov:
           end
         end
       end
       # cleaning up
-      if state.is_a? StringState
-        encoder.end_group state.type
-      end
       if options[:keep_state]
-        if state.is_a?(StringState) && state.heredoc
-          (heredocs ||= []).unshift state
-          state = :initial
-        elsif heredocs && heredocs.empty?
-          heredocs = nil
-        end
-        @state = state, heredocs
+        @state = state
+      end
+      if state.is_a? patterns::StringState
+        encoder.end_group state.type
       end
       if inline_block_stack
         until inline_block_stack.empty?
-          state, = *inline_block_stack.pop
-          encoder.end_group :inline
+          state, *more = inline_block_stack.pop
+          encoder.end_group :inline if more
           encoder.end_group state.type
         end
       end
       encoder
     end
   end
 end
 end