RubyGems - coderay - Versions diffs - 1.0.0.598.pre → 1.0.0.738.pre - Mend

coderay 1.0.0.598.pre → 1.0.0.738.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

data/bin/coderay +1 -1
data/lib/coderay.rb +38 -32
data/lib/coderay/duo.rb +1 -54
data/lib/coderay/encoder.rb +31 -33
data/lib/coderay/encoders/_map.rb +4 -2
data/lib/coderay/encoders/comment_filter.rb +0 -61
data/lib/coderay/encoders/count.rb +2 -23
data/lib/coderay/encoders/debug.rb +11 -60
data/lib/coderay/encoders/filter.rb +0 -46
data/lib/coderay/encoders/html.rb +83 -91
data/lib/coderay/encoders/html/css.rb +1 -6
data/lib/coderay/encoders/html/numbering.rb +18 -21
data/lib/coderay/encoders/html/output.rb +10 -52
data/lib/coderay/encoders/json.rb +19 -39
data/lib/coderay/encoders/lines_of_code.rb +7 -52
data/lib/coderay/encoders/null.rb +6 -13
data/lib/coderay/encoders/statistic.rb +30 -93
data/lib/coderay/encoders/terminal.rb +3 -4
data/lib/coderay/encoders/text.rb +1 -23
data/lib/coderay/encoders/token_kind_filter.rb +0 -58
data/lib/coderay/helpers/file_type.rb +119 -240
data/lib/coderay/helpers/gzip.rb +41 -0
data/lib/coderay/helpers/plugin.rb +237 -307
data/lib/coderay/scanner.rb +112 -88
data/lib/coderay/scanners/_map.rb +3 -3
data/lib/coderay/scanners/c.rb +7 -7
data/lib/coderay/scanners/clojure.rb +204 -0
data/lib/coderay/scanners/css.rb +10 -20
data/lib/coderay/scanners/debug.rb +9 -55
data/lib/coderay/scanners/diff.rb +21 -4
data/lib/coderay/scanners/html.rb +65 -18
data/lib/coderay/scanners/java.rb +3 -2
data/lib/coderay/scanners/java_script.rb +3 -3
data/lib/coderay/scanners/json.rb +7 -6
data/lib/coderay/scanners/php.rb +2 -1
data/lib/coderay/scanners/rhtml.rb +6 -2
data/lib/coderay/scanners/ruby.rb +193 -193
data/lib/coderay/scanners/ruby/patterns.rb +15 -82
data/lib/coderay/scanners/ruby/string_state.rb +71 -0
data/lib/coderay/scanners/sql.rb +1 -1
data/lib/coderay/scanners/yaml.rb +4 -2
data/lib/coderay/styles/_map.rb +2 -2
data/lib/coderay/styles/alpha.rb +48 -38
data/lib/coderay/styles/cycnus.rb +2 -1
data/lib/coderay/token_kinds.rb +88 -86
data/lib/coderay/tokens.rb +88 -112
data/test/functional/basic.rb +184 -5
data/test/functional/examples.rb +4 -4
data/test/functional/for_redcloth.rb +3 -2
data/test/functional/suite.rb +7 -6
metadata +11 -24
data/lib/coderay/helpers/gzip_simple.rb +0 -123
data/test/functional/load_plugin_scanner.rb +0 -11
data/test/functional/vhdl.rb +0 -126
data/test/functional/word_list.rb +0 -79

data/lib/coderay/scanner.rb CHANGED Viewed

@@ -1,7 +1,12 @@
-module CodeRay
+# encoding: utf-8
+require 'strscan'
-  require 'coderay/helpers/plugin'
+module CodeRay
+  autoload :WordList, 'coderay/helpers/word_list'
+  # FIXME: Rename CaseIgnoringWordList to WordList::CaseIgnoring.
+  autoload :CaseIgnoringWordList, 'coderay/helpers/word_list'
   # = Scanners
   #
   # This module holds the Scanner class and its subclasses.
@@ -16,9 +21,8 @@ module CodeRay
   module Scanners
     extend PluginHost
     plugin_path File.dirname(__FILE__), 'scanners'
-    require 'strscan'
     # = Scanner
     #
     # The base class for all Scanners.
@@ -46,61 +50,83 @@ module CodeRay
       extend Plugin
       plugin_host Scanners
       # Raised if a Scanner fails while scanning
-      ScanError = Class.new(Exception)
-      require 'coderay/helpers/word_list'
+      ScanError = Class.new Exception
       # The default options for all scanner classes.
       #
       # Define @default_options for subclasses.
       DEFAULT_OPTIONS = { }
       KINDS_NOT_LOC = [:comment, :doctype, :docstring]
       class << self
-        def normify code
-          code = code.to_s.dup
-          # try using UTF-8
-          if code.respond_to? :force_encoding
-            debug, $DEBUG = $DEBUG, false
-            begin
-              code.force_encoding 'UTF-8'
-              code[/\z/]  # raises an ArgumentError when code contains a non-UTF-8 char
-            rescue ArgumentError
-              code.force_encoding 'binary'
-            ensure
-              $DEBUG = debug
-            end
+        # Normalizes the given code into a string with UNIX newlines, in the
+        # scanner's internal encoding, with invalid and undefined characters
+        # replaced by placeholders. Always returns a new object.
+        def normalize code
+          original = code
+          code = code.to_s unless code.is_a? ::String
+          if code.respond_to? :encoding
+            code = encode_with_encoding code, self.encoding
+          else
+            code = to_unix code if code.index ?\r
           end
-          # convert the string to UNIX newline format
-          code.gsub!(/\r\n?/, "\n") if code.index ?\r
+          # code = code.dup if code.eql? original
           code
         end
-        def file_extension extension = nil
-          if extension
-            @file_extension = extension.to_s
+        # The typical filename suffix for this scanner's language.
+        def file_extension extension = plugin_id
+          @file_extension ||= extension.to_s
+        end
+        # The encoding used internally by this scanner.
+        def encoding name = 'UTF-8'
+          @encoding ||= defined?(Encoding.find) && Encoding.find(name)
+        end
+        # The lang of this Scanner class, which is equal to its Plugin ID.
+        alias lang plugin_id
+      protected
+        def encode_with_encoding code, target_encoding
+          if code.encoding == target_encoding
+            if code.valid_encoding?
+              return to_unix code
+            else
+              source_encoding = guess_encoding code
+            end
           else
-            @file_extension ||= plugin_id.to_s
+            source_encoding = code.encoding
           end
+          # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
+          code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
         end
+        def to_unix code
+          code.gsub(/\r\n?/, "\n")
+        end
+        def guess_encoding s
+          #:nocov:
+          IO.popen("file -b --mime -", "w+") do |file|
+            file.write s[0, 1024]
+            file.close_write
+            begin
+              Encoding.find file.gets[/charset=([-\w]+)/, 1]
+            rescue ArgumentError
+              Encoding::BINARY
+            end
+          end
+          #:nocov:
+        end
       end
-=begin
-## Excluded for speed reasons; protected seems to make methods slow.
-  # Save the StringScanner methods from being called.
-  # This would not be useful for highlighting.
-  strscan_public_methods =
-    StringScanner.instance_methods -
-    StringScanner.ancestors[1].instance_methods
-  protected(*strscan_public_methods)
-=end
       # Create a new Scanner.
       #
       # * +code+ is the input String and is handled by the superclass
@@ -110,43 +136,46 @@ module CodeRay
       #   overwrite default options here.)
       #
       # Else, a Tokens object is used.
-      def initialize code='', options = {}
-        raise "I am only the basic Scanner class. I can't scan "\
-          "anything. :( Use my subclasses." if self.class == Scanner
+      def initialize code = '', options = {}
+        if self.class == Scanner
+          raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
+        end
         @options = self.class::DEFAULT_OPTIONS.merge options
-        super Scanner.normify(code)
+        super self.class.normalize(code)
         @tokens = options[:tokens] || Tokens.new
         @tokens.scanner = self if @tokens.respond_to? :scanner=
         setup
       end
-      # Sets back the scanner. Subclasses are to define the reset_instance
-      # method.
+      # Sets back the scanner. Subclasses should define the reset_instance
+      # method instead of this one.
       def reset
         super
         reset_instance
       end
+      # Set a new string to be scanned.
       def string= code
-        code = Scanner.normify(code)
+        code = self.class.normalize(code)
         super code
         reset_instance
       end
-      # More mnemonic accessor name for the input string.
-      alias code string
-      alias code= string=
-      # Returns the Plugin ID for this scanner.
+      # the Plugin ID for this scanner
       def lang
-        self.class.plugin_id.to_s
+        self.class.lang
       end
-      # Scans the code and returns all tokens in a Tokens object.
+      # the default file extension for this scanner
+      def file_extension
+        self.class.file_extension
+      end
+      # Scan the code and return all tokens in a Tokens object.
       def tokenize source = nil, options = {}
         options = @options.merge(options)
         @tokens = options[:tokens] || @tokens || Tokens.new
@@ -170,18 +199,18 @@ module CodeRay
         end
       end
-      # Caches the result of tokenize.
+      # Cache the result of tokenize.
       def tokens
         @cached_tokens ||= tokenize
       end
-      # Traverses the tokens.
+      # Traverse the tokens.
       def each &block
         tokens.each(&block)
       end
       include Enumerable
-      # The current line position of the scanner.
+      # The current line position of the scanner. See also #column.
       #
       # Beware, this is implemented inefficiently. It should be used
       # for debugging only.
@@ -189,27 +218,23 @@ module CodeRay
         string[0..pos].count("\n") + 1
       end
-      # The current column position of the scanner. See #line.
+      # The current column position of the scanner. See also #line.
+      #
+      # Beware, this is implemented inefficiently. It should be used
+      # for debugging only.
       def column pos = self.pos
         return 0 if pos <= 0
-        string = string()
-        if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
-          @bin_string ||= string.dup.force_encoding('binary')
-          string = @bin_string
+        string = self.string
+        if string.respond_to?(:bytesize) && string.bytesize != string.size
+          #:nocov:
+          string = string.dup.force_encoding('binary')
+          #:nocov:
         end
         pos - (string.rindex(?\n, pos) || 0)
       end
-      def marshal_dump  # :nodoc:
-        @options
-      end
-      def marshal_load options  # :nodoc:
-        @options = options
-      end
     protected
       # Can be implemented by subclasses to do some initialization
       # that has to be done once per instance.
       #
@@ -217,15 +242,14 @@ module CodeRay
       # scan.
       def setup  # :doc:
       end
       # This is the central method, and commonly the only one a
       # subclass implements.
       #
       # Subclasses must implement this method; it must return +tokens+
       # and must only use Tokens#<< for storing scanned tokens!
       def scan_tokens tokens, options  # :doc:
-        raise NotImplementedError,
-          "#{self.class}#scan_tokens not implemented."
+        raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
       end
       # Resets the scanner.
@@ -234,7 +258,7 @@ module CodeRay
         @cached_tokens = nil
         @bin_string = nil if defined? @bin_string
       end
       # Scanner error with additional status information
       def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
         raise ScanError, <<-EOE % [
@@ -266,8 +290,8 @@ surrounding code:
           string[pos, ambit],
         ]
       end
     end
   end
 end

data/lib/coderay/scanners/_map.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module CodeRay
 module Scanners
   map \
     :cplusplus => :cpp,
     :'c++' => :cpp,
@@ -15,8 +15,8 @@ module Scanners
     :plain => :plaintext,
     :xhtml => :html,
     :yml => :yaml
   default :plain
 end
 end

data/lib/coderay/scanners/c.rb CHANGED Viewed

@@ -65,10 +65,6 @@ module Scanners
           elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
             encoder.text_token match, :comment
-          elsif match = scan(/ \# \s* if \s* 0 /x)
-            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
-            encoder.text_token match, :comment
           elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
             label_expected = match =~ /[;\{\}]/
             if case_expected
@@ -93,9 +89,6 @@ module Scanners
             end
             encoder.text_token match, kind
-          elsif match = scan(/\$/)
-            encoder.text_token match, :ident
           elsif match = scan(/L?"/)
             encoder.begin_group :string
             if match[0] == ?L
@@ -105,6 +98,10 @@ module Scanners
             encoder.text_token match, :delimiter
             state = :string
+          elsif match = scan(/ \# \s* if \s* 0 /x)
+            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
+            encoder.text_token match, :comment
           elsif match = scan(/#[ \t]*(\w*)/)
             encoder.text_token match, :preprocessor
             in_preproc_line = true
@@ -115,6 +112,9 @@ module Scanners
             label_expected = false
             encoder.text_token match, :char
+          elsif match = scan(/\$/)
+            encoder.text_token match, :ident
           elsif match = scan(/0[xX][0-9A-Fa-f]+/)
             label_expected = false
             encoder.text_token match, :hex

data/lib/coderay/scanners/clojure.rb ADDED Viewed

@@ -0,0 +1,204 @@
+# encoding: utf-8
+module CodeRay
+  module Scanners
+    # Clojure scanner by Licenser.
+    class Clojure < Scanner
+      register_for :clojure
+      file_extension 'clj'
+      SPECIAL_FORMS = %w[
+        def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
+        new
+      ]  # :nodoc:
+      CORE_FORMS = %w[
+        + - -> ->> .. / * < <= = == > >= accessor aclone add-classpath add-watch
+        agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
+        alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
+        aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
+        assert assoc assoc! assoc-in associative? atom await await-for bases bean
+        bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
+        bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
+        booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
+        char-array char-escape-string char-name-string char? chars class class?
+        clear-agent-errors clojure-version coll? comment commute comp comparator
+        compare compare-and-set! compile complement concat cond condp conj conj!
+        cons constantly construct-proxy contains? count counted? create-ns
+        create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
+        defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
+        deliver denominator deref derive descendants disj disj! dissoc dissoc!
+        distinct distinct? doall doc dorun doseq dosync dotimes doto double
+        double-array doubles drop drop-last drop-while empty empty? ensure
+        enumeration-seq error-handler error-mode eval even? every? extend
+        extend-protocol extend-type extenders extends? false? ffirst file-seq
+        filter find find-doc find-ns find-var first float float-array float?
+        floats flush fn fn? fnext for force format future future-call future-cancel
+        future-cancelled? future-done? future? gen-class gen-interface gensym get
+        get-in get-method get-proxy-class get-thread-bindings get-validator hash
+        hash-map hash-set identical? identity if-let if-not ifn? import in-ns
+        inc init-proxy instance? int int-array integer? interleave intern
+        interpose into into-array ints io! isa? iterate iterator-seq juxt key
+        keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
+        list? load load-file load-reader load-string loaded-libs locking long
+        long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
+        map map? mapcat max max-key memfn memoize merge merge-with meta methods
+        min min-key mod name namespace neg? newline next nfirst nil? nnext not
+        not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
+        ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
+        nthnext num number? numerator object-array odd? or parents partial
+        partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
+        pos? pr pr-str prefer-method prefers print print-namespace-doc
+        print-str printf println println-str prn prn-str promise proxy
+        proxy-mappings proxy-super push-thread-bindings pvalues quot rand
+        rand-int range ratio? rationalize re-find re-groups re-matcher
+        re-matches re-pattern re-seq read read-line read-string reduce ref
+        ref-history-count ref-max-history ref-min-history ref-set refer
+        refer-clojure reify release-pending-sends rem remove remove-all-methods
+        remove-method remove-ns remove-watch repeat repeatedly replace replicate
+        require reset! reset-meta! resolve rest restart-agent resultset-seq
+        reverse reversible? rseq rsubseq satisfies? second select-keys send
+        send-off seq seq? seque sequence sequential? set set-error-handler!
+        set-error-mode! set-validator! set? short short-array shorts
+        shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
+        sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
+        split-at split-with str string? struct struct-map subs subseq subvec
+        supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
+        take-nth take-while test the-ns thread-bound? time to-array to-array-2d
+        trampoline transient tree-seq true? type unchecked-add unchecked-dec
+        unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
+        unchecked-remainder unchecked-subtract underive update-in update-proxy
+        use val vals var-get var-set var? vary-meta vec vector vector-of vector?
+        when when-first when-let when-not while with-bindings with-bindings*
+        with-in-str with-local-vars with-meta with-open with-out-str
+        with-precision xml-seq zero? zipmap
+      ]  # :nodoc:
+      PREDEFINED_CONSTANTS = %w[
+        true false *1 *2 *3 *agent* *clojure-version* *command-line-args*
+        *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
+        *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
+        *print-readably* *read-eval* *warn-on-reflection*
+      ]  # :nodoc:
+      IDENT_KIND = CaseIgnoringWordList.new(:ident).
+        add(SPECIAL_FORMS, :reserved).
+        add(CORE_FORMS, :reserved).
+        add(PREDEFINED_CONSTANTS, :pre_constant)
+      BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=][a-zA-Z0-9$&*+!\/_?<>\-\#]*/
+      IDENTIFIER = /(?:[@']?(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
+      SYMBOL = /::?#{IDENTIFIER}/o
+      DIGIT = /\d/
+      DIGIT10 = DIGIT
+      DIGIT16 = /[0-9a-f]/i
+      DIGIT8 = /[0-7]/
+      DIGIT2 = /[01]/
+      RADIX16 = /\#x/i
+      RADIX8 = /\#o/i
+      RADIX2 = /\#b/i
+      RADIX10 = /\#d/i
+      EXACTNESS = /#i|#e/i
+      SIGN = /[\+-]?/
+      EXP_MARK = /[esfdl]/i
+      EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
+      SUFFIX = /#{EXP}?/
+      PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
+      PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
+      PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
+      PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
+      UINT10 = /#{DIGIT10}+#*/
+      UINT16 = /#{DIGIT16}+#*/
+      UINT8 = /#{DIGIT8}+#*/
+      UINT2 = /#{DIGIT2}+#*/
+      DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
+      UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
+      UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
+      UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
+      UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
+      REAL10 = /#{SIGN}#{UREAL10}/
+      REAL16 = /#{SIGN}#{UREAL16}/
+      REAL8 = /#{SIGN}#{UREAL8}/
+      REAL2 = /#{SIGN}#{UREAL2}/
+      IMAG10 = /i|#{UREAL10}i/
+      IMAG16 = /i|#{UREAL16}i/
+      IMAG8 = /i|#{UREAL8}i/
+      IMAG2 = /i|#{UREAL2}i/
+      COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
+      COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
+      COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
+      COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
+      NUM10 = /#{PREFIX10}?#{COMPLEX10}/
+      NUM16 = /#{PREFIX16}#{COMPLEX16}/
+      NUM8 = /#{PREFIX8}#{COMPLEX8}/
+      NUM2 = /#{PREFIX2}#{COMPLEX2}/
+      NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
+    protected
+      def scan_tokens encoder, options
+        state = :initial
+        ident_kind = IDENT_KIND
+        until eos?
+          case state
+          when :initial
+            if match = scan(/ \s+ | \\\n | , /x)
+              encoder.text_token match, :space
+            elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|@/)
+              encoder.text_token match, :operator  # FIXME: was :operator_fat
+            elsif match = scan(/;.*/)
+              encoder.text_token match, :comment
+            elsif match = scan(/\#\\(?:newline|space|.?)/)
+              encoder.text_token match, :char
+            elsif match = scan(/\#[ft]/)
+              encoder.text_token match, :pre_constant
+            elsif match = scan(/#{IDENTIFIER}/o)
+              encoder.text_token match, ident_kind[matched]
+            elsif match = scan(/#{SYMBOL}/o)
+              encoder.text_token match, :symbol
+            elsif match = scan(/\./)
+              encoder.text_token match, :operator
+            elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
+              encoder.text_token match, :type
+            elsif match = scan(/ (\#)? " /x)
+              state = self[1] ? :regexp : :string
+              encoder.begin_group state
+              encoder.text_token match, :delimiter
+            elsif match = scan(/#{NUM}/o) and not matched.empty?
+              encoder.text_token match, match[/[.e]/i] ? :float : :integer
+            else
+              encoder.text_token getch, :error
+            end
+          when :string, :regexp
+            if match = scan(/[^"\\]+|\\.?/)
+              encoder.text_token match, :content
+            elsif match = scan(/"/)
+              encoder.text_token match, :delimiter
+              encoder.end_group state
+              state = :initial
+            else
+              raise_inspect "else case \" reached; %p not handled." % peek(1),
+                encoder, state
+            end
+          else
+            raise 'else case reached'
+          end
+        end
+        if [:string, :regexp].include? state
+          encoder.end_group state
+        end
+        encoder
+      end
+    end
+  end
+end