coderay 0.9.8 → 1.0.0

Files changed (87)
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79

data/lib/coderay/tokens.rb

@@ -1,6 +1,9 @@
  module CodeRay
-
- # = Tokens
+
+ # GZip library for writing and reading token dumps.
+ autoload :GZip, 'coderay/helpers/gzip'
+
+ # = Tokens TODO: Rewrite!
  #
  # The Tokens class represents a list of tokens returnd from
  # a Scanner.
@@ -8,7 +11,7 @@ module CodeRay
  # A token is not a special object, just a two-element Array
  # consisting of
  # * the _token_ _text_ (the original source of the token in a String) or
- # a _token_ _action_ (:open, :close, :begin_line, :end_line)
+ # a _token_ _action_ (begin_group, end_group, begin_line, end_line)
  # * the _token_ _kind_ (a Symbol representing the type of the token)
  #
  # A token looks like this:
@@ -18,16 +21,16 @@ module CodeRay
  # ['$^', :error]
  #
  # Some scanners also yield sub-tokens, represented by special
- # token actions, namely :open and :close.
+ # token actions, namely begin_group and end_group.
  #
  # The Ruby scanner, for example, splits "a string" into:
  #
  # [
- # [:open, :string],
+ # [:begin_group, :string],
  # ['"', :delimiter],
  # ['a string', :content],
  # ['"', :delimiter],
- # [:close, :string]
+ # [:end_group, :string]
  # ]
  #
  # Tokens is the interface between Scanners and Encoders:
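
Note: the grouping described in the comment above can be made visible with the Debug encoder, which prints token actions explicitly. A minimal sketch; the Debug output shown in the comment is only approximate:

    require 'coderay'

    # Scan a Ruby string literal; the scanner wraps it in a
    # begin_group/end_group pair around the delimiter and content tokens.
    tokens = CodeRay.scan('"a string"', :ruby)

    # Roughly: string<delimiter(")content(a string)delimiter(")>
    puts tokens.encode(:debug)
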
@@ -47,46 +50,11 @@ module CodeRay
  #
  # It also allows you to generate tokens directly (without using a scanner),
  # to load them from a file, and still use any Encoder that CodeRay provides.
- #
- # Tokens' subclass TokenStream allows streaming to save memory.
  class Tokens < Array

  # The Scanner instance that created the tokens.
  attr_accessor :scanner

- # Whether the object is a TokenStream.
- #
- # Returns false.
- def stream?
- false
- end
-
- # Iterates over all tokens.
- #
- # If a filter is given, only tokens of that kind are yielded.
- def each kind_filter = nil, &block
- unless kind_filter
- super(&block)
- else
- super() do |text, kind|
- next unless kind == kind_filter
- yield text, kind
- end
- end
- end
-
- # Iterates over all text tokens.
- # Range tokens like [:open, :string] are left out.
- #
- # Example:
- # tokens.each_text_token { |text, kind| text.replace html_escape(text) }
- def each_text_token
- each do |text, kind|
- next unless text.is_a? ::String
- yield text, kind
- end
- end
-
  # Encode the tokens using encoder.
  #
  # encoder can be
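
Note: with stream?, each(kind_filter) and each_text_token removed, Tokens is iterated like the plain Array it is. A hedged sketch, assuming the flat 1.0 layout in which content and kind alternate (as suggested by the count and text_token definitions further down):

    require 'coderay'

    # Assumption: the lazy result of CodeRay.scan exposes its Tokens via #tokens.
    tokens = CodeRay.scan('1 + 1', :ruby).tokens

    # content is a String for text tokens and a Symbol for token
    # actions such as :begin_group or :end_group; kind is the token kind.
    tokens.each_slice(2) do |content, kind|
      next unless content.is_a? String
      puts '%-10s %p' % [kind, content]
    end
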
@@ -96,120 +64,98 @@ module CodeRay
  #
  # options are passed to the encoder.
  def encode encoder, options = {}
- unless encoder.is_a? Encoders::Encoder
- unless encoder.is_a? Class
- encoder_class = Encoders[encoder]
- end
- encoder = encoder_class.new options
- end
+ encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
  encoder.encode_tokens self, options
  end
-
-
- # Turn into a string using Encoders::Text.
- #
- # +options+ are passed to the encoder if given.
- def to_s options = {}
- encode :text, options
+
+ # Turn tokens into a string by concatenating them.
+ def to_s
+ encode CodeRay::Encoders::Encoder.new
  end
-
+
  # Redirects unknown methods to encoder calls.
  #
  # For example, if you call +tokens.html+, the HTML encoder
  # is used to highlight the tokens.
  def method_missing meth, options = {}
- Encoders[meth].new(options).encode_tokens self
- end
-
- # Returns the tokens compressed by joining consecutive
- # tokens of the same kind.
- #
- # This can not be undone, but should yield the same output
- # in most Encoders. It basically makes the output smaller.
- #
- # Combined with dump, it saves space for the cost of time.
- #
- # If the scanner is written carefully, this is not required -
- # for example, consecutive //-comment lines could already be
- # joined in one comment token by the Scanner.
- def optimize
- last_kind = last_text = nil
- new = self.class.new
- for text, kind in self
- if text.is_a? String
- if kind == last_kind
- last_text << text
- else
- new << [last_text, last_kind] if last_kind
- last_text = text
- last_kind = kind
- end
- else
- new << [last_text, last_kind] if last_kind
- last_kind = last_text = nil
- new << [text, kind]
- end
- end
- new << [last_text, last_kind] if last_kind
- new
- end
-
- # Compact the object itself; see optimize.
- def optimize!
- replace optimize
+ encode meth, options
+ rescue PluginHost::PluginNotFound
+ super
  end

- # Ensure that all :open tokens have a correspondent :close one.
- #
- # TODO: Test this!
- def fix
- tokens = self.class.new
- # Check token nesting using a stack of kinds.
+ # Split the tokens into parts of the given +sizes+.
+ #
+ # The result will be an Array of Tokens objects. The parts have
+ # the text size specified by the parameter. In addition, each
+ # part closes all opened tokens. This is useful to insert tokens
+ # betweem them.
+ #
+ # This method is used by @Scanner#tokenize@ when called with an Array
+ # of source strings. The Diff encoder uses it for inline highlighting.
+ def split_into_parts *sizes
+ parts = []
  opened = []
- for type, kind in self
- case type
- when :open
- opened.push [:close, kind]
- when :begin_line
- opened.push [:end_line, kind]
- when :close, :end_line
- expected = opened.pop
- if [type, kind] != expected
- # Unexpected :close; decide what to do based on the kind:
- # - token was never opened: delete the :close (just skip it)
- next unless opened.rindex expected
- # - token was opened earlier: also close tokens in between
- tokens << token until (token = opened.pop) == expected
+ content = nil
+ part = Tokens.new
+ part_size = 0
+ size = sizes.first
+ i = 0
+ for item in self
+ case content
+ when nil
+ content = item
+ when String
+ if size && part_size + content.size > size # token must be cut
+ if part_size < size # some part of the token goes into this part
+ content = content.dup # content may no be safe to change
+ part << content.slice!(0, size - part_size) << item
+ end
+ # close all open groups and lines...
+ closing = opened.reverse.flatten.map do |content_or_kind|
+ case content_or_kind
+ when :begin_group
+ :end_group
+ when :begin_line
+ :end_line
+ else
+ content_or_kind
+ end
+ end
+ part.concat closing
+ begin
+ parts << part
+ part = Tokens.new
+ size = sizes[i += 1]
+ end until size.nil? || size > 0
+ # ...and open them again.
+ part.concat opened.flatten
+ part_size = 0
+ redo unless content.empty?
+ else
+ part << content << item
+ part_size += content.size
  end
+ content = nil
+ when Symbol
+ case content
+ when :begin_group, :begin_line
+ opened << [content, item]
+ when :end_group, :end_line
+ opened.pop
+ else
+ raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
+ end
+ part << content << item
+ content = nil
+ else
+ raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
  end
- tokens << [type, kind]
  end
- # Close remaining opened tokens
- tokens << token while token = opened.pop
- tokens
+ parts << part
+ parts << Tokens.new while parts.size < sizes.size
+ parts
  end

- def fix!
- replace fix
- end
-
- # TODO: Scanner#split_into_lines
- #
- # Makes sure that:
- # - newlines are single tokens
- # (which means all other token are single-line)
- # - there are no open tokens at the end the line
- #
- # This makes it simple for encoders that work line-oriented,
- # like HTML with list-style numeration.
- def split_into_lines
- raise NotImplementedError
- end
-
- def split_into_lines!
- replace split_into_lines
- end
-
  # Dumps the object into a String that can be saved
  # in files or databases.
  #
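
Note: the slimmed-down encode looks the encoder up via Encoders[] for anything that responds to to_sym, and method_missing now simply forwards unknown method names to encode. A hedged usage sketch:

    require 'coderay'

    # Assumption: the lazy result of CodeRay.scan exposes its Tokens via #tokens.
    tokens = CodeRay.scan('puts "Hi"', :ruby).tokens

    html = tokens.encode(:html)   # encoder looked up via Encoders[:html]
    html = tokens.html            # same thing, routed through method_missing
    text = tokens.to_s            # plain concatenation via the base Encoder

split_into_parts itself is mostly internal: per its comment above, Scanner#tokenize uses it for Array input and the Diff encoder uses it for inline highlighting.
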
@@ -226,28 +172,16 @@ module CodeRay
  #
  # See GZip module.
  def dump gzip_level = 7
- require 'coderay/helpers/gzip_simple'
  dump = Marshal.dump self
- dump = dump.gzip gzip_level
+ dump = GZip.gzip dump, gzip_level
  dump.extend Undumping
  end
-
- # The total size of the tokens.
- # Should be equal to the input size before
- # scanning.
- def text_size
- size = 0
- each_text_token do |t, k|
- size + t.size
- end
- size
- end
-
- # Return all text tokens joined into a single string.
- def text
- map { |t, k| t if t.is_a? ::String }.join
+
+ # Return the actual number of tokens.
+ def count
+ size / 2
  end
-
+
  # Include this module to give an object an #undump
  # method.
  #
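
Note: token dumps now go through the autoloaded GZip helper instead of requiring gzip_simple. A hedged round-trip sketch:

    require 'coderay'

    # Assumption: the lazy result of CodeRay.scan exposes its Tokens via #tokens.
    tokens = CodeRay.scan('x = 1', :ruby).tokens

    dump = tokens.dump    # Marshal.dump + GZip.gzip, extended with Undumping
    copy = dump.undump    # calls Tokens.load(dump), see below
    copy.count            # number of tokens, i.e. half the Array size
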
@@ -258,133 +192,24 @@ module CodeRay
  Tokens.load self
  end
  end
-
+
  # Undump the object using Marshal.load, then
  # unzip it using GZip.gunzip.
  #
  # The result is commonly a Tokens object, but
  # this is not guaranteed.
  def Tokens.load dump
- require 'coderay/helpers/gzip_simple'
- dump = dump.gunzip
+ dump = GZip.gunzip dump
  @dump = Marshal.load dump
  end
-
- end
-
-
- # = TokenStream
- #
- # The TokenStream class is a fake Array without elements.
- #
- # It redirects the method << to a block given at creation.
- #
- # This allows scanners and Encoders to use streaming (no
- # tokens are saved, the input is highlighted the same time it
- # is scanned) with the same code.
- #
- # See CodeRay.encode_stream and CodeRay.scan_stream
- class TokenStream < Tokens
-
- # Whether the object is a TokenStream.
- #
- # Returns true.
- def stream?
- true
- end
-
- # The Array is empty, but size counts the tokens given by <<.
- attr_reader :size
-
- # Creates a new TokenStream that calls +block+ whenever
- # its << method is called.
- #
- # Example:
- #
- # require 'coderay'
- #
- # token_stream = CodeRay::TokenStream.new do |text, kind|
- # puts 'kind: %s, text size: %d.' % [kind, text.size]
- # end
- #
- # token_stream << ['/\d+/', :regexp]
- # #-> kind: rexpexp, text size: 5.
- #
- def initialize &block
- raise ArgumentError, 'Block expected for streaming.' unless block
- @callback = block
- @size = 0
- end
-
- # Calls +block+ with +token+ and increments size.
- #
- # Returns self.
- def << token
- @callback.call(*token)
- @size += 1
- self
- end
-
- # This method is not implemented due to speed reasons. Use Tokens.
- def text_size
- raise NotImplementedError,
- 'This method is not implemented due to speed reasons.'
- end
-
- # A TokenStream cannot be dumped. Use Tokens.
- def dump
- raise NotImplementedError, 'A TokenStream cannot be dumped.'
- end
-
- # A TokenStream cannot be optimized. Use Tokens.
- def optimize
- raise NotImplementedError, 'A TokenStream cannot be optimized.'
- end
-
- end
-
- end
-
- if $0 == __FILE__
- $VERBOSE = true
- $: << File.join(File.dirname(__FILE__), '..')
- eval DATA.read, nil, $0, __LINE__ + 4
- end
-
- __END__
- require 'test/unit'
-
- class TokensTest < Test::Unit::TestCase
-
- def test_creation
- assert CodeRay::Tokens < Array
- tokens = nil
- assert_nothing_raised do
- tokens = CodeRay::Tokens.new
- end
- assert_kind_of Array, tokens
- end
-
- def test_adding_tokens
- tokens = CodeRay::Tokens.new
- assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
- end
- assert_equal tokens.size, 2
- end
-
- def test_dump_undump
- tokens = CodeRay::Tokens.new
- assert_nothing_raised do
- tokens << ['string', :type]
- tokens << ['()', :operator]
- end
- tokens2 = nil
- assert_nothing_raised do
- tokens2 = tokens.dump.undump
- end
- assert_equal tokens, tokens2
+
+ alias text_token push
+ def begin_group kind; push :begin_group, kind end
+ def end_group kind; push :end_group, kind end
+ def begin_line kind; push :begin_line, kind end
+ def end_line kind; push :end_line, kind end
+ alias tokens concat
+
  end

- end
+ end
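
Note: the writer methods added at the end (text_token, begin_group, end_group, begin_line, end_line) are how scanners fill the token array, and they also cover the documented use case of generating tokens directly without a scanner. A hedged sketch:

    require 'coderay'

    # Build the documented '"a string"' example by hand.
    tokens = CodeRay::Tokens.new
    tokens.begin_group :string
    tokens.text_token '"', :delimiter
    tokens.text_token 'a string', :content
    tokens.text_token '"', :delimiter
    tokens.end_group :string

    tokens.count             # => 5 content/kind pairs
    puts tokens.encode(:html)
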