RubyGems - coderay - Versions diffs - 0.9.8 → 1.0.0 - Mend

coderay 0.9.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

data/{lib/README → README_INDEX.rdoc} +10 -21
data/Rakefile +6 -6
data/bin/coderay +193 -64
data/lib/coderay.rb +61 -105
data/lib/coderay/duo.rb +17 -21
data/lib/coderay/encoder.rb +100 -112
data/lib/coderay/encoders/_map.rb +12 -7
data/lib/coderay/encoders/comment_filter.rb +12 -30
data/lib/coderay/encoders/count.rb +29 -11
data/lib/coderay/encoders/debug.rb +32 -20
data/lib/coderay/encoders/div.rb +13 -9
data/lib/coderay/encoders/filter.rb +34 -51
data/lib/coderay/encoders/html.rb +155 -161
data/lib/coderay/encoders/html/css.rb +4 -9
data/lib/coderay/encoders/html/numbering.rb +115 -0
data/lib/coderay/encoders/html/output.rb +22 -70
data/lib/coderay/encoders/json.rb +59 -45
data/lib/coderay/encoders/lines_of_code.rb +12 -57
data/lib/coderay/encoders/null.rb +6 -14
data/lib/coderay/encoders/page.rb +13 -9
data/lib/coderay/encoders/span.rb +13 -9
data/lib/coderay/encoders/statistic.rb +58 -39
data/lib/coderay/encoders/terminal.rb +179 -0
data/lib/coderay/encoders/text.rb +31 -17
data/lib/coderay/encoders/token_kind_filter.rb +111 -0
data/lib/coderay/encoders/xml.rb +19 -18
data/lib/coderay/encoders/yaml.rb +37 -9
data/lib/coderay/for_redcloth.rb +4 -4
data/lib/coderay/helpers/file_type.rb +127 -246
data/lib/coderay/helpers/gzip.rb +41 -0
data/lib/coderay/helpers/plugin.rb +241 -306
data/lib/coderay/helpers/word_list.rb +65 -126
data/lib/coderay/scanner.rb +173 -156
data/lib/coderay/scanners/_map.rb +18 -17
data/lib/coderay/scanners/c.rb +63 -77
data/lib/coderay/scanners/clojure.rb +217 -0
data/lib/coderay/scanners/cpp.rb +71 -84
data/lib/coderay/scanners/css.rb +103 -120
data/lib/coderay/scanners/debug.rb +47 -44
data/lib/coderay/scanners/delphi.rb +70 -76
data/lib/coderay/scanners/diff.rb +141 -50
data/lib/coderay/scanners/erb.rb +81 -0
data/lib/coderay/scanners/groovy.rb +104 -113
data/lib/coderay/scanners/haml.rb +168 -0
data/lib/coderay/scanners/html.rb +181 -110
data/lib/coderay/scanners/java.rb +73 -75
data/lib/coderay/scanners/java/builtin_types.rb +2 -0
data/lib/coderay/scanners/java_script.rb +90 -101
data/lib/coderay/scanners/json.rb +40 -53
data/lib/coderay/scanners/php.rb +123 -147
data/lib/coderay/scanners/python.rb +93 -91
data/lib/coderay/scanners/raydebug.rb +66 -0
data/lib/coderay/scanners/ruby.rb +343 -326
data/lib/coderay/scanners/ruby/patterns.rb +40 -106
data/lib/coderay/scanners/ruby/string_state.rb +71 -0
data/lib/coderay/scanners/sql.rb +80 -66
data/lib/coderay/scanners/text.rb +26 -0
data/lib/coderay/scanners/xml.rb +1 -1
data/lib/coderay/scanners/yaml.rb +74 -73
data/lib/coderay/style.rb +10 -7
data/lib/coderay/styles/_map.rb +3 -3
data/lib/coderay/styles/alpha.rb +143 -0
data/lib/coderay/token_kinds.rb +90 -0
data/lib/coderay/tokens.rb +102 -277
data/lib/coderay/tokens_proxy.rb +55 -0
data/lib/coderay/version.rb +3 -0
data/test/functional/basic.rb +200 -18
data/test/functional/examples.rb +130 -0
data/test/functional/for_redcloth.rb +15 -8
data/test/functional/suite.rb +9 -6
metadata +103 -123
data/FOLDERS +0 -53
data/bin/coderay_stylesheet +0 -4
data/lib/coderay/encoders/html/numerization.rb +0 -133
data/lib/coderay/encoders/term.rb +0 -158
data/lib/coderay/encoders/token_class_filter.rb +0 -84
data/lib/coderay/helpers/gzip_simple.rb +0 -123
data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
data/lib/coderay/scanners/plaintext.rb +0 -20
data/lib/coderay/scanners/rhtml.rb +0 -78
data/lib/coderay/scanners/scheme.rb +0 -145
data/lib/coderay/styles/cycnus.rb +0 -152
data/lib/coderay/styles/murphy.rb +0 -134
data/lib/coderay/token_classes.rb +0 -86
data/test/functional/load_plugin_scanner.rb +0 -11
data/test/functional/vhdl.rb +0 -126
data/test/functional/word_list.rb +0 -79

data/lib/coderay/helpers/word_list.rb CHANGED

@@ -1,138 +1,77 @@
 module CodeRay
-# = WordList
-#
-# <b>A Hash subclass designed for mapping word lists to token types.</b>
-#
-# Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
-#
-# License:: LGPL / ask the author
-# Version:: 1.1 (2006-Oct-19)
-#
-# A WordList is a Hash with some additional features.
-# It is intended to be used for keyword recognition.
-#
-# WordList is highly optimized to be used in Scanners,
-# typically to decide whether a given ident is a special token.
-#
-# For case insensitive words use CaseIgnoringWordList.
-#
-# Example:
-#
-#  # define word arrays
-#  RESERVED_WORDS = %w[
-#    asm break case continue default do else
-#    ...
-#  ]
-#
-#  PREDEFINED_TYPES = %w[
-#    int long short char void
-#    ...
-#  ]
-#
-#  PREDEFINED_CONSTANTS = %w[
-#    EOF NULL ...
-#  ]
-#
-#  # make a WordList
-#  IDENT_KIND = WordList.new(:ident).
-#    add(RESERVED_WORDS, :reserved).
-#    add(PREDEFINED_TYPES, :pre_type).
-#    add(PREDEFINED_CONSTANTS, :pre_constant)
-#
-#  ...
-#
-#  def scan_tokens tokens, options
-#    ...
-#
-#    elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
-#      # use it
-#      kind = IDENT_KIND[match]
-#      ...
-class WordList < Hash
-  # Creates a new WordList with +default+ as default value.
-  #
-  # You can activate +caching+ to store the results for every [] request.
+  # = WordList
   #
-  # With caching, methods like +include?+ or +delete+ may no longer behave
-  # as you expect. Therefore, it is recommended to use the [] method only.
-  def initialize default = false, caching = false, &block
-    if block
-      raise ArgumentError, 'Can\'t combine block with caching.' if caching
-      super(&block)
-    else
-      if caching
-        super() do |h, k|
-          h[k] = h.fetch k, default
-        end
-      else
-        super default
-      end
-    end
-  end
-  # Add words to the list and associate them with +kind+.
+  # <b>A Hash subclass designed for mapping word lists to token types.</b>
   #
-  # Returns +self+, so you can concat add calls.
-  def add words, kind = true
-    words.each do |word|
-      self[word] = kind
+  # Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
+  #
+  # License:: LGPL / ask the author
+  # Version:: 2.0 (2011-05-08)
+  #
+  # A WordList is a Hash with some additional features.
+  # It is intended to be used for keyword recognition.
+  #
+  # WordList is optimized to be used in Scanners,
+  # typically to decide whether a given ident is a special token.
+  #
+  # For case insensitive words use WordList::CaseIgnoring.
+  #
+  # Example:
+  #
+  #  # define word arrays
+  #  RESERVED_WORDS = %w[
+  #    asm break case continue default do else
+  #  ]
+  #
+  #  PREDEFINED_TYPES = %w[
+  #    int long short char void
+  #  ]
+  #
+  #  # make a WordList
+  #  IDENT_KIND = WordList.new(:ident).
+  #    add(RESERVED_WORDS, :reserved).
+  #    add(PREDEFINED_TYPES, :predefined_type)
+  #
+  #  ...
+  #
+  #  def scan_tokens tokens, options
+  #    ...
+  #
+  #    elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
+  #      # use it
+  #      kind = IDENT_KIND[match]
+  #      ...
+  class WordList < Hash
+    # Create a new WordList with +default+ as default value.
+    def initialize default = false
+      super default
     end
-    self
-  end
-end
-# A CaseIgnoringWordList is like a WordList, only that
-# keys are compared case-insensitively.
-#
-# Ignoring the text case is realized by sending the +downcase+ message to
-# all keys.
-#
-# Caching usually makes a CaseIgnoringWordList faster, but it has to be
-# activated explicitly.
-class CaseIgnoringWordList < WordList
-  # Creates a new case-insensitive WordList with +default+ as default value.
-  #
-  # You can activate caching to store the results for every [] request.
-  # This speeds up subsequent lookups for the same word, but also
-  # uses memory.
-  def initialize default = false, caching = false
-    if caching
-      super(default, false) do |h, k|
-        h[k] = h.fetch k.downcase, default
-      end
-    else
-      super(default, false)
-      extend Uncached
+    # Add words to the list and associate them with +value+.
+    #
+    # Returns +self+, so you can concat add calls.
+    def add words, value = true
+      words.each { |word| self[word] = value }
+      self
     end
   end
-  module Uncached  # :nodoc:
+  # A CaseIgnoring WordList is like a WordList, only that
+  # keys are compared case-insensitively (normalizing keys using +downcase+).
+  class WordList::CaseIgnoring < WordList
     def [] key
-      super(key.downcase)
+      super key.downcase
     end
-  end
-  # Add +words+ to the list and associate them with +kind+.
-  def add words, kind = true
-    words.each do |word|
-      self[word.downcase] = kind
+    def []= key, value
+      super key.downcase, value
     end
-    self
   end
-end
 end
-__END__
-# check memory consumption
-END {
-  ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
-    p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
-  end
-}

data/lib/coderay/scanner.rb CHANGED

@@ -1,7 +1,10 @@
-module CodeRay
+# encoding: utf-8
+require 'strscan'
-  require 'coderay/helpers/plugin'
+module CodeRay
+  autoload :WordList, 'coderay/helpers/word_list'
   # = Scanners
   #
   # This module holds the Scanner class and its subclasses.
@@ -16,9 +19,8 @@ module CodeRay
   module Scanners
     extend PluginHost
     plugin_path File.dirname(__FILE__), 'scanners'
-    require 'strscan'
     # = Scanner
     #
     # The base class for all Scanners.
@@ -46,64 +48,89 @@ module CodeRay
       extend Plugin
       plugin_host Scanners
       # Raised if a Scanner fails while scanning
-      ScanError = Class.new(Exception)
-      require 'coderay/helpers/word_list'
+      ScanError = Class.new StandardError
       # The default options for all scanner classes.
       #
       # Define @default_options for subclasses.
-      DEFAULT_OPTIONS = { :stream => false }
+      DEFAULT_OPTIONS = { }
+      KINDS_NOT_LOC = [:comment, :doctype, :docstring]
+      attr_accessor :state
-      KINDS_NOT_LOC = [:comment, :doctype]
       class << self
-        # Returns if the Scanner can be used in streaming mode.
-        def streamable?
-          is_a? Streamable
+        # Normalizes the given code into a string with UNIX newlines, in the
+        # scanner's internal encoding, with invalid and undefined characters
+        # replaced by placeholders. Always returns a new object.
+        def normalize code
+          # original = code
+          code = code.to_s unless code.is_a? ::String
+          return code if code.empty?
+          if code.respond_to? :encoding
+            code = encode_with_encoding code, self.encoding
+          else
+            code = to_unix code
+          end
+          # code = code.dup if code.eql? original
+          code
         end
-        def normify code
-          code = code.to_s
-          if code.respond_to?(:encoding) && (code.encoding.name != 'UTF-8' || !code.valid_encoding?)
-            code = code.dup
-            original_encoding = code.encoding
-            code.force_encoding 'Windows-1252'
-            unless code.valid_encoding?
-              code.force_encoding original_encoding
-              if code.encoding.name == 'UTF-8'
-                code.encode! 'UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '?'
-              end
-              code.encode! 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '?'
+        # The typical filename suffix for this scanner's language.
+        def file_extension extension = lang
+          @file_extension ||= extension.to_s
+        end
+        # The encoding used internally by this scanner.
+        def encoding name = 'UTF-8'
+          @encoding ||= defined?(Encoding.find) && Encoding.find(name)
+        end
+        # The lang of this Scanner class, which is equal to its Plugin ID.
+        def lang
+          @plugin_id
+        end
+      protected
+        def encode_with_encoding code, target_encoding
+          if code.encoding == target_encoding
+            if code.valid_encoding?
+              return to_unix(code)
+            else
+              source_encoding = guess_encoding code
             end
+          else
+            source_encoding = code.encoding
           end
-          code.to_unix
+          # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
+          code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
         end
-        def file_extension extension = nil
-          if extension
-            @file_extension = extension.to_s
-          else
-            @file_extension ||= plugin_id.to_s
+        def to_unix code
+          code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
+        end
+        def guess_encoding s
+          #:nocov:
+          IO.popen("file -b --mime -", "w+") do |file|
+            file.write s[0, 1024]
+            file.close_write
+            begin
+              Encoding.find file.gets[/charset=([-\w]+)/, 1]
+            rescue ArgumentError
+              Encoding::BINARY
+            end
           end
+          #:nocov:
         end
       end
-=begin
-## Excluded for speed reasons; protected seems to make methods slow.
-  # Save the StringScanner methods from being called.
-  # This would not be useful for highlighting.
-  strscan_public_methods =
-    StringScanner.instance_methods -
-    StringScanner.ancestors[1].instance_methods
-  protected(*strscan_public_methods)
-=end
       # Create a new Scanner.
       #
       # * +code+ is the input String and is handled by the superclass
@@ -111,146 +138,147 @@ module CodeRay
       # * +options+ is a Hash with Symbols as keys.
       #   It is merged with the default options of the class (you can
       #   overwrite default options here.)
-      # * +block+ is the callback for streamed highlighting.
-      #
-      # If you set :stream to +true+ in the options, the Scanner uses a
-      # TokenStream with the +block+ as callback to handle the tokens.
       #
       # Else, a Tokens object is used.
-      def initialize code='', options = {}, &block
-        raise "I am only the basic Scanner class. I can't scan "\
-          "anything. :( Use my subclasses." if self.class == Scanner
+      def initialize code = '', options = {}
+        if self.class == Scanner
+          raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
+        end
         @options = self.class::DEFAULT_OPTIONS.merge options
-        super Scanner.normify(code)
-        @tokens = options[:tokens]
-        if @options[:stream]
-          warn "warning in CodeRay::Scanner.new: :stream is set, "\
-            "but no block was given" unless block_given?
-          raise NotStreamableError, self unless kind_of? Streamable
-          @tokens ||= TokenStream.new(&block)
-        else
-          warn "warning in CodeRay::Scanner.new: Block given, "\
-            "but :stream is #{@options[:stream]}" if block_given?
-          @tokens ||= Tokens.new
-        end
-        @tokens.scanner = self
+        super self.class.normalize(code)
+        @tokens = options[:tokens] || Tokens.new
+        @tokens.scanner = self if @tokens.respond_to? :scanner=
         setup
       end
+      # Sets back the scanner. Subclasses should redefine the reset_instance
+      # method instead of this one.
       def reset
         super
         reset_instance
       end
+      # Set a new string to be scanned.
       def string= code
-        code = Scanner.normify(code)
-        if defined?(RUBY_DESCRIPTION) && RUBY_DESCRIPTION['rubinius 1.0.1']
-          reset_state
-          @string = code
-        else
-          super code
-        end
+        code = self.class.normalize(code)
+        super code
         reset_instance
       end
-      # More mnemonic accessor name for the input string.
-      alias code string
-      alias code= string=
-      # Returns the Plugin ID for this scanner.
+      # the Plugin ID for this scanner
       def lang
-        self.class.plugin_id
+        self.class.lang
       end
-      # Scans the code and returns all tokens in a Tokens object.
-      def tokenize new_string=nil, options = {}
+      # the default file extension for this scanner
+      def file_extension
+        self.class.file_extension
+      end
+      # Scan the code and return all tokens in a Tokens object.
+      def tokenize source = nil, options = {}
         options = @options.merge(options)
-        self.string = new_string if new_string
-        @cached_tokens =
-          if @options[:stream]  # :stream must have been set already
-            reset unless new_string
-            scan_tokens @tokens, options
-            @tokens
-          else
-            scan_tokens @tokens, options
-          end
+        @tokens = options[:tokens] || @tokens || Tokens.new
+        @tokens.scanner = self if @tokens.respond_to? :scanner=
+        case source
+        when Array
+          self.string = self.class.normalize(source.join)
+        when nil
+          reset
+        else
+          self.string = self.class.normalize(source)
+        end
+        begin
+          scan_tokens @tokens, options
+        rescue => e
+          message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
+          raise_inspect e.message, @tokens, message, 30, e.backtrace
+        end
+        @cached_tokens = @tokens
+        if source.is_a? Array
+          @tokens.split_into_parts(*source.map { |part| part.size })
+        else
+          @tokens
+        end
       end
+      # Cache the result of tokenize.
       def tokens
         @cached_tokens ||= tokenize
       end
-      # Whether the scanner is in streaming mode.
-      def streaming?
-        !!@options[:stream]
-      end
-      # Traverses the tokens.
+      # Traverse the tokens.
       def each &block
-        raise ArgumentError,
-          'Cannot traverse TokenStream.' if @options[:stream]
         tokens.each(&block)
       end
       include Enumerable
-      # The current line position of the scanner.
+      # The current line position of the scanner, starting with 1.
+      # See also: #column.
       #
       # Beware, this is implemented inefficiently. It should be used
       # for debugging only.
-      def line
-        string[0..pos].count("\n") + 1
+      def line pos = self.pos
+        return 1 if pos <= 0
+        binary_string[0...pos].count("\n") + 1
       end
+      # The current column position of the scanner, starting with 1.
+      # See also: #line.
       def column pos = self.pos
-        return 0 if pos <= 0
-        string = string()
-        if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
-          @bin_string ||= string.dup.force_encoding('binary')
-          string = @bin_string
-        end
-        pos - (string.rindex(?\n, pos) || 0)
+        return 1 if pos <= 0
+        pos - (binary_string.rindex(?\n, pos - 1) || -1)
       end
-      def marshal_dump
-        @options
+      # The string in binary encoding.
+      #
+      # To be used with #pos, which is the index of the byte the scanner
+      # will scan next.
+      def binary_string
+        @binary_string ||=
+          if string.respond_to?(:bytesize) && string.bytesize != string.size
+            #:nocov:
+            string.dup.force_encoding('binary')
+            #:nocov:
+          else
+            string
+          end
       end
-      def marshal_load options
-        @options = options
-      end
     protected
       # Can be implemented by subclasses to do some initialization
       # that has to be done once per instance.
       #
       # Use reset for initialization that has to be done once per
       # scan.
-      def setup
+      def setup  # :doc:
       end
       # This is the central method, and commonly the only one a
       # subclass implements.
       #
       # Subclasses must implement this method; it must return +tokens+
       # and must only use Tokens#<< for storing scanned tokens!
-      def scan_tokens tokens, options
-        raise NotImplementedError,
-          "#{self.class}#scan_tokens not implemented."
+      def scan_tokens tokens, options  # :doc:
+        raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
       end
+      # Resets the scanner.
       def reset_instance
-        @tokens.clear unless @options[:keep_tokens]
+        @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
         @cached_tokens = nil
-        @bin_string = nil if defined? @bin_string
+        @binary_string = nil if defined? @binary_string
       end
       # Scanner error with additional status information
-      def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
+      def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
         raise ScanError, <<-EOE % [
@@ -272,13 +300,13 @@ surrounding code:
         EOE
           File.basename(caller[0]),
           msg,
-          tokens.size,
-          tokens.last(10).map { |t| t.inspect }.join("\n"),
+          tokens.respond_to?(:size) ? tokens.size : 0,
+          tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
           line, column, pos,
           matched, state, bol?, eos?,
-          string[pos - ambit, ambit],
-          string[pos, ambit],
-        ]
+          binary_string[pos - ambit, ambit],
+          binary_string[pos, ambit],
+        ], backtrace
       end
       # Shorthand for scan_until(/\z/).
@@ -288,19 +316,8 @@ surrounding code:
         terminate
         rest
       end
-    end
-  end
-end
-class String
-  # I love this hack. It seems to silence all dos/unix/mac newline problems.
-  def to_unix
-    if index ?\r
-      gsub(/\r\n?/, "\n")
-    else
-      self
     end
   end
-end
+end