RubyGems - coderay - Versions diffs - 1.0.0 → 1.0.0.598.pre - Mend

coderay 1.0.0 → 1.0.0.598.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

data/FOLDERS +49 -0
data/Rakefile +6 -5
data/bin/coderay +74 -190
data/bin/coderay_stylesheet +4 -0
data/{README_INDEX.rdoc → lib/README} +20 -10
data/lib/coderay.rb +60 -62
data/lib/coderay/duo.rb +55 -2
data/lib/coderay/encoder.rb +39 -52
data/lib/coderay/encoders/_map.rb +7 -11
data/lib/coderay/encoders/comment_filter.rb +61 -0
data/lib/coderay/encoders/count.rb +26 -11
data/lib/coderay/encoders/debug.rb +60 -11
data/lib/coderay/encoders/div.rb +8 -9
data/lib/coderay/encoders/filter.rb +52 -12
data/lib/coderay/encoders/html.rb +113 -106
data/lib/coderay/encoders/html/css.rb +7 -2
data/lib/coderay/encoders/html/numbering.rb +27 -24
data/lib/coderay/encoders/html/output.rb +58 -15
data/lib/coderay/encoders/json.rb +44 -37
data/lib/coderay/encoders/lines_of_code.rb +56 -9
data/lib/coderay/encoders/null.rb +13 -6
data/lib/coderay/encoders/page.rb +8 -8
data/lib/coderay/encoders/span.rb +9 -10
data/lib/coderay/encoders/statistic.rb +114 -51
data/lib/coderay/encoders/terminal.rb +10 -7
data/lib/coderay/encoders/text.rb +36 -17
data/lib/coderay/encoders/token_kind_filter.rb +58 -1
data/lib/coderay/encoders/xml.rb +11 -13
data/lib/coderay/encoders/yaml.rb +14 -16
data/lib/coderay/for_redcloth.rb +1 -1
data/lib/coderay/helpers/file_type.rb +240 -125
data/lib/coderay/helpers/gzip_simple.rb +123 -0
data/lib/coderay/helpers/plugin.rb +307 -241
data/lib/coderay/helpers/word_list.rb +126 -65
data/lib/coderay/scanner.rb +103 -153
data/lib/coderay/scanners/_map.rb +16 -18
data/lib/coderay/scanners/c.rb +13 -13
data/lib/coderay/scanners/cpp.rb +6 -6
data/lib/coderay/scanners/css.rb +48 -47
data/lib/coderay/scanners/debug.rb +55 -9
data/lib/coderay/scanners/delphi.rb +4 -4
data/lib/coderay/scanners/diff.rb +25 -43
data/lib/coderay/scanners/groovy.rb +2 -2
data/lib/coderay/scanners/html.rb +30 -107
data/lib/coderay/scanners/java.rb +5 -6
data/lib/coderay/scanners/java/builtin_types.rb +0 -2
data/lib/coderay/scanners/java_script.rb +6 -6
data/lib/coderay/scanners/json.rb +6 -7
data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
data/lib/coderay/scanners/php.rb +12 -13
data/lib/coderay/scanners/plaintext.rb +26 -0
data/lib/coderay/scanners/python.rb +4 -4
data/lib/coderay/scanners/{erb.rb → rhtml.rb} +11 -19
data/lib/coderay/scanners/ruby.rb +208 -219
data/lib/coderay/scanners/ruby/patterns.rb +85 -18
data/lib/coderay/scanners/scheme.rb +136 -0
data/lib/coderay/scanners/sql.rb +22 -29
data/lib/coderay/scanners/yaml.rb +10 -11
data/lib/coderay/styles/_map.rb +2 -2
data/lib/coderay/styles/alpha.rb +104 -102
data/lib/coderay/styles/cycnus.rb +143 -0
data/lib/coderay/styles/murphy.rb +123 -0
data/lib/coderay/token_kinds.rb +86 -87
data/lib/coderay/tokens.rb +169 -26
data/test/functional/basic.rb +14 -200
data/test/functional/examples.rb +14 -20
data/test/functional/for_redcloth.rb +8 -15
data/test/functional/load_plugin_scanner.rb +11 -0
data/test/functional/suite.rb +6 -9
data/test/functional/vhdl.rb +126 -0
data/test/functional/word_list.rb +79 -0
metadata +129 -107
data/lib/coderay/helpers/gzip.rb +0 -41
data/lib/coderay/scanners/clojure.rb +0 -217
data/lib/coderay/scanners/haml.rb +0 -168
data/lib/coderay/scanners/ruby/string_state.rb +0 -71
data/lib/coderay/scanners/text.rb +0 -26
data/lib/coderay/tokens_proxy.rb +0 -55
data/lib/coderay/version.rb +0 -3

data/lib/coderay/helpers/word_list.rb CHANGED Viewed

@@ -1,77 +1,138 @@
 module CodeRay
-  # = WordList
+# = WordList
+#
+# <b>A Hash subclass designed for mapping word lists to token types.</b>
+#
+# Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
+#
+# License:: LGPL / ask the author
+# Version:: 1.1 (2006-Oct-19)
+#
+# A WordList is a Hash with some additional features.
+# It is intended to be used for keyword recognition.
+#
+# WordList is highly optimized to be used in Scanners,
+# typically to decide whether a given ident is a special token.
+#
+# For case insensitive words use CaseIgnoringWordList.
+#
+# Example:
+#
+#  # define word arrays
+#  RESERVED_WORDS = %w[
+#    asm break case continue default do else
+#    ...
+#  ]
+#
+#  PREDEFINED_TYPES = %w[
+#    int long short char void
+#    ...
+#  ]
+#
+#  PREDEFINED_CONSTANTS = %w[
+#    EOF NULL ...
+#  ]
+#
+#  # make a WordList
+#  IDENT_KIND = WordList.new(:ident).
+#    add(RESERVED_WORDS, :reserved).
+#    add(PREDEFINED_TYPES, :pre_type).
+#    add(PREDEFINED_CONSTANTS, :pre_constant)
+#
+#  ...
+#
+#  def scan_tokens tokens, options
+#    ...
+#
+#    elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
+#      # use it
+#      kind = IDENT_KIND[match]
+#      ...
+class WordList < Hash
+  # Creates a new WordList with +default+ as default value.
   #
-  # <b>A Hash subclass designed for mapping word lists to token types.</b>
+  # You can activate +caching+ to store the results for every [] request.
   #
-  # Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
-  #
-  # License:: LGPL / ask the author
-  # Version:: 2.0 (2011-05-08)
-  #
-  # A WordList is a Hash with some additional features.
-  # It is intended to be used for keyword recognition.
-  #
-  # WordList is optimized to be used in Scanners,
-  # typically to decide whether a given ident is a special token.
-  #
-  # For case insensitive words use WordList::CaseIgnoring.
-  #
-  # Example:
-  #
-  #  # define word arrays
-  #  RESERVED_WORDS = %w[
-  #    asm break case continue default do else
-  #  ]
-  #
-  #  PREDEFINED_TYPES = %w[
-  #    int long short char void
-  #  ]
-  #
-  #  # make a WordList
-  #  IDENT_KIND = WordList.new(:ident).
-  #    add(RESERVED_WORDS, :reserved).
-  #    add(PREDEFINED_TYPES, :predefined_type)
-  #
-  #  ...
-  #
-  #  def scan_tokens tokens, options
-  #    ...
-  #
-  #    elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
-  #      # use it
-  #      kind = IDENT_KIND[match]
-  #      ...
-  class WordList < Hash
-    # Create a new WordList with +default+ as default value.
-    def initialize default = false
-      super default
+  # With caching, methods like +include?+ or +delete+ may no longer behave
+  # as you expect. Therefore, it is recommended to use the [] method only.
+  def initialize default = false, caching = false, &block
+    if block
+      raise ArgumentError, 'Can\'t combine block with caching.' if caching
+      super(&block)
+    else
+      if caching
+        super() do |h, k|
+          h[k] = h.fetch k, default
+        end
+      else
+        super default
+      end
     end
-    # Add words to the list and associate them with +value+.
-    #
-    # Returns +self+, so you can concat add calls.
-    def add words, value = true
-      words.each { |word| self[word] = value }
-      self
+  end
+  # Add words to the list and associate them with +kind+.
+  #
+  # Returns +self+, so you can concat add calls.
+  def add words, kind = true
+    words.each do |word|
+      self[word] = kind
+    end
+    self
+  end
+end
+# A CaseIgnoringWordList is like a WordList, only that
+# keys are compared case-insensitively.
+#
+# Ignoring the text case is realized by sending the +downcase+ message to
+# all keys.
+#
+# Caching usually makes a CaseIgnoringWordList faster, but it has to be
+# activated explicitly.
+class CaseIgnoringWordList < WordList
+  # Creates a new case-insensitive WordList with +default+ as default value.
+  #
+  # You can activate caching to store the results for every [] request.
+  # This speeds up subsequent lookups for the same word, but also
+  # uses memory.
+  def initialize default = false, caching = false
+    if caching
+      super(default, false) do |h, k|
+        h[k] = h.fetch k.downcase, default
+      end
+    else
+      super(default, false)
+      extend Uncached
     end
   end
-  # A CaseIgnoring WordList is like a WordList, only that
-  # keys are compared case-insensitively (normalizing keys using +downcase+).
-  class WordList::CaseIgnoring < WordList
+  module Uncached  # :nodoc:
     def [] key
-      super key.downcase
+      super(key.downcase)
     end
-    def []= key, value
-      super key.downcase, value
+  end
+  # Add +words+ to the list and associate them with +kind+.
+  def add words, kind = true
+    words.each do |word|
+      self[word.downcase] = kind
     end
+    self
   end
+end
 end
+__END__
+# check memory consumption
+END {
+  ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
+    p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
+  end
+}

data/lib/coderay/scanner.rb CHANGED Viewed

@@ -1,10 +1,7 @@
-# encoding: utf-8
-require 'strscan'
 module CodeRay
-  autoload :WordList, 'coderay/helpers/word_list'
+  require 'coderay/helpers/plugin'
   # = Scanners
   #
   # This module holds the Scanner class and its subclasses.
@@ -19,8 +16,9 @@ module CodeRay
   module Scanners
     extend PluginHost
     plugin_path File.dirname(__FILE__), 'scanners'
+    require 'strscan'
     # = Scanner
     #
     # The base class for all Scanners.
@@ -48,89 +46,61 @@ module CodeRay
       extend Plugin
       plugin_host Scanners
       # Raised if a Scanner fails while scanning
-      ScanError = Class.new StandardError
+      ScanError = Class.new(Exception)
+      require 'coderay/helpers/word_list'
       # The default options for all scanner classes.
       #
       # Define @default_options for subclasses.
       DEFAULT_OPTIONS = { }
       KINDS_NOT_LOC = [:comment, :doctype, :docstring]
-      attr_accessor :state
       class << self
-        # Normalizes the given code into a string with UNIX newlines, in the
-        # scanner's internal encoding, with invalid and undefined characters
-        # replaced by placeholders. Always returns a new object.
-        def normalize code
-          # original = code
-          code = code.to_s unless code.is_a? ::String
-          return code if code.empty?
-          if code.respond_to? :encoding
-            code = encode_with_encoding code, self.encoding
-          else
-            code = to_unix code
+        def normify code
+          code = code.to_s.dup
+          # try using UTF-8
+          if code.respond_to? :force_encoding
+            debug, $DEBUG = $DEBUG, false
+            begin
+              code.force_encoding 'UTF-8'
+              code[/\z/]  # raises an ArgumentError when code contains a non-UTF-8 char
+            rescue ArgumentError
+              code.force_encoding 'binary'
+            ensure
+              $DEBUG = debug
+            end
           end
-          # code = code.dup if code.eql? original
+          # convert the string to UNIX newline format
+          code.gsub!(/\r\n?/, "\n") if code.index ?\r
           code
         end
-        # The typical filename suffix for this scanner's language.
-        def file_extension extension = lang
-          @file_extension ||= extension.to_s
-        end
-        # The encoding used internally by this scanner.
-        def encoding name = 'UTF-8'
-          @encoding ||= defined?(Encoding.find) && Encoding.find(name)
-        end
-        # The lang of this Scanner class, which is equal to its Plugin ID.
-        def lang
-          @plugin_id
-        end
-      protected
-        def encode_with_encoding code, target_encoding
-          if code.encoding == target_encoding
-            if code.valid_encoding?
-              return to_unix(code)
-            else
-              source_encoding = guess_encoding code
-            end
+        def file_extension extension = nil
+          if extension
+            @file_extension = extension.to_s
           else
-            source_encoding = code.encoding
+            @file_extension ||= plugin_id.to_s
           end
-          # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
-          code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
-        end
-        def to_unix code
-          code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
         end
-        def guess_encoding s
-          #:nocov:
-          IO.popen("file -b --mime -", "w+") do |file|
-            file.write s[0, 1024]
-            file.close_write
-            begin
-              Encoding.find file.gets[/charset=([-\w]+)/, 1]
-            rescue ArgumentError
-              Encoding::BINARY
-            end
-          end
-          #:nocov:
-        end
       end
+=begin
+## Excluded for speed reasons; protected seems to make methods slow.
+  # Save the StringScanner methods from being called.
+  # This would not be useful for highlighting.
+  strscan_public_methods =
+    StringScanner.instance_methods -
+    StringScanner.ancestors[1].instance_methods
+  protected(*strscan_public_methods)
+=end
       # Create a new Scanner.
       #
       # * +code+ is the input String and is handled by the superclass
@@ -140,66 +110,58 @@ module CodeRay
       #   overwrite default options here.)
       #
       # Else, a Tokens object is used.
-      def initialize code = '', options = {}
-        if self.class == Scanner
-          raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
-        end
+      def initialize code='', options = {}
+        raise "I am only the basic Scanner class. I can't scan "\
+          "anything. :( Use my subclasses." if self.class == Scanner
         @options = self.class::DEFAULT_OPTIONS.merge options
-        super self.class.normalize(code)
+        super Scanner.normify(code)
         @tokens = options[:tokens] || Tokens.new
         @tokens.scanner = self if @tokens.respond_to? :scanner=
         setup
       end
-      # Sets back the scanner. Subclasses should redefine the reset_instance
-      # method instead of this one.
+      # Sets back the scanner. Subclasses are to define the reset_instance
+      # method.
       def reset
         super
         reset_instance
       end
-      # Set a new string to be scanned.
       def string= code
-        code = self.class.normalize(code)
+        code = Scanner.normify(code)
         super code
         reset_instance
       end
-      # the Plugin ID for this scanner
+      # More mnemonic accessor name for the input string.
+      alias code string
+      alias code= string=
+      # Returns the Plugin ID for this scanner.
       def lang
-        self.class.lang
-      end
-      # the default file extension for this scanner
-      def file_extension
-        self.class.file_extension
+        self.class.plugin_id.to_s
       end
-      # Scan the code and returns all tokens in a Tokens object.
+      # Scans the code and returns all tokens in a Tokens object.
       def tokenize source = nil, options = {}
         options = @options.merge(options)
         @tokens = options[:tokens] || @tokens || Tokens.new
         @tokens.scanner = self if @tokens.respond_to? :scanner=
         case source
+        when String
+          self.string = source
         when Array
-          self.string = self.class.normalize(source.join)
+          self.string = source.join
         when nil
           reset
         else
-          self.string = self.class.normalize(source)
-        end
-        begin
-          scan_tokens @tokens, options
-        rescue => e
-          message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
-          raise_inspect e.message, @tokens, message, 30, e.backtrace
+          raise ArgumentError, 'expected String, Array, or nil'
         end
+        scan_tokens @tokens, options
         @cached_tokens = @tokens
         if source.is_a? Array
           @tokens.split_into_parts(*source.map { |part| part.size })
@@ -208,51 +170,46 @@ module CodeRay
         end
       end
-      # Cache the result of tokenize.
+      # Caches the result of tokenize.
       def tokens
         @cached_tokens ||= tokenize
       end
-      # Traverse the tokens.
+      # Traverses the tokens.
       def each &block
         tokens.each(&block)
       end
       include Enumerable
-      # The current line position of the scanner, starting with 1.
-      # See also: #column.
+      # The current line position of the scanner.
       #
       # Beware, this is implemented inefficiently. It should be used
       # for debugging only.
-      def line pos = self.pos
-        return 1 if pos <= 0
-        binary_string[0...pos].count("\n") + 1
+      def line
+        string[0..pos].count("\n") + 1
       end
-      # The current column position of the scanner, starting with 1.
-      # See also: #line.
+      # The current column position of the scanner. See #line.
       def column pos = self.pos
-        return 1 if pos <= 0
-        pos - (binary_string.rindex(?\n, pos - 1) || -1)
+        return 0 if pos <= 0
+        string = string()
+        if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
+          @bin_string ||= string.dup.force_encoding('binary')
+          string = @bin_string
+        end
+        pos - (string.rindex(?\n, pos) || 0)
       end
-      # The string in binary encoding.
-      #
-      # To be used with #pos, which is the index of the byte the scanner
-      # will scan next.
-      def binary_string
-        @binary_string ||=
-          if string.respond_to?(:bytesize) && string.bytesize != string.size
-            #:nocov:
-            string.dup.force_encoding('binary')
-            #:nocov:
-          else
-            string
-          end
+      def marshal_dump  # :nodoc:
+        @options
       end
+      def marshal_load options  # :nodoc:
+        @options = options
+      end
     protected
       # Can be implemented by subclasses to do some initialization
       # that has to be done once per instance.
       #
@@ -260,25 +217,26 @@ module CodeRay
       # scan.
       def setup  # :doc:
       end
       # This is the central method, and commonly the only one a
       # subclass implements.
       #
       # Subclasses must implement this method; it must return +tokens+
       # and must only use Tokens#<< for storing scanned tokens!
       def scan_tokens tokens, options  # :doc:
-        raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
+        raise NotImplementedError,
+          "#{self.class}#scan_tokens not implemented."
       end
       # Resets the scanner.
       def reset_instance
         @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
         @cached_tokens = nil
-        @binary_string = nil if defined? @binary_string
+        @bin_string = nil if defined? @bin_string
       end
       # Scanner error with additional status information
-      def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
+      def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
         raise ScanError, <<-EOE % [
@@ -300,24 +258,16 @@ surrounding code:
         EOE
           File.basename(caller[0]),
           msg,
-          tokens.respond_to?(:size) ? tokens.size : 0,
-          tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
+          tokens.size,
+          tokens.last(10).map { |t| t.inspect }.join("\n"),
           line, column, pos,
           matched, state, bol?, eos?,
-          binary_string[pos - ambit, ambit],
-          binary_string[pos, ambit],
-        ], backtrace
+          string[pos - ambit, ambit],
+          string[pos, ambit],
+        ]
       end
-      # Shorthand for scan_until(/\z/).
-      # This method also avoids a JRuby 1.9 mode bug.
-      def scan_rest
-        rest = self.rest
-        terminate
-        rest
-      end
     end
   end
 end