RubyGems - syntax - Versions diffs - 0.5.0 → 0.7.0 - Mend

syntax 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

data/data/ruby.css +18 -0
data/data/xml.css +8 -0
data/data/yaml.css +12 -0
data/lib/syntax.rb +8 -1
data/lib/syntax/common.rb +30 -6
data/lib/syntax/convertors/abstract.rb +24 -0
data/lib/syntax/convertors/html.rb +18 -17
data/lib/syntax/{ruby.rb → lang/ruby.rb} +84 -19
data/lib/syntax/{xml.rb → lang/xml.rb} +0 -0
data/lib/syntax/{yaml.rb → lang/yaml.rb} +0 -0
data/lib/syntax/version.rb +1 -1
data/test/syntax/tc_ruby.rb +500 -352
data/test/syntax/tc_xml.rb +2 -2
data/test/syntax/tc_yaml.rb +2 -2
data/test/tc_syntax.rb +22 -0
metadata +14 -8

data/data/ruby.css ADDED

@@ -0,0 +1,18 @@
+.ruby .normal {}
+.ruby .comment { color: #005; font-style: italic; }
+.ruby .keyword { color: #A00; font-weight: bold; }
+.ruby .method { color: #077; }
+.ruby .class { color: #074; }
+.ruby .module { color: #050; }
+.ruby .punct { color: #447; font-weight: bold; }
+.ruby .symbol { color: #099; }
+.ruby .string { color: #944; background: #FFE; }
+.ruby .char { color: #F07; }
+.ruby .ident { color: #004; }
+.ruby .constant { color: #07F; }
+.ruby .regex { color: #B66; background: #FEF; }
+.ruby .number { color: #F99; }
+.ruby .attribute { color: #7BB; }
+.ruby .global { color: #7FB; }
+.ruby .expr { color: #227; }
+.ruby .escape { color: #277; }

data/data/xml.css ADDED

@@ -0,0 +1,8 @@
+.xml .normal {}
+.xml .namespace { color: #B66; font-weight: bold; }
+.xml .tag { color: #F88; }
+.xml .comment { color: #005; font-style: italic; }
+.xml .punct { color: #447; font-weight: bold; }
+.xml .string { color: #944; }
+.xml .number { color: #F99; }
+.xml .attribute { color: #BB7; }

data/data/yaml.css ADDED

@@ -0,0 +1,12 @@
+.yaml .normal {}
+.yaml .document { font-weight: bold; color: #07F; }
+.yaml .type { font-weight: bold; color: #05C; }
+.yaml .key { color: #F88; }
+.yaml .comment { color: #005; font-style: italic; }
+.yaml .punct { color: #447; font-weight: bold; }
+.yaml .string { color: #944; }
+.yaml .number { color: #F99; }
+.yaml .time { color: #F99; }
+.yaml .date { color: #F99; }
+.yaml .ref { color: #944; }
+.yaml .anchor { color: #944; }

data/lib/syntax.rb CHANGED

@@ -21,11 +21,18 @@ module Syntax
   # handler will be returned.
   def load( syntax )
     begin
-      require "syntax/#{syntax}"
+      require "syntax/lang/#{syntax}"
     rescue LoadError
     end
     SYNTAX[ syntax ].new
   end
   module_function :load
+  # Return an array of the names of supported syntaxes.
+  def all
+    lang_dir = File.join(File.dirname(__FILE__), "syntax", "lang")
+    Dir["#{lang_dir}/*.rb"].map { |path| File.basename(path, ".rb") }
+  end
+  module_function :all
 end

data/lib/syntax/common.rb CHANGED

@@ -10,11 +10,16 @@ module Syntax
     # the type of the lexeme that was extracted.
     attr_reader :group
+    # the instruction associated with this token (:none, :region_open, or
+    # :region_close)
+    attr_reader :instruction
     # Create a new Token representing the given text, and belonging to the
     # given group.
-    def initialize( text, group )
+    def initialize( text, group, instruction = :none )
       super text
       @group = group
+      @instruction = instruction
     end
   end
@@ -25,6 +30,12 @@ module Syntax
   # a single token.
   class Tokenizer
+    # The current group being processed by the tokenizer
+    attr_reader :group
+    # The current chunk of text being accumulated
+    attr_reader :chunk
     # Start tokenizing. This sets up the state in preparation for tokenization,
     # such as creating a new scanner for the text and saving the callback block.
     # The block will be invoked for each token extracted.
@@ -104,15 +115,28 @@ module Syntax
       # After the new group is started, if +data+ is non-nil it will be appended
       # to the chunk.
       def start_group( gr, data=nil )
-        if gr != @group && !@chunk.empty?
-          @callback.call( Token.new( @chunk, @group ) )
-          @chunk = ""
-        end
+        flush_chunk if gr != @group
         @group = gr
         @chunk << data if data
       end
+      def start_region( gr, data=nil )
+        flush_chunk
+        @group = gr
+        @callback.call( Token.new( data||"", @group, :region_open ) )
+      end
+      def end_region( gr, data=nil )
+        flush_chunk
+        @group = gr
+        @callback.call( Token.new( data||"", @group, :region_close ) )
+      end
+      def flush_chunk
+        @callback.call( Token.new( @chunk, @group ) ) unless @chunk.empty?
+        @chunk = ""
+      end
   end
 end

data/lib/syntax/convertors/abstract.rb ADDED

@@ -0,0 +1,24 @@
+require 'syntax'
+module Syntax
+  module Convertors
+    # The abstract ancestor class for all convertors. It implements a few
+    # convenience methods to provide a common interface for all convertors.
+    class Abstract
+      # A convenience method for instantiating a new convertor for a
+      # specific syntax.
+      def self.for_syntax( syntax )
+        new( Syntax.load( syntax ) )
+      end
+      # Creates a new convertor that uses the given tokenizer.
+      def initialize( tokenizer )
+        @tokenizer = tokenizer
+      end
+    end
+  end
+end

data/lib/syntax/convertors/html.rb CHANGED

@@ -1,21 +1,10 @@
-require 'syntax'
+require 'syntax/convertors/abstract'
 module Syntax
   module Convertors
     # A simple class for converting a text into HTML.
-    class HTML
-      # A convenience method for instantiating a new HTML convertor for a
-      # specific syntax.
-      def self.for_syntax( syntax )
-        new( Syntax.load( syntax ) )
-      end
-      # Creates a new HTML convertor that uses the given tokenizer.
-      def initialize( tokenizer )
-        @tokenizer = tokenizer
-      end
+    class HTML < Abstract
       # Converts the given text to HTML, using spans to represent token groups
       # of any type but <tt>:normal</tt> (which is always unhighlighted). If
@@ -23,13 +12,25 @@ module Syntax
       def convert( text, pre=true )
         html = ""
         html << "<pre>" if pre
+        regions = []
         @tokenizer.tokenize( text ) do |tok|
-          if tok.group == :normal
-            html << html_escape( tok )
-          else
-            html << "<span class=\"#{tok.group}\">#{html_escape(tok)}</span>"
+          value = html_escape(tok)
+          case tok.instruction
+            when :region_close then
+              regions.pop
+              html << "</span>"
+            when :region_open then
+              regions.push tok.group
+              html << "<span class=\"#{tok.group}\">#{value}"
+            else
+              if tok.group == ( regions.last || :normal )
+                html << value
+              else
+                html << "<span class=\"#{tok.group}\">#{value}</span>"
+              end
           end
         end
+        html << "</span>" while regions.pop
         html << "</pre>" if pre
         html
       end

data/lib/syntax/{ruby.rb → lang/ruby.rb} RENAMED

@@ -17,6 +17,8 @@ module Syntax
     # Perform ruby-specific setup
     def setup
       @selector = false
+      @allow_operator = false
+      @heredocs = []
     end
     # Step through a single iteration of the tokenization process.
@@ -42,13 +44,17 @@ module Syntax
           when check( /:"/ )
             start_group :symbol, scan(/:/)
             scan_delimited_region :symbol, :symbol, "", true
+            @allow_operator = true
           when check( /:'/ )
             start_group :symbol, scan(/:/)
             scan_delimited_region :symbol, :symbol, "", false
+            @allow_operator = true
           when check( /:\w/ )
             start_group :symbol, scan(/:\w+[!?]?/)
+            @allow_operator = true
           when check( /\?\\?./ )
             start_group :char, scan(/\?\\?./)
+            @allow_operator = true
           when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
             if @selector || matched[-1] == ?? || matched[-1] == ?!
               start_group :ident,
@@ -58,20 +64,32 @@ module Syntax
                 scan(/(__FILE__|__LINE__|true|false|nil|self)/)
             end
             @selector = false
+            @allow_operator = true
           else
             case peek(2)
               when "%r"
                 scan_delimited_region :punct, :regex, scan( /../ ), true
+                @allow_operator = true
               when "%w", "%q"
                 scan_delimited_region :punct, :string, scan( /../ ), false
+                @allow_operator = true
               when "%s"
                 scan_delimited_region :punct, :symbol, scan( /../ ), false
+                @allow_operator = true
               when "%W", "%Q", "%x"
                 scan_delimited_region :punct, :string, scan( /../ ), true
+                @allow_operator = true
               when /%[^\sa-zA-Z0-9]/
                 scan_delimited_region :punct, :string, scan( /./ ), true
+                @allow_operator = true
               when "<<"
+                saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
                 start_group :punct, scan( /<</ )
+                if saw_word
+                  @allow_operator = false
+                  return
+                end
                 float_right = scan( /-/ )
                 append "-" if float_right
                 if ( type = scan( /['"]/ ) )
@@ -86,45 +104,69 @@ module Syntax
                 end
                 start_group :constant, delim
                 start_group :punct, scan( /#{type}/ ) if type
-                scan_delimited_region :constant, :string, "", ( type != "'" ),
-                  delim, true, float_right
+                @heredocs << [ float_right, type, delim ]
+                @allow_operator = true
               else
                 case peek(1)
+                  when /[\n\r]/
+                    unless @heredocs.empty?
+                      scan_heredoc(*@heredocs.shift)
+                    else
+                      start_group :normal, scan( /\s+/ )
+                    end
+                    @allow_operator = false
                   when /\s/
                     start_group :normal, scan( /\s+/ )
                   when "#"
-                    start_group :comment, scan( /#.*$/ )
+                    start_group :comment, scan( /#[^\n\r]*/ )
                   when /[A-Z]/
                     start_group :constant, scan( /\w+/ )
+                    @allow_operator = true
                   when /[a-z_]/
                     word = scan( /\w+[?!]?/ )
                     if !@selector && KEYWORDS.include?( word )
                       start_group :keyword, word
+                      @allow_operator = false
                     elsif
                       start_group :ident, word
+                      @allow_operator = true
                     end
                     @selector = false
                   when /\d/
                     start_group :number,
                       scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
+                    @allow_operator = true
                   when '"'
                     scan_delimited_region :punct, :string, "", true
+                    @allow_operator = true
                   when '/'
-                    scan_delimited_region :punct, :regex, "", true
+                    if @allow_operator
+                      start_group :punct, scan(%r{/})
+                      @allow_operator = false
+                    else
+                      scan_delimited_region :punct, :regex, "", true
+                      @allow_operator = true
+                    end
                   when "'"
                     scan_delimited_region :punct, :string, "", false
+                    @allow_operator = true
                   when "."
                     dots = scan( /\.{1,3}/ )
                     start_group :punct, dots
                     @selector = ( dots.length == 1 )
                   when /[@]/
                     start_group :attribute, scan( /@{1,2}\w*/ )
+                    @allow_operator = true
                   when /[$]/
                     start_group :global, scan(/\$/)
                     start_group :global, scan( /\w+|./ ) if check(/./)
-                  when /[-!?*\/+=<>()\[\]\{}:;,&|%]/
-                    start_group :punct,
-                      scan(/[-!?*\/+=<>()\[\]\{}:;,&|%]/)
+                    @allow_operator = true
+                  when /[-!?*\/+=<>(\[\{}:;,&|%]/
+                    start_group :punct, scan(/./)
+                    @allow_operator = false
+                  when /[)\]]/
+                    start_group :punct, scan(/./)
+                    @allow_operator = true
                   else
                     # all else just falls through this, to prevent
                     # infinite loops...
@@ -140,8 +182,21 @@ module Syntax
       # Scan a delimited region of text. This handles the simple cases (strings
       # delimited with quotes) as well as the more complex cases of %-strings
       # and here-documents.
+      #
+      # * +delim_group+ is the group to use to classify the delimiters of the
+      #   region
+      # * +inner_group+ is the group to use to classify the contents of the
+      #   region
+      # * +starter+ is the text to use as the starting delimiter
+      # * +exprs+ is a boolean flag indicating whether the region is an
+      #   interpolated string or not
+      # * +delim+ is the text to use as the delimiter of the region. If +nil+,
+      #   the next character will be treated as the delimiter.
+      # * +heredoc+ is either +false+, meaning the region is not a heredoc, or
+      #   <tt>:flush</tt> (meaning the delimiter must be flushed left), or
+      #   <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
       def scan_delimited_region( delim_group, inner_group, starter, exprs,
-        delim=nil, delim_alone=false, float_right=false )
+        delim=nil, heredoc=false )
       # begin
         if !delim
           start_group delim_group, starter
@@ -152,23 +207,22 @@ module Syntax
             when '{' then '}'
             when '(' then ')'
             when '[' then ']'
+            when '<' then '>'
             else delim
           end
         end
-        start_group inner_group
+        start_region inner_group
         items = "\\\\|"
-        if delim_alone
+        if heredoc
           items << "(^"
-          items << '\s*' if float_right
-          items << "#{delim}$)"
+          items << '\s*' if heredoc == :float
+          items << "#{Regexp.escape(delim)}\s*)$"
         else
-          items << "#{delim}"
+          items << "#{Regexp.escape(delim)}"
         end
-        items << "|#(\\$|@|\\{)"if exprs
+        items << "|#(\\$|@|\\{)" if exprs
         items = Regexp.new( items )
         loop do
@@ -186,15 +240,15 @@ module Syntax
                   case peek(1)
                     when "'"
                       scan(/./)
-                      start_group :expr, "\\'"
+                      start_group :escape, "\\'"
                     when "\\"
                       scan(/./)
-                      start_group :expr, "\\\\"
+                      start_group :escape, "\\\\"
                     else
                       start_group inner_group, "\\"
                   end
                 else
-                  start_group :expr, "\\"
+                  start_group :escape, "\\"
                   c = getch
                   append c
                   case c
@@ -205,6 +259,7 @@ module Syntax
                   end
                 end
               when delim
+                end_region inner_group
                 start_group delim_group, matched
                 break
               when /^#/
@@ -232,6 +287,16 @@ module Syntax
           end
         end
       end
+      # Scan a heredoc beginning at the current position.
+      #
+      # * +float+ indicates whether the delimiter may be floated to the right
+      # * +type+ is +nil+, a single quote, or a double quote
+      # * +delim+ is the delimiter to look for
+      def scan_heredoc(float, type, delim)
+        scan_delimited_region( :constant, :string, "", type != "'",
+          delim, float ? :float : :flush )
+      end
   end
   SYNTAX["ruby"] = Ruby

data/lib/syntax/{xml.rb → lang/xml.rb} RENAMED

File without changes

data/lib/syntax/{yaml.rb → lang/yaml.rb} RENAMED

File without changes

data/lib/syntax/version.rb CHANGED

@@ -1,7 +1,7 @@
 module Syntax
   module Version
     MAJOR=0
-    MINOR=5
+    MINOR=7
     TINY=0
     STRING=[MAJOR,MINOR,TINY].join('.')