syntax 0.7.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -78,8 +78,22 @@ module Syntax
  finish
  end

+ # Specify a set of tokenizer-specific options. Each tokenizer may (or may
+ # not) publish options; if a tokenizer does, those options may be used to
+ # select optional behavior.
+ def set( opts={} )
+ ( @options ||= Hash.new ).update opts
+ end
+
+ # Get the value of the specified option.
+ def option(opt)
+ @options ? @options[opt] : nil
+ end
+
  private

+ EOL = /(?=\r\n?|\n|$)/
+
  # A convenience for delegating method calls to the scanner.
  def self.delegate( sym )
  define_method( sym ) { |*a| @text.__send__( sym, *a ) }
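
The new set/option pair is the public knob for tokenizer behavior, and EOL is a zero-width anchor the scanners below use in place of $ so that CRLF line endings are handled. A minimal sketch of exercising them, assuming only the API shown in this hunk:

    tokenizer = Syntax.load( "ruby" )
    tokenizer.set :expressions => :highlight   # store an option
    tokenizer.option(:expressions)             # => :highlight

    # EOL is a lookahead, so it matches without consuming the terminator:
    "=end\r\n" =~ /^=end(?=\r\n?|\n|$)/        # => 0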
@@ -137,6 +151,13 @@ module Syntax
  @chunk = ""
  end

+ def subtokenize( syntax, text )
+ tokenizer = Syntax.load( syntax )
+ tokenizer.set @options if @options
+ flush_chunk
+ tokenizer.tokenize( text, &@callback )
+ end
+
  end

  end
@@ -7,6 +7,9 @@ module Syntax
  # convenience methods to provide a common interface for all convertors.
  class Abstract

+ # A reference to the tokenizer used by this convertor.
+ attr_reader :tokenizer
+
  # A convenience method for instantiating a new convertor for a
  # specific syntax.
  def self.for_syntax( syntax )
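
With the tokenizer now exposed on the convertor, callers can set tokenizer options without building the tokenizer by hand. A hedged sketch, assuming the gem's conventional HTML convertor entry point (the require path and convert call are taken from the gem's usual usage, not from this diff):

    require 'syntax/convertors/html'

    convertor = Syntax::Convertors::HTML.for_syntax "ruby"
    convertor.tokenizer.set :expressions => :highlight
    html = convertor.convert( 'puts "hi #{name}"' )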
@@ -25,20 +25,20 @@ module Syntax
  def step
  case
  when bol? && check( /=begin/ )
- start_group( :comment, scan_until( /^=end$/ ) )
- when bol? && check( /__END__$/ )
+ start_group( :comment, scan_until( /^=end#{EOL}/ ) )
+ when bol? && check( /__END__#{EOL}/ )
  start_group( :comment, scan_until( /\Z/ ) )
  else
  case
  when check( /def\s+/ )
  start_group :keyword, scan( /def\s+/ )
- start_group :method, scan_until( /$|(?=[;(\s])/ )
+ start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
  when check( /class\s+/ )
  start_group :keyword, scan( /class\s+/ )
- start_group :class, scan_until( /$|(?=[;\s<])/ )
+ start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
  when check( /module\s+/ )
  start_group :keyword, scan( /module\s+/ )
- start_group :module, scan_until( /$|(?=[;\s])/ )
+ start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
  when check( /::/ )
  start_group :punct, scan(/::/)
  when check( /:"/ )
@@ -49,11 +49,11 @@ module Syntax
  start_group :symbol, scan(/:/)
  scan_delimited_region :symbol, :symbol, "", false
  @allow_operator = true
- when check( /:\w/ )
- start_group :symbol, scan(/:\w+[!?]?/)
+ when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
+ start_group :symbol, matched
  @allow_operator = true
- when check( /\?\\?./ )
- start_group :char, scan(/\?\\?./)
+ when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
+ start_group :char, matched
  @allow_operator = true
  when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
  if @selector || matched[-1] == ?? || matched[-1] == ?!
@@ -65,6 +65,9 @@ module Syntax
  end
  @selector = false
  @allow_operator = true
+ when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
+ start_group :number, matched
+ @allow_operator = true
  else
  case peek(2)
  when "%r"
@@ -120,7 +123,7 @@ module Syntax
  when "#"
  start_group :comment, scan( /#[^\n\r]*/ )
  when /[A-Z]/
- start_group :constant, scan( /\w+/ )
+ start_group @selector ? :ident : :constant, scan( /\w+/ )
  @allow_operator = true
  when /[a-z_]/
  word = scan( /\w+[?!]?/ )
@@ -218,11 +221,11 @@ module Syntax
  if heredoc
  items << "(^"
  items << '\s*' if heredoc == :float
- items << "#{Regexp.escape(delim)}\s*)$"
+ items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
  items << "#{Regexp.escape(delim)}"
  end
- items << "|#(\\$|@|\\{)" if exprs
+ items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )

  loop do
@@ -263,25 +266,35 @@ module Syntax
  start_group delim_group, matched
  break
  when /^#/
+ do_highlight = (option(:expressions) == :highlight)
+ start_region :expr if do_highlight
  start_group :expr, matched
  case matched[1]
  when ?{
  depth = 1
+ content = ""
  while depth > 0
  p = pos
  c = scan_until( /[\{}]/ )
  if c.nil?
- append scan_until( /\Z/ )
+ content << scan_until( /\Z/ )
  break
  else
  depth += ( matched == "{" ? 1 : -1 )
- append pre_match[p..-1]
- append matched
+ content << pre_match[p..-1]
+ content << matched if depth > 0
  end
  end
+ if do_highlight
+ subtokenize "ruby", content
+ start_group :expr, "}"
+ else
+ append content + "}"
+ end
  when ?$, ?@
  append scan( /\w+/ )
  end
+ end_region :expr if do_highlight
  else raise "unexpected match on #{matched}"
  end
  end
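
This is the feature the earlier plumbing exists for: with :expressions set to :highlight, interpolated code inside a string is re-tokenized as Ruby via subtokenize instead of being emitted as one opaque :expr lexeme (the default, covered by test_expr_in_braces below). A sketch of driving it, using only API shown in this diff:

    tokenizer = Syntax.load( "ruby" )
    tokenizer.set :expressions => :highlight
    tokenizer.tokenize( '"a #{b} c"' ) do |tok|
      puts "#{tok.group} #{tok.inspect}"   # e.g. :string "a ", :expr "\#{", :ident "b", ...
    end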
@@ -1,7 +1,7 @@
  module Syntax
  module Version
- MAJOR=0
- MINOR=7
+ MAJOR=1
+ MINOR=0
  TINY=0

  STRING=[MAJOR,MINOR,TINY].join('.')
@@ -1,81 +1,69 @@
- $:.unshift File.dirname(__FILE__) +"/../../lib"
+ require File.dirname(__FILE__) + "/tokenizer_testcase"

- require 'test/unit'
- require 'syntax/lang/ruby'
+ class TC_Syntax_Ruby < TokenizerTestCase

- class TC_Syntax_Ruby < Test::Unit::TestCase
-
- def tokenize( string )
- @tokens = []
- @ruby.tokenize( string ) { |tok| @tokens << tok }
- end
-
- def assert_next_token(group, lexeme, instruction=:none)
- assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
- assert_equal [group, lexeme, instruction],
- [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
- end
-
- def assert_no_next_token
- assert @tokens.empty?
- end
-
- def skip_token( n=1 )
- n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
- end
-
- def setup
- @ruby = Syntax::Ruby.new
- end
+ syntax "ruby"

  def test_empty
- tokenize( "" )
+ tokenize ""
  assert_no_next_token
  end

  def test_constant
- tokenize( "Foo" )
+ tokenize "Foo"
  assert_next_token :constant, "Foo"
  end

  def test_ident
- tokenize( "foo" )
+ tokenize "foo"
  assert_next_token :ident, "foo"
  end

  def test_comment_eol
- tokenize( "# a comment\nfoo" )
+ tokenize "# a comment\nfoo"
  assert_next_token :comment, "# a comment"
  assert_next_token :normal, "\n"
  assert_next_token :ident, "foo"
  end

  def test_comment_block
- tokenize( "=begin\nthis is a comment\n=end\nnoncomment" )
+ tokenize "=begin\nthis is a comment\n=end\nnoncomment"
  assert_next_token :comment, "=begin\nthis is a comment\n=end"
  assert_next_token :normal, "\n"
  assert_next_token :ident, "noncomment"
  end

+ def test_comment_block_with_CRNL
+ tokenize "=begin\r\nthis is a comment\r\n=end\r\nnoncomment"
+ assert_next_token :comment, "=begin\r\nthis is a comment\r\n=end"
+ assert_next_token :normal, "\r\n"
+ assert_next_token :ident, "noncomment"
+ end
+
  def test_keyword
  Syntax::Ruby::KEYWORDS.each do |word|
- tokenize( word )
+ tokenize word
  assert_next_token :keyword, word
  end
  Syntax::Ruby::KEYWORDS.each do |word|
- tokenize( "foo.#{word}" )
+ tokenize "foo.#{word}"
  skip_token 2
  assert_next_token :ident, word
  end
  end

  def test__END__
- tokenize( "__END__\n\nblah blah blah" )
+ tokenize "__END__\n\nblah blah blah"
  assert_next_token :comment, "__END__\n\nblah blah blah"
  end

+ def test__END__with_CRNL
+ tokenize "__END__\r\nblah blah blah"
+ assert_next_token :comment, "__END__\r\nblah blah blah"
+ end
+
  def test_def_paren
- tokenize( "def foo(bar)" )
+ tokenize "def foo(bar)"
  assert_next_token :keyword, "def "
  assert_next_token :method, "foo"
  assert_next_token :punct, "("
@@ -84,7 +72,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_def_space
- tokenize( "def foo bar" )
+ tokenize "def foo bar"
  assert_next_token :keyword, "def "
  assert_next_token :method, "foo"
  assert_next_token :normal, " "
@@ -92,28 +80,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_def_semicolon
- tokenize( "def foo;" )
+ tokenize "def foo;"
  assert_next_token :keyword, "def "
  assert_next_token :method, "foo"
  assert_next_token :punct, ";"
  end

+ def test_def_eol
+ tokenize "def foo"
+ assert_next_token :keyword, "def "
+ assert_next_token :method, "foo"
+ end
+
  def test_class_space
- tokenize( "class Foo\n" )
+ tokenize "class Foo\n"
  assert_next_token :keyword, "class "
  assert_next_token :class, "Foo"
  assert_next_token :normal, "\n"
  end

  def test_class_semicolon
- tokenize( "class Foo;" )
+ tokenize "class Foo;"
  assert_next_token :keyword, "class "
  assert_next_token :class, "Foo"
  assert_next_token :punct, ";"
  end

  def test_class_extend
- tokenize( "class Foo< Bang" )
+ tokenize "class Foo< Bang"
  assert_next_token :keyword, "class "
  assert_next_token :class, "Foo"
  assert_next_token :punct, "<"
@@ -122,34 +116,34 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_module_space
- tokenize( "module Foo\n" )
+ tokenize "module Foo\n"
  assert_next_token :keyword, "module "
  assert_next_token :module, "Foo"
  assert_next_token :normal, "\n"
  end

  def test_module_semicolon
- tokenize( "module Foo;" )
+ tokenize "module Foo;"
  assert_next_token :keyword, "module "
  assert_next_token :module, "Foo"
  assert_next_token :punct, ";"
  end

  def test_module_other
- tokenize( "module Foo!\n" )
+ tokenize "module Foo!\n"
  assert_next_token :keyword, "module "
  assert_next_token :module, "Foo!"
  end

  def test_scope_operator
- tokenize( "Foo::Bar" )
+ tokenize "Foo::Bar"
  assert_next_token :constant, "Foo"
  assert_next_token :punct, "::"
  assert_next_token :constant, "Bar"
  end

  def test_symbol_dquote
- tokenize( ':"foo"' )
+ tokenize ':"foo"'
  assert_next_token :symbol, ':"'
  assert_next_token :symbol, '', :region_open
  assert_next_token :symbol, 'foo'
@@ -159,7 +153,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_symbol_squote
- tokenize( ":'foo'" )
+ tokenize ":'foo'"
  assert_next_token :symbol, ":'"
  assert_next_token :symbol, "", :region_open
  assert_next_token :symbol, "foo"
@@ -169,43 +163,56 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_symbol
- tokenize( ":foo_bar?" )
- assert_next_token :symbol, ":foo_bar?"
+ tokenize ":foo_123"
+ assert_next_token :symbol, ":foo_123"
+
+ tokenize ":123"
+ assert_next_token :punct, ":"
+ assert_next_token :number, "123"
+
+ tokenize ":foo="
+ assert_next_token :symbol, ":foo="
+
+ tokenize ":foo!"
+ assert_next_token :symbol, ":foo!"
+
+ tokenize ":foo?"
+ assert_next_token :symbol, ":foo?"
  end

  def test_char
- tokenize( "?." )
+ tokenize "?."
  assert_next_token :char, "?."

- tokenize( '?\n' )
+ tokenize '?\n'
  assert_next_token :char, '?\n'
  end

  def test_specials
  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( word )
+ tokenize word
  assert_next_token :constant, word
  end

  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( "#{word}?" )
+ tokenize "#{word}?"
  assert_next_token :ident, "#{word}?"
  end

  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( "#{word}!" )
+ tokenize "#{word}!"
  assert_next_token :ident, "#{word}!"
  end

  %w{__FILE__ __LINE__ true false nil self}.each do |word|
- tokenize( "x.#{word}" )
+ tokenize "x.#{word}"
  skip_token 2
  assert_next_token :ident, word
  end
  end

  def test_pct_r
- tokenize( '%r{foo#{x}bar}' )
+ tokenize '%r{foo#{x}bar}'
  assert_next_token :punct, "%r{"
  assert_next_token :regex, "", :region_open
  assert_next_token :regex, "foo"
@@ -214,7 +221,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  assert_next_token :regex, "", :region_close
  assert_next_token :punct, "}"

- tokenize( '%r-foo#{x}bar-' )
+ tokenize '%r-foo#{x}bar-'
  assert_next_token :punct, "%r-"
  assert_next_token :regex, "", :region_open
  assert_next_token :regex, "foo"
@@ -238,7 +245,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_w_brace
- tokenize( '%w{foo bar baz}' )
+ tokenize '%w{foo bar baz}'
  assert_next_token :punct, "%w{"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo bar baz'
@@ -247,7 +254,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_w
- tokenize( '%w-foo#{x} bar baz-' )
+ tokenize '%w-foo#{x} bar baz-'
  assert_next_token :punct, "%w-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo#{x} bar baz'
@@ -256,7 +263,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_q
- tokenize( '%q-hello #{world}-' )
+ tokenize '%q-hello #{world}-'
  assert_next_token :punct, "%q-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'hello #{world}'
@@ -265,7 +272,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_s
- tokenize( '%s-hello #{world}-' )
+ tokenize '%s-hello #{world}-'
  assert_next_token :punct, "%s-"
  assert_next_token :symbol, '', :region_open
  assert_next_token :symbol, 'hello #{world}'
@@ -274,7 +281,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_W
- tokenize( '%W-foo#{x} bar baz-' )
+ tokenize '%W-foo#{x} bar baz-'
  assert_next_token :punct, "%W-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo'
@@ -285,7 +292,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_Q
- tokenize( '%Q-hello #{world}-' )
+ tokenize '%Q-hello #{world}-'
  assert_next_token :punct, "%Q-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'hello '
@@ -295,7 +302,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_x
- tokenize( '%x-ls /blah/#{foo}-' )
+ tokenize '%x-ls /blah/#{foo}-'
  assert_next_token :punct, "%x-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'ls /blah/'
@@ -305,7 +312,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_pct_string
- tokenize( '%-hello #{world}-' )
+ tokenize '%-hello #{world}-'
  assert_next_token :punct, "%-"
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'hello '
@@ -315,7 +322,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_bad_pct_string
- tokenize( '%0hello #{world}0' )
+ tokenize '%0hello #{world}0'
  assert_next_token :punct, "%"
  assert_next_token :number, '0'
  assert_next_token :ident, 'hello'
@@ -324,7 +331,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_shift_left
- tokenize( 'foo << 5' )
+ tokenize 'foo << 5'
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<"
@@ -333,14 +340,14 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_shift_left_no_white
- tokenize( 'foo<<5' )
+ tokenize 'foo<<5'
  assert_next_token :ident, "foo"
  assert_next_token :punct, "<<"
  assert_next_token :number, "5"
  end

  def test_here_doc_no_opts
- tokenize( "foo <<EOF\n foo\n bar\n baz\nEOF" )
+ tokenize "foo <<EOF\n foo\n bar\n baz\nEOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<"
@@ -352,7 +359,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_no_opts_missing_end
- tokenize( "foo <<EOF\n foo\n bar\n baz\n EOF" )
+ tokenize "foo <<EOF\n foo\n bar\n baz\n EOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<"
@@ -363,7 +370,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_float_right
- tokenize( "foo <<-EOF\n foo\n bar\n baz\n EOF" )
+ tokenize "foo <<-EOF\n foo\n bar\n baz\n EOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<-"
@@ -375,7 +382,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_single_quotes
- tokenize( "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF" )
+ tokenize "foo <<'EOF'\n foo\#{x}\n bar\n baz\nEOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<'"
@@ -388,7 +395,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_here_doc_double_quotes
- tokenize( "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF" )
+ tokenize "foo <<\"EOF\"\n foo\#{x}\n bar\n baz\nEOF"
  assert_next_token :ident, "foo"
  assert_next_token :normal, " "
  assert_next_token :punct, "<<\""
@@ -403,12 +410,12 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_space
- tokenize( "\n \t\t\n\n\r\n" )
+ tokenize "\n \t\t\n\n\r\n"
  assert_next_token :normal, "\n \t\t\n\n\r\n"
  end

  def test_number
- tokenize( "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2" )
+ tokenize "1 1.0 1e5 1.0e5 1_2.5 1_2.5_2 1_2.5_2e3_2"
  assert_next_token :number, "1"
  skip_token
  assert_next_token :number, "1.0"
@@ -425,7 +432,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_dquoted_string
- tokenize( '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"' )
+ tokenize '"foo #{x} bar\"\n\tbaz\xA5b\5\1234"'
  assert_next_token :punct, '"'
  assert_next_token :string, '', :region_open
  assert_next_token :string, 'foo '
@@ -442,7 +449,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_squoted_string
- tokenize( '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\'' )
+ tokenize '\'foo #{x} bar\\\'\n\tbaz\\\\\xA5b\5\1234\''
  assert_next_token :punct, "'"
  assert_next_token :string, "", :region_open
  assert_next_token :string, 'foo #{x} bar'
@@ -455,51 +462,51 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_dot_selector
- tokenize( 'foo.nil' )
+ tokenize 'foo.nil'
  skip_token
  assert_next_token :punct, "."
  assert_next_token :ident, "nil"
  end

  def test_dot_range_inclusive
- tokenize( 'foo..nil' )
+ tokenize 'foo..nil'
  skip_token
  assert_next_token :punct, ".."
  assert_next_token :constant, "nil"
  end

  def test_dot_range_exclusive
- tokenize( 'foo...nil' )
+ tokenize 'foo...nil'
  skip_token
  assert_next_token :punct, "..."
  assert_next_token :constant, "nil"
  end

  def test_dot_range_many
- tokenize( 'foo.....nil' )
+ tokenize 'foo.....nil'
  skip_token
  assert_next_token :punct, "....."
  assert_next_token :constant, "nil"
  end

  def test_attribute
- tokenize( '@var_foo' )
+ tokenize '@var_foo'
  assert_next_token :attribute, "@var_foo"
  end

  def test_global
- tokenize( '$var_foo' )
+ tokenize '$var_foo'
  assert_next_token :global, "$var_foo"
- tokenize( '$12' )
+ tokenize '$12'
  assert_next_token :global, "$12"
- tokenize( '$/f' )
+ tokenize '$/f'
  assert_next_token :global, "$/"
- tokenize( "$\n" )
+ tokenize "$\n"
  assert_next_token :global, "$"
  end

  def test_paren_delimiter
- tokenize( '%w(a)' )
+ tokenize '%w(a)'
  assert_next_token :punct, "%w("
  assert_next_token :string, "", :region_open
  assert_next_token :string, "a"
@@ -508,7 +515,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_division
- tokenize( 'm / 3' )
+ tokenize 'm / 3'
  assert_next_token :ident, "m"
  assert_next_token :normal, " "
  assert_next_token :punct, "/"
@@ -517,7 +524,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_regex
- tokenize( 'm =~ /3/' )
+ tokenize 'm =~ /3/'
  assert_next_token :ident, "m"
  assert_next_token :normal, " "
  assert_next_token :punct, "=~"
@@ -530,7 +537,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_heredoc_with_trailing_text
- tokenize( "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo" )
+ tokenize "foo('here', <<EOF)\n A heredoc.\nEOF\nfoo"
  assert_next_token :ident, "foo"
  assert_next_token :punct, "('"
  assert_next_token :string, '', :region_open
@@ -550,7 +557,7 @@ class TC_Syntax_Ruby < Test::Unit::TestCase
  end

  def test_multiple_heredocs
- tokenize( <<'TEST' )
+ tokenize <<'TEST'
  foo('here', <<EOF, 'there', <<-'FOO', 'blah')
  First heredoc, right here.
  Expressions are #{allowed}
@@ -598,7 +605,7 @@ TEST
  end

  def test_carldr_bad_heredoc_001
- tokenize( <<'TEST' )
+ tokenize <<'TEST'
  str = <<END
  here document #{1 + 1}
  END
@@ -663,4 +670,202 @@ TEST
  assert_next_token :punct, ")/"
  assert_next_token :number, "2"
  end
+
+ def test_heredoc_with_CRNL
+ tokenize "foo <<SRC\r\nSome text\r\nSRC\r\nfoo"
+ assert_next_token :ident, "foo"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "<<"
+ assert_next_token :constant, "SRC"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "\r\nSome text\r\n"
+ assert_next_token :string, "", :region_close
+ assert_next_token :constant, "SRC"
+ assert_next_token :normal, "\r\n"
+ assert_next_token :ident, "foo"
+ end
+
+ def test_question_mark_at_newline
+ tokenize "foo ?\n 'bar': 'baz'"
+ assert_next_token :ident, "foo"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "?"
+ assert_next_token :normal, "\n "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "bar"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "':"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "baz"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "'"
+ end
+
+ def test_question_mark_and_escaped_newline
+ tokenize "foo ?\\\n 'bar': 'baz'"
+ assert_next_token :ident, "foo"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "?\\"
+ assert_next_token :normal, "\n "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "bar"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "':"
+ assert_next_token :normal, " "
+ assert_next_token :punct, "'"
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "baz"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, "'"
+ end
+
+ def test_highlighted_subexpression
+ tokenizer.set :expressions => :highlight
+ tokenize '"la la #{["hello", "world"].each { |f| puts "string #{f}" }}"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "la la "
+ assert_next_token :expr, "", :region_open
+ assert_next_token :expr, '#{'
+ assert_next_token :punct, '["'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, 'hello'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '",'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "world"
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"].'
+ assert_next_token :ident, 'each'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '{'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '|'
+ assert_next_token :ident, 'f'
+ assert_next_token :punct, '|'
+ assert_next_token :normal, ' '
+ assert_next_token :ident, 'puts'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :string, "string "
+ assert_next_token :expr, "", :region_open
+ assert_next_token :expr, '#{'
+ assert_next_token :ident, 'f'
+ assert_next_token :expr, '}'
+ assert_next_token :expr, "", :region_close
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ assert_next_token :normal, ' '
+ assert_next_token :punct, '}'
+ assert_next_token :expr, '}'
+ assert_next_token :expr, "", :region_close
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_in_braces
+ tokenize '"#{f}"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#{f}'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_in_braces_with_nested_braces
+ tokenize '"#{loop{break}}"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#{loop{break}}'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_with_global_var
+ tokenize '"#$f"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#$f'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_with_instance_var
+ tokenize '"#@f"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#@f'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_expr_with_class_var
+ tokenize '"#@@f"'
+ assert_next_token :punct, '"'
+ assert_next_token :string, "", :region_open
+ assert_next_token :expr, '#@@f'
+ assert_next_token :string, "", :region_close
+ assert_next_token :punct, '"'
+ end
+
+ def test_qmark_space
+ tokenize "? "
+ assert_next_token :punct, "?"
+ assert_next_token :normal, " "
+ end
+
+ def test_capitalized_method
+ tokenize "obj.Foo"
+ skip_token 2
+ assert_next_token :ident, "Foo"
+ end
+
+ def test_hexadecimal_literal
+ tokenize "0xDEADbeef 0X1234567890ABCDEFG"
+ assert_next_token :number, "0xDEADbeef"
+ skip_token
+ assert_next_token :number, "0X1234567890ABCDEF"
+ assert_next_token :constant, "G"
+ end
+
+ def test_binary_literal
+ tokenize "0b2 0b0 0b101 0B123"
+ assert_next_token :number, "0"
+ assert_next_token :ident, "b2"
+ skip_token
+ assert_next_token :number, "0b0"
+ skip_token
+ assert_next_token :number, "0b101"
+ skip_token
+ assert_next_token :number, "0B123"
+ end
+
+ def test_octal_literal
+ tokenize "0o9 0o12345670abc 0O12345678"
+ assert_next_token :number, "0"
+ assert_next_token :ident, "o9"
+ skip_token
+ assert_next_token :number, "0o12345670"
+ assert_next_token :ident, "abc"
+ skip_token
+ assert_next_token :number, "0O12345678"
+ end
+
+ def test_decimal_literal
+ tokenize "0dA 0d1234567890abc 0D1234567890"
+ assert_next_token :number, "0"
+ assert_next_token :ident, "dA"
+ skip_token
+ assert_next_token :number, "0d1234567890"
+ assert_next_token :ident, "abc"
+ skip_token
+ assert_next_token :number, "0D1234567890"
+ end
  end
@@ -0,0 +1,40 @@
+ $:.unshift File.dirname(__FILE__) + "/../../lib"
+
+ require 'test/unit'
+ require 'syntax'
+
+ class TokenizerTestCase < Test::Unit::TestCase
+ def self.syntax( type )
+ class_eval <<-EOF
+ def setup
+ @tokenizer = Syntax.load(#{type.inspect})
+ end
+ EOF
+ end
+
+ def default_test
+ end
+
+ private
+
+ attr_reader :tokenizer
+
+ def tokenize( string )
+ @tokens = []
+ @tokenizer.tokenize( string ) { |tok| @tokens << tok }
+ end
+
+ def assert_next_token(group, lexeme, instruction=:none)
+ assert false, "no tokens in stack" if @tokens.nil? or @tokens.empty?
+ assert_equal [group, lexeme, instruction],
+ [@tokens.first.group, @tokens.first, @tokens.shift.instruction]
+ end
+
+ def assert_no_next_token
+ assert @tokens.empty?
+ end
+
+ def skip_token( n=1 )
+ n.times { @tokens.shift } unless @tokens.nil? || @tokens.empty?
+ end
+ end
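
The new TokenizerTestCase centralizes the tokenize/assert helpers that tc_ruby.rb previously defined inline, and the class-level syntax declaration generates the setup method. A hypothetical subclass for another bundled tokenizer would look like this (the test body is illustrative, mirroring test_empty above, not taken from this diff):

    class TC_Syntax_Yaml < TokenizerTestCase
      syntax "yaml"   # setup becomes: @tokenizer = Syntax.load("yaml")

      def test_empty
        tokenize ""
        assert_no_next_token
      end
    end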
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
- rubygems_version: 0.8.8
+ rubygems_version: 0.8.10
  specification_version: 1
  name: syntax
  version: !ruby/object:Gem::Version
- version: 0.7.0
- date: 2005-03-23
+ version: 1.0.0
+ date: 2005-06-18
  summary: Syntax is a Ruby library for performing simple syntax highlighting.
  require_paths:
  - lib
- email: jgb3@email.byu.edu
+ email: jamis@jamisbuck.org
  homepage:
  rubyforge_project:
  description:
@@ -47,6 +47,7 @@ files:
  - test/syntax/tc_ruby.rb
  - test/syntax/tc_xml.rb
  - test/syntax/tc_yaml.rb
+ - test/syntax/tokenizer_testcase.rb
  test_files:
  - test/ALL-TESTS.rb
  rdoc_options: []