RubyGems - breakout_parser - Versions diffs - 0.0.23 → 0.0.31 - Mend

breakout_parser 0.0.23 → 0.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +15 -0
data/ChangeLog +19 -0
data/README +23 -11
data/ext/breakout_parser/extconf.rb +10 -5
data/ext/breakout_parser/parser.h +27 -0
data/ext/breakout_parser/parser.l +22 -6
data/ext/breakout_parser/parser.y +294 -126
data/ext/breakout_parser/ruby_ext.c +51 -22
data/ext/breakout_parser/ruby_ext.h +2 -0
data/spec/links_only_parser_spec.rb +916 -0
data/spec/obj_proxy.rb +22 -0
data/spec/parser_examples_spec.rb +115 -0
data/spec/parser_spec.rb +146 -34
data/spec/spec_helper.rb +1 -0
metadata +89 -62
data/ext/breakout_parser/lex.yy.c +0 -2996
data/ext/breakout_parser/parser.tab.c +0 -2482
data/ext/breakout_parser/parser.tab.h +0 -113

data/ext/breakout_parser/ruby_ext.c CHANGED Viewed

@@ -1,6 +1,13 @@
 #ifdef RUBY_VERSION
 #include "ruby.h"
+#include "ruby_ext.h"
+#ifdef RUBY_19
+  #include "ruby/st.h"
+#else
+  #include "st.h"
+#endif
 void Init_breakout_parser();
 VALUE method_parse(int, VALUE*, VALUE);
@@ -20,55 +27,78 @@ extern const char *space_name;
 extern size_t in_buf_len, bufsize, space_name_len;
 extern const char *site_url;
+extern const char *large_files_url;
 extern size_t site_url_len;
+extern size_t large_files_url_len;
+extern VALUE meta_attributes;
 VALUE vcs_url;
+char **temp_names, **temp_values;
 extern int parse_links_only, absolute_urls;
 VALUE do_parse(int argc, VALUE *argv, VALUE self) {
     VALUE s, text, r_space_name;
     char *p;
-    if( argc < 2 || argc > 5 ){
-        rb_raise(rb_eArgError, "wrong number of arguments (%d for 2..5)", argc);
-        return rb_str_new("",0); // unreachable code, but for double safety
+    if (argc < 2 || argc > 7) {
+        rb_raise(rb_eArgError, "wrong number of arguments (%d for 2..7)", argc);
     }
     text = argv[0];
     r_space_name = argv[1];
     site_url = NULL; site_url_len = 0;
-    if( argc > 2 && argv[2] != Qnil && argv[2] != Qfalse ){
+    if (argc > 2 && RTEST(argv[2])){
         site_url     = StringValueCStr(argv[2]);
         site_url_len = site_url ? strlen(site_url) : 0;
         while( site_url && site_url_len > 0 && site_url[site_url_len-1] == '/' ) {
             // skip trailing slashes
-            site_url_len--;
+            site_url_len--;
         }
     }
-    vcs_url = NULL;
-    if( argc > 3 && argv[3] != Qnil && argv[3] != Qfalse ){
+    vcs_url = rb_str_new2("");
+    if (argc > 3 && RTEST(argv[3])) {
         vcs_url = argv[3];
     }
     absolute_urls = 0;
-    if( argc > 4 && argv[4] != Qnil && argv[4] != Qfalse ){
+    if (argc > 4 && RTEST(argv[4])) {
         absolute_urls = 1;
     }
-    if(!text || text == Qnil || text == Qfalse){
-        // NULL input string
-        return rb_str_new("",0);
+    large_files_url = NULL; large_files_url_len = 0;
+    if (argc > 5 && RTEST(argv[5])) {
+        large_files_url = StringValueCStr(argv[5]);
+        large_files_url_len = large_files_url ? strlen(large_files_url) : 0;
     }
-    p          = RSTRING(text)->ptr;
-    in_buf_len = RSTRING(text)->len;
+    meta_attributes = rb_hash_new();
+    if (argc > 6 && RTEST(argv[6])) {
+        // More information about object types may be found in ruby.h
+        if (TYPE(argv[6]) != T_HASH) {
+            rb_raise(rb_eTypeError, "wrong type of meta attributes values; expected Hash, given %d", TYPE(argv[6]));
+        }
+        if (RHASH_SIZE(argv[6]) > 0) {
+            meta_attributes = argv[6];
+        }
+    }
+    if(!text || !(RTEST(text))){
+        // empty input string
+        return rb_str_new2("");
+    }
+    p          = RSTRING_PTR(text);
+    in_buf_len = RSTRING_LEN(text);
     if(!p || in_buf_len <= 0){
         // empty input string
-        return rb_str_new("",0);
+        return rb_str_new2("");
     }
     while( in_buf_len > 0 && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')){
@@ -84,25 +114,24 @@ VALUE do_parse(int argc, VALUE *argv, VALUE self) {
     bufsize    = 1 + in_buf_len + in_buf_len/3; // reserve 30% of in_buf size
     if(bufsize<0x100) bufsize = 0x100;
-    buf = ALLOC_N(char, bufsize);
+    buf = ALLOC_N(char, bufsize);
     bufptr = buf;
-    // protect buf from GC (theoretically)
-    rb_iv_set(self,"@obj",Data_Wrap_Struct(rb_cData,NULL,NULL,buf));
     yyparse();
     yylex_destroy();
-//    printf("[.] yyparse() ended\n");
+    // printf("[.] yyparse() ended\n");
     // make ruby string from our char[] data
-    s = rb_str_new(buf,bufptr-buf);
+    s = rb_str_new(buf, bufptr-buf);
-    // cleanup
-    rb_iv_set(self,"@obj",Qnil);
     xfree(buf);
     buf = bufptr = NULL;
     bufsize = 0;
+    #ifdef RUBY_19
+      s = rb_funcall(s, rb_intern("force_encoding"), 1, rb_str_new2("UTF-8")); // encode string to UTF-8
+    #endif
     return s;
 }

data/ext/breakout_parser/ruby_ext.h ADDED Viewed

@@ -0,0 +1,2 @@
+void yyparse();
+void yylex_destroy();

data/spec/links_only_parser_spec.rb ADDED Viewed

@@ -0,0 +1,916 @@
+# -*- coding: utf-8 -*-
+require 'spec_helper'
+require File.dirname(__FILE__) + '/obj_proxy'
+describe 'BreakoutParser' do
+  def self.hex_string s
+    ## sexier, but not runs on ruby 1.8.6 patchlevel 383 i386-mingw32:
+    #s.each_byte.to_a.map{ |c| "%02x" % c }.join
+    r = ''
+    s.each_byte{ |c| r << "%02x" % c }
+    r
+  end
+  def hex_string s; self.class.hex_string(s); end
+  it 'accepts from 2 to 7 arguments' do
+    expect { BreakoutParser.parse }.to raise_error(ArgumentError, "wrong number of arguments (0 for 2..7)")
+    expect { BreakoutParser.parse('a') }.to raise_error(ArgumentError, "wrong number of arguments (1 for 2..7)")
+    [9, 10].each do |argc|
+      expect { BreakoutParser.parse(*(['a'] * (argc - 1) + [{}])) }.to raise_error(ArgumentError, "wrong number of arguments (#{argc} for 2..7)")
+    end
+    (2..6).each do |argc|
+      expect { BreakoutParser.parse(*(['a'] * argc)) }.to_not raise_error(ArgumentError)
+    end
+    expect { BreakoutParser.parse(*(['a'] * 6 + [{}])) }.to_not raise_error(ArgumentError)
+  end
+  it 'converts \n to <br />' do
+    parse("aaa\nbbb").should match(%r"aaa ?<br /> ?bbb")
+  end
+  it "parses 1M file #1" do
+    s = 'a' * 1024 * 1024
+    parse(s).size.should == s.size
+  end
+  it "parses 1M file #2" do
+    s = 'a' + (' ' * 1024 * 1024) + 'b'
+    parse(s).should == 'a b'
+  end
+  it "parses 1M file #3" do
+    s = 'a ' * 1024 * 512
+    parse(s).size.should == s.strip.size
+  end
+  it "handles nil & false text well" do
+    parse(false).should == ""
+    parse(false, :space_name => false).should == ""
+    parse("",    :space_name => false).should == ""
+    parse(nil).should == ""
+    parse(nil,   :space_name => nil).should == ""
+    parse("",    :space_name => nil).should == ""
+  end
+  it "handles nil space_name well" do
+    lambda{
+      parse("#123", :space_name => nil)
+    }.should raise_error(TypeError)
+  end
+  it "handles false space_name well" do
+    lambda{
+      parse("#123", :space_name => false)
+    }.should raise_error(TypeError)
+  end
+  it "strips tailing spaces and newlines" do
+    parse("aaa         ").should == "aaa"
+    parse("aaa\t\t\t\t\t\t").should == "aaa"
+    parse("aaa\r\r\r\r\r").should == "aaa"
+    parse("aaa\n\n\n\n\n").should == "aaa"
+    parse("aaa\r\n\r\n\r\n\r\n").should == "aaa"
+    parse("aaa\r\n\t   \t  \n  \r   \n   \t  \t\n\r   ").should == "aaa"
+  end
+  it "strips leading spaces and newlines" do
+    parse("         aaa").should == "aaa"
+    parse("\t\t\t\t\t\taaa").should == "aaa"
+    parse("\r\r\r\r\raaa").should == "aaa"
+    parse("\n\n\n\n\naaa").should == "aaa"
+    parse("\r\n\r\n\r\n\r\naaa").should == "aaa"
+    parse("\r\n\t   \t  \n  \r   \n   \t  \t\n\r   aaa").should == "aaa"
+  end
+  it "converts each newline to <br />" do
+    parse("aaa\n\nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n  \nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n\n\nbbb").should == "aaa<br /><br /><br />bbb"
+    parse("aaa\n   \n    \nbbb").should == "aaa<br /><br /><br />bbb"
+    parse("aaa\r\n   \r\n    \r\nbbb").should == "aaa<br /><br /><br />bbb"
+    parse("aaa\n   \n\n    \nbbb").should == "aaa<br /><br /><br /><br />bbb"
+    parse("aaa\n   \n\n\n    \nbbb").should == "aaa" + "<br />"*5 + "bbb"
+    parse("aaa\n\n\n\n\n\n\nbbb").should == "aaa" + "<br />"*7 + "bbb"
+    parse("aaa\r\n\r\n\r\nbbb").should == "aaa" + "<br />"*3 + "bbb"
+  end
+###############################################################################
+  describe "@code@" do
+    it "only" do
+      parse("@smth@").should == '@smth@'
+    end
+    it "at beginning" do
+      parse("@smth@\nxxx").should == '@smth@<br />xxx'
+    end
+    it "in the middle of text" do
+      parse("xxx @smth@ yyy").should == 'xxx @smth@ yyy'
+    end
+    it "parses @multiline\\nsmth@" do
+      parse("@multiline\nsmth@").should == "@multiline<br />smth@"
+    end
+    it "not confuses" do
+      parse("look at @this code@ and mail me at xxx@yyy.com").should ==
+        'look at @this code@ and mail me at <a href="mailto:xxx@yyy.com">xxx@yyy.com</a>'
+    end
+    it "w/o closing tag" do
+      parse("@smth").should == '@smth'
+    end
+    it "nesting1 w/o closing tags" do
+      parse("@smth1 @smth2").should == '@smth1 @smth2'
+    end
+    it "nesting2 w/o closing tags" do
+      parse("@smth1  @smth2").should == '@smth1 @smth2'
+    end
+    it "two times" do
+      parse("@code1@ @code2@").should == "@code1@ @code2@"
+      parse("@code1@   @code2@").should == "@code1@ @code2@"
+      parse("  @code1@   @code2@  ").should == "@code1@ @code2@"
+      parse("  @code1@ xxx @code2@  ").should == "@code1@ xxx @code2@"
+    end
+  end
+###############################################################################
+  describe "*bold*" do
+    it "only" do
+      parse("*bold*").should == '*bold*'
+    end
+    it "at beginning" do
+      parse("*bold*\nxxx").should == "*bold*<br />xxx"
+    end
+    it "in the middle of text" do
+      parse("xxx *bold* yyy").should == "xxx *bold* yyy"
+    end
+    it "parses *multiline\\nbold*" do
+      parse("*multiline\nbold*").should == "*multiline<br />bold*"
+    end
+    it "skips lone star inside bold block" do
+      parse("*aaa * bbb*").should == "*aaa * bbb*"
+    end
+    it "skips lone star" do
+      parse("aaa * bbb").should == 'aaa * bbb'
+    end
+    it "w/o closing tag" do
+      parse("*bold").should == '*bold'
+    end
+    it "nesting1 w/o closing tags" do
+      parse("*bold1 *bold2").should == "*bold1 *bold2"
+    end
+    it "nesting2 w/o closing tags" do
+      parse("*bold1  *bold2").should == "*bold1 *bold2"
+    end
+    it "not parses '*.*'" do
+      parse("*.*").should == "*.*"
+      parse(" *.* ").should == "*.*"
+      parse("aaa *.* bbb").should == "aaa *.* bbb"
+    end
+    it "not parses '*.something'" do
+      parse("*.exe").should == "*.exe"
+      parse(" *.exe ").should == "*.exe"
+      parse("aaa *.exe bbb").should == "aaa *.exe bbb"
+    end
+  end
+###############################################################################
+  describe "_italic_" do
+    it "only" do
+      s = "_italic_"
+      parse(s).should == s
+    end
+    it "at beginning" do
+      s = "_italic_\nxxx"
+      parse(s).should == s.gsub("\n","<br />")
+    end
+    it "in the middle of text" do
+      s = "xxx _italic_ yyy"
+      parse(s).should == s
+    end
+    it "parses _multiline\\nitalic_" do
+      s = "_multiline\nitalic_"
+      parse(s).should == s.gsub("\n","<br />")
+    end
+    it "skips lone underscore inside italic block" do
+      s = "_aaa _ bbb_"
+      parse(s).should == s
+    end
+    it "skips lone underscore" do
+      s = "aaa _ bbb"
+      parse(s).should == s
+    end
+    it "w/o closing tag" do
+      s = "_italic"
+      parse(s).should == s
+    end
+    it "nesting1 w/o closing tags" do
+      s = "_italic1 _italic2"
+      parse(s).should == s
+    end
+    it "nesting2 w/o closing tags" do
+      s = "_italic1  _italic2"
+      parse(s).should == s.gsub(/ +/,' ')
+    end
+  end
+###############################################################################
+  describe "combinations" do
+    it "bold in italic" do
+      s = "_aaa *bbb* ccc_"
+      parse(s).should == s
+    end
+    it "bold in italic - no closing1" do
+      s = "_aaa *bbb* ccc"
+      parse(s).should == s
+    end
+    it "bold in italic - no closing2" do
+      s = "_aaa *bbb ccc"
+      parse(s).should == s
+    end
+    it "italic in bold" do
+      s = "*aaa _bbb_ ccc*"
+      parse(s).should == s
+    end
+    it "italic in bold - no closing1" do
+      s = "*aaa _bbb_ ccc"
+      parse(s).should == s
+    end
+    it "italic in bold - no closing2" do
+      s = "*aaa _bbb ccc"
+      parse(s).should == s
+    end
+    {'ul' => '*', 'ol' => '#'}.each do |l,c|
+      it "raw text link inside #{l.upcase}> #1" do
+        s = "#{c} aaa http://www.ru"
+        parse(s).should == "#{c} aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>"
+      end
+      it "raw text link inside #{l.upcase}> #2" do
+        s = "#{c} aaa http://www.ru\n#{c} bbb"
+        parse(s).should == "#{c} aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a><br />#{c} bbb"
+      end
+      it "raw text link inside #{l.upcase}> #3" do
+        s = "#{c} http://www.ru"
+        parse(s).should == "#{c} <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>"
+      end
+      it "raw text link inside #{l.upcase}> #4" do
+        s = "#{c} aaa http://www.ru bbb"
+        parse(s).should == "#{c} aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> bbb"
+      end
+      it "two links inside #{l.upcase}>" do
+        s = "#{c} aaa http://www.ru http://ya.ru bbb"
+        parse(s).should == "#{c} aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> <a rel=\"nofollow\" href=\"http://ya.ru\">http://ya.ru</a> bbb"
+      end
+    end
+  end
+###############################################################################
+  describe "IGNORES unnumbered lists" do
+    it "should ignore" do
+      parse("* a\n* b\n* c").should == "* a<br />* b<br />* c"
+    end
+    it "two lists" do
+      s = "* a\n* b\n* c"
+      s = s + "\nxxx\n" + s
+      parse(s).should == "* a<br />* b<br />* c<br />xxx<br />* a<br />* b<br />* c"
+    end
+    it "in middle of text when begins with space" do
+      parse("hello\n * a\n  * b\n * c\nworld").should ==
+        "hello<br />* a<br />* b<br />* c<br />world"
+    end
+    it "in middle of text" do
+      parse("hello\n* a\n* b\n* c\nworld").should ==
+        "hello<br />* a<br />* b<br />* c<br />world"
+    end
+    it "after blank line" do
+      parse("hello\n\n * a\n * b\n * c\nworld").should ==
+        "hello<br /><br />* a<br />* b<br />* c<br />world"
+    end
+  end
+###############################################################################
+  describe "IGNORES numbered lists" do
+    it "should ignore" do
+      parse("# a\n# b\n# c").should == "# a<br /># b<br /># c"
+    end
+    it "two lists" do
+      s = "# a\n# b\n# c"
+      s = s + "\nxxx\n" + s
+      parse(s).should == h(s).gsub("\n","<br />")
+    end
+    it "in middle of text when begins with space" do
+      parse("hello\n # a\n  # b\n # c\nworld").should ==
+        "hello<br /># a<br /># b<br /># c<br />world"
+    end
+    it "in middle of text" do
+      parse("hello\n# a\n# b\n# c\nworld").should ==
+        "hello<br /># a<br /># b<br /># c<br />world"
+    end
+    it "after blank line" do
+      parse("hello\n\n # a\n # b\n # c\nworld").should ==
+        "hello<br /><br /># a<br /># b<br /># c<br />world"
+    end
+  end
+###############################################################################
+  1.upto(5) do |lvl|
+    describe "H#{lvl}" do
+      it "at the beginning" do
+        s = "h#{lvl}. xxx"
+        parse(s).should == h(s)
+      end
+      it "after 1 line of text" do
+        s = "abcd\nh#{lvl}. xxx"
+        parse(s).should == h(s).gsub("\n","<br />")
+      end
+      it "after 2 lines of text" do
+        s = "abcd\ndefgh\nh#{lvl}. xxx"
+        parse(s).should == h(s).gsub("\n","<br />")
+      end
+      it "in middle of other words" do
+        s = "abcd defgh h#{lvl}. xxx yyy"
+        parse(s).should == h(s).gsub("\n","<br />")
+      end
+      it "in middle of other lines" do
+        s = "abcd defgh\nh#{lvl}. xxx\nyyy"
+        parse(s).should == h(s).gsub("\n","<br />")
+      end
+      it "does nothing special on spaces in id" do
+        s = "h#{lvl}. xxx   yyy z"
+        parse(s).should == h(s).gsub(/ +/,' ')
+      end
+      it "does nothing special on underscores in id" do
+        s = "h#{lvl}. xxx___yyy_z"
+        parse(s).should == h(s)
+      end
+      it "does nothing special on dashes in id" do
+        s = "h#{lvl}. xxx---yyy-z"
+        parse(s).should == h(s)
+      end
+      it "does nothing special on dots in id" do
+        s = "h#{lvl}. xxx...yyy.z"
+        parse(s).should == h(s)
+      end
+      %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
+        it "does nothing special on \"#{c}\"" do
+          s = "h#{lvl}. xxx#{c}yyy"
+          parse(s).should == h(s)
+        end
+      end
+    end
+  end
+###############################################################################
+  describe "raw text links" do
+    describe "starting with 'http://'" do
+      it "at the beginning" do
+        parse("http://asd.ru").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>"
+      end
+      it "in middle of other words" do
+        parse("aaa bbb ccc http://asd.ru ddd eee fff").should ==
+          "aaa bbb ccc <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ddd eee fff"
+      end
+      it "in new line" do
+        parse("aaa bbb ccc\nhttp://asd.ru\nddd eee fff").should match(
+          %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ?<br /> ?ddd eee fff"
+        )
+      end
+      it "escapes '&' in link _text_" do
+        parse("http://asd.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://asd.ru/?a=1&b=2\">http://asd.ru/?a=1&amp;b=2</a>"
+      end
+      it "parses https://" do
+        parse("https://asd.ru").should == "<a rel=\"nofollow\" href=\"https://asd.ru\">https://asd.ru</a>"
+      end
+      %w', .'.each do |c|
+        it "stops parsing on \"#{c} \"" do
+          parse("http://asd.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
+          parse(" http://asd.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
+          parse(" http://asd.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
+          parse("xxx http://asd.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
+          parse(" http://asd.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru/\">http://asd.ru/</a>#{c} hello!"
+          parse(" http://aaa.com#{c} http://bbb.com").should ==
+            "<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
+        end
+      end
+    end
+    describe "starting with 'www.'" do
+      it "at the beginning" do
+        parse("www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>"
+      end
+      it "in middle of other words" do
+        parse("aaa bbb ccc www.ru ddd eee fff").should ==
+          "aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ddd eee fff"
+      end
+      it "in new line" do
+        parse("aaa bbb ccc\nwww.ru\nddd eee fff").should match(
+          %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ?<br /> ?ddd eee fff"
+        )
+      end
+      it "escapes '&' in link _text_" do
+        parse("www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">www.ru/?a=1&amp;b=2</a>"
+      end
+      %w', .'.each do |c|
+        it "stops parsing on \"#{c} \"" do
+          parse("www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
+          parse(" www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
+          parse(" www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
+          parse("xxx www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
+          parse(" www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">www.ru/</a>#{c} hello!"
+          parse(" www.aaa.com#{c} www.bbb.com").should ==
+            "<a rel=\"nofollow\" href=\"http://www.aaa.com\">www.aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://www.bbb.com\">www.bbb.com</a>"
+        end
+      end
+    end
+    describe 'e-mails' do
+      it "at the beginning" do
+        parse("aaa@bbb.com").should == "<a href=\"mailto:aaa@bbb.com\">aaa@bbb.com</a>"
+      end
+      it "in middle of other words" do
+        parse("aaa bbb ccc xx@yy.cn ddd eee fff").should ==
+          "aaa bbb ccc <a href=\"mailto:xx@yy.cn\">xx@yy.cn</a> ddd eee fff"
+      end
+      it "in new line" do
+        parse("aaa bbb ccc\naa.bb@cc.dd.ee\nddd eee fff").should match(
+          %r"aaa bbb ccc ?<br /> ?<a href=\"mailto:aa.bb@cc.dd.ee\">aa.bb@cc.dd.ee</a> ?<br /> ?ddd eee fff"
+        )
+      end
+      %w', .'.each do |c|
+        it "stops parsing on \"#{c} \"" do
+          parse("a-b@c-d.efghjikl#{c}").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
+          parse(" a-b@c-d.efghjikl#{c} ").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
+          parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
+          parse("xxx a-b@c-d.efghjikl#{c} hello!").should == "xxx <a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
+          parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
+          parse(" www@aaa.com#{c} www@bbb.com").should ==
+            "<a href=\"mailto:www@aaa.com\">www@aaa.com</a>#{c} <a href=\"mailto:www@bbb.com\">www@bbb.com</a>"
+        end
+      end
+      it "not parses bad emails" do
+        s="a@b.c a@b a.b@c a.b@@c a@b@c.d a#b@c.d"
+        parse(s).should == s
+      end
+    end
+  end
+###############################################################################
+  describe "#ticketNum ticket links" do
+    it "at the beginning" do
+      parse("#1234").should == '<a href="/spaces/test_space/tickets/1234">#1234</a>'
+    end
+    it "in middle of other words" do
+      parse("aaa bbb ccc #3476 ddd eee fff").should ==
+        'aaa bbb ccc <a href="/spaces/test_space/tickets/3476">#3476</a> ddd eee fff'
+    end
+    it "in new line" do
+      parse("aaa bbb ccc\n#1234\nddd eee fff").should match(
+        %r|aaa bbb ccc ?<br /> ?<a href="/spaces/test_space/tickets/1234">#1234</a> ?<br /> ?ddd eee fff|
+      )
+    end
+    it "ignores non-digits" do
+      parse("#1234d").should == '#1234d'
+      parse("#xxx").should == '#xxx'
+    end
+    ".,;!?:-".each_char do |c|
+      it "uses '#{c}' as separator" do
+        l = '<a href="/spaces/test_space/tickets/1234">#1234</a>'
+        t = "L#{c}L"
+        parse(t.gsub('L','#1234')).should == t.gsub('L',l)
+        t = "L#{c}#{c}L"
+        parse(t.gsub('L','#1234')).should == t.gsub('L',l)
+        t = "#{c}L#{c}L#{c}"
+        parse(t.gsub('L','#1234')).should == t.gsub('L',l)
+      end
+    end
+  end
+###############################################################################
+  [
+    %w'<pre><code> </code></pre>',
+    %w'<pre> </pre>',
+    %w'<notextile> </notextile>'
+  ].each do |ot,ct|
+    # ot - opening tag
+    # ct - closing tag
+#    if ot == '<notextile>'
+#      ote,cte = '',''
+#    else
+      ote,cte = ot,ct
+#    end
+    describe "#{ot}..#{ct}" do
+      it "works" do
+        s = <<-EOF
+      for ( n = 0; n < max_size && \
+             (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \
+        buf[n] = (char) c; \
+        EOF
+        parse("#{ot}#{s.strip}#{ct}").should ==
+          h("#{ot}#{s.strip}#{ct}").gsub(/[ \t]+/,' ').gsub("\n","<br />")
+        s = <<-EOF
+            while ( 1 < 2 ) do
+              puts "<b>12345\\t54321</b>"
+              // *bold* comment
+              // _italic_ comment
+            end
+            ---
+            * aaa
+            * bbb
+            * ccc
+        EOF
+        parse("#{ot}#{s.strip}#{ct}").should ==
+          h("#{ote}#{s.strip}#{cte}").gsub(/[ \t]+/,' ').gsub(/\n */,"<br />")
+      end
+      it "not parses *bold*" do
+        s = "#{ot} *bold*#{ct}"
+        parse(s).should == h(s)
+      end
+      it "not parses _italic_" do
+        s = "#{ot} _italic_#{ct}"
+        parse(s).should == h(s)
+      end
+      it "not parses UL lists" do
+        s = "#{ot}\n * l1\n * l2\n * l3#{ct}"
+        parse(s).should == h(s).gsub("\n ","<br />")
+      end
+      it "not parses OL lists" do
+        s = "#{ot}\n # l1\n # l2\n # l3#{ct}"
+        parse(s).should == h(s).gsub("\n ","<br />")
+      end
+      it "not parses H1..H5" do
+        1.upto(5) do |i|
+          s = "#{ot}\nh#{i}. zzzzzzz\n#{ct}"
+          parse(s).should == h(s).gsub("\n","<br />")
+        end
+      end
+      it "parses raw text links" do
+        s = "#{ot}xxx http://www.ru yyy#{ct}"
+        parse(s).should == "#{h(ote)}xxx <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> yyy#{h(cte)}"
+        s = "#{ot}http://www.ru#{ct}"
+        parse(s).should == "#{h(ote)}<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{h(cte)}"
+      end
+      it "keeps newlines" do
+        s = "#{ot}aaa\nbbb#{ct}"
+        parse(s).should == h(s).gsub("\n","<br />")
+        s = "#{ot}aaa\n\nbbb\nccc#{ct}"
+        parse(s).should == h(s).gsub("\n","<br />")
+      end
+      it "w/o closing tags" do
+        s = "#{ot}aaa"
+        parse(s).should == h(s)
+      end
+      it "in middle of text" do
+        s = "xxx #{ot}yyyy#{ct} jjj"
+        parse(s).should == h(s)
+      end
+      it "with 2 instances" do
+        s = "xxx #{ot}yyyy#{ct} <jjj> #{ot}asdkjaslkd#{ct} END"
+        parse(s).should == h(s)
+      end
+      it "works with unicode" do
+        s = "привет #{ot} жжж #{ct} пока!"
+        parse(s).should == h(s)
+        s = "#{ot}абвгдеёжзийклмнопрстуфхцчшщьыъэюя#{ct}"
+        parse(s).should == h(s)
+        s = "#{ot}АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ#{ct}"
+        parse(s).should == h(s)
+        s = "#{ot}☸☹☺☻☼☽☾☿#{ct}"
+        parse(s).should == h(s)
+      end
+      it "should escape lone closing tags" do
+        s = "#{ct}"
+        parse(s).should == h(s)
+      end
+      it "should skip newlines and spaces at end" do
+        s = "#{ot} aaa bbb ccc \n\n\n  \t\n\n\n\r\n\r\n   \t  #{ct}"
+        parse(s).should == "#{h(ote)} aaa bbb ccc<br /><br /><br /><br /><br /><br /><br /><br />#{h(cte)}"
+      end
+      it "escapes html chars" do
+        HTML_ESCAPE.each do |k,v|
+          parse("#{ot}#{k}#{ct}").should == h("#{ote}#{k}#{cte}")
+        end
+      end
+    end
+  end
+  describe "NOT PARSES <pre><code>..</code></pre>" do
+    it "with no spaces between <pre> and <code>" do
+      s = "<pre><code>aaa</code></pre>"
+      parse(s).should == h(s).gsub(/ +/,' ')
+    end
+    it "with spaces between <pre> and <code>" do
+      s = "<pre>    <code>aaa</code>           </pre>"
+      parse(s).should == h(s).gsub(/ +/,' ')
+    end
+    it "with spaces between <pre> and <code> and inside" do
+      s = "<pre>    <code>  aaa   bbb   </code>           </pre>"
+      parse(s).should == h(s).gsub(/ +/,' ')
+    end
+  end
+###############################################################################
+  describe "<code>..</code>" do
+    it "keeps <code> tags" do
+      s = "<code>aaa</code>"
+      parse(s).should == h(s)
+    end
+    it "strips heading & tailing whitespace" do
+      s = "<code> \r\n \t  \t\r  aaa \r\n\t  \t\r  </code>"
+      parse(s).should == "&lt;code&gt;<br /><br /> aaa<br /><br /> &lt;/code&gt;"
+    end
+    it "not parses *bold*" do
+      s = "<code>aaa *bbb* ccc</code>"
+      parse(s).should == h(s)
+    end
+    it "not parses _italic_" do
+      s = "<code>aaa _bbb_ ccc</code>"
+      parse(s).should == h(s)
+    end
+    it "not parses headers" do
+      s = "<code>aaa\nh1. bbb\nccc</code>"
+      parse(s).should == h(s).gsub("\n","<br />")
+    end
+    it "not parses <pre>" do
+      s = "<code>aaa <pre>bbb</pre> ccc</code>"
+      parse(s).should == h(s)
+    end
+    it "NOT closes unclosed <code>" do
+      s = "aaa <code>bbb"
+      parse(s).should == h(s)
+    end
+    it "NOT escapes '&'" do
+      s = "<code>aaa & bbb</code>"
+      parse(s).should == h(s)
+    end
+    it "parses links" do
+      s = "<code>aaa #1 #2 #3 http://www.ru [[wiki:jjj]] [[url:http://www.ru]] bbb</code>"
+      parse(s).should == "&lt;code&gt;aaa <a href=\"/spaces/test_space/tickets/1\">#1</a> <a href=\"/spaces/test_space/tickets/2\">#2</a> <a href=\"/spaces/test_space/tickets/3\">#3</a> <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> <a class=\"wiki_link\" title=\"jjj\" href=\"/spaces/test_space/wiki/jjj\">jjj</a> <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> bbb&lt;/code&gt;"
+    end
+    it "NOT works" do
+      s = "<code>aaa & bbb</code> xxx <code>jjj&hhh</code>"
+      parse(s).should == h(s)
+      s = "<code>   aaa      </code>xxx<code>      jjj      </code>"
+      parse(s).should == h(s).gsub(/ +/,' ')
+    end
+    it "NOT keeps code bold" do
+      s = "*aaa <code>bbb</code> ccc*"
+      parse(s).should == h(s)
+      s = "*<code>aaa</code>*"
+      parse(s).should == h(s)
+    end
+    it "NOT keeps code italic" do
+      s = "_aaa <code>bbb</code> ccc_"
+      parse(s).should == h(s)
+      s = "_<code>aaa</code>_"
+      parse(s).should == h(s)
+    end
+  end
+###############################################################################
+  describe "Assembla Links" do
+    a = {}
+    a["wiki:Name"]  = '<a class="wiki_link" title="Name" href="/spaces/test_space/wiki/Name">Name</a>'
+    a["Name"]       = '<a class="wiki_link" title="Name" href="/spaces/test_space/wiki/Name">Name</a>'
+    a["Name#Ref"]   = '<a class="wiki_link" title="Name#Ref" href="/spaces/test_space/wiki/Name#h-Ref">Name#Ref</a>'
+    a["Name#h-Ref"]   = '<a class="wiki_link" title="Name#h-Ref" href="/spaces/test_space/wiki/Name#h-h-Ref">Name#h-Ref</a>'
+    a["#Ref"]       = '<a href="#h-Ref" title="#Ref" class="wiki_link">#Ref</a>'
+    a["#привет"]    = %Q|<a href="#h-#{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
+    a["#with  spc"] = %Q|<a href="#h-with__spc" title="#with  spc" class="wiki_link">#with  spc</a>|
+    a["#with__usc"] = %Q|<a href="#h-with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
+    a["#with--dsh"] = %Q|<a href="#h-with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
+    a["#with!xclm"] = %Q|<a href="#h-#{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
+    a["#with&amp"]  = %Q|<a href="#h-#{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
+    a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
+    a["revision:1f4bdab77be696efd"] =
+      '<a href="/code/test_space/git/changesets/1f4bdab77be696efd">revision:1f4bdab77be696efd</a>'
+    a["revision:12345"] =
+      '<a href="/code/test_space/subversion/changesets/12345">revision:12345</a>'
+    a["revision:567:1f4bdab77be696efd"] =
+      '<a href="/code/test_space/git-567/changesets/1f4bdab77be696efd">revision:1f4bdab77be696efd</a>'
+    a["revision:3:12345"] =
+      '<a href="/code/test_space/subversion-3/changesets/12345">revision:12345</a>'
+    a["r:2345"] = '<a href="/code/test_space/subversion/changesets/2345">revision:2345</a>'
+    a["r:2345ef"] = '<a href="/code/test_space/git/changesets/2345ef">revision:2345ef</a>'
+    a["r:10:2345"] = '<a href="/code/test_space/subversion-10/changesets/2345">revision:2345</a>'
+    a["r:1:2345ef"] = '<a href="/code/test_space/git-1/changesets/2345ef">revision:2345ef</a>'
+    a["url:http://www.ru"]   = '<a rel="nofollow" href="http://www.ru">http://www.ru</a>'
+    a["url:https://www.ru"]  = '<a rel="nofollow" href="https://www.ru">https://www.ru</a>'
+    a["url:www.ru"]          = '<a rel="nofollow" href="http://www.ru">http://www.ru</a>'
+    a["url:www.ru/?a=1&b=2"] = '<a rel="nofollow" href="http://www.ru/?a=1&amp;b=2">http://www.ru/?a=1&amp;b=2</a>'
+    a["url:ftp://www.ru"]    = '<a rel="nofollow" href="ftp://www.ru">ftp://www.ru</a>'
+    a["url:/spaces/x2"]      = '<a rel="nofollow" href="/spaces/x2">/spaces/x2</a>'
+    a["file:ExistingFile.txt"] =
+      '<a href="/spaces/test_space/documents/download/ExistingFile.txt">file:ExistingFile.txt</a>'
+    a["file:cVJUz6ejWr35pEab_qKWB8"] =
+      '<a href="/spaces/test_space/documents/download/cVJUz6ejWr35pEab_qKWB8">file:cVJUz6ejWr35pEab_qKWB8</a>'
+    a.each do |k,v|
+      it "parses [[#{k}]]" do
+        parse("[[#{k}]]").should == v
+      end
+      it "parses [[#{k}|привет тест]]" do
+        parse("[[#{k}|привет тест]]").should == v.sub(/>.*</,">привет тест<")
+      end
+      it "parses [[#{k}|test & here]]" do
+        parse("[[#{k}|test & here]]").should == v.sub(/>.*</,">test &amp; here<")
+      end
+      if v['href="/'] && !k['url:']
+        it "parses [[#{k}]] with a site url" do
+          site_url = "http://www.ru"
+          parse("[[#{k}]]", :site_url => site_url).should ==
+            v.gsub('href="/',"href=\"#{site_url}/")
+          # with extraordinary slash
+          parse("[[#{k}]]", :site_url => "#{site_url}/").should ==
+            v.gsub('href="/',"href=\"#{site_url}/")
+          site_url = "http://127.0.0.1:3000"
+          parse("[[#{k}]]", :site_url => site_url).should ==
+            v.gsub('href="/',"href=\"#{site_url}/")
+          # with extraordinary slash
+          parse("[[#{k}]]", :site_url => "#{site_url}/").should ==
+            v.gsub('href="/',"href=\"#{site_url}/")
+        end
+      end
+      if v['/git/']
+        it "parses [[#{k}]] with custom git_url (String)" do
+          git_url = "http://www.ru/"
+          rev = k.split(':').last.tr(']','')
+          parse("[[#{k}]]", :git_url => git_url).should ==
+            v.sub('/code/test_space/git/changesets/',git_url)
+        end
+        it "parses [[#{k}]] with custom git_url (ObjProxy)" do
+          rev = k.split(':').last.tr(']','')
+          @asdfg = 'http://mmm.us'
+          git_url = Breakout::ObjProxy.new do
+            @asdfg + '/'
+          end
+          parse("[[#{k}]]", :git_url => git_url).should ==
+            v.sub('/code/test_space/git/changesets/',git_url)
+        end
+        it "parses [[#{k}]] with NULL git_url (ObjProxy)" do
+          rev = k.split(':').last.tr(']','')
+          git_url = Breakout::ObjProxy.new do
+            nil
+          end
+          parse("[[#{k}]]", :git_url => git_url).should == v
+        end
+        it "parses [[#{k}]] with FALSE git_url (ObjProxy)" do
+          rev = k.split(':').last.tr(']','')
+          git_url = Breakout::ObjProxy.new do
+            false
+          end
+          parse("[[#{k}]]", :git_url => git_url).should == v
+        end
+      end
+    end
+    it "should not instantiate ObjProxy's internal object if there are no git or svn links in text" do
+      git_url = Breakout::ObjProxy.new do
+        raise 'should not be raised'
+      end
+      lambda {
+        parse("no revision tag", :git_url => git_url).should == 'no revision tag'
+      }.should_not raise_error
+    end
+    describe "absolute_urls" do
+      # 'true' values
+      [true, 1, 'x'].each do |v|
+        it "should NOT convert relative url to absolute when absolute_urls = #{v.inspect} AND site_url is NULL" do
+          parse("[[url:/rel]]", :absolute_urls => v).should ==
+            '<a rel="nofollow" href="/rel">/rel</a>'
+          parse("[[url:/rel|text]]", :absolute_urls => v).should ==
+            '<a rel="nofollow" href="/rel">text</a>'
+        end
+        it "should convert relative url to absolute when absolute_urls = #{v.inspect}" do
+          parse("[[url:/rel]]", :absolute_urls => v, :site_url => 'http://www.ru').should ==
+            '<a rel="nofollow" href="http://www.ru/rel">/rel</a>'
+          parse("[[url:/rel|text]]", :absolute_urls => v, :site_url => 'http://www.ru').should ==
+            '<a rel="nofollow" href="http://www.ru/rel">text</a>'
+        end
+      end
+      # 'false' values
+      [false, nil].each do |v|
+        it "should not convert relative url to absolute when absolute_urls = #{v.inspect}" do
+          parse("[[url:/rel]]", :absolute_urls => v).should ==
+            '<a rel="nofollow" href="/rel">/rel</a>'
+          parse("[[url:/rel|text]]", :absolute_urls => v).should ==
+            '<a rel="nofollow" href="/rel">text</a>'
+        end
+      end
+    end
+    a = {}
+    a["image:ExistingImage.png"] =
+      '<img src="/spaces/test_space/documents/download/ExistingImage.png" alt="ALT" />'
+    a["image:cVJUz6ejWr35pEab_qKWB8"] =
+      '<img src="/spaces/test_space/documents/download/cVJUz6ejWr35pEab_qKWB8" alt="ALT" />'
+    a.each do |k,v|
+      it "parses [[#{k}]]" do
+        parse("[[#{k}]]").should == v.sub('ALT',k.sub('image:',''))
+      end
+      it "parses [[#{k}|привет тест]]" do
+        parse("[[#{k}|привет тест]]").should == v.sub('ALT','привет тест')
+      end
+      it "parses [[#{k}|test & here]]" do
+        parse("[[#{k}|test & here]]").should == v.sub('ALT','test & here')
+      end
+    end
+    it "ignores unknown link types" do
+      s = "[[zzz:xxx]]"
+      parse(s).should == s
+      s = "[[abcd:1234]]"
+      parse(s).should == s
+      s = "[[abcd::1234]] [[abcd:1234]] [[uri:ww.ru]]"
+      parse(s).should == s
+    end
+    it "ignores file & image links with forbidden symbols" do
+      s = "[[file:aaa/bbb]]"
+      parse(s).should == s
+      s = "[[file:aaa\\bbb]]"
+      parse(s).should == s
+      s = "[[file:aaa bbb]]"
+      parse(s).should == s
+      s = "[[image:aaa/bbb]]"
+      parse(s).should == s
+      s = "[[image:aaa\\bbb]]"
+      parse(s).should == s
+      s = "[[image:aaa bbb]]"
+      parse(s).should == s
+    end
+  end
+###############################################################################
+###############################################################################
+###############################################################################
+  unless defined?(HTML_ESCAPE)
+    HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
+  end
+  def h(s)
+    s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
+  end
+  def parse(s, h = {})
+    h[:space_name] = "test_space" unless h.key?(:space_name)
+    BreakoutParser.parse_links_only(s,
+      h[:space_name],
+      h[:site_url],
+      h[:git_url],
+      h[:absolute_urls],
+      h[:large_files_url],
+      h[:meta_attributes]
+    ).strip
+  end
+end