RubyGems - breakout_parser - Versions diffs - 0.0.7 → 0.0.8 - Mend

breakout_parser 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

data/ext/breakout_parser/lex.yy.c +669 -478
data/ext/breakout_parser/make_win32.bat +1 -0
data/ext/breakout_parser/parser.l +31 -30
data/ext/breakout_parser/parser.tab.c +189 -166
data/ext/breakout_parser/parser.tab.h +23 -22
data/ext/breakout_parser/parser.y +17 -1
data/ext/breakout_parser/ruby_ext.c +15 -1
data/spec/parser_spec.rb +94 -28
metadata +2 -2

data/ext/breakout_parser/parser.tab.h CHANGED Viewed

@@ -54,27 +54,28 @@
      FILE_LINK = 270,
      IMAGE_LINK = 271,
      URL = 272,
-     UL = 273,
-     H1 = 274,
-     H2 = 275,
-     H3 = 276,
-     H4 = 277,
-     H5 = 278,
-     INLINE_CODE = 279,
-     SPACE = 280,
-     BR = 281,
-     OLI = 282,
-     ULI = 283,
-     PRE_CODE_START = 284,
-     PRE_CODE_END = 285,
-     PRE_START = 286,
-     PRE_END = 287,
-     CODE_START = 288,
-     CODE_END = 289,
-     NOTEXTILE_START = 290,
-     NOTEXTILE_END = 291,
-     BOLD_END = 292,
-     ITALIC_END = 293
+     EMAIL = 273,
+     UL = 274,
+     H1 = 275,
+     H2 = 276,
+     H3 = 277,
+     H4 = 278,
+     H5 = 279,
+     INLINE_CODE = 280,
+     SPACE = 281,
+     BR = 282,
+     OLI = 283,
+     ULI = 284,
+     PRE_CODE_START = 285,
+     PRE_CODE_END = 286,
+     PRE_START = 287,
+     PRE_END = 288,
+     CODE_START = 289,
+     CODE_END = 290,
+     NOTEXTILE_START = 291,
+     NOTEXTILE_END = 292,
+     BOLD_END = 293,
+     ITALIC_END = 294
    };
 #endif
@@ -94,7 +95,7 @@ typedef union YYSTYPE
 /* Line 1676 of yacc.c  */
-#line 98 "parser.tab.h"
+#line 99 "parser.tab.h"
 } YYSTYPE;
 # define YYSTYPE_IS_TRIVIAL 1
 # define yystype YYSTYPE /* obsolescent; will be withdrawn */

data/ext/breakout_parser/parser.y CHANGED Viewed

@@ -69,7 +69,7 @@ void yyerror(const char *msg)
 %token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
 %token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
 %token <svalue> FILE_LINK IMAGE_LINK
-%token <svalue> URL
+%token <svalue> URL EMAIL
 %token <svalue> UL
 %token <svalue> H1 H2 H3 H4 H5
 %token <svalue> INLINE_CODE
@@ -116,6 +116,7 @@ word : chars
       | link
       | T_WORD         {concat2($1)} // TODO: somehow pass T_WORD's length here
       | URL            {process_url($1)}
+      | EMAIL          {process_email($1)}
       | BOLD_START     {$1 ? concat(" <strong>",9) : concat("<strong>",8)}
       | BOLD_END       {concat("</strong>",9)}
       | ITALIC_START   {$1 ? concat(" <em>",5) : concat("<em>",4)}
@@ -402,6 +403,21 @@ process_url(const char*url){
     const char *p;
     concat("<a rel=\"nofollow\" href=\"",24);
+    if( *url == 'w' ){
+        // url starts with 'www.'
+        concat("http://",7);
+    } else {
+        // assume url starts with 'http://'
+    }
     for(p=url; *p; p++) concat_raw_char(*p);
     process_link_tail(url,NULL,NULL);
 }
+process_email(const char*url){
+    const char *p;
+    concat("<a href=\"mailto:",16);
+    for(p=url; *p; p++) concat_raw_char(*p);
+    process_link_tail(url,NULL,NULL);
+}

data/ext/breakout_parser/ruby_ext.c CHANGED Viewed

@@ -4,12 +4,14 @@
 void Init_breakout_parser();
 VALUE method_parse(VALUE, VALUE, VALUE);
+VALUE method_parse_links_only(VALUE, VALUE, VALUE);
 VALUE breakout_parser = Qnil;
 void Init_breakout_parser() {
     breakout_parser = rb_define_class("BreakoutParser",rb_cObject);
     rb_define_singleton_method(breakout_parser, "parse", method_parse, 2);
+    rb_define_singleton_method(breakout_parser, "parse_links_only", method_parse_links_only, 2);
 }
 extern char *buf, *bufptr;
@@ -17,7 +19,9 @@ extern char *in_buf, *in_pos;
 extern const char *space_name;
 extern size_t in_buf_len, bufsize, space_name_len;
-VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
+extern int parse_links_only;
+VALUE do_parse(VALUE self, VALUE text, VALUE r_space_name) {
     VALUE s;
     char *p;
@@ -69,4 +73,14 @@ VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
     return s;
 }
+VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
+    parse_links_only = 0;
+    return do_parse(self,text,r_space_name);
+}
+VALUE method_parse_links_only(VALUE self, VALUE text, VALUE r_space_name) {
+    parse_links_only = 1;
+    return do_parse(self,text,r_space_name);
+}
 #endif // ifdef RUBY_VERSION

data/spec/parser_spec.rb CHANGED Viewed

@@ -97,7 +97,7 @@ describe 'BreakoutParser' do
     end
     it "not confuses" do
       parse("look at @this code@ and mail me at xxx@yyy.com").should ==
-        'look at <code>this code</code> and mail me at xxx@yyy.com'
+        'look at <code>this code</code> and mail me at <a href="mailto:xxx@yyy.com">xxx@yyy.com</a>'
     end
     it "w/o closing tag" do
       parse("@smth").should == '@smth'
@@ -359,35 +359,99 @@ describe 'BreakoutParser' do
 ###############################################################################
   describe "raw text links" do
-    it "at the beginning" do
-      parse("http://www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>"
-    end
-    it "in middle of other words" do
-      parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
-        "aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
-    end
-    it "in new line" do
-      parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
-        %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
-      )
-    end
-    it "escapes '&' in link _text_" do
-      parse("http://www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&amp;b=2</a>"
+    describe "starting with 'http://'" do
+      it "at the beginning" do
+        parse("http://asd.ru").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>"
+      end
+      it "in middle of other words" do
+        parse("aaa bbb ccc http://asd.ru ddd eee fff").should ==
+          "aaa bbb ccc <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ddd eee fff"
+      end
+      it "in new line" do
+        parse("aaa bbb ccc\nhttp://asd.ru\nddd eee fff").should match(
+          %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ?<br /> ?ddd eee fff"
+        )
+      end
+      it "escapes '&' in link _text_" do
+        parse("http://asd.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://asd.ru/?a=1&b=2\">http://asd.ru/?a=1&amp;b=2</a>"
+      end
+      it "parses https://" do
+        parse("https://asd.ru").should == "<a rel=\"nofollow\" href=\"https://asd.ru\">https://asd.ru</a>"
+      end
+      %w', .'.each do |c|
+        it "stops parsing on \"#{c} \"" do
+          parse("http://asd.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
+          parse(" http://asd.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
+          parse(" http://asd.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
+          parse("xxx http://asd.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
+          parse(" http://asd.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru/\">http://asd.ru/</a>#{c} hello!"
+          parse(" http://aaa.com#{c} http://bbb.com").should ==
+            "<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
+        end
+      end
     end
-    it "parses https://" do
-      parse("https://www.ru").should == "<a rel=\"nofollow\" href=\"https://www.ru\">https://www.ru</a>"
+    describe "starting with 'www.'" do
+      it "at the beginning" do
+        parse("www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>"
+      end
+      it "in middle of other words" do
+        parse("aaa bbb ccc www.ru ddd eee fff").should ==
+          "aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ddd eee fff"
+      end
+      it "in new line" do
+        parse("aaa bbb ccc\nwww.ru\nddd eee fff").should match(
+          %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ?<br /> ?ddd eee fff"
+        )
+      end
+      it "escapes '&' in link _text_" do
+        parse("www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">www.ru/?a=1&amp;b=2</a>"
+      end
+      %w', .'.each do |c|
+        it "stops parsing on \"#{c} \"" do
+          parse("www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
+          parse(" www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
+          parse(" www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
+          parse("xxx www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
+          parse(" www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">www.ru/</a>#{c} hello!"
+          parse(" www.aaa.com#{c} www.bbb.com").should ==
+            "<a rel=\"nofollow\" href=\"http://www.aaa.com\">www.aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://www.bbb.com\">www.bbb.com</a>"
+        end
+      end
     end
-    %w', .'.each do |c|
-      it "stops parsing on \"#{c} \"" do
-        parse("http://www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
-        parse(" http://www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
-        parse(" http://www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
-        parse("xxx http://www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
-        parse(" http://www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
-        parse(" http://aaa.com#{c} http://bbb.com").should ==
-          "<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
+    describe 'e-mails' do
+      it "at the beginning" do
+        parse("aaa@bbb.com").should == "<a href=\"mailto:aaa@bbb.com\">aaa@bbb.com</a>"
+      end
+      it "in middle of other words" do
+        parse("aaa bbb ccc xx@yy.cn ddd eee fff").should ==
+          "aaa bbb ccc <a href=\"mailto:xx@yy.cn\">xx@yy.cn</a> ddd eee fff"
+      end
+      it "in new line" do
+        parse("aaa bbb ccc\naa.bb@cc.dd.ee\nddd eee fff").should match(
+          %r"aaa bbb ccc ?<br /> ?<a href=\"mailto:aa.bb@cc.dd.ee\">aa.bb@cc.dd.ee</a> ?<br /> ?ddd eee fff"
+        )
+      end
+      %w', .'.each do |c|
+        it "stops parsing on \"#{c} \"" do
+          parse("a-b@c-d.efghjikl#{c}").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
+          parse(" a-b@c-d.efghjikl#{c} ").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
+          parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
+          parse("xxx a-b@c-d.efghjikl#{c} hello!").should == "xxx <a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
+          parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
+          parse(" www@aaa.com#{c} www@bbb.com").should ==
+            "<a href=\"mailto:www@aaa.com\">www@aaa.com</a>#{c} <a href=\"mailto:www@bbb.com\">www@bbb.com</a>"
+        end
+      end
+      it "not parses bad emails" do
+        s="a@b.c a@b a.b@c a.b@@c a@b@c.d a#b@c.d"
+        parse(s).should == s
       end
     end
   end
@@ -693,7 +757,7 @@ describe 'BreakoutParser' do
       parse(s).should == s
       s = "[[abcd:1234]]"
       parse(s).should == s
-      s = "[[abcd::1234]] [[abcd:1234]] [[uri:www.ru]]"
+      s = "[[abcd::1234]] [[abcd:1234]] [[uri:ww.ru]]"
       parse(s).should == s
     end
@@ -718,7 +782,9 @@ describe 'BreakoutParser' do
 ###############################################################################
 ###############################################################################
-  HTML_ESCAPE   =   { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
+  unless defined?HTML_ESCAPE
+    HTML_ESCAPE   =   { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
+  end
   def h s
     s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: breakout_parser
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.8
 platform: ruby
 authors:
 - Andrey "Zed" Zaikin
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-03-09 00:00:00 +05:00
+date: 2010-03-12 00:00:00 +05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency