RubyGems - breakout_parser - Version diff - 0.0.12 → 0.0.13 - Mend

breakout_parser 0.0.12 → 0.0.13

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

data/ChangeLog +7 -0
data/ext/breakout_parser/lex.yy.c +716 -693
data/ext/breakout_parser/parser.l +7 -2
data/ext/breakout_parser/parser.tab.c +229 -171
data/ext/breakout_parser/parser.tab.h +27 -27
data/ext/breakout_parser/parser.y +71 -12
data/spec/parser_spec.rb +86 -21
metadata +2 -2

data/ext/breakout_parser/parser.tab.h CHANGED Viewed

@@ -44,32 +44,32 @@
      ITALIC_START = 260,
      BOLD_ITALIC_START = 261,
      ITALIC_BOLD_START = 262,
-     T_WORD = 263,
-     TICKET_LINK = 264,
-     LINK = 265,
-     SVN_REVISION_LINK = 266,
-     GIT_REVISION_LINK = 267,
-     WIKI_LINK = 268,
-     ANCHOR_LINK = 269,
-     SVN_N_REVISION_LINK = 270,
-     GIT_N_REVISION_LINK = 271,
-     URL_WITH_PROTO_LINK = 272,
-     URL_WITHOUT_PROTO_LINK = 273,
-     FILE_LINK = 274,
-     IMAGE_LINK = 275,
-     URL = 276,
-     EMAIL = 277,
-     UL = 278,
-     H1 = 279,
-     H2 = 280,
-     H3 = 281,
-     H4 = 282,
-     H5 = 283,
-     INLINE_CODE = 284,
-     SPACE = 285,
-     BR = 286,
-     OLI = 287,
-     ULI = 288,
+     ULI = 263,
+     OLI = 264,
+     T_WORD = 265,
+     TICKET_LINK = 266,
+     LINK = 267,
+     SVN_REVISION_LINK = 268,
+     GIT_REVISION_LINK = 269,
+     WIKI_LINK = 270,
+     ANCHOR_LINK = 271,
+     SVN_N_REVISION_LINK = 272,
+     GIT_N_REVISION_LINK = 273,
+     URL_WITH_PROTO_LINK = 274,
+     URL_WITHOUT_PROTO_LINK = 275,
+     FILE_LINK = 276,
+     IMAGE_LINK = 277,
+     URL = 278,
+     EMAIL = 279,
+     UL = 280,
+     H1 = 281,
+     H2 = 282,
+     H3 = 283,
+     H4 = 284,
+     H5 = 285,
+     INLINE_CODE = 286,
+     SPACE = 287,
+     BR = 288,
      PRE_CODE_START = 289,
      PRE_CODE_END = 290,
      PRE_START = 291,
@@ -92,7 +92,7 @@ typedef union YYSTYPE
 {
 /* Line 1676 of yacc.c  */
-#line 66 "parser.y"
+#line 68 "parser.y"
         double dvalue;
         int ivalue;

data/ext/breakout_parser/parser.y CHANGED Viewed

@@ -25,6 +25,8 @@ size_t site_url_len = 0;
 extern VALUE git_url;
+int list_level = 1;
 #define CHECK_BUF_SIZE(len) \
     if( (bufptr - buf + len + 1) >= bufsize ){ \
         /*printf("[.] REALLOC oldsz=%d, newsz=%d\n",bufsize, (bufsize+((len > 0x1000) ? (len+0x1000) : 0x1000)));*/ \
@@ -72,6 +74,7 @@ void yyerror(const char *msg)
 %token <ivalue> T_CHAR BOLD_START ITALIC_START
 %token <ivalue> BOLD_ITALIC_START ITALIC_BOLD_START
+%token <ivalue> ULI OLI
 %token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
 %token <svalue> SVN_N_REVISION_LINK GIT_N_REVISION_LINK
 %token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
@@ -80,7 +83,7 @@ void yyerror(const char *msg)
 %token <svalue> UL
 %token <svalue> H1 H2 H3 H4 H5
 %token <svalue> INLINE_CODE
-%token SPACE BR /*BRBR*/ OLI ULI
+%token SPACE BR /*BRBR*/
 %token PRE_CODE_START PRE_CODE_END PRE_START PRE_END CODE_START CODE_END
 %token NOTEXTILE_START NOTEXTILE_END
 %token BOLD_END ITALIC_END
@@ -101,8 +104,20 @@ textitem: br
      | h3 {concat("</h3>",5)}
      | h4 {concat("</h4>",5)}
      | h5 {concat("</h5>",5)}
-     | {concat("<ul>",4)} ulist {concat("</ul>",5)} textitem
-     | {concat("<ol>",4)} olist {concat("</ol>",5)} textitem
+     | {
+            list_level=1;
+            concat("<ul>",4)
+       } ulist {
+            concat("</ul>",5);
+            for(; list_level>1 && list_level<4; list_level--) concat("</li></ul>",10);
+       } textitem
+     | {
+            list_level=1;
+            concat("<ol>",4)
+       } olist {
+            concat("</ol>",5);
+            for(; list_level>1 && list_level<4; list_level--) concat("</li></ol>",10);
+       } textitem
      | code
 ulist: ulitem        {concat("</li>",5)}
@@ -157,14 +172,14 @@ char : T_CHAR {concat_escaped_char($1)}
 //raw_char : T_CHAR {concat_raw_char($1)}
-h1   : H1          {concat("<h1 id=\"",8); process_header($1)}
-h2   : H2          {concat("<h2 id=\"",8); process_header($1)}
-h3   : H3          {concat("<h3 id=\"",8); process_header($1)}
-h4   : H4          {concat("<h4 id=\"",8); process_header($1)}
-h5   : H5          {concat("<h5 id=\"",8); process_header($1)}
+h1   : H1          {concat("<h1 id=\"h-",10); process_header($1)}
+h2   : H2          {concat("<h2 id=\"h-",10); process_header($1)}
+h3   : H3          {concat("<h3 id=\"h-",10); process_header($1)}
+h4   : H4          {concat("<h4 id=\"h-",10); process_header($1)}
+h5   : H5          {concat("<h5 id=\"h-",10); process_header($1)}
 //ul   : UL          {concat("<ul>",4)}
-oli  : OLI         {concat("<li>",4)}
-uli  : ULI         {concat("<li>",4)}
+oli  : OLI         {process_oli($1)}
+uli  : ULI         {process_uli($1)}
 br   : BR          {concat("<br />",6)}
 //     | BRBR        {concat("<br /><br />",12)}
@@ -177,6 +192,36 @@ code : PRE_CODE_START {concat("<pre><code>",11)} chars PRE_CODE_END {concat("</c
 %%
+process_uli(int level){
+    if( level == list_level ){
+        concat("<li>",4);
+    } else if( level < list_level ){
+        list_level = level;
+        //unconcat("</li>");
+        concat("</ul></li><li>",14);
+    } else {
+        // if(level > list_level)
+        list_level = level;
+        unconcat("</li>");
+        concat("<ul><li>",8);
+    }
+}
+process_oli(int level){
+    if( level == list_level ){
+        concat("<li>",4);
+    } else if( level < list_level ){
+        list_level = level;
+        //unconcat("</li>");
+        concat("</ol></li><li>",14);
+    } else {
+        // if(level > list_level)
+        list_level = level;
+        unconcat("</li>");
+        concat("<ol><li>",8);
+    }
+}
 concat_hex_char(char c){
     unsigned char d;
     d = ((unsigned char)c)>>4;
@@ -274,7 +319,7 @@ process_anchor_link(const char*target){
     if((p = strchr(target,'|')) && (p<pend) && (p>target)) pend = p-1;
-    concat("<a href=\"#",10);
+    concat("<a href=\"#h-",12);
     if( need_hex_convert(target,pend) ){
         for(p = target; *p && p<=pend; p++) concat_hex_char( *p );
     } else {
@@ -362,7 +407,16 @@ process_wiki_link(const char*target){
     concat("/wiki/show/",11);
     concat(space_name,space_name_len);
     concat_raw_char('/');
-    for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
+    for(c=target; *c && *c!=']' && *c!='|' && *c!='#'; c++) concat_raw_char(*c);
+    if( *c == '#' ){
+        concat_raw_char('#');
+        //if(memcmp(c, "#h-", 3) != 0){
+            // anchor w/o "h-" prefix, we need to add it
+            concat_raw_char('h');
+            concat_raw_char('-');
+        //}
+        for(c++; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
+    }
     process_link_tail(target,NULL,NULL);
 }
@@ -457,6 +511,11 @@ process_email(const char*url){
     process_link_tail(url,NULL,NULL);
 }
+unconcat(const char*what){
+    int l = strlen(what);
+    if( bufptr-buf > l && strncmp(bufptr-l,what,l) == 0 ) bufptr -= l;
+}
 revert_bold(){
     char *p;
     for( p=bufptr-1; p >= (buf+7) ; p--){

data/spec/parser_spec.rb CHANGED Viewed

@@ -11,6 +11,19 @@ describe 'BreakoutParser' do
   end
   def hex_string s; self.class.hex_string(s); end
+  def unformat s
+    s.strip.split("\n").map(&:strip).join
+  end
+  def get_data fname
+    r = File.read(File.dirname(__FILE__) + '/data/' + fname)
+    if self.class.description[' numbered list multilevel']
+      r.gsub!('*','#')
+      r.gsub!('ul>','ol>')
+    end
+    r
+  end
   it 'accepts from 2 to 4 arguments' do
     [0,1,5,6,7,8,9,10].each do |argc|
       lambda{
@@ -319,6 +332,31 @@ describe 'BreakoutParser' do
       parse("hello\n\n * a\n * b\n * c\nworld").should ==
         "hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
     end
+    describe "multilevel" do
+      it "at start" do
+        # NOTE: not sure that list1.html is correct enough, but it should render fine
+        parse(get_data('list1')).should == unformat(get_data('list1.html'))
+      end
+      it "in middle" do
+        parse(get_data('list2')).should == unformat(get_data('list2.html'))
+      end
+      it "three levels" do
+        parse(get_data('list3')).should == unformat(get_data('list3.html'))
+      end
+      it "mess - should have matching count of opening and closing tags" do
+        r = parse(get_data('list4'))
+        r.scan('<ul>').count.should > 0
+        r.scan('<ul>').count.should <= r.scan('</ul>').count
+      end
+      it "at end" do
+        parse(get_data('list5')).should == unformat(get_data('list5.html'))
+      end
+    end
   end
 ###############################################################################
@@ -347,58 +385,84 @@ describe 'BreakoutParser' do
       parse("hello\n\n # a\n # b\n # c\nworld").should ==
         "hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
     end
+    describe "multilevel" do
+      it "at start" do
+        # NOTE: not sure that list1.html is correct enough, but it should render fine
+        parse(get_data('list1')).should == unformat(get_data('list1.html'))
+      end
+      it "in middle" do
+        parse(get_data('list2')).should == unformat(get_data('list2.html'))
+      end
+      it "three levels" do
+        parse(get_data('list3')).should == unformat(get_data('list3.html'))
+      end
+      it "mess - should have matching count of opening and closing tags" do
+        r = parse(unformat(get_data('list4')))
+        r.scan('<ol>').count.should > 0
+        r.scan('<ol>').count.should <= r.scan('</ol>').count
+      end
+      it "at end" do
+        parse(get_data('list5')).should == unformat(get_data('list5.html'))
+      end
+    end
   end
 ###############################################################################
+# headers
   1.upto(5) do |lvl|
     describe "H#{lvl}" do
       it "at the beginning" do
-        parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+        parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
       end
       it "after 1 line of text" do
-        parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+        parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
       end
       it "after 2 lines of text" do
-        parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+        parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
       end
       it "in middle of other words" do
         parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
       end
       it "in middle of other lines" do
-        parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
+        parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
       end
       it "converts spaces to underscores in id" do
-        parse("h#{lvl}. xxx   yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx   yyy z</h#{lvl}>"
+        parse("h#{lvl}. xxx   yyy z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx   yyy z</h#{lvl}>"
       end
       it "keeps underscores in id" do
-        parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
+        parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
       end
       it "keeps dashes in id" do
-        parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
+        parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"h-xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
       end
       it "keeps dots in id" do
-        parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
+        parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"h-xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
       end
       %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
         it "converts id to hex if it contains \"#{c}\"" do
           idhex = hex_string("xxx#{c}yyy")
-          parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
+          parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"h-#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
         end
       end
       it "skips excess spaces" do
-        parse("h#{lvl}.  \t  xxx   \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+        parse("h#{lvl}.  \t  xxx   \t ").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
       end
       it "thinks that \\r is EOL" do
-        parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
-        parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+        parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
+        parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
         parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
-          "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
+          "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
       end
     end
   end
@@ -740,14 +804,15 @@ describe 'BreakoutParser' do
     a = {}
     a["wiki:Name"]  = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
     a["Name"]       = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
-    a["Name#Ref"]   = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
-    a["#Ref"]       = '<a href="#Ref" title="#Ref" class="wiki_link">#Ref</a>'
-    a["#привет"]    = %Q|<a href="##{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
-    a["#with  spc"] = %Q|<a href="#with__spc" title="#with  spc" class="wiki_link">#with  spc</a>|
-    a["#with__usc"] = %Q|<a href="#with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
-    a["#with--dsh"] = %Q|<a href="#with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
-    a["#with!xclm"] = %Q|<a href="##{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
-    a["#with&amp"]  = %Q|<a href="##{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
+    a["Name#Ref"]   = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#h-Ref">Name#Ref</a>'
+    a["Name#h-Ref"]   = '<a class="wiki_link" title="Name#h-Ref" href="/wiki/show/test_space/Name#h-h-Ref">Name#h-Ref</a>'
+    a["#Ref"]       = '<a href="#h-Ref" title="#Ref" class="wiki_link">#Ref</a>'
+    a["#привет"]    = %Q|<a href="#h-#{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
+    a["#with  spc"] = %Q|<a href="#h-with__spc" title="#with  spc" class="wiki_link">#with  spc</a>|
+    a["#with__usc"] = %Q|<a href="#h-with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
+    a["#with--dsh"] = %Q|<a href="#h-with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
+    a["#with!xclm"] = %Q|<a href="#h-#{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
+    a["#with&amp"]  = %Q|<a href="#h-#{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
     a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
     a["revision:1f4bdab77be696efd"] =

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: breakout_parser
 version: !ruby/object:Gem::Version
-  version: 0.0.12
+  version: 0.0.13
 platform: ruby
 authors:
 - Andrey "Zed" Zaikin
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-04-01 00:00:00 +06:00
+date: 2010-04-27 00:00:00 +06:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency