breakout_parser 0.0.12 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,32 +44,32 @@
44
44
  ITALIC_START = 260,
45
45
  BOLD_ITALIC_START = 261,
46
46
  ITALIC_BOLD_START = 262,
47
- T_WORD = 263,
48
- TICKET_LINK = 264,
49
- LINK = 265,
50
- SVN_REVISION_LINK = 266,
51
- GIT_REVISION_LINK = 267,
52
- WIKI_LINK = 268,
53
- ANCHOR_LINK = 269,
54
- SVN_N_REVISION_LINK = 270,
55
- GIT_N_REVISION_LINK = 271,
56
- URL_WITH_PROTO_LINK = 272,
57
- URL_WITHOUT_PROTO_LINK = 273,
58
- FILE_LINK = 274,
59
- IMAGE_LINK = 275,
60
- URL = 276,
61
- EMAIL = 277,
62
- UL = 278,
63
- H1 = 279,
64
- H2 = 280,
65
- H3 = 281,
66
- H4 = 282,
67
- H5 = 283,
68
- INLINE_CODE = 284,
69
- SPACE = 285,
70
- BR = 286,
71
- OLI = 287,
72
- ULI = 288,
47
+ ULI = 263,
48
+ OLI = 264,
49
+ T_WORD = 265,
50
+ TICKET_LINK = 266,
51
+ LINK = 267,
52
+ SVN_REVISION_LINK = 268,
53
+ GIT_REVISION_LINK = 269,
54
+ WIKI_LINK = 270,
55
+ ANCHOR_LINK = 271,
56
+ SVN_N_REVISION_LINK = 272,
57
+ GIT_N_REVISION_LINK = 273,
58
+ URL_WITH_PROTO_LINK = 274,
59
+ URL_WITHOUT_PROTO_LINK = 275,
60
+ FILE_LINK = 276,
61
+ IMAGE_LINK = 277,
62
+ URL = 278,
63
+ EMAIL = 279,
64
+ UL = 280,
65
+ H1 = 281,
66
+ H2 = 282,
67
+ H3 = 283,
68
+ H4 = 284,
69
+ H5 = 285,
70
+ INLINE_CODE = 286,
71
+ SPACE = 287,
72
+ BR = 288,
73
73
  PRE_CODE_START = 289,
74
74
  PRE_CODE_END = 290,
75
75
  PRE_START = 291,
@@ -92,7 +92,7 @@ typedef union YYSTYPE
92
92
  {
93
93
 
94
94
  /* Line 1676 of yacc.c */
95
- #line 66 "parser.y"
95
+ #line 68 "parser.y"
96
96
 
97
97
  double dvalue;
98
98
  int ivalue;
@@ -25,6 +25,8 @@ size_t site_url_len = 0;
25
25
 
26
26
  extern VALUE git_url;
27
27
 
28
+ int list_level = 1;
29
+
28
30
  #define CHECK_BUF_SIZE(len) \
29
31
  if( (bufptr - buf + len + 1) >= bufsize ){ \
30
32
  /*printf("[.] REALLOC oldsz=%d, newsz=%d\n",bufsize, (bufsize+((len > 0x1000) ? (len+0x1000) : 0x1000)));*/ \
@@ -72,6 +74,7 @@ void yyerror(const char *msg)
72
74
 
73
75
  %token <ivalue> T_CHAR BOLD_START ITALIC_START
74
76
  %token <ivalue> BOLD_ITALIC_START ITALIC_BOLD_START
77
+ %token <ivalue> ULI OLI
75
78
  %token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
76
79
  %token <svalue> SVN_N_REVISION_LINK GIT_N_REVISION_LINK
77
80
  %token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
@@ -80,7 +83,7 @@ void yyerror(const char *msg)
80
83
  %token <svalue> UL
81
84
  %token <svalue> H1 H2 H3 H4 H5
82
85
  %token <svalue> INLINE_CODE
83
- %token SPACE BR /*BRBR*/ OLI ULI
86
+ %token SPACE BR /*BRBR*/
84
87
  %token PRE_CODE_START PRE_CODE_END PRE_START PRE_END CODE_START CODE_END
85
88
  %token NOTEXTILE_START NOTEXTILE_END
86
89
  %token BOLD_END ITALIC_END
@@ -101,8 +104,20 @@ textitem: br
101
104
  | h3 {concat("</h3>",5)}
102
105
  | h4 {concat("</h4>",5)}
103
106
  | h5 {concat("</h5>",5)}
104
- | {concat("<ul>",4)} ulist {concat("</ul>",5)} textitem
105
- | {concat("<ol>",4)} olist {concat("</ol>",5)} textitem
107
+ | {
108
+ list_level=1;
109
+ concat("<ul>",4)
110
+ } ulist {
111
+ concat("</ul>",5);
112
+ for(; list_level>1 && list_level<4; list_level--) concat("</li></ul>",10);
113
+ } textitem
114
+ | {
115
+ list_level=1;
116
+ concat("<ol>",4)
117
+ } olist {
118
+ concat("</ol>",5);
119
+ for(; list_level>1 && list_level<4; list_level--) concat("</li></ol>",10);
120
+ } textitem
106
121
  | code
107
122
 
108
123
  ulist: ulitem {concat("</li>",5)}
@@ -157,14 +172,14 @@ char : T_CHAR {concat_escaped_char($1)}
157
172
 
158
173
  //raw_char : T_CHAR {concat_raw_char($1)}
159
174
 
160
- h1 : H1 {concat("<h1 id=\"",8); process_header($1)}
161
- h2 : H2 {concat("<h2 id=\"",8); process_header($1)}
162
- h3 : H3 {concat("<h3 id=\"",8); process_header($1)}
163
- h4 : H4 {concat("<h4 id=\"",8); process_header($1)}
164
- h5 : H5 {concat("<h5 id=\"",8); process_header($1)}
175
+ h1 : H1 {concat("<h1 id=\"h-",10); process_header($1)}
176
+ h2 : H2 {concat("<h2 id=\"h-",10); process_header($1)}
177
+ h3 : H3 {concat("<h3 id=\"h-",10); process_header($1)}
178
+ h4 : H4 {concat("<h4 id=\"h-",10); process_header($1)}
179
+ h5 : H5 {concat("<h5 id=\"h-",10); process_header($1)}
165
180
  //ul : UL {concat("<ul>",4)}
166
- oli : OLI {concat("<li>",4)}
167
- uli : ULI {concat("<li>",4)}
181
+ oli : OLI {process_oli($1)}
182
+ uli : ULI {process_uli($1)}
168
183
  br : BR {concat("<br />",6)}
169
184
  // | BRBR {concat("<br /><br />",12)}
170
185
 
@@ -177,6 +192,36 @@ code : PRE_CODE_START {concat("<pre><code>",11)} chars PRE_CODE_END {concat("</c
177
192
 
178
193
  %%
179
194
 
195
+ process_uli(int level){
196
+ if( level == list_level ){
197
+ concat("<li>",4);
198
+ } else if( level < list_level ){
199
+ list_level = level;
200
+ //unconcat("</li>");
201
+ concat("</ul></li><li>",14);
202
+ } else {
203
+ // if(level > list_level)
204
+ list_level = level;
205
+ unconcat("</li>");
206
+ concat("<ul><li>",8);
207
+ }
208
+ }
209
+
210
+ process_oli(int level){
211
+ if( level == list_level ){
212
+ concat("<li>",4);
213
+ } else if( level < list_level ){
214
+ list_level = level;
215
+ //unconcat("</li>");
216
+ concat("</ol></li><li>",14);
217
+ } else {
218
+ // if(level > list_level)
219
+ list_level = level;
220
+ unconcat("</li>");
221
+ concat("<ol><li>",8);
222
+ }
223
+ }
224
+
180
225
  concat_hex_char(char c){
181
226
  unsigned char d;
182
227
  d = ((unsigned char)c)>>4;
@@ -274,7 +319,7 @@ process_anchor_link(const char*target){
274
319
 
275
320
  if((p = strchr(target,'|')) && (p<pend) && (p>target)) pend = p-1;
276
321
 
277
- concat("<a href=\"#",10);
322
+ concat("<a href=\"#h-",12);
278
323
  if( need_hex_convert(target,pend) ){
279
324
  for(p = target; *p && p<=pend; p++) concat_hex_char( *p );
280
325
  } else {
@@ -362,7 +407,16 @@ process_wiki_link(const char*target){
362
407
  concat("/wiki/show/",11);
363
408
  concat(space_name,space_name_len);
364
409
  concat_raw_char('/');
365
- for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
410
+ for(c=target; *c && *c!=']' && *c!='|' && *c!='#'; c++) concat_raw_char(*c);
411
+ if( *c == '#' ){
412
+ concat_raw_char('#');
413
+ //if(memcmp(c, "#h-", 3) != 0){
414
+ // anchor w/o "h-" prefix, we need to add it
415
+ concat_raw_char('h');
416
+ concat_raw_char('-');
417
+ //}
418
+ for(c++; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
419
+ }
366
420
  process_link_tail(target,NULL,NULL);
367
421
  }
368
422
 
@@ -457,6 +511,11 @@ process_email(const char*url){
457
511
  process_link_tail(url,NULL,NULL);
458
512
  }
459
513
 
514
+ unconcat(const char*what){
515
+ int l = strlen(what);
516
+ if( bufptr-buf > l && strncmp(bufptr-l,what,l) == 0 ) bufptr -= l;
517
+ }
518
+
460
519
  revert_bold(){
461
520
  char *p;
462
521
  for( p=bufptr-1; p >= (buf+7) ; p--){
data/spec/parser_spec.rb CHANGED
@@ -11,6 +11,19 @@ describe 'BreakoutParser' do
11
11
  end
12
12
  def hex_string s; self.class.hex_string(s); end
13
13
 
14
# Flattens a pretty-printed fixture into a single line: surrounding whitespace
# is removed from the whole text and from every line, then the lines are
# glued together with nothing in between.
def unformat s
  s.strip.each_line.map { |line| line.strip }.join
end
17
+
18
# Reads the fixture +fname+ from the data/ directory that sits next to this
# spec file and returns its contents.
#
# When the description of the current example group contains
# ' numbered list multilevel', the fixture is converted on the fly:
# every '*' becomes '#' and every 'ul>' becomes 'ol>'.
def get_data fname
  fixture_path = File.dirname(__FILE__) + '/data/' + fname
  text = File.read(fixture_path)
  return text unless self.class.description[' numbered list multilevel']
  text.gsub('*','#').gsub('ul>','ol>')
end
26
+
14
27
  it 'accepts from 2 to 4 arguments' do
15
28
  [0,1,5,6,7,8,9,10].each do |argc|
16
29
  lambda{
@@ -319,6 +332,31 @@ describe 'BreakoutParser' do
319
332
  parse("hello\n\n * a\n * b\n * c\nworld").should ==
320
333
  "hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
321
334
  end
335
+
336
describe "multilevel" do
  # Every example parses the markup fixture data/listN; where an exact result
  # is expected it is compared with data/listN.html after that file has been
  # flattened to a single line by unformat.

  it "at start" do
    # NOTE: not sure that list1.html is correct enough, but it should render fine
    expected = unformat(get_data('list1.html'))
    parse(get_data('list1')).should == expected
  end

  it "in middle" do
    expected = unformat(get_data('list2.html'))
    parse(get_data('list2')).should == expected
  end

  it "three levels" do
    expected = unformat(get_data('list3.html'))
    parse(get_data('list3')).should == expected
  end

  it "mess - should have matching count of opening and closing tags" do
    html = parse(get_data('list4'))
    opened = html.scan('<ul>').count
    closed = html.scan('</ul>').count
    opened.should > 0
    # Only a lower bound on closing tags is checked, not strict equality.
    opened.should <= closed
  end

  it "at end" do
    expected = unformat(get_data('list5.html'))
    parse(get_data('list5')).should == expected
  end
end
322
360
  end
323
361
 
324
362
  ###############################################################################
@@ -347,58 +385,84 @@ describe 'BreakoutParser' do
347
385
  parse("hello\n\n # a\n # b\n # c\nworld").should ==
348
386
  "hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
349
387
  end
388
+
389
describe "multilevel" do
  # Every example parses the markup fixture data/listN; where an exact result
  # is expected it is compared with data/listN.html after that file has been
  # flattened to a single line by unformat.
  # NOTE(review): these examples look for <ol> tags, so they presumably rely on
  # get_data converting the bulleted fixtures to numbered ones - confirm the
  # enclosing group's description triggers that conversion.

  it "at start" do
    # NOTE: not sure that list1.html is correct enough, but it should render fine
    parse(get_data('list1')).should == unformat(get_data('list1.html'))
  end

  it "in middle" do
    parse(get_data('list2')).should == unformat(get_data('list2.html'))
  end

  it "three levels" do
    parse(get_data('list3')).should == unformat(get_data('list3.html'))
  end

  it "mess - should have matching count of opening and closing tags" do
    # The raw fixture is parsed, exactly as in the bulleted-list counterpart.
    # Passing it through unformat first would join all lines into one and the
    # parser would never see the multi-line list this example is about.
    r = parse(get_data('list4'))
    r.scan('<ol>').count.should > 0
    # Only a lower bound on closing tags is checked, not strict equality.
    r.scan('<ol>').count.should <= r.scan('</ol>').count
  end

  it "at end" do
    parse(get_data('list5')).should == unformat(get_data('list5.html'))
  end
end
350
413
  end
351
414
 
352
415
  ###############################################################################
416
+ # headers
353
417
 
354
418
  1.upto(5) do |lvl|
355
419
  describe "H#{lvl}" do
356
420
  it "at the beginning" do
357
- parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
421
+ parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
358
422
  end
359
423
  it "after 1 line of text" do
360
- parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
424
+ parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
361
425
  end
362
426
  it "after 2 lines of text" do
363
- parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
427
+ parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
364
428
  end
365
429
  it "in middle of other words" do
366
430
  parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
367
431
  end
368
432
  it "in middle of other lines" do
369
- parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
433
+ parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
370
434
  end
371
435
 
372
436
  it "converts spaces to underscores in id" do
373
- parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx yyy z</h#{lvl}>"
437
+ parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx yyy z</h#{lvl}>"
374
438
  end
375
439
  it "keeps underscores in id" do
376
- parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
440
+ parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
377
441
  end
378
442
  it "keeps dashes in id" do
379
- parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
443
+ parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"h-xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
380
444
  end
381
445
  it "keeps dots in id" do
382
- parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
446
+ parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"h-xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
383
447
  end
384
448
 
385
449
  %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
386
450
  it "converts id to hex if it contains \"#{c}\"" do
387
451
  idhex = hex_string("xxx#{c}yyy")
388
- parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
452
+ parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"h-#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
389
453
  end
390
454
  end
391
455
 
392
456
  it "skips excess spaces" do
393
- parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
457
+ parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
394
458
  end
395
459
 
396
460
  it "thinks that \\r is EOL" do
397
- parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
398
- parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
461
+ parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
462
+ parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
399
463
 
400
464
  parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
401
- "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
465
+ "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
402
466
  end
403
467
  end
404
468
  end
@@ -740,14 +804,15 @@ describe 'BreakoutParser' do
740
804
  a = {}
741
805
  a["wiki:Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
742
806
  a["Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
743
- a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
744
- a["#Ref"] = '<a href="#Ref" title="#Ref" class="wiki_link">#Ref</a>'
745
- a["#привет"] = %Q|<a href="##{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
746
- a["#with spc"] = %Q|<a href="#with__spc" title="#with spc" class="wiki_link">#with spc</a>|
747
- a["#with__usc"] = %Q|<a href="#with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
748
- a["#with--dsh"] = %Q|<a href="#with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
749
- a["#with!xclm"] = %Q|<a href="##{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
750
- a["#with&amp"] = %Q|<a href="##{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
807
+ a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#h-Ref">Name#Ref</a>'
808
+ a["Name#h-Ref"] = '<a class="wiki_link" title="Name#h-Ref" href="/wiki/show/test_space/Name#h-h-Ref">Name#h-Ref</a>'
809
+ a["#Ref"] = '<a href="#h-Ref" title="#Ref" class="wiki_link">#Ref</a>'
810
+ a["#привет"] = %Q|<a href="#h-#{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
811
+ a["#with spc"] = %Q|<a href="#h-with__spc" title="#with spc" class="wiki_link">#with spc</a>|
812
+ a["#with__usc"] = %Q|<a href="#h-with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
813
+ a["#with--dsh"] = %Q|<a href="#h-with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
814
+ a["#with!xclm"] = %Q|<a href="#h-#{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
815
+ a["#with&amp"] = %Q|<a href="#h-#{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
751
816
 
752
817
  a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
753
818
  a["revision:1f4bdab77be696efd"] =
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: breakout_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrey "Zed" Zaikin
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-04-01 00:00:00 +06:00
12
+ date: 2010-04-27 00:00:00 +06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency