breakout_parser 0.0.12 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -44,32 +44,32 @@
44
44
  ITALIC_START = 260,
45
45
  BOLD_ITALIC_START = 261,
46
46
  ITALIC_BOLD_START = 262,
47
- T_WORD = 263,
48
- TICKET_LINK = 264,
49
- LINK = 265,
50
- SVN_REVISION_LINK = 266,
51
- GIT_REVISION_LINK = 267,
52
- WIKI_LINK = 268,
53
- ANCHOR_LINK = 269,
54
- SVN_N_REVISION_LINK = 270,
55
- GIT_N_REVISION_LINK = 271,
56
- URL_WITH_PROTO_LINK = 272,
57
- URL_WITHOUT_PROTO_LINK = 273,
58
- FILE_LINK = 274,
59
- IMAGE_LINK = 275,
60
- URL = 276,
61
- EMAIL = 277,
62
- UL = 278,
63
- H1 = 279,
64
- H2 = 280,
65
- H3 = 281,
66
- H4 = 282,
67
- H5 = 283,
68
- INLINE_CODE = 284,
69
- SPACE = 285,
70
- BR = 286,
71
- OLI = 287,
72
- ULI = 288,
47
+ ULI = 263,
48
+ OLI = 264,
49
+ T_WORD = 265,
50
+ TICKET_LINK = 266,
51
+ LINK = 267,
52
+ SVN_REVISION_LINK = 268,
53
+ GIT_REVISION_LINK = 269,
54
+ WIKI_LINK = 270,
55
+ ANCHOR_LINK = 271,
56
+ SVN_N_REVISION_LINK = 272,
57
+ GIT_N_REVISION_LINK = 273,
58
+ URL_WITH_PROTO_LINK = 274,
59
+ URL_WITHOUT_PROTO_LINK = 275,
60
+ FILE_LINK = 276,
61
+ IMAGE_LINK = 277,
62
+ URL = 278,
63
+ EMAIL = 279,
64
+ UL = 280,
65
+ H1 = 281,
66
+ H2 = 282,
67
+ H3 = 283,
68
+ H4 = 284,
69
+ H5 = 285,
70
+ INLINE_CODE = 286,
71
+ SPACE = 287,
72
+ BR = 288,
73
73
  PRE_CODE_START = 289,
74
74
  PRE_CODE_END = 290,
75
75
  PRE_START = 291,
@@ -92,7 +92,7 @@ typedef union YYSTYPE
92
92
  {
93
93
 
94
94
  /* Line 1676 of yacc.c */
95
- #line 66 "parser.y"
95
+ #line 68 "parser.y"
96
96
 
97
97
  double dvalue;
98
98
  int ivalue;
@@ -25,6 +25,8 @@ size_t site_url_len = 0;
25
25
 
26
26
  extern VALUE git_url;
27
27
 
28
+ int list_level = 1;
29
+
28
30
  #define CHECK_BUF_SIZE(len) \
29
31
  if( (bufptr - buf + len + 1) >= bufsize ){ \
30
32
  /*printf("[.] REALLOC oldsz=%d, newsz=%d\n",bufsize, (bufsize+((len > 0x1000) ? (len+0x1000) : 0x1000)));*/ \
@@ -72,6 +74,7 @@ void yyerror(const char *msg)
72
74
 
73
75
  %token <ivalue> T_CHAR BOLD_START ITALIC_START
74
76
  %token <ivalue> BOLD_ITALIC_START ITALIC_BOLD_START
77
+ %token <ivalue> ULI OLI
75
78
  %token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
76
79
  %token <svalue> SVN_N_REVISION_LINK GIT_N_REVISION_LINK
77
80
  %token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
@@ -80,7 +83,7 @@ void yyerror(const char *msg)
80
83
  %token <svalue> UL
81
84
  %token <svalue> H1 H2 H3 H4 H5
82
85
  %token <svalue> INLINE_CODE
83
- %token SPACE BR /*BRBR*/ OLI ULI
86
+ %token SPACE BR /*BRBR*/
84
87
  %token PRE_CODE_START PRE_CODE_END PRE_START PRE_END CODE_START CODE_END
85
88
  %token NOTEXTILE_START NOTEXTILE_END
86
89
  %token BOLD_END ITALIC_END
@@ -101,8 +104,20 @@ textitem: br
101
104
  | h3 {concat("</h3>",5)}
102
105
  | h4 {concat("</h4>",5)}
103
106
  | h5 {concat("</h5>",5)}
104
- | {concat("<ul>",4)} ulist {concat("</ul>",5)} textitem
105
- | {concat("<ol>",4)} olist {concat("</ol>",5)} textitem
107
+ | {
108
+ list_level=1;
109
+ concat("<ul>",4)
110
+ } ulist {
111
+ concat("</ul>",5);
112
+ for(; list_level>1 && list_level<4; list_level--) concat("</li></ul>",10);
113
+ } textitem
114
+ | {
115
+ list_level=1;
116
+ concat("<ol>",4)
117
+ } olist {
118
+ concat("</ol>",5);
119
+ for(; list_level>1 && list_level<4; list_level--) concat("</li></ol>",10);
120
+ } textitem
106
121
  | code
107
122
 
108
123
  ulist: ulitem {concat("</li>",5)}
@@ -157,14 +172,14 @@ char : T_CHAR {concat_escaped_char($1)}
157
172
 
158
173
  //raw_char : T_CHAR {concat_raw_char($1)}
159
174
 
160
- h1 : H1 {concat("<h1 id=\"",8); process_header($1)}
161
- h2 : H2 {concat("<h2 id=\"",8); process_header($1)}
162
- h3 : H3 {concat("<h3 id=\"",8); process_header($1)}
163
- h4 : H4 {concat("<h4 id=\"",8); process_header($1)}
164
- h5 : H5 {concat("<h5 id=\"",8); process_header($1)}
175
+ h1 : H1 {concat("<h1 id=\"h-",10); process_header($1)}
176
+ h2 : H2 {concat("<h2 id=\"h-",10); process_header($1)}
177
+ h3 : H3 {concat("<h3 id=\"h-",10); process_header($1)}
178
+ h4 : H4 {concat("<h4 id=\"h-",10); process_header($1)}
179
+ h5 : H5 {concat("<h5 id=\"h-",10); process_header($1)}
165
180
  //ul : UL {concat("<ul>",4)}
166
- oli : OLI {concat("<li>",4)}
167
- uli : ULI {concat("<li>",4)}
181
+ oli : OLI {process_oli($1)}
182
+ uli : ULI {process_uli($1)}
168
183
  br : BR {concat("<br />",6)}
169
184
  // | BRBR {concat("<br /><br />",12)}
170
185
 
@@ -177,6 +192,36 @@ code : PRE_CODE_START {concat("<pre><code>",11)} chars PRE_CODE_END {concat("</c
177
192
 
178
193
  %%
179
194
 
195
+ process_uli(int level){
196
+ if( level == list_level ){
197
+ concat("<li>",4);
198
+ } else if( level < list_level ){
199
+ list_level = level;
200
+ //unconcat("</li>");
201
+ concat("</ul></li><li>",14);
202
+ } else {
203
+ // if(level > list_level)
204
+ list_level = level;
205
+ unconcat("</li>");
206
+ concat("<ul><li>",8);
207
+ }
208
+ }
209
+
210
+ process_oli(int level){
211
+ if( level == list_level ){
212
+ concat("<li>",4);
213
+ } else if( level < list_level ){
214
+ list_level = level;
215
+ //unconcat("</li>");
216
+ concat("</ol></li><li>",14);
217
+ } else {
218
+ // if(level > list_level)
219
+ list_level = level;
220
+ unconcat("</li>");
221
+ concat("<ol><li>",8);
222
+ }
223
+ }
224
+
180
225
  concat_hex_char(char c){
181
226
  unsigned char d;
182
227
  d = ((unsigned char)c)>>4;
@@ -274,7 +319,7 @@ process_anchor_link(const char*target){
274
319
 
275
320
  if((p = strchr(target,'|')) && (p<pend) && (p>target)) pend = p-1;
276
321
 
277
- concat("<a href=\"#",10);
322
+ concat("<a href=\"#h-",12);
278
323
  if( need_hex_convert(target,pend) ){
279
324
  for(p = target; *p && p<=pend; p++) concat_hex_char( *p );
280
325
  } else {
@@ -362,7 +407,16 @@ process_wiki_link(const char*target){
362
407
  concat("/wiki/show/",11);
363
408
  concat(space_name,space_name_len);
364
409
  concat_raw_char('/');
365
- for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
410
+ for(c=target; *c && *c!=']' && *c!='|' && *c!='#'; c++) concat_raw_char(*c);
411
+ if( *c == '#' ){
412
+ concat_raw_char('#');
413
+ //if(memcmp(c, "#h-", 3) != 0){
414
+ // anchor w/o "h-" prefix, we need to add it
415
+ concat_raw_char('h');
416
+ concat_raw_char('-');
417
+ //}
418
+ for(c++; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
419
+ }
366
420
  process_link_tail(target,NULL,NULL);
367
421
  }
368
422
 
@@ -457,6 +511,11 @@ process_email(const char*url){
457
511
  process_link_tail(url,NULL,NULL);
458
512
  }
459
513
 
514
+ unconcat(const char*what){
515
+ int l = strlen(what);
516
+ if( bufptr-buf > l && strncmp(bufptr-l,what,l) == 0 ) bufptr -= l;
517
+ }
518
+
460
519
  revert_bold(){
461
520
  char *p;
462
521
  for( p=bufptr-1; p >= (buf+7) ; p--){
data/spec/parser_spec.rb CHANGED
@@ -11,6 +11,19 @@ describe 'BreakoutParser' do
11
11
  end
12
12
  def hex_string s; self.class.hex_string(s); end
13
13
 
14
# Collapse a pretty-printed fixture into a single line: trim the whole text,
# cut it at newlines, trim every piece and glue the pieces back together
# without any separator.
def unformat s
  pieces = s.strip.split("\n")
  pieces.map! { |piece| piece.strip }
  pieces.join
end
17
+
18
# Read the fixture `fname` from the "data" directory that sits next to this
# spec file and return its text.
#
# When the description of the current example group contains
# " numbered list multilevel", the bulleted-list fixture is turned into its
# numbered-list counterpart: every "*" becomes "#" and every "ul>" becomes
# "ol>" (which covers both the opening and the closing tag).
def get_data fname
  text = File.read("#{File.dirname(__FILE__)}/data/#{fname}")
  return text unless self.class.description[' numbered list multilevel']
  text.gsub('*', '#').gsub('ul>', 'ol>')
end
26
+
14
27
  it 'accepts from 2 to 4 arguments' do
15
28
  [0,1,5,6,7,8,9,10].each do |argc|
16
29
  lambda{
@@ -319,6 +332,31 @@ describe 'BreakoutParser' do
319
332
  parse("hello\n\n * a\n * b\n * c\nworld").should ==
320
333
  "hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
321
334
  end
335
+
336
+ describe "multilevel" do
337
+ it "at start" do
338
+ # NOTE: not sure that list1.html is correct enough, but it should render fine
339
+ parse(get_data('list1')).should == unformat(get_data('list1.html'))
340
+ end
341
+
342
+ it "in middle" do
343
+ parse(get_data('list2')).should == unformat(get_data('list2.html'))
344
+ end
345
+
346
+ it "three levels" do
347
+ parse(get_data('list3')).should == unformat(get_data('list3.html'))
348
+ end
349
+
350
+ it "mess - should have matching count of opening and closing tags" do
351
+ r = parse(get_data('list4'))
352
+ r.scan('<ul>').count.should > 0
353
+ r.scan('<ul>').count.should <= r.scan('</ul>').count
354
+ end
355
+
356
+ it "at end" do
357
+ parse(get_data('list5')).should == unformat(get_data('list5.html'))
358
+ end
359
+ end
322
360
  end
323
361
 
324
362
  ###############################################################################
@@ -347,58 +385,84 @@ describe 'BreakoutParser' do
347
385
  parse("hello\n\n # a\n # b\n # c\nworld").should ==
348
386
  "hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
349
387
  end
388
+
389
+ describe "multilevel" do
390
+ it "at start" do
391
+ # NOTE: not sure that list1.html is correct enough, but it should render fine
392
+ parse(get_data('list1')).should == unformat(get_data('list1.html'))
393
+ end
394
+
395
+ it "in middle" do
396
+ parse(get_data('list2')).should == unformat(get_data('list2.html'))
397
+ end
398
+
399
+ it "three levels" do
400
+ parse(get_data('list3')).should == unformat(get_data('list3.html'))
401
+ end
402
+
403
it "mess - should have matching count of opening and closing tags" do
  # Parse the fixture as it is stored. Running it through unformat first
  # would join all of its lines into one, so the parser would never see the
  # multilevel list this example is meant to exercise.
  r = parse(get_data('list4'))
  r.scan('<ol>').count.should > 0
  r.scan('<ol>').count.should <= r.scan('</ol>').count
end
408
+
409
+ it "at end" do
410
+ parse(get_data('list5')).should == unformat(get_data('list5.html'))
411
+ end
412
+ end
350
413
  end
351
414
 
352
415
  ###############################################################################
416
+ # headers
353
417
 
354
418
  1.upto(5) do |lvl|
355
419
  describe "H#{lvl}" do
356
420
  it "at the beginning" do
357
- parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
421
+ parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
358
422
  end
359
423
  it "after 1 line of text" do
360
- parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
424
+ parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
361
425
  end
362
426
  it "after 2 lines of text" do
363
- parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
427
+ parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
364
428
  end
365
429
  it "in middle of other words" do
366
430
  parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
367
431
  end
368
432
  it "in middle of other lines" do
369
- parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
433
+ parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
370
434
  end
371
435
 
372
436
  it "converts spaces to underscores in id" do
373
- parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx yyy z</h#{lvl}>"
437
+ parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx yyy z</h#{lvl}>"
374
438
  end
375
439
  it "keeps underscores in id" do
376
- parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
440
+ parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
377
441
  end
378
442
  it "keeps dashes in id" do
379
- parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
443
+ parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"h-xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
380
444
  end
381
445
  it "keeps dots in id" do
382
- parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
446
+ parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"h-xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
383
447
  end
384
448
 
385
449
  %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
386
450
  it "converts id to hex if it contains \"#{c}\"" do
387
451
  idhex = hex_string("xxx#{c}yyy")
388
- parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
452
+ parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"h-#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
389
453
  end
390
454
  end
391
455
 
392
456
  it "skips excess spaces" do
393
- parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
457
+ parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
394
458
  end
395
459
 
396
460
  it "thinks that \\r is EOL" do
397
- parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
398
- parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
461
+ parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
462
+ parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
399
463
 
400
464
  parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
401
- "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
465
+ "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
402
466
  end
403
467
  end
404
468
  end
@@ -740,14 +804,15 @@ describe 'BreakoutParser' do
740
804
  a = {}
741
805
  a["wiki:Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
742
806
  a["Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
743
- a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
744
- a["#Ref"] = '<a href="#Ref" title="#Ref" class="wiki_link">#Ref</a>'
745
- a["#привет"] = %Q|<a href="##{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
746
- a["#with spc"] = %Q|<a href="#with__spc" title="#with spc" class="wiki_link">#with spc</a>|
747
- a["#with__usc"] = %Q|<a href="#with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
748
- a["#with--dsh"] = %Q|<a href="#with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
749
- a["#with!xclm"] = %Q|<a href="##{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
750
- a["#with&amp"] = %Q|<a href="##{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
807
+ a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#h-Ref">Name#Ref</a>'
808
+ a["Name#h-Ref"] = '<a class="wiki_link" title="Name#h-Ref" href="/wiki/show/test_space/Name#h-h-Ref">Name#h-Ref</a>'
809
+ a["#Ref"] = '<a href="#h-Ref" title="#Ref" class="wiki_link">#Ref</a>'
810
+ a["#привет"] = %Q|<a href="#h-#{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
811
+ a["#with spc"] = %Q|<a href="#h-with__spc" title="#with spc" class="wiki_link">#with spc</a>|
812
+ a["#with__usc"] = %Q|<a href="#h-with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
813
+ a["#with--dsh"] = %Q|<a href="#h-with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
814
+ a["#with!xclm"] = %Q|<a href="#h-#{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
815
+ a["#with&amp"] = %Q|<a href="#h-#{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
751
816
 
752
817
  a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
753
818
  a["revision:1f4bdab77be696efd"] =
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: breakout_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrey "Zed" Zaikin
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-04-01 00:00:00 +06:00
12
+ date: 2010-04-27 00:00:00 +06:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency