breakout_parser 0.0.12 → 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +7 -0
- data/ext/breakout_parser/lex.yy.c +716 -693
- data/ext/breakout_parser/parser.l +7 -2
- data/ext/breakout_parser/parser.tab.c +229 -171
- data/ext/breakout_parser/parser.tab.h +27 -27
- data/ext/breakout_parser/parser.y +71 -12
- data/spec/parser_spec.rb +86 -21
- metadata +2 -2
@@ -44,32 +44,32 @@
|
|
44
44
|
ITALIC_START = 260,
|
45
45
|
BOLD_ITALIC_START = 261,
|
46
46
|
ITALIC_BOLD_START = 262,
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
47
|
+
ULI = 263,
|
48
|
+
OLI = 264,
|
49
|
+
T_WORD = 265,
|
50
|
+
TICKET_LINK = 266,
|
51
|
+
LINK = 267,
|
52
|
+
SVN_REVISION_LINK = 268,
|
53
|
+
GIT_REVISION_LINK = 269,
|
54
|
+
WIKI_LINK = 270,
|
55
|
+
ANCHOR_LINK = 271,
|
56
|
+
SVN_N_REVISION_LINK = 272,
|
57
|
+
GIT_N_REVISION_LINK = 273,
|
58
|
+
URL_WITH_PROTO_LINK = 274,
|
59
|
+
URL_WITHOUT_PROTO_LINK = 275,
|
60
|
+
FILE_LINK = 276,
|
61
|
+
IMAGE_LINK = 277,
|
62
|
+
URL = 278,
|
63
|
+
EMAIL = 279,
|
64
|
+
UL = 280,
|
65
|
+
H1 = 281,
|
66
|
+
H2 = 282,
|
67
|
+
H3 = 283,
|
68
|
+
H4 = 284,
|
69
|
+
H5 = 285,
|
70
|
+
INLINE_CODE = 286,
|
71
|
+
SPACE = 287,
|
72
|
+
BR = 288,
|
73
73
|
PRE_CODE_START = 289,
|
74
74
|
PRE_CODE_END = 290,
|
75
75
|
PRE_START = 291,
|
@@ -92,7 +92,7 @@ typedef union YYSTYPE
|
|
92
92
|
{
|
93
93
|
|
94
94
|
/* Line 1676 of yacc.c */
|
95
|
-
#line
|
95
|
+
#line 68 "parser.y"
|
96
96
|
|
97
97
|
double dvalue;
|
98
98
|
int ivalue;
|
@@ -25,6 +25,8 @@ size_t site_url_len = 0;
|
|
25
25
|
|
26
26
|
extern VALUE git_url;
|
27
27
|
|
28
|
+
int list_level = 1;
|
29
|
+
|
28
30
|
#define CHECK_BUF_SIZE(len) \
|
29
31
|
if( (bufptr - buf + len + 1) >= bufsize ){ \
|
30
32
|
/*printf("[.] REALLOC oldsz=%d, newsz=%d\n",bufsize, (bufsize+((len > 0x1000) ? (len+0x1000) : 0x1000)));*/ \
|
@@ -72,6 +74,7 @@ void yyerror(const char *msg)
|
|
72
74
|
|
73
75
|
%token <ivalue> T_CHAR BOLD_START ITALIC_START
|
74
76
|
%token <ivalue> BOLD_ITALIC_START ITALIC_BOLD_START
|
77
|
+
%token <ivalue> ULI OLI
|
75
78
|
%token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
|
76
79
|
%token <svalue> SVN_N_REVISION_LINK GIT_N_REVISION_LINK
|
77
80
|
%token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
|
@@ -80,7 +83,7 @@ void yyerror(const char *msg)
|
|
80
83
|
%token <svalue> UL
|
81
84
|
%token <svalue> H1 H2 H3 H4 H5
|
82
85
|
%token <svalue> INLINE_CODE
|
83
|
-
%token SPACE BR /*BRBR*/
|
86
|
+
%token SPACE BR /*BRBR*/
|
84
87
|
%token PRE_CODE_START PRE_CODE_END PRE_START PRE_END CODE_START CODE_END
|
85
88
|
%token NOTEXTILE_START NOTEXTILE_END
|
86
89
|
%token BOLD_END ITALIC_END
|
@@ -101,8 +104,20 @@ textitem: br
|
|
101
104
|
| h3 {concat("</h3>",5)}
|
102
105
|
| h4 {concat("</h4>",5)}
|
103
106
|
| h5 {concat("</h5>",5)}
|
104
|
-
| {
|
105
|
-
|
107
|
+
| {
|
108
|
+
list_level=1;
|
109
|
+
concat("<ul>",4)
|
110
|
+
} ulist {
|
111
|
+
concat("</ul>",5);
|
112
|
+
for(; list_level>1 && list_level<4; list_level--) concat("</li></ul>",10);
|
113
|
+
} textitem
|
114
|
+
| {
|
115
|
+
list_level=1;
|
116
|
+
concat("<ol>",4)
|
117
|
+
} olist {
|
118
|
+
concat("</ol>",5);
|
119
|
+
for(; list_level>1 && list_level<4; list_level--) concat("</li></ol>",10);
|
120
|
+
} textitem
|
106
121
|
| code
|
107
122
|
|
108
123
|
ulist: ulitem {concat("</li>",5)}
|
@@ -157,14 +172,14 @@ char : T_CHAR {concat_escaped_char($1)}
|
|
157
172
|
|
158
173
|
//raw_char : T_CHAR {concat_raw_char($1)}
|
159
174
|
|
160
|
-
h1 : H1 {concat("<h1 id=\"",8); process_header($1)}
|
161
|
-
h2 : H2 {concat("<h2 id=\"",8); process_header($1)}
|
162
|
-
h3 : H3 {concat("<h3 id=\"",8); process_header($1)}
|
163
|
-
h4 : H4 {concat("<h4 id=\"",8); process_header($1)}
|
164
|
-
h5 : H5 {concat("<h5 id=\"",8); process_header($1)}
|
175
|
+
h1 : H1 {concat("<h1 id=\"h-",10); process_header($1)}
|
176
|
+
h2 : H2 {concat("<h2 id=\"h-",10); process_header($1)}
|
177
|
+
h3 : H3 {concat("<h3 id=\"h-",10); process_header($1)}
|
178
|
+
h4 : H4 {concat("<h4 id=\"h-",10); process_header($1)}
|
179
|
+
h5 : H5 {concat("<h5 id=\"h-",10); process_header($1)}
|
165
180
|
//ul : UL {concat("<ul>",4)}
|
166
|
-
oli : OLI {
|
167
|
-
uli : ULI {
|
181
|
+
oli : OLI {process_oli($1)}
|
182
|
+
uli : ULI {process_uli($1)}
|
168
183
|
br : BR {concat("<br />",6)}
|
169
184
|
// | BRBR {concat("<br /><br />",12)}
|
170
185
|
|
@@ -177,6 +192,36 @@ code : PRE_CODE_START {concat("<pre><code>",11)} chars PRE_CODE_END {concat("</c
|
|
177
192
|
|
178
193
|
%%
|
179
194
|
|
195
|
+
process_uli(int level){
|
196
|
+
if( level == list_level ){
|
197
|
+
concat("<li>",4);
|
198
|
+
} else if( level < list_level ){
|
199
|
+
list_level = level;
|
200
|
+
//unconcat("</li>");
|
201
|
+
concat("</ul></li><li>",14);
|
202
|
+
} else {
|
203
|
+
// if(level > list_level)
|
204
|
+
list_level = level;
|
205
|
+
unconcat("</li>");
|
206
|
+
concat("<ul><li>",8);
|
207
|
+
}
|
208
|
+
}
|
209
|
+
|
210
|
+
process_oli(int level){
|
211
|
+
if( level == list_level ){
|
212
|
+
concat("<li>",4);
|
213
|
+
} else if( level < list_level ){
|
214
|
+
list_level = level;
|
215
|
+
//unconcat("</li>");
|
216
|
+
concat("</ol></li><li>",14);
|
217
|
+
} else {
|
218
|
+
// if(level > list_level)
|
219
|
+
list_level = level;
|
220
|
+
unconcat("</li>");
|
221
|
+
concat("<ol><li>",8);
|
222
|
+
}
|
223
|
+
}
|
224
|
+
|
180
225
|
concat_hex_char(char c){
|
181
226
|
unsigned char d;
|
182
227
|
d = ((unsigned char)c)>>4;
|
@@ -274,7 +319,7 @@ process_anchor_link(const char*target){
|
|
274
319
|
|
275
320
|
if((p = strchr(target,'|')) && (p<pend) && (p>target)) pend = p-1;
|
276
321
|
|
277
|
-
concat("<a href=\"#",10);
|
322
|
+
concat("<a href=\"#h-",12);
|
278
323
|
if( need_hex_convert(target,pend) ){
|
279
324
|
for(p = target; *p && p<=pend; p++) concat_hex_char( *p );
|
280
325
|
} else {
|
@@ -362,7 +407,16 @@ process_wiki_link(const char*target){
|
|
362
407
|
concat("/wiki/show/",11);
|
363
408
|
concat(space_name,space_name_len);
|
364
409
|
concat_raw_char('/');
|
365
|
-
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
410
|
+
for(c=target; *c && *c!=']' && *c!='|' && *c!='#'; c++) concat_raw_char(*c);
|
411
|
+
if( *c == '#' ){
|
412
|
+
concat_raw_char('#');
|
413
|
+
//if(memcmp(c, "#h-", 3) != 0){
|
414
|
+
// anchor w/o "h-" prefix, we need to add it
|
415
|
+
concat_raw_char('h');
|
416
|
+
concat_raw_char('-');
|
417
|
+
//}
|
418
|
+
for(c++; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
419
|
+
}
|
366
420
|
process_link_tail(target,NULL,NULL);
|
367
421
|
}
|
368
422
|
|
@@ -457,6 +511,11 @@ process_email(const char*url){
|
|
457
511
|
process_link_tail(url,NULL,NULL);
|
458
512
|
}
|
459
513
|
|
514
|
+
unconcat(const char*what){
|
515
|
+
int l = strlen(what);
|
516
|
+
if( bufptr-buf > l && strncmp(bufptr-l,what,l) == 0 ) bufptr -= l;
|
517
|
+
}
|
518
|
+
|
460
519
|
revert_bold(){
|
461
520
|
char *p;
|
462
521
|
for( p=bufptr-1; p >= (buf+7) ; p--){
|
data/spec/parser_spec.rb
CHANGED
@@ -11,6 +11,19 @@ describe 'BreakoutParser' do
|
|
11
11
|
end
|
12
12
|
def hex_string s; self.class.hex_string(s); end
|
13
13
|
|
14
|
+
def unformat s
|
15
|
+
s.strip.split("\n").map(&:strip).join
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_data fname
|
19
|
+
r = File.read(File.dirname(__FILE__) + '/data/' + fname)
|
20
|
+
if self.class.description[' numbered list multilevel']
|
21
|
+
r.gsub!('*','#')
|
22
|
+
r.gsub!('ul>','ol>')
|
23
|
+
end
|
24
|
+
r
|
25
|
+
end
|
26
|
+
|
14
27
|
it 'accepts from 2 to 4 arguments' do
|
15
28
|
[0,1,5,6,7,8,9,10].each do |argc|
|
16
29
|
lambda{
|
@@ -319,6 +332,31 @@ describe 'BreakoutParser' do
|
|
319
332
|
parse("hello\n\n * a\n * b\n * c\nworld").should ==
|
320
333
|
"hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
|
321
334
|
end
|
335
|
+
|
336
|
+
describe "multilevel" do
|
337
|
+
it "at start" do
|
338
|
+
# NOTE: not sure that list1.html is correct enough, but it should render fine
|
339
|
+
parse(get_data('list1')).should == unformat(get_data('list1.html'))
|
340
|
+
end
|
341
|
+
|
342
|
+
it "in middle" do
|
343
|
+
parse(get_data('list2')).should == unformat(get_data('list2.html'))
|
344
|
+
end
|
345
|
+
|
346
|
+
it "three levels" do
|
347
|
+
parse(get_data('list3')).should == unformat(get_data('list3.html'))
|
348
|
+
end
|
349
|
+
|
350
|
+
it "mess - should have matching count of opening and closing tags" do
|
351
|
+
r = parse(get_data('list4'))
|
352
|
+
r.scan('<ul>').count.should > 0
|
353
|
+
r.scan('<ul>').count.should <= r.scan('</ul>').count
|
354
|
+
end
|
355
|
+
|
356
|
+
it "at end" do
|
357
|
+
parse(get_data('list5')).should == unformat(get_data('list5.html'))
|
358
|
+
end
|
359
|
+
end
|
322
360
|
end
|
323
361
|
|
324
362
|
###############################################################################
|
@@ -347,58 +385,84 @@ describe 'BreakoutParser' do
|
|
347
385
|
parse("hello\n\n # a\n # b\n # c\nworld").should ==
|
348
386
|
"hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
|
349
387
|
end
|
388
|
+
|
389
|
+
describe "multilevel" do
|
390
|
+
it "at start" do
|
391
|
+
# NOTE: not sure that list1.html is correct enough, but it should render fine
|
392
|
+
parse(get_data('list1')).should == unformat(get_data('list1.html'))
|
393
|
+
end
|
394
|
+
|
395
|
+
it "in middle" do
|
396
|
+
parse(get_data('list2')).should == unformat(get_data('list2.html'))
|
397
|
+
end
|
398
|
+
|
399
|
+
it "three levels" do
|
400
|
+
parse(get_data('list3')).should == unformat(get_data('list3.html'))
|
401
|
+
end
|
402
|
+
|
403
|
+
it "mess - should have matching count of opening and closing tags" do
|
404
|
+
r = parse(unformat(get_data('list4')))
|
405
|
+
r.scan('<ol>').count.should > 0
|
406
|
+
r.scan('<ol>').count.should <= r.scan('</ol>').count
|
407
|
+
end
|
408
|
+
|
409
|
+
it "at end" do
|
410
|
+
parse(get_data('list5')).should == unformat(get_data('list5.html'))
|
411
|
+
end
|
412
|
+
end
|
350
413
|
end
|
351
414
|
|
352
415
|
###############################################################################
|
416
|
+
# headers
|
353
417
|
|
354
418
|
1.upto(5) do |lvl|
|
355
419
|
describe "H#{lvl}" do
|
356
420
|
it "at the beginning" do
|
357
|
-
parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
421
|
+
parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
358
422
|
end
|
359
423
|
it "after 1 line of text" do
|
360
|
-
parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
424
|
+
parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
361
425
|
end
|
362
426
|
it "after 2 lines of text" do
|
363
|
-
parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
427
|
+
parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
364
428
|
end
|
365
429
|
it "in middle of other words" do
|
366
430
|
parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
|
367
431
|
end
|
368
432
|
it "in middle of other lines" do
|
369
|
-
parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
|
433
|
+
parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
|
370
434
|
end
|
371
435
|
|
372
436
|
it "converts spaces to underscores in id" do
|
373
|
-
parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx yyy z</h#{lvl}>"
|
437
|
+
parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx yyy z</h#{lvl}>"
|
374
438
|
end
|
375
439
|
it "keeps underscores in id" do
|
376
|
-
parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
|
440
|
+
parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
|
377
441
|
end
|
378
442
|
it "keeps dashes in id" do
|
379
|
-
parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
|
443
|
+
parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"h-xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
|
380
444
|
end
|
381
445
|
it "keeps dots in id" do
|
382
|
-
parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
|
446
|
+
parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"h-xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
|
383
447
|
end
|
384
448
|
|
385
449
|
%w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
|
386
450
|
it "converts id to hex if it contains \"#{c}\"" do
|
387
451
|
idhex = hex_string("xxx#{c}yyy")
|
388
|
-
parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
|
452
|
+
parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"h-#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
|
389
453
|
end
|
390
454
|
end
|
391
455
|
|
392
456
|
it "skips excess spaces" do
|
393
|
-
parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
457
|
+
parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
394
458
|
end
|
395
459
|
|
396
460
|
it "thinks that \\r is EOL" do
|
397
|
-
parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
|
398
|
-
parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
461
|
+
parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
|
462
|
+
parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
399
463
|
|
400
464
|
parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
|
401
|
-
"<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
|
465
|
+
"<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
|
402
466
|
end
|
403
467
|
end
|
404
468
|
end
|
@@ -740,14 +804,15 @@ describe 'BreakoutParser' do
|
|
740
804
|
a = {}
|
741
805
|
a["wiki:Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
|
742
806
|
a["Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
|
743
|
-
a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
|
744
|
-
a["#Ref"]
|
745
|
-
a["
|
746
|
-
a["
|
747
|
-
a["#
|
748
|
-
a["#
|
749
|
-
a["#with
|
750
|
-
a["#with
|
807
|
+
a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#h-Ref">Name#Ref</a>'
|
808
|
+
a["Name#h-Ref"] = '<a class="wiki_link" title="Name#h-Ref" href="/wiki/show/test_space/Name#h-h-Ref">Name#h-Ref</a>'
|
809
|
+
a["#Ref"] = '<a href="#h-Ref" title="#Ref" class="wiki_link">#Ref</a>'
|
810
|
+
a["#привет"] = %Q|<a href="#h-#{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
|
811
|
+
a["#with spc"] = %Q|<a href="#h-with__spc" title="#with spc" class="wiki_link">#with spc</a>|
|
812
|
+
a["#with__usc"] = %Q|<a href="#h-with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
|
813
|
+
a["#with--dsh"] = %Q|<a href="#h-with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
|
814
|
+
a["#with!xclm"] = %Q|<a href="#h-#{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
|
815
|
+
a["#with&"] = %Q|<a href="#h-#{hex_string("with&")}" title="#with&" class="wiki_link">#with&amp</a>|
|
751
816
|
|
752
817
|
a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
|
753
818
|
a["revision:1f4bdab77be696efd"] =
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.12
|
4
|
+
version: 0.0.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey "Zed" Zaikin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-04-
|
12
|
+
date: 2010-04-27 00:00:00 +06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|