breakout_parser 0.0.12 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +7 -0
- data/ext/breakout_parser/lex.yy.c +716 -693
- data/ext/breakout_parser/parser.l +7 -2
- data/ext/breakout_parser/parser.tab.c +229 -171
- data/ext/breakout_parser/parser.tab.h +27 -27
- data/ext/breakout_parser/parser.y +71 -12
- data/spec/parser_spec.rb +86 -21
- metadata +2 -2
@@ -44,32 +44,32 @@
|
|
44
44
|
ITALIC_START = 260,
|
45
45
|
BOLD_ITALIC_START = 261,
|
46
46
|
ITALIC_BOLD_START = 262,
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
47
|
+
ULI = 263,
|
48
|
+
OLI = 264,
|
49
|
+
T_WORD = 265,
|
50
|
+
TICKET_LINK = 266,
|
51
|
+
LINK = 267,
|
52
|
+
SVN_REVISION_LINK = 268,
|
53
|
+
GIT_REVISION_LINK = 269,
|
54
|
+
WIKI_LINK = 270,
|
55
|
+
ANCHOR_LINK = 271,
|
56
|
+
SVN_N_REVISION_LINK = 272,
|
57
|
+
GIT_N_REVISION_LINK = 273,
|
58
|
+
URL_WITH_PROTO_LINK = 274,
|
59
|
+
URL_WITHOUT_PROTO_LINK = 275,
|
60
|
+
FILE_LINK = 276,
|
61
|
+
IMAGE_LINK = 277,
|
62
|
+
URL = 278,
|
63
|
+
EMAIL = 279,
|
64
|
+
UL = 280,
|
65
|
+
H1 = 281,
|
66
|
+
H2 = 282,
|
67
|
+
H3 = 283,
|
68
|
+
H4 = 284,
|
69
|
+
H5 = 285,
|
70
|
+
INLINE_CODE = 286,
|
71
|
+
SPACE = 287,
|
72
|
+
BR = 288,
|
73
73
|
PRE_CODE_START = 289,
|
74
74
|
PRE_CODE_END = 290,
|
75
75
|
PRE_START = 291,
|
@@ -92,7 +92,7 @@ typedef union YYSTYPE
|
|
92
92
|
{
|
93
93
|
|
94
94
|
/* Line 1676 of yacc.c */
|
95
|
-
#line
|
95
|
+
#line 68 "parser.y"
|
96
96
|
|
97
97
|
double dvalue;
|
98
98
|
int ivalue;
|
@@ -25,6 +25,8 @@ size_t site_url_len = 0;
|
|
25
25
|
|
26
26
|
extern VALUE git_url;
|
27
27
|
|
28
|
+
int list_level = 1;
|
29
|
+
|
28
30
|
#define CHECK_BUF_SIZE(len) \
|
29
31
|
if( (bufptr - buf + len + 1) >= bufsize ){ \
|
30
32
|
/*printf("[.] REALLOC oldsz=%d, newsz=%d\n",bufsize, (bufsize+((len > 0x1000) ? (len+0x1000) : 0x1000)));*/ \
|
@@ -72,6 +74,7 @@ void yyerror(const char *msg)
|
|
72
74
|
|
73
75
|
%token <ivalue> T_CHAR BOLD_START ITALIC_START
|
74
76
|
%token <ivalue> BOLD_ITALIC_START ITALIC_BOLD_START
|
77
|
+
%token <ivalue> ULI OLI
|
75
78
|
%token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
|
76
79
|
%token <svalue> SVN_N_REVISION_LINK GIT_N_REVISION_LINK
|
77
80
|
%token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
|
@@ -80,7 +83,7 @@ void yyerror(const char *msg)
|
|
80
83
|
%token <svalue> UL
|
81
84
|
%token <svalue> H1 H2 H3 H4 H5
|
82
85
|
%token <svalue> INLINE_CODE
|
83
|
-
%token SPACE BR /*BRBR*/
|
86
|
+
%token SPACE BR /*BRBR*/
|
84
87
|
%token PRE_CODE_START PRE_CODE_END PRE_START PRE_END CODE_START CODE_END
|
85
88
|
%token NOTEXTILE_START NOTEXTILE_END
|
86
89
|
%token BOLD_END ITALIC_END
|
@@ -101,8 +104,20 @@ textitem: br
|
|
101
104
|
| h3 {concat("</h3>",5)}
|
102
105
|
| h4 {concat("</h4>",5)}
|
103
106
|
| h5 {concat("</h5>",5)}
|
104
|
-
| {
|
105
|
-
|
107
|
+
| {
|
108
|
+
list_level=1;
|
109
|
+
concat("<ul>",4)
|
110
|
+
} ulist {
|
111
|
+
concat("</ul>",5);
|
112
|
+
for(; list_level>1 && list_level<4; list_level--) concat("</li></ul>",10);
|
113
|
+
} textitem
|
114
|
+
| {
|
115
|
+
list_level=1;
|
116
|
+
concat("<ol>",4)
|
117
|
+
} olist {
|
118
|
+
concat("</ol>",5);
|
119
|
+
for(; list_level>1 && list_level<4; list_level--) concat("</li></ol>",10);
|
120
|
+
} textitem
|
106
121
|
| code
|
107
122
|
|
108
123
|
ulist: ulitem {concat("</li>",5)}
|
@@ -157,14 +172,14 @@ char : T_CHAR {concat_escaped_char($1)}
|
|
157
172
|
|
158
173
|
//raw_char : T_CHAR {concat_raw_char($1)}
|
159
174
|
|
160
|
-
h1 : H1 {concat("<h1 id=\"",
|
161
|
-
h2 : H2 {concat("<h2 id=\"",
|
162
|
-
h3 : H3 {concat("<h3 id=\"",
|
163
|
-
h4 : H4 {concat("<h4 id=\"",
|
164
|
-
h5 : H5 {concat("<h5 id=\"",
|
175
|
+
h1 : H1 {concat("<h1 id=\"h-",10); process_header($1)}
|
176
|
+
h2 : H2 {concat("<h2 id=\"h-",10); process_header($1)}
|
177
|
+
h3 : H3 {concat("<h3 id=\"h-",10); process_header($1)}
|
178
|
+
h4 : H4 {concat("<h4 id=\"h-",10); process_header($1)}
|
179
|
+
h5 : H5 {concat("<h5 id=\"h-",10); process_header($1)}
|
165
180
|
//ul : UL {concat("<ul>",4)}
|
166
|
-
oli : OLI {
|
167
|
-
uli : ULI {
|
181
|
+
oli : OLI {process_oli($1)}
|
182
|
+
uli : ULI {process_uli($1)}
|
168
183
|
br : BR {concat("<br />",6)}
|
169
184
|
// | BRBR {concat("<br /><br />",12)}
|
170
185
|
|
@@ -177,6 +192,36 @@ code : PRE_CODE_START {concat("<pre><code>",11)} chars PRE_CODE_END {concat("</c
|
|
177
192
|
|
178
193
|
%%
|
179
194
|
|
195
|
+
process_uli(int level){
|
196
|
+
if( level == list_level ){
|
197
|
+
concat("<li>",4);
|
198
|
+
} else if( level < list_level ){
|
199
|
+
list_level = level;
|
200
|
+
//unconcat("</li>");
|
201
|
+
concat("</ul></li><li>",14);
|
202
|
+
} else {
|
203
|
+
// if(level > list_level)
|
204
|
+
list_level = level;
|
205
|
+
unconcat("</li>");
|
206
|
+
concat("<ul><li>",8);
|
207
|
+
}
|
208
|
+
}
|
209
|
+
|
210
|
+
process_oli(int level){
|
211
|
+
if( level == list_level ){
|
212
|
+
concat("<li>",4);
|
213
|
+
} else if( level < list_level ){
|
214
|
+
list_level = level;
|
215
|
+
//unconcat("</li>");
|
216
|
+
concat("</ol></li><li>",14);
|
217
|
+
} else {
|
218
|
+
// if(level > list_level)
|
219
|
+
list_level = level;
|
220
|
+
unconcat("</li>");
|
221
|
+
concat("<ol><li>",8);
|
222
|
+
}
|
223
|
+
}
|
224
|
+
|
180
225
|
concat_hex_char(char c){
|
181
226
|
unsigned char d;
|
182
227
|
d = ((unsigned char)c)>>4;
|
@@ -274,7 +319,7 @@ process_anchor_link(const char*target){
|
|
274
319
|
|
275
320
|
if((p = strchr(target,'|')) && (p<pend) && (p>target)) pend = p-1;
|
276
321
|
|
277
|
-
concat("<a href=\"#",
|
322
|
+
concat("<a href=\"#h-",12);
|
278
323
|
if( need_hex_convert(target,pend) ){
|
279
324
|
for(p = target; *p && p<=pend; p++) concat_hex_char( *p );
|
280
325
|
} else {
|
@@ -362,7 +407,16 @@ process_wiki_link(const char*target){
|
|
362
407
|
concat("/wiki/show/",11);
|
363
408
|
concat(space_name,space_name_len);
|
364
409
|
concat_raw_char('/');
|
365
|
-
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
410
|
+
for(c=target; *c && *c!=']' && *c!='|' && *c!='#'; c++) concat_raw_char(*c);
|
411
|
+
if( *c == '#' ){
|
412
|
+
concat_raw_char('#');
|
413
|
+
//if(memcmp(c, "#h-", 3) != 0){
|
414
|
+
// anchor w/o "h-" prefix, we need to add it
|
415
|
+
concat_raw_char('h');
|
416
|
+
concat_raw_char('-');
|
417
|
+
//}
|
418
|
+
for(c++; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
419
|
+
}
|
366
420
|
process_link_tail(target,NULL,NULL);
|
367
421
|
}
|
368
422
|
|
@@ -457,6 +511,11 @@ process_email(const char*url){
|
|
457
511
|
process_link_tail(url,NULL,NULL);
|
458
512
|
}
|
459
513
|
|
514
|
+
unconcat(const char*what){
|
515
|
+
int l = strlen(what);
|
516
|
+
if( bufptr-buf > l && strncmp(bufptr-l,what,l) == 0 ) bufptr -= l;
|
517
|
+
}
|
518
|
+
|
460
519
|
revert_bold(){
|
461
520
|
char *p;
|
462
521
|
for( p=bufptr-1; p >= (buf+7) ; p--){
|
data/spec/parser_spec.rb
CHANGED
@@ -11,6 +11,19 @@ describe 'BreakoutParser' do
|
|
11
11
|
end
|
12
12
|
def hex_string s; self.class.hex_string(s); end
|
13
13
|
|
14
|
+
def unformat s
|
15
|
+
s.strip.split("\n").map(&:strip).join
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_data fname
|
19
|
+
r = File.read(File.dirname(__FILE__) + '/data/' + fname)
|
20
|
+
if self.class.description[' numbered list multilevel']
|
21
|
+
r.gsub!('*','#')
|
22
|
+
r.gsub!('ul>','ol>')
|
23
|
+
end
|
24
|
+
r
|
25
|
+
end
|
26
|
+
|
14
27
|
it 'accepts from 2 to 4 arguments' do
|
15
28
|
[0,1,5,6,7,8,9,10].each do |argc|
|
16
29
|
lambda{
|
@@ -319,6 +332,31 @@ describe 'BreakoutParser' do
|
|
319
332
|
parse("hello\n\n * a\n * b\n * c\nworld").should ==
|
320
333
|
"hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
|
321
334
|
end
|
335
|
+
|
336
|
+
describe "multilevel" do
|
337
|
+
it "at start" do
|
338
|
+
# NOTE: not sure that list1.html is correct enough, but it should render fine
|
339
|
+
parse(get_data('list1')).should == unformat(get_data('list1.html'))
|
340
|
+
end
|
341
|
+
|
342
|
+
it "in middle" do
|
343
|
+
parse(get_data('list2')).should == unformat(get_data('list2.html'))
|
344
|
+
end
|
345
|
+
|
346
|
+
it "three levels" do
|
347
|
+
parse(get_data('list3')).should == unformat(get_data('list3.html'))
|
348
|
+
end
|
349
|
+
|
350
|
+
it "mess - should have matching count of opening and closing tags" do
|
351
|
+
r = parse(get_data('list4'))
|
352
|
+
r.scan('<ul>').count.should > 0
|
353
|
+
r.scan('<ul>').count.should <= r.scan('</ul>').count
|
354
|
+
end
|
355
|
+
|
356
|
+
it "at end" do
|
357
|
+
parse(get_data('list5')).should == unformat(get_data('list5.html'))
|
358
|
+
end
|
359
|
+
end
|
322
360
|
end
|
323
361
|
|
324
362
|
###############################################################################
|
@@ -347,58 +385,84 @@ describe 'BreakoutParser' do
|
|
347
385
|
parse("hello\n\n # a\n # b\n # c\nworld").should ==
|
348
386
|
"hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
|
349
387
|
end
|
388
|
+
|
389
|
+
describe "multilevel" do
|
390
|
+
it "at start" do
|
391
|
+
# NOTE: not sure that list1.html is correct enough, but it should render fine
|
392
|
+
parse(get_data('list1')).should == unformat(get_data('list1.html'))
|
393
|
+
end
|
394
|
+
|
395
|
+
it "in middle" do
|
396
|
+
parse(get_data('list2')).should == unformat(get_data('list2.html'))
|
397
|
+
end
|
398
|
+
|
399
|
+
it "three levels" do
|
400
|
+
parse(get_data('list3')).should == unformat(get_data('list3.html'))
|
401
|
+
end
|
402
|
+
|
403
|
+
it "mess - should have matching count of opening and closing tags" do
|
404
|
+
r = parse(unformat(get_data('list4')))
|
405
|
+
r.scan('<ol>').count.should > 0
|
406
|
+
r.scan('<ol>').count.should <= r.scan('</ol>').count
|
407
|
+
end
|
408
|
+
|
409
|
+
it "at end" do
|
410
|
+
parse(get_data('list5')).should == unformat(get_data('list5.html'))
|
411
|
+
end
|
412
|
+
end
|
350
413
|
end
|
351
414
|
|
352
415
|
###############################################################################
|
416
|
+
# headers
|
353
417
|
|
354
418
|
1.upto(5) do |lvl|
|
355
419
|
describe "H#{lvl}" do
|
356
420
|
it "at the beginning" do
|
357
|
-
parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
421
|
+
parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
358
422
|
end
|
359
423
|
it "after 1 line of text" do
|
360
|
-
parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
424
|
+
parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
361
425
|
end
|
362
426
|
it "after 2 lines of text" do
|
363
|
-
parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
427
|
+
parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
364
428
|
end
|
365
429
|
it "in middle of other words" do
|
366
430
|
parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
|
367
431
|
end
|
368
432
|
it "in middle of other lines" do
|
369
|
-
parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
|
433
|
+
parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
|
370
434
|
end
|
371
435
|
|
372
436
|
it "converts spaces to underscores in id" do
|
373
|
-
parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx yyy z</h#{lvl}>"
|
437
|
+
parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx yyy z</h#{lvl}>"
|
374
438
|
end
|
375
439
|
it "keeps underscores in id" do
|
376
|
-
parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
|
440
|
+
parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"h-xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
|
377
441
|
end
|
378
442
|
it "keeps dashes in id" do
|
379
|
-
parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
|
443
|
+
parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"h-xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
|
380
444
|
end
|
381
445
|
it "keeps dots in id" do
|
382
|
-
parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
|
446
|
+
parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"h-xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
|
383
447
|
end
|
384
448
|
|
385
449
|
%w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
|
386
450
|
it "converts id to hex if it contains \"#{c}\"" do
|
387
451
|
idhex = hex_string("xxx#{c}yyy")
|
388
|
-
parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"
|
452
|
+
parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"h-#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
|
389
453
|
end
|
390
454
|
end
|
391
455
|
|
392
456
|
it "skips excess spaces" do
|
393
|
-
parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
457
|
+
parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
394
458
|
end
|
395
459
|
|
396
460
|
it "thinks that \\r is EOL" do
|
397
|
-
parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
|
398
|
-
parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
|
461
|
+
parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
|
462
|
+
parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}>"
|
399
463
|
|
400
464
|
parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
|
401
|
-
"<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
|
465
|
+
"<h#{lvl} id=\"h-xxx\">xxx</h#{lvl}><br />yyy"
|
402
466
|
end
|
403
467
|
end
|
404
468
|
end
|
@@ -740,14 +804,15 @@ describe 'BreakoutParser' do
|
|
740
804
|
a = {}
|
741
805
|
a["wiki:Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
|
742
806
|
a["Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
|
743
|
-
a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
|
744
|
-
a["#Ref"]
|
745
|
-
a["
|
746
|
-
a["
|
747
|
-
a["#
|
748
|
-
a["#
|
749
|
-
a["#with
|
750
|
-
a["#with
|
807
|
+
a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#h-Ref">Name#Ref</a>'
|
808
|
+
a["Name#h-Ref"] = '<a class="wiki_link" title="Name#h-Ref" href="/wiki/show/test_space/Name#h-h-Ref">Name#h-Ref</a>'
|
809
|
+
a["#Ref"] = '<a href="#h-Ref" title="#Ref" class="wiki_link">#Ref</a>'
|
810
|
+
a["#привет"] = %Q|<a href="#h-#{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
|
811
|
+
a["#with spc"] = %Q|<a href="#h-with__spc" title="#with spc" class="wiki_link">#with spc</a>|
|
812
|
+
a["#with__usc"] = %Q|<a href="#h-with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
|
813
|
+
a["#with--dsh"] = %Q|<a href="#h-with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
|
814
|
+
a["#with!xclm"] = %Q|<a href="#h-#{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
|
815
|
+
a["#with&"] = %Q|<a href="#h-#{hex_string("with&")}" title="#with&" class="wiki_link">#with&amp</a>|
|
751
816
|
|
752
817
|
a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
|
753
818
|
a["revision:1f4bdab77be696efd"] =
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey "Zed" Zaikin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-04-
|
12
|
+
date: 2010-04-27 00:00:00 +06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|