breakout_parser 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/breakout_parser/lex.yy.c +401 -361
- data/ext/breakout_parser/parser.l +3 -0
- data/ext/breakout_parser/parser.tab.c +222 -175
- data/ext/breakout_parser/parser.tab.h +22 -20
- data/ext/breakout_parser/parser.y +29 -2
- data/spec/parser_spec.rb +61 -27
- metadata +2 -2
@@ -51,25 +51,27 @@
|
|
51
51
|
ANCHOR_LINK = 267,
|
52
52
|
URL_WITH_PROTO_LINK = 268,
|
53
53
|
URL_WITHOUT_PROTO_LINK = 269,
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
54
|
+
FILE_LINK = 270,
|
55
|
+
IMAGE_LINK = 271,
|
56
|
+
URL = 272,
|
57
|
+
UL = 273,
|
58
|
+
H1 = 274,
|
59
|
+
H2 = 275,
|
60
|
+
H3 = 276,
|
61
|
+
H4 = 277,
|
62
|
+
H5 = 278,
|
63
|
+
SPACE = 279,
|
64
|
+
BR = 280,
|
65
|
+
OLI = 281,
|
66
|
+
ULI = 282,
|
67
|
+
PRE_CODE_START = 283,
|
68
|
+
PRE_CODE_END = 284,
|
69
|
+
PRE_START = 285,
|
70
|
+
PRE_END = 286,
|
71
|
+
CODE_START = 287,
|
72
|
+
CODE_END = 288,
|
73
|
+
BOLD_END = 289,
|
74
|
+
ITALIC_END = 290
|
73
75
|
};
|
74
76
|
#endif
|
75
77
|
|
@@ -89,7 +91,7 @@ typedef union YYSTYPE
|
|
89
91
|
|
90
92
|
|
91
93
|
/* Line 1676 of yacc.c */
|
92
|
-
#line 93 "parser.tab.h"
|
94
|
+
#line 95 "parser.tab.h"
|
93
95
|
} YYSTYPE;
|
94
96
|
# define YYSTYPE_IS_TRIVIAL 1
|
95
97
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
@@ -68,6 +68,7 @@ void yyerror(const char *msg)
|
|
68
68
|
%token <ivalue> T_CHAR BOLD_START ITALIC_START
|
69
69
|
%token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
|
70
70
|
%token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
|
71
|
+
%token <svalue> FILE_LINK IMAGE_LINK
|
71
72
|
%token <svalue> URL
|
72
73
|
%token <svalue> UL
|
73
74
|
%token <svalue> H1 H2 H3 H4 H5
|
@@ -125,6 +126,8 @@ link: TICKET_LINK {process_ticket_link($1)}
|
|
125
126
|
| URL_WITHOUT_PROTO_LINK {process_url_link($1,"http://")}
|
126
127
|
| WIKI_LINK {process_wiki_link($1)}
|
127
128
|
| ANCHOR_LINK {process_anchor_link($1)}
|
129
|
+
| FILE_LINK {process_file_link($1)}
|
130
|
+
| IMAGE_LINK {process_image_link($1)}
|
128
131
|
|
129
132
|
chars:
|
130
133
|
| char chars
|
@@ -252,7 +255,7 @@ process_anchor_link(const char*target){
|
|
252
255
|
|
253
256
|
process_url_link(const char*target,const char* proto){
|
254
257
|
const char *c;
|
255
|
-
concat("<a href=\"",9);
|
258
|
+
concat("<a rel=\"nofollow\" href=\"",24);
|
256
259
|
if(proto) concat2(proto);
|
257
260
|
for(c=target; *c && *c != ']' && *c != '|'; c++) concat_raw_char(*c);
|
258
261
|
process_link_tail(target,NULL,proto);
|
@@ -287,6 +290,30 @@ process_wiki_link(const char*target){
|
|
287
290
|
process_link_tail(target,NULL,NULL);
|
288
291
|
}
|
289
292
|
|
293
|
+
process_file_link(const char*target){
|
294
|
+
const char *c;
|
295
|
+
concat("<a href=\"/spaces/",17);
|
296
|
+
concat(space_name,space_name_len);
|
297
|
+
concat("/documents/download/",20);
|
298
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
299
|
+
process_link_tail(target,NULL,"file:");
|
300
|
+
}
|
301
|
+
|
302
|
+
process_image_link(const char*target){
|
303
|
+
const char *c, *p;
|
304
|
+
concat("<img src=\"/spaces/",18);
|
305
|
+
concat(space_name,space_name_len);
|
306
|
+
concat("/documents/download/",20);
|
307
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
308
|
+
concat("\" alt=\"",7);
|
309
|
+
if(p = strchr(target,'|')){
|
310
|
+
for(c=p+1; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
311
|
+
} else {
|
312
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
313
|
+
}
|
314
|
+
concat("\" />",4);
|
315
|
+
}
|
316
|
+
|
290
317
|
process_ticket_link(const char*ticket_id){
|
291
318
|
const char *c;
|
292
319
|
while(*ticket_id && (*ticket_id < '0' || *ticket_id > '9') ) ticket_id++;
|
@@ -356,7 +383,7 @@ concat_escaping_html(const char*what){
|
|
356
383
|
process_url(const char*url){
|
357
384
|
const char *p;
|
358
385
|
|
359
|
-
concat("<a href=\"",9);
|
386
|
+
concat("<a rel=\"nofollow\" href=\"",24);
|
360
387
|
for(p=url; *p; p++) concat_raw_char(*p);
|
361
388
|
process_link_tail(url,NULL,NULL);
|
362
389
|
}
|
data/spec/parser_spec.rb
CHANGED
@@ -168,23 +168,23 @@ describe 'BreakoutParser' do
|
|
168
168
|
{'ul' => '*', 'ol' => '#'}.each do |l,c|
|
169
169
|
it "raw text link inside #{l.upcase}> #1" do
|
170
170
|
s = "#{c} aaa http://www.ru"
|
171
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
171
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
172
172
|
end
|
173
173
|
it "raw text link inside #{l.upcase}> #2" do
|
174
174
|
s = "#{c} aaa http://www.ru\n#{c} bbb"
|
175
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
|
175
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
|
176
176
|
end
|
177
177
|
it "raw text link inside #{l.upcase}> #3" do
|
178
178
|
s = "#{c} http://www.ru"
|
179
|
-
parse(s).should == "<#{l}><li><a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
179
|
+
parse(s).should == "<#{l}><li><a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
180
180
|
end
|
181
181
|
it "raw text link inside #{l.upcase}> #4" do
|
182
182
|
s = "#{c} aaa http://www.ru bbb"
|
183
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
|
183
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
|
184
184
|
end
|
185
185
|
it "two links inside #{l.upcase}>" do
|
186
186
|
s = "#{c} aaa http://www.ru http://ya.ru bbb"
|
187
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> <a href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
|
187
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> <a rel=\"nofollow\" href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
|
188
188
|
end
|
189
189
|
end
|
190
190
|
end
|
@@ -303,34 +303,34 @@ describe 'BreakoutParser' do
|
|
303
303
|
|
304
304
|
describe "raw text links" do
|
305
305
|
it "at the beginning" do
|
306
|
-
parse("http://www.ru").should == "<a href=\"http://www.ru\">http://www.ru</a>"
|
306
|
+
parse("http://www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>"
|
307
307
|
end
|
308
308
|
it "in middle of other words" do
|
309
309
|
parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
|
310
|
-
"aaa bbb ccc <a href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
|
310
|
+
"aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
|
311
311
|
end
|
312
312
|
it "in new line" do
|
313
313
|
parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
|
314
|
-
%r"aaa bbb ccc ?<br /> ?<a href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
|
314
|
+
%r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
|
315
315
|
)
|
316
316
|
end
|
317
317
|
it "escapes '&' in link _text_" do
|
318
|
-
parse("http://www.ru/?a=1&b=2").should == "<a href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&b=2</a>"
|
318
|
+
parse("http://www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&b=2</a>"
|
319
319
|
end
|
320
320
|
|
321
321
|
it "parses https://" do
|
322
|
-
parse("https://www.ru").should == "<a href=\"https://www.ru\">https://www.ru</a>"
|
322
|
+
parse("https://www.ru").should == "<a rel=\"nofollow\" href=\"https://www.ru\">https://www.ru</a>"
|
323
323
|
end
|
324
324
|
|
325
325
|
%w', .'.each do |c|
|
326
326
|
it "stops parsing on \"#{c} \"" do
|
327
|
-
parse("http://www.ru#{c}").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
|
328
|
-
parse(" http://www.ru#{c} ").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
|
329
|
-
parse(" http://www.ru#{c} hello!").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
330
|
-
parse("xxx http://www.ru#{c} hello!").should == "xxx <a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
331
|
-
parse(" http://www.ru/#{c} hello!").should == "<a href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
|
327
|
+
parse("http://www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
|
328
|
+
parse(" http://www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
|
329
|
+
parse(" http://www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
330
|
+
parse("xxx http://www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
331
|
+
parse(" http://www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
|
332
332
|
parse(" http://aaa.com#{c} http://bbb.com").should ==
|
333
|
-
"<a href=\"http://aaa.com\">http://aaa.com</a>#{c} <a href=\"http://bbb.com\">http://bbb.com</a>"
|
333
|
+
"<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
|
334
334
|
end
|
335
335
|
end
|
336
336
|
end
|
@@ -572,12 +572,17 @@ describe 'BreakoutParser' do
|
|
572
572
|
a["r:2345"] = '<a href="http://code.assembla.com/test_space/svn/changesets/2345">revision:2345</a>'
|
573
573
|
a["r:2345ef"] = '<a href="http://code.assembla.com/test_space/git/changesets/2345ef">revision:2345ef</a>'
|
574
574
|
|
575
|
-
a["url:http://www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
|
576
|
-
a["url:https://www.ru"] = '<a href="https://www.ru">https://www.ru</a>'
|
577
|
-
a["url:www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
|
578
|
-
a["url:www.ru/?a=1&b=2"] = '<a href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&b=2</a>'
|
579
|
-
a["url:ftp://www.ru"] = '<a href="ftp://www.ru">ftp://www.ru</a>'
|
580
|
-
a["url:/spaces/x2"] = '<a href="/spaces/x2">/spaces/x2</a>'
|
575
|
+
a["url:http://www.ru"] = '<a rel="nofollow" href="http://www.ru">http://www.ru</a>'
|
576
|
+
a["url:https://www.ru"] = '<a rel="nofollow" href="https://www.ru">https://www.ru</a>'
|
577
|
+
a["url:www.ru"] = '<a rel="nofollow" href="http://www.ru">http://www.ru</a>'
|
578
|
+
a["url:www.ru/?a=1&b=2"] = '<a rel="nofollow" href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&b=2</a>'
|
579
|
+
a["url:ftp://www.ru"] = '<a rel="nofollow" href="ftp://www.ru">ftp://www.ru</a>'
|
580
|
+
a["url:/spaces/x2"] = '<a rel="nofollow" href="/spaces/x2">/spaces/x2</a>'
|
581
|
+
|
582
|
+
a["file:ExistingFile.txt"] =
|
583
|
+
'<a href="/spaces/test_space/documents/download/ExistingFile.txt">file:ExistingFile.txt</a>'
|
584
|
+
a["file:cVJUz6ejWr35pEab_qKWB8"] =
|
585
|
+
'<a href="/spaces/test_space/documents/download/cVJUz6ejWr35pEab_qKWB8">file:cVJUz6ejWr35pEab_qKWB8</a>'
|
581
586
|
|
582
587
|
a.each do |k,v|
|
583
588
|
it "parses [[#{k}]]" do
|
@@ -591,7 +596,25 @@ describe 'BreakoutParser' do
|
|
591
596
|
end
|
592
597
|
end
|
593
598
|
|
594
|
-
|
599
|
+
a = {}
|
600
|
+
a["image:ExistingImage.png"] =
|
601
|
+
'<img src="/spaces/test_space/documents/download/ExistingImage.png" alt="ALT" />'
|
602
|
+
a["image:cVJUz6ejWr35pEab_qKWB8"] =
|
603
|
+
'<img src="/spaces/test_space/documents/download/cVJUz6ejWr35pEab_qKWB8" alt="ALT" />'
|
604
|
+
|
605
|
+
a.each do |k,v|
|
606
|
+
it "parses [[#{k}]]" do
|
607
|
+
parse("[[#{k}]]").should == v.sub('ALT',k.sub('image:',''))
|
608
|
+
end
|
609
|
+
it "parses [[#{k}|привет тест]]" do
|
610
|
+
parse("[[#{k}|привет тест]]").should == v.sub('ALT','привет тест')
|
611
|
+
end
|
612
|
+
it "parses [[#{k}|test & here]]" do
|
613
|
+
parse("[[#{k}|test & here]]").should == v.sub('ALT','test & here')
|
614
|
+
end
|
615
|
+
end
|
616
|
+
|
617
|
+
it "ignores unknown link types" do
|
595
618
|
s = "[[zzz:xxx]]"
|
596
619
|
parse(s).should == s
|
597
620
|
s = "[[abcd:1234]]"
|
@@ -600,10 +623,21 @@ describe 'BreakoutParser' do
|
|
600
623
|
parse(s).should == s
|
601
624
|
end
|
602
625
|
|
603
|
-
it "links
|
604
|
-
|
605
|
-
|
606
|
-
|
626
|
+
it "ignores file & image links with forbidden symbols" do
|
627
|
+
s = "[[file:aaa/bbb]]"
|
628
|
+
parse(s).should == s
|
629
|
+
s = "[[file:aaa\\bbb]]"
|
630
|
+
parse(s).should == s
|
631
|
+
s = "[[file:aaa bbb]]"
|
632
|
+
parse(s).should == s
|
633
|
+
|
634
|
+
s = "[[image:aaa/bbb]]"
|
635
|
+
parse(s).should == s
|
636
|
+
s = "[[image:aaa\\bbb]]"
|
637
|
+
parse(s).should == s
|
638
|
+
s = "[[image:aaa bbb]]"
|
639
|
+
parse(s).should == s
|
640
|
+
end
|
607
641
|
end
|
608
642
|
|
609
643
|
###############################################################################
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey "Zed" Zaikin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-03 00:00:00 +05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|