breakout_parser 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/breakout_parser/lex.yy.c +401 -361
- data/ext/breakout_parser/parser.l +3 -0
- data/ext/breakout_parser/parser.tab.c +222 -175
- data/ext/breakout_parser/parser.tab.h +22 -20
- data/ext/breakout_parser/parser.y +29 -2
- data/spec/parser_spec.rb +61 -27
- metadata +2 -2
@@ -51,25 +51,27 @@
|
|
51
51
|
ANCHOR_LINK = 267,
|
52
52
|
URL_WITH_PROTO_LINK = 268,
|
53
53
|
URL_WITHOUT_PROTO_LINK = 269,
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
54
|
+
FILE_LINK = 270,
|
55
|
+
IMAGE_LINK = 271,
|
56
|
+
URL = 272,
|
57
|
+
UL = 273,
|
58
|
+
H1 = 274,
|
59
|
+
H2 = 275,
|
60
|
+
H3 = 276,
|
61
|
+
H4 = 277,
|
62
|
+
H5 = 278,
|
63
|
+
SPACE = 279,
|
64
|
+
BR = 280,
|
65
|
+
OLI = 281,
|
66
|
+
ULI = 282,
|
67
|
+
PRE_CODE_START = 283,
|
68
|
+
PRE_CODE_END = 284,
|
69
|
+
PRE_START = 285,
|
70
|
+
PRE_END = 286,
|
71
|
+
CODE_START = 287,
|
72
|
+
CODE_END = 288,
|
73
|
+
BOLD_END = 289,
|
74
|
+
ITALIC_END = 290
|
73
75
|
};
|
74
76
|
#endif
|
75
77
|
|
@@ -89,7 +91,7 @@ typedef union YYSTYPE
|
|
89
91
|
|
90
92
|
|
91
93
|
/* Line 1676 of yacc.c */
|
92
|
-
#line 93 "parser.tab.h"
|
94
|
+
#line 95 "parser.tab.h"
|
93
95
|
} YYSTYPE;
|
94
96
|
# define YYSTYPE_IS_TRIVIAL 1
|
95
97
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
@@ -68,6 +68,7 @@ void yyerror(const char *msg)
|
|
68
68
|
%token <ivalue> T_CHAR BOLD_START ITALIC_START
|
69
69
|
%token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
|
70
70
|
%token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
|
71
|
+
%token <svalue> FILE_LINK IMAGE_LINK
|
71
72
|
%token <svalue> URL
|
72
73
|
%token <svalue> UL
|
73
74
|
%token <svalue> H1 H2 H3 H4 H5
|
@@ -125,6 +126,8 @@ link: TICKET_LINK {process_ticket_link($1)}
|
|
125
126
|
| URL_WITHOUT_PROTO_LINK {process_url_link($1,"http://")}
|
126
127
|
| WIKI_LINK {process_wiki_link($1)}
|
127
128
|
| ANCHOR_LINK {process_anchor_link($1)}
|
129
|
+
| FILE_LINK {process_file_link($1)}
|
130
|
+
| IMAGE_LINK {process_image_link($1)}
|
128
131
|
|
129
132
|
chars:
|
130
133
|
| char chars
|
@@ -252,7 +255,7 @@ process_anchor_link(const char*target){
|
|
252
255
|
|
253
256
|
process_url_link(const char*target,const char* proto){
|
254
257
|
const char *c;
|
255
|
-
concat("<a href=\"",9);
|
258
|
+
concat("<a rel=\"nofollow\" href=\"",24);
|
256
259
|
if(proto) concat2(proto);
|
257
260
|
for(c=target; *c && *c != ']' && *c != '|'; c++) concat_raw_char(*c);
|
258
261
|
process_link_tail(target,NULL,proto);
|
@@ -287,6 +290,30 @@ process_wiki_link(const char*target){
|
|
287
290
|
process_link_tail(target,NULL,NULL);
|
288
291
|
}
|
289
292
|
|
293
|
+
process_file_link(const char*target){
|
294
|
+
const char *c;
|
295
|
+
concat("<a href=\"/spaces/",17);
|
296
|
+
concat(space_name,space_name_len);
|
297
|
+
concat("/documents/download/",20);
|
298
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
299
|
+
process_link_tail(target,NULL,"file:");
|
300
|
+
}
|
301
|
+
|
302
|
+
process_image_link(const char*target){
|
303
|
+
const char *c, *p;
|
304
|
+
concat("<img src=\"/spaces/",18);
|
305
|
+
concat(space_name,space_name_len);
|
306
|
+
concat("/documents/download/",20);
|
307
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
308
|
+
concat("\" alt=\"",7);
|
309
|
+
if(p = strchr(target,'|')){
|
310
|
+
for(c=p+1; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
311
|
+
} else {
|
312
|
+
for(c=target; *c && *c!=']' && *c!='|'; c++) concat_raw_char(*c);
|
313
|
+
}
|
314
|
+
concat("\" />",4);
|
315
|
+
}
|
316
|
+
|
290
317
|
process_ticket_link(const char*ticket_id){
|
291
318
|
const char *c;
|
292
319
|
while(*ticket_id && (*ticket_id < '0' || *ticket_id > '9') ) ticket_id++;
|
@@ -356,7 +383,7 @@ concat_escaping_html(const char*what){
|
|
356
383
|
process_url(const char*url){
|
357
384
|
const char *p;
|
358
385
|
|
359
|
-
concat("<a href=\"",9);
|
386
|
+
concat("<a rel=\"nofollow\" href=\"",24);
|
360
387
|
for(p=url; *p; p++) concat_raw_char(*p);
|
361
388
|
process_link_tail(url,NULL,NULL);
|
362
389
|
}
|
data/spec/parser_spec.rb
CHANGED
@@ -168,23 +168,23 @@ describe 'BreakoutParser' do
|
|
168
168
|
{'ul' => '*', 'ol' => '#'}.each do |l,c|
|
169
169
|
it "raw text link inside #{l.upcase}> #1" do
|
170
170
|
s = "#{c} aaa http://www.ru"
|
171
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
171
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
172
172
|
end
|
173
173
|
it "raw text link inside #{l.upcase}> #2" do
|
174
174
|
s = "#{c} aaa http://www.ru\n#{c} bbb"
|
175
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
|
175
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
|
176
176
|
end
|
177
177
|
it "raw text link inside #{l.upcase}> #3" do
|
178
178
|
s = "#{c} http://www.ru"
|
179
|
-
parse(s).should == "<#{l}><li><a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
179
|
+
parse(s).should == "<#{l}><li><a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
|
180
180
|
end
|
181
181
|
it "raw text link inside #{l.upcase}> #4" do
|
182
182
|
s = "#{c} aaa http://www.ru bbb"
|
183
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
|
183
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
|
184
184
|
end
|
185
185
|
it "two links inside #{l.upcase}>" do
|
186
186
|
s = "#{c} aaa http://www.ru http://ya.ru bbb"
|
187
|
-
parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> <a href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
|
187
|
+
parse(s).should == "<#{l}><li>aaa <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> <a rel=\"nofollow\" href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
|
188
188
|
end
|
189
189
|
end
|
190
190
|
end
|
@@ -303,34 +303,34 @@ describe 'BreakoutParser' do
|
|
303
303
|
|
304
304
|
describe "raw text links" do
|
305
305
|
it "at the beginning" do
|
306
|
-
parse("http://www.ru").should == "<a href=\"http://www.ru\">http://www.ru</a>"
|
306
|
+
parse("http://www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>"
|
307
307
|
end
|
308
308
|
it "in middle of other words" do
|
309
309
|
parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
|
310
|
-
"aaa bbb ccc <a href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
|
310
|
+
"aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
|
311
311
|
end
|
312
312
|
it "in new line" do
|
313
313
|
parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
|
314
|
-
%r"aaa bbb ccc ?<br /> ?<a href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
|
314
|
+
%r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
|
315
315
|
)
|
316
316
|
end
|
317
317
|
it "escapes '&' in link _text_" do
|
318
|
-
parse("http://www.ru/?a=1&b=2").should == "<a href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&b=2</a>"
|
318
|
+
parse("http://www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&b=2</a>"
|
319
319
|
end
|
320
320
|
|
321
321
|
it "parses https://" do
|
322
|
-
parse("https://www.ru").should == "<a href=\"https://www.ru\">https://www.ru</a>"
|
322
|
+
parse("https://www.ru").should == "<a rel=\"nofollow\" href=\"https://www.ru\">https://www.ru</a>"
|
323
323
|
end
|
324
324
|
|
325
325
|
%w', .'.each do |c|
|
326
326
|
it "stops parsing on \"#{c} \"" do
|
327
|
-
parse("http://www.ru#{c}").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
|
328
|
-
parse(" http://www.ru#{c} ").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
|
329
|
-
parse(" http://www.ru#{c} hello!").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
330
|
-
parse("xxx http://www.ru#{c} hello!").should == "xxx <a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
331
|
-
parse(" http://www.ru/#{c} hello!").should == "<a href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
|
327
|
+
parse("http://www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
|
328
|
+
parse(" http://www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
|
329
|
+
parse(" http://www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
330
|
+
parse("xxx http://www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
|
331
|
+
parse(" http://www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
|
332
332
|
parse(" http://aaa.com#{c} http://bbb.com").should ==
|
333
|
-
"<a href=\"http://aaa.com\">http://aaa.com</a>#{c} <a href=\"http://bbb.com\">http://bbb.com</a>"
|
333
|
+
"<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
|
334
334
|
end
|
335
335
|
end
|
336
336
|
end
|
@@ -572,12 +572,17 @@ describe 'BreakoutParser' do
|
|
572
572
|
a["r:2345"] = '<a href="http://code.assembla.com/test_space/svn/changesets/2345">revision:2345</a>'
|
573
573
|
a["r:2345ef"] = '<a href="http://code.assembla.com/test_space/git/changesets/2345ef">revision:2345ef</a>'
|
574
574
|
|
575
|
-
a["url:http://www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
|
576
|
-
a["url:https://www.ru"] = '<a href="https://www.ru">https://www.ru</a>'
|
577
|
-
a["url:www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
|
578
|
-
a["url:www.ru/?a=1&b=2"] = '<a href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&b=2</a>'
|
579
|
-
a["url:ftp://www.ru"] = '<a href="ftp://www.ru">ftp://www.ru</a>'
|
580
|
-
a["url:/spaces/x2"] = '<a href="/spaces/x2">/spaces/x2</a>'
|
575
|
+
a["url:http://www.ru"] = '<a rel="nofollow" href="http://www.ru">http://www.ru</a>'
|
576
|
+
a["url:https://www.ru"] = '<a rel="nofollow" href="https://www.ru">https://www.ru</a>'
|
577
|
+
a["url:www.ru"] = '<a rel="nofollow" href="http://www.ru">http://www.ru</a>'
|
578
|
+
a["url:www.ru/?a=1&b=2"] = '<a rel="nofollow" href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&b=2</a>'
|
579
|
+
a["url:ftp://www.ru"] = '<a rel="nofollow" href="ftp://www.ru">ftp://www.ru</a>'
|
580
|
+
a["url:/spaces/x2"] = '<a rel="nofollow" href="/spaces/x2">/spaces/x2</a>'
|
581
|
+
|
582
|
+
a["file:ExistingFile.txt"] =
|
583
|
+
'<a href="/spaces/test_space/documents/download/ExistingFile.txt">file:ExistingFile.txt</a>'
|
584
|
+
a["file:cVJUz6ejWr35pEab_qKWB8"] =
|
585
|
+
'<a href="/spaces/test_space/documents/download/cVJUz6ejWr35pEab_qKWB8">file:cVJUz6ejWr35pEab_qKWB8</a>'
|
581
586
|
|
582
587
|
a.each do |k,v|
|
583
588
|
it "parses [[#{k}]]" do
|
@@ -591,7 +596,25 @@ describe 'BreakoutParser' do
|
|
591
596
|
end
|
592
597
|
end
|
593
598
|
|
594
|
-
|
599
|
+
a = {}
|
600
|
+
a["image:ExistingImage.png"] =
|
601
|
+
'<img src="/spaces/test_space/documents/download/ExistingImage.png" alt="ALT" />'
|
602
|
+
a["image:cVJUz6ejWr35pEab_qKWB8"] =
|
603
|
+
'<img src="/spaces/test_space/documents/download/cVJUz6ejWr35pEab_qKWB8" alt="ALT" />'
|
604
|
+
|
605
|
+
a.each do |k,v|
|
606
|
+
it "parses [[#{k}]]" do
|
607
|
+
parse("[[#{k}]]").should == v.sub('ALT',k.sub('image:',''))
|
608
|
+
end
|
609
|
+
it "parses [[#{k}|привет тест]]" do
|
610
|
+
parse("[[#{k}|привет тест]]").should == v.sub('ALT','привет тест')
|
611
|
+
end
|
612
|
+
it "parses [[#{k}|test & here]]" do
|
613
|
+
parse("[[#{k}|test & here]]").should == v.sub('ALT','test & here')
|
614
|
+
end
|
615
|
+
end
|
616
|
+
|
617
|
+
it "ignores unknown link types" do
|
595
618
|
s = "[[zzz:xxx]]"
|
596
619
|
parse(s).should == s
|
597
620
|
s = "[[abcd:1234]]"
|
@@ -600,10 +623,21 @@ describe 'BreakoutParser' do
|
|
600
623
|
parse(s).should == s
|
601
624
|
end
|
602
625
|
|
603
|
-
it "links
|
604
|
-
|
605
|
-
|
606
|
-
|
626
|
+
it "ignores file & image links with forbidden symbols" do
|
627
|
+
s = "[[file:aaa/bbb]]"
|
628
|
+
parse(s).should == s
|
629
|
+
s = "[[file:aaa\\bbb]]"
|
630
|
+
parse(s).should == s
|
631
|
+
s = "[[file:aaa bbb]]"
|
632
|
+
parse(s).should == s
|
633
|
+
|
634
|
+
s = "[[image:aaa/bbb]]"
|
635
|
+
parse(s).should == s
|
636
|
+
s = "[[image:aaa\\bbb]]"
|
637
|
+
parse(s).should == s
|
638
|
+
s = "[[image:aaa bbb]]"
|
639
|
+
parse(s).should == s
|
640
|
+
end
|
607
641
|
end
|
608
642
|
|
609
643
|
###############################################################################
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey "Zed" Zaikin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-03 00:00:00 +05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|