breakout_parser 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/breakout_parser/lex.yy.c +669 -478
- data/ext/breakout_parser/make_win32.bat +1 -0
- data/ext/breakout_parser/parser.l +31 -30
- data/ext/breakout_parser/parser.tab.c +189 -166
- data/ext/breakout_parser/parser.tab.h +23 -22
- data/ext/breakout_parser/parser.y +17 -1
- data/ext/breakout_parser/ruby_ext.c +15 -1
- data/spec/parser_spec.rb +94 -28
- metadata +2 -2
@@ -54,27 +54,28 @@
|
|
54
54
|
FILE_LINK = 270,
|
55
55
|
IMAGE_LINK = 271,
|
56
56
|
URL = 272,
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
57
|
+
EMAIL = 273,
|
58
|
+
UL = 274,
|
59
|
+
H1 = 275,
|
60
|
+
H2 = 276,
|
61
|
+
H3 = 277,
|
62
|
+
H4 = 278,
|
63
|
+
H5 = 279,
|
64
|
+
INLINE_CODE = 280,
|
65
|
+
SPACE = 281,
|
66
|
+
BR = 282,
|
67
|
+
OLI = 283,
|
68
|
+
ULI = 284,
|
69
|
+
PRE_CODE_START = 285,
|
70
|
+
PRE_CODE_END = 286,
|
71
|
+
PRE_START = 287,
|
72
|
+
PRE_END = 288,
|
73
|
+
CODE_START = 289,
|
74
|
+
CODE_END = 290,
|
75
|
+
NOTEXTILE_START = 291,
|
76
|
+
NOTEXTILE_END = 292,
|
77
|
+
BOLD_END = 293,
|
78
|
+
ITALIC_END = 294
|
78
79
|
};
|
79
80
|
#endif
|
80
81
|
|
@@ -94,7 +95,7 @@ typedef union YYSTYPE
|
|
94
95
|
|
95
96
|
|
96
97
|
/* Line 1676 of yacc.c */
|
97
|
-
#line 98 "parser.tab.h"
|
98
|
+
#line 99 "parser.tab.h"
|
98
99
|
} YYSTYPE;
|
99
100
|
# define YYSTYPE_IS_TRIVIAL 1
|
100
101
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
@@ -69,7 +69,7 @@ void yyerror(const char *msg)
|
|
69
69
|
%token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
|
70
70
|
%token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
|
71
71
|
%token <svalue> FILE_LINK IMAGE_LINK
|
72
|
-
%token <svalue> URL
|
72
|
+
%token <svalue> URL EMAIL
|
73
73
|
%token <svalue> UL
|
74
74
|
%token <svalue> H1 H2 H3 H4 H5
|
75
75
|
%token <svalue> INLINE_CODE
|
@@ -116,6 +116,7 @@ word : chars
|
|
116
116
|
| link
|
117
117
|
| T_WORD {concat2($1)} // TODO: somehow pass T_WORD's length here
|
118
118
|
| URL {process_url($1)}
|
119
|
+
| EMAIL {process_email($1)}
|
119
120
|
| BOLD_START {$1 ? concat(" <strong>",9) : concat("<strong>",8)}
|
120
121
|
| BOLD_END {concat("</strong>",9)}
|
121
122
|
| ITALIC_START {$1 ? concat(" <em>",5) : concat("<em>",4)}
|
@@ -402,6 +403,21 @@ process_url(const char*url){
|
|
402
403
|
const char *p;
|
403
404
|
|
404
405
|
concat("<a rel=\"nofollow\" href=\"",24);
|
406
|
+
if( *url == 'w' ){
|
407
|
+
// url starts with 'www.'
|
408
|
+
concat("http://",7);
|
409
|
+
} else {
|
410
|
+
// assume url starts with 'http://'
|
411
|
+
}
|
405
412
|
for(p=url; *p; p++) concat_raw_char(*p);
|
406
413
|
process_link_tail(url,NULL,NULL);
|
407
414
|
}
|
415
|
+
|
416
|
+
process_email(const char*url){
|
417
|
+
const char *p;
|
418
|
+
|
419
|
+
concat("<a href=\"mailto:",16);
|
420
|
+
for(p=url; *p; p++) concat_raw_char(*p);
|
421
|
+
process_link_tail(url,NULL,NULL);
|
422
|
+
}
|
423
|
+
|
@@ -4,12 +4,14 @@
|
|
4
4
|
|
5
5
|
void Init_breakout_parser();
|
6
6
|
VALUE method_parse(VALUE, VALUE, VALUE);
|
7
|
+
VALUE method_parse_links_only(VALUE, VALUE, VALUE);
|
7
8
|
|
8
9
|
VALUE breakout_parser = Qnil;
|
9
10
|
|
10
11
|
void Init_breakout_parser() {
|
11
12
|
breakout_parser = rb_define_class("BreakoutParser",rb_cObject);
|
12
13
|
rb_define_singleton_method(breakout_parser, "parse", method_parse, 2);
|
14
|
+
rb_define_singleton_method(breakout_parser, "parse_links_only", method_parse_links_only, 2);
|
13
15
|
}
|
14
16
|
|
15
17
|
extern char *buf, *bufptr;
|
@@ -17,7 +19,9 @@ extern char *in_buf, *in_pos;
|
|
17
19
|
extern const char *space_name;
|
18
20
|
extern size_t in_buf_len, bufsize, space_name_len;
|
19
21
|
|
20
|
-
|
22
|
+
extern int parse_links_only;
|
23
|
+
|
24
|
+
VALUE do_parse(VALUE self, VALUE text, VALUE r_space_name) {
|
21
25
|
VALUE s;
|
22
26
|
char *p;
|
23
27
|
|
@@ -69,4 +73,14 @@ VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
|
|
69
73
|
return s;
|
70
74
|
}
|
71
75
|
|
76
|
+
VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
|
77
|
+
parse_links_only = 0;
|
78
|
+
return do_parse(self,text,r_space_name);
|
79
|
+
}
|
80
|
+
|
81
|
+
VALUE method_parse_links_only(VALUE self, VALUE text, VALUE r_space_name) {
|
82
|
+
parse_links_only = 1;
|
83
|
+
return do_parse(self,text,r_space_name);
|
84
|
+
}
|
85
|
+
|
72
86
|
#endif // ifdef RUBY_VERSION
|
data/spec/parser_spec.rb
CHANGED
@@ -97,7 +97,7 @@ describe 'BreakoutParser' do
|
|
97
97
|
end
|
98
98
|
it "not confuses" do
|
99
99
|
parse("look at @this code@ and mail me at xxx@yyy.com").should ==
|
100
|
-
'look at <code>this code</code> and mail me at xxx@yyy.com'
|
100
|
+
'look at <code>this code</code> and mail me at <a href="mailto:xxx@yyy.com">xxx@yyy.com</a>'
|
101
101
|
end
|
102
102
|
it "w/o closing tag" do
|
103
103
|
parse("@smth").should == '@smth'
|
@@ -359,35 +359,99 @@ describe 'BreakoutParser' do
|
|
359
359
|
###############################################################################
|
360
360
|
|
361
361
|
describe "raw text links" do
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
"aaa bbb ccc
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
362
|
+
describe "starting with 'http://'" do
|
363
|
+
it "at the beginning" do
|
364
|
+
parse("http://asd.ru").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>"
|
365
|
+
end
|
366
|
+
it "in middle of other words" do
|
367
|
+
parse("aaa bbb ccc http://asd.ru ddd eee fff").should ==
|
368
|
+
"aaa bbb ccc <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ddd eee fff"
|
369
|
+
end
|
370
|
+
it "in new line" do
|
371
|
+
parse("aaa bbb ccc\nhttp://asd.ru\nddd eee fff").should match(
|
372
|
+
%r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ?<br /> ?ddd eee fff"
|
373
|
+
)
|
374
|
+
end
|
375
|
+
it "escapes '&' in link _text_" do
|
376
|
+
parse("http://asd.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://asd.ru/?a=1&b=2\">http://asd.ru/?a=1&amp;b=2</a>"
|
377
|
+
end
|
378
|
+
|
379
|
+
it "parses https://" do
|
380
|
+
parse("https://asd.ru").should == "<a rel=\"nofollow\" href=\"https://asd.ru\">https://asd.ru</a>"
|
381
|
+
end
|
382
|
+
|
383
|
+
%w', .'.each do |c|
|
384
|
+
it "stops parsing on \"#{c} \"" do
|
385
|
+
parse("http://asd.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
|
386
|
+
parse(" http://asd.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
|
387
|
+
parse(" http://asd.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
|
388
|
+
parse("xxx http://asd.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
|
389
|
+
parse(" http://asd.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru/\">http://asd.ru/</a>#{c} hello!"
|
390
|
+
parse(" http://aaa.com#{c} http://bbb.com").should ==
|
391
|
+
"<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
|
392
|
+
end
|
393
|
+
end
|
376
394
|
end
|
377
395
|
|
378
|
-
|
379
|
-
|
396
|
+
describe "starting with 'www.'" do
|
397
|
+
it "at the beginning" do
|
398
|
+
parse("www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>"
|
399
|
+
end
|
400
|
+
it "in middle of other words" do
|
401
|
+
parse("aaa bbb ccc www.ru ddd eee fff").should ==
|
402
|
+
"aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ddd eee fff"
|
403
|
+
end
|
404
|
+
it "in new line" do
|
405
|
+
parse("aaa bbb ccc\nwww.ru\nddd eee fff").should match(
|
406
|
+
%r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ?<br /> ?ddd eee fff"
|
407
|
+
)
|
408
|
+
end
|
409
|
+
it "escapes '&' in link _text_" do
|
410
|
+
parse("www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">www.ru/?a=1&amp;b=2</a>"
|
411
|
+
end
|
412
|
+
|
413
|
+
%w', .'.each do |c|
|
414
|
+
it "stops parsing on \"#{c} \"" do
|
415
|
+
parse("www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
|
416
|
+
parse(" www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
|
417
|
+
parse(" www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
|
418
|
+
parse("xxx www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
|
419
|
+
parse(" www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">www.ru/</a>#{c} hello!"
|
420
|
+
parse(" www.aaa.com#{c} www.bbb.com").should ==
|
421
|
+
"<a rel=\"nofollow\" href=\"http://www.aaa.com\">www.aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://www.bbb.com\">www.bbb.com</a>"
|
422
|
+
end
|
423
|
+
end
|
380
424
|
end
|
381
425
|
|
382
|
-
|
383
|
-
it "
|
384
|
-
parse("
|
385
|
-
|
386
|
-
|
387
|
-
parse("
|
388
|
-
|
389
|
-
|
390
|
-
|
426
|
+
describe 'e-mails' do
|
427
|
+
it "at the beginning" do
|
428
|
+
parse("aaa@bbb.com").should == "<a href=\"mailto:aaa@bbb.com\">aaa@bbb.com</a>"
|
429
|
+
end
|
430
|
+
it "in middle of other words" do
|
431
|
+
parse("aaa bbb ccc xx@yy.cn ddd eee fff").should ==
|
432
|
+
"aaa bbb ccc <a href=\"mailto:xx@yy.cn\">xx@yy.cn</a> ddd eee fff"
|
433
|
+
end
|
434
|
+
it "in new line" do
|
435
|
+
parse("aaa bbb ccc\naa.bb@cc.dd.ee\nddd eee fff").should match(
|
436
|
+
%r"aaa bbb ccc ?<br /> ?<a href=\"mailto:aa.bb@cc.dd.ee\">aa.bb@cc.dd.ee</a> ?<br /> ?ddd eee fff"
|
437
|
+
)
|
438
|
+
end
|
439
|
+
|
440
|
+
%w', .'.each do |c|
|
441
|
+
it "stops parsing on \"#{c} \"" do
|
442
|
+
parse("a-b@c-d.efghjikl#{c}").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
|
443
|
+
parse(" a-b@c-d.efghjikl#{c} ").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
|
444
|
+
parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
|
445
|
+
parse("xxx a-b@c-d.efghjikl#{c} hello!").should == "xxx <a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
|
446
|
+
parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
|
447
|
+
parse(" www@aaa.com#{c} www@bbb.com").should ==
|
448
|
+
"<a href=\"mailto:www@aaa.com\">www@aaa.com</a>#{c} <a href=\"mailto:www@bbb.com\">www@bbb.com</a>"
|
449
|
+
end
|
450
|
+
end
|
451
|
+
|
452
|
+
it "not parses bad emails" do
|
453
|
+
s="a@b.c a@b a.b@c a.b@@c a@b@c.d a#b@c.d"
|
454
|
+
parse(s).should == s
|
391
455
|
end
|
392
456
|
end
|
393
457
|
end
|
@@ -693,7 +757,7 @@ describe 'BreakoutParser' do
|
|
693
757
|
parse(s).should == s
|
694
758
|
s = "[[abcd:1234]]"
|
695
759
|
parse(s).should == s
|
696
|
-
s = "[[abcd::1234]] [[abcd:1234]] [[uri:
|
760
|
+
s = "[[abcd::1234]] [[abcd:1234]] [[uri:ww.ru]]"
|
697
761
|
parse(s).should == s
|
698
762
|
end
|
699
763
|
|
@@ -718,7 +782,9 @@ describe 'BreakoutParser' do
|
|
718
782
|
###############################################################################
|
719
783
|
###############################################################################
|
720
784
|
|
721
|
-
HTML_ESCAPE
|
785
|
+
unless defined?HTML_ESCAPE
|
786
|
+
HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
|
787
|
+
end
|
722
788
|
|
723
789
|
def h s
|
724
790
|
s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey "Zed" Zaikin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-03-
|
12
|
+
date: 2010-03-12 00:00:00 +05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|