breakout_parser 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/breakout_parser/lex.yy.c +669 -478
- data/ext/breakout_parser/make_win32.bat +1 -0
- data/ext/breakout_parser/parser.l +31 -30
- data/ext/breakout_parser/parser.tab.c +189 -166
- data/ext/breakout_parser/parser.tab.h +23 -22
- data/ext/breakout_parser/parser.y +17 -1
- data/ext/breakout_parser/ruby_ext.c +15 -1
- data/spec/parser_spec.rb +94 -28
- metadata +2 -2
@@ -54,27 +54,28 @@
|
|
54
54
|
FILE_LINK = 270,
|
55
55
|
IMAGE_LINK = 271,
|
56
56
|
URL = 272,
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
57
|
+
EMAIL = 273,
|
58
|
+
UL = 274,
|
59
|
+
H1 = 275,
|
60
|
+
H2 = 276,
|
61
|
+
H3 = 277,
|
62
|
+
H4 = 278,
|
63
|
+
H5 = 279,
|
64
|
+
INLINE_CODE = 280,
|
65
|
+
SPACE = 281,
|
66
|
+
BR = 282,
|
67
|
+
OLI = 283,
|
68
|
+
ULI = 284,
|
69
|
+
PRE_CODE_START = 285,
|
70
|
+
PRE_CODE_END = 286,
|
71
|
+
PRE_START = 287,
|
72
|
+
PRE_END = 288,
|
73
|
+
CODE_START = 289,
|
74
|
+
CODE_END = 290,
|
75
|
+
NOTEXTILE_START = 291,
|
76
|
+
NOTEXTILE_END = 292,
|
77
|
+
BOLD_END = 293,
|
78
|
+
ITALIC_END = 294
|
78
79
|
};
|
79
80
|
#endif
|
80
81
|
|
@@ -94,7 +95,7 @@ typedef union YYSTYPE
|
|
94
95
|
|
95
96
|
|
96
97
|
/* Line 1676 of yacc.c */
|
97
|
-
#line 98 "parser.tab.h"
|
98
|
+
#line 99 "parser.tab.h"
|
98
99
|
} YYSTYPE;
|
99
100
|
# define YYSTYPE_IS_TRIVIAL 1
|
100
101
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
@@ -69,7 +69,7 @@ void yyerror(const char *msg)
|
|
69
69
|
%token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
|
70
70
|
%token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
|
71
71
|
%token <svalue> FILE_LINK IMAGE_LINK
|
72
|
-
%token <svalue> URL
|
72
|
+
%token <svalue> URL EMAIL
|
73
73
|
%token <svalue> UL
|
74
74
|
%token <svalue> H1 H2 H3 H4 H5
|
75
75
|
%token <svalue> INLINE_CODE
|
@@ -116,6 +116,7 @@ word : chars
|
|
116
116
|
| link
|
117
117
|
| T_WORD {concat2($1)} // TODO: somehow pass T_WORD's length here
|
118
118
|
| URL {process_url($1)}
|
119
|
+
| EMAIL {process_email($1)}
|
119
120
|
| BOLD_START {$1 ? concat(" <strong>",9) : concat("<strong>",8)}
|
120
121
|
| BOLD_END {concat("</strong>",9)}
|
121
122
|
| ITALIC_START {$1 ? concat(" <em>",5) : concat("<em>",4)}
|
@@ -402,6 +403,21 @@ process_url(const char*url){
|
|
402
403
|
const char *p;
|
403
404
|
|
404
405
|
concat("<a rel=\"nofollow\" href=\"",24);
|
406
|
+
if( *url == 'w' ){
|
407
|
+
// url starts with 'www.'
|
408
|
+
concat("http://",7);
|
409
|
+
} else {
|
410
|
+
// assume url starts with 'http://'
|
411
|
+
}
|
405
412
|
for(p=url; *p; p++) concat_raw_char(*p);
|
406
413
|
process_link_tail(url,NULL,NULL);
|
407
414
|
}
|
415
|
+
|
416
|
+
process_email(const char*url){
|
417
|
+
const char *p;
|
418
|
+
|
419
|
+
concat("<a href=\"mailto:",16);
|
420
|
+
for(p=url; *p; p++) concat_raw_char(*p);
|
421
|
+
process_link_tail(url,NULL,NULL);
|
422
|
+
}
|
423
|
+
|
@@ -4,12 +4,14 @@
|
|
4
4
|
|
5
5
|
void Init_breakout_parser();
|
6
6
|
VALUE method_parse(VALUE, VALUE, VALUE);
|
7
|
+
VALUE method_parse_links_only(VALUE, VALUE, VALUE);
|
7
8
|
|
8
9
|
VALUE breakout_parser = Qnil;
|
9
10
|
|
10
11
|
void Init_breakout_parser() {
|
11
12
|
breakout_parser = rb_define_class("BreakoutParser",rb_cObject);
|
12
13
|
rb_define_singleton_method(breakout_parser, "parse", method_parse, 2);
|
14
|
+
rb_define_singleton_method(breakout_parser, "parse_links_only", method_parse_links_only, 2);
|
13
15
|
}
|
14
16
|
|
15
17
|
extern char *buf, *bufptr;
|
@@ -17,7 +19,9 @@ extern char *in_buf, *in_pos;
|
|
17
19
|
extern const char *space_name;
|
18
20
|
extern size_t in_buf_len, bufsize, space_name_len;
|
19
21
|
|
20
|
-
|
22
|
+
extern int parse_links_only;
|
23
|
+
|
24
|
+
VALUE do_parse(VALUE self, VALUE text, VALUE r_space_name) {
|
21
25
|
VALUE s;
|
22
26
|
char *p;
|
23
27
|
|
@@ -69,4 +73,14 @@ VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
|
|
69
73
|
return s;
|
70
74
|
}
|
71
75
|
|
76
|
+
VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
|
77
|
+
parse_links_only = 0;
|
78
|
+
return do_parse(self,text,r_space_name);
|
79
|
+
}
|
80
|
+
|
81
|
+
VALUE method_parse_links_only(VALUE self, VALUE text, VALUE r_space_name) {
|
82
|
+
parse_links_only = 1;
|
83
|
+
return do_parse(self,text,r_space_name);
|
84
|
+
}
|
85
|
+
|
72
86
|
#endif // ifdef RUBY_VERSION
|
data/spec/parser_spec.rb
CHANGED
@@ -97,7 +97,7 @@ describe 'BreakoutParser' do
|
|
97
97
|
end
|
98
98
|
it "not confuses" do
|
99
99
|
parse("look at @this code@ and mail me at xxx@yyy.com").should ==
|
100
|
-
'look at <code>this code</code> and mail me at xxx@yyy.com'
|
100
|
+
'look at <code>this code</code> and mail me at <a href="mailto:xxx@yyy.com">xxx@yyy.com</a>'
|
101
101
|
end
|
102
102
|
it "w/o closing tag" do
|
103
103
|
parse("@smth").should == '@smth'
|
@@ -359,35 +359,99 @@ describe 'BreakoutParser' do
|
|
359
359
|
###############################################################################
|
360
360
|
|
361
361
|
describe "raw text links" do
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
"aaa bbb ccc
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
362
|
+
describe "starting with 'http://'" do
|
363
|
+
it "at the beginning" do
|
364
|
+
parse("http://asd.ru").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>"
|
365
|
+
end
|
366
|
+
it "in middle of other words" do
|
367
|
+
parse("aaa bbb ccc http://asd.ru ddd eee fff").should ==
|
368
|
+
"aaa bbb ccc <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ddd eee fff"
|
369
|
+
end
|
370
|
+
it "in new line" do
|
371
|
+
parse("aaa bbb ccc\nhttp://asd.ru\nddd eee fff").should match(
|
372
|
+
%r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ?<br /> ?ddd eee fff"
|
373
|
+
)
|
374
|
+
end
|
375
|
+
it "escapes '&' in link _text_" do
|
376
|
+
parse("http://asd.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://asd.ru/?a=1&b=2\">http://asd.ru/?a=1&amp;b=2</a>"
|
377
|
+
end
|
378
|
+
|
379
|
+
it "parses https://" do
|
380
|
+
parse("https://asd.ru").should == "<a rel=\"nofollow\" href=\"https://asd.ru\">https://asd.ru</a>"
|
381
|
+
end
|
382
|
+
|
383
|
+
%w', .'.each do |c|
|
384
|
+
it "stops parsing on \"#{c} \"" do
|
385
|
+
parse("http://asd.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
|
386
|
+
parse(" http://asd.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
|
387
|
+
parse(" http://asd.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
|
388
|
+
parse("xxx http://asd.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
|
389
|
+
parse(" http://asd.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru/\">http://asd.ru/</a>#{c} hello!"
|
390
|
+
parse(" http://aaa.com#{c} http://bbb.com").should ==
|
391
|
+
"<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
|
392
|
+
end
|
393
|
+
end
|
376
394
|
end
|
377
395
|
|
378
|
-
|
379
|
-
|
396
|
+
describe "starting with 'www.'" do
|
397
|
+
it "at the beginning" do
|
398
|
+
parse("www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>"
|
399
|
+
end
|
400
|
+
it "in middle of other words" do
|
401
|
+
parse("aaa bbb ccc www.ru ddd eee fff").should ==
|
402
|
+
"aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ddd eee fff"
|
403
|
+
end
|
404
|
+
it "in new line" do
|
405
|
+
parse("aaa bbb ccc\nwww.ru\nddd eee fff").should match(
|
406
|
+
%r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ?<br /> ?ddd eee fff"
|
407
|
+
)
|
408
|
+
end
|
409
|
+
it "escapes '&' in link _text_" do
|
410
|
+
parse("www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">www.ru/?a=1&amp;b=2</a>"
|
411
|
+
end
|
412
|
+
|
413
|
+
%w', .'.each do |c|
|
414
|
+
it "stops parsing on \"#{c} \"" do
|
415
|
+
parse("www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
|
416
|
+
parse(" www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
|
417
|
+
parse(" www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
|
418
|
+
parse("xxx www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
|
419
|
+
parse(" www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">www.ru/</a>#{c} hello!"
|
420
|
+
parse(" www.aaa.com#{c} www.bbb.com").should ==
|
421
|
+
"<a rel=\"nofollow\" href=\"http://www.aaa.com\">www.aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://www.bbb.com\">www.bbb.com</a>"
|
422
|
+
end
|
423
|
+
end
|
380
424
|
end
|
381
425
|
|
382
|
-
|
383
|
-
it "
|
384
|
-
parse("
|
385
|
-
|
386
|
-
|
387
|
-
parse("
|
388
|
-
|
389
|
-
|
390
|
-
|
426
|
+
describe 'e-mails' do
|
427
|
+
it "at the beginning" do
|
428
|
+
parse("aaa@bbb.com").should == "<a href=\"mailto:aaa@bbb.com\">aaa@bbb.com</a>"
|
429
|
+
end
|
430
|
+
it "in middle of other words" do
|
431
|
+
parse("aaa bbb ccc xx@yy.cn ddd eee fff").should ==
|
432
|
+
"aaa bbb ccc <a href=\"mailto:xx@yy.cn\">xx@yy.cn</a> ddd eee fff"
|
433
|
+
end
|
434
|
+
it "in new line" do
|
435
|
+
parse("aaa bbb ccc\naa.bb@cc.dd.ee\nddd eee fff").should match(
|
436
|
+
%r"aaa bbb ccc ?<br /> ?<a href=\"mailto:aa.bb@cc.dd.ee\">aa.bb@cc.dd.ee</a> ?<br /> ?ddd eee fff"
|
437
|
+
)
|
438
|
+
end
|
439
|
+
|
440
|
+
%w', .'.each do |c|
|
441
|
+
it "stops parsing on \"#{c} \"" do
|
442
|
+
parse("a-b@c-d.efghjikl#{c}").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
|
443
|
+
parse(" a-b@c-d.efghjikl#{c} ").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
|
444
|
+
parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
|
445
|
+
parse("xxx a-b@c-d.efghjikl#{c} hello!").should == "xxx <a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
|
446
|
+
parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
|
447
|
+
parse(" www@aaa.com#{c} www@bbb.com").should ==
|
448
|
+
"<a href=\"mailto:www@aaa.com\">www@aaa.com</a>#{c} <a href=\"mailto:www@bbb.com\">www@bbb.com</a>"
|
449
|
+
end
|
450
|
+
end
|
451
|
+
|
452
|
+
it "not parses bad emails" do
|
453
|
+
s="a@b.c a@b a.b@c a.b@@c a@b@c.d a#b@c.d"
|
454
|
+
parse(s).should == s
|
391
455
|
end
|
392
456
|
end
|
393
457
|
end
|
@@ -693,7 +757,7 @@ describe 'BreakoutParser' do
|
|
693
757
|
parse(s).should == s
|
694
758
|
s = "[[abcd:1234]]"
|
695
759
|
parse(s).should == s
|
696
|
-
s = "[[abcd::1234]] [[abcd:1234]] [[uri:
|
760
|
+
s = "[[abcd::1234]] [[abcd:1234]] [[uri:ww.ru]]"
|
697
761
|
parse(s).should == s
|
698
762
|
end
|
699
763
|
|
@@ -718,7 +782,9 @@ describe 'BreakoutParser' do
|
|
718
782
|
###############################################################################
|
719
783
|
###############################################################################
|
720
784
|
|
721
|
-
HTML_ESCAPE
|
785
|
+
unless defined?HTML_ESCAPE
|
786
|
+
HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
|
787
|
+
end
|
722
788
|
|
723
789
|
def h s
|
724
790
|
s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: breakout_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrey "Zed" Zaikin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-03-
|
12
|
+
date: 2010-03-12 00:00:00 +05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|