breakout_parser 0.0.7 → 0.0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -54,27 +54,28 @@
54
54
  FILE_LINK = 270,
55
55
  IMAGE_LINK = 271,
56
56
  URL = 272,
57
- UL = 273,
58
- H1 = 274,
59
- H2 = 275,
60
- H3 = 276,
61
- H4 = 277,
62
- H5 = 278,
63
- INLINE_CODE = 279,
64
- SPACE = 280,
65
- BR = 281,
66
- OLI = 282,
67
- ULI = 283,
68
- PRE_CODE_START = 284,
69
- PRE_CODE_END = 285,
70
- PRE_START = 286,
71
- PRE_END = 287,
72
- CODE_START = 288,
73
- CODE_END = 289,
74
- NOTEXTILE_START = 290,
75
- NOTEXTILE_END = 291,
76
- BOLD_END = 292,
77
- ITALIC_END = 293
57
+ EMAIL = 273,
58
+ UL = 274,
59
+ H1 = 275,
60
+ H2 = 276,
61
+ H3 = 277,
62
+ H4 = 278,
63
+ H5 = 279,
64
+ INLINE_CODE = 280,
65
+ SPACE = 281,
66
+ BR = 282,
67
+ OLI = 283,
68
+ ULI = 284,
69
+ PRE_CODE_START = 285,
70
+ PRE_CODE_END = 286,
71
+ PRE_START = 287,
72
+ PRE_END = 288,
73
+ CODE_START = 289,
74
+ CODE_END = 290,
75
+ NOTEXTILE_START = 291,
76
+ NOTEXTILE_END = 292,
77
+ BOLD_END = 293,
78
+ ITALIC_END = 294
78
79
  };
79
80
  #endif
80
81
 
@@ -94,7 +95,7 @@ typedef union YYSTYPE
94
95
 
95
96
 
96
97
  /* Line 1676 of yacc.c */
97
- #line 98 "parser.tab.h"
98
+ #line 99 "parser.tab.h"
98
99
  } YYSTYPE;
99
100
  # define YYSTYPE_IS_TRIVIAL 1
100
101
  # define yystype YYSTYPE /* obsolescent; will be withdrawn */
@@ -69,7 +69,7 @@ void yyerror(const char *msg)
69
69
  %token <svalue> T_WORD TICKET_LINK LINK SVN_REVISION_LINK GIT_REVISION_LINK WIKI_LINK ANCHOR_LINK
70
70
  %token <svalue> URL_WITH_PROTO_LINK URL_WITHOUT_PROTO_LINK
71
71
  %token <svalue> FILE_LINK IMAGE_LINK
72
- %token <svalue> URL
72
+ %token <svalue> URL EMAIL
73
73
  %token <svalue> UL
74
74
  %token <svalue> H1 H2 H3 H4 H5
75
75
  %token <svalue> INLINE_CODE
@@ -116,6 +116,7 @@ word : chars
116
116
  | link
117
117
  | T_WORD {concat2($1)} // TODO: somehow pass T_WORD's length here
118
118
  | URL {process_url($1)}
119
+ | EMAIL {process_email($1)}
119
120
  | BOLD_START {$1 ? concat(" <strong>",9) : concat("<strong>",8)}
120
121
  | BOLD_END {concat("</strong>",9)}
121
122
  | ITALIC_START {$1 ? concat(" <em>",5) : concat("<em>",4)}
@@ -402,6 +403,21 @@ process_url(const char*url){
402
403
  const char *p;
403
404
 
404
405
  concat("<a rel=\"nofollow\" href=\"",24);
406
+ if( *url == 'w' ){
407
+ // url starts with 'www.'
408
+ concat("http://",7);
409
+ } else {
410
+ // assume url starts with 'http://'
411
+ }
405
412
  for(p=url; *p; p++) concat_raw_char(*p);
406
413
  process_link_tail(url,NULL,NULL);
407
414
  }
415
+
416
+ process_email(const char*url){
417
+ const char *p;
418
+
419
+ concat("<a href=\"mailto:",16);
420
+ for(p=url; *p; p++) concat_raw_char(*p);
421
+ process_link_tail(url,NULL,NULL);
422
+ }
423
+
@@ -4,12 +4,14 @@
4
4
 
5
5
  void Init_breakout_parser();
6
6
  VALUE method_parse(VALUE, VALUE, VALUE);
7
+ VALUE method_parse_links_only(VALUE, VALUE, VALUE);
7
8
 
8
9
  VALUE breakout_parser = Qnil;
9
10
 
10
11
  void Init_breakout_parser() {
11
12
  breakout_parser = rb_define_class("BreakoutParser",rb_cObject);
12
13
  rb_define_singleton_method(breakout_parser, "parse", method_parse, 2);
14
+ rb_define_singleton_method(breakout_parser, "parse_links_only", method_parse_links_only, 2);
13
15
  }
14
16
 
15
17
  extern char *buf, *bufptr;
@@ -17,7 +19,9 @@ extern char *in_buf, *in_pos;
17
19
  extern const char *space_name;
18
20
  extern size_t in_buf_len, bufsize, space_name_len;
19
21
 
20
- VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
22
+ extern int parse_links_only;
23
+
24
+ VALUE do_parse(VALUE self, VALUE text, VALUE r_space_name) {
21
25
  VALUE s;
22
26
  char *p;
23
27
 
@@ -69,4 +73,14 @@ VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
69
73
  return s;
70
74
  }
71
75
 
76
+ VALUE method_parse(VALUE self, VALUE text, VALUE r_space_name) {
77
+ parse_links_only = 0;
78
+ return do_parse(self,text,r_space_name);
79
+ }
80
+
81
+ VALUE method_parse_links_only(VALUE self, VALUE text, VALUE r_space_name) {
82
+ parse_links_only = 1;
83
+ return do_parse(self,text,r_space_name);
84
+ }
85
+
72
86
  #endif // ifdef RUBY_VERSION
data/spec/parser_spec.rb CHANGED
@@ -97,7 +97,7 @@ describe 'BreakoutParser' do
97
97
  end
98
98
  it "not confuses" do
99
99
  parse("look at @this code@ and mail me at xxx@yyy.com").should ==
100
- 'look at <code>this code</code> and mail me at xxx@yyy.com'
100
+ 'look at <code>this code</code> and mail me at <a href="mailto:xxx@yyy.com">xxx@yyy.com</a>'
101
101
  end
102
102
  it "w/o closing tag" do
103
103
  parse("@smth").should == '@smth'
@@ -359,35 +359,99 @@ describe 'BreakoutParser' do
359
359
  ###############################################################################
360
360
 
361
361
  describe "raw text links" do
362
- it "at the beginning" do
363
- parse("http://www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>"
364
- end
365
- it "in middle of other words" do
366
- parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
367
- "aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
368
- end
369
- it "in new line" do
370
- parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
371
- %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
372
- )
373
- end
374
- it "escapes '&' in link _text_" do
375
- parse("http://www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&amp;b=2</a>"
362
+ describe "starting with 'http://'" do
363
+ it "at the beginning" do
364
+ parse("http://asd.ru").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>"
365
+ end
366
+ it "in middle of other words" do
367
+ parse("aaa bbb ccc http://asd.ru ddd eee fff").should ==
368
+ "aaa bbb ccc <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ddd eee fff"
369
+ end
370
+ it "in new line" do
371
+ parse("aaa bbb ccc\nhttp://asd.ru\nddd eee fff").should match(
372
+ %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a> ?<br /> ?ddd eee fff"
373
+ )
374
+ end
375
+ it "escapes '&' in link _text_" do
376
+ parse("http://asd.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://asd.ru/?a=1&b=2\">http://asd.ru/?a=1&amp;b=2</a>"
377
+ end
378
+
379
+ it "parses https://" do
380
+ parse("https://asd.ru").should == "<a rel=\"nofollow\" href=\"https://asd.ru\">https://asd.ru</a>"
381
+ end
382
+
383
+ %w', .'.each do |c|
384
+ it "stops parsing on \"#{c} \"" do
385
+ parse("http://asd.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
386
+ parse(" http://asd.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c}"
387
+ parse(" http://asd.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
388
+ parse("xxx http://asd.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://asd.ru\">http://asd.ru</a>#{c} hello!"
389
+ parse(" http://asd.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://asd.ru/\">http://asd.ru/</a>#{c} hello!"
390
+ parse(" http://aaa.com#{c} http://bbb.com").should ==
391
+ "<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
392
+ end
393
+ end
376
394
  end
377
395
 
378
- it "parses https://" do
379
- parse("https://www.ru").should == "<a rel=\"nofollow\" href=\"https://www.ru\">https://www.ru</a>"
396
+ describe "starting with 'www.'" do
397
+ it "at the beginning" do
398
+ parse("www.ru").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>"
399
+ end
400
+ it "in middle of other words" do
401
+ parse("aaa bbb ccc www.ru ddd eee fff").should ==
402
+ "aaa bbb ccc <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ddd eee fff"
403
+ end
404
+ it "in new line" do
405
+ parse("aaa bbb ccc\nwww.ru\nddd eee fff").should match(
406
+ %r"aaa bbb ccc ?<br /> ?<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a> ?<br /> ?ddd eee fff"
407
+ )
408
+ end
409
+ it "escapes '&' in link _text_" do
410
+ parse("www.ru/?a=1&b=2").should == "<a rel=\"nofollow\" href=\"http://www.ru/?a=1&b=2\">www.ru/?a=1&amp;b=2</a>"
411
+ end
412
+
413
+ %w', .'.each do |c|
414
+ it "stops parsing on \"#{c} \"" do
415
+ parse("www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
416
+ parse(" www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c}"
417
+ parse(" www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
418
+ parse("xxx www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">www.ru</a>#{c} hello!"
419
+ parse(" www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">www.ru/</a>#{c} hello!"
420
+ parse(" www.aaa.com#{c} www.bbb.com").should ==
421
+ "<a rel=\"nofollow\" href=\"http://www.aaa.com\">www.aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://www.bbb.com\">www.bbb.com</a>"
422
+ end
423
+ end
380
424
  end
381
425
 
382
- %w', .'.each do |c|
383
- it "stops parsing on \"#{c} \"" do
384
- parse("http://www.ru#{c}").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
385
- parse(" http://www.ru#{c} ").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c}"
386
- parse(" http://www.ru#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
387
- parse("xxx http://www.ru#{c} hello!").should == "xxx <a rel=\"nofollow\" href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
388
- parse(" http://www.ru/#{c} hello!").should == "<a rel=\"nofollow\" href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
389
- parse(" http://aaa.com#{c} http://bbb.com").should ==
390
- "<a rel=\"nofollow\" href=\"http://aaa.com\">http://aaa.com</a>#{c} <a rel=\"nofollow\" href=\"http://bbb.com\">http://bbb.com</a>"
426
+ describe 'e-mails' do
427
+ it "at the beginning" do
428
+ parse("aaa@bbb.com").should == "<a href=\"mailto:aaa@bbb.com\">aaa@bbb.com</a>"
429
+ end
430
+ it "in middle of other words" do
431
+ parse("aaa bbb ccc xx@yy.cn ddd eee fff").should ==
432
+ "aaa bbb ccc <a href=\"mailto:xx@yy.cn\">xx@yy.cn</a> ddd eee fff"
433
+ end
434
+ it "in new line" do
435
+ parse("aaa bbb ccc\naa.bb@cc.dd.ee\nddd eee fff").should match(
436
+ %r"aaa bbb ccc ?<br /> ?<a href=\"mailto:aa.bb@cc.dd.ee\">aa.bb@cc.dd.ee</a> ?<br /> ?ddd eee fff"
437
+ )
438
+ end
439
+
440
+ %w', .'.each do |c|
441
+ it "stops parsing on \"#{c} \"" do
442
+ parse("a-b@c-d.efghjikl#{c}").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
443
+ parse(" a-b@c-d.efghjikl#{c} ").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c}"
444
+ parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
445
+ parse("xxx a-b@c-d.efghjikl#{c} hello!").should == "xxx <a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
446
+ parse(" a-b@c-d.efghjikl#{c} hello!").should == "<a href=\"mailto:a-b@c-d.efghjikl\">a-b@c-d.efghjikl</a>#{c} hello!"
447
+ parse(" www@aaa.com#{c} www@bbb.com").should ==
448
+ "<a href=\"mailto:www@aaa.com\">www@aaa.com</a>#{c} <a href=\"mailto:www@bbb.com\">www@bbb.com</a>"
449
+ end
450
+ end
451
+
452
+ it "not parses bad emails" do
453
+ s="a@b.c a@b a.b@c a.b@@c a@b@c.d a#b@c.d"
454
+ parse(s).should == s
391
455
  end
392
456
  end
393
457
  end
@@ -693,7 +757,7 @@ describe 'BreakoutParser' do
693
757
  parse(s).should == s
694
758
  s = "[[abcd:1234]]"
695
759
  parse(s).should == s
696
- s = "[[abcd::1234]] [[abcd:1234]] [[uri:www.ru]]"
760
+ s = "[[abcd::1234]] [[abcd:1234]] [[uri:ww.ru]]"
697
761
  parse(s).should == s
698
762
  end
699
763
 
@@ -718,7 +782,9 @@ describe 'BreakoutParser' do
718
782
  ###############################################################################
719
783
  ###############################################################################
720
784
 
721
- HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
785
+ unless defined?HTML_ESCAPE
786
+ HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
787
+ end
722
788
 
723
789
  def h s
724
790
  s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: breakout_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrey "Zed" Zaikin
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-03-09 00:00:00 +05:00
12
+ date: 2010-03-12 00:00:00 +05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency