twitter-text 1.1.8 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ require 'digest'
11
11
 
12
12
  spec = Gem::Specification.new do |s|
13
13
  s.name = "twitter-text"
14
- s.version = "1.1.8"
14
+ s.version = "1.2.0"
15
15
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian"]
16
16
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com"]
17
17
  s.homepage = "http://twitter.com"
@@ -2,11 +2,9 @@
2
2
  module Twitter
3
3
  # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
4
4
  # usernames, lists, hashtags and URLs.
5
- module Autolink
5
+ module Autolink extend self
6
6
  include ActionView::Helpers::TagHelper #tag_options needed by auto_link
7
7
 
8
- WWW_REGEX = /www\./i #:nodoc:
9
-
10
8
  # Default CSS class for auto-linked URLs
11
9
  DEFAULT_URL_CLASS = "tweet-url"
12
10
  # Default CSS class for auto-linked lists (along with the url class)
@@ -18,6 +16,20 @@ module Twitter
18
16
  # HTML attribute for robot nofollow behavior (default)
19
17
  HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
20
18
 
19
+ HTML_ENTITIES = {
20
+ '&' => '&amp;',
21
+ '>' => '&gt;',
22
+ '<' => '&lt;',
23
+ '"' => '&quot;',
24
+ "'" => '&#39;'
25
+ }
26
+
27
+ def encode(text)
28
+ text && text.gsub(/[&"'><]/) do |character|
29
+ HTML_ENTITIES[character]
30
+ end
31
+ end
32
+
21
33
  # Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
22
34
  # <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
23
35
  # hash:
@@ -59,19 +71,39 @@ module Twitter
59
71
  options[:list_url_base] ||= "http://twitter.com/"
60
72
  extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
61
73
 
62
- text.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
63
- if $4 && !options[:suppress_lists]
64
- # the link is a list
65
- text = list = "#{$3}#{$4}"
66
- text = yield(list) if block_given?
67
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\"#{extra_html}>#{text}</a>"
74
+ new_text = ""
75
+
76
+ # the -1 limit makes split keep trailing empty chunks, so strings ending in ">" still work
77
+ text.split(/[<>]/, -1).each_with_index do |chunk, index|
78
+ if index != 0
79
+ new_text << ((index % 2 == 0) ? ">" : "<")
80
+ end
81
+
82
+ if index % 4 != 0
83
+ new_text << chunk
68
84
  else
69
- # this is a screen name
70
- text = $3
71
- text = yield(text) if block_given?
72
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{text}\"#{extra_html}>#{text}</a>"
85
+ new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
86
+ before, at, user, slash_listname, after = $1, $2, $3, $4, $5
87
+ if slash_listname && !options[:suppress_lists]
88
+ # the link is a list
89
+ chunk = list = "#{user}#{slash_listname}"
90
+ chunk = yield(list) if block_given?
91
+ "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{encode(options[:list_url_base])}#{encode(list.downcase)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
92
+ else
93
+ if after =~ Twitter::Regex[:end_screen_name_match]
94
+ # Followed by something that means we don't autolink
95
+ "#{before}#{at}#{user}#{slash_listname}#{after}"
96
+ else
97
+ # this is a screen name
98
+ chunk = user
99
+ chunk = yield(chunk) if block_given?
100
+ "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{encode(options[:username_url_base])}#{encode(chunk)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
101
+ end
102
+ end
103
+ end
73
104
  end
74
105
  end
106
+ new_text
75
107
  end
76
108
 
77
109
  # Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
@@ -94,7 +126,7 @@ module Twitter
94
126
  hash = $2
95
127
  text = $3
96
128
  text = yield(text) if block_given?
97
- "#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
129
+ "#{before}<a href=\"#{options[:hashtag_url_base]}#{encode(text)}\" title=\"##{encode(text)}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{encode(hash)}#{encode(text)}</a>"
98
130
  end
99
131
  end
100
132
 
@@ -107,10 +139,14 @@ module Twitter
107
139
  options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
108
140
 
109
141
  text.gsub(Twitter::Regex[:valid_url]) do
110
- all, before, url, protocol = $1, $2, $3, $4
111
- html_attrs = tag_options(options.stringify_keys) || ""
112
- full_url = (protocol =~ WWW_REGEX ? "http://#{url}" : url)
113
- "#{before}<a href=\"#{full_url}\"#{html_attrs}>#{url}</a>"
142
+ all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
143
+ if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
144
+ html_attrs = tag_options(options.stringify_keys) || ""
145
+ full_url = ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url)
146
+ "#{before}<a href=\"#{encode(full_url)}\"#{html_attrs}>#{encode(url)}</a>"
147
+ else
148
+ all
149
+ end
114
150
  end
115
151
  end
116
152
 
@@ -39,7 +39,7 @@ end
39
39
  module Twitter
40
40
  # A module for including Tweet parsing in a class. This module provides functions for the extraction and processing
41
41
  # of usernames, lists, URLs and hashtags.
42
- module Extractor
42
+ module Extractor extend self
43
43
 
44
44
  # Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>. If the
45
45
  # <tt>text</tt> is <tt>nil</tt> or contains no username mentions an empty array
@@ -65,7 +65,7 @@ module Twitter
65
65
  possible_screen_names = []
66
66
  position = 0
67
67
  text.to_s.scan(Twitter::Regex[:extract_mentions]) do |before, sn, after|
68
- unless after =~ Twitter::Regex[:at_signs]
68
+ unless after =~ Twitter::Regex[:end_screen_name_match]
69
69
  start_position = text.to_s.sub_string_search(sn, position) - 1
70
70
  position = start_position + sn.char_length + 1
71
71
  possible_screen_names << {
@@ -117,13 +117,15 @@ module Twitter
117
117
  urls = []
118
118
  position = 0
119
119
  text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
120
- start_position = text.to_s.sub_string_search(url, position)
121
- end_position = start_position + url.char_length
122
- position = end_position
123
- urls << {
124
- :url => (protocol == "www." ? "http://#{url}" : url),
125
- :indices => [start_position, end_position]
126
- }
120
+ if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
121
+ start_position = text.to_s.sub_string_search(url, position)
122
+ end_position = start_position + url.char_length
123
+ position = end_position
124
+ urls << {
125
+ :url => ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url),
126
+ :indices => [start_position, end_position]
127
+ }
128
+ end
127
129
  end
128
130
  urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last } if block_given?
129
131
  urls
@@ -153,7 +155,7 @@ module Twitter
153
155
  tags = []
154
156
  position = 0
155
157
  text.scan(Twitter::Regex[:auto_link_hashtags]) do |before, hash, hash_text|
156
- start_position = text.to_s.sub_string_search(hash, position)
158
+ start_position = text.to_s.sub_string_search(hash + hash_text, position)
157
159
  position = start_position + hash_text.char_length + 1
158
160
  tags << {
159
161
  :hashtag => hash_text,
@@ -2,7 +2,7 @@
2
2
  module Twitter
3
3
  # Module for doing "hit highlighting" on tweets that have been auto-linked already.
4
4
  # Useful with the results returned from the Search API.
5
- module HitHighlighter
5
+ module HitHighlighter extend self
6
6
  # Default Tag used for hit highlighting
7
7
  DEFAULT_HIGHLIGHT_TAG = "em"
8
8
 
@@ -22,9 +22,7 @@ module Twitter
22
22
  tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
23
23
  tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
24
24
 
25
- chunks = text.split("<").map do |item|
26
- item.blank? ? item : item.split(">")
27
- end.flatten
25
+ chunks = text.split(/[<>]/)
28
26
 
29
27
  result = ""
30
28
  chunk_index, chunk = 0, chunks[0]
@@ -43,16 +43,23 @@ module Twitter
43
43
  LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
44
44
  REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
45
45
 
46
+ REGEXEN[:end_screen_name_match] = /#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}/o
47
+
46
48
  # Characters considered valid in a hashtag but not at the beginning, where only a-z and 0-9 are valid.
47
49
  HASHTAG_CHARACTERS = /[a-z0-9_#{LATIN_ACCENTS}]/io
48
- REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
49
- REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^)([@＠]+)([a-zA-Z0-9_]{1,20})(\/#{REGEXEN[:list_name]})?/o
50
+ REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/\?]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
51
+ REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?($|.)/o
50
52
  REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
51
53
 
52
54
  # URL related hash regex collection
53
- REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/
55
+ REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_]|^|\:)/i
54
56
  REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
55
57
 
58
+ # For protocol-less URLs, we'll accept them if they end in one of a handful of likely TLDs
59
+ REGEXEN[:probable_tld] = /\.(?:com|net|org|gov|edu)$/i
60
+
61
+ REGEXEN[:www] = /www\./i
62
+
56
63
  REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~]/i
57
64
  # Allow URL paths to contain balanced parens
58
65
  # 1. Used in Wikipedia URLs like /Primer_(film)
@@ -73,7 +80,7 @@ module Twitter
73
80
  ( # $1 total match
74
81
  (#{REGEXEN[:valid_preceding_chars]}) # $2 Preceding character
75
82
  ( # $3 URL
76
- (https?:\/\/|www\.) # $4 Protocol or beginning
83
+ ((?:https?:\/\/|www\.)?) # $4 Protocol or beginning
77
84
  (#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number
78
85
  (/#{REGEXEN[:valid_url_path_chars]}*
79
86
  #{REGEXEN[:valid_url_path_ending_chars]}?
@@ -1,5 +1,5 @@
1
1
  module Twitter
2
- module Validation
2
+ module Validation extend self
3
3
  MAX_LENGTH = 140
4
4
 
5
5
  # Character not allowed in Tweets
@@ -475,10 +475,30 @@ describe Twitter::Autolink do
475
475
  end
476
476
 
477
477
  context "with a @ in a URL" do
478
- def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
478
+ context "with XSS attack" do
479
+ def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
479
480
 
480
- it "should not allow XSS follwing @" do
481
- @autolinked_text.should have_autolinked_url('http://x.xx/')
481
+ it "should not allow XSS follwing @" do
482
+ @autolinked_text.should have_autolinked_url('http://x.xx/')
483
+ end
484
+ end
485
+
486
+ context "with a username not followed by a /" do
487
+ def original_text; 'http://example.com/@foobar'; end
488
+
489
+ it "should link small url and username" do
490
+ @autolinked_text.should have_autolinked_url('http://example.com/')
491
+ @autolinked_text.should link_to_screen_name('foobar')
492
+ end
493
+ end
494
+
495
+ context "with a username followed by a /" do
496
+ def original_text; 'http://example.com/@foobar/'; end
497
+
498
+ it "should not link the username but link full url" do
499
+ @autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
500
+ @autolinked_text.should_not link_to_screen_name('foobar')
501
+ end
482
502
  end
483
503
  end
484
504
 
@@ -498,4 +518,23 @@ describe Twitter::Autolink do
498
518
 
499
519
  end
500
520
 
521
+ describe "encode" do
522
+ before do
523
+ @linker = TestAutolink.new
524
+ end
525
+ it "should escape html entities properly" do
526
+ @linker.encode("&").should == "&amp;"
527
+ @linker.encode(">").should == "&gt;"
528
+ @linker.encode("<").should == "&lt;"
529
+ @linker.encode("\"").should == "&quot;"
530
+ @linker.encode("'").should == "&#39;"
531
+ @linker.encode("&<>\"").should == "&amp;&lt;&gt;&quot;"
532
+ @linker.encode("<div>").should == "&lt;div&gt;"
533
+ @linker.encode("a&b").should == "a&amp;b"
534
+ @linker.encode("<a href=\"http://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;http://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
535
+ @linker.encode("&amp;").should == "&amp;amp;"
536
+ @linker.encode(nil).should == nil
537
+ end
538
+ end
539
+
501
540
  end
@@ -76,11 +76,15 @@ describe Twitter::HitHighlighter do
76
76
  it "should highlight around a link" do
77
77
  @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
78
78
  end
79
-
79
+
80
80
  it "should fail gracefully with bad hits" do
81
81
  @highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
82
82
  end
83
83
 
84
+ it "should not mess up with touching tags" do
85
+ @highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
86
+ end
87
+
84
88
  end
85
89
 
86
90
  end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 31
4
5
  prerelease: false
5
6
  segments:
6
7
  - 1
7
- - 1
8
- - 8
9
- version: 1.1.8
8
+ - 2
9
+ - 0
10
+ version: 1.2.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Matt Sanford
@@ -18,16 +19,18 @@ autorequire: ""
18
19
  bindir: bin
19
20
  cert_chain: []
20
21
 
21
- date: 2010-08-23 00:00:00 -07:00
22
+ date: 2010-10-05 00:00:00 -07:00
22
23
  default_executable:
23
24
  dependencies:
24
25
  - !ruby/object:Gem::Dependency
25
26
  name: actionpack
26
27
  prerelease: false
27
28
  requirement: &id001 !ruby/object:Gem::Requirement
29
+ none: false
28
30
  requirements:
29
31
  - - ">="
30
32
  - !ruby/object:Gem::Version
33
+ hash: 3
31
34
  segments:
32
35
  - 0
33
36
  version: "0"
@@ -76,23 +79,27 @@ rdoc_options: []
76
79
  require_paths:
77
80
  - lib
78
81
  required_ruby_version: !ruby/object:Gem::Requirement
82
+ none: false
79
83
  requirements:
80
84
  - - ">="
81
85
  - !ruby/object:Gem::Version
86
+ hash: 3
82
87
  segments:
83
88
  - 0
84
89
  version: "0"
85
90
  required_rubygems_version: !ruby/object:Gem::Requirement
91
+ none: false
86
92
  requirements:
87
93
  - - ">="
88
94
  - !ruby/object:Gem::Version
95
+ hash: 3
89
96
  segments:
90
97
  - 0
91
98
  version: "0"
92
99
  requirements: []
93
100
 
94
101
  rubyforge_project:
95
- rubygems_version: 1.3.6
102
+ rubygems_version: 1.3.7
96
103
  signing_key:
97
104
  specification_version: 3
98
105
  summary: Twitter text handling library