twitter-text 1.1.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ require 'digest'
11
11
 
12
12
  spec = Gem::Specification.new do |s|
13
13
  s.name = "twitter-text"
14
- s.version = "1.1.8"
14
+ s.version = "1.2.0"
15
15
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian"]
16
16
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com"]
17
17
  s.homepage = "http://twitter.com"
@@ -2,11 +2,9 @@
2
2
  module Twitter
3
3
  # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
4
4
  # usernames, lists, hashtags and URLs.
5
- module Autolink
5
+ module Autolink extend self
6
6
  include ActionView::Helpers::TagHelper #tag_options needed by auto_link
7
7
 
8
- WWW_REGEX = /www\./i #:nodoc:
9
-
10
8
  # Default CSS class for auto-linked URLs
11
9
  DEFAULT_URL_CLASS = "tweet-url"
12
10
  # Default CSS class for auto-linked lists (along with the url class)
@@ -18,6 +16,20 @@ module Twitter
18
16
  # HTML attribute for robot nofollow behavior (default)
19
17
  HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
20
18
 
19
+ HTML_ENTITIES = {
20
+ '&' => '&amp;',
21
+ '>' => '&gt;',
22
+ '<' => '&lt;',
23
+ '"' => '&quot;',
24
+ "'" => '&#39;'
25
+ }
26
+
27
+ def encode(text)
28
+ text && text.gsub(/[&"'><]/) do |character|
29
+ HTML_ENTITIES[character]
30
+ end
31
+ end
32
+
21
33
  # Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
22
34
  # <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
23
35
  # hash:
@@ -59,19 +71,39 @@ module Twitter
59
71
  options[:list_url_base] ||= "http://twitter.com/"
60
72
  extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
61
73
 
62
- text.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
63
- if $4 && !options[:suppress_lists]
64
- # the link is a list
65
- text = list = "#{$3}#{$4}"
66
- text = yield(list) if block_given?
67
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\"#{extra_html}>#{text}</a>"
74
+ new_text = ""
75
+
76
+ # this -1 flag allows strings ending in ">" to work
77
+ text.split(/[<>]/, -1).each_with_index do |chunk, index|
78
+ if index != 0
79
+ new_text << ((index % 2 == 0) ? ">" : "<")
80
+ end
81
+
82
+ if index % 4 != 0
83
+ new_text << chunk
68
84
  else
69
- # this is a screen name
70
- text = $3
71
- text = yield(text) if block_given?
72
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{text}\"#{extra_html}>#{text}</a>"
85
+ new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
86
+ before, at, user, slash_listname, after = $1, $2, $3, $4, $5
87
+ if slash_listname && !options[:suppress_lists]
88
+ # the link is a list
89
+ chunk = list = "#{user}#{slash_listname}"
90
+ chunk = yield(list) if block_given?
91
+ "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{encode(options[:list_url_base])}#{encode(list.downcase)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
92
+ else
93
+ if after =~ Twitter::Regex[:end_screen_name_match]
94
+ # Followed by something that means we don't autolink
95
+ "#{before}#{at}#{user}#{slash_listname}#{after}"
96
+ else
97
+ # this is a screen name
98
+ chunk = user
99
+ chunk = yield(chunk) if block_given?
100
+ "#{before}#{at}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{encode(options[:username_url_base])}#{encode(chunk)}\"#{extra_html}>#{encode(chunk)}</a>#{after}"
101
+ end
102
+ end
103
+ end
73
104
  end
74
105
  end
106
+ new_text
75
107
  end
76
108
 
77
109
  # Add <tt><a></a></tt> tags around the hashtags in the provided <tt>text</tt>. The
@@ -94,7 +126,7 @@ module Twitter
94
126
  hash = $2
95
127
  text = $3
96
128
  text = yield(text) if block_given?
97
- "#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
129
+ "#{before}<a href=\"#{options[:hashtag_url_base]}#{encode(text)}\" title=\"##{encode(text)}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{encode(hash)}#{encode(text)}</a>"
98
130
  end
99
131
  end
100
132
 
@@ -107,10 +139,14 @@ module Twitter
107
139
  options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
108
140
 
109
141
  text.gsub(Twitter::Regex[:valid_url]) do
110
- all, before, url, protocol = $1, $2, $3, $4
111
- html_attrs = tag_options(options.stringify_keys) || ""
112
- full_url = (protocol =~ WWW_REGEX ? "http://#{url}" : url)
113
- "#{before}<a href=\"#{full_url}\"#{html_attrs}>#{url}</a>"
142
+ all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
143
+ if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
144
+ html_attrs = tag_options(options.stringify_keys) || ""
145
+ full_url = ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url)
146
+ "#{before}<a href=\"#{encode(full_url)}\"#{html_attrs}>#{encode(url)}</a>"
147
+ else
148
+ all
149
+ end
114
150
  end
115
151
  end
116
152
 
@@ -39,7 +39,7 @@ end
39
39
  module Twitter
40
40
  # A module for including Tweet parsing in a class. This module provides functions for the extraction and processing
41
41
  # of usernames, lists, URLs and hashtags.
42
- module Extractor
42
+ module Extractor extend self
43
43
 
44
44
  # Extracts a list of all usernames mentioned in the Tweet <tt>text</tt>. If the
45
45
  # <tt>text</tt> is <tt>nil</tt> or contains no username mentions an empty array
@@ -65,7 +65,7 @@ module Twitter
65
65
  possible_screen_names = []
66
66
  position = 0
67
67
  text.to_s.scan(Twitter::Regex[:extract_mentions]) do |before, sn, after|
68
- unless after =~ Twitter::Regex[:at_signs]
68
+ unless after =~ Twitter::Regex[:end_screen_name_match]
69
69
  start_position = text.to_s.sub_string_search(sn, position) - 1
70
70
  position = start_position + sn.char_length + 1
71
71
  possible_screen_names << {
@@ -117,13 +117,15 @@ module Twitter
117
117
  urls = []
118
118
  position = 0
119
119
  text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
120
- start_position = text.to_s.sub_string_search(url, position)
121
- end_position = start_position + url.char_length
122
- position = end_position
123
- urls << {
124
- :url => (protocol == "www." ? "http://#{url}" : url),
125
- :indices => [start_position, end_position]
126
- }
120
+ if !protocol.blank? || domain =~ Twitter::Regex[:probable_tld]
121
+ start_position = text.to_s.sub_string_search(url, position)
122
+ end_position = start_position + url.char_length
123
+ position = end_position
124
+ urls << {
125
+ :url => ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url),
126
+ :indices => [start_position, end_position]
127
+ }
128
+ end
127
129
  end
128
130
  urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last } if block_given?
129
131
  urls
@@ -153,7 +155,7 @@ module Twitter
153
155
  tags = []
154
156
  position = 0
155
157
  text.scan(Twitter::Regex[:auto_link_hashtags]) do |before, hash, hash_text|
156
- start_position = text.to_s.sub_string_search(hash, position)
158
+ start_position = text.to_s.sub_string_search(hash + hash_text, position)
157
159
  position = start_position + hash_text.char_length + 1
158
160
  tags << {
159
161
  :hashtag => hash_text,
@@ -2,7 +2,7 @@
2
2
  module Twitter
3
3
  # Module for doing "hit highlighting" on tweets that have been auto-linked already.
4
4
  # Useful with the results returned from the Search API.
5
- module HitHighlighter
5
+ module HitHighlighter extend self
6
6
  # Default Tag used for hit highlighting
7
7
  DEFAULT_HIGHLIGHT_TAG = "em"
8
8
 
@@ -22,9 +22,7 @@ module Twitter
22
22
  tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG
23
23
  tags = ["<" + tag_name + ">", "</" + tag_name + ">"]
24
24
 
25
- chunks = text.split("<").map do |item|
26
- item.blank? ? item : item.split(">")
27
- end.flatten
25
+ chunks = text.split(/[<>]/)
28
26
 
29
27
  result = ""
30
28
  chunk_index, chunk = 0, chunks[0]
@@ -43,16 +43,23 @@ module Twitter
43
43
  LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
44
44
  REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
45
45
 
46
+ REGEXEN[:end_screen_name_match] = /#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}/o
47
+
46
48
  # Characters considered valid in a hashtag but not at the beginning, where only a-z and 0-9 are valid.
47
49
  HASHTAG_CHARACTERS = /[a-z0-9_#{LATIN_ACCENTS}]/io
48
- REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
49
- REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^)([@＠]+)([a-zA-Z0-9_]{1,20})(\/#{REGEXEN[:list_name]})?/o
50
+ REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/\?]+)(#|＃)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io
51
+ REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?($|.)/o
50
52
  REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
51
53
 
52
54
  # URL related hash regex collection
53
- REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/
55
+ REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_]|^|\:)/i
54
56
  REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
55
57
 
58
+ # For protocol-less URLs, we'll accept them if they end in one of a handful of likely TLDs
59
+ REGEXEN[:probable_tld] = /\.(?:com|net|org|gov|edu)$/i
60
+
61
+ REGEXEN[:www] = /www\./i
62
+
56
63
  REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~]/i
57
64
  # Allow URL paths to contain balanced parens
58
65
  # 1. Used in Wikipedia URLs like /Primer_(film)
@@ -73,7 +80,7 @@ module Twitter
73
80
  ( # $1 total match
74
81
  (#{REGEXEN[:valid_preceding_chars]}) # $2 Preceding character
75
82
  ( # $3 URL
76
- (https?:\/\/|www\.) # $4 Protocol or beginning
83
+ ((?:https?:\/\/|www\.)?) # $4 Protocol or beginning
77
84
  (#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number
78
85
  (/#{REGEXEN[:valid_url_path_chars]}*
79
86
  #{REGEXEN[:valid_url_path_ending_chars]}?
@@ -1,5 +1,5 @@
1
1
  module Twitter
2
- module Validation
2
+ module Validation extend self
3
3
  MAX_LENGTH = 140
4
4
 
5
5
  # Character not allowed in Tweets
@@ -475,10 +475,30 @@ describe Twitter::Autolink do
475
475
  end
476
476
 
477
477
  context "with a @ in a URL" do
478
- def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
478
+ context "with XSS attack" do
479
+ def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
479
480
 
480
- it "should not allow XSS follwing @" do
481
- @autolinked_text.should have_autolinked_url('http://x.xx/')
481
+ it "should not allow XSS follwing @" do
482
+ @autolinked_text.should have_autolinked_url('http://x.xx/')
483
+ end
484
+ end
485
+
486
+ context "with a username not followed by a /" do
487
+ def original_text; 'http://example.com/@foobar'; end
488
+
489
+ it "should link small url and username" do
490
+ @autolinked_text.should have_autolinked_url('http://example.com/')
491
+ @autolinked_text.should link_to_screen_name('foobar')
492
+ end
493
+ end
494
+
495
+ context "with a username followed by a /" do
496
+ def original_text; 'http://example.com/@foobar/'; end
497
+
498
+ it "should not link the username but link full url" do
499
+ @autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
500
+ @autolinked_text.should_not link_to_screen_name('foobar')
501
+ end
482
502
  end
483
503
  end
484
504
 
@@ -498,4 +518,23 @@ describe Twitter::Autolink do
498
518
 
499
519
  end
500
520
 
521
+ describe "encode" do
522
+ before do
523
+ @linker = TestAutolink.new
524
+ end
525
+ it "should escape html entities properly" do
526
+ @linker.encode("&").should == "&amp;"
527
+ @linker.encode(">").should == "&gt;"
528
+ @linker.encode("<").should == "&lt;"
529
+ @linker.encode("\"").should == "&quot;"
530
+ @linker.encode("'").should == "&#39;"
531
+ @linker.encode("&<>\"").should == "&amp;&lt;&gt;&quot;"
532
+ @linker.encode("<div>").should == "&lt;div&gt;"
533
+ @linker.encode("a&b").should == "a&amp;b"
534
+ @linker.encode("<a href=\"http://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;http://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
535
+ @linker.encode("&amp;").should == "&amp;amp;"
536
+ @linker.encode(nil).should == nil
537
+ end
538
+ end
539
+
501
540
  end
@@ -76,11 +76,15 @@ describe Twitter::HitHighlighter do
76
76
  it "should highlight around a link" do
77
77
  @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
78
78
  end
79
-
79
+
80
80
  it "should fail gracefully with bad hits" do
81
81
  @highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
82
82
  end
83
83
 
84
+ it "should not mess up with touching tags" do
85
+ @highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
86
+ end
87
+
84
88
  end
85
89
 
86
90
  end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 31
4
5
  prerelease: false
5
6
  segments:
6
7
  - 1
7
- - 1
8
- - 8
9
- version: 1.1.8
8
+ - 2
9
+ - 0
10
+ version: 1.2.0
10
11
  platform: ruby
11
12
  authors:
12
13
  - Matt Sanford
@@ -18,16 +19,18 @@ autorequire: ""
18
19
  bindir: bin
19
20
  cert_chain: []
20
21
 
21
- date: 2010-08-23 00:00:00 -07:00
22
+ date: 2010-10-05 00:00:00 -07:00
22
23
  default_executable:
23
24
  dependencies:
24
25
  - !ruby/object:Gem::Dependency
25
26
  name: actionpack
26
27
  prerelease: false
27
28
  requirement: &id001 !ruby/object:Gem::Requirement
29
+ none: false
28
30
  requirements:
29
31
  - - ">="
30
32
  - !ruby/object:Gem::Version
33
+ hash: 3
31
34
  segments:
32
35
  - 0
33
36
  version: "0"
@@ -76,23 +79,27 @@ rdoc_options: []
76
79
  require_paths:
77
80
  - lib
78
81
  required_ruby_version: !ruby/object:Gem::Requirement
82
+ none: false
79
83
  requirements:
80
84
  - - ">="
81
85
  - !ruby/object:Gem::Version
86
+ hash: 3
82
87
  segments:
83
88
  - 0
84
89
  version: "0"
85
90
  required_rubygems_version: !ruby/object:Gem::Requirement
91
+ none: false
86
92
  requirements:
87
93
  - - ">="
88
94
  - !ruby/object:Gem::Version
95
+ hash: 3
89
96
  segments:
90
97
  - 0
91
98
  version: "0"
92
99
  requirements: []
93
100
 
94
101
  rubyforge_project:
95
- rubygems_version: 1.3.6
102
+ rubygems_version: 1.3.7
96
103
  signing_key:
97
104
  specification_version: 3
98
105
  summary: Twitter text handling library