twitter-text 1.4.9 → 1.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,14 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitter-text (1.4.8)
4
+ twitter-text (1.4.10)
5
5
  activesupport
6
6
 
7
7
  GEM
8
8
  remote: http://rubygems.org/
9
9
  specs:
10
- activesupport (3.0.3)
10
+ activesupport (3.1.0)
11
+ multi_json (~> 1.0)
11
12
  diff-lcs (1.1.2)
13
+ multi_json (1.0.4)
12
14
  nokogiri (1.4.4)
13
15
  nokogiri (1.4.4-java)
14
16
  weakling (>= 0.0.3)
data/lib/autolink.rb CHANGED
@@ -20,7 +20,7 @@ module Twitter
20
20
  OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
21
21
  :username_url_base, :list_url_base, :hashtag_url_base,
22
22
  :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
23
- :suppress_lists, :suppress_no_follow]
23
+ :suppress_lists, :suppress_no_follow, :url_entities]
24
24
 
25
25
  HTML_ENTITIES = {
26
26
  '&' => '&amp;',
@@ -139,6 +139,16 @@ module Twitter
139
139
  options = href_options.dup
140
140
  options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
141
141
  options[:class] = options.delete(:url_class)
142
+
143
+ url_entities = {}
144
+ if options[:url_entities]
145
+ options[:url_entities].each do |entity|
146
+ entity = entity.with_indifferent_access
147
+ url_entities[entity[:url]] = entity
148
+ end
149
+ options.delete(:url_entities)
150
+ end
151
+
142
152
  html_attrs = html_attrs_for_options(options)
143
153
 
144
154
  Twitter::Rewriter.rewrite_urls(text) do |url|
@@ -147,7 +157,13 @@ module Twitter
147
157
  else
148
158
  html_escape(url)
149
159
  end
150
- %(<a href="#{href}"#{html_attrs}>#{html_escape(url)}</a>)
160
+
161
+ display_url = url
162
+ if url_entities[url] && url_entities[url][:display_url]
163
+ display_url = url_entities[url][:display_url]
164
+ end
165
+
166
+ %(<a href="#{href}"#{html_attrs}>#{html_escape(display_url)}</a>)
151
167
  end
152
168
  end
153
169
 
data/lib/extractor.rb CHANGED
@@ -155,9 +155,12 @@ module Twitter
155
155
  return [] unless text
156
156
  urls = []
157
157
  position = 0
158
- text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
158
+ text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
159
159
  valid_url_match_data = $~
160
- if protocol && !protocol.empty?
160
+
161
+ # Regex in Ruby 1.8 doesn't support lookbehind, so we need to manually filter out
162
+ # the short URLs without protocol and path, i.e., [domain].[ccTLD]
163
+ unless !protocol && !path && domain =~ Twitter::Regex[:valid_short_domain]
161
164
  start_position = valid_url_match_data.char_begin(3)
162
165
  end_position = valid_url_match_data.char_end(3)
163
166
  urls << {
data/lib/regex.rb CHANGED
@@ -111,14 +111,35 @@ module Twitter
111
111
  REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
112
112
 
113
113
  # URL related hash regex collection
114
- REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_@＠]|^|\:)/i
114
+ REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@＠\.]|^)/i
115
115
 
116
- DOMAIN_EXCLUDE_PART = "[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}"
117
- REGEXEN[:valid_subdomain] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[_-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]\./
118
- REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/
119
- REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
116
+ DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}]"
117
+ REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
118
+ REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
120
119
 
121
- REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i
120
+ REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel)(?=[^[:alpha:]]|$))/i
121
+ REGEXEN[:valid_ccTLD] = %r{
122
+ (?:
123
+ (?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|
124
+ ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|
125
+ gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|
126
+ lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|
127
+ pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|
128
+ tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)
129
+ (?=[^[:alpha:]]|$)
130
+ )
131
+ }ix
132
+ REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/
133
+
134
+ REGEXEN[:valid_domain] = /(?:
135
+ #{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
136
+ (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
137
+ )/ix
138
+ REGEXEN[:valid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/
139
+
140
+ REGEXEN[:valid_port_number] = /[0-9]+/
141
+
142
+ REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/i
122
143
  # Allow URL paths to contain balanced parens
123
144
  # 1. Used in Wikipedia URLs like /Primer_(film)
124
145
  # 2. Used in IIS sessions like /S(dfd346)/
@@ -139,16 +160,17 @@ module Twitter
139
160
  ( # $1 total match
140
161
  (#{REGEXEN[:valid_preceding_chars]}) # $2 Preceeding chracter
141
162
  ( # $3 URL
142
- (https?:\/\/) # $4 Protocol
143
- (#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number
163
+ (https?:\/\/)? # $4 Protocol (optional)
164
+ (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
165
+ (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
144
166
  (/
145
167
  (?:
146
168
  #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}| # 1+ path chars and a valid last char
147
169
  #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}?| # Optional last char to handle /@foo/ case
148
170
  #{REGEXEN[:valid_url_path_ending_chars]} # Just a # case
149
171
  )?
150
- )? # $6 URL Path and anchor
151
- (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $7 Query String
172
+ )? # $7 URL Path and anchor
173
+ (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
152
174
  )
153
175
  )
154
176
  }iox;
@@ -161,7 +183,7 @@ module Twitter
161
183
  #{REGEXEN[:validate_url_unreserved]}|
162
184
  #{REGEXEN[:validate_url_pct_encoded]}|
163
185
  #{REGEXEN[:validate_url_sub_delims]}|
164
- :|@
186
+ [:\|@]
165
187
  )/iox
166
188
 
167
189
  REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i
@@ -236,11 +258,9 @@ module Twitter
236
258
  REGEXEN[:validate_url_unencoded] = %r{
237
259
  \A # Full URL
238
260
  (?:
239
- ([^:/?#]+): # $1 Scheme
240
- )
241
- (?://
242
- ([^/?#]*) # $2 Authority
243
- )
261
+ ([^:/?#]+):// # $1 Scheme
262
+ )?
263
+ ([^/?#]*) # $2 Authority
244
264
  ([^?#]*) # $3 Path
245
265
  (?:
246
266
  \?([^#]*) # $4 Query
data/lib/validation.rb CHANGED
@@ -74,7 +74,7 @@ module Twitter
74
74
  extracted.size == 1 && extracted.first == hashtag[1..-1]
75
75
  end
76
76
 
77
- def valid_url?(url, unicode_domains=true)
77
+ def valid_url?(url, unicode_domains=true, require_protocol=true)
78
78
  return false if !url || url.empty?
79
79
 
80
80
  url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
@@ -82,7 +82,8 @@ module Twitter
82
82
 
83
83
  scheme, authority, path, query, fragment = url_parts.captures
84
84
 
85
- return false unless (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i) &&
85
+ return false unless ((!require_protocol ||
86
+ (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
86
87
  valid_match?(path, Twitter::Regex[:validate_url_path]) &&
87
88
  valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
88
89
  valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))
@@ -391,18 +391,18 @@ describe Twitter::Autolink do
391
391
  end
392
392
 
393
393
  context "balanced parens with a double quote inside" do
394
- def url; "http://foo.bar/foo_(\")_bar" end
394
+ def url; "http://foo.com/foo_(\")_bar" end
395
395
 
396
396
  it "should be linked" do
397
- @autolinked_text.should have_autolinked_url("http://foo.bar/foo_")
397
+ @autolinked_text.should have_autolinked_url("http://foo.com/foo_")
398
398
  end
399
399
  end
400
400
 
401
401
  context "balanced parens hiding XSS" do
402
- def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
402
+ def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
403
403
 
404
404
  it "should be linked" do
405
- @autolinked_text.should have_autolinked_url("http://x.xx/")
405
+ @autolinked_text.should have_autolinked_url("http://x.xx.com/")
406
406
  end
407
407
  end
408
408
  end
@@ -479,10 +479,10 @@ describe Twitter::Autolink do
479
479
 
480
480
  context "with a @ in a URL" do
481
481
  context "with XSS attack" do
482
- def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
482
+ def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
483
483
 
484
484
  it "should not allow XSS follwing @" do
485
- @autolinked_text.should have_autolinked_url('http://x.xx/')
485
+ @autolinked_text.should have_autolinked_url('http://x.xx.com/')
486
486
  end
487
487
  end
488
488
 
@@ -432,19 +432,19 @@ describe Twitter::Rewriter do
432
432
  end
433
433
 
434
434
  context "balanced parens with a double quote inside" do
435
- def url; "http://foo.bar/foo_(\")_bar" end
435
+ def url; "http://foo.bar.com/foo_(\")_bar" end
436
436
 
437
437
  it "should be rewritten" do
438
- @block_args.should == ["http://foo.bar/foo_"];
438
+ @block_args.should == ["http://foo.bar.com/foo_"];
439
439
  @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
440
440
  end
441
441
  end
442
442
 
443
443
  context "balanced parens hiding XSS" do
444
- def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
444
+ def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
445
445
 
446
446
  it "should be rewritten" do
447
- @block_args.should == ["http://x.xx/"];
447
+ @block_args.should == ["http://x.xx.com/"];
448
448
  @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
449
449
  end
450
450
  end
@@ -526,10 +526,10 @@ describe Twitter::Rewriter do
526
526
 
527
527
  context "with a @ in a URL" do
528
528
  context "with XSS attack" do
529
- def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
529
+ def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
530
530
 
531
531
  it "should not allow XSS follwing @" do
532
- @block_args.should == ["http://x.xx/"]
532
+ @block_args.should == ["http://x.xx.com/"]
533
533
  @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
534
534
  end
535
535
  end
data/spec/test_urls.rb CHANGED
@@ -26,14 +26,16 @@ module TestUrls
26
26
  "http://a_b.c-d.com",
27
27
  "http://a-b.b.com",
28
28
  "http://twitter-dash.com",
29
- # "t.co/nwcLTFF"
29
+ "www.foobar.com",
30
+ "WWW.FOOBAR.COM",
31
+ "www.foobar.co.jp",
32
+ "http://t.co",
33
+ "t.co/nwcLTFF"
30
34
  ] unless defined?(TestUrls::VALID)
31
35
 
32
36
  INVALID = [
33
37
  "http://no-tld",
34
38
  "http://tld-too-short.x",
35
- "www.foobar.com",
36
- "WWW.FOOBAR.COM",
37
39
  "http://-doman_dash.com",
38
40
  "http://_leadingunderscore.twitter.com",
39
41
  "http://trailingunderscore_.twitter.com",
@@ -50,7 +50,7 @@ class ConformanceTest < Test::Unit::TestCase
50
50
  run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls) do |description, expected, input|
51
51
  assert_equal expected, extract_urls(input), description
52
52
  expected.each do |expected_url|
53
- assert_equal true, valid_url?(expected_url), "expected url [#{expected_url}] not valid"
53
+ assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
54
54
  end
55
55
  end
56
56
  end
@@ -151,6 +151,12 @@ class ConformanceTest < Test::Unit::TestCase
151
151
  end
152
152
  end
153
153
 
154
+ def test_urls_without_protocol_validation_conformance
155
+ run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :urls_without_protocol) do |description, expected, input|
156
+ assert_equal expected, valid_url?(input, true, false), description
157
+ end
158
+ end
159
+
154
160
  def test_hashtags_validation_conformance
155
161
  run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :hashtags) do |description, expected, input|
156
162
  assert_equal expected, valid_hashtag?(input), description
data/twitter-text.gemspec CHANGED
@@ -1,10 +1,10 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = "twitter-text"
3
- s.version = "1.4.9"
3
+ s.version = "1.4.10"
4
4
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
5
- "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa"]
5
+ "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
6
6
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
7
- "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at"]
7
+ "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
8
8
  s.homepage = "http://twitter.com"
9
9
  s.description = s.summary = "A gem that provides text handling for Twitter"
10
10
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
5
- prerelease: false
4
+ hash: 19
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 4
9
- - 9
10
- version: 1.4.9
9
+ - 10
10
+ version: 1.4.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Sanford
@@ -17,11 +17,12 @@ authors:
17
17
  - Raffi Krikorian
18
18
  - J.P. Cummins
19
19
  - Yoshimasa Niwa
20
+ - Keita Fujii
20
21
  autorequire:
21
22
  bindir: bin
22
23
  cert_chain: []
23
24
 
24
- date: 2011-08-05 00:00:00 -07:00
25
+ date: 2011-09-20 00:00:00 -07:00
25
26
  default_executable:
26
27
  dependencies:
27
28
  - !ruby/object:Gem::Dependency
@@ -103,6 +104,7 @@ email:
103
104
  - raffi@twitter.com
104
105
  - jcummins@twitter.com
105
106
  - niw@niw.at
107
+ - keita@twitter.com
106
108
  executables: []
107
109
 
108
110
  extensions: []
@@ -171,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
173
  requirements: []
172
174
 
173
175
  rubyforge_project:
174
- rubygems_version: 1.3.7
176
+ rubygems_version: 1.4.1
175
177
  signing_key:
176
178
  specification_version: 3
177
179
  summary: Twitter text handling library