twitter-text 1.4.9 → 1.4.10

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,14 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitter-text (1.4.8)
4
+ twitter-text (1.4.10)
5
5
  activesupport
6
6
 
7
7
  GEM
8
8
  remote: http://rubygems.org/
9
9
  specs:
10
- activesupport (3.0.3)
10
+ activesupport (3.1.0)
11
+ multi_json (~> 1.0)
11
12
  diff-lcs (1.1.2)
13
+ multi_json (1.0.4)
12
14
  nokogiri (1.4.4)
13
15
  nokogiri (1.4.4-java)
14
16
  weakling (>= 0.0.3)
data/lib/autolink.rb CHANGED
@@ -20,7 +20,7 @@ module Twitter
20
20
  OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
21
21
  :username_url_base, :list_url_base, :hashtag_url_base,
22
22
  :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
23
- :suppress_lists, :suppress_no_follow]
23
+ :suppress_lists, :suppress_no_follow, :url_entities]
24
24
 
25
25
  HTML_ENTITIES = {
26
26
  '&' => '&',
@@ -139,6 +139,16 @@ module Twitter
139
139
  options = href_options.dup
140
140
  options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
141
141
  options[:class] = options.delete(:url_class)
142
+
143
+ url_entities = {}
144
+ if options[:url_entities]
145
+ options[:url_entities].each do |entity|
146
+ entity = entity.with_indifferent_access
147
+ url_entities[entity[:url]] = entity
148
+ end
149
+ options.delete(:url_entities)
150
+ end
151
+
142
152
  html_attrs = html_attrs_for_options(options)
143
153
 
144
154
  Twitter::Rewriter.rewrite_urls(text) do |url|
@@ -147,7 +157,13 @@ module Twitter
147
157
  else
148
158
  html_escape(url)
149
159
  end
150
- %(<a href="#{href}"#{html_attrs}>#{html_escape(url)}</a>)
160
+
161
+ display_url = url
162
+ if url_entities[url] && url_entities[url][:display_url]
163
+ display_url = url_entities[url][:display_url]
164
+ end
165
+
166
+ %(<a href="#{href}"#{html_attrs}>#{html_escape(display_url)}</a>)
151
167
  end
152
168
  end
153
169
 
data/lib/extractor.rb CHANGED
@@ -155,9 +155,12 @@ module Twitter
155
155
  return [] unless text
156
156
  urls = []
157
157
  position = 0
158
- text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
158
+ text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
159
159
  valid_url_match_data = $~
160
- if protocol && !protocol.empty?
160
+
161
+ # Regex in Ruby 1.8 doesn't support lookbehind, so we need to manually filter out
162
+ # the short URLs without protocol and path, i.e., [domain].[ccTLD]
163
+ unless !protocol && !path && domain =~ Twitter::Regex[:valid_short_domain]
161
164
  start_position = valid_url_match_data.char_begin(3)
162
165
  end_position = valid_url_match_data.char_end(3)
163
166
  urls << {
data/lib/regex.rb CHANGED
@@ -111,14 +111,35 @@ module Twitter
111
111
  REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
112
112
 
113
113
  # URL related hash regex collection
114
- REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_@@]|^|\:)/i
114
+ REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@@\.]|^)/i
115
115
 
116
- DOMAIN_EXCLUDE_PART = "[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}"
117
- REGEXEN[:valid_subdomain] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[_-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]\./
118
- REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/
119
- REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
116
+ DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}]"
117
+ REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
118
+ REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
120
119
 
121
- REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i
120
+ REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel)(?=[^[:alpha:]]|$))/i
121
+ REGEXEN[:valid_ccTLD] = %r{
122
+ (?:
123
+ (?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|
124
+ ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|
125
+ gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|
126
+ lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|
127
+ pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|
128
+ tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)
129
+ (?=[^[:alpha:]]|$)
130
+ )
131
+ }ix
132
+ REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/
133
+
134
+ REGEXEN[:valid_domain] = /(?:
135
+ #{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
136
+ (?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
137
+ )/ix
138
+ REGEXEN[:valid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/
139
+
140
+ REGEXEN[:valid_port_number] = /[0-9]+/
141
+
142
+ REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/i
122
143
  # Allow URL paths to contain balanced parens
123
144
  # 1. Used in Wikipedia URLs like /Primer_(film)
124
145
  # 2. Used in IIS sessions like /S(dfd346)/
@@ -139,16 +160,17 @@ module Twitter
139
160
  ( # $1 total match
140
161
  (#{REGEXEN[:valid_preceding_chars]}) # $2 Preceding character
141
162
  ( # $3 URL
142
- (https?:\/\/) # $4 Protocol
143
- (#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number
163
+ (https?:\/\/)? # $4 Protocol (optional)
164
+ (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
165
+ (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
144
166
  (/
145
167
  (?:
146
168
  #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}| # 1+ path chars and a valid last char
147
169
  #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}?| # Optional last char to handle /@foo/ case
148
170
  #{REGEXEN[:valid_url_path_ending_chars]} # Just a # case
149
171
  )?
150
- )? # $6 URL Path and anchor
151
- (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $7 Query String
172
+ )? # $7 URL Path and anchor
173
+ (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
152
174
  )
153
175
  )
154
176
  }iox;
@@ -161,7 +183,7 @@ module Twitter
161
183
  #{REGEXEN[:validate_url_unreserved]}|
162
184
  #{REGEXEN[:validate_url_pct_encoded]}|
163
185
  #{REGEXEN[:validate_url_sub_delims]}|
164
- :|@
186
+ [:\|@]
165
187
  )/iox
166
188
 
167
189
  REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i
@@ -236,11 +258,9 @@ module Twitter
236
258
  REGEXEN[:validate_url_unencoded] = %r{
237
259
  \A # Full URL
238
260
  (?:
239
- ([^:/?#]+): # $1 Scheme
240
- )
241
- (?://
242
- ([^/?#]*) # $2 Authority
243
- )
261
+ ([^:/?#]+):// # $1 Scheme
262
+ )?
263
+ ([^/?#]*) # $2 Authority
244
264
  ([^?#]*) # $3 Path
245
265
  (?:
246
266
  \?([^#]*) # $4 Query
data/lib/validation.rb CHANGED
@@ -74,7 +74,7 @@ module Twitter
74
74
  extracted.size == 1 && extracted.first == hashtag[1..-1]
75
75
  end
76
76
 
77
- def valid_url?(url, unicode_domains=true)
77
+ def valid_url?(url, unicode_domains=true, require_protocol=true)
78
78
  return false if !url || url.empty?
79
79
 
80
80
  url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
@@ -82,7 +82,8 @@ module Twitter
82
82
 
83
83
  scheme, authority, path, query, fragment = url_parts.captures
84
84
 
85
- return false unless (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i) &&
85
+ return false unless ((!require_protocol ||
86
+ (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
86
87
  valid_match?(path, Twitter::Regex[:validate_url_path]) &&
87
88
  valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
88
89
  valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))
@@ -391,18 +391,18 @@ describe Twitter::Autolink do
391
391
  end
392
392
 
393
393
  context "balanced parens with a double quote inside" do
394
- def url; "http://foo.bar/foo_(\")_bar" end
394
+ def url; "http://foo.com/foo_(\")_bar" end
395
395
 
396
396
  it "should be linked" do
397
- @autolinked_text.should have_autolinked_url("http://foo.bar/foo_")
397
+ @autolinked_text.should have_autolinked_url("http://foo.com/foo_")
398
398
  end
399
399
  end
400
400
 
401
401
  context "balanced parens hiding XSS" do
402
- def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
402
+ def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
403
403
 
404
404
  it "should be linked" do
405
- @autolinked_text.should have_autolinked_url("http://x.xx/")
405
+ @autolinked_text.should have_autolinked_url("http://x.xx.com/")
406
406
  end
407
407
  end
408
408
  end
@@ -479,10 +479,10 @@ describe Twitter::Autolink do
479
479
 
480
480
  context "with a @ in a URL" do
481
481
  context "with XSS attack" do
482
- def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
482
+ def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
483
483
 
484
484
  it "should not allow XSS follwing @" do
485
- @autolinked_text.should have_autolinked_url('http://x.xx/')
485
+ @autolinked_text.should have_autolinked_url('http://x.xx.com/')
486
486
  end
487
487
  end
488
488
 
@@ -432,19 +432,19 @@ describe Twitter::Rewriter do
432
432
  end
433
433
 
434
434
  context "balanced parens with a double quote inside" do
435
- def url; "http://foo.bar/foo_(\")_bar" end
435
+ def url; "http://foo.bar.com/foo_(\")_bar" end
436
436
 
437
437
  it "should be rewritten" do
438
- @block_args.should == ["http://foo.bar/foo_"];
438
+ @block_args.should == ["http://foo.bar.com/foo_"];
439
439
  @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
440
440
  end
441
441
  end
442
442
 
443
443
  context "balanced parens hiding XSS" do
444
- def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
444
+ def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
445
445
 
446
446
  it "should be rewritten" do
447
- @block_args.should == ["http://x.xx/"];
447
+ @block_args.should == ["http://x.xx.com/"];
448
448
  @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
449
449
  end
450
450
  end
@@ -526,10 +526,10 @@ describe Twitter::Rewriter do
526
526
 
527
527
  context "with a @ in a URL" do
528
528
  context "with XSS attack" do
529
- def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
529
+ def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
530
530
 
531
531
  it "should not allow XSS follwing @" do
532
- @block_args.should == ["http://x.xx/"]
532
+ @block_args.should == ["http://x.xx.com/"]
533
533
  @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
534
534
  end
535
535
  end
data/spec/test_urls.rb CHANGED
@@ -26,14 +26,16 @@ module TestUrls
26
26
  "http://a_b.c-d.com",
27
27
  "http://a-b.b.com",
28
28
  "http://twitter-dash.com",
29
- # "t.co/nwcLTFF"
29
+ "www.foobar.com",
30
+ "WWW.FOOBAR.COM",
31
+ "www.foobar.co.jp",
32
+ "http://t.co",
33
+ "t.co/nwcLTFF"
30
34
  ] unless defined?(TestUrls::VALID)
31
35
 
32
36
  INVALID = [
33
37
  "http://no-tld",
34
38
  "http://tld-too-short.x",
35
- "www.foobar.com",
36
- "WWW.FOOBAR.COM",
37
39
  "http://-doman_dash.com",
38
40
  "http://_leadingunderscore.twitter.com",
39
41
  "http://trailingunderscore_.twitter.com",
@@ -50,7 +50,7 @@ class ConformanceTest < Test::Unit::TestCase
50
50
  run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls) do |description, expected, input|
51
51
  assert_equal expected, extract_urls(input), description
52
52
  expected.each do |expected_url|
53
- assert_equal true, valid_url?(expected_url), "expected url [#{expected_url}] not valid"
53
+ assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
54
54
  end
55
55
  end
56
56
  end
@@ -151,6 +151,12 @@ class ConformanceTest < Test::Unit::TestCase
151
151
  end
152
152
  end
153
153
 
154
+ def test_urls_without_protocol_validation_conformance
155
+ run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :urls_without_protocol) do |description, expected, input|
156
+ assert_equal expected, valid_url?(input, true, false), description
157
+ end
158
+ end
159
+
154
160
  def test_hashtags_validation_conformance
155
161
  run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :hashtags) do |description, expected, input|
156
162
  assert_equal expected, valid_hashtag?(input), description
data/twitter-text.gemspec CHANGED
@@ -1,10 +1,10 @@
1
1
  spec = Gem::Specification.new do |s|
2
2
  s.name = "twitter-text"
3
- s.version = "1.4.9"
3
+ s.version = "1.4.10"
4
4
  s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
5
- "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa"]
5
+ "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
6
6
  s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
7
- "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at"]
7
+ "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
8
8
  s.homepage = "http://twitter.com"
9
9
  s.description = s.summary = "A gem that provides text handling for Twitter"
10
10
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
5
- prerelease: false
4
+ hash: 19
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 4
9
- - 9
10
- version: 1.4.9
9
+ - 10
10
+ version: 1.4.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Sanford
@@ -17,11 +17,12 @@ authors:
17
17
  - Raffi Krikorian
18
18
  - J.P. Cummins
19
19
  - Yoshimasa Niwa
20
+ - Keita Fujii
20
21
  autorequire:
21
22
  bindir: bin
22
23
  cert_chain: []
23
24
 
24
- date: 2011-08-05 00:00:00 -07:00
25
+ date: 2011-09-20 00:00:00 -07:00
25
26
  default_executable:
26
27
  dependencies:
27
28
  - !ruby/object:Gem::Dependency
@@ -103,6 +104,7 @@ email:
103
104
  - raffi@twitter.com
104
105
  - jcummins@twitter.com
105
106
  - niw@niw.at
107
+ - keita@twitter.com
106
108
  executables: []
107
109
 
108
110
  extensions: []
@@ -171,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
173
  requirements: []
172
174
 
173
175
  rubyforge_project:
174
- rubygems_version: 1.3.7
176
+ rubygems_version: 1.4.1
175
177
  signing_key:
176
178
  specification_version: 3
177
179
  summary: Twitter text handling library