twitter-text 1.4.9 → 1.4.10
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +4 -2
- data/lib/autolink.rb +18 -2
- data/lib/extractor.rb +5 -2
- data/lib/regex.rb +36 -16
- data/lib/validation.rb +3 -2
- data/spec/autolinking_spec.rb +6 -6
- data/spec/rewriter_spec.rb +6 -6
- data/spec/test_urls.rb +5 -3
- data/test/conformance_test.rb +7 -1
- data/twitter-text.gemspec +3 -3
- metadata +8 -6
data/Gemfile.lock
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
twitter-text (1.4.9)
|
4
|
+
twitter-text (1.4.10)
|
5
5
|
activesupport
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: http://rubygems.org/
|
9
9
|
specs:
|
10
|
-
activesupport (3.0
|
10
|
+
activesupport (3.1.0)
|
11
|
+
multi_json (~> 1.0)
|
11
12
|
diff-lcs (1.1.2)
|
13
|
+
multi_json (1.0.4)
|
12
14
|
nokogiri (1.4.4)
|
13
15
|
nokogiri (1.4.4-java)
|
14
16
|
weakling (>= 0.0.3)
|
data/lib/autolink.rb
CHANGED
@@ -20,7 +20,7 @@ module Twitter
|
|
20
20
|
OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
|
21
21
|
:username_url_base, :list_url_base, :hashtag_url_base,
|
22
22
|
:username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
|
23
|
-
:suppress_lists, :suppress_no_follow]
|
23
|
+
:suppress_lists, :suppress_no_follow, :url_entities]
|
24
24
|
|
25
25
|
HTML_ENTITIES = {
|
26
26
|
'&' => '&',
|
@@ -139,6 +139,16 @@ module Twitter
|
|
139
139
|
options = href_options.dup
|
140
140
|
options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
|
141
141
|
options[:class] = options.delete(:url_class)
|
142
|
+
|
143
|
+
url_entities = {}
|
144
|
+
if options[:url_entities]
|
145
|
+
options[:url_entities].each do |entity|
|
146
|
+
entity = entity.with_indifferent_access
|
147
|
+
url_entities[entity[:url]] = entity
|
148
|
+
end
|
149
|
+
options.delete(:url_entities)
|
150
|
+
end
|
151
|
+
|
142
152
|
html_attrs = html_attrs_for_options(options)
|
143
153
|
|
144
154
|
Twitter::Rewriter.rewrite_urls(text) do |url|
|
@@ -147,7 +157,13 @@ module Twitter
|
|
147
157
|
else
|
148
158
|
html_escape(url)
|
149
159
|
end
|
150
|
-
|
160
|
+
|
161
|
+
display_url = url
|
162
|
+
if url_entities[url] && url_entities[url][:display_url]
|
163
|
+
display_url = url_entities[url][:display_url]
|
164
|
+
end
|
165
|
+
|
166
|
+
%(<a href="#{href}"#{html_attrs}>#{html_escape(display_url)}</a>)
|
151
167
|
end
|
152
168
|
end
|
153
169
|
|
data/lib/extractor.rb
CHANGED
@@ -155,9 +155,12 @@ module Twitter
|
|
155
155
|
return [] unless text
|
156
156
|
urls = []
|
157
157
|
position = 0
|
158
|
-
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
|
158
|
+
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
|
159
159
|
valid_url_match_data = $~
|
160
|
-
|
160
|
+
|
161
|
+
# Regex in Ruby 1.8 doesn't support lookbehind, so we need to manually filter out
|
162
|
+
# the short URLs without protocol and path, i.e., [domain].[ccTLD]
|
163
|
+
unless !protocol && !path && domain =~ Twitter::Regex[:valid_short_domain]
|
161
164
|
start_position = valid_url_match_data.char_begin(3)
|
162
165
|
end_position = valid_url_match_data.char_end(3)
|
163
166
|
urls << {
|
data/lib/regex.rb
CHANGED
@@ -111,14 +111,35 @@ module Twitter
|
|
111
111
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
112
112
|
|
113
113
|
# URL related hash regex collection
|
114
|
-
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'
|
114
|
+
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@@\.]|^)/i
|
115
115
|
|
116
|
-
|
117
|
-
REGEXEN[:valid_subdomain] = /(?:
|
118
|
-
REGEXEN[:valid_domain_name] = /(?:
|
119
|
-
REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
|
116
|
+
DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}]"
|
117
|
+
REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
|
118
|
+
REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
|
120
119
|
|
121
|
-
REGEXEN[:
|
120
|
+
REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel)(?=[^[:alpha:]]|$))/i
|
121
|
+
REGEXEN[:valid_ccTLD] = %r{
|
122
|
+
(?:
|
123
|
+
(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|
|
124
|
+
ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|
|
125
|
+
gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|
|
126
|
+
lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|
|
127
|
+
pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|
|
128
|
+
tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)
|
129
|
+
(?=[^[:alpha:]]|$)
|
130
|
+
)
|
131
|
+
}ix
|
132
|
+
REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/
|
133
|
+
|
134
|
+
REGEXEN[:valid_domain] = /(?:
|
135
|
+
#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
|
136
|
+
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
|
137
|
+
)/ix
|
138
|
+
REGEXEN[:valid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/
|
139
|
+
|
140
|
+
REGEXEN[:valid_port_number] = /[0-9]+/
|
141
|
+
|
142
|
+
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/i
|
122
143
|
# Allow URL paths to contain balanced parens
|
123
144
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
124
145
|
# 2. Used in IIS sessions like /S(dfd346)/
|
@@ -139,16 +160,17 @@ module Twitter
|
|
139
160
|
( # $1 total match
|
140
161
|
(#{REGEXEN[:valid_preceding_chars]}) # $2 Preceding character
|
141
162
|
( # $3 URL
|
142
|
-
(https?:\/\/)
|
143
|
-
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
|
163
|
+
(https?:\/\/)? # $4 Protocol (optional)
|
164
|
+
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
|
165
|
+
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
|
144
166
|
(/
|
145
167
|
(?:
|
146
168
|
#{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}| # 1+ path chars and a valid last char
|
147
169
|
#{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}?| # Optional last char to handle /@foo/ case
|
148
170
|
#{REGEXEN[:valid_url_path_ending_chars]} # Just a # case
|
149
171
|
)?
|
150
|
-
)? # $6 URL Path and anchor
|
151
|
-
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $7 Query String
|
172
|
+
)? # $7 URL Path and anchor
|
173
|
+
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
|
152
174
|
)
|
153
175
|
)
|
154
176
|
}iox;
|
@@ -161,7 +183,7 @@ module Twitter
|
|
161
183
|
#{REGEXEN[:validate_url_unreserved]}|
|
162
184
|
#{REGEXEN[:validate_url_pct_encoded]}|
|
163
185
|
#{REGEXEN[:validate_url_sub_delims]}|
|
164
|
-
|
186
|
+
[:\|@]
|
165
187
|
)/iox
|
166
188
|
|
167
189
|
REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i
|
@@ -236,11 +258,9 @@ module Twitter
|
|
236
258
|
REGEXEN[:validate_url_unencoded] = %r{
|
237
259
|
\A # Full URL
|
238
260
|
(?:
|
239
|
-
([^:/?#]+)
|
240
|
-
)
|
241
|
-
(
|
242
|
-
([^/?#]*) # $2 Authority
|
243
|
-
)
|
261
|
+
([^:/?#]+):// # $1 Scheme
|
262
|
+
)?
|
263
|
+
([^/?#]*) # $2 Authority
|
244
264
|
([^?#]*) # $3 Path
|
245
265
|
(?:
|
246
266
|
\?([^#]*) # $4 Query
|
data/lib/validation.rb
CHANGED
@@ -74,7 +74,7 @@ module Twitter
|
|
74
74
|
extracted.size == 1 && extracted.first == hashtag[1..-1]
|
75
75
|
end
|
76
76
|
|
77
|
-
def valid_url?(url, unicode_domains=true)
|
77
|
+
def valid_url?(url, unicode_domains=true, require_protocol=true)
|
78
78
|
return false if !url || url.empty?
|
79
79
|
|
80
80
|
url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
|
@@ -82,7 +82,8 @@ module Twitter
|
|
82
82
|
|
83
83
|
scheme, authority, path, query, fragment = url_parts.captures
|
84
84
|
|
85
|
-
return false unless (
|
85
|
+
return false unless ((!require_protocol ||
|
86
|
+
(valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
|
86
87
|
valid_match?(path, Twitter::Regex[:validate_url_path]) &&
|
87
88
|
valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
|
88
89
|
valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))
|
data/spec/autolinking_spec.rb
CHANGED
@@ -391,18 +391,18 @@ describe Twitter::Autolink do
|
|
391
391
|
end
|
392
392
|
|
393
393
|
context "balanced parens with a double quote inside" do
|
394
|
-
def url; "http://foo.
|
394
|
+
def url; "http://foo.com/foo_(\")_bar" end
|
395
395
|
|
396
396
|
it "should be linked" do
|
397
|
-
@autolinked_text.should have_autolinked_url("http://foo.
|
397
|
+
@autolinked_text.should have_autolinked_url("http://foo.com/foo_")
|
398
398
|
end
|
399
399
|
end
|
400
400
|
|
401
401
|
context "balanced parens hiding XSS" do
|
402
|
-
def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
|
402
|
+
def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
|
403
403
|
|
404
404
|
it "should be linked" do
|
405
|
-
@autolinked_text.should have_autolinked_url("http://x.xx/")
|
405
|
+
@autolinked_text.should have_autolinked_url("http://x.xx.com/")
|
406
406
|
end
|
407
407
|
end
|
408
408
|
end
|
@@ -479,10 +479,10 @@ describe Twitter::Autolink do
|
|
479
479
|
|
480
480
|
context "with a @ in a URL" do
|
481
481
|
context "with XSS attack" do
|
482
|
-
def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
|
482
|
+
def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
|
483
483
|
|
484
484
|
it "should not allow XSS follwing @" do
|
485
|
-
@autolinked_text.should have_autolinked_url('http://x.xx/')
|
485
|
+
@autolinked_text.should have_autolinked_url('http://x.xx.com/')
|
486
486
|
end
|
487
487
|
end
|
488
488
|
|
data/spec/rewriter_spec.rb
CHANGED
@@ -432,19 +432,19 @@ describe Twitter::Rewriter do
|
|
432
432
|
end
|
433
433
|
|
434
434
|
context "balanced parens with a double quote inside" do
|
435
|
-
def url; "http://foo.bar/foo_(\")_bar" end
|
435
|
+
def url; "http://foo.bar.com/foo_(\")_bar" end
|
436
436
|
|
437
437
|
it "should be rewritten" do
|
438
|
-
@block_args.should == ["http://foo.bar/foo_"];
|
438
|
+
@block_args.should == ["http://foo.bar.com/foo_"];
|
439
439
|
@rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
|
440
440
|
end
|
441
441
|
end
|
442
442
|
|
443
443
|
context "balanced parens hiding XSS" do
|
444
|
-
def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
|
444
|
+
def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
|
445
445
|
|
446
446
|
it "should be rewritten" do
|
447
|
-
@block_args.should == ["http://x.xx/"];
|
447
|
+
@block_args.should == ["http://x.xx.com/"];
|
448
448
|
@rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
|
449
449
|
end
|
450
450
|
end
|
@@ -526,10 +526,10 @@ describe Twitter::Rewriter do
|
|
526
526
|
|
527
527
|
context "with a @ in a URL" do
|
528
528
|
context "with XSS attack" do
|
529
|
-
def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
|
529
|
+
def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
|
530
530
|
|
531
531
|
it "should not allow XSS follwing @" do
|
532
|
-
@block_args.should == ["http://x.xx/"]
|
532
|
+
@block_args.should == ["http://x.xx.com/"]
|
533
533
|
@rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
|
534
534
|
end
|
535
535
|
end
|
data/spec/test_urls.rb
CHANGED
@@ -26,14 +26,16 @@ module TestUrls
|
|
26
26
|
"http://a_b.c-d.com",
|
27
27
|
"http://a-b.b.com",
|
28
28
|
"http://twitter-dash.com",
|
29
|
-
|
29
|
+
"www.foobar.com",
|
30
|
+
"WWW.FOOBAR.COM",
|
31
|
+
"www.foobar.co.jp",
|
32
|
+
"http://t.co",
|
33
|
+
"t.co/nwcLTFF"
|
30
34
|
] unless defined?(TestUrls::VALID)
|
31
35
|
|
32
36
|
INVALID = [
|
33
37
|
"http://no-tld",
|
34
38
|
"http://tld-too-short.x",
|
35
|
-
"www.foobar.com",
|
36
|
-
"WWW.FOOBAR.COM",
|
37
39
|
"http://-doman_dash.com",
|
38
40
|
"http://_leadingunderscore.twitter.com",
|
39
41
|
"http://trailingunderscore_.twitter.com",
|
data/test/conformance_test.rb
CHANGED
@@ -50,7 +50,7 @@ class ConformanceTest < Test::Unit::TestCase
|
|
50
50
|
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls) do |description, expected, input|
|
51
51
|
assert_equal expected, extract_urls(input), description
|
52
52
|
expected.each do |expected_url|
|
53
|
-
assert_equal true, valid_url?(expected_url), "expected url [#{expected_url}] not valid"
|
53
|
+
assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
@@ -151,6 +151,12 @@ class ConformanceTest < Test::Unit::TestCase
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
+
def test_urls_without_protocol_validation_conformance
|
155
|
+
run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :urls_without_protocol) do |description, expected, input|
|
156
|
+
assert_equal expected, valid_url?(input, true, false), description
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
154
160
|
def test_hashtags_validation_conformance
|
155
161
|
run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :hashtags) do |description, expected, input|
|
156
162
|
assert_equal expected, valid_hashtag?(input), description
|
data/twitter-text.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
spec = Gem::Specification.new do |s|
|
2
2
|
s.name = "twitter-text"
|
3
|
-
s.version = "1.4.9"
|
3
|
+
s.version = "1.4.10"
|
4
4
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
5
|
-
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa"]
|
5
|
+
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
|
6
6
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
7
|
-
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at"]
|
7
|
+
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
|
8
8
|
s.homepage = "http://twitter.com"
|
9
9
|
s.description = s.summary = "A gem that provides text handling for Twitter"
|
10
10
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 4
|
9
|
-
- 9
|
10
|
-
version: 1.4.9
|
9
|
+
- 10
|
10
|
+
version: 1.4.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Sanford
|
@@ -17,11 +17,12 @@ authors:
|
|
17
17
|
- Raffi Krikorian
|
18
18
|
- J.P. Cummins
|
19
19
|
- Yoshimasa Niwa
|
20
|
+
- Keita Fujii
|
20
21
|
autorequire:
|
21
22
|
bindir: bin
|
22
23
|
cert_chain: []
|
23
24
|
|
24
|
-
date: 2011-
|
25
|
+
date: 2011-09-20 00:00:00 -07:00
|
25
26
|
default_executable:
|
26
27
|
dependencies:
|
27
28
|
- !ruby/object:Gem::Dependency
|
@@ -103,6 +104,7 @@ email:
|
|
103
104
|
- raffi@twitter.com
|
104
105
|
- jcummins@twitter.com
|
105
106
|
- niw@niw.at
|
107
|
+
- keita@twitter.com
|
106
108
|
executables: []
|
107
109
|
|
108
110
|
extensions: []
|
@@ -171,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
173
|
requirements: []
|
172
174
|
|
173
175
|
rubyforge_project:
|
174
|
-
rubygems_version: 1.
|
176
|
+
rubygems_version: 1.4.1
|
175
177
|
signing_key:
|
176
178
|
specification_version: 3
|
177
179
|
summary: Twitter text handling library
|