twitter-text 1.4.9 → 1.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +4 -2
- data/lib/autolink.rb +18 -2
- data/lib/extractor.rb +5 -2
- data/lib/regex.rb +36 -16
- data/lib/validation.rb +3 -2
- data/spec/autolinking_spec.rb +6 -6
- data/spec/rewriter_spec.rb +6 -6
- data/spec/test_urls.rb +5 -3
- data/test/conformance_test.rb +7 -1
- data/twitter-text.gemspec +3 -3
- metadata +8 -6
data/Gemfile.lock
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
twitter-text (1.4.9)
|
4
|
+
twitter-text (1.4.10)
|
5
5
|
activesupport
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: http://rubygems.org/
|
9
9
|
specs:
|
10
|
-
activesupport (3.0
|
10
|
+
activesupport (3.1.0)
|
11
|
+
multi_json (~> 1.0)
|
11
12
|
diff-lcs (1.1.2)
|
13
|
+
multi_json (1.0.4)
|
12
14
|
nokogiri (1.4.4)
|
13
15
|
nokogiri (1.4.4-java)
|
14
16
|
weakling (>= 0.0.3)
|
data/lib/autolink.rb
CHANGED
@@ -20,7 +20,7 @@ module Twitter
|
|
20
20
|
OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
|
21
21
|
:username_url_base, :list_url_base, :hashtag_url_base,
|
22
22
|
:username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
|
23
|
-
:suppress_lists, :suppress_no_follow]
|
23
|
+
:suppress_lists, :suppress_no_follow, :url_entities]
|
24
24
|
|
25
25
|
HTML_ENTITIES = {
|
26
26
|
'&' => '&',
|
@@ -139,6 +139,16 @@ module Twitter
|
|
139
139
|
options = href_options.dup
|
140
140
|
options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
|
141
141
|
options[:class] = options.delete(:url_class)
|
142
|
+
|
143
|
+
url_entities = {}
|
144
|
+
if options[:url_entities]
|
145
|
+
options[:url_entities].each do |entity|
|
146
|
+
entity = entity.with_indifferent_access
|
147
|
+
url_entities[entity[:url]] = entity
|
148
|
+
end
|
149
|
+
options.delete(:url_entities)
|
150
|
+
end
|
151
|
+
|
142
152
|
html_attrs = html_attrs_for_options(options)
|
143
153
|
|
144
154
|
Twitter::Rewriter.rewrite_urls(text) do |url|
|
@@ -147,7 +157,13 @@ module Twitter
|
|
147
157
|
else
|
148
158
|
html_escape(url)
|
149
159
|
end
|
150
|
-
|
160
|
+
|
161
|
+
display_url = url
|
162
|
+
if url_entities[url] && url_entities[url][:display_url]
|
163
|
+
display_url = url_entities[url][:display_url]
|
164
|
+
end
|
165
|
+
|
166
|
+
%(<a href="#{href}"#{html_attrs}>#{html_escape(display_url)}</a>)
|
151
167
|
end
|
152
168
|
end
|
153
169
|
|
data/lib/extractor.rb
CHANGED
@@ -155,9 +155,12 @@ module Twitter
|
|
155
155
|
return [] unless text
|
156
156
|
urls = []
|
157
157
|
position = 0
|
158
|
-
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
|
158
|
+
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, port, path, query|
|
159
159
|
valid_url_match_data = $~
|
160
|
-
|
160
|
+
|
161
|
+
# Regex in Ruby 1.8 doesn't support lookbehind, so we need to manually filter out
|
162
|
+
# the short URLs without protocol and path, i.e., [domain].[ccTLD]
|
163
|
+
unless !protocol && !path && domain =~ Twitter::Regex[:valid_short_domain]
|
161
164
|
start_position = valid_url_match_data.char_begin(3)
|
162
165
|
end_position = valid_url_match_data.char_end(3)
|
163
166
|
urls << {
|
data/lib/regex.rb
CHANGED
@@ -111,14 +111,35 @@ module Twitter
|
|
111
111
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
112
112
|
|
113
113
|
# URL related hash regex collection
|
114
|
-
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'
|
114
|
+
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@@\.]|^)/i
|
115
115
|
|
116
|
-
|
117
|
-
REGEXEN[:valid_subdomain] = /(?:
|
118
|
-
REGEXEN[:valid_domain_name] = /(?:
|
119
|
-
REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i
|
116
|
+
DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}]"
|
117
|
+
REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
|
118
|
+
REGEXEN[:valid_domain_name] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/i
|
120
119
|
|
121
|
-
REGEXEN[:
|
120
|
+
REGEXEN[:valid_gTLD] = /(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel)(?=[^[:alpha:]]|$))/i
|
121
|
+
REGEXEN[:valid_ccTLD] = %r{
|
122
|
+
(?:
|
123
|
+
(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|
|
124
|
+
ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|
|
125
|
+
gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|
|
126
|
+
lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|
|
127
|
+
pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|
|
128
|
+
tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)
|
129
|
+
(?=[^[:alpha:]]|$)
|
130
|
+
)
|
131
|
+
}ix
|
132
|
+
REGEXEN[:valid_punycode] = /(?:xn--[0-9a-z]+)/
|
133
|
+
|
134
|
+
REGEXEN[:valid_domain] = /(?:
|
135
|
+
#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}
|
136
|
+
(?:#{REGEXEN[:valid_gTLD]}|#{REGEXEN[:valid_ccTLD]}|#{REGEXEN[:valid_punycode]})
|
137
|
+
)/ix
|
138
|
+
REGEXEN[:valid_short_domain] = /^#{REGEXEN[:valid_domain_name]}#{REGEXEN[:valid_ccTLD]}$/
|
139
|
+
|
140
|
+
REGEXEN[:valid_port_number] = /[0-9]+/
|
141
|
+
|
142
|
+
REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~&|#{LATIN_ACCENTS}]/i
|
122
143
|
# Allow URL paths to contain balanced parens
|
123
144
|
# 1. Used in Wikipedia URLs like /Primer_(film)
|
124
145
|
# 2. Used in IIS sessions like /S(dfd346)/
|
@@ -139,16 +160,17 @@ module Twitter
|
|
139
160
|
( # $1 total match
|
140
161
|
(#{REGEXEN[:valid_preceding_chars]}) # $2 Preceeding chracter
|
141
162
|
( # $3 URL
|
142
|
-
(https?:\/\/)
|
143
|
-
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
|
163
|
+
(https?:\/\/)? # $4 Protocol (optional)
|
164
|
+
(#{REGEXEN[:valid_domain]}) # $5 Domain(s)
|
165
|
+
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
|
144
166
|
(/
|
145
167
|
(?:
|
146
168
|
#{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}| # 1+ path chars and a valid last char
|
147
169
|
#{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}?| # Optional last char to handle /@foo/ case
|
148
170
|
#{REGEXEN[:valid_url_path_ending_chars]} # Just a # case
|
149
171
|
)?
|
150
|
-
)? # $6 URL Path and anchor
|
151
|
-
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $7 Query String
|
172
|
+
)? # $7 URL Path and anchor
|
173
|
+
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
|
152
174
|
)
|
153
175
|
)
|
154
176
|
}iox;
|
@@ -161,7 +183,7 @@ module Twitter
|
|
161
183
|
#{REGEXEN[:validate_url_unreserved]}|
|
162
184
|
#{REGEXEN[:validate_url_pct_encoded]}|
|
163
185
|
#{REGEXEN[:validate_url_sub_delims]}|
|
164
|
-
|
186
|
+
[:\|@]
|
165
187
|
)/iox
|
166
188
|
|
167
189
|
REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i
|
@@ -236,11 +258,9 @@ module Twitter
|
|
236
258
|
REGEXEN[:validate_url_unencoded] = %r{
|
237
259
|
\A # Full URL
|
238
260
|
(?:
|
239
|
-
([^:/?#]+)
|
240
|
-
)
|
241
|
-
(
|
242
|
-
([^/?#]*) # $2 Authority
|
243
|
-
)
|
261
|
+
([^:/?#]+):// # $1 Scheme
|
262
|
+
)?
|
263
|
+
([^/?#]*) # $2 Authority
|
244
264
|
([^?#]*) # $3 Path
|
245
265
|
(?:
|
246
266
|
\?([^#]*) # $4 Query
|
data/lib/validation.rb
CHANGED
@@ -74,7 +74,7 @@ module Twitter
|
|
74
74
|
extracted.size == 1 && extracted.first == hashtag[1..-1]
|
75
75
|
end
|
76
76
|
|
77
|
-
def valid_url?(url, unicode_domains=true)
|
77
|
+
def valid_url?(url, unicode_domains=true, require_protocol=true)
|
78
78
|
return false if !url || url.empty?
|
79
79
|
|
80
80
|
url_parts = url.match(Twitter::Regex[:validate_url_unencoded])
|
@@ -82,7 +82,8 @@ module Twitter
|
|
82
82
|
|
83
83
|
scheme, authority, path, query, fragment = url_parts.captures
|
84
84
|
|
85
|
-
return false unless (
|
85
|
+
return false unless ((!require_protocol ||
|
86
|
+
(valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i))) &&
|
86
87
|
valid_match?(path, Twitter::Regex[:validate_url_path]) &&
|
87
88
|
valid_match?(query, Twitter::Regex[:validate_url_query], true) &&
|
88
89
|
valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true))
|
data/spec/autolinking_spec.rb
CHANGED
@@ -391,18 +391,18 @@ describe Twitter::Autolink do
|
|
391
391
|
end
|
392
392
|
|
393
393
|
context "balanced parens with a double quote inside" do
|
394
|
-
def url; "http://foo.
|
394
|
+
def url; "http://foo.com/foo_(\")_bar" end
|
395
395
|
|
396
396
|
it "should be linked" do
|
397
|
-
@autolinked_text.should have_autolinked_url("http://foo.
|
397
|
+
@autolinked_text.should have_autolinked_url("http://foo.com/foo_")
|
398
398
|
end
|
399
399
|
end
|
400
400
|
|
401
401
|
context "balanced parens hiding XSS" do
|
402
|
-
def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
|
402
|
+
def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
|
403
403
|
|
404
404
|
it "should be linked" do
|
405
|
-
@autolinked_text.should have_autolinked_url("http://x.xx/")
|
405
|
+
@autolinked_text.should have_autolinked_url("http://x.xx.com/")
|
406
406
|
end
|
407
407
|
end
|
408
408
|
end
|
@@ -479,10 +479,10 @@ describe Twitter::Autolink do
|
|
479
479
|
|
480
480
|
context "with a @ in a URL" do
|
481
481
|
context "with XSS attack" do
|
482
|
-
def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
|
482
|
+
def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
|
483
483
|
|
484
484
|
it "should not allow XSS follwing @" do
|
485
|
-
@autolinked_text.should have_autolinked_url('http://x.xx/')
|
485
|
+
@autolinked_text.should have_autolinked_url('http://x.xx.com/')
|
486
486
|
end
|
487
487
|
end
|
488
488
|
|
data/spec/rewriter_spec.rb
CHANGED
@@ -432,19 +432,19 @@ describe Twitter::Rewriter do
|
|
432
432
|
end
|
433
433
|
|
434
434
|
context "balanced parens with a double quote inside" do
|
435
|
-
def url; "http://foo.bar/foo_(\")_bar" end
|
435
|
+
def url; "http://foo.bar.com/foo_(\")_bar" end
|
436
436
|
|
437
437
|
it "should be rewritten" do
|
438
|
-
@block_args.should == ["http://foo.bar/foo_"];
|
438
|
+
@block_args.should == ["http://foo.bar.com/foo_"];
|
439
439
|
@rewritten_text.should == "I found a neatness ([rewritten](\")_bar)"
|
440
440
|
end
|
441
441
|
end
|
442
442
|
|
443
443
|
context "balanced parens hiding XSS" do
|
444
|
-
def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end
|
444
|
+
def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
|
445
445
|
|
446
446
|
it "should be rewritten" do
|
447
|
-
@block_args.should == ["http://x.xx/"];
|
447
|
+
@block_args.should == ["http://x.xx.com/"];
|
448
448
|
@rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
|
449
449
|
end
|
450
450
|
end
|
@@ -526,10 +526,10 @@ describe Twitter::Rewriter do
|
|
526
526
|
|
527
527
|
context "with a @ in a URL" do
|
528
528
|
context "with XSS attack" do
|
529
|
-
def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end
|
529
|
+
def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
|
530
530
|
|
531
531
|
it "should not allow XSS follwing @" do
|
532
|
-
@block_args.should == ["http://x.xx/"]
|
532
|
+
@block_args.should == ["http://x.xx.com/"]
|
533
533
|
@rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
|
534
534
|
end
|
535
535
|
end
|
data/spec/test_urls.rb
CHANGED
@@ -26,14 +26,16 @@ module TestUrls
|
|
26
26
|
"http://a_b.c-d.com",
|
27
27
|
"http://a-b.b.com",
|
28
28
|
"http://twitter-dash.com",
|
29
|
-
|
29
|
+
"www.foobar.com",
|
30
|
+
"WWW.FOOBAR.COM",
|
31
|
+
"www.foobar.co.jp",
|
32
|
+
"http://t.co",
|
33
|
+
"t.co/nwcLTFF"
|
30
34
|
] unless defined?(TestUrls::VALID)
|
31
35
|
|
32
36
|
INVALID = [
|
33
37
|
"http://no-tld",
|
34
38
|
"http://tld-too-short.x",
|
35
|
-
"www.foobar.com",
|
36
|
-
"WWW.FOOBAR.COM",
|
37
39
|
"http://-doman_dash.com",
|
38
40
|
"http://_leadingunderscore.twitter.com",
|
39
41
|
"http://trailingunderscore_.twitter.com",
|
data/test/conformance_test.rb
CHANGED
@@ -50,7 +50,7 @@ class ConformanceTest < Test::Unit::TestCase
|
|
50
50
|
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls) do |description, expected, input|
|
51
51
|
assert_equal expected, extract_urls(input), description
|
52
52
|
expected.each do |expected_url|
|
53
|
-
assert_equal true, valid_url?(expected_url), "expected url [#{expected_url}] not valid"
|
53
|
+
assert_equal true, valid_url?(expected_url, true, false), "expected url [#{expected_url}] not valid"
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
@@ -151,6 +151,12 @@ class ConformanceTest < Test::Unit::TestCase
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
+
def test_urls_without_protocol_validation_conformance
|
155
|
+
run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :urls_without_protocol) do |description, expected, input|
|
156
|
+
assert_equal expected, valid_url?(input, true, false), description
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
154
160
|
def test_hashtags_validation_conformance
|
155
161
|
run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :hashtags) do |description, expected, input|
|
156
162
|
assert_equal expected, valid_hashtag?(input), description
|
data/twitter-text.gemspec
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
spec = Gem::Specification.new do |s|
|
2
2
|
s.name = "twitter-text"
|
3
|
-
s.version = "1.4.9"
|
3
|
+
s.version = "1.4.10"
|
4
4
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
5
|
-
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa"]
|
5
|
+
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
|
6
6
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
7
|
-
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at"]
|
7
|
+
"raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at", "keita@twitter.com"]
|
8
8
|
s.homepage = "http://twitter.com"
|
9
9
|
s.description = s.summary = "A gem that provides text handling for Twitter"
|
10
10
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 4
|
9
|
-
- 9
|
10
|
-
version: 1.4.9
|
9
|
+
- 10
|
10
|
+
version: 1.4.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Sanford
|
@@ -17,11 +17,12 @@ authors:
|
|
17
17
|
- Raffi Krikorian
|
18
18
|
- J.P. Cummins
|
19
19
|
- Yoshimasa Niwa
|
20
|
+
- Keita Fujii
|
20
21
|
autorequire:
|
21
22
|
bindir: bin
|
22
23
|
cert_chain: []
|
23
24
|
|
24
|
-
date: 2011-
|
25
|
+
date: 2011-09-20 00:00:00 -07:00
|
25
26
|
default_executable:
|
26
27
|
dependencies:
|
27
28
|
- !ruby/object:Gem::Dependency
|
@@ -103,6 +104,7 @@ email:
|
|
103
104
|
- raffi@twitter.com
|
104
105
|
- jcummins@twitter.com
|
105
106
|
- niw@niw.at
|
107
|
+
- keita@twitter.com
|
106
108
|
executables: []
|
107
109
|
|
108
110
|
extensions: []
|
@@ -171,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
173
|
requirements: []
|
172
174
|
|
173
175
|
rubyforge_project:
|
174
|
-
rubygems_version: 1.
|
176
|
+
rubygems_version: 1.4.1
|
175
177
|
signing_key:
|
176
178
|
specification_version: 3
|
177
179
|
summary: Twitter text handling library
|