twitter-text 1.4.15 → 1.4.16
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +11 -4
- data/lib/autolink.rb +15 -10
- data/lib/regex.rb +4 -4
- data/spec/autolinking_spec.rb +17 -7
- data/spec/extractor_spec.rb +7 -0
- data/spec/spec_helper.rb +30 -10
- data/twitter-text.gemspec +1 -1
- metadata +4 -4
data/README.rdoc
CHANGED
@@ -55,9 +55,16 @@ numbers but cannot be solely numbers and cannot contain punctuation.
|
|
55
55
|
|
56
56
|
=== URLs
|
57
57
|
|
58
|
-
|
59
|
-
|
60
|
-
|
58
|
+
Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
|
59
|
+
a space to separate normal text from URLs making it difficult to identify where
|
60
|
+
the URL ends and the text starts.
|
61
|
+
|
62
|
+
For this reason twitter-text currently does not support extracting or auto-linking
|
63
|
+
of URLs immediately followed by non-Latin characters.
|
64
|
+
|
65
|
+
Example: "http://twitter.com/は素晴らしい" .
|
66
|
+
The normal text is "は素晴らしい" and is not part of the URL even though
|
67
|
+
it isn't space separated.
|
61
68
|
|
62
69
|
=== International
|
63
70
|
|
@@ -112,4 +119,4 @@ Thanks to everybody who has filed issues, provided feedback or contributed patch
|
|
112
119
|
distributed under the License is distributed on an "AS IS" BASIS,
|
113
120
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
114
121
|
See the License for the specific language governing permissions and
|
115
|
-
limitations under the License.
|
122
|
+
limitations under the License.
|
data/lib/autolink.rb
CHANGED
@@ -20,7 +20,7 @@ module Twitter
|
|
20
20
|
OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class,
|
21
21
|
:username_url_base, :list_url_base, :hashtag_url_base,
|
22
22
|
:username_url_block, :list_url_block, :hashtag_url_block, :link_url_block,
|
23
|
-
:suppress_lists, :suppress_no_follow, :url_entities]
|
23
|
+
:username_include_symbol, :suppress_lists, :suppress_no_follow, :url_entities]
|
24
24
|
|
25
25
|
HTML_ENTITIES = {
|
26
26
|
'&' => '&amp;',
|
@@ -47,6 +47,7 @@ module Twitter
|
|
47
47
|
# <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
48
48
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
49
49
|
# <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute on hashtag links. The <tt>#hashtag</tt> (minus the <tt>#</tt>) will be appended at the end of this.
|
50
|
+
# <tt>:username_include_symbol</tt>:: place the <tt>@</tt> symbol within username and list links
|
50
51
|
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
51
52
|
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
52
53
|
# <tt>:target</tt>:: add <tt>target="window_name"</tt> to auto-linked items
|
@@ -66,6 +67,7 @@ module Twitter
|
|
66
67
|
# <tt>:list_class</tt>:: class to add to list <tt><a></tt> tags
|
67
68
|
# <tt>:username_class</tt>:: class to add to username <tt><a></tt> tags
|
68
69
|
# <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
70
|
+
# <tt>:username_include_symbol</tt>:: place the <tt>@</tt> symbol within username and list links
|
69
71
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
70
72
|
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
71
73
|
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
@@ -75,13 +77,16 @@ module Twitter
|
|
75
77
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
76
78
|
options[:list_class] ||= DEFAULT_LIST_CLASS
|
77
79
|
options[:username_class] ||= DEFAULT_USERNAME_CLASS
|
78
|
-
options[:username_url_base] ||= "
|
79
|
-
options[:list_url_base] ||= "
|
80
|
+
options[:username_url_base] ||= "https://twitter.com/"
|
81
|
+
options[:list_url_base] ||= "https://twitter.com/"
|
80
82
|
options[:target] ||= DEFAULT_TARGET
|
81
83
|
|
82
84
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
83
85
|
|
84
86
|
Twitter::Rewriter.rewrite_usernames_or_lists(text) do |at, username, slash_listname|
|
87
|
+
at_before_user = options[:username_include_symbol] ? at : ''
|
88
|
+
at = options[:username_include_symbol] ? '' : at
|
89
|
+
|
85
90
|
name = "#{username}#{slash_listname}"
|
86
91
|
chunk = block_given? ? yield(name) : name
|
87
92
|
|
@@ -89,16 +94,16 @@ module Twitter
|
|
89
94
|
href = if options[:list_url_block]
|
90
95
|
options[:list_url_block].call(name.downcase)
|
91
96
|
else
|
92
|
-
"#{html_escape(options[:list_url_base]
|
97
|
+
"#{html_escape(options[:list_url_base] + name.downcase)}"
|
93
98
|
end
|
94
|
-
%(#{at}<a class="#{options[:url_class]} #{options[:list_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
|
99
|
+
%(#{at}<a class="#{options[:url_class]} #{options[:list_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(at_before_user + chunk)}</a>)
|
95
100
|
else
|
96
101
|
href = if options[:username_url_block]
|
97
102
|
options[:username_url_block].call(chunk)
|
98
103
|
else
|
99
|
-
"#{html_escape(options[:username_url_base]
|
104
|
+
"#{html_escape(options[:username_url_base] + chunk)}"
|
100
105
|
end
|
101
|
-
%(#{at}<a class="#{options[:url_class]} #{options[:username_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(chunk)}</a>)
|
106
|
+
%(#{at}<a class="#{options[:url_class]} #{options[:username_class]}" #{target_tag(options)}href="#{href}"#{extra_html}>#{html_escape(at_before_user + chunk)}</a>)
|
102
107
|
end
|
103
108
|
end
|
104
109
|
end
|
@@ -116,7 +121,7 @@ module Twitter
|
|
116
121
|
options = options.dup
|
117
122
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
118
123
|
options[:hashtag_class] ||= DEFAULT_HASHTAG_CLASS
|
119
|
-
options[:hashtag_url_base] ||= "
|
124
|
+
options[:hashtag_url_base] ||= "https://twitter.com/#!/search?q=%23"
|
120
125
|
options[:target] ||= DEFAULT_TARGET
|
121
126
|
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
122
127
|
|
@@ -179,10 +184,10 @@ module Twitter
|
|
179
184
|
BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze
|
180
185
|
|
181
186
|
def html_attrs_for_options(options)
|
182
|
-
|
187
|
+
autolink_html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)}
|
183
188
|
end
|
184
189
|
|
185
|
-
def
|
190
|
+
def autolink_html_attrs(options)
|
186
191
|
options.inject("") do |attrs, (key, value)|
|
187
192
|
if BOOLEAN_ATTRIBUTES.include?(key)
|
188
193
|
value = value ? key : nil
|
data/lib/regex.rb
CHANGED
@@ -109,17 +109,17 @@ module Twitter
|
|
109
109
|
REGEXEN[:end_hashtag_match] = /\A(?:[#＃]|:\/\/)/o
|
110
110
|
|
111
111
|
REGEXEN[:at_signs] = /[@＠]/
|
112
|
-
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
|
113
|
-
REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o
|
112
|
+
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_!#\$%&*@＠])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
|
113
|
+
REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_!#\$%&*@＠])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o
|
114
114
|
REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
|
115
115
|
# Used in Extractor and Rewriter for final filtering
|
116
116
|
REGEXEN[:end_screen_name_match] = /\A(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
|
117
117
|
|
118
|
-
REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o
|
118
|
+
REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_!#\$%&*@＠]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o
|
119
119
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
120
120
|
|
121
121
|
# URL related hash regex collection
|
122
|
-
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_
|
122
|
+
REGEXEN[:valid_preceding_chars] = /(?:[^-\/"'!=A-Z0-9_@＠\$#＃\.#{INVALID_CHARACTERS.join('')}]|^)/io
|
123
123
|
|
124
124
|
DOMAIN_VALID_CHARS = "[^[:punct:][:space:][:blank:][:cntrl:]#{INVALID_CHARACTERS.join('')}#{UNICODE_SPACES.join('')}]"
|
125
125
|
REGEXEN[:valid_subdomain] = /(?:(?:#{DOMAIN_VALID_CHARS}(?:[_-]|#{DOMAIN_VALID_CHARS})*)?#{DOMAIN_VALID_CHARS}\.)/io
|
data/spec/autolinking_spec.rb
CHANGED
@@ -298,7 +298,7 @@ describe Twitter::Autolink do
|
|
298
298
|
it "should be linked" do
|
299
299
|
link = Nokogiri::HTML(@autolinked_text).search('a')
|
300
300
|
(link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev"
|
301
|
-
link.first['href'].should == '
|
301
|
+
link.first['href'].should == 'https://twitter.com/#!/search?q=%23twj_dev'
|
302
302
|
end
|
303
303
|
end
|
304
304
|
|
@@ -309,7 +309,7 @@ describe Twitter::Autolink do
|
|
309
309
|
end
|
310
310
|
|
311
311
|
it "should be linked" do
|
312
|
-
@autolinked_text.should == "<a href=\"
|
312
|
+
@autolinked_text.should == "<a href=\"https://twitter.com/#!/search?q=%23éhashtag\" title=\"#éhashtag\" class=\"tweet-url hashtag\" rel=\"nofollow\">#éhashtag</a>"
|
313
313
|
end
|
314
314
|
end
|
315
315
|
|
@@ -530,13 +530,13 @@ describe Twitter::Autolink do
|
|
530
530
|
end
|
531
531
|
|
532
532
|
it "should allow url/hashtag overlap" do
|
533
|
-
auto_linked = @linker.auto_link("
|
534
|
-
auto_linked.should have_autolinked_url('
|
533
|
+
auto_linked = @linker.auto_link("https://twitter.com/#search")
|
534
|
+
auto_linked.should have_autolinked_url('https://twitter.com/#search')
|
535
535
|
end
|
536
536
|
|
537
537
|
it "should not add invalid option in HTML tags" do
|
538
|
-
auto_linked = @linker.auto_link("
|
539
|
-
auto_linked.should have_autolinked_url('
|
538
|
+
auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname')
|
539
|
+
auto_linked.should have_autolinked_url('https://twitter.com/')
|
540
540
|
auto_linked.should_not include('hashtag_class')
|
541
541
|
auto_linked.should_not include('hashtag_classname')
|
542
542
|
end
|
@@ -558,6 +558,16 @@ describe Twitter::Autolink do
|
|
558
558
|
linked.should match(/nofollow/)
|
559
559
|
end
|
560
560
|
|
561
|
+
it "should include the '@' symbol in a username when passed :username_include_symbol" do
|
562
|
+
linked = TestAutolink.new.auto_link("@user", :username_include_symbol => true)
|
563
|
+
linked.should link_to_screen_name('user', '@user')
|
564
|
+
end
|
565
|
+
|
566
|
+
it "should include the '@' symbol in a list when passed :username_include_symbol" do
|
567
|
+
linked = TestAutolink.new.auto_link("@user/list", :username_include_symbol => true)
|
568
|
+
linked.should link_to_list_path('user/list', '@user/list')
|
569
|
+
end
|
570
|
+
|
561
571
|
it "should not add rel=nofollow when passed :suppress_no_follow" do
|
562
572
|
linked = TestAutolink.new.auto_link("http://example.com/", :suppress_no_follow => true)
|
563
573
|
linked.should have_autolinked_url('http://example.com/')
|
@@ -610,7 +620,7 @@ describe Twitter::Autolink do
|
|
610
620
|
@linker.html_escape("&<>\"").should == "&amp;&lt;&gt;&quot;"
|
611
621
|
@linker.html_escape("<div>").should == "&lt;div&gt;"
|
612
622
|
@linker.html_escape("a&b").should == "a&amp;b"
|
613
|
-
@linker.html_escape("<a href=\"
|
623
|
+
@linker.html_escape("<a href=\"https://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;https://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
|
614
624
|
@linker.html_escape("&amp;").should == "&amp;amp;"
|
615
625
|
@linker.html_escape(nil).should == nil
|
616
626
|
end
|
data/spec/extractor_spec.rb
CHANGED
@@ -39,6 +39,13 @@ describe Twitter::Extractor do
|
|
39
39
|
it "should be linked in Japanese text" do
|
40
40
|
@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
|
41
41
|
end
|
42
|
+
|
43
|
+
it "should ignore mentions preceded by !, @, #, $, %, & or *" do
|
44
|
+
invalid_chars = ['!', '@', '#', '$', '%', '&', '*']
|
45
|
+
invalid_chars.each do |c|
|
46
|
+
@extractor.extract_mentioned_screen_names("f#{c}@kn").should == []
|
47
|
+
end
|
48
|
+
end
|
42
49
|
end
|
43
50
|
|
44
51
|
it "should accept a block arugment and call it in order" do
|
data/spec/spec_helper.rb
CHANGED
@@ -51,18 +51,26 @@ RSpec::Matchers.define :have_autolinked_url do |url, inner_text|
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
RSpec::Matchers.define :link_to_screen_name do |screen_name|
|
54
|
+
RSpec::Matchers.define :link_to_screen_name do |screen_name, inner_text|
|
55
|
+
expected = inner_text ? inner_text : screen_name
|
56
|
+
|
55
57
|
match do |text|
|
56
58
|
@link = Nokogiri::HTML(text).search("a.username")
|
57
|
-
@link &&
|
59
|
+
@link &&
|
60
|
+
@link.inner_text == expected &&
|
61
|
+
"https://twitter.com/#{screen_name}".downcase.should == @link.first['href']
|
58
62
|
end
|
59
63
|
|
60
64
|
failure_message_for_should do |text|
|
61
|
-
|
65
|
+
if @link.first
|
66
|
+
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match screen_name '#{expected}', but it does not."
|
67
|
+
else
|
68
|
+
"Expected screen name '#{screen_name}' to be autolinked in '#{text}', but no link was found."
|
69
|
+
end
|
62
70
|
end
|
63
71
|
|
64
72
|
failure_message_for_should_not do |text|
|
65
|
-
"
|
73
|
+
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match screen_name '#{expected}', but it does."
|
66
74
|
end
|
67
75
|
|
68
76
|
description do
|
@@ -70,18 +78,26 @@ RSpec::Matchers.define :link_to_screen_name do |screen_name|
|
|
70
78
|
end
|
71
79
|
end
|
72
80
|
|
73
|
-
RSpec::Matchers.define :link_to_list_path do |list_path|
|
81
|
+
RSpec::Matchers.define :link_to_list_path do |list_path, inner_text|
|
82
|
+
expected = inner_text ? inner_text : list_path
|
83
|
+
|
74
84
|
match do |text|
|
75
85
|
@link = Nokogiri::HTML(text).search("a.list-slug")
|
76
|
-
|
86
|
+
@link &&
|
87
|
+
@link.inner_text == expected &&
|
88
|
+
"https://twitter.com/#{list_path}".downcase.should == @link.first['href']
|
77
89
|
end
|
78
90
|
|
79
91
|
failure_message_for_should do |text|
|
80
|
-
|
92
|
+
if @link.first
|
93
|
+
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' to match the list path '#{expected}', but it does not."
|
94
|
+
else
|
95
|
+
"Expected list path '#{list_path}' to be autolinked in '#{text}', but no link was found."
|
96
|
+
end
|
81
97
|
end
|
82
98
|
|
83
99
|
failure_message_for_should_not do |text|
|
84
|
-
"
|
100
|
+
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the list path '#{expected}', but it does."
|
85
101
|
end
|
86
102
|
|
87
103
|
description do
|
@@ -91,17 +107,21 @@ end
|
|
91
107
|
|
92
108
|
RSpec::Matchers.define :have_autolinked_hashtag do |hashtag|
|
93
109
|
match do |text|
|
94
|
-
@link = Nokogiri::HTML(text).search("a[@href='
|
110
|
+
@link = Nokogiri::HTML(text).search("a[@href='https://twitter.com/#!/search?q=#{hashtag.sub(/^#/, '%23')}']")
|
95
111
|
@link &&
|
96
112
|
@link.inner_text &&
|
97
113
|
@link.inner_text == hashtag
|
98
114
|
end
|
99
115
|
|
100
116
|
failure_message_for_should do |text|
|
101
|
-
if @link
|
117
|
+
if @link.first
|
102
118
|
"Expected link text to be [#{hashtag}], but it was [#{@link.inner_text}] in #{text}"
|
103
119
|
else
|
104
120
|
"Expected hashtag #{hashtag} to be autolinked in '#{text}', but no link was found."
|
105
121
|
end
|
106
122
|
end
|
123
|
+
|
124
|
+
failure_message_for_should_not do |text|
|
125
|
+
"Expected link '#{@link.inner_text}' with href '#{@link.first['href']}' not to match the hashtag '#{hashtag}', but it does."
|
126
|
+
end
|
107
127
|
end
|
data/twitter-text.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "twitter-text"
|
5
|
-
s.version = "1.4.
|
5
|
+
s.version = "1.4.16"
|
6
6
|
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
|
7
7
|
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
|
8
8
|
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 39
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 4
|
9
|
-
-
|
10
|
-
version: 1.4.
|
9
|
+
- 16
|
10
|
+
version: 1.4.16
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Sanford
|
@@ -22,7 +22,7 @@ autorequire:
|
|
22
22
|
bindir: bin
|
23
23
|
cert_chain: []
|
24
24
|
|
25
|
-
date:
|
25
|
+
date: 2012-02-07 00:00:00 -08:00
|
26
26
|
default_executable:
|
27
27
|
dependencies:
|
28
28
|
- !ruby/object:Gem::Dependency
|