twitter-text 1.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +29 -5
- data/lib/autolink.rb +28 -11
- data/lib/regex.rb +6 -6
- data/spec/autolinking_spec.rb +10 -2
- data/spec/extractor_spec.rb +0 -2
- data/spec/regex_spec.rb +14 -4
- metadata +3 -3
data/Rakefile
CHANGED
@@ -5,11 +5,12 @@ require 'rubygems/specification'
|
|
5
5
|
require 'date'
|
6
6
|
require 'spec/rake/spectask'
|
7
7
|
require 'spec/rake/verify_rcov'
|
8
|
+
require 'digest'
|
8
9
|
|
9
10
|
|
10
11
|
spec = Gem::Specification.new do |s|
|
11
12
|
s.name = "twitter-text"
|
12
|
-
s.version = "1.0"
|
13
|
+
s.version = "1.0.1"
|
13
14
|
s.author = "Matt Sanford"
|
14
15
|
s.email = "matt@twitter.com"
|
15
16
|
s.homepage = "http://twitter.com"
|
@@ -19,7 +20,7 @@ spec = Gem::Specification.new do |s|
|
|
19
20
|
s.has_rdoc = true
|
20
21
|
s.summary = "Twitter text handling library"
|
21
22
|
|
22
|
-
s.add_dependency "
|
23
|
+
s.add_dependency "actionpack"
|
23
24
|
|
24
25
|
s.require_path = 'lib'
|
25
26
|
s.autorequire = ''
|
@@ -41,16 +42,39 @@ Spec::Rake::SpecTask.new('spec:rcov') do |t|
|
|
41
42
|
t.rcov_opts = ['--exclude', 'spec']
|
42
43
|
end
|
43
44
|
|
45
|
+
|
46
|
+
# Computes a combined SHA1 fingerprint over the regular files directly inside
# +dir+, so two calls can be compared to detect whether the contents changed.
#
# dir:: path of the directory to fingerprint (only its immediate entries are read)
#
# Returns a Digest::SHA1 instance; compare two results with ==/!= or via hexdigest.
def conformance_version(dir)
  # Keep regular files only (Digest::SHA1.file raises on a directory) and sort
  # them so the fingerprint does not depend on the order the glob returns.
  files = Dir[File.join(dir, '*')].select { |path| File.file?(path) }.sort
  files.inject(Digest::SHA1.new) do |digest, file|
    digest.update(Digest::SHA1.file(file).hexdigest)
  end
end
|
49
|
+
|
44
50
|
namespace :test do
|
45
51
|
namespace :conformance do
|
52
|
+
|
53
|
+
|
46
54
|
desc "Update conformance testing data"
|
47
55
|
task :update do
|
48
|
-
dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
|
49
56
|
puts "Updating conformance data ... "
|
50
|
-
system("
|
57
|
+
system("git submodule init") || raise("Failed to init submodule")
|
58
|
+
system("git submodule update") || raise("Failed to update submodule")
|
51
59
|
puts "Updating conformance data ... DONE"
|
52
60
|
end
|
53
61
|
|
62
|
+
desc "Change conformance test data to the latest version"
task :latest => ['conformance:update'] do
  # Pulls the newest conformance data into the submodule and, when the
  # fingerprint of its files changed, stages and commits the submodule path.
  current_dir = File.dirname(__FILE__)
  submodule_dir = File.join(current_dir, "test", "twitter-text-conformance")
  version_before = conformance_version(submodule_dir)

  # Dir.chdir with a block restores the working directory afterwards; the
  # multi-argument form of system bypasses the shell, so paths containing
  # spaces or shell metacharacters are passed through untouched.
  Dir.chdir(submodule_dir) do
    system("git", "pull", "origin", "master") || raise("Failed to pull submodule version")
  end

  if conformance_version(submodule_dir) != version_before
    Dir.chdir(current_dir) do
      system("git", "add", submodule_dir) || raise("Failed to add upgrade files")
      system("git", "commit", "-m", "Upgraded to the latest conformance suite", submodule_dir) ||
        raise("Failed to commit upgraded conformance data")
    end
    puts "Upgraded conformance suite."
  else
    puts "No conformance suite changes."
  end
end
|
77
|
+
|
54
78
|
desc "Run conformance test suite"
|
55
79
|
task :run do
|
56
80
|
ruby "test/conformance_test.rb"
|
@@ -58,7 +82,7 @@ namespace :test do
|
|
58
82
|
end
|
59
83
|
|
60
84
|
desc "Run conformance test suite"
|
61
|
-
task :conformance => ['conformance:
|
85
|
+
task :conformance => ['conformance:latest', 'conformance:run'] do
|
62
86
|
end
|
63
87
|
end
|
64
88
|
|
data/lib/autolink.rb
CHANGED
@@ -5,6 +5,8 @@ module Twitter
|
|
5
5
|
module Autolink
|
6
6
|
include ActionView::Helpers::TagHelper #tag_options needed by auto_link
|
7
7
|
|
8
|
+
WWW_REGEX = /www\./i #:nodoc:
|
9
|
+
|
8
10
|
# Default CSS class for auto-linked URLs
|
9
11
|
DEFAULT_URL_CLASS = "tweet-url"
|
10
12
|
# Default CSS class for auto-linked lists (along with the url class)
|
@@ -13,6 +15,8 @@ module Twitter
|
|
13
15
|
DEFAULT_USERNAME_CLASS = "username"
|
14
16
|
# Default CSS class for auto-linked hashtags (along with the url class)
|
15
17
|
DEFAULT_HASHTAG_CLASS = "hashtag"
|
18
|
+
# HTML attribute for robot nofollow behavior (default)
|
19
|
+
HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
|
16
20
|
|
17
21
|
# Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
|
18
22
|
# <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
|
@@ -25,9 +29,14 @@ module Twitter
|
|
25
29
|
# <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
26
30
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
27
31
|
# <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute on hashtag links. The <tt>#hashtag</tt> (minus the <tt>#</tt>) will be appended at the end of this.
|
28
|
-
|
29
|
-
|
30
|
-
|
32
|
+
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
33
|
+
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
34
|
+
def auto_link(text, options = {})
  # Three passes, each fed the output of the previous one and the same options:
  # hashtags, then URLs, then usernames/lists.
  with_hashtags = auto_link_hashtags(text, options)
  with_urls = auto_link_urls_custom(with_hashtags, options)
  auto_link_usernames_or_lists(with_urls, options)
end
|
32
41
|
|
33
42
|
# Add <tt><a></a></tt> tags around the usernames and lists in the provided <tt>text</tt>. The
|
@@ -39,6 +48,8 @@ module Twitter
|
|
39
48
|
# <tt>:username_class</tt>:: class to add to username <tt><a></tt> tags
|
40
49
|
# <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
41
50
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
51
|
+
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
52
|
+
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
42
53
|
def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
|
43
54
|
options = options.dup
|
44
55
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
@@ -46,18 +57,19 @@ module Twitter
|
|
46
57
|
options[:username_class] ||= DEFAULT_USERNAME_CLASS
|
47
58
|
options[:username_url_base] ||= "http://twitter.com/"
|
48
59
|
options[:list_url_base] ||= "http://twitter.com/"
|
60
|
+
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
49
61
|
|
50
62
|
text.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
|
51
63
|
if $4 && !options[:suppress_lists]
|
52
64
|
# the link is a list
|
53
65
|
text = list = "#{$3}#{$4}"
|
54
66
|
text = yield(list) if block_given?
|
55
|
-
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\">#{text}</a>"
|
67
|
+
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\"#{extra_html}>#{text}</a>"
|
56
68
|
else
|
57
69
|
# this is a screen name
|
58
70
|
text = $3
|
59
71
|
text = yield(text) if block_given?
|
60
|
-
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{
|
72
|
+
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{text}\"#{extra_html}>#{text}</a>"
|
61
73
|
end
|
62
74
|
end
|
63
75
|
end
|
@@ -69,31 +81,36 @@ module Twitter
|
|
69
81
|
# <tt>:url_class</tt>:: class to add to all <tt><a></tt> tags
|
70
82
|
# <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
|
71
83
|
# <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
|
72
|
-
#
|
84
|
+
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
73
85
|
def auto_link_hashtags(text, options = {}) # :yields: hashtag_text
  # Resolve each setting once, falling back to the module defaults.
  url_class = options[:url_class] || DEFAULT_URL_CLASS
  hashtag_class = options[:hashtag_class] || DEFAULT_HASHTAG_CLASS
  url_base = options[:hashtag_url_base] || "http://twitter.com/search?q=%23"
  # nil interpolates as an empty string, so no rel attribute is emitted when suppressed.
  rel_attr = options[:suppress_no_follow] ? nil : HTML_ATTR_NO_FOLLOW

  text.gsub(Twitter::Regex[:auto_link_hashtags]) do
    # Copy the captures before yielding; the caller's block may run its own matches.
    prefix, marker, tag = $1, $2, $3
    tag = yield(tag) if block_given?
    "#{prefix}<a href=\"#{url_base}#{tag}\" title=\"##{tag}\" class=\"#{url_class} #{hashtag_class}\"#{rel_attr}>#{marker}#{tag}</a>"
  end
end
|
87
100
|
|
88
101
|
# Add <tt><a></a></tt> tags around the URLs in the provided <tt>text</tt>. Any
|
89
102
|
# elements in the <tt>href_options</tt> hash will be converted to HTML attributes
|
90
|
-
# and place in the <tt><a></tt> tag.
|
103
|
+
# and placed in the <tt><a></tt> tag. Unless <tt>href_options</tt> contains <tt>:suppress_no_follow</tt>
|
104
|
+
# the <tt>rel="nofollow"</tt> attribute will be added.
|
91
105
|
def auto_link_urls_custom(text, href_options = {})
  # Work on a copy so the caller's hash is never modified.
  options = href_options.dup
  # :suppress_no_follow is a control flag rather than an HTML attribute, so it
  # is removed here before the remaining entries are rendered as attributes.
  options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
  # The attribute string is identical for every match, so build it once
  # instead of once per URL inside the substitution block.
  html_attrs = tag_options(options.stringify_keys) || ""

  text.gsub(Twitter::Regex[:valid_url]) do
    # $1 (the total match) is not needed for the replacement.
    before, url, protocol = $2, $3, $4
    # NOTE(review): $4 is presumably the scheme-or-"www." capture of :valid_url;
    # the regex body is not visible here. A "www." match gets an explicit scheme.
    full_url = (protocol =~ WWW_REGEX ? "http://#{url}" : url)
    "#{before}<a href=\"#{full_url}\"#{html_attrs}>#{url}</a>"
  end
end
|
99
116
|
|
data/lib/regex.rb
CHANGED
@@ -26,7 +26,7 @@ module Twitter
|
|
26
26
|
].flatten.freeze
|
27
27
|
REGEXEN[:spaces] = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('|'))
|
28
28
|
|
29
|
-
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})/
|
29
|
+
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})(?!@)/
|
30
30
|
REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*[@@]([a-zA-Z0-9_]{1,20})/o
|
31
31
|
|
32
32
|
REGEXEN[:list_name] = /^[a-zA-Z\x80-\xff].{0,79}$/
|
@@ -42,14 +42,14 @@ module Twitter
|
|
42
42
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
43
43
|
|
44
44
|
# URL related hash regex collection
|
45
|
-
REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]
|
46
|
-
REGEXEN[:valid_domain] = /[
|
47
|
-
REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\)
|
45
|
+
REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]|^|\:)/
|
46
|
+
REGEXEN[:valid_domain] = /(?:[\.-]|[^[:punct:]])+\.[a-z]{2,}(?::[0-9]+)?/i
|
47
|
+
REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
|
48
48
|
# Valid end-of-path characters (so /foo. does not gobble the period).
|
49
49
|
# 1. Allow ) for Wikipedia URLs.
|
50
50
|
# 2. Allow =&# for empty URL parameters and other URL-join artifacts
|
51
51
|
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9\)=#\/]/i
|
52
|
-
REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\)
|
52
|
+
REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
|
53
53
|
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#]/i
|
54
54
|
REGEXEN[:valid_url] = %r{
|
55
55
|
( # $1 total match
|
@@ -71,4 +71,4 @@ module Twitter
|
|
71
71
|
REGEXEN[key]
|
72
72
|
end
|
73
73
|
end
|
74
|
-
end
|
74
|
+
end
|
data/spec/autolinking_spec.rb
CHANGED
@@ -346,6 +346,14 @@ describe Twitter::Autolink do
|
|
346
346
|
end
|
347
347
|
end
|
348
348
|
|
349
|
+
context "when preceded by a :" do
|
350
|
+
def original_text; "Check this out @hoverbird:#{url}"; end
|
351
|
+
|
352
|
+
it "should be linked" do
|
353
|
+
@autolinked_text.should have_autolinked_url(url)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
349
357
|
context "with a URL ending in allowed punctuation" do
|
350
358
|
it "does not consume ending punctuation" do
|
351
359
|
matcher = TestAutolink.new
|
@@ -358,7 +366,7 @@ describe Twitter::Autolink do
|
|
358
366
|
context "with a URL preceded in forbidden characters" do
|
359
367
|
it "should not be linked" do
|
360
368
|
matcher = TestAutolink.new
|
361
|
-
%w| \ ' /
|
369
|
+
%w| \ ' / ! = |.each do |char|
|
362
370
|
matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url)
|
363
371
|
end
|
364
372
|
end
|
@@ -424,4 +432,4 @@ describe Twitter::Autolink do
|
|
424
432
|
end
|
425
433
|
end
|
426
434
|
|
427
|
-
end
|
435
|
+
end
|
data/spec/extractor_spec.rb
CHANGED
@@ -108,8 +108,6 @@ describe Twitter::Extractor do
|
|
108
108
|
"http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
|
109
109
|
"http://somehost.com:3000",
|
110
110
|
"http://x.com/~matthew+%-x",
|
111
|
-
"http://x.com/~matthew+%-,.;@:x",
|
112
|
-
"http://x.com/,.;@:x",
|
113
111
|
"http://en.wikipedia.org/wiki/Primer_(film)",
|
114
112
|
"http://www.ams.org/bookstore-getitem/item=mbk-59",
|
115
113
|
"http://chilp.it/?77e8fd",
|
data/spec/regex_spec.rb
CHANGED
@@ -11,11 +11,15 @@ describe "Twitter::Regex regular expressions" do
|
|
11
11
|
"http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
|
12
12
|
"http://somehost.com:3000",
|
13
13
|
"http://x.com/~matthew+%-x",
|
14
|
-
"http://x.com/~matthew+%-,.;@:x",
|
15
|
-
"http://x.com/,.;@:x",
|
16
14
|
"http://en.wikipedia.org/wiki/Primer_(film)",
|
17
15
|
"http://www.ams.org/bookstore-getitem/item=mbk-59",
|
18
16
|
"http://chilp.it/?77e8fd",
|
17
|
+
"www.foobar.com",
|
18
|
+
"WWW.FOOBAR.COM",
|
19
|
+
"http://tell.me/why",
|
20
|
+
"http://longtlds.mobi",
|
21
|
+
"http://✪df.ws/ejp",
|
22
|
+
"http://日本.com"
|
19
23
|
]
|
20
24
|
|
21
25
|
@urls.each do |url|
|
@@ -31,14 +35,20 @@ describe "Twitter::Regex regular expressions" do
|
|
31
35
|
end
|
32
36
|
|
33
37
|
describe "invalid URLS" do
|
34
|
-
|
38
|
+
it "does not link urls with invalid characters" do
|
35
39
|
[ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
|
36
40
|
"http://no-tld",
|
37
41
|
"http://tld-too-short.x",
|
38
42
|
"http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
|
39
|
-
"http://
|
43
|
+
"http://doman_dash_2314352345_dfasd.foo-cow_4352.com",
|
40
44
|
].each {|url| url.should_not have_autolinked_url(url)}
|
41
45
|
end
|
46
|
+
|
47
|
+
it "does not link domains beginning with a hypen" do
|
48
|
+
pending
|
49
|
+
"http://-doman_dash_2314352345_dfasd.com".should_not match_autolink_expression
|
50
|
+
end
|
51
|
+
|
42
52
|
end
|
43
53
|
|
44
54
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Sanford
|
@@ -9,11 +9,11 @@ autorequire: ""
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-10 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: actionpack
|
17
17
|
type: :runtime
|
18
18
|
version_requirement:
|
19
19
|
version_requirements: !ruby/object:Gem::Requirement
|