twitter-text 1.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +29 -5
- data/lib/autolink.rb +28 -11
- data/lib/regex.rb +6 -6
- data/spec/autolinking_spec.rb +10 -2
- data/spec/extractor_spec.rb +0 -2
- data/spec/regex_spec.rb +14 -4
- metadata +3 -3
data/Rakefile
CHANGED
@@ -5,11 +5,12 @@ require 'rubygems/specification'
|
|
5
5
|
require 'date'
|
6
6
|
require 'spec/rake/spectask'
|
7
7
|
require 'spec/rake/verify_rcov'
|
8
|
+
require 'digest'
|
8
9
|
|
9
10
|
|
10
11
|
spec = Gem::Specification.new do |s|
|
11
12
|
s.name = "twitter-text"
|
12
|
-
s.version = "1.0"
|
13
|
+
s.version = "1.0.1"
|
13
14
|
s.author = "Matt Sanford"
|
14
15
|
s.email = "matt@twitter.com"
|
15
16
|
s.homepage = "http://twitter.com"
|
@@ -19,7 +20,7 @@ spec = Gem::Specification.new do |s|
|
|
19
20
|
s.has_rdoc = true
|
20
21
|
s.summary = "Twitter text handling library"
|
21
22
|
|
22
|
-
s.add_dependency "
|
23
|
+
s.add_dependency "actionpack"
|
23
24
|
|
24
25
|
s.require_path = 'lib'
|
25
26
|
s.autorequire = ''
|
@@ -41,16 +42,39 @@ Spec::Rake::SpecTask.new('spec:rcov') do |t|
|
|
41
42
|
t.rcov_opts = ['--exclude', 'spec']
|
42
43
|
end
|
43
44
|
|
45
|
+
|
46
|
+
def conformance_version(dir)
|
47
|
+
Dir[File.join(dir, '*')].inject(Digest::SHA1.new){|digest, file| digest.update(Digest::SHA1.file(file).hexdigest) }
|
48
|
+
end
|
49
|
+
|
44
50
|
namespace :test do
|
45
51
|
namespace :conformance do
|
52
|
+
|
53
|
+
|
46
54
|
desc "Update conformance testing data"
|
47
55
|
task :update do
|
48
|
-
dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
|
49
56
|
puts "Updating conformance data ... "
|
50
|
-
system("
|
57
|
+
system("git submodule init") || raise("Failed to init submodule")
|
58
|
+
system("git submodule update") || raise("Failed to update submodule")
|
51
59
|
puts "Updating conformance data ... DONE"
|
52
60
|
end
|
53
61
|
|
62
|
+
desc "Change conformance test data to the lastest version"
|
63
|
+
task :latest => ['conformance:update'] do
|
64
|
+
current_dir = File.dirname(__FILE__)
|
65
|
+
submodule_dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
|
66
|
+
version_before = conformance_version(submodule_dir)
|
67
|
+
system("cd #{submodule_dir} && git pull origin master") || raise("Failed to pull submodule version")
|
68
|
+
system("cd #{current_dir}")
|
69
|
+
if conformance_version(submodule_dir) != version_before
|
70
|
+
system("cd #{current_dir} && git add #{submodule_dir}") || raise("Failed to add upgrade files")
|
71
|
+
system("git commit -m \"Upgraded to the latest conformance suite\" #{submodule_dir}") || raise("Failed to commit upgraded conformacne data")
|
72
|
+
puts "Upgraded conformance suite."
|
73
|
+
else
|
74
|
+
puts "No conformance suite changes."
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
54
78
|
desc "Run conformance test suite"
|
55
79
|
task :run do
|
56
80
|
ruby "test/conformance_test.rb"
|
@@ -58,7 +82,7 @@ namespace :test do
|
|
58
82
|
end
|
59
83
|
|
60
84
|
desc "Run conformance test suite"
|
61
|
-
task :conformance => ['conformance:
|
85
|
+
task :conformance => ['conformance:latest', 'conformance:run'] do
|
62
86
|
end
|
63
87
|
end
|
64
88
|
|
data/lib/autolink.rb
CHANGED
@@ -5,6 +5,8 @@ module Twitter
|
|
5
5
|
module Autolink
|
6
6
|
include ActionView::Helpers::TagHelper #tag_options needed by auto_link
|
7
7
|
|
8
|
+
WWW_REGEX = /www\./i #:nodoc:
|
9
|
+
|
8
10
|
# Default CSS class for auto-linked URLs
|
9
11
|
DEFAULT_URL_CLASS = "tweet-url"
|
10
12
|
# Default CSS class for auto-linked lists (along with the url class)
|
@@ -13,6 +15,8 @@ module Twitter
|
|
13
15
|
DEFAULT_USERNAME_CLASS = "username"
|
14
16
|
# Default CSS class for auto-linked hashtags (along with the url class)
|
15
17
|
DEFAULT_HASHTAG_CLASS = "hashtag"
|
18
|
+
# HTML attribute for robot nofollow behavior (default)
|
19
|
+
HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
|
16
20
|
|
17
21
|
# Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
|
18
22
|
# <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
|
@@ -25,9 +29,14 @@ module Twitter
|
|
25
29
|
# <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
26
30
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
27
31
|
# <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute on hashtag links. The <tt>#hashtag</tt> (minus the <tt>#</tt>) will be appended at the end of this.
|
28
|
-
|
29
|
-
|
30
|
-
|
32
|
+
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
33
|
+
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
34
|
+
def auto_link(text, options = {})
|
35
|
+
auto_link_usernames_or_lists(
|
36
|
+
auto_link_urls_custom(
|
37
|
+
auto_link_hashtags(text, options),
|
38
|
+
options),
|
39
|
+
options)
|
31
40
|
end
|
32
41
|
|
33
42
|
# Add <tt><a></a></tt> tags around the usernames and lists in the provided <tt>text</tt>. The
|
@@ -39,6 +48,8 @@ module Twitter
|
|
39
48
|
# <tt>:username_class</tt>:: class to add to username <tt><a></tt> tags
|
40
49
|
# <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
41
50
|
# <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
|
51
|
+
# <tt>:suppress_lists</tt>:: disable auto-linking to lists
|
52
|
+
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
42
53
|
def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
|
43
54
|
options = options.dup
|
44
55
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
@@ -46,18 +57,19 @@ module Twitter
|
|
46
57
|
options[:username_class] ||= DEFAULT_USERNAME_CLASS
|
47
58
|
options[:username_url_base] ||= "http://twitter.com/"
|
48
59
|
options[:list_url_base] ||= "http://twitter.com/"
|
60
|
+
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
49
61
|
|
50
62
|
text.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
|
51
63
|
if $4 && !options[:suppress_lists]
|
52
64
|
# the link is a list
|
53
65
|
text = list = "#{$3}#{$4}"
|
54
66
|
text = yield(list) if block_given?
|
55
|
-
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\">#{text}</a>"
|
67
|
+
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\"#{extra_html}>#{text}</a>"
|
56
68
|
else
|
57
69
|
# this is a screen name
|
58
70
|
text = $3
|
59
71
|
text = yield(text) if block_given?
|
60
|
-
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{
|
72
|
+
"#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{text}\"#{extra_html}>#{text}</a>"
|
61
73
|
end
|
62
74
|
end
|
63
75
|
end
|
@@ -69,31 +81,36 @@ module Twitter
|
|
69
81
|
# <tt>:url_class</tt>:: class to add to all <tt><a></tt> tags
|
70
82
|
# <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
|
71
83
|
# <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
|
72
|
-
#
|
84
|
+
# <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
|
73
85
|
def auto_link_hashtags(text, options = {}) # :yields: hashtag_text
|
74
86
|
options = options.dup
|
75
87
|
options[:url_class] ||= DEFAULT_URL_CLASS
|
76
88
|
options[:hashtag_class] ||= DEFAULT_HASHTAG_CLASS
|
77
89
|
options[:hashtag_url_base] ||= "http://twitter.com/search?q=%23"
|
90
|
+
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
|
78
91
|
|
79
92
|
text.gsub(Twitter::Regex[:auto_link_hashtags]) do
|
80
93
|
before = $1
|
81
94
|
hash = $2
|
82
95
|
text = $3
|
83
96
|
text = yield(text) if block_given?
|
84
|
-
"#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\">#{hash}#{text}</a>"
|
97
|
+
"#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
|
85
98
|
end
|
86
99
|
end
|
87
100
|
|
88
101
|
# Add <tt><a></a></tt> tags around the URLs in the provided <tt>text</tt>. Any
|
89
102
|
# elements in the <tt>href_options</tt> hash will be converted to HTML attributes
|
90
|
-
# and place in the <tt><a></tt> tag.
|
103
|
+
# and placed in the <tt><a></tt> tag. Unless <tt>href_options</tt> contains <tt>:suppress_no_follow</tt>
|
104
|
+
# the <tt>rel="nofollow"</tt> attribute will be added.
|
91
105
|
def auto_link_urls_custom(text, href_options = {})
|
106
|
+
options = href_options.dup
|
107
|
+
options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
|
108
|
+
|
92
109
|
text.gsub(Twitter::Regex[:valid_url]) do
|
93
110
|
all, before, url, protocol = $1, $2, $3, $4
|
94
|
-
|
95
|
-
full_url = (protocol
|
96
|
-
"#{before}<a href=\"#{full_url}\"#{
|
111
|
+
html_attrs = tag_options(options.stringify_keys) || ""
|
112
|
+
full_url = (protocol =~ WWW_REGEX ? "http://#{url}" : url)
|
113
|
+
"#{before}<a href=\"#{full_url}\"#{html_attrs}>#{url}</a>"
|
97
114
|
end
|
98
115
|
end
|
99
116
|
|
data/lib/regex.rb
CHANGED
@@ -26,7 +26,7 @@ module Twitter
|
|
26
26
|
].flatten.freeze
|
27
27
|
REGEXEN[:spaces] = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('|'))
|
28
28
|
|
29
|
-
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})/
|
29
|
+
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})(?!@)/
|
30
30
|
REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*[@@]([a-zA-Z0-9_]{1,20})/o
|
31
31
|
|
32
32
|
REGEXEN[:list_name] = /^[a-zA-Z\x80-\xff].{0,79}$/
|
@@ -42,14 +42,14 @@ module Twitter
|
|
42
42
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
43
43
|
|
44
44
|
# URL related hash regex collection
|
45
|
-
REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]
|
46
|
-
REGEXEN[:valid_domain] = /[
|
47
|
-
REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\)
|
45
|
+
REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]|^|\:)/
|
46
|
+
REGEXEN[:valid_domain] = /(?:[\.-]|[^[:punct:]])+\.[a-z]{2,}(?::[0-9]+)?/i
|
47
|
+
REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
|
48
48
|
# Valid end-of-path characters (so /foo. does not gobble the period).
|
49
49
|
# 1. Allow ) for Wikipedia URLs.
|
50
50
|
# 2. Allow =&# for empty URL parameters and other URL-join artifacts
|
51
51
|
REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9\)=#\/]/i
|
52
|
-
REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\)
|
52
|
+
REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
|
53
53
|
REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#]/i
|
54
54
|
REGEXEN[:valid_url] = %r{
|
55
55
|
( # $1 total match
|
@@ -71,4 +71,4 @@ module Twitter
|
|
71
71
|
REGEXEN[key]
|
72
72
|
end
|
73
73
|
end
|
74
|
-
end
|
74
|
+
end
|
data/spec/autolinking_spec.rb
CHANGED
@@ -346,6 +346,14 @@ describe Twitter::Autolink do
|
|
346
346
|
end
|
347
347
|
end
|
348
348
|
|
349
|
+
context "when preceded by a :" do
|
350
|
+
def original_text; "Check this out @hoverbird:#{url}"; end
|
351
|
+
|
352
|
+
it "should be linked" do
|
353
|
+
@autolinked_text.should have_autolinked_url(url)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
349
357
|
context "with a URL ending in allowed punctuation" do
|
350
358
|
it "does not consume ending punctuation" do
|
351
359
|
matcher = TestAutolink.new
|
@@ -358,7 +366,7 @@ describe Twitter::Autolink do
|
|
358
366
|
context "with a URL preceded in forbidden characters" do
|
359
367
|
it "should not be linked" do
|
360
368
|
matcher = TestAutolink.new
|
361
|
-
%w| \ ' /
|
369
|
+
%w| \ ' / ! = |.each do |char|
|
362
370
|
matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url)
|
363
371
|
end
|
364
372
|
end
|
@@ -424,4 +432,4 @@ describe Twitter::Autolink do
|
|
424
432
|
end
|
425
433
|
end
|
426
434
|
|
427
|
-
end
|
435
|
+
end
|
data/spec/extractor_spec.rb
CHANGED
@@ -108,8 +108,6 @@ describe Twitter::Extractor do
|
|
108
108
|
"http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
|
109
109
|
"http://somehost.com:3000",
|
110
110
|
"http://x.com/~matthew+%-x",
|
111
|
-
"http://x.com/~matthew+%-,.;@:x",
|
112
|
-
"http://x.com/,.;@:x",
|
113
111
|
"http://en.wikipedia.org/wiki/Primer_(film)",
|
114
112
|
"http://www.ams.org/bookstore-getitem/item=mbk-59",
|
115
113
|
"http://chilp.it/?77e8fd",
|
data/spec/regex_spec.rb
CHANGED
@@ -11,11 +11,15 @@ describe "Twitter::Regex regular expressions" do
|
|
11
11
|
"http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
|
12
12
|
"http://somehost.com:3000",
|
13
13
|
"http://x.com/~matthew+%-x",
|
14
|
-
"http://x.com/~matthew+%-,.;@:x",
|
15
|
-
"http://x.com/,.;@:x",
|
16
14
|
"http://en.wikipedia.org/wiki/Primer_(film)",
|
17
15
|
"http://www.ams.org/bookstore-getitem/item=mbk-59",
|
18
16
|
"http://chilp.it/?77e8fd",
|
17
|
+
"www.foobar.com",
|
18
|
+
"WWW.FOOBAR.COM",
|
19
|
+
"http://tell.me/why",
|
20
|
+
"http://longtlds.mobi",
|
21
|
+
"http://✪df.ws/ejp",
|
22
|
+
"http://日本.com"
|
19
23
|
]
|
20
24
|
|
21
25
|
@urls.each do |url|
|
@@ -31,14 +35,20 @@ describe "Twitter::Regex regular expressions" do
|
|
31
35
|
end
|
32
36
|
|
33
37
|
describe "invalid URLS" do
|
34
|
-
|
38
|
+
it "does not link urls with invalid characters" do
|
35
39
|
[ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
|
36
40
|
"http://no-tld",
|
37
41
|
"http://tld-too-short.x",
|
38
42
|
"http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
|
39
|
-
"http://
|
43
|
+
"http://doman_dash_2314352345_dfasd.foo-cow_4352.com",
|
40
44
|
].each {|url| url.should_not have_autolinked_url(url)}
|
41
45
|
end
|
46
|
+
|
47
|
+
it "does not link domains beginning with a hypen" do
|
48
|
+
pending
|
49
|
+
"http://-doman_dash_2314352345_dfasd.com".should_not match_autolink_expression
|
50
|
+
end
|
51
|
+
|
42
52
|
end
|
43
53
|
|
44
54
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Sanford
|
@@ -9,11 +9,11 @@ autorequire: ""
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-02-10 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: actionpack
|
17
17
|
type: :runtime
|
18
18
|
version_requirement:
|
19
19
|
version_requirements: !ruby/object:Gem::Requirement
|