twitter-text 1.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -5,11 +5,12 @@ require 'rubygems/specification'
5
5
  require 'date'
6
6
  require 'spec/rake/spectask'
7
7
  require 'spec/rake/verify_rcov'
8
+ require 'digest'
8
9
 
9
10
 
10
11
  spec = Gem::Specification.new do |s|
11
12
  s.name = "twitter-text"
12
- s.version = "1.0"
13
+ s.version = "1.0.1"
13
14
  s.author = "Matt Sanford"
14
15
  s.email = "matt@twitter.com"
15
16
  s.homepage = "http://twitter.com"
@@ -19,7 +20,7 @@ spec = Gem::Specification.new do |s|
19
20
  s.has_rdoc = true
20
21
  s.summary = "Twitter text handling library"
21
22
 
22
- s.add_dependency "action_view"
23
+ s.add_dependency "actionpack"
23
24
 
24
25
  s.require_path = 'lib'
25
26
  s.autorequire = ''
@@ -41,16 +42,39 @@ Spec::Rake::SpecTask.new('spec:rcov') do |t|
41
42
  t.rcov_opts = ['--exclude', 'spec']
42
43
  end
43
44
 
45
+
46
+ def conformance_version(dir)
47
+ Dir[File.join(dir, '*')].inject(Digest::SHA1.new){|digest, file| digest.update(Digest::SHA1.file(file).hexdigest) }
48
+ end
49
+
44
50
  namespace :test do
45
51
  namespace :conformance do
52
+
53
+
46
54
  desc "Update conformance testing data"
47
55
  task :update do
48
- dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
49
56
  puts "Updating conformance data ... "
50
- system("cd #{dir} && git pull origin master") || exit(1)
57
+ system("git submodule init") || raise("Failed to init submodule")
58
+ system("git submodule update") || raise("Failed to update submodule")
51
59
  puts "Updating conformance data ... DONE"
52
60
  end
53
61
 
62
+ desc "Change conformance test data to the lastest version"
63
+ task :latest => ['conformance:update'] do
64
+ current_dir = File.dirname(__FILE__)
65
+ submodule_dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
66
+ version_before = conformance_version(submodule_dir)
67
+ system("cd #{submodule_dir} && git pull origin master") || raise("Failed to pull submodule version")
68
+ system("cd #{current_dir}")
69
+ if conformance_version(submodule_dir) != version_before
70
+ system("cd #{current_dir} && git add #{submodule_dir}") || raise("Failed to add upgrade files")
71
+ system("git commit -m \"Upgraded to the latest conformance suite\" #{submodule_dir}") || raise("Failed to commit upgraded conformacne data")
72
+ puts "Upgraded conformance suite."
73
+ else
74
+ puts "No conformance suite changes."
75
+ end
76
+ end
77
+
54
78
  desc "Run conformance test suite"
55
79
  task :run do
56
80
  ruby "test/conformance_test.rb"
@@ -58,7 +82,7 @@ namespace :test do
58
82
  end
59
83
 
60
84
  desc "Run conformance test suite"
61
- task :conformance => ['conformance:update', 'conformance:run'] do
85
+ task :conformance => ['conformance:latest', 'conformance:run'] do
62
86
  end
63
87
  end
64
88
 
data/lib/autolink.rb CHANGED
@@ -5,6 +5,8 @@ module Twitter
5
5
  module Autolink
6
6
  include ActionView::Helpers::TagHelper #tag_options needed by auto_link
7
7
 
8
+ WWW_REGEX = /www\./i #:nodoc:
9
+
8
10
  # Default CSS class for auto-linked URLs
9
11
  DEFAULT_URL_CLASS = "tweet-url"
10
12
  # Default CSS class for auto-linked lists (along with the url class)
@@ -13,6 +15,8 @@ module Twitter
13
15
  DEFAULT_USERNAME_CLASS = "username"
14
16
  # Default CSS class for auto-linked hashtags (along with the url class)
15
17
  DEFAULT_HASHTAG_CLASS = "hashtag"
18
+ # HTML attribute for robot nofollow behavior (default)
19
+ HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
16
20
 
17
21
  # Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
18
22
  # <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
@@ -25,9 +29,14 @@ module Twitter
25
29
  # <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
26
30
  # <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
27
31
  # <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute on hashtag links. The <tt>#hashtag</tt> (minus the <tt>#</tt>) will be appended at the end of this.
28
- def auto_link(text, options = {}, &block) # :yields: hashtag_or_list_or_username
29
- options = options.dup
30
- auto_link_usernames_or_lists(auto_link_urls_custom(auto_link_hashtags(text), options, &block), &block)
32
+ # <tt>:suppress_lists</tt>:: disable auto-linking to lists
33
+ # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
34
+ def auto_link(text, options = {})
35
+ auto_link_usernames_or_lists(
36
+ auto_link_urls_custom(
37
+ auto_link_hashtags(text, options),
38
+ options),
39
+ options)
31
40
  end
32
41
 
33
42
  # Add <tt><a></a></tt> tags around the usernames and lists in the provided <tt>text</tt>. The
@@ -39,6 +48,8 @@ module Twitter
39
48
  # <tt>:username_class</tt>:: class to add to username <tt><a></tt> tags
40
49
  # <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
41
50
  # <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
51
+ # <tt>:suppress_lists</tt>:: disable auto-linking to lists
52
+ # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
42
53
  def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
43
54
  options = options.dup
44
55
  options[:url_class] ||= DEFAULT_URL_CLASS
@@ -46,18 +57,19 @@ module Twitter
46
57
  options[:username_class] ||= DEFAULT_USERNAME_CLASS
47
58
  options[:username_url_base] ||= "http://twitter.com/"
48
59
  options[:list_url_base] ||= "http://twitter.com/"
60
+ extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
49
61
 
50
62
  text.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
51
63
  if $4 && !options[:suppress_lists]
52
64
  # the link is a list
53
65
  text = list = "#{$3}#{$4}"
54
66
  text = yield(list) if block_given?
55
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\">#{text}</a>"
67
+ "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\"#{extra_html}>#{text}</a>"
56
68
  else
57
69
  # this is a screen name
58
70
  text = $3
59
71
  text = yield(text) if block_given?
60
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{$3}\">#{text}</a>"
72
+ "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{text}\"#{extra_html}>#{text}</a>"
61
73
  end
62
74
  end
63
75
  end
@@ -69,31 +81,36 @@ module Twitter
69
81
  # <tt>:url_class</tt>:: class to add to all <tt><a></tt> tags
70
82
  # <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
71
83
  # <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
72
- #
84
+ # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
73
85
  def auto_link_hashtags(text, options = {}) # :yields: hashtag_text
74
86
  options = options.dup
75
87
  options[:url_class] ||= DEFAULT_URL_CLASS
76
88
  options[:hashtag_class] ||= DEFAULT_HASHTAG_CLASS
77
89
  options[:hashtag_url_base] ||= "http://twitter.com/search?q=%23"
90
+ extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
78
91
 
79
92
  text.gsub(Twitter::Regex[:auto_link_hashtags]) do
80
93
  before = $1
81
94
  hash = $2
82
95
  text = $3
83
96
  text = yield(text) if block_given?
84
- "#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\">#{hash}#{text}</a>"
97
+ "#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
85
98
  end
86
99
  end
87
100
 
88
101
  # Add <tt><a></a></tt> tags around the URLs in the provided <tt>text</tt>. Any
89
102
  # elements in the <tt>href_options</tt> hash will be converted to HTML attributes
90
- # and place in the <tt><a></tt> tag.
103
+ # and placed in the <tt><a></tt> tag. Unless <tt>href_options</tt> contains <tt>:suppress_no_follow</tt>
104
+ # the <tt>rel="nofollow"</tt> attribute will be added.
91
105
  def auto_link_urls_custom(text, href_options = {})
106
+ options = href_options.dup
107
+ options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
108
+
92
109
  text.gsub(Twitter::Regex[:valid_url]) do
93
110
  all, before, url, protocol = $1, $2, $3, $4
94
- options = tag_options(href_options.stringify_keys) || ""
95
- full_url = (protocol == "www." ? "http://#{url}" : url)
96
- "#{before}<a href=\"#{full_url}\"#{options}>#{url}</a>"
111
+ html_attrs = tag_options(options.stringify_keys) || ""
112
+ full_url = (protocol =~ WWW_REGEX ? "http://#{url}" : url)
113
+ "#{before}<a href=\"#{full_url}\"#{html_attrs}>#{url}</a>"
97
114
  end
98
115
  end
99
116
 
data/lib/regex.rb CHANGED
@@ -26,7 +26,7 @@ module Twitter
26
26
  ].flatten.freeze
27
27
  REGEXEN[:spaces] = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('|'))
28
28
 
29
- REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})/
29
+ REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})(?!@)/
30
30
  REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*[@@]([a-zA-Z0-9_]{1,20})/o
31
31
 
32
32
  REGEXEN[:list_name] = /^[a-zA-Z\x80-\xff].{0,79}$/
@@ -42,14 +42,14 @@ module Twitter
42
42
  REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
43
43
 
44
44
  # URL related hash regex collection
45
- REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]|^)/
46
- REGEXEN[:valid_domain] = /[a-z0-9\.-]+\.[a-z]{2,}(?::[0-9]+)?/i
47
- REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:@&=\+\$\/%#\[\]\-_\.,~]/i
45
+ REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]|^|\:)/
46
+ REGEXEN[:valid_domain] = /(?:[\.-]|[^[:punct:]])+\.[a-z]{2,}(?::[0-9]+)?/i
47
+ REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
48
48
  # Valid end-of-path characters (so /foo. does not gobble the period).
49
49
  # 1. Allow ) for Wikipedia URLs.
50
50
  # 2. Allow =&# for empty URL parameters and other URL-join artifacts
51
51
  REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9\)=#\/]/i
52
- REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:@&=\+\$\/%#\[\]\-_\.,~]/i
52
+ REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
53
53
  REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#]/i
54
54
  REGEXEN[:valid_url] = %r{
55
55
  ( # $1 total match
@@ -71,4 +71,4 @@ module Twitter
71
71
  REGEXEN[key]
72
72
  end
73
73
  end
74
- end
74
+ end
@@ -346,6 +346,14 @@ describe Twitter::Autolink do
346
346
  end
347
347
  end
348
348
 
349
+ context "when preceded by a :" do
350
+ def original_text; "Check this out @hoverbird:#{url}"; end
351
+
352
+ it "should be linked" do
353
+ @autolinked_text.should have_autolinked_url(url)
354
+ end
355
+ end
356
+
349
357
  context "with a URL ending in allowed punctuation" do
350
358
  it "does not consume ending punctuation" do
351
359
  matcher = TestAutolink.new
@@ -358,7 +366,7 @@ describe Twitter::Autolink do
358
366
  context "with a URL preceded in forbidden characters" do
359
367
  it "should not be linked" do
360
368
  matcher = TestAutolink.new
361
- %w| \ ' / : ! = |.each do |char|
369
+ %w| \ ' / ! = |.each do |char|
362
370
  matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url)
363
371
  end
364
372
  end
@@ -424,4 +432,4 @@ describe Twitter::Autolink do
424
432
  end
425
433
  end
426
434
 
427
- end
435
+ end
@@ -108,8 +108,6 @@ describe Twitter::Extractor do
108
108
  "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
109
109
  "http://somehost.com:3000",
110
110
  "http://x.com/~matthew+%-x",
111
- "http://x.com/~matthew+%-,.;@:x",
112
- "http://x.com/,.;@:x",
113
111
  "http://en.wikipedia.org/wiki/Primer_(film)",
114
112
  "http://www.ams.org/bookstore-getitem/item=mbk-59",
115
113
  "http://chilp.it/?77e8fd",
data/spec/regex_spec.rb CHANGED
@@ -11,11 +11,15 @@ describe "Twitter::Regex regular expressions" do
11
11
  "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
12
12
  "http://somehost.com:3000",
13
13
  "http://x.com/~matthew+%-x",
14
- "http://x.com/~matthew+%-,.;@:x",
15
- "http://x.com/,.;@:x",
16
14
  "http://en.wikipedia.org/wiki/Primer_(film)",
17
15
  "http://www.ams.org/bookstore-getitem/item=mbk-59",
18
16
  "http://chilp.it/?77e8fd",
17
+ "www.foobar.com",
18
+ "WWW.FOOBAR.COM",
19
+ "http://tell.me/why",
20
+ "http://longtlds.mobi",
21
+ "http://✪df.ws/ejp",
22
+ "http://日本.com"
19
23
  ]
20
24
 
21
25
  @urls.each do |url|
@@ -31,14 +35,20 @@ describe "Twitter::Regex regular expressions" do
31
35
  end
32
36
 
33
37
  describe "invalid URLS" do
34
- it "does not link urls with invalid_domains" do
38
+ it "does not link urls with invalid characters" do
35
39
  [ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
36
40
  "http://no-tld",
37
41
  "http://tld-too-short.x",
38
42
  "http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
39
- "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
43
+ "http://doman_dash_2314352345_dfasd.foo-cow_4352.com",
40
44
  ].each {|url| url.should_not have_autolinked_url(url)}
41
45
  end
46
+
47
+ it "does not link domains beginning with a hypen" do
48
+ pending
49
+ "http://-doman_dash_2314352345_dfasd.com".should_not match_autolink_expression
50
+ end
51
+
42
52
  end
43
53
 
44
54
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: "1.0"
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Sanford
@@ -9,11 +9,11 @@ autorequire: ""
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-27 00:00:00 -08:00
12
+ date: 2010-02-10 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: action_view
16
+ name: actionpack
17
17
  type: :runtime
18
18
  version_requirement:
19
19
  version_requirements: !ruby/object:Gem::Requirement