twitter-text 1.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -5,11 +5,12 @@ require 'rubygems/specification'
5
5
  require 'date'
6
6
  require 'spec/rake/spectask'
7
7
  require 'spec/rake/verify_rcov'
8
+ require 'digest'
8
9
 
9
10
 
10
11
  spec = Gem::Specification.new do |s|
11
12
  s.name = "twitter-text"
12
- s.version = "1.0"
13
+ s.version = "1.0.1"
13
14
  s.author = "Matt Sanford"
14
15
  s.email = "matt@twitter.com"
15
16
  s.homepage = "http://twitter.com"
@@ -19,7 +20,7 @@ spec = Gem::Specification.new do |s|
19
20
  s.has_rdoc = true
20
21
  s.summary = "Twitter text handling library"
21
22
 
22
- s.add_dependency "action_view"
23
+ s.add_dependency "actionpack"
23
24
 
24
25
  s.require_path = 'lib'
25
26
  s.autorequire = ''
@@ -41,16 +42,39 @@ Spec::Rake::SpecTask.new('spec:rcov') do |t|
41
42
  t.rcov_opts = ['--exclude', 'spec']
42
43
  end
43
44
 
45
+
46
+ def conformance_version(dir)
47
+ Dir[File.join(dir, '*')].inject(Digest::SHA1.new){|digest, file| digest.update(Digest::SHA1.file(file).hexdigest) }
48
+ end
49
+
44
50
  namespace :test do
45
51
  namespace :conformance do
52
+
53
+
46
54
  desc "Update conformance testing data"
47
55
  task :update do
48
- dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
49
56
  puts "Updating conformance data ... "
50
- system("cd #{dir} && git pull origin master") || exit(1)
57
+ system("git submodule init") || raise("Failed to init submodule")
58
+ system("git submodule update") || raise("Failed to update submodule")
51
59
  puts "Updating conformance data ... DONE"
52
60
  end
53
61
 
62
+ desc "Change conformance test data to the lastest version"
63
+ task :latest => ['conformance:update'] do
64
+ current_dir = File.dirname(__FILE__)
65
+ submodule_dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance")
66
+ version_before = conformance_version(submodule_dir)
67
+ system("cd #{submodule_dir} && git pull origin master") || raise("Failed to pull submodule version")
68
+ system("cd #{current_dir}")
69
+ if conformance_version(submodule_dir) != version_before
70
+ system("cd #{current_dir} && git add #{submodule_dir}") || raise("Failed to add upgrade files")
71
+ system("git commit -m \"Upgraded to the latest conformance suite\" #{submodule_dir}") || raise("Failed to commit upgraded conformacne data")
72
+ puts "Upgraded conformance suite."
73
+ else
74
+ puts "No conformance suite changes."
75
+ end
76
+ end
77
+
54
78
  desc "Run conformance test suite"
55
79
  task :run do
56
80
  ruby "test/conformance_test.rb"
@@ -58,7 +82,7 @@ namespace :test do
58
82
  end
59
83
 
60
84
  desc "Run conformance test suite"
61
- task :conformance => ['conformance:update', 'conformance:run'] do
85
+ task :conformance => ['conformance:latest', 'conformance:run'] do
62
86
  end
63
87
  end
64
88
 
data/lib/autolink.rb CHANGED
@@ -5,6 +5,8 @@ module Twitter
5
5
  module Autolink
6
6
  include ActionView::Helpers::TagHelper #tag_options needed by auto_link
7
7
 
8
+ WWW_REGEX = /www\./i #:nodoc:
9
+
8
10
  # Default CSS class for auto-linked URLs
9
11
  DEFAULT_URL_CLASS = "tweet-url"
10
12
  # Default CSS class for auto-linked lists (along with the url class)
@@ -13,6 +15,8 @@ module Twitter
13
15
  DEFAULT_USERNAME_CLASS = "username"
14
16
  # Default CSS class for auto-linked hashtags (along with the url class)
15
17
  DEFAULT_HASHTAG_CLASS = "hashtag"
18
+ # HTML attribute for robot nofollow behavior (default)
19
+ HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\""
16
20
 
17
21
  # Add <tt><a></a></tt> tags around the usernames, lists, hashtags and URLs in the provided <tt>text</tt>. The
18
22
  # <tt><a></tt> tags can be controlled with the following entries in the <tt>options</tt>
@@ -25,9 +29,14 @@ module Twitter
25
29
  # <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
26
30
  # <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
27
31
  # <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute on hashtag links. The <tt>#hashtag</tt> (minus the <tt>#</tt>) will be appended at the end of this.
28
- def auto_link(text, options = {}, &block) # :yields: hashtag_or_list_or_username
29
- options = options.dup
30
- auto_link_usernames_or_lists(auto_link_urls_custom(auto_link_hashtags(text), options, &block), &block)
32
+ # <tt>:suppress_lists</tt>:: disable auto-linking to lists
33
+ # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
34
+ def auto_link(text, options = {})
35
+ auto_link_usernames_or_lists(
36
+ auto_link_urls_custom(
37
+ auto_link_hashtags(text, options),
38
+ options),
39
+ options)
31
40
  end
32
41
 
33
42
  # Add <tt><a></a></tt> tags around the usernames and lists in the provided <tt>text</tt>. The
@@ -39,6 +48,8 @@ module Twitter
39
48
  # <tt>:username_class</tt>:: class to add to username <tt><a></tt> tags
40
49
  # <tt>:username_url_base</tt>:: the value for <tt>href</tt> attribute on username links. The <tt>@username</tt> (minus the <tt>@</tt>) will be appended at the end of this.
41
50
  # <tt>:list_url_base</tt>:: the value for <tt>href</tt> attribute on list links. The <tt>@username/list</tt> (minus the <tt>@</tt>) will be appended at the end of this.
51
+ # <tt>:suppress_lists</tt>:: disable auto-linking to lists
52
+ # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
42
53
  def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username
43
54
  options = options.dup
44
55
  options[:url_class] ||= DEFAULT_URL_CLASS
@@ -46,18 +57,19 @@ module Twitter
46
57
  options[:username_class] ||= DEFAULT_USERNAME_CLASS
47
58
  options[:username_url_base] ||= "http://twitter.com/"
48
59
  options[:list_url_base] ||= "http://twitter.com/"
60
+ extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
49
61
 
50
62
  text.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do
51
63
  if $4 && !options[:suppress_lists]
52
64
  # the link is a list
53
65
  text = list = "#{$3}#{$4}"
54
66
  text = yield(list) if block_given?
55
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\">#{text}</a>"
67
+ "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:list_class]}\" href=\"#{options[:list_url_base]}#{list.downcase}\"#{extra_html}>#{text}</a>"
56
68
  else
57
69
  # this is a screen name
58
70
  text = $3
59
71
  text = yield(text) if block_given?
60
- "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{$3}\">#{text}</a>"
72
+ "#{$1}#{$2}<a class=\"#{options[:url_class]} #{options[:username_class]}\" href=\"#{options[:username_url_base]}#{text}\"#{extra_html}>#{text}</a>"
61
73
  end
62
74
  end
63
75
  end
@@ -69,31 +81,36 @@ module Twitter
69
81
  # <tt>:url_class</tt>:: class to add to all <tt><a></tt> tags
70
82
  # <tt>:hashtag_class</tt>:: class to add to hashtag <tt><a></tt> tags
71
83
  # <tt>:hashtag_url_base</tt>:: the value for <tt>href</tt> attribute. The hashtag text (minus the <tt>#</tt>) will be appended at the end of this.
72
- #
84
+ # <tt>:suppress_no_follow</tt>:: Do not add <tt>rel="nofollow"</tt> to auto-linked items
73
85
  def auto_link_hashtags(text, options = {}) # :yields: hashtag_text
74
86
  options = options.dup
75
87
  options[:url_class] ||= DEFAULT_URL_CLASS
76
88
  options[:hashtag_class] ||= DEFAULT_HASHTAG_CLASS
77
89
  options[:hashtag_url_base] ||= "http://twitter.com/search?q=%23"
90
+ extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]
78
91
 
79
92
  text.gsub(Twitter::Regex[:auto_link_hashtags]) do
80
93
  before = $1
81
94
  hash = $2
82
95
  text = $3
83
96
  text = yield(text) if block_given?
84
- "#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\">#{hash}#{text}</a>"
97
+ "#{before}<a href=\"#{options[:hashtag_url_base]}#{text}\" title=\"##{text}\" class=\"#{options[:url_class]} #{options[:hashtag_class]}\"#{extra_html}>#{hash}#{text}</a>"
85
98
  end
86
99
  end
87
100
 
88
101
  # Add <tt><a></a></tt> tags around the URLs in the provided <tt>text</tt>. Any
89
102
  # elements in the <tt>href_options</tt> hash will be converted to HTML attributes
90
- # and place in the <tt><a></tt> tag.
103
+ # and placed in the <tt><a></tt> tag. Unless <tt>href_options</tt> contains <tt>:suppress_no_follow</tt>
104
+ # the <tt>rel="nofollow"</tt> attribute will be added.
91
105
  def auto_link_urls_custom(text, href_options = {})
106
+ options = href_options.dup
107
+ options[:rel] = "nofollow" unless options.delete(:suppress_no_follow)
108
+
92
109
  text.gsub(Twitter::Regex[:valid_url]) do
93
110
  all, before, url, protocol = $1, $2, $3, $4
94
- options = tag_options(href_options.stringify_keys) || ""
95
- full_url = (protocol == "www." ? "http://#{url}" : url)
96
- "#{before}<a href=\"#{full_url}\"#{options}>#{url}</a>"
111
+ html_attrs = tag_options(options.stringify_keys) || ""
112
+ full_url = (protocol =~ WWW_REGEX ? "http://#{url}" : url)
113
+ "#{before}<a href=\"#{full_url}\"#{html_attrs}>#{url}</a>"
97
114
  end
98
115
  end
99
116
 
data/lib/regex.rb CHANGED
@@ -26,7 +26,7 @@ module Twitter
26
26
  ].flatten.freeze
27
27
  REGEXEN[:spaces] = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('|'))
28
28
 
29
- REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})/
29
+ REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})(?!@)/
30
30
  REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*[@@]([a-zA-Z0-9_]{1,20})/o
31
31
 
32
32
  REGEXEN[:list_name] = /^[a-zA-Z\x80-\xff].{0,79}$/
@@ -42,14 +42,14 @@ module Twitter
42
42
  REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\&lt;\|:~\(|\}:o\{|:\-\[|\&gt;o\&lt;|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
43
43
 
44
44
  # URL related hash regex collection
45
- REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]|^)/
46
- REGEXEN[:valid_domain] = /[a-z0-9\.-]+\.[a-z]{2,}(?::[0-9]+)?/i
47
- REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:@&=\+\$\/%#\[\]\-_\.,~]/i
45
+ REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]|^|\:)/
46
+ REGEXEN[:valid_domain] = /(?:[\.-]|[^[:punct:]])+\.[a-z]{2,}(?::[0-9]+)?/i
47
+ REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
48
48
  # Valid end-of-path characters (so /foo. does not gobble the period).
49
49
  # 1. Allow ) for Wikipedia URLs.
50
50
  # 2. Allow =&# for empty URL parameters and other URL-join artifacts
51
51
  REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9\)=#\/]/i
52
- REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:@&=\+\$\/%#\[\]\-_\.,~]/i
52
+ REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i
53
53
  REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#]/i
54
54
  REGEXEN[:valid_url] = %r{
55
55
  ( # $1 total match
@@ -71,4 +71,4 @@ module Twitter
71
71
  REGEXEN[key]
72
72
  end
73
73
  end
74
- end
74
+ end
@@ -346,6 +346,14 @@ describe Twitter::Autolink do
346
346
  end
347
347
  end
348
348
 
349
+ context "when preceded by a :" do
350
+ def original_text; "Check this out @hoverbird:#{url}"; end
351
+
352
+ it "should be linked" do
353
+ @autolinked_text.should have_autolinked_url(url)
354
+ end
355
+ end
356
+
349
357
  context "with a URL ending in allowed punctuation" do
350
358
  it "does not consume ending punctuation" do
351
359
  matcher = TestAutolink.new
@@ -358,7 +366,7 @@ describe Twitter::Autolink do
358
366
  context "with a URL preceded in forbidden characters" do
359
367
  it "should not be linked" do
360
368
  matcher = TestAutolink.new
361
- %w| \ ' / : ! = |.each do |char|
369
+ %w| \ ' / ! = |.each do |char|
362
370
  matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url)
363
371
  end
364
372
  end
@@ -424,4 +432,4 @@ describe Twitter::Autolink do
424
432
  end
425
433
  end
426
434
 
427
- end
435
+ end
@@ -108,8 +108,6 @@ describe Twitter::Extractor do
108
108
  "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
109
109
  "http://somehost.com:3000",
110
110
  "http://x.com/~matthew+%-x",
111
- "http://x.com/~matthew+%-,.;@:x",
112
- "http://x.com/,.;@:x",
113
111
  "http://en.wikipedia.org/wiki/Primer_(film)",
114
112
  "http://www.ams.org/bookstore-getitem/item=mbk-59",
115
113
  "http://chilp.it/?77e8fd",
data/spec/regex_spec.rb CHANGED
@@ -11,11 +11,15 @@ describe "Twitter::Regex regular expressions" do
11
11
  "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
12
12
  "http://somehost.com:3000",
13
13
  "http://x.com/~matthew+%-x",
14
- "http://x.com/~matthew+%-,.;@:x",
15
- "http://x.com/,.;@:x",
16
14
  "http://en.wikipedia.org/wiki/Primer_(film)",
17
15
  "http://www.ams.org/bookstore-getitem/item=mbk-59",
18
16
  "http://chilp.it/?77e8fd",
17
+ "www.foobar.com",
18
+ "WWW.FOOBAR.COM",
19
+ "http://tell.me/why",
20
+ "http://longtlds.mobi",
21
+ "http://✪df.ws/ejp",
22
+ "http://日本.com"
19
23
  ]
20
24
 
21
25
  @urls.each do |url|
@@ -31,14 +35,20 @@ describe "Twitter::Regex regular expressions" do
31
35
  end
32
36
 
33
37
  describe "invalid URLS" do
34
- it "does not link urls with invalid_domains" do
38
+ it "does not link urls with invalid characters" do
35
39
  [ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
36
40
  "http://no-tld",
37
41
  "http://tld-too-short.x",
38
42
  "http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
39
- "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
43
+ "http://doman_dash_2314352345_dfasd.foo-cow_4352.com",
40
44
  ].each {|url| url.should_not have_autolinked_url(url)}
41
45
  end
46
+
47
+ it "does not link domains beginning with a hypen" do
48
+ pending
49
+ "http://-doman_dash_2314352345_dfasd.com".should_not match_autolink_expression
50
+ end
51
+
42
52
  end
43
53
 
44
54
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter-text
3
3
  version: !ruby/object:Gem::Version
4
- version: "1.0"
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Sanford
@@ -9,11 +9,11 @@ autorequire: ""
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-27 00:00:00 -08:00
12
+ date: 2010-02-10 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: action_view
16
+ name: actionpack
17
17
  type: :runtime
18
18
  version_requirement:
19
19
  version_requirements: !ruby/object:Gem::Requirement