twitter-text 1.2.4 → 1.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +64 -0
- data/Rakefile +8 -60
- data/lib/autolink.rb +9 -3
- data/lib/extractor.rb +34 -22
- data/lib/hithighlighter.rb +0 -1
- data/lib/regex.rb +8 -7
- data/lib/twitter-text.rb +4 -7
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/spec/autolinking_spec.rb +7 -7
- data/spec/spec_helper.rb +22 -16
- data/spec/test_urls.rb +2 -2
- data/spec/twitter_text_spec.rb +20 -0
- data/test/conformance_test.rb +126 -0
- data/twitter-text.gemspec +23 -0
- metadata +84 -9
data/.gitignore
ADDED
data/.gitmodules
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
twitter-text (1.2.5)
|
5
|
+
actionpack
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
abstract (1.0.0)
|
11
|
+
actionpack (3.0.1)
|
12
|
+
activemodel (= 3.0.1)
|
13
|
+
activesupport (= 3.0.1)
|
14
|
+
builder (~> 2.1.2)
|
15
|
+
erubis (~> 2.6.6)
|
16
|
+
i18n (~> 0.4.1)
|
17
|
+
rack (~> 1.2.1)
|
18
|
+
rack-mount (~> 0.6.12)
|
19
|
+
rack-test (~> 0.5.4)
|
20
|
+
tzinfo (~> 0.3.23)
|
21
|
+
activemodel (3.0.1)
|
22
|
+
activesupport (= 3.0.1)
|
23
|
+
builder (~> 2.1.2)
|
24
|
+
i18n (~> 0.4.1)
|
25
|
+
activesupport (3.0.1)
|
26
|
+
builder (2.1.2)
|
27
|
+
diff-lcs (1.1.2)
|
28
|
+
erubis (2.6.6)
|
29
|
+
abstract (>= 1.0.0)
|
30
|
+
i18n (0.4.2)
|
31
|
+
nokogiri (1.4.3.1)
|
32
|
+
nokogiri (1.4.3.1-java)
|
33
|
+
weakling (>= 0.0.3)
|
34
|
+
rack (1.2.1)
|
35
|
+
rack-mount (0.6.13)
|
36
|
+
rack (>= 1.0.0)
|
37
|
+
rack-test (0.5.6)
|
38
|
+
rack (>= 1.0)
|
39
|
+
rake (0.8.7)
|
40
|
+
rspec (2.1.0)
|
41
|
+
rspec-core (~> 2.1.0)
|
42
|
+
rspec-expectations (~> 2.1.0)
|
43
|
+
rspec-mocks (~> 2.1.0)
|
44
|
+
rspec-core (2.1.0)
|
45
|
+
rspec-expectations (2.1.0)
|
46
|
+
diff-lcs (~> 1.1.2)
|
47
|
+
rspec-mocks (2.1.0)
|
48
|
+
simplecov (0.3.7)
|
49
|
+
simplecov-html (>= 0.3.7)
|
50
|
+
simplecov-html (0.3.9)
|
51
|
+
tzinfo (0.3.23)
|
52
|
+
weakling (0.0.4-java)
|
53
|
+
|
54
|
+
PLATFORMS
|
55
|
+
java
|
56
|
+
ruby
|
57
|
+
|
58
|
+
DEPENDENCIES
|
59
|
+
actionpack
|
60
|
+
nokogiri
|
61
|
+
rake
|
62
|
+
rspec
|
63
|
+
simplecov
|
64
|
+
twitter-text!
|
data/Rakefile
CHANGED
@@ -1,50 +1,13 @@
|
|
1
|
-
require '
|
2
|
-
|
3
|
-
require 'rake/rdoctask'
|
4
|
-
require 'rubygems/specification'
|
5
|
-
require 'date'
|
6
|
-
|
7
|
-
gem 'rspec'
|
8
|
-
require 'spec/rake/spectask'
|
9
|
-
require 'spec/rake/verify_rcov'
|
10
|
-
require 'digest'
|
11
|
-
|
12
|
-
spec = Gem::Specification.new do |s|
|
13
|
-
s.name = "twitter-text"
|
14
|
-
s.version = "1.2.4"
|
15
|
-
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian"]
|
16
|
-
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com"]
|
17
|
-
s.homepage = "http://twitter.com"
|
18
|
-
s.description = s.summary = "A gem that provides text handling for Twitter"
|
19
|
-
|
20
|
-
s.platform = Gem::Platform::RUBY
|
21
|
-
s.has_rdoc = true
|
22
|
-
s.summary = "Twitter text handling library"
|
23
|
-
|
24
|
-
s.add_dependency "actionpack"
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
25
3
|
|
26
|
-
|
27
|
-
s.autorequire = ''
|
28
|
-
s.files = %w(LICENSE README.rdoc Rakefile TODO) + Dir.glob("{lib,spec}/**/*")
|
29
|
-
end
|
30
|
-
|
31
|
-
task :default => :spec
|
32
|
-
|
33
|
-
desc "Run specs"
|
34
|
-
Spec::Rake::SpecTask.new do |t|
|
35
|
-
t.spec_files = FileList['spec/**/*_spec.rb']
|
36
|
-
t.spec_opts = %w(-fs --color)
|
37
|
-
t.libs << ["spec", '.']
|
38
|
-
end
|
4
|
+
task :default => ["spec", "test:conformance"]
|
39
5
|
|
40
|
-
|
41
|
-
|
42
|
-
t.spec_files = FileList['spec/**/*.rb']
|
43
|
-
t.rcov = true
|
44
|
-
t.rcov_opts = ['--exclude', 'spec']
|
45
|
-
end
|
6
|
+
require 'rspec/core/rake_task'
|
7
|
+
RSpec::Core::RakeTask.new(:spec)
|
46
8
|
|
47
9
|
def conformance_version(dir)
|
10
|
+
require 'digest'
|
48
11
|
Dir[File.join(dir, '*')].inject(Digest::SHA1.new){|digest, file| digest.update(Digest::SHA1.file(file).hexdigest) }
|
49
12
|
end
|
50
13
|
|
@@ -85,6 +48,7 @@ namespace :test do
|
|
85
48
|
end
|
86
49
|
end
|
87
50
|
|
51
|
+
require 'rake/rdoctask'
|
88
52
|
namespace :doc do
|
89
53
|
Rake::RDocTask.new do |rd|
|
90
54
|
rd.main = "README.rdoc"
|
@@ -93,22 +57,6 @@ namespace :doc do
|
|
93
57
|
end
|
94
58
|
end
|
95
59
|
|
96
|
-
|
97
|
-
pkg.gem_spec = spec
|
98
|
-
end
|
99
|
-
|
100
|
-
desc "install the gem locally"
|
101
|
-
task :install => [:package] do
|
102
|
-
sh %{sudo gem install pkg/#{GEM}-#{GEM_VERSION}}
|
103
|
-
end
|
104
|
-
|
105
|
-
desc "create a gemspec file"
|
106
|
-
task :make_spec do
|
107
|
-
File.open("#{GEM}.gemspec", "w") do |file|
|
108
|
-
file.puts spec.to_ruby
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
desc "runs cruise control build"
|
60
|
+
desc "Run cruise control build"
|
113
61
|
task :cruise => [:spec, 'test:conformance'] do
|
114
62
|
end
|
data/lib/autolink.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
module Twitter
|
3
2
|
# A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link
|
4
3
|
# usernames, lists, hashtags and URLs.
|
@@ -140,10 +139,17 @@ module Twitter
|
|
140
139
|
|
141
140
|
text.gsub(Twitter::Regex[:valid_url]) do
|
142
141
|
all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7
|
143
|
-
if !protocol.blank? || domain =~ Twitter::Regex[:
|
142
|
+
if !protocol.blank? # || domain =~ Twitter::Regex[:probable_tld_domain]
|
144
143
|
html_attrs = tag_options(options.stringify_keys) || ""
|
145
144
|
full_url = ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url)
|
146
145
|
"#{before}<a href=\"#{html_escape(full_url)}\"#{html_attrs}>#{html_escape(url)}</a>"
|
146
|
+
elsif all =~ Twitter::Regex[:probable_tld_domain]
|
147
|
+
before_tld, tld_domain = $1, $2
|
148
|
+
|
149
|
+
html_attrs = tag_options(options.stringify_keys) || ""
|
150
|
+
full_url = "http://#{tld_domain}"
|
151
|
+
prefix = (before_tld == before ? before : "#{before}#{before_tld}")
|
152
|
+
"#{prefix}<a href=\"#{html_escape(full_url)}\"#{html_attrs}>#{html_escape(tld_domain)}</a>"
|
147
153
|
else
|
148
154
|
all
|
149
155
|
end
|
@@ -151,4 +157,4 @@ module Twitter
|
|
151
157
|
end
|
152
158
|
|
153
159
|
end
|
154
|
-
end
|
160
|
+
end
|
data/lib/extractor.rb
CHANGED
@@ -20,18 +20,23 @@ class String
|
|
20
20
|
char_array
|
21
21
|
end
|
22
22
|
end
|
23
|
+
end
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
index(sub_str, position)
|
25
|
+
# Helper functions to return character offsets instead of byte offsets.
|
26
|
+
class MatchData
|
27
|
+
def char_begin(n)
|
28
|
+
if string.respond_to? :codepoints
|
29
|
+
self.begin(n)
|
30
30
|
else
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
string[0, self.begin(n)].char_length
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def char_end(n)
|
36
|
+
if string.respond_to? :codepoints
|
37
|
+
self.end(n)
|
38
|
+
else
|
39
|
+
string[0, self.end(n)].char_length
|
35
40
|
end
|
36
41
|
end
|
37
42
|
end
|
@@ -63,14 +68,14 @@ module Twitter
|
|
63
68
|
return [] unless text
|
64
69
|
|
65
70
|
possible_screen_names = []
|
66
|
-
position = 0
|
67
71
|
text.to_s.scan(Twitter::Regex[:extract_mentions]) do |before, sn, after|
|
72
|
+
extract_mentions_match_data = $~
|
68
73
|
unless after =~ Twitter::Regex[:end_screen_name_match]
|
69
|
-
start_position =
|
70
|
-
|
74
|
+
start_position = extract_mentions_match_data.char_begin(2) - 1
|
75
|
+
end_position = extract_mentions_match_data.char_end(2)
|
71
76
|
possible_screen_names << {
|
72
77
|
:screen_name => sn,
|
73
|
-
:indices => [start_position,
|
78
|
+
:indices => [start_position, end_position]
|
74
79
|
}
|
75
80
|
end
|
76
81
|
end
|
@@ -117,14 +122,22 @@ module Twitter
|
|
117
122
|
urls = []
|
118
123
|
position = 0
|
119
124
|
text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
125
|
+
valid_url_match_data = $~
|
126
|
+
if !protocol.blank?
|
127
|
+
start_position = valid_url_match_data.char_begin(3)
|
128
|
+
end_position = valid_url_match_data.char_end(3)
|
124
129
|
urls << {
|
125
130
|
:url => ((protocol =~ Twitter::Regex[:www] || protocol.blank?) ? "http://#{url}" : url),
|
126
131
|
:indices => [start_position, end_position]
|
127
132
|
}
|
133
|
+
elsif all =~ Twitter::Regex[:probable_tld_domain]
|
134
|
+
tld_domain = $2
|
135
|
+
start_position = valid_url_match_data.char_begin(1) + $~.char_begin(2)
|
136
|
+
end_position = valid_url_match_data.char_begin(1) + $~.char_end(2)
|
137
|
+
urls << {
|
138
|
+
:url => "http://#{tld_domain}",
|
139
|
+
:indices => [start_position, end_position]
|
140
|
+
}
|
128
141
|
end
|
129
142
|
end
|
130
143
|
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last } if block_given?
|
@@ -153,13 +166,12 @@ module Twitter
|
|
153
166
|
return [] unless text
|
154
167
|
|
155
168
|
tags = []
|
156
|
-
position = 0
|
157
169
|
text.scan(Twitter::Regex[:auto_link_hashtags]) do |before, hash, hash_text|
|
158
|
-
start_position =
|
159
|
-
|
170
|
+
start_position = $~.char_begin(2)
|
171
|
+
end_position = $~.char_end(3)
|
160
172
|
tags << {
|
161
173
|
:hashtag => hash_text,
|
162
|
-
:indices => [start_position,
|
174
|
+
:indices => [start_position, end_position]
|
163
175
|
}
|
164
176
|
end
|
165
177
|
tags.each{|tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
|
data/lib/hithighlighter.rb
CHANGED
data/lib/regex.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
3
2
|
module Twitter
|
4
3
|
# A collection of regular expressions for parsing Tweet text. The regular expression
|
5
4
|
# list is frozen at load time to ensure immutability. These regular expressions are
|
@@ -31,15 +30,17 @@ module Twitter
|
|
31
30
|
REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o
|
32
31
|
REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
|
33
32
|
|
34
|
-
major, minor, patch = RUBY_VERSION.split(
|
35
|
-
if major.to_i >= 1 && minor.to_i >= 9
|
33
|
+
major, minor, patch = RUBY_VERSION.split('.')
|
34
|
+
if major.to_i >= 2 || major.to_i == 1 && minor.to_i >= 9 || (defined?(RUBY_ENGINE) && ["jruby", "rbx"].include?(RUBY_ENGINE))
|
36
35
|
REGEXEN[:list_name] = /[a-zA-Z][a-zA-Z0-9_\-\u0080-\u00ff]{0,24}/
|
37
36
|
else
|
38
|
-
# This line barfs at compile time in Ruby 1.9.
|
37
|
+
# This line barfs at compile time in Ruby 1.9, JRuby, or Rubinius.
|
39
38
|
REGEXEN[:list_name] = eval("/[a-zA-Z][a-zA-Z0-9_\\-\x80-\xff]{0,24}/")
|
40
39
|
end
|
41
40
|
|
42
|
-
# Latin accented characters
|
41
|
+
# Latin accented characters
|
42
|
+
# Excludes 0xd7 from the range (the multiplication sign, confusable with "x").
|
43
|
+
# Also excludes 0xf7, the division sign
|
43
44
|
LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
|
44
45
|
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
|
45
46
|
|
@@ -47,7 +48,7 @@ module Twitter
|
|
47
48
|
|
48
49
|
# Characters considered valid in a hashtag but not at the beginning, where only a-z and 0-9 are valid.
|
49
50
|
HASHTAG_CHARACTERS = /[a-z0-9_#{LATIN_ACCENTS}]/io
|
50
|
-
REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/\?]+)(#|＃)([0-
|
51
|
+
REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/\?]+)(#|＃)([0-9a-z_]*[a-z_]+#{HASHTAG_CHARACTERS}*)/io
|
51
52
|
REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@＠]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o
|
52
53
|
REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/
|
53
54
|
|
@@ -56,7 +57,7 @@ module Twitter
|
|
56
57
|
REGEXEN[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
|
57
58
|
|
58
59
|
# For protocol-less URLs, we'll accept them if they end in one of a handful of likely TLDs
|
59
|
-
REGEXEN[:
|
60
|
+
REGEXEN[:probable_tld_domain] = /^(.*?)((?:[a-z0-9_\.\-]+)\.(?:com|net|org|gov|edu))$/i
|
60
61
|
|
61
62
|
REGEXEN[:www] = /www\./i
|
62
63
|
|
data/lib/twitter-text.rb
CHANGED
@@ -1,15 +1,12 @@
|
|
1
|
-
|
2
1
|
major, minor, patch = RUBY_VERSION.split('.')
|
3
2
|
|
4
|
-
if major == 1 && minor < 9
|
3
|
+
if major.to_i == 1 && minor.to_i < 9
|
5
4
|
# Ruby 1.8 KCODE check. Not needed on 1.9 and later.
|
6
|
-
raise("twitter-text requires the $KCODE variable be set to 'UTF8' or 'u'") unless [
|
5
|
+
raise("twitter-text requires the $KCODE variable be set to 'UTF8' or 'u'") unless $KCODE[0].chr =~ /u/i
|
7
6
|
end
|
8
7
|
|
9
|
-
|
10
|
-
|
11
|
-
# Needed for auto-linking
|
12
|
-
gem 'actionpack'
|
8
|
+
# External libraries required. (for gems, use: ruby -rubygems ...)
|
9
|
+
require 'action_pack'
|
13
10
|
require 'action_view'
|
14
11
|
|
15
12
|
require File.join(File.dirname(__FILE__), 'regex')
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'rubigen'
|
6
|
+
rescue LoadError
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rubigen'
|
9
|
+
end
|
10
|
+
require 'rubigen/scripts/destroy'
|
11
|
+
|
12
|
+
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
+
RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
|
14
|
+
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'rubigen'
|
6
|
+
rescue LoadError
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rubigen'
|
9
|
+
end
|
10
|
+
require 'rubigen/scripts/generate'
|
11
|
+
|
12
|
+
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
+
RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit]
|
14
|
+
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/spec/autolinking_spec.rb
CHANGED
@@ -36,7 +36,7 @@ describe Twitter::Autolink do
|
|
36
36
|
def original_text; "meet@the beach"; end
|
37
37
|
|
38
38
|
it "should not be linked" do
|
39
|
-
|
39
|
+
Nokogiri::HTML(@autolinked_text).search('a').should be_blank
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -132,7 +132,7 @@ describe Twitter::Autolink do
|
|
132
132
|
def original_text; "hello @/my-list"; end
|
133
133
|
|
134
134
|
it "should NOT be linked" do
|
135
|
-
|
135
|
+
Nokogiri::HTML(@autolinked_text).search('a').should be_blank
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
@@ -148,7 +148,7 @@ describe Twitter::Autolink do
|
|
148
148
|
def original_text; "meet@the/beach"; end
|
149
149
|
|
150
150
|
it "should not be linked" do
|
151
|
-
|
151
|
+
Nokogiri::HTML(@autolinked_text).search('a').should be_blank
|
152
152
|
end
|
153
153
|
end
|
154
154
|
|
@@ -304,9 +304,9 @@ describe Twitter::Autolink do
|
|
304
304
|
def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
|
305
305
|
|
306
306
|
it "should be linked" do
|
307
|
-
link =
|
307
|
+
link = Nokogiri::HTML(@autolinked_text).search('a')
|
308
308
|
(link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev"
|
309
|
-
link['href'].should == 'http://twitter.com/search?q=%23twj_dev'
|
309
|
+
link.first['href'].should == 'http://twitter.com/search?q=%23twj_dev'
|
310
310
|
end
|
311
311
|
end
|
312
312
|
|
@@ -468,9 +468,9 @@ describe Twitter::Autolink do
|
|
468
468
|
def original_text; "I like www.foobar.com dudes"; end
|
469
469
|
|
470
470
|
it "links to the original text with the full href" do
|
471
|
-
link =
|
471
|
+
link = Nokogiri::HTML(@autolinked_text).search('a')
|
472
472
|
link.inner_text.should == 'www.foobar.com'
|
473
|
-
link['href'].should == 'http://www.foobar.com'
|
473
|
+
link.first['href'].should == 'http://www.foobar.com'
|
474
474
|
end
|
475
475
|
end
|
476
476
|
|
data/spec/spec_helper.rb
CHANGED
@@ -1,21 +1,27 @@
|
|
1
1
|
$TESTING=true
|
2
|
+
$KCODE='u'
|
2
3
|
$:.push File.join(File.dirname(__FILE__), '..', 'lib')
|
3
4
|
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'simplecov'
|
7
|
+
SimpleCov.start do
|
8
|
+
add_group 'Libraries', 'lib'
|
9
|
+
end
|
10
|
+
|
11
|
+
require File.expand_path('../../lib/twitter-text', __FILE__)
|
12
|
+
require File.expand_path('../test_urls', __FILE__)
|
7
13
|
|
8
|
-
|
14
|
+
RSpec.configure do |config|
|
9
15
|
config.include TestUrls
|
10
16
|
end
|
11
17
|
|
12
|
-
|
18
|
+
RSpec::Matchers.define :match_autolink_expression do
|
13
19
|
match do |string|
|
14
20
|
Twitter::Regex[:valid_url].match(string)
|
15
21
|
end
|
16
22
|
end
|
17
23
|
|
18
|
-
|
24
|
+
RSpec::Matchers.define :match_autolink_expression_in do |text|
|
19
25
|
match do |url|
|
20
26
|
@match_data = Twitter::Regex[:valid_url].match(text)
|
21
27
|
@match_data && @match_data.to_s.strip == url
|
@@ -26,9 +32,9 @@ Spec::Matchers.define :match_autolink_expression_in do |text|
|
|
26
32
|
end
|
27
33
|
end
|
28
34
|
|
29
|
-
|
35
|
+
RSpec::Matchers.define :have_autolinked_url do |url|
|
30
36
|
match do |text|
|
31
|
-
@link =
|
37
|
+
@link = Nokogiri::HTML(text).search("a[@href='#{url}']")
|
32
38
|
@link &&
|
33
39
|
@link.inner_text &&
|
34
40
|
@link.inner_text == url
|
@@ -39,10 +45,10 @@ Spec::Matchers.define :have_autolinked_url do |url|
|
|
39
45
|
end
|
40
46
|
end
|
41
47
|
|
42
|
-
|
48
|
+
RSpec::Matchers.define :link_to_screen_name do |screen_name|
|
43
49
|
match do |text|
|
44
|
-
@link =
|
45
|
-
@link && @link.inner_text == screen_name && "http://twitter.com/#{screen_name}".downcase.should == @link['href']
|
50
|
+
@link = Nokogiri::HTML(text).search("a.username")
|
51
|
+
@link && @link.inner_text == screen_name && "http://twitter.com/#{screen_name}".downcase.should == @link.first['href']
|
46
52
|
end
|
47
53
|
|
48
54
|
failure_message_for_should do |text|
|
@@ -58,10 +64,10 @@ Spec::Matchers.define :link_to_screen_name do |screen_name|
|
|
58
64
|
end
|
59
65
|
end
|
60
66
|
|
61
|
-
|
67
|
+
RSpec::Matchers.define :link_to_list_path do |list_path|
|
62
68
|
match do |text|
|
63
|
-
@link =
|
64
|
-
!@link.nil? && @link.inner_text == list_path && "http://twitter.com/#{list_path}".downcase.should == @link['href']
|
69
|
+
@link = Nokogiri::HTML(text).search("a.list-slug")
|
70
|
+
!@link.nil? && @link.inner_text == list_path && "http://twitter.com/#{list_path}".downcase.should == @link.first['href']
|
65
71
|
end
|
66
72
|
|
67
73
|
failure_message_for_should do |text|
|
@@ -77,9 +83,9 @@ Spec::Matchers.define :link_to_list_path do |list_path|
|
|
77
83
|
end
|
78
84
|
end
|
79
85
|
|
80
|
-
|
86
|
+
RSpec::Matchers.define :have_autolinked_hashtag do |hashtag|
|
81
87
|
match do |text|
|
82
|
-
@link =
|
88
|
+
@link = Nokogiri::HTML(text).search("a[@href='http://twitter.com/search?q=#{CGI.escape hashtag}']")
|
83
89
|
@link &&
|
84
90
|
@link.inner_text &&
|
85
91
|
@link.inner_text == hashtag
|
data/spec/test_urls.rb
CHANGED
@@ -23,12 +23,12 @@ module TestUrls
|
|
23
23
|
"http://x.com/has/one/char/domain",
|
24
24
|
"http://t.co/nwcLTFF",
|
25
25
|
# "t.co/nwcLTFF"
|
26
|
-
]
|
26
|
+
] unless defined?(TestUrls::VALID)
|
27
27
|
|
28
28
|
INVALID = [
|
29
29
|
"http://no-tld",
|
30
30
|
"http://tld-too-short.x",
|
31
31
|
"http://-doman_dash.com"
|
32
|
-
]
|
32
|
+
] unless defined?(TestUrls::INVALID)
|
33
33
|
|
34
34
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
major, minor, patch = RUBY_VERSION.split('.')
|
4
|
+
if major.to_i == 1 && minor.to_i < 9
|
5
|
+
describe "base" do
|
6
|
+
before do
|
7
|
+
$KCODE = 'NONE'
|
8
|
+
end
|
9
|
+
|
10
|
+
after do
|
11
|
+
$KCODE = 'u'
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should raise with invalid KCODE on Ruby < 1.9" do
|
15
|
+
lambda do
|
16
|
+
require 'twitter-text'
|
17
|
+
end.should raise_error
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'yaml'
|
3
|
+
$KCODE = 'UTF8'
|
4
|
+
require File.dirname(__FILE__) + '/../lib/twitter-text'
|
5
|
+
|
6
|
+
class ConformanceTest < Test::Unit::TestCase
|
7
|
+
include Twitter::Extractor
|
8
|
+
include Twitter::Autolink
|
9
|
+
include Twitter::HitHighlighter
|
10
|
+
|
11
|
+
def setup
|
12
|
+
@conformance_dir = ENV['CONFORMANCE_DIR'] || File.join(File.dirname(__FILE__), 'twitter-text-conformance')
|
13
|
+
end
|
14
|
+
|
15
|
+
module ExtractorConformance
|
16
|
+
def test_replies_extractor_conformance
|
17
|
+
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :replies) do |description, expected, input|
|
18
|
+
assert_equal expected, extract_reply_screen_name(input), description
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_mentions_extractor_conformance
|
23
|
+
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :mentions) do |description, expected, input|
|
24
|
+
assert_equal expected, extract_mentioned_screen_names(input), description
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_mentions_with_indices_extractor_conformance
|
29
|
+
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :mentions_with_indices) do |description, expected, input|
|
30
|
+
expected = expected.map{|elem| elem.symbolize_keys }
|
31
|
+
assert_equal expected, extract_mentioned_screen_names_with_indices(input), description
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_url_extractor_conformance
|
36
|
+
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls) do |description, expected, input|
|
37
|
+
assert_equal expected, extract_urls(input), description
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_urls_with_indices_extractor_conformance
|
42
|
+
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls_with_indices) do |description, expected, input|
|
43
|
+
expected = expected.map{|elem| elem.symbolize_keys }
|
44
|
+
assert_equal expected, extract_urls_with_indices(input), description
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_hashtag_extractor_conformance
|
49
|
+
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :hashtags) do |description, expected, input|
|
50
|
+
assert_equal expected, extract_hashtags(input), description
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_hashtags_with_indices_extractor_conformance
|
55
|
+
run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :hashtags_with_indices) do |description, expected, input|
|
56
|
+
expected = expected.map{|elem| elem.symbolize_keys }
|
57
|
+
assert_equal expected, extract_hashtags_with_indices(input), description
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
include ExtractorConformance
|
62
|
+
|
63
|
+
module AutolinkConformance
|
64
|
+
def test_users_autolink_conformance
|
65
|
+
run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :usernames) do |description, expected, input|
|
66
|
+
assert_equal expected, auto_link_usernames_or_lists(input, :suppress_no_follow => true), description
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_lists_autolink_conformance
|
71
|
+
run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :lists) do |description, expected, input|
|
72
|
+
assert_equal expected, auto_link_usernames_or_lists(input, :suppress_no_follow => true), description
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_urls_autolink_conformance
|
77
|
+
run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :urls) do |description, expected, input|
|
78
|
+
assert_equal expected, auto_link_urls_custom(input, :suppress_no_follow => true), description
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_hashtags_autolink_conformance
|
83
|
+
run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :hashtags) do |description, expected, input|
|
84
|
+
assert_equal expected, auto_link_hashtags(input, :suppress_no_follow => true), description
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_all_autolink_conformance
|
89
|
+
run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :all) do |description, expected, input|
|
90
|
+
assert_equal expected, auto_link(input, :suppress_no_follow => true), description
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
include AutolinkConformance
|
95
|
+
|
96
|
+
module HitHighlighterConformance
|
97
|
+
|
98
|
+
def test_plain_text_conformance
|
99
|
+
run_conformance_test(File.join(@conformance_dir, 'hit_highlighting.yml'), :plain_text, true) do |config|
|
100
|
+
assert_equal config['expected'], hit_highlight(config['text'], config['hits']), config['description']
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_with_links_conformance
|
105
|
+
run_conformance_test(File.join(@conformance_dir, 'hit_highlighting.yml'), :with_links, true) do |config|
|
106
|
+
assert_equal config['expected'], hit_highlight(config['text'], config['hits']), config['description']
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
include HitHighlighterConformance
|
111
|
+
|
112
|
+
private
|
113
|
+
|
114
|
+
def run_conformance_test(file, test_type, hash_config = false, &block)
|
115
|
+
yaml = YAML.load_file(file)
|
116
|
+
assert yaml["tests"][test_type.to_s], "No such test suite: #{test_type.to_s}"
|
117
|
+
|
118
|
+
yaml["tests"][test_type.to_s].each do |test_info|
|
119
|
+
if hash_config
|
120
|
+
yield test_info
|
121
|
+
else
|
122
|
+
yield test_info['description'], test_info['expected'], test_info['text']
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
spec = Gem::Specification.new do |s|
|
2
|
+
s.name = "twitter-text"
|
3
|
+
s.version = "1.2.5"
|
4
|
+
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", "Raffi Krikorian"]
|
5
|
+
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", "raffi@twitter.com"]
|
6
|
+
s.homepage = "http://twitter.com"
|
7
|
+
s.description = s.summary = "A gem that provides text handling for Twitter"
|
8
|
+
|
9
|
+
s.platform = Gem::Platform::RUBY
|
10
|
+
s.has_rdoc = true
|
11
|
+
s.summary = "Twitter text handling library"
|
12
|
+
|
13
|
+
s.add_development_dependency "nokogiri"
|
14
|
+
s.add_development_dependency "rake"
|
15
|
+
s.add_development_dependency "rspec"
|
16
|
+
s.add_development_dependency "simplecov"
|
17
|
+
s.add_runtime_dependency "actionpack"
|
18
|
+
|
19
|
+
s.files = `git ls-files`.split("\n")
|
20
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
21
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
22
|
+
s.require_paths = ["lib"]
|
23
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 2
|
9
|
-
-
|
10
|
-
version: 1.2.
|
9
|
+
- 5
|
10
|
+
version: 1.2.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Sanford
|
@@ -15,15 +15,15 @@ authors:
|
|
15
15
|
- Ben Cherry
|
16
16
|
- Britt Selvitelle
|
17
17
|
- Raffi Krikorian
|
18
|
-
autorequire:
|
18
|
+
autorequire:
|
19
19
|
bindir: bin
|
20
20
|
cert_chain: []
|
21
21
|
|
22
|
-
date: 2010-11-
|
22
|
+
date: 2010-11-18 00:00:00 -08:00
|
23
23
|
default_executable:
|
24
24
|
dependencies:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
26
|
+
name: nokogiri
|
27
27
|
prerelease: false
|
28
28
|
requirement: &id001 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
@@ -34,8 +34,64 @@ dependencies:
|
|
34
34
|
segments:
|
35
35
|
- 0
|
36
36
|
version: "0"
|
37
|
-
type: :
|
37
|
+
type: :development
|
38
38
|
version_requirements: *id001
|
39
|
+
- !ruby/object:Gem::Dependency
|
40
|
+
name: rake
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
hash: 3
|
48
|
+
segments:
|
49
|
+
- 0
|
50
|
+
version: "0"
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: rspec
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
hash: 3
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
type: :development
|
66
|
+
version_requirements: *id003
|
67
|
+
- !ruby/object:Gem::Dependency
|
68
|
+
name: simplecov
|
69
|
+
prerelease: false
|
70
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
type: :development
|
80
|
+
version_requirements: *id004
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: actionpack
|
83
|
+
prerelease: false
|
84
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
hash: 3
|
90
|
+
segments:
|
91
|
+
- 0
|
92
|
+
version: "0"
|
93
|
+
type: :runtime
|
94
|
+
version_requirements: *id005
|
39
95
|
description: A gem that provides text handling for Twitter
|
40
96
|
email:
|
41
97
|
- matt@twitter.com
|
@@ -50,6 +106,11 @@ extensions: []
|
|
50
106
|
extra_rdoc_files: []
|
51
107
|
|
52
108
|
files:
|
109
|
+
- .gitignore
|
110
|
+
- .gitmodules
|
111
|
+
- .rspec
|
112
|
+
- Gemfile
|
113
|
+
- Gemfile.lock
|
53
114
|
- LICENSE
|
54
115
|
- README.rdoc
|
55
116
|
- Rakefile
|
@@ -61,14 +122,19 @@ files:
|
|
61
122
|
- lib/twitter-text.rb
|
62
123
|
- lib/unicode.rb
|
63
124
|
- lib/validation.rb
|
125
|
+
- script/destroy
|
126
|
+
- script/generate
|
64
127
|
- spec/autolinking_spec.rb
|
65
128
|
- spec/extractor_spec.rb
|
66
129
|
- spec/hithighlighter_spec.rb
|
67
130
|
- spec/regex_spec.rb
|
68
131
|
- spec/spec_helper.rb
|
69
132
|
- spec/test_urls.rb
|
133
|
+
- spec/twitter_text_spec.rb
|
70
134
|
- spec/unicode_spec.rb
|
71
135
|
- spec/validation_spec.rb
|
136
|
+
- test/conformance_test.rb
|
137
|
+
- twitter-text.gemspec
|
72
138
|
has_rdoc: true
|
73
139
|
homepage: http://twitter.com
|
74
140
|
licenses: []
|
@@ -103,5 +169,14 @@ rubygems_version: 1.3.7
|
|
103
169
|
signing_key:
|
104
170
|
specification_version: 3
|
105
171
|
summary: Twitter text handling library
|
106
|
-
test_files:
|
107
|
-
|
172
|
+
test_files:
|
173
|
+
- spec/autolinking_spec.rb
|
174
|
+
- spec/extractor_spec.rb
|
175
|
+
- spec/hithighlighter_spec.rb
|
176
|
+
- spec/regex_spec.rb
|
177
|
+
- spec/spec_helper.rb
|
178
|
+
- spec/test_urls.rb
|
179
|
+
- spec/twitter_text_spec.rb
|
180
|
+
- spec/unicode_spec.rb
|
181
|
+
- spec/validation_spec.rb
|
182
|
+
- test/conformance_test.rb
|