twitter-text 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,195 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Bare host object for the Twitter::Extractor mixin; the specs below call the
# extraction methods on an instance of this class.
class TestExtractor
  include Twitter::Extractor
end

# Specs for the four extraction entry points used below:
# extract_mentioned_screen_names, extract_reply_screen_name, extract_urls and
# extract_hashtags.
describe Twitter::Extractor do
  before do
    @extractor = TestExtractor.new
  end

  describe "mentions" do
    context "single screen name alone " do
      it "should be linked" do
        @extractor.extract_mentioned_screen_names("@alice").should == ["alice"]
      end

      it "should be linked with _" do
        @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"]
      end

      it "should be linked if numeric" do
        @extractor.extract_mentioned_screen_names("@1234").should == ["1234"]
      end
    end

    context "multiple screen names" do
      it "should both be linked" do
        @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"]
      end
    end

    context "screen names embedded in text" do
      it "should be linked in Latin text" do
        @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"]
      end

      it "should be linked in Japanese text" do
        @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
      end
    end

    it "should accept a block arugment and call it in order" do
      needed = ["alice", "bob"]
      @extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
        sn.should == needed.shift
      end
      # An empty list proves the block ran once per expected name.
      needed.should == []
    end
  end

  describe "replies" do
    context "should be extracted from" do
      it "should extract from lone name" do
        @extractor.extract_reply_screen_name("@alice").should == "alice"
      end

      it "should extract from the start" do
        @extractor.extract_reply_screen_name("@alice reply text").should == "alice"
      end

      it "should extract preceeded by a space" do
        @extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
      end

      it "should extract preceeded by a full-width space" do
        @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
      end
    end

    context "should not be extracted from" do
      it "should not be extracted when preceeded by text" do
        @extractor.extract_reply_screen_name("reply @alice text").should == nil
      end

      it "should not be extracted when preceeded by puctuation" do
        %w(. / _ - + # ! @).each do |punct|
          @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil
        end
      end
    end

    context "should accept a block arugment" do
      it "should call the block on match" do
        # Record what was yielded and assert afterwards: an expectation placed
        # only inside the block would pass silently if the block never ran.
        yielded = []
        @extractor.extract_reply_screen_name("@alice") do |sn|
          yielded << sn
        end
        yielded.should == ["alice"]
      end

      it "should not call the block on no match" do
        calls = 0
        @extractor.extract_reply_screen_name("not a reply") do |sn|
          calls += 1
        end
        calls.should == 0
      end
    end
  end

  describe "urls" do
    describe "matching URLS" do
      # One pair of examples is generated per URL at definition time.
      matching_urls = [
        "http://google.com",
        "http://foobar.com/#",
        "http://google.com/#foo",
        "http://google.com/#search?q=iphone%20-filter%3Alinks",
        "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
        "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
        "http://somehost.com:3000",
        "http://x.com/~matthew+%-x",
        "http://x.com/~matthew+%-,.;@:x",
        "http://x.com/,.;@:x",
        "http://en.wikipedia.org/wiki/Primer_(film)",
        "http://www.ams.org/bookstore-getitem/item=mbk-59",
        "http://chilp.it/?77e8fd",
      ]

      matching_urls.each do |url|
        it "should extract the URL #{url}" do
          @extractor.extract_urls(url).should == [url]
        end

        it "should match the URL #{url} when it's embedded in other text" do
          text = "Sweet url: #{url} I found. #awesome"
          @extractor.extract_urls(text).should == [url]
        end
      end
    end

    describe "invalid URLS" do
      it "does not link urls with invalid_domains" do
        # The original fixture listed the first URL twice; each case is
        # checked once here.
        [
          "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
          "http://no-tld",
          "http://tld-too-short.x",
        ].each { |url| @extractor.extract_urls(url).should == [] }
      end
    end
  end

  describe "hashtags" do
    context "extracts latin/numeric hashtags" do
      %w(text text123 123text).each do |hashtag|
        it "should extract ##{hashtag}" do
          @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
        end

        it "should extract ##{hashtag} within text" do
          @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
        end
      end
    end

    context "international hashtags" do

      context "should allow accents" do
        %w(mañana café münchen).each do |hashtag|
          it "should extract ##{hashtag}" do
            @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
          end

          it "should extract ##{hashtag} within text" do
            @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
          end
        end

        # U+00D7 and U+00F7 are expected to end the hashtag, leaving "pre".
        it "should not allow the multiplication character" do
          @extractor.extract_hashtags("#pre#{[0xd7].pack('U')}post").should == ["pre"]
        end

        it "should not allow the division character" do
          @extractor.extract_hashtags("#pre#{[0xf7].pack('U')}post").should == ["pre"]
        end
      end

      context "should NOT allow Japanese" do
        %w(会議中 ハッシュ).each do |hashtag|
          it "should NOT extract ##{hashtag}" do
            @extractor.extract_hashtags("##{hashtag}").should == []
          end

          it "should NOT extract ##{hashtag} within text" do
            @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == []
          end
        end
      end

    end

    it "should not extract numeric hashtags" do
      @extractor.extract_hashtags("#1234").should == []
    end
  end

end
@@ -0,0 +1,44 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for the URL pattern exposed as Twitter::Regex[:valid_url], checked
# through the custom matchers defined in spec_helper.
describe "Twitter::Regex regular expressions" do
  describe "matching URLS" do
    # A pair of examples is generated for every URL when the group is defined.
    sample_urls = [
      "http://google.com",
      "http://foobar.com/#",
      "http://google.com/#foo",
      "http://google.com/#search?q=iphone%20-filter%3Alinks",
      "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
      "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
      "http://somehost.com:3000",
      "http://x.com/~matthew+%-x",
      "http://x.com/~matthew+%-,.;@:x",
      "http://x.com/,.;@:x",
      "http://en.wikipedia.org/wiki/Primer_(film)",
      "http://www.ams.org/bookstore-getitem/item=mbk-59",
      "http://chilp.it/?77e8fd",
    ]

    sample_urls.each do |url|
      it "should match the URL #{url}" do
        url.should match_autolink_expression
      end

      it "should match the URL #{url} when it's embedded in other text" do
        text = "Sweet url: #{url} I found. #awesome"
        url.should match_autolink_expression_in(text)
      end
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid_domains" do
      rejected_urls = [
        "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
        "http://no-tld",
        "http://tld-too-short.x",
        "http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
        "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
      ]

      # NOTE(review): have_autolinked_url searches the given text for an <a>
      # element, but it is handed the raw URL here, so this expectation
      # presumably passes whatever the regex does - confirm, and consider
      # asserting against Twitter::Regex[:valid_url] directly.
      rejected_urls.each do |url|
        url.should_not have_autolinked_url(url)
      end
    end
  end

end
@@ -0,0 +1,86 @@
1
# Set the global $TESTING flag and append the sibling lib directory to the
# load path before anything is required.
$TESTING = true
$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
require 'cgi'

require 'twitter-text'
require 'hpricot'
6
+
7
# Matcher: passes when Twitter::Regex[:valid_url] finds a match anywhere in
# the actual string.
Spec::Matchers.define :match_autolink_expression do
  match { |string| Twitter::Regex[:valid_url].match(string) }
end
12
+
13
# Matcher: passes when the first Twitter::Regex[:valid_url] match in +text+,
# with surrounding whitespace stripped, equals the actual URL.
Spec::Matchers.define :match_autolink_expression_in do |text|
  match do |url|
    @match_data = Twitter::Regex[:valid_url].match(text)
    !@match_data.nil? && @match_data.to_s.strip == url
  end

  failure_message_for_should do |url|
    # @match_data is nil when the regex found nothing at all; calling
    # #captures on it would raise instead of reporting the failure.
    outcome = if @match_data
      "the match was #{@match_data.captures.inspect}"
    else
      "nothing matched"
    end
    "Expected to find url '#{url}' in text '#{text}', but #{outcome}"
  end
end
23
+
24
# Matcher: parses the actual text with Hpricot and passes when it contains an
# <a> element whose href and inner text are both +url+.
Spec::Matchers.define :have_autolinked_url do |url|
  match do |text|
    @link = Hpricot(text).at("a[@href='#{url}']")
    anchor_text = @link && @link.inner_text
    anchor_text && anchor_text == url
  end

  failure_message_for_should { |text| "Expected url '#{url}' to be autolinked in '#{text}'" }
end
36
+
37
# Matcher: parses the actual text with Hpricot and passes when the first
# "a.username" element has +screen_name+ as its inner text and the downcased
# "http://twitter.com/<screen_name>" as its href.
Spec::Matchers.define :link_to_screen_name do |screen_name|
  match do |text|
    @link = Hpricot(text).at("a.username")
    # Plain comparisons only: a nested `.should` would raise on mismatch
    # instead of returning false, which breaks `should_not` and bypasses the
    # failure messages below.
    !@link.nil? &&
      @link.inner_text == screen_name &&
      @link['href'] == "http://twitter.com/#{screen_name}".downcase
  end

  failure_message_for_should do |text|
    if @link
      "expected link #{@link.inner_text} with href #{@link['href']} to match screen_name #{screen_name}, but it does not"
    else
      # No a.username element was found, so there is no link to describe.
      "expected a link for screen_name #{screen_name} in '#{text}', but found no a.username element"
    end
  end

  failure_message_for_should_not do |text|
    # Only reached after a successful match, so @link is present here.
    "expected link #{@link.inner_text} with href #{@link['href']} not to match screen_name #{screen_name}, but it does"
  end

  description do
    "contain a link with the name and href pointing to the expected screen_name"
  end
end
55
+
56
# Matcher: parses the actual text with Hpricot and passes when the first
# "a.list-slug" element has +list_path+ as its inner text and the downcased
# "http://twitter.com/<list_path>" as its href.
Spec::Matchers.define :link_to_list_path do |list_path|
  match do |text|
    @link = Hpricot(text).at("a.list-slug")
    # Plain comparisons only: a nested `.should` would raise on mismatch
    # instead of returning false, which breaks `should_not` and bypasses the
    # failure messages below.
    !@link.nil? &&
      @link.inner_text == list_path &&
      @link['href'] == "http://twitter.com/#{list_path}".downcase
  end

  failure_message_for_should do |text|
    if @link
      "expected link #{@link.inner_text} with href #{@link['href']} to match the list path #{list_path}, but it does not"
    else
      # No a.list-slug element was found, so there is no link to describe.
      "expected a link for the list path #{list_path} in '#{text}', but found no a.list-slug element"
    end
  end

  failure_message_for_should_not do |text|
    # Only reached after a successful match, so @link is present here.
    "expected link #{@link.inner_text} with href #{@link['href']} not to match the list path #{list_path}, but it does"
  end

  description do
    "contain a link with the list title and an href pointing to the list path"
  end
end
74
+
75
# Matcher: parses the actual text with Hpricot and passes when it contains an
# <a> element whose href is the Twitter search URL for the CGI-escaped
# +hashtag+ and whose inner text is +hashtag+.
#
# CGI.escape comes from the standard library's cgi; the helper requires it
# explicitly rather than relying on another library having loaded it.
Spec::Matchers.define :have_autolinked_hashtag do |hashtag|
  match do |text|
    search_href = "http://twitter.com/search?q=#{CGI.escape(hashtag)}"
    @link = Hpricot(text).at("a[@href='#{search_href}']")
    @link &&
      @link.inner_text &&
      @link.inner_text == hashtag
  end

  failure_message_for_should do |text|
    "Expected hashtag #{hashtag} to be autolinked in '#{text}'"
  end
end
@@ -0,0 +1,30 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for the constants resolved under Twitter::Unicode (e.g. UFEB6).
describe Twitter::Unicode do

  it "should lazy-init constants" do
    # Statement order matters: the constant must be undefined before its
    # first reference and defined afterwards.
    Twitter::Unicode.const_defined?(:UFEB6).should == false
    Twitter::Unicode::UFEB6.should_not be_nil
    Twitter::Unicode::UFEB6.should be_kind_of(String)
    Twitter::Unicode.const_defined?(:UFEB6).should == true
  end

  it "should return corresponding character" do
    packed = [0xfeb6].pack('U')
    Twitter::Unicode::UFEB6.should == packed
  end

  it "should allow lowercase notation" do
    lowercase = Twitter::Unicode::Ufeb6
    canonical = Twitter::Unicode::UFEB6
    lowercase.should == canonical
    lowercase.should === canonical
  end

  it "should allow underscore notation" do
    underscored = Twitter::Unicode::U_FEB6
    canonical = Twitter::Unicode::UFEB6
    underscored.should == canonical
    underscored.should === canonical
  end

  it "should raise on invalid codepoints" do
    # NOTE(review): unlike the other examples this name has no leading "U" -
    # confirm the NameError reflects codepoint validation, not the name shape.
    lookup = lambda { Twitter::Unicode::FFFFFF }
    lookup.should raise_error(NameError)
  end

end
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Bare host object for the Twitter::Validation mixin; the specs below call
# tweet_invalid? on an instance of this class.
class TestValidation
  include Twitter::Validation
end

# Specs for tweet_invalid?, which the examples expect to return false for an
# acceptable tweet and a reason symbol (:invalid_characters, :too_long)
# otherwise.
describe Twitter::Validation do

  it "should disallow invalid BOM character" do
    TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters
    TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters
  end

  it "should disallow invalid U+FFFF character" do
    TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters
  end

  it "should disallow direction change characters" do
    [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char|
      TestValidation.new.tweet_invalid?("Invalid:#{char}").should == :invalid_characters
    end
  end

  it "should disallow non-Unicode" do
    # "\xff" is a single-byte escape (at most two hex digits); the trailing
    # "f0" is literal text.
    TestValidation.new.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters
  end

  it "should allow <= 140 combined accent characters" do
    # 'U*' packs every codepoint; a bare 'U' consumes only the first element,
    # which silently reduced this example to a plain "e" with no combining
    # accent.
    char = [0x65, 0x0301].pack('U*')
    TestValidation.new.tweet_invalid?(char * 139).should == false
    TestValidation.new.tweet_invalid?(char * 140).should == false
    TestValidation.new.tweet_invalid?(char * 141).should == :too_long
  end

  it "should allow <= 140 multi-byte characters" do
    char = [ 0x1d106 ].pack('U')
    TestValidation.new.tweet_invalid?(char * 139).should == false
    TestValidation.new.tweet_invalid?(char * 140).should == false
    TestValidation.new.tweet_invalid?(char * 141).should == :too_long
  end

end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: twitter-text
3
+ version: !ruby/object:Gem::Version
4
+ version: "1.0"
5
+ platform: ruby
6
+ authors:
7
+ - Matt Sanford
8
+ autorequire: ""
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-27 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: action_view
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: A gem that provides text handling for Twitter
26
+ email: matt@twitter.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - LICENSE
35
+ - README.rdoc
36
+ - Rakefile
37
+ - TODO
38
+ - lib/autolink.rb
39
+ - lib/extractor.rb
40
+ - lib/regex.rb
41
+ - lib/twitter-text.rb
42
+ - lib/unicode.rb
43
+ - lib/validation.rb
44
+ - spec/autolinking_spec.rb
45
+ - spec/extractor_spec.rb
46
+ - spec/regex_spec.rb
47
+ - spec/spec_helper.rb
48
+ - spec/unicode_spec.rb
49
+ - spec/validation_spec.rb
50
+ has_rdoc: true
51
+ homepage: http://twitter.com
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: "0"
64
+ version:
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: "0"
70
+ version:
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.3.5
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Twitter text handling library
78
+ test_files: []
79
+