twitter-text 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ class TestExtractor # Minimal host class: mixes in Twitter::Extractor so the specs below can call its extract_* methods on an instance.
4
+ include Twitter::Extractor
5
+ end
6
+
7
+ describe Twitter::Extractor do
8
+ before do
9
+ @extractor = TestExtractor.new
10
+ end
11
+
12
+ describe "mentions" do
13
+ context "single screen name alone " do
14
+ it "should be linked" do
15
+ @extractor.extract_mentioned_screen_names("@alice").should == ["alice"]
16
+ end
17
+
18
+ it "should be linked with _" do
19
+ @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"]
20
+ end
21
+
22
+ it "should be linked if numeric" do
23
+ @extractor.extract_mentioned_screen_names("@1234").should == ["1234"]
24
+ end
25
+ end
26
+
27
+ context "multiple screen names" do
28
+ it "should both be linked" do
29
+ @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"]
30
+ end
31
+ end
32
+
33
+ context "screen names embedded in text" do
34
+ it "should be linked in Latin text" do
35
+ @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"]
36
+ end
37
+
38
+ it "should be linked in Japanese text" do
39
+ @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
40
+ end
41
+ end
42
+
43
+ it "should accept a block arugment and call it in order" do
44
+ needed = ["alice", "bob"]
45
+ @extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
46
+ sn.should == needed.shift
47
+ end
48
+ needed.should == []
49
+ end
50
+ end
51
+
52
+ describe "replies" do
53
+ context "should be extracted from" do
54
+ it "should extract from lone name" do
55
+ @extractor.extract_reply_screen_name("@alice").should == "alice"
56
+ end
57
+
58
+ it "should extract from the start" do
59
+ @extractor.extract_reply_screen_name("@alice reply text").should == "alice"
60
+ end
61
+
62
+ it "should extract preceeded by a space" do
63
+ @extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
64
+ end
65
+
66
+ it "should extract preceeded by a full-width space" do
67
+ @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
68
+ end
69
+ end
70
+
71
+ context "should not be extracted from" do
72
+ it "should not be extracted when preceeded by text" do
73
+ @extractor.extract_reply_screen_name("reply @alice text").should == nil
74
+ end
75
+
76
+ it "should not be extracted when preceeded by puctuation" do
77
+ %w(. / _ - + # ! @).each do |punct|
78
+ @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil
79
+ end
80
+ end
81
+ end
82
+
83
+ context "should accept a block arugment" do
84
+ it "should call the block on match" do
85
+ @extractor.extract_reply_screen_name("@alice") do |sn|
86
+ sn.should == "alice"
87
+ end
88
+ end
89
+
90
+ it "should not call the block on no match" do
91
+ calls = 0
92
+ @extractor.extract_reply_screen_name("not a reply") do |sn|
93
+ calls += 1
94
+ end
95
+ calls.should == 0
96
+ end
97
+ end
98
+ end
99
+
100
+ describe "urls" do
101
+ describe "matching URLS" do
102
+ @urls = [
103
+ "http://google.com",
104
+ "http://foobar.com/#",
105
+ "http://google.com/#foo",
106
+ "http://google.com/#search?q=iphone%20-filter%3Alinks",
107
+ "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
108
+ "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
109
+ "http://somehost.com:3000",
110
+ "http://x.com/~matthew+%-x",
111
+ "http://x.com/~matthew+%-,.;@:x",
112
+ "http://x.com/,.;@:x",
113
+ "http://en.wikipedia.org/wiki/Primer_(film)",
114
+ "http://www.ams.org/bookstore-getitem/item=mbk-59",
115
+ "http://chilp.it/?77e8fd",
116
+ ]
117
+
118
+ @urls.each do |url|
119
+ it "should extract the URL #{url}" do
120
+ @extractor.extract_urls(url).should == [url]
121
+ end
122
+
123
+ it "should match the URL #{url} when it's embedded in other text" do
124
+ text = "Sweet url: #{url} I found. #awesome"
125
+ @extractor.extract_urls(text).should == [url]
126
+ end
127
+ end
128
+ end
129
+
130
+ describe "invalid URLS" do
131
+ it "does not link urls with invalid_domains" do
132
+ [ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
133
+ "http://no-tld",
134
+ "http://tld-too-short.x",
135
+ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
136
+ ].each {|url| @extractor.extract_urls(url).should == [] }
137
+ end
138
+ end
139
+ end
140
+
141
+ describe "hashtags" do
142
+ context "extracts latin/numeric hashtags" do
143
+ %w(text text123 123text).each do |hashtag|
144
+ it "should extract ##{hashtag}" do
145
+ @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
146
+ end
147
+
148
+ it "should extract ##{hashtag} within text" do
149
+ @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
150
+ end
151
+ end
152
+ end
153
+
154
+ context "international hashtags" do
155
+
156
+ context "should allow accents" do
157
+ %w(mañana café münchen).each do |hashtag|
158
+ it "should extract ##{hashtag}" do
159
+ @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
160
+ end
161
+
162
+ it "should extract ##{hashtag} within text" do
163
+ @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
164
+ end
165
+ end
166
+
167
+ it "should not allow the multiplication character" do
168
+ @extractor.extract_hashtags("#pre#{[0xd7].pack('U')}post").should == ["pre"]
169
+ end
170
+
171
+ it "should not allow the division character" do
172
+ @extractor.extract_hashtags("#pre#{[0xf7].pack('U')}post").should == ["pre"]
173
+ end
174
+ end
175
+
176
+ context "should NOT allow Japanese" do
177
+ %w(会議中 ハッシュ).each do |hashtag|
178
+ it "should NOT extract ##{hashtag}" do
179
+ @extractor.extract_hashtags("##{hashtag}").should == []
180
+ end
181
+
182
+ it "should NOT extract ##{hashtag} within text" do
183
+ @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == []
184
+ end
185
+ end
186
+ end
187
+
188
+ end
189
+
190
+ it "should not extract numeric hashtags" do
191
+ @extractor.extract_hashtags("#1234").should == []
192
+ end
193
+ end
194
+
195
+ end
@@ -0,0 +1,44 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe "Twitter::Regex regular expressions" do # Exercises Twitter::Regex[:valid_url] through the custom matchers defined in spec_helper
4
+ describe "matching URLS" do
5
+ @urls = [
6
+ "http://google.com",
7
+ "http://foobar.com/#",
8
+ "http://google.com/#foo",
9
+ "http://google.com/#search?q=iphone%20-filter%3Alinks",
10
+ "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
11
+ "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
12
+ "http://somehost.com:3000",
13
+ "http://x.com/~matthew+%-x",
14
+ "http://x.com/~matthew+%-,.;@:x",
15
+ "http://x.com/,.;@:x",
16
+ "http://en.wikipedia.org/wiki/Primer_(film)",
17
+ "http://www.ams.org/bookstore-getitem/item=mbk-59",
18
+ "http://chilp.it/?77e8fd",
19
+ ]
20
+
21
+ @urls.each do |url| # generates two examples per URL: bare, and embedded in a sentence
22
+ it "should match the URL #{url}" do
23
+ url.should match_autolink_expression
24
+ end
25
+
26
+ it "should match the URL #{url} when it's embedded in other text" do
27
+ text = "Sweet url: #{url} I found. #awesome"
28
+ url.should match_autolink_expression_in(text)
29
+ end
30
+ end
31
+ end
32
+
33
+ describe "invalid URLS" do
34
+ it "does not link urls with invalid_domains" do
35
+ [ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
36
+ "http://no-tld",
37
+ "http://tld-too-short.x",
38
+ "http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
39
+ "http://doman-dash_2314352345_dfasd.foo-cow_4352.com",
40
+ ].each {|url| url.should_not have_autolinked_url(url)} # NOTE(review): the subject is the raw URL string, not autolinked HTML, so have_autolinked_url (which looks for an <a> element) appears to pass trivially without exercising the regex - confirm intent; the first fixture is also listed twice
41
+ end
42
+ end
43
+
44
+ end
@@ -0,0 +1,86 @@
1
+ $TESTING=true
2
+ $:.push File.join(File.dirname(__FILE__), '..', 'lib')
3
+
4
+ require 'twitter-text'
5
+ require 'hpricot'
6
+
7
+ Spec::Matchers.define :match_autolink_expression do # Passes when Twitter::Regex[:valid_url] matches somewhere in the actual string
8
+ match do |string|
9
+ Twitter::Regex[:valid_url].match(string) # MatchData (truthy) on a match, nil otherwise
10
+ end
11
+ end
12
+
13
+ Spec::Matchers.define :match_autolink_expression_in do |text|
14
+ match do |url|
15
+ @match_data = Twitter::Regex[:valid_url].match(text)
16
+ @match_data && @match_data.to_s.strip == url
17
+ end
18
+
19
+ failure_message_for_should do |url|
20
+ "Expected to find url '#{url}' in text '#{text}', but the match was #{@match_data.captures}'"
21
+ end
22
+ end
23
+
24
+ Spec::Matchers.define :have_autolinked_url do |url| # Passes when the actual HTML contains an <a> whose href and inner text both equal +url+
25
+ match do |text|
26
+ @link = Hpricot(text).at("a[@href='#{url}']") # first anchor with exactly this href, or nil
27
+ @link &&
28
+ @link.inner_text &&
29
+ @link.inner_text == url
30
+ end
31
+
32
+ failure_message_for_should do |text|
33
+ "Expected url '#{url}' to be autolinked in '#{text}'"
34
+ end
35
+ end
36
+
37
+ Spec::Matchers.define :link_to_screen_name do |screen_name|
38
+ match do |text|
39
+ @link = Hpricot(text).at("a.username")
40
+ @link && @link.inner_text == screen_name && "http://twitter.com/#{screen_name}".downcase.should == @link['href']
41
+ end
42
+
43
+ failure_message_for_should do |text|
44
+ "expected link #{@link.inner_text} with href #{@link['href']} to match screen_name #{@screen_name}, but it does not"
45
+ end
46
+
47
+ failure_message_for_should_not do |text|
48
+ "expected link #{@link.inner_text} with href #{@link['href']} not to match screen_name #{@screen_name}, but it does"
49
+ end
50
+
51
+ description do
52
+ "contain a link with the name and href pointing to the expected screen_name"
53
+ end
54
+ end
55
+
56
+ Spec::Matchers.define :link_to_list_path do |list_path|
57
+ match do |text|
58
+ @link = Hpricot(text).at("a.list-slug")
59
+ !@link.nil? && @link.inner_text == list_path && "http://twitter.com/#{list_path}".downcase.should == @link['href']
60
+ end
61
+
62
+ failure_message_for_should do |text|
63
+ "expected link #{@link.inner_text} with href #{@link['href']} to match the list path #{list_path}, but it does not"
64
+ end
65
+
66
+ failure_message_for_should_not do |text|
67
+ "expected link #{@link.inner_text} with href #{@link['href']} not to match the list path #{@list_path}, but it does"
68
+ end
69
+
70
+ description do
71
+ "contain a link with the list title and an href pointing to the list path"
72
+ end
73
+ end
74
+
75
+ Spec::Matchers.define :have_autolinked_hashtag do |hashtag| # Passes when the actual HTML contains an <a> to the twitter.com search URL for +hashtag+ whose inner text equals +hashtag+
76
+ match do |text|
77
+ @link = Hpricot(text).at("a[@href='http://twitter.com/search?q=#{CGI.escape hashtag}']") # NOTE(review): this file does not require 'cgi'; presumably a dependency loads it - confirm
78
+ @link &&
79
+ @link.inner_text &&
80
+ @link.inner_text == hashtag
81
+ end
82
+
83
+ failure_message_for_should do |text|
84
+ "Expected hashtag #{hashtag} to be autolinked in '#{text}'"
85
+ end
86
+ end
@@ -0,0 +1,30 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe Twitter::Unicode do # Specs for Twitter::Unicode's U<hex> constants, which these examples expect to be defined on first reference
4
+
5
+ it "should lazy-init constants" do
6
+ Twitter::Unicode.const_defined?(:UFEB6).should == false # NOTE(review): assumes nothing referenced UFEB6 earlier in this process - order-dependent, confirm
7
+ Twitter::Unicode::UFEB6.should_not be_nil
8
+ Twitter::Unicode::UFEB6.should be_kind_of(String)
9
+ Twitter::Unicode.const_defined?(:UFEB6).should == true # the reference above is expected to have defined the constant
10
+ end
11
+
12
+ it "should return corresponding character" do
13
+ Twitter::Unicode::UFEB6.should == [0xfeb6].pack('U')
14
+ end
15
+
16
+ it "should allow lowercase notation" do
17
+ Twitter::Unicode::Ufeb6.should == Twitter::Unicode::UFEB6
18
+ Twitter::Unicode::Ufeb6.should === Twitter::Unicode::UFEB6
19
+ end
20
+
21
+ it "should allow underscore notation" do
22
+ Twitter::Unicode::U_FEB6.should == Twitter::Unicode::UFEB6
23
+ Twitter::Unicode::U_FEB6.should === Twitter::Unicode::UFEB6
24
+ end
25
+
26
+ it "should raise on invalid codepoints" do
27
+ lambda { Twitter::Unicode::FFFFFF }.should raise_error(NameError) # NOTE(review): FFFFFF has no leading U, unlike every other constant in this file - confirm this hits the intended "invalid codepoint" path rather than a plain unknown-constant lookup
28
+ end
29
+
30
+ end
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ class TestValidation # Minimal host class: mixes in Twitter::Validation so the specs below can call tweet_invalid? on an instance.
4
+ include Twitter::Validation
5
+ end
6
+
7
+ describe Twitter::Validation do
8
+
9
+ it "should disallow invalid BOM character" do
10
+ TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters
11
+ TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters
12
+ end
13
+
14
+ it "should disallow invalid U+FFFF character" do
15
+ TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters
16
+ end
17
+
18
+ it "should disallow direction change characters" do
19
+ [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char|
20
+ TestValidation.new.tweet_invalid?("Invalid:#{char}").should == :invalid_characters
21
+ end
22
+ end
23
+
24
+ it "should disallow non-Unicode" do
25
+ TestValidation.new.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters
26
+ end
27
+
28
+ it "should allow <= 140 combined accent characters" do
29
+ char = [0x65, 0x0301].pack('U')
30
+ TestValidation.new.tweet_invalid?(char * 139).should == false
31
+ TestValidation.new.tweet_invalid?(char * 140).should == false
32
+ TestValidation.new.tweet_invalid?(char * 141).should == :too_long
33
+ end
34
+
35
+ it "should allow <= 140 multi-byte characters" do
36
+ char = [ 0x1d106 ].pack('U')
37
+ TestValidation.new.tweet_invalid?(char * 139).should == false
38
+ TestValidation.new.tweet_invalid?(char * 140).should == false
39
+ TestValidation.new.tweet_invalid?(char * 141).should == :too_long
40
+ end
41
+
42
+ end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: twitter-text
3
+ version: !ruby/object:Gem::Version
4
+ version: "1.0"
5
+ platform: ruby
6
+ authors:
7
+ - Matt Sanford
8
+ autorequire: ""
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-27 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: action_view
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: A gem that provides text handling for Twitter
26
+ email: matt@twitter.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - LICENSE
35
+ - README.rdoc
36
+ - Rakefile
37
+ - TODO
38
+ - lib/autolink.rb
39
+ - lib/extractor.rb
40
+ - lib/regex.rb
41
+ - lib/twitter-text.rb
42
+ - lib/unicode.rb
43
+ - lib/validation.rb
44
+ - spec/autolinking_spec.rb
45
+ - spec/extractor_spec.rb
46
+ - spec/regex_spec.rb
47
+ - spec/spec_helper.rb
48
+ - spec/unicode_spec.rb
49
+ - spec/validation_spec.rb
50
+ has_rdoc: true
51
+ homepage: http://twitter.com
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: "0"
64
+ version:
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: "0"
70
+ version:
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.3.5
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Twitter text handling library
78
+ test_files: []
79
+