redaranj-twitter-text 1.0.4.191

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Bare host class: mixes in Twitter::Extractor so the specs below can call the
# extractor methods on a plain instance.
class TestExtractor
  include Twitter::Extractor
end

# Specs for the Twitter::Extractor mixin, grouped by what is extracted:
# @mentions, reply screen names, URLs and hashtags.
describe Twitter::Extractor do
  before do
    @extractor = TestExtractor.new
  end

  describe "mentions" do
    context "single screen name alone " do
      it "should be linked" do
        @extractor.extract_mentioned_screen_names("@alice").should == ["alice"]
      end

      it "should be linked with _" do
        @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"]
      end

      it "should be linked if numeric" do
        @extractor.extract_mentioned_screen_names("@1234").should == ["1234"]
      end
    end

    context "multiple screen names" do
      it "should both be linked" do
        @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"]
      end
    end

    context "screen names embedded in text" do
      it "should be linked in Latin text" do
        @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"]
      end

      it "should be linked in Japanese text" do
        @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
      end
    end

    it "should accept a block arugment and call it in order" do
      needed = ["alice", "bob"]
      @extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
        sn.should == needed.shift
      end
      # An empty list proves the block ran once per expected name.
      needed.should == []
    end
  end

  describe "replies" do
    context "should be extracted from" do
      it "should extract from lone name" do
        @extractor.extract_reply_screen_name("@alice").should == "alice"
      end

      it "should extract from the start" do
        @extractor.extract_reply_screen_name("@alice reply text").should == "alice"
      end

      it "should extract preceded by a space" do
        @extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
      end

      it "should extract preceded by a full-width space" do
        # U+3000 is the ideographic (full-width) space.
        @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
      end
    end

    context "should not be extracted from" do
      it "should not be extracted when preceded by text" do
        @extractor.extract_reply_screen_name("reply @alice text").should == nil
      end

      it "should not be extracted when preceded by puctuation" do
        %w(. / _ - + # ! @).each do |punct|
          @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil
        end
      end
    end

    context "should accept a block arugment" do
      it "should call the block on match" do
        yielded = []
        @extractor.extract_reply_screen_name("@alice") do |sn|
          yielded << sn
        end
        # Collect first, assert afterwards: an expectation placed only inside
        # the block would pass silently if the block were never invoked.
        yielded.should == ["alice"]
      end

      it "should not call the block on no match" do
        calls = 0
        @extractor.extract_reply_screen_name("not a reply") do |sn|
          calls += 1
        end
        calls.should == 0
      end
    end
  end

  describe "urls" do
    describe "matching URLS" do
      # One pair of examples is generated per fixture URL at load time.
      TestUrls::VALID.each do |url|
        it "should extract the URL #{url} and prefix it with a protocol if missing" do
          @extractor.extract_urls(url).first.should include(url)
        end

        it "should match the URL #{url} when it's embedded in other text" do
          text = "Sweet url: #{url} I found. #awesome"
          @extractor.extract_urls(text).first.should include(url)
        end
      end
    end

    describe "invalid URLS" do
      it "does not link urls with invalid domains" do
        @extractor.extract_urls("http://tld-too-short.x").should == []
      end
    end
  end

  describe "hashtags" do
    context "extracts latin/numeric hashtags" do
      %w(text text123 123text).each do |hashtag|
        it "should extract ##{hashtag}" do
          @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
        end

        it "should extract ##{hashtag} within text" do
          @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
        end
      end
    end

    context "international hashtags" do
      context "should allow accents" do
        %w(mañana café münchen).each do |hashtag|
          it "should extract ##{hashtag}" do
            @extractor.extract_hashtags("##{hashtag}").should == [hashtag]
          end

          it "should extract ##{hashtag} within text" do
            @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag]
          end
        end

        it "should not allow the multiplication character" do
          # U+00D7 (multiplication sign) must end the hashtag.
          @extractor.extract_hashtags("#pre#{[0xd7].pack('U')}post").should == ["pre"]
        end

        it "should not allow the division character" do
          # U+00F7 (division sign) must end the hashtag.
          @extractor.extract_hashtags("#pre#{[0xf7].pack('U')}post").should == ["pre"]
        end
      end

      context "should NOT allow Japanese" do
        %w(会議中 ハッシュ).each do |hashtag|
          it "should NOT extract ##{hashtag}" do
            @extractor.extract_hashtags("##{hashtag}").should == []
          end

          it "should NOT extract ##{hashtag} within text" do
            @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == []
          end
        end
      end

    end

    it "should not extract numeric hashtags" do
      @extractor.extract_hashtags("#1234").should == []
    end
  end

end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Runs the :valid_url pattern from Twitter::Regex against the shared TestUrls
# fixtures, using the custom matchers defined in spec_helper.
describe "Twitter::Regex regular expressions" do
  describe "matching URLS" do
    TestUrls::VALID.each do |candidate|
      it "should match the URL #{candidate}" do
        candidate.should match_autolink_expression
      end

      it "should match the URL #{candidate} when it's embedded in other text" do
        candidate.should match_autolink_expression_in("Sweet url: #{candidate} I found. #awesome")
      end
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid characters" do
      # NOTE(review): the subject here is the raw URL string, while
      # have_autolinked_url searches its subject for an <a href=...> element.
      # Confirm whether the string was meant to be autolinked first.
      TestUrls::INVALID.each do |candidate|
        candidate.should_not have_autolinked_url(candidate)
      end
    end
  end

end
@@ -0,0 +1,96 @@
1
+ # encoding: utf-8
2
# Global marker set before the library is loaded (its readers are not visible
# in this file).
$TESTING = true

# Make the gem's lib/ directory requirable from the specs.
$:.push File.join(File.dirname(__FILE__), '..', 'lib')

# CGI.escape is used by the have_autolinked_hashtag matcher below.
require 'cgi'
require 'twitter-text'
require 'hpricot'
# Resolved relative to this file so the specs do not depend on the project
# root being on the load path (Ruby 1.9.2+ no longer includes ".").
require File.dirname(__FILE__) + '/test_urls'

# Mix the URL fixtures into every example group.
Spec::Runner.configure do |config|
  config.include TestUrls
end
12
+
13
# Matcher: passes when the subject string is matched by the pattern stored
# under Twitter::Regex[:valid_url].
Spec::Matchers.define :match_autolink_expression do
  match do |candidate|
    url_pattern = Twitter::Regex[:valid_url]
    url_pattern.match(candidate)
  end
end
18
+
19
# Matcher: passes when the first :valid_url match found in +text+, stripped of
# surrounding whitespace, equals the URL under test.
#
#   url.should match_autolink_expression_in("some text #{url} more text")
Spec::Matchers.define :match_autolink_expression_in do |text|
  match do |url|
    @match_data = Twitter::Regex[:valid_url].match(text)
    @match_data && @match_data.to_s.strip == url
  end

  failure_message_for_should do |url|
    # @match_data is nil when nothing matched at all; report that instead of
    # raising NoMethodError on nil.captures and hiding the real failure.
    outcome = if @match_data
      "the match was #{@match_data.captures.inspect}"
    else
      "nothing matched"
    end
    "Expected to find url '#{url}' in text '#{text}', but #{outcome}"
  end
end
29
+
30
# Matcher: parses the subject as HTML and passes when it contains an anchor
# whose href is +url+ and whose inner text is also +url+.
Spec::Matchers.define :have_autolinked_url do |url|
  match do |html|
    @link = Hpricot(html).at("a[@href='#{url}']")
    # nil when no anchor was found or the anchor has no text
    label = @link && @link.inner_text
    label && label == url
  end

  failure_message_for_should do |html|
    "Expected url '#{url}' to be autolinked in '#{html}'"
  end
end
42
+
43
# Matcher: parses the subject as HTML and passes when its first "a.username"
# link has +screen_name+ as inner text and an href equal to the downcased
# "http://twitter.com/<screen_name>".
Spec::Matchers.define :link_to_screen_name do |screen_name|
  match do |text|
    @link = Hpricot(text).at("a.username")
    # Plain boolean comparison: a nested `.should` would raise on mismatch,
    # which breaks `should_not` and bypasses the failure messages below.
    !@link.nil? &&
      @link.inner_text == screen_name &&
      "http://twitter.com/#{screen_name}".downcase == @link['href']
  end

  failure_message_for_should do |text|
    if @link
      "expected link #{@link.inner_text} with href #{@link['href']} to match screen_name #{screen_name}, but it does not"
    else
      # No link at all: avoid calling methods on nil while building the message.
      "expected '#{text}' to contain a link to screen_name #{screen_name}, but no a.username link was found"
    end
  end

  # Only reached when the match succeeded, so @link is present here.
  failure_message_for_should_not do |text|
    "expected link #{@link.inner_text} with href #{@link['href']} not to match screen_name #{screen_name}, but it does"
  end

  description do
    "contain a link with the name and href pointing to the expected screen_name"
  end
end
61
+
62
# Matcher: parses the subject as HTML and passes when its first "a.list-slug"
# link has +list_path+ as inner text and an href equal to the downcased
# "http://twitter.com/<list_path>".
Spec::Matchers.define :link_to_list_path do |list_path|
  match do |text|
    @link = Hpricot(text).at("a.list-slug")
    # Plain boolean comparison: a nested `.should` would raise on mismatch,
    # which breaks `should_not` and bypasses the failure messages below.
    !@link.nil? &&
      @link.inner_text == list_path &&
      "http://twitter.com/#{list_path}".downcase == @link['href']
  end

  failure_message_for_should do |text|
    if @link
      "expected link #{@link.inner_text} with href #{@link['href']} to match the list path #{list_path}, but it does not"
    else
      # No link at all: avoid calling methods on nil while building the message.
      "expected '#{text}' to contain a link to the list path #{list_path}, but no a.list-slug link was found"
    end
  end

  # Only reached when the match succeeded, so @link is present here.
  failure_message_for_should_not do |text|
    "expected link #{@link.inner_text} with href #{@link['href']} not to match the list path #{list_path}, but it does"
  end

  description do
    "contain a link with the list title and an href pointing to the list path"
  end
end
80
+
81
# Matcher: parses the subject as HTML and passes when it contains an anchor
# pointing at the Twitter search URL for +hashtag+ (CGI-escaped) whose inner
# text equals +hashtag+.
Spec::Matchers.define :have_autolinked_hashtag do |hashtag|
  match do |html|
    search_href = "http://twitter.com/search?q=#{CGI.escape hashtag}"
    @link = Hpricot(html).at("a[@href='#{search_href}']")
    # nil when no anchor was found or the anchor has no text
    label = @link && @link.inner_text
    label && label == hashtag
  end

  failure_message_for_should do |html|
    # Two failure modes: a link exists but its text differs, or no link at all.
    missing = "Expected hashtag #{hashtag} to be autolinked in '#{html}', but no link was found."
    @link ? "Expected link text to be #{hashtag}, but it was #{@link.inner_text}" : missing
  end
end
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
# Shared URL fixtures for the specs. Both lists are frozen so that one spec
# cannot accidentally mutate fixtures another spec iterates over.
module TestUrls
  # URLs the specs expect to be recognised, with and without a protocol,
  # including fragments, ports, query strings and non-ASCII hosts.
  VALID = [
    "http://google.com",
    "http://foobar.com/#",
    "http://google.com/#foo",
    "http://google.com/#search?q=iphone%20-filter%3Alinks",
    "http://twitter.com/#search?q=iphone%20-filter%3Alinks",
    "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
    "http://somehost.com:3000",
    "http://x.com/~matthew+%-x",
    "http://en.wikipedia.org/wiki/Primer_(film)",
    "http://www.ams.org/bookstore-getitem/item=mbk-59",
    "http://chilp.it/?77e8fd",
    "www.foobar.com",
    "WWW.FOOBAR.COM",
    "http://tell.me/why",
    "http://longtlds.info",
    "http://✪df.ws/ejp",
    "http://日本.com"
  ].freeze

  # URLs the specs expect NOT to be linked.
  # NOTE(review): "http://domain-dash.com" looks like an ordinary, valid host
  # name; confirm it belongs in this list.
  INVALID = [
    "http://no-tld",
    "http://tld-too-short.x",
    "http://x.com/,,,/.../@@@/;;;/:::/---/%%%x",
    "http://domain-dash.com",
    "http://-doman_dash.com"
  ].freeze

end
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Specs for the constant-style code point lookup on Twitter::Unicode
# (for example Twitter::Unicode::UFEB6).
describe Twitter::Unicode do

  it "should lazy-init constants" do
    # Order matters: check, trigger the lookup, then check again.
    defined_before = Twitter::Unicode.const_defined?(:UFEB6)
    first_lookup   = Twitter::Unicode::UFEB6
    second_lookup  = Twitter::Unicode::UFEB6
    defined_after  = Twitter::Unicode.const_defined?(:UFEB6)

    defined_before.should == false
    first_lookup.should_not be_nil
    second_lookup.should be_kind_of(String)
    defined_after.should == true
  end

  it "should return corresponding character" do
    expected = [0xfeb6].pack('U')
    Twitter::Unicode::UFEB6.should == expected
  end

  it "should allow lowercase notation" do
    [:==, :===].each do |comparison|
      Twitter::Unicode::Ufeb6.should.send(comparison, Twitter::Unicode::UFEB6)
    end
  end

  it "should allow underscore notation" do
    [:==, :===].each do |comparison|
      Twitter::Unicode::U_FEB6.should.send(comparison, Twitter::Unicode::UFEB6)
    end
  end

  it "should raise on invalid codepoints" do
    # NOTE(review): unlike the other constants in this file, this name has no
    # leading "U"; confirm that is the intended "invalid codepoint" case.
    lookup = lambda { Twitter::Unicode::FFFFFF }
    lookup.should raise_error(NameError)
  end

end
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Bare host class: mixes in Twitter::Validation so the specs below can call
# tweet_invalid? on a plain instance.
class TestValidation
  include Twitter::Validation
end

# Specs for Twitter::Validation#tweet_invalid?, which these examples expect to
# return false for an acceptable tweet or a symbol naming the problem.
describe Twitter::Validation do
  before do
    @validator = TestValidation.new
  end

  it "should disallow invalid BOM character" do
    @validator.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters
    @validator.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters
  end

  it "should disallow invalid U+FFFF character" do
    @validator.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters
  end

  it "should disallow direction change characters" do
    # U+202A..U+202E: the bidirectional embedding/override controls.
    [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char|
      @validator.tweet_invalid?("Invalid:#{char}").should == :invalid_characters
    end
  end

  it "should disallow non-Unicode" do
    @validator.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters
  end

  it "should allow <= 140 combined accent characters" do
    # 'U*' packs every code point. A bare 'U' consumes only the first element,
    # which silently reduced this sequence to a plain "e" with no accent.
    # NOTE(review): the 140 limit below relies on tweet_invalid? counting
    # "e" + U+0301 as one character; confirm against the library.
    char = [0x65, 0x0301].pack('U*')
    @validator.tweet_invalid?(char * 139).should == false
    @validator.tweet_invalid?(char * 140).should == false
    @validator.tweet_invalid?(char * 141).should == :too_long
  end

  it "should allow <= 140 multi-byte characters" do
    # U+1D106 lies outside the Basic Multilingual Plane (4 bytes in UTF-8).
    char = [ 0x1d106 ].pack('U')
    @validator.tweet_invalid?(char * 139).should == false
    @validator.tweet_invalid?(char * 140).should == false
    @validator.tweet_invalid?(char * 141).should == :too_long
  end

end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redaranj-twitter-text
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 4
9
+ - 191
10
+ version: 1.0.4.191
11
+ platform: ruby
12
+ authors:
13
+ - Matt Sanford
14
+ - Patrick Ewing
15
+ autorequire: ""
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2010-04-09 00:00:00 -04:00
20
+ default_executable:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: actionpack
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description: A gem that provides text handling for Twitter. Built to run on 1.9.1
35
+ email:
36
+ - matt@twitter.com
37
+ - patrick.henry.ewing@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - LICENSE
46
+ - README.rdoc
47
+ - Rakefile
48
+ - TODO
49
+ - lib/autolink.rb
50
+ - lib/extractor.rb
51
+ - lib/regex.rb
52
+ - lib/twitter-text.rb
53
+ - lib/unicode.rb
54
+ - lib/validation.rb
55
+ - spec/autolinking_spec.rb
56
+ - spec/extractor_spec.rb
57
+ - spec/regex_spec.rb
58
+ - spec/spec_helper.rb
59
+ - spec/test_urls.rb
60
+ - spec/unicode_spec.rb
61
+ - spec/validation_spec.rb
62
+ has_rdoc: true
63
+ homepage: http://twitter.com
64
+ licenses: []
65
+
66
+ post_install_message:
67
+ rdoc_options: []
68
+
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ segments:
83
+ - 0
84
+ version: "0"
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.3.6
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Twitter text handling library
92
+ test_files: []
93
+