twitter-text-relative 1.6.2.pre.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,368 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Bare host class whose only job is to mix in Twitter::Extractor so the
# specs can call the extractor methods on an ordinary instance.
class TestExtractor
  include Twitter::Extractor
end
7
+
8
+ describe Twitter::Extractor do
9
# Give every example its own TestExtractor instance in @extractor.
before(:each) { @extractor = TestExtractor.new }
12
+
13
# Examples for extract_mentioned_screen_names: the list of screen names
# returned for a text, and the optional block form.
describe "mentions" do
  context "single screen name alone " do
    it "should be linked" do
      names = @extractor.extract_mentioned_screen_names("@alice")
      names.should == ["alice"]
    end

    it "should be linked with _" do
      names = @extractor.extract_mentioned_screen_names("@alice_adams")
      names.should == ["alice_adams"]
    end

    it "should be linked if numeric" do
      names = @extractor.extract_mentioned_screen_names("@1234")
      names.should == ["1234"]
    end
  end

  context "multiple screen names" do
    it "should both be linked" do
      names = @extractor.extract_mentioned_screen_names("@alice @bob")
      names.should == ["alice", "bob"]
    end
  end

  context "screen names embedded in text" do
    it "should be linked in Latin text" do
      names = @extractor.extract_mentioned_screen_names("waiting for @alice to arrive")
      names.should == ["alice"]
    end

    it "should be linked in Japanese text" do
      names = @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている")
      names.should == ["alice"]
    end

    it "should ignore mentions preceded by !, @, #, $, %, & or *" do
      # Each of these characters directly before the "@" must suppress the match.
      %w(! @ # $ % & *).each do |prefix|
        names = @extractor.extract_mentioned_screen_names("f#{prefix}@kn")
        names.should == []
      end
    end
  end

  it "should accept a block arugment and call it in order" do
    # The queue is drained as the block fires; leftovers mean a missed call.
    expected = ["alice", "bob"]
    @extractor.extract_mentioned_screen_names("@alice @bob") do |screen_name|
      screen_name.should == expected.shift
    end
    expected.should == []
  end
end
59
+
60
# Examples for extract_mentioned_screen_names_with_indices: each result is a
# hash with :screen_name and :indices ([start, end] positions in the text).
describe "mentions with indices" do
  context "single screen name alone " do
    it "should be linked and the correct indices" do
      mentions = @extractor.extract_mentioned_screen_names_with_indices("@alice")
      mentions.should == [{:screen_name => "alice", :indices => [0, 6]}]
    end

    it "should be linked with _ and the correct indices" do
      mentions = @extractor.extract_mentioned_screen_names_with_indices("@alice_adams")
      mentions.should == [{:screen_name => "alice_adams", :indices => [0, 12]}]
    end

    it "should be linked if numeric and the correct indices" do
      mentions = @extractor.extract_mentioned_screen_names_with_indices("@1234")
      mentions.should == [{:screen_name => "1234", :indices => [0, 5]}]
    end
  end

  context "multiple screen names" do
    it "should both be linked with the correct indices" do
      mentions = @extractor.extract_mentioned_screen_names_with_indices("@alice @bob")
      mentions.should == [
        {:screen_name => "alice", :indices => [0, 6]},
        {:screen_name => "bob", :indices => [7, 11]}
      ]
    end

    it "should be linked with the correct indices even when repeated" do
      mentions = @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob")
      mentions.should == [
        {:screen_name => "alice", :indices => [0, 6]},
        {:screen_name => "alice", :indices => [7, 13]},
        {:screen_name => "bob", :indices => [14, 18]}
      ]
    end
  end

  context "screen names embedded in text" do
    it "should be linked in Latin text with the correct indices" do
      mentions = @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive")
      mentions.should == [{:screen_name => "alice", :indices => [12, 18]}]
    end

    it "should be linked in Japanese text with the correct indices" do
      mentions = @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている")
      mentions.should == [{:screen_name => "alice", :indices => [1, 7]}]
    end
  end

  it "should accept a block arugment and call it in order" do
    # The queue is drained as the block fires; leftovers mean a missed call.
    expected = [
      {:screen_name => "alice", :indices => [0, 6]},
      {:screen_name => "bob", :indices => [7, 11]}
    ]
    @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |screen_name, from, to|
      wanted = expected.shift
      screen_name.should == wanted[:screen_name]
      from.should == wanted[:indices].first
      to.should == wanted[:indices].last
    end
    expected.should == []
  end

  it "should extract screen name in text with supplementary character" do
    # U+10400 lies outside the Basic Multilingual Plane; the expected indices
    # below count it as a single position.
    supplementary = [0x10400].pack('U')
    mentions = @extractor.extract_mentioned_screen_names_with_indices("#{supplementary} @alice")
    mentions.should == [{:screen_name => "alice", :indices => [2, 8]}]
  end
end
115
+
116
# Examples for extract_reply_screen_name: a screen name is returned only when
# the text starts with the mention (optionally after whitespace); otherwise
# the result is nil. Also covers the optional block form.
describe "replies" do
  context "should be extracted from" do
    it "should extract from lone name" do
      @extractor.extract_reply_screen_name("@alice").should == "alice"
    end

    it "should extract from the start" do
      @extractor.extract_reply_screen_name("@alice reply text").should == "alice"
    end

    it "should extract preceded by a space" do
      @extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
    end

    it "should extract preceded by a full-width space" do
      # U+3000 is the ideographic (full-width) space.
      @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
    end
  end

  context "should not be extracted from" do
    it "should not be extracted when preceded by text" do
      @extractor.extract_reply_screen_name("reply @alice text").should be_nil
    end

    it "should not be extracted when preceded by puctuation" do
      %w(. / _ - + # ! @).each do |punct|
        @extractor.extract_reply_screen_name("#{punct}@alice text").should be_nil
      end
    end
  end

  context "should accept a block arugment" do
    it "should call the block on match" do
      # Count invocations so the example fails if the block is never called;
      # an expectation placed only inside the block would pass vacuously.
      calls = 0
      @extractor.extract_reply_screen_name("@alice") do |sn|
        calls += 1
        sn.should == "alice"
      end
      calls.should == 1
    end

    it "should not call the block on no match" do
      calls = 0
      @extractor.extract_reply_screen_name("not a reply") do |sn|
        calls += 1
      end
      calls.should == 0
    end
  end
end
163
+
164
# Examples for extract_urls, generated from the TestUrls::VALID and
# TestUrls::TCO fixture lists plus one invalid-domain case.
describe "urls" do
  describe "matching URLS" do
    TestUrls::VALID.each do |url|
      it "should extract the URL #{url} and prefix it with a protocol if missing" do
        first_url = @extractor.extract_urls(url).first
        first_url.should include(url)
      end

      it "should match the URL #{url} when it's embedded in other text" do
        tweet = "Sweet url: #{url} I found. #awesome"
        first_url = @extractor.extract_urls(tweet).first
        first_url.should include(url)
      end
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid domains" do
      urls = @extractor.extract_urls("http://tld-too-short.x")
      urls.should == []
    end
  end

  describe "t.co URLS" do
    TestUrls::TCO.each do |url|
      it "should only extract the t.co URL from the URL #{url}" do
        urls = @extractor.extract_urls(url)
        urls.size.should == 1

        # Only the leading 20 characters of the fixture are expected back.
        short_url = urls.first
        short_url.should_not == url
        short_url.should == url[0...20]
      end

      it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
        tweet = "Sweet url: #{url} I found. #awesome"
        urls = @extractor.extract_urls(tweet)
        urls.size.should == 1

        short_url = urls.first
        short_url.should_not == url
        short_url.should == url[0...20]
      end
    end
  end
end
205
+
206
# Examples for extract_urls_with_indices: each result is a hash with :url and
# :indices ([start, end] positions of the URL within the text).
describe "urls with indices" do
  describe "matching URLS" do
    TestUrls::VALID.each do |url|
      it "should extract the URL #{url} and prefix it with a protocol if missing" do
        entities = @extractor.extract_urls_with_indices(url)
        entities.size.should == 1

        entity = entities.first
        entity[:url].should include(url)

        indices = entity[:indices]
        indices.first.should == 0
        indices.last.should == url.chars.to_a.size
      end

      it "should match the URL #{url} when it's embedded in other text" do
        tweet = "Sweet url: #{url} I found. #awesome"
        entities = @extractor.extract_urls_with_indices(tweet)
        entities.size.should == 1

        entity = entities.first
        entity[:url].should include(url)

        # "Sweet url: " is 11 characters long, so the URL starts at 11.
        indices = entity[:indices]
        indices.first.should == 11
        indices.last.should == 11 + url.chars.to_a.size
      end
    end

    it "should extract URL in text with supplementary character" do
      # U+10400 lies outside the Basic Multilingual Plane; the expected
      # indices below count it as a single position.
      supplementary = [0x10400].pack('U')
      entities = @extractor.extract_urls_with_indices("#{supplementary} http://twitter.com")
      entities.should == [{:url => "http://twitter.com", :indices => [2, 20]}]
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid domains" do
      entities = @extractor.extract_urls_with_indices("http://tld-too-short.x")
      entities.should == []
    end
  end

  describe "t.co URLS" do
    TestUrls::TCO.each do |url|
      it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
        entities = @extractor.extract_urls_with_indices(url)
        entities.size.should == 1

        # Only the leading 20 characters of the fixture are expected back.
        entity = entities.first
        entity[:url].should_not include(url)
        entity[:url].should include(url[0...20])

        indices = entity[:indices]
        indices.first.should == 0
        indices.last.should == 20
      end

      it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
        tweet = "Sweet url: #{url} I found. #awesome"
        entities = @extractor.extract_urls_with_indices(tweet)
        entities.size.should == 1

        entity = entities.first
        entity[:url].should_not include(url)
        entity[:url].should include(url[0...20])

        # 11-character lead-in plus the 20-character short URL.
        indices = entity[:indices]
        indices.first.should == 11
        indices.last.should == 31
      end
    end
  end
end
265
+
266
# Examples for extract_hashtags: the returned list holds hashtag text
# without the leading "#".
describe "hashtags" do
  context "extracts latin/numeric hashtags" do
    %w(text text123 123text).each do |hashtag|
      it "should extract ##{hashtag}" do
        tags = @extractor.extract_hashtags("##{hashtag}")
        tags.should == [hashtag]
      end

      it "should extract ##{hashtag} within text" do
        tags = @extractor.extract_hashtags("pre-text ##{hashtag} post-text")
        tags.should == [hashtag]
      end
    end
  end

  context "international hashtags" do
    context "should allow accents" do
      %w(mañana café münchen).each do |hashtag|
        it "should extract ##{hashtag}" do
          tags = @extractor.extract_hashtags("##{hashtag}")
          tags.should == [hashtag]
        end

        it "should extract ##{hashtag} within text" do
          tags = @extractor.extract_hashtags("pre-text ##{hashtag} post-text")
          tags.should == [hashtag]
        end
      end

      it "should not allow the multiplication character" do
        # The hashtag is expected to stop at the sign, leaving only "pre".
        tags = @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post")
        tags.should == ["pre"]
      end

      it "should not allow the division character" do
        tags = @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post")
        tags.should == ["pre"]
      end
    end
  end

  it "should not extract numeric hashtags" do
    tags = @extractor.extract_hashtags("#1234")
    tags.should == []
  end

  it "should extract hashtag followed by punctuations" do
    tags = @extractor.extract_hashtags("#test1: #test2; #test3\"")
    tags.should == ["test1", "test2", "test3"]
  end
end
310
+
311
# Examples for extract_hashtags_with_indices: each result is a hash with
# :hashtag (text without "#") and :indices ([start, end] within the text).
describe "hashtags with indices" do
  # Expects exactly one hashtag in +text+, equal to +hashtag+, starting at
  # +offset+ and ending at offset + hashtag length + 1 (the +1 is the "#").
  def match_hashtag_in_text(hashtag, text, offset = 0)
    found = @extractor.extract_hashtags_with_indices(text)
    found.size.should == 1

    entity = found.first
    entity[:hashtag].should == hashtag

    indices = entity[:indices]
    indices.first.should == offset
    indices.last.should == offset + hashtag.chars.to_a.size + 1
  end

  # Expects that no hashtag at all is extracted from +text+.
  def not_match_hashtag_in_text(text)
    found = @extractor.extract_hashtags_with_indices(text)
    found.size.should == 0
  end

  context "extracts latin/numeric hashtags" do
    %w(text text123 123text).each do |hashtag|
      it "should extract ##{hashtag}" do
        match_hashtag_in_text(hashtag, "##{hashtag}")
      end

      it "should extract ##{hashtag} within text" do
        # "pre-text " is 9 characters long, so the "#" sits at index 9.
        match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
      end
    end
  end

  context "international hashtags" do
    context "should allow accents" do
      %w(mañana café münchen).each do |hashtag|
        it "should extract ##{hashtag}" do
          match_hashtag_in_text(hashtag, "##{hashtag}")
        end

        it "should extract ##{hashtag} within text" do
          match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
        end
      end

      it "should not allow the multiplication character" do
        # U+00D7 (multiplication sign) is expected to end the hashtag.
        multiplication_sign = [0xd7].pack('U')
        match_hashtag_in_text("pre", "#pre#{multiplication_sign}post", 0)
      end

      it "should not allow the division character" do
        # U+00F7 (division sign) is expected to end the hashtag.
        division_sign = [0xf7].pack('U')
        match_hashtag_in_text("pre", "#pre#{division_sign}post", 0)
      end
    end
  end

  it "should not extract numeric hashtags" do
    not_match_hashtag_in_text("#1234")
  end

  it "should extract hashtag in text with supplementary character" do
    # U+10400 lies outside the Basic Multilingual Plane; the expected offset
    # counts it as a single position.
    supplementary = [0x10400].pack('U')
    match_hashtag_in_text("hashtag", "#{supplementary} #hashtag", 2)
  end
end
368
+ end
@@ -0,0 +1,92 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Bare host class whose only job is to mix in Twitter::HitHighlighter so the
# specs can call hit_highlight on an ordinary instance.
class TestHitHighlighter
  include Twitter::HitHighlighter
end
7
+
8
# Examples for Twitter::HitHighlighter#hit_highlight. Hits are given as
# [start, end] pairs; the expected strings show the hit wrapped in a tag
# (<em> unless :tag is passed), with positions counted over the visible text
# so that existing <a> markup is skipped.
describe Twitter::HitHighlighter do
  describe "highlight" do
    before do
      @highlighter = TestHitHighlighter.new
    end

    context "with options" do
      before do
        @original = "Testing this hit highliter"
        @hits = [[13,16]]
      end

      it "should default to <em> tags" do
        @highlighter.hit_highlight(@original, @hits).should == "Testing this <em>hit</em> highliter"
      end

      it "should allow tag override" do
        @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this <b>hit</b> highliter"
      end
    end

    context "without links" do
      before do
        @original = "Hey! this is a test tweet"
      end

      it "should return original when no hits are provided" do
        @highlighter.hit_highlight(@original).should == @original
      end

      # The hit lists below are passed directly as the second positional
      # argument; no local variable is needed for them.
      it "should highlight one hit" do
        @highlighter.hit_highlight(@original, [[5, 9]]).should == "Hey! <em>this</em> is a test tweet"
      end

      it "should highlight two hits" do
        @highlighter.hit_highlight(@original, [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
      end

      it "should correctly highlight first-word hits" do
        @highlighter.hit_highlight(@original, [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
      end

      it "should correctly highlight last-word hits" do
        @highlighter.hit_highlight(@original, [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
      end
    end

    context "with links" do
      it "should highlight with a single link" do
        @highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]]).should == "@<a>bcherry</a> <em>this</em> was a test tweet"
      end

      it "should highlight with link at the end" do
        @highlighter.hit_highlight("test test <a>test</a>", [[5, 9]]).should == "test <em>test</em> <a>test</a>"
      end

      it "should highlight with a link at the beginning" do
        @highlighter.hit_highlight("<a>test</a> test test", [[5, 9]]).should == "<a>test</a> <em>test</em> test"
      end

      it "should highlight an entire link" do
        @highlighter.hit_highlight("test <a>test</a> test", [[5, 9]]).should == "test <a><em>test</em></a> test"
      end

      it "should highlight within a link" do
        @highlighter.hit_highlight("test <a>test</a> test", [[6, 8]]).should == "test <a>t<em>es</em>t</a> test"
      end

      it "should highlight around a link" do
        @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]]).should == "tes<em>t <a>test</a> t</em>est"
      end

      it "should fail gracefully with bad hits" do
        # The hit end (20) is past the end of the 9-character text.
        @highlighter.hit_highlight("test test", [[5, 20]]).should == "test <em>test</em>"
      end

      it "should not mess up with touching tags" do
        @highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]]).should == "<a>foo</a><a><em>foo</em></a>"
      end
    end
  end
end