twitter-text-editted 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,368 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Bare host class for the specs below: it only mixes in Twitter::Extractor so
# the module's extract_* methods can be called on an instance.
class TestExtractor
  include Twitter::Extractor
end
7
+
8
+ describe Twitter::Extractor do
9
# Build a fresh extractor host object for every example.
before(:each) { @extractor = TestExtractor.new }
12
+
13
# extract_mentioned_screen_names: given tweet text, the examples expect an
# array of the mentioned screen names (without the leading "@").
describe "mentions" do
  context "single screen name alone " do
    it "should be linked" do
      names = @extractor.extract_mentioned_screen_names("@alice")
      names.should == ["alice"]
    end

    it "should be linked with _" do
      names = @extractor.extract_mentioned_screen_names("@alice_adams")
      names.should == ["alice_adams"]
    end

    it "should be linked if numeric" do
      names = @extractor.extract_mentioned_screen_names("@1234")
      names.should == ["1234"]
    end
  end

  context "multiple screen names" do
    it "should both be linked" do
      names = @extractor.extract_mentioned_screen_names("@alice @bob")
      names.should == %w[alice bob]
    end
  end

  context "screen names embedded in text" do
    it "should be linked in Latin text" do
      names = @extractor.extract_mentioned_screen_names("waiting for @alice to arrive")
      names.should == ["alice"]
    end

    it "should be linked in Japanese text" do
      names = @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている")
      names.should == ["alice"]
    end

    it "should ignore mentions preceded by !, @, #, $, %, & or *" do
      # Each character is placed directly before the "@"; none may yield a mention.
      %w[! @ # $ % & *].each do |c|
        @extractor.extract_mentioned_screen_names("f#{c}@kn").should == []
      end
    end
  end

  it "should accept a block arugment and call it in order" do
    remaining = %w[alice bob]
    @extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
      sn.should == remaining.shift
    end
    # Every expected name must have been consumed, i.e. the block ran for each.
    remaining.should == []
  end
end
59
+
60
# extract_mentioned_screen_names_with_indices: the examples expect one hash per
# mention with :screen_name and :indices ([start, end] positions in the text,
# where start is the position of the "@").
describe "mentions with indices" do
  context "single screen name alone " do
    it "should be linked and the correct indices" do
      expected = [{:screen_name => "alice", :indices => [0, 6]}]
      @extractor.extract_mentioned_screen_names_with_indices("@alice").should == expected
    end

    it "should be linked with _ and the correct indices" do
      expected = [{:screen_name => "alice_adams", :indices => [0, 12]}]
      @extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == expected
    end

    it "should be linked if numeric and the correct indices" do
      expected = [{:screen_name => "1234", :indices => [0, 5]}]
      @extractor.extract_mentioned_screen_names_with_indices("@1234").should == expected
    end
  end

  context "multiple screen names" do
    it "should both be linked with the correct indices" do
      expected = [
        {:screen_name => "alice", :indices => [0, 6]},
        {:screen_name => "bob", :indices => [7, 11]}
      ]
      @extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should == expected
    end

    it "should be linked with the correct indices even when repeated" do
      expected = [
        {:screen_name => "alice", :indices => [0, 6]},
        {:screen_name => "alice", :indices => [7, 13]},
        {:screen_name => "bob", :indices => [14, 18]}
      ]
      @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should == expected
    end
  end

  context "screen names embedded in text" do
    it "should be linked in Latin text with the correct indices" do
      expected = [{:screen_name => "alice", :indices => [12, 18]}]
      @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == expected
    end

    it "should be linked in Japanese text with the correct indices" do
      expected = [{:screen_name => "alice", :indices => [1, 7]}]
      @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == expected
    end
  end

  it "should accept a block arugment and call it in order" do
    pending_mentions = [
      {:screen_name => "alice", :indices => [0, 6]},
      {:screen_name => "bob", :indices => [7, 11]}
    ]
    @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index|
      mention = pending_mentions.shift
      sn.should == mention[:screen_name]
      start_index.should == mention[:indices].first
      end_index.should == mention[:indices].last
    end
    # Nothing left over means the block was invoked once per expected mention.
    pending_mentions.should == []
  end

  it "should extract screen name in text with supplementary character" do
    # U+10400 lies outside the Basic Multilingual Plane; the expected start
    # index of 2 counts it as a single character (character + space).
    supplementary = [0x10400].pack('U')
    expected = [{:screen_name => "alice", :indices => [2, 8]}]
    @extractor.extract_mentioned_screen_names_with_indices("#{supplementary} @alice").should == expected
  end
end
115
+
116
# extract_reply_screen_name: the examples expect the screen name when the text
# starts with an @mention (optionally after whitespace), and nil otherwise.
describe "replies" do
  context "should be extracted from" do
    it "should extract from lone name" do
      @extractor.extract_reply_screen_name("@alice").should == "alice"
    end

    it "should extract from the start" do
      @extractor.extract_reply_screen_name("@alice reply text").should == "alice"
    end

    it "should extract preceded by a space" do
      @extractor.extract_reply_screen_name(" @alice reply text").should == "alice"
    end

    it "should extract preceded by a full-width space" do
      # U+3000 is the ideographic (full-width) space.
      @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice"
    end
  end

  context "should not be extracted from" do
    it "should not be extracted when preceded by text" do
      @extractor.extract_reply_screen_name("reply @alice text").should be_nil
    end

    it "should not be extracted when preceded by puctuation" do
      %w(. / _ - + # ! @).each do |punct|
        @extractor.extract_reply_screen_name("#{punct}@alice text").should be_nil
      end
    end
  end

  context "should accept a block arugment" do
    it "should call the block on match" do
      yielded = []
      @extractor.extract_reply_screen_name("@alice") { |sn| yielded << sn }
      # Assert after the call rather than inside the block: an expectation that
      # lives only inside the block never runs if the block is not invoked, so
      # the example would pass without verifying anything.
      yielded.should == ["alice"]
    end

    it "should not call the block on no match" do
      calls = 0
      @extractor.extract_reply_screen_name("not a reply") { |sn| calls += 1 }
      calls.should == 0
    end
  end
end
163
+
164
# extract_urls: examples are generated from the TestUrls fixture lists, one
# pair of examples per fixture URL.
describe "urls" do
  describe "matching URLS" do
    TestUrls::VALID.each do |url|
      it "should extract the URL #{url} and prefix it with a protocol if missing" do
        first_url = @extractor.extract_urls(url).first
        first_url.should include(url)
      end

      it "should match the URL #{url} when it's embedded in other text" do
        text = "Sweet url: #{url} I found. #awesome"
        first_url = @extractor.extract_urls(text).first
        first_url.should include(url)
      end
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid domains" do
      @extractor.extract_urls("http://tld-too-short.x").should == []
    end
  end

  describe "t.co URLS" do
    TestUrls::TCO.each do |url|
      it "should only extract the t.co URL from the URL #{url}" do
        found = @extractor.extract_urls(url)
        found.size.should == 1

        # Only the first 20 characters of the fixture are expected back, and
        # the result must differ from the full fixture string.
        only_url = found.first
        only_url.should_not == url
        only_url.should == url[0, 20]
      end

      it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
        text = "Sweet url: #{url} I found. #awesome"
        found = @extractor.extract_urls(text)
        found.size.should == 1

        only_url = found.first
        only_url.should_not == url
        only_url.should == url[0, 20]
      end
    end
  end
end
205
+
206
# extract_urls_with_indices: the examples expect one hash per URL with :url and
# :indices ([start, end] positions of the URL within the text).
describe "urls with indices" do
  describe "matching URLS" do
    TestUrls::VALID.each do |url|
      it "should extract the URL #{url} and prefix it with a protocol if missing" do
        hits = @extractor.extract_urls_with_indices(url)
        hits.size.should == 1

        hit = hits.first
        hit[:url].should include(url)
        hit[:indices].first.should == 0
        hit[:indices].last.should == url.chars.to_a.size
      end

      it "should match the URL #{url} when it's embedded in other text" do
        text = "Sweet url: #{url} I found. #awesome"
        hits = @extractor.extract_urls_with_indices(text)
        hits.size.should == 1

        hit = hits.first
        hit[:url].should include(url)
        # 11 is the length of the "Sweet url: " prefix in front of the URL.
        hit[:indices].first.should == 11
        hit[:indices].last.should == 11 + url.chars.to_a.size
      end
    end

    it "should extract URL in text with supplementary character" do
      # U+10400 lies outside the Basic Multilingual Plane; the expected start
      # index of 2 counts it as a single character (character + space).
      supplementary = [0x10400].pack('U')
      expected = [{:url => "http://twitter.com", :indices => [2, 20]}]
      @extractor.extract_urls_with_indices("#{supplementary} http://twitter.com").should == expected
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid domains" do
      @extractor.extract_urls_with_indices("http://tld-too-short.x").should == []
    end
  end

  describe "t.co URLS" do
    TestUrls::TCO.each do |url|
      it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
        hits = @extractor.extract_urls_with_indices(url)
        hits.size.should == 1

        # Only the first 20 characters of the fixture are expected to be extracted.
        hit = hits.first
        hit[:url].should_not include(url)
        hit[:url].should include(url[0, 20])
        hit[:indices].first.should == 0
        hit[:indices].last.should == 20
      end

      it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
        text = "Sweet url: #{url} I found. #awesome"
        hits = @extractor.extract_urls_with_indices(text)
        hits.size.should == 1

        hit = hits.first
        hit[:url].should_not include(url)
        hit[:url].should include(url[0, 20])
        # 11-character "Sweet url: " prefix, then the 20 extracted characters.
        hit[:indices].first.should == 11
        hit[:indices].last.should == 31
      end
    end
  end
end
265
+
266
# extract_hashtags: the examples expect an array of hashtag texts without the
# leading "#".
describe "hashtags" do
  context "extracts latin/numeric hashtags" do
    %w(text text123 123text).each do |hashtag|
      it "should extract ##{hashtag}" do
        tags = @extractor.extract_hashtags("##{hashtag}")
        tags.should == [hashtag]
      end

      it "should extract ##{hashtag} within text" do
        tags = @extractor.extract_hashtags("pre-text ##{hashtag} post-text")
        tags.should == [hashtag]
      end
    end
  end

  context "international hashtags" do
    context "should allow accents" do
      %w(mañana café münchen).each do |hashtag|
        it "should extract ##{hashtag}" do
          tags = @extractor.extract_hashtags("##{hashtag}")
          tags.should == [hashtag]
        end

        it "should extract ##{hashtag} within text" do
          tags = @extractor.extract_hashtags("pre-text ##{hashtag} post-text")
          tags.should == [hashtag]
        end
      end

      it "should not allow the multiplication character" do
        # The hashtag is expected to stop right before the inserted character.
        tags = @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post")
        tags.should == ["pre"]
      end

      it "should not allow the division character" do
        tags = @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post")
        tags.should == ["pre"]
      end
    end
  end

  it "should not extract numeric hashtags" do
    @extractor.extract_hashtags("#1234").should == []
  end

  it "should extract hashtag followed by punctuations" do
    tags = @extractor.extract_hashtags("#test1: #test2; #test3\"")
    tags.should == %w[test1 test2 test3]
  end
end
310
+
311
# extract_hashtags_with_indices: the examples expect one hash per hashtag with
# :hashtag (text without "#") and :indices ([start, end] positions in the text).
describe "hashtags with indices" do
  # Asserts that +text+ yields exactly one hashtag equal to +hashtag+, starting
  # at +offset+ and ending at offset + hashtag length + 1.
  def match_hashtag_in_text(hashtag, text, offset = 0)
    found = @extractor.extract_hashtags_with_indices(text)
    found.size.should == 1

    only = found.first
    only[:hashtag].should == hashtag
    only[:indices].first.should == offset
    # The extra 1 covers the leading "#", which is not part of the hashtag text.
    only[:indices].last.should == offset + hashtag.chars.to_a.size + 1
  end

  # Asserts that +text+ yields no hashtags at all.
  def not_match_hashtag_in_text(text)
    found = @extractor.extract_hashtags_with_indices(text)
    found.size.should == 0
  end

  context "extracts latin/numeric hashtags" do
    %w(text text123 123text).each do |hashtag|
      it "should extract ##{hashtag}" do
        match_hashtag_in_text(hashtag, "##{hashtag}")
      end

      it "should extract ##{hashtag} within text" do
        # 9 is the length of the "pre-text " prefix.
        match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
      end
    end
  end

  context "international hashtags" do
    context "should allow accents" do
      %w(mañana café münchen).each do |hashtag|
        it "should extract ##{hashtag}" do
          match_hashtag_in_text(hashtag, "##{hashtag}")
        end

        it "should extract ##{hashtag} within text" do
          match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9)
        end
      end

      it "should not allow the multiplication character" do
        # U+00D7 is inserted after "pre"; only "pre" is expected as the hashtag.
        times_sign = [0xd7].pack('U')
        match_hashtag_in_text("pre", "#pre#{times_sign}post", 0)
      end

      it "should not allow the division character" do
        # U+00F7 is inserted after "pre"; only "pre" is expected as the hashtag.
        division_sign = [0xf7].pack('U')
        match_hashtag_in_text("pre", "#pre#{division_sign}post", 0)
      end
    end
  end

  it "should not extract numeric hashtags" do
    not_match_hashtag_in_text("#1234")
  end

  it "should extract hashtag in text with supplementary character" do
    # U+10400 lies outside the Basic Multilingual Plane; the expected offset of
    # 2 counts it as a single character (character + space).
    supplementary = [0x10400].pack('U')
    match_hashtag_in_text("hashtag", "#{supplementary} #hashtag", 2)
  end
end
368
+ end
@@ -0,0 +1,92 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Bare host class for the specs below: it only mixes in Twitter::HitHighlighter
# so hit_highlight can be called on an instance.
class TestHitHighlighter
  include Twitter::HitHighlighter
end
7
+
8
+ describe Twitter::HitHighlighter do
9
+ describe "highlight" do
10
# Build a fresh highlighter host object for every example.
before(:each) { @highlighter = TestHitHighlighter.new }
13
+
14
# Checks the wrapping tag: <em> by default, overridable through the :tag option.
context "with options" do
  before(:each) do
    @original = "Testing this hit highliter"
    # One hit covering characters 13...16, i.e. the word "hit".
    @hits = [[13, 16]]
  end

  it "should default to <em> tags" do
    highlighted = @highlighter.hit_highlight(@original, @hits)
    highlighted.should == "Testing this <em>hit</em> highliter"
  end

  it "should allow tag override" do
    highlighted = @highlighter.hit_highlight(@original, @hits, :tag => 'b')
    highlighted.should == "Testing this <b>hit</b> highliter"
  end
end
28
+
29
# Highlighting plain text (no markup). Each hit is a [start, end] pair of
# character positions; the expected output wraps that span in <em>...</em>.
context "without links" do
  before do
    @original = "Hey! this is a test tweet"
  end

  it "should return original when no hits are provided" do
    @highlighter.hit_highlight(@original).should == @original
  end

  # The hit arrays are passed directly. Writing `hits = [[...]]` in the argument
  # list assigns a local that is never read; it is not a keyword argument.
  it "should highlight one hit" do
    @highlighter.hit_highlight(@original, [[5, 9]]).should == "Hey! <em>this</em> is a test tweet"
  end

  it "should highlight two hits" do
    @highlighter.hit_highlight(@original, [[5, 9], [15, 19]]).should == "Hey! <em>this</em> is a <em>test</em> tweet"
  end

  it "should correctly highlight first-word hits" do
    @highlighter.hit_highlight(@original, [[0, 3]]).should == "<em>Hey</em>! this is a test tweet"
  end

  it "should correctly highlight last-word hits" do
    @highlighter.hit_highlight(@original, [[20, 25]]).should == "Hey! this is a test <em>tweet</em>"
  end
end
54
+
55
# Highlighting text that already contains <a> markup. Judging by the expected
# strings, hit positions count only the visible text and skip the tags.
context "with links" do
  it "should highlight with a single link" do
    highlighted = @highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]])
    highlighted.should == "@<a>bcherry</a> <em>this</em> was a test tweet"
  end

  it "should highlight with link at the end" do
    highlighted = @highlighter.hit_highlight("test test <a>test</a>", [[5, 9]])
    highlighted.should == "test <em>test</em> <a>test</a>"
  end

  it "should highlight with a link at the beginning" do
    highlighted = @highlighter.hit_highlight("<a>test</a> test test", [[5, 9]])
    highlighted.should == "<a>test</a> <em>test</em> test"
  end

  it "should highlight an entire link" do
    highlighted = @highlighter.hit_highlight("test <a>test</a> test", [[5, 9]])
    highlighted.should == "test <a><em>test</em></a> test"
  end

  it "should highlight within a link" do
    highlighted = @highlighter.hit_highlight("test <a>test</a> test", [[6, 8]])
    highlighted.should == "test <a>t<em>es</em>t</a> test"
  end

  it "should highlight around a link" do
    highlighted = @highlighter.hit_highlight("test <a>test</a> test", [[3, 11]])
    highlighted.should == "tes<em>t <a>test</a> t</em>est"
  end

  it "should fail gracefully with bad hits" do
    # The hit end (20) lies past the end of the 9-character text.
    highlighted = @highlighter.hit_highlight("test test", [[5, 20]])
    highlighted.should == "test <em>test</em>"
  end

  it "should not mess up with touching tags" do
    highlighted = @highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3, 6]])
    highlighted.should == "<a>foo</a><a><em>foo</em></a>"
  end
end
89
+
90
+ end
91
+
92
+ end