twitter-text 1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/validation.rb ADDED
@@ -0,0 +1,51 @@
1
+
2
module Twitter
  # Client-side pre-validation helpers for Tweet text. The methods are plain
  # instance methods, so the module is meant to be mixed into a class or object.
  module Validation
    # Maximum allowed Tweet length, measured by +tweet_length+.
    MAX_LENGTH = 140

    # Characters not allowed in Tweets, each stored as a one-character UTF-8 string.
    INVALID_CHARACTERS = [
      0xFFFE, 0xFEFF,                         # BOM
      0xFFFF,                                 # Special
      0x202A, 0x202B, 0x202C, 0x202D, 0x202E  # Directional change
    ].map { |cp| [cp].pack('U') }.freeze

    # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
    # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
    # string no matter which actual form was transmitted. For example:
    #
    #     U+0065  Latin Small Letter E
    # +   U+0301  Combining Acute Accent
    # ----------
    # =   2 code points (3 bytes in UTF-8), displayed as é (1 visual glyph)
    #     … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +tweet_length+ of 1
    #
    # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
    #
    # Relies on ActiveSupport::Multibyte::Chars#normalize for the NFC conversion; whatever that call raises
    # propagates to the caller.
    def tweet_length(text)
      ActiveSupport::Multibyte::Chars.new(text).normalize(:c).length
    end

    # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
    # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
    # will allow quicker feedback.
    #
    # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
    #
    #   <tt>:too_long</tt>:: if the <tt>text</tt> is too long
    #   <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
    #   <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
    def tweet_invalid?(text)
      return :empty if text.blank?

      begin
        return :too_long if tweet_length(text) > MAX_LENGTH
        return :invalid_characters if INVALID_CHARACTERS.any? { |invalid_char| text.include?(invalid_char) }
      rescue ArgumentError, ActiveSupport::Multibyte::EncodingError, ::EncodingError
        # Non-Unicode value. Core ::EncodingError is listed because String#include?
        # raises Encoding::CompatibilityError (a subclass) when +text+ is in an
        # encoding that cannot be compared with the UTF-8 INVALID_CHARACTERS.
        return :invalid_characters
      end

      false
    end
  end
end
@@ -0,0 +1,427 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Bare host class that mixes in Twitter::Autolink so the specs below can call
# the autolinking methods on an instance.
class TestAutolink
  include Twitter::Autolink
end
6
+
7
+ describe Twitter::Autolink do
8
+ def original_text; end
9
+ def url; end
10
+
11
+ describe "auto_link_custom" do
12
+ before do
13
+ @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text
14
+ end
15
+
16
# Specs for linking @screen_names. Each context overrides +original_text+; the
# enclosing +before+ hook feeds it through +auto_link+ and stores the result in
# @autolinked_text.
describe "username autolinking" do
  context "username preceded by a space" do
    def original_text; "hello @jacob"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username at beginning of line" do
    def original_text; "@jacob you're cool"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username preceded by word character" do
    def original_text; "meet@the beach"; end

    it "should not be linked" do
      Hpricot(@autolinked_text).search('a').should be_blank
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username containing non-word characters" do
    def original_text; "@zach&^$%^"; end

    # The assertion expects a link to 'zach', so the example title now says so
    # (it previously read "should not be linked").
    it "should link only the leading word characters" do
      @autolinked_text.should link_to_screen_name('zach')
    end
  end

  context "username over twenty characters" do
    def original_text
      # Memoized in an ivar so the example can assert against the same value.
      @twenty_character_username = "zach" * 5
      "@" + @twenty_character_username + "1"
    end

    # The assertion expects the first twenty characters to be linked, so the
    # example title now says so (it previously read "should not be linked").
    it "should link only the first twenty characters" do
      @autolinked_text.should link_to_screen_name(@twenty_character_username)
    end
  end

  context "username followed by japanese" do
    def original_text; "@jacobの"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "username preceded by japanese" do
    def original_text; "あ@matz"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('matz')
    end
  end

  context "username surrounded by japanese" do
    def original_text; "あ@yoshimiの"; end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('yoshimi')
    end
  end

  context "username using full-width at-sign" do
    def original_text
      # U+FF20 FULLWIDTH COMMERCIAL AT
      "#{[0xFF20].pack('U')}jacob"
    end

    it "should be linked" do
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end
end
102
+
103
# Specs for linking @user/list-slug paths. Contexts that define +original_text+
# get @autolinked_text from the enclosing +before+ hook; the first context sets
# it itself because it needs the :suppress_lists option.
describe "list path autolinking" do

  context "when List is not available" do
    it "should not be linked" do
      @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
      @autolinked_text.should_not link_to_list_path('jacob/my-list')
    end
  end

  context "slug preceded by a space" do
    def original_text; "hello @jacob/my-list"; end

    it "should be linked" do
      @autolinked_text.should link_to_list_path('jacob/my-list')
    end
  end

  context "username followed by a slash but no list" do
    def original_text; "hello @jacob/ my-list"; end

    it "should NOT be linked" do
      @autolinked_text.should_not link_to_list_path('jacob/my-list')
      @autolinked_text.should link_to_screen_name('jacob')
    end
  end

  context "empty username followed by a list" do
    def original_text; "hello @/my-list"; end

    it "should NOT be linked" do
      Hpricot(@autolinked_text).search('a').should be_blank
    end
  end

  context "list slug at beginning of line" do
    def original_text; "@jacob/my-list"; end

    it "should be linked" do
      @autolinked_text.should link_to_list_path('jacob/my-list')
    end
  end

  context "username preceded by alpha-numeric character" do
    def original_text; "meet@the/beach"; end

    it "should not be linked" do
      Hpricot(@autolinked_text).search('a').should be_blank
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob/my-list"; end

    # @autolinked_text is already computed from original_text by the enclosing
    # before hook, so it is not recomputed here.
    it "should be linked" do
      @autolinked_text.should link_to_list_path('jacob/my-list')
    end
  end

  context "username containing non-word characters" do
    def original_text; "@zach/test&^$%^"; end

    it "should be linked" do
      @autolinked_text.should link_to_list_path('zach/test')
    end
  end

  # Previously titled "username over twenty characters" (copied from the
  # username specs); the input is an eighty-character slug plus five extras.
  context "list slug over eighty characters" do
    def original_text
      # Memoized in an ivar so the example can assert against the same value.
      @eighty_character_list = "jack/" + ("a" * 80)
      "@#{@eighty_character_list}12345"
    end

    it "should link only the first eighty characters of the slug" do
      @autolinked_text.should link_to_list_path(@eighty_character_list)
    end
  end
end
181
+
182
# Specs for linking #hashtags. Each context supplies +original_text+; the
# enclosing +before+ hook turns it into @autolinked_text.
describe "hashtag autolinking" do
  context "with an all numeric hashtag" do
    def original_text
      "#123"
    end

    it "should not be linked" do
      @autolinked_text.should_not have_autolinked_hashtag('#123')
    end
  end

  context "with a hashtag with alphanumeric characters" do
    def original_text
      "#ab1d"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#ab1d')
    end
  end

  context "with a hashtag with underscores" do
    def original_text
      "#a_b_c_d"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag(original_text)
    end
  end

  context "with a hashtag that is preceded by a word character" do
    def original_text
      "ab#cd"
    end

    it "should not be linked" do
      @autolinked_text.should_not have_autolinked_hashtag(original_text)
    end
  end

  context "with a page anchor in a url" do
    def original_text
      "Here's my url: http://foobar.com/#home"
    end

    it "should not link the hashtag" do
      @autolinked_text.should_not have_autolinked_hashtag('#home')
    end

    it "should link the url" do
      @autolinked_text.should have_autolinked_url('http://foobar.com/#home')
    end
  end

  context "with a hashtag that starts with a number but has word characters" do
    def original_text
      "#2ab"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag(original_text)
    end
  end

  context "with multiple valid hashtags" do
    def original_text
      "I'm frickin' awesome #ab #cd #ef"
    end

    it "links each hashtag" do
      # Checked in order of appearance; the first failure aborts the example.
      ['#ab', '#cd', '#ef'].each do |tag|
        @autolinked_text.should have_autolinked_hashtag(tag)
      end
    end
  end

  context "with a hashtag preceded by a ." do
    def original_text
      "ok, great.#abc"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#abc')
    end
  end

  context "with a hashtag preceded by a &" do
    def original_text
      "&#nbsp;"
    end

    it "should not be linked" do
      @autolinked_text.should_not have_autolinked_hashtag('#nbsp;')
    end
  end

  context "with a hashtag that ends in an !" do
    def original_text
      "#great!"
    end

    it "should be linked, but should not include the !" do
      @autolinked_text.should have_autolinked_hashtag('#great')
    end
  end

  context "with a hashtag preceded by Japanese" do
    def original_text
      "の#twj_dev"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#twj_dev')
    end
  end

  context "with a hashtag followed by Japanese" do
    def original_text
      "#twj_devの"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#twj_dev')
    end
  end

  context "with a hashtag preceded by a full-width space" do
    def original_text
      # U+3000 IDEOGRAPHIC SPACE
      "#{[0x3000].pack('U')}#twj_dev"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#twj_dev')
    end
  end

  context "with a hashtag followed by a full-width space" do
    def original_text
      # U+3000 IDEOGRAPHIC SPACE
      "#twj_dev#{[0x3000].pack('U')}"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_hashtag('#twj_dev')
    end
  end

  context "with a hashtag using full-width hash" do
    def original_text
      # U+FF03 FULLWIDTH NUMBER SIGN
      "#{[0xFF03].pack('U')}twj_dev"
    end

    it "should be linked" do
      anchor = Hpricot(@autolinked_text).at('a')
      # Link text keeps the full-width hash; the search href uses an encoded ASCII '#'.
      anchor.inner_text.should == "#{[0xFF03].pack('U')}twj_dev"
      anchor['href'].should == 'http://twitter.com/search?q=%23twj_dev'
    end
  end

end
312
+
313
# Specs for linking bare URLs. +url+ defaults to the value below and may be
# overridden by a nested context; contexts that define +original_text+ get
# @autolinked_text from the enclosing +before+ hook.
describe "URL autolinking" do
  def url
    "http://www.google.com"
  end

  context "when embedded in plain text" do
    def original_text
      "On my search engine #{url} I found good links."
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_url(url)
    end
  end

  context "when surrounded by Japanese;" do
    def original_text
      "いまなにしてる#{url}いまなにしてる"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_url(url)
    end
  end

  context "when surrounded by parentheses;" do
    def original_text
      "I found a neatness (#{url})"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_url(url)
    end

    context "when the URL ends with a slash;" do
      def url
        "http://www.google.com/"
      end

      it "should be linked" do
        pending # our support for Wikipedia URLS containing parens breaks this corner case
        @autolinked_text.should have_autolinked_url(url)
      end
    end
  end

  context "with a URL ending in allowed punctuation" do
    it "does not consume ending punctuation" do
      linker = TestAutolink.new
      # NOTE(review): inside %w a backslash followed by a space is an escaped
      # space, so the tail of this list is the single element " '" rather than
      # a backslash and a quote — confirm whether a backslash was meant to be tested.
      %w| ? ! , . : ; ] ) } = \ ' |.each do |trailing|
        linker.auto_link("#{url}#{trailing}").should have_autolinked_url(url)
      end
    end
  end

  context "with a URL preceded in forbidden characters" do
    it "should not be linked" do
      linker = TestAutolink.new
      # NOTE(review): same %w escaping as above — the first element is " '",
      # not a lone backslash; confirm intent.
      %w| \ ' / : ! = |.each do |leading|
        linker.auto_link("#{leading}#{url}").should_not have_autolinked_url(url)
      end
    end
  end

  context "when embedded in a link tag" do
    def original_text
      "<link rel='true'>#{url}</link>"
    end

    it "should be linked" do
      @autolinked_text.should have_autolinked_url(url)
    end
  end

  context "with multiple URLs" do
    def original_text
      "http://www.links.org link at start of page, link at end http://www.foo.org"
    end

    it "should autolink each one" do
      ['http://www.links.org', 'http://www.foo.org'].each do |expected|
        @autolinked_text.should have_autolinked_url(expected)
      end
    end
  end

  context "with multiple URLs in different formats" do
    def original_text
      "http://foo.com https://bar.com http://mail.foobar.org"
    end

    it "should autolink each one, in the proper order" do
      ['http://foo.com', 'https://bar.com', 'http://mail.foobar.org'].each do |expected|
        @autolinked_text.should have_autolinked_url(expected)
      end
    end
  end

  context "with a URL having a long TLD" do
    def original_text
      "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"
    end

    it "should autolink it" do
      @autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html')
    end
  end

  context "with a url lacking the protocol" do
    def original_text
      "I like www.foobar.com dudes"
    end

    it "links to the original text with the full href" do
      anchor = Hpricot(@autolinked_text).at('a')
      # Visible text stays as typed; only the href gains the scheme.
      anchor.inner_text.should == 'www.foobar.com'
      anchor['href'].should == 'http://www.foobar.com'
    end
  end

end
413
+
414
# Specs for running every autolinker over the same text via +auto_link+.
describe "Autolink all" do
  before do
    @linker = TestAutolink.new
  end

  it "should allow url/hashtag overlap" do
    # The "#search" fragment must stay part of the URL link.
    result = @linker.auto_link("http://twitter.com/#search")
    result.should have_autolinked_url('http://twitter.com/#search')
  end

end
425
+ end
426
+
427
+ end