twitter-text 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/validation.rb ADDED
@@ -0,0 +1,51 @@
1
+
2
+ module Twitter
3
+ module Validation
4
+ MAX_LENGTH = 140 # upper bound that tweet_invalid? enforces on tweet_length(text)
5
+
6
+ # Characters not allowed in Tweets, each packed into a one-character UTF-8 String
7
+ INVALID_CHARACTERS = [
8
+ 0xFFFE, 0xFEFF, # BOM (U+FEFF) and its byte-swapped form (U+FFFE)
9
+ 0xFFFF, # Special (Unicode noncharacter)
10
+ 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change (LRE, RLE, PDF, LRO, RLO)
11
+ ].map{|cp| [cp].pack('U') }.freeze
12
+
13
+ # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC form
14
+ # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
15
+ # string no matter which actual form was transmitted. For example:
16
+ #
17
+ # U+0065 Latin Small Letter E
18
+ # + U+0301 Combining Acute Accent
19
+ # ----------
20
+ # = 3 bytes in UTF-8, 2 characters, displayed as é (1 visual glyph)
21
+ # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +tweet_length+ of 1
22
+ #
23
+ # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
24
+ #
25
+ def tweet_length(text)
26
+ ActiveSupport::Multibyte::Chars.new(text).normalize(:c).length
27
+ end
28
+
29
+ # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
30
+ # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
31
+ # will allow quicker feedback.
32
+ #
33
+ # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
34
+ #
35
+ # <tt>:too_long</tt>:: if the <tt>tweet_length</tt> of the <tt>text</tt> is greater than <tt>MAX_LENGTH</tt>
36
+ # <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty (anything for which <tt>blank?</tt> is true)
37
+ # <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
38
+ def tweet_invalid?(text)
39
+ return :empty if text.blank?
40
+ begin
41
+ return :too_long if tweet_length(text) > MAX_LENGTH
42
+ return :invalid_characters if INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
43
+ rescue ArgumentError, ActiveSupport::Multibyte::EncodingError => e
44
+ # non-Unicode value: one of the checks above raised, so the text is reported as invalid characters.
45
+ return :invalid_characters
46
+ end
47
+
48
+ return false
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,427 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ class TestAutolink # bare host class: mixes in Twitter::Autolink so the specs can call its methods on an instance
4
+ include Twitter::Autolink
5
+ end
6
+
7
+ describe Twitter::Autolink do
8
+ def original_text; end
9
+ def url; end
10
+
11
+ describe "auto_link_custom" do
12
+ before do
13
+ @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text
14
+ end
15
+
16
+ describe "username autolinking" do
17
+ context "username preceded by a space" do
18
+ def original_text; "hello @jacob"; end
19
+
20
+ it "should be linked" do
21
+ @autolinked_text.should link_to_screen_name('jacob')
22
+ end
23
+ end
24
+
25
+ context "username at beginning of line" do
26
+ def original_text; "@jacob you're cool"; end
27
+
28
+ it "should be linked" do
29
+ @autolinked_text.should link_to_screen_name('jacob')
30
+ end
31
+ end
32
+
33
+ context "username preceded by word character" do
34
+ def original_text; "meet@the beach"; end
35
+
36
+ it "should not be linked" do
37
+ Hpricot(@autolinked_text).search('a').should be_blank
38
+ end
39
+ end
40
+
41
+ context "username preceded by non-word character" do
42
+ def original_text; "great.@jacob"; end
43
+
44
+ it "should be linked" do
45
+ @autolinked_text.should link_to_screen_name('jacob')
46
+ end
47
+ end
48
+
49
+ context "username containing non-word characters" do
50
+ def original_text; "@zach&^$%^"; end
51
+
52
+ it "should not be linked" do
53
+ @autolinked_text.should link_to_screen_name('zach')
54
+ end
55
+ end
56
+
57
+ context "username over twenty characters" do
58
+ def original_text
59
+ @twenty_character_username = "zach" * 5
60
+ "@" + @twenty_character_username + "1"
61
+ end
62
+
63
+ it "should not be linked" do
64
+ @autolinked_text.should link_to_screen_name(@twenty_character_username)
65
+ end
66
+ end
67
+
68
+ context "username followed by japanese" do
69
+ def original_text; "@jacobの"; end
70
+
71
+ it "should be linked" do
72
+ @autolinked_text.should link_to_screen_name('jacob')
73
+ end
74
+ end
75
+
76
+ context "username preceded by japanese" do
77
+ def original_text; "あ@matz"; end
78
+
79
+ it "should be linked" do
80
+ @autolinked_text.should link_to_screen_name('matz')
81
+ end
82
+ end
83
+
84
+ context "username surrounded by japanese" do
85
+ def original_text; "あ@yoshimiの"; end
86
+
87
+ it "should be linked" do
88
+ @autolinked_text.should link_to_screen_name('yoshimi')
89
+ end
90
+ end
91
+
92
+ context "username using full-width at-sign" do
93
+ def original_text
94
+ "#{[0xFF20].pack('U')}jacob"
95
+ end
96
+
97
+ it "should be linked" do
98
+ @autolinked_text.should link_to_screen_name('jacob')
99
+ end
100
+ end
101
+ end
102
+
103
+ describe "list path autolinking" do
104
+
105
+ context "when List is not available" do
106
+ it "should not be linked" do
107
+ @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
108
+ @autolinked_text.should_not link_to_list_path('jacob/my-list')
109
+ end
110
+ end
111
+
112
+ context "slug preceded by a space" do
113
+ def original_text; "hello @jacob/my-list"; end
114
+
115
+ it "should be linked" do
116
+ @autolinked_text.should link_to_list_path('jacob/my-list')
117
+ end
118
+ end
119
+
120
+ context "username followed by a slash but no list" do
121
+ def original_text; "hello @jacob/ my-list"; end
122
+
123
+ it "should NOT be linked" do
124
+ @autolinked_text.should_not link_to_list_path('jacob/my-list')
125
+ @autolinked_text.should link_to_screen_name('jacob')
126
+ end
127
+ end
128
+
129
+ context "empty username followed by a list" do
130
+ def original_text; "hello @/my-list"; end
131
+
132
+ it "should NOT be linked" do
133
+ Hpricot(@autolinked_text).search('a').should be_blank
134
+ end
135
+ end
136
+
137
+ context "list slug at beginning of line" do
138
+ def original_text; "@jacob/my-list"; end
139
+
140
+ it "should be linked" do
141
+ @autolinked_text.should link_to_list_path('jacob/my-list')
142
+ end
143
+ end
144
+
145
+ context "username preceded by alpha-numeric character" do
146
+ def original_text; "meet@the/beach"; end
147
+
148
+ it "should not be linked" do
149
+ Hpricot(@autolinked_text).search('a').should be_blank
150
+ end
151
+ end
152
+
153
+ context "username preceded by non-word character" do
154
+ def original_text; "great.@jacob/my-list"; end
155
+
156
+ it "should be linked" do
157
+ @autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list")
158
+ @autolinked_text.should link_to_list_path('jacob/my-list')
159
+ end
160
+ end
161
+
162
+ context "username containing non-word characters" do
163
+ def original_text; "@zach/test&^$%^"; end
164
+
165
+ it "should be linked" do
166
+ @autolinked_text.should link_to_list_path('zach/test')
167
+ end
168
+ end
169
+
170
+ context "username over twenty characters" do
171
+ def original_text
172
+ @eighty_character_list = "jack/" + ("a" * 80)
173
+ "@#{@eighty_character_list}12345"
174
+ end
175
+
176
+ it "should be linked" do
177
+ @autolinked_text.should link_to_list_path(@eighty_character_list)
178
+ end
179
+ end
180
+ end
181
+
182
+ describe "hashtag autolinking" do
183
+ context "with an all numeric hashtag" do
184
+ def original_text; "#123"; end
185
+
186
+ it "should not be linked" do
187
+ @autolinked_text.should_not have_autolinked_hashtag('#123')
188
+ end
189
+ end
190
+
191
+ context "with a hashtag with alphanumeric characters" do
192
+ def original_text; "#ab1d"; end
193
+
194
+ it "should be linked" do
195
+ @autolinked_text.should have_autolinked_hashtag('#ab1d')
196
+ end
197
+ end
198
+
199
+ context "with a hashtag with underscores" do
200
+ def original_text; "#a_b_c_d"; end
201
+
202
+ it "should be linked" do
203
+ @autolinked_text.should have_autolinked_hashtag(original_text)
204
+ end
205
+ end
206
+
207
+ context "with a hashtag that is preceded by a word character" do
208
+ def original_text; "ab#cd"; end
209
+
210
+ it "should not be linked" do
211
+ @autolinked_text.should_not have_autolinked_hashtag(original_text)
212
+ end
213
+ end
214
+
215
+ context "with a page anchor in a url" do
216
+ def original_text; "Here's my url: http://foobar.com/#home"; end
217
+
218
+ it "should not link the hashtag" do
219
+ @autolinked_text.should_not have_autolinked_hashtag('#home')
220
+ end
221
+
222
+ it "should link the url" do
223
+ @autolinked_text.should have_autolinked_url('http://foobar.com/#home')
224
+ end
225
+ end
226
+
227
+ context "with a hashtag that starts with a number but has word characters" do
228
+ def original_text; "#2ab"; end
229
+
230
+ it "should be linked" do
231
+ @autolinked_text.should have_autolinked_hashtag(original_text)
232
+ end
233
+ end
234
+
235
+ context "with multiple valid hashtags" do
236
+ def original_text; "I'm frickin' awesome #ab #cd #ef"; end
237
+
238
+ it "links each hashtag" do
239
+ @autolinked_text.should have_autolinked_hashtag('#ab')
240
+ @autolinked_text.should have_autolinked_hashtag('#cd')
241
+ @autolinked_text.should have_autolinked_hashtag('#ef')
242
+ end
243
+ end
244
+
245
+ context "with a hashtag preceded by a ." do
246
+ def original_text; "ok, great.#abc"; end
247
+
248
+ it "should be linked" do
249
+ @autolinked_text.should have_autolinked_hashtag('#abc')
250
+ end
251
+ end
252
+
253
+ context "with a hashtag preceded by a &" do
254
+ def original_text; "&#nbsp;"; end
255
+
256
+ it "should not be linked" do
257
+ @autolinked_text.should_not have_autolinked_hashtag('#nbsp;')
258
+ end
259
+ end
260
+
261
+ context "with a hashtag that ends in an !" do
262
+ def original_text; "#great!"; end
263
+
264
+ it "should be linked, but should not include the !" do
265
+ @autolinked_text.should have_autolinked_hashtag('#great')
266
+ end
267
+ end
268
+
269
+ context "with a hashtag preceded by Japanese" do
270
+ def original_text; "の#twj_dev"; end
271
+
272
+ it "should be linked" do
273
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
274
+ end
275
+ end
276
+
277
+ context "with a hashtag followed by Japanese" do
278
+ def original_text; "#twj_devの"; end
279
+
280
+ it "should be linked" do
281
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
282
+ end
283
+ end
284
+
285
+ context "with a hashtag preceded by a full-width space" do
286
+ def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
287
+
288
+ it "should be linked" do
289
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
290
+ end
291
+ end
292
+
293
+ context "with a hashtag followed by a full-width space" do
294
+ def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
295
+
296
+ it "should be linked" do
297
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
298
+ end
299
+ end
300
+
301
+ context "with a hashtag using full-width hash" do
302
+ def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
303
+
304
+ it "should be linked" do
305
+ link = Hpricot(@autolinked_text).at('a')
306
+ link.inner_text.should == "#{[0xFF03].pack('U')}twj_dev"
307
+ link['href'].should == 'http://twitter.com/search?q=%23twj_dev'
308
+ end
309
+ end
310
+
311
+ end
312
+
313
+ describe "URL autolinking" do
314
+ def url; "http://www.google.com"; end
315
+
316
+ context "when embedded in plain text" do
317
+ def original_text; "On my search engine #{url} I found good links."; end
318
+
319
+ it "should be linked" do
320
+ @autolinked_text.should have_autolinked_url(url)
321
+ end
322
+ end
323
+
324
+ context "when surrounded by Japanese;" do
325
+ def original_text; "いまなにしてる#{url}いまなにしてる"; end
326
+
327
+ it "should be linked" do
328
+ @autolinked_text.should have_autolinked_url(url)
329
+ end
330
+ end
331
+
332
+ context "when surrounded by parentheses;" do
333
+ def original_text; "I found a neatness (#{url})"; end
334
+
335
+ it "should be linked" do
336
+ @autolinked_text.should have_autolinked_url(url)
337
+ end
338
+
339
+ context "when the URL ends with a slash;" do
340
+ def url; "http://www.google.com/"; end
341
+
342
+ it "should be linked" do
343
+ pending # our support for Wikipedia URLS containing parens breaks this corner case
344
+ @autolinked_text.should have_autolinked_url(url)
345
+ end
346
+ end
347
+ end
348
+
349
+ context "with a URL ending in allowed punctuation" do
350
+ it "does not consume ending punctuation" do
351
+ matcher = TestAutolink.new
352
+ %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
353
+ matcher.auto_link("#{url}#{char}").should have_autolinked_url(url)
354
+ end
355
+ end
356
+ end
357
+
358
+ context "with a URL preceded in forbidden characters" do
359
+ it "should not be linked" do
360
+ matcher = TestAutolink.new
361
+ %w| \ ' / : ! = |.each do |char|
362
+ matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url)
363
+ end
364
+ end
365
+ end
366
+
367
+ context "when embedded in a link tag" do
368
+ def original_text; "<link rel='true'>#{url}</link>"; end
369
+
370
+ it "should be linked" do
371
+ @autolinked_text.should have_autolinked_url(url)
372
+ end
373
+ end
374
+
375
+ context "with multiple URLs" do
376
+ def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
377
+
378
+ it "should autolink each one" do
379
+ @autolinked_text.should have_autolinked_url('http://www.links.org')
380
+ @autolinked_text.should have_autolinked_url('http://www.foo.org')
381
+ end
382
+ end
383
+
384
+ context "with multiple URLs in different formats" do
385
+ def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
386
+
387
+ it "should autolink each one, in the proper order" do
388
+ @autolinked_text.should have_autolinked_url('http://foo.com')
389
+ @autolinked_text.should have_autolinked_url('https://bar.com')
390
+ @autolinked_text.should have_autolinked_url('http://mail.foobar.org')
391
+ end
392
+ end
393
+
394
+ context "with a URL having a long TLD" do
395
+ def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
396
+
397
+ it "should autolink it" do
398
+ @autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html')
399
+ end
400
+ end
401
+
402
+ context "with a url lacking the protocol" do
403
+ def original_text; "I like www.foobar.com dudes"; end
404
+
405
+ it "links to the original text with the full href" do
406
+ link = Hpricot(@autolinked_text).at('a')
407
+ link.inner_text.should == 'www.foobar.com'
408
+ link['href'].should == 'http://www.foobar.com'
409
+ end
410
+ end
411
+
412
+ end
413
+
414
+ describe "Autolink all" do
415
+ before do
416
+ @linker = TestAutolink.new
417
+ end
418
+
419
+ it "should allow url/hashtag overlap" do
420
+ auto_linked = @linker.auto_link("http://twitter.com/#search")
421
+ auto_linked.should have_autolinked_url('http://twitter.com/#search')
422
+ end
423
+
424
+ end
425
+ end
426
+
427
+ end