twitter-text 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +13 -0
- data/README.rdoc +58 -0
- data/Rakefile +92 -0
- data/TODO +3 -0
- data/lib/autolink.rb +101 -0
- data/lib/extractor.rb +69 -0
- data/lib/regex.rb +74 -0
- data/lib/twitter-text.rb +13 -0
- data/lib/unicode.rb +27 -0
- data/lib/validation.rb +51 -0
- data/spec/autolinking_spec.rb +427 -0
- data/spec/extractor_spec.rb +195 -0
- data/spec/regex_spec.rb +44 -0
- data/spec/spec_helper.rb +86 -0
- data/spec/unicode_spec.rb +30 -0
- data/spec/validation_spec.rb +42 -0
- metadata +79 -0
data/lib/validation.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
|
2
|
+
module Twitter
  # Client-side pre-validation of Tweet text. The methods are defined as
  # instance methods, so the module is meant to be mixed into the caller.
  module Validation
    # Maximum allowed Tweet length, compared against +tweet_length+.
    MAX_LENGTH = 140

    # Characters not allowed in Tweets. Both the list and each one-character
    # string are frozen so the shared constant cannot be mutated by callers.
    INVALID_CHARACTERS = [
      0xFFFE, 0xFEFF,                         # BOM
      0xFFFF,                                 # Special
      0x202A, 0x202B, 0x202C, 0x202D, 0x202E  # Directional change
    ].map { |cp| [cp].pack('U').freeze }.freeze

    # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
    # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
    # string no matter which actual form was transmitted. For example:
    #
    #     U+0065  Latin Small Letter E
    # +   U+0301  Combining Acute Accent
    # ----------
    # =   3 bytes (UTF-8), 2 characters, displayed as é (1 visual glyph)
    #     … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
    #
    # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
    #
    def tweet_length(text)
      ActiveSupport::Multibyte::Chars.new(text).normalize(:c).length
    end

    # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
    # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
    # will allow quicker feedback.
    #
    # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
    #
    #   <tt>:too_long</tt>:: if the <tt>text</tt> is too long
    #   <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
    #   <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
    #
    # The checks run in the order empty, length, characters, so a text that is
    # both too long and contains a disallowed character reports <tt>:too_long</tt>.
    def tweet_invalid?(text)
      return :empty if text.blank?

      begin
        return :too_long if tweet_length(text) > MAX_LENGTH
        return :invalid_characters if INVALID_CHARACTERS.any? { |invalid_char| text.include?(invalid_char) }
      rescue ArgumentError, ActiveSupport::Multibyte::EncodingError
        # non-Unicode value.
        return :invalid_characters
      end

      false
    end
  end
end
|
@@ -0,0 +1,427 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Bare host class that mixes in Twitter::Autolink so the specs can call the
# auto_link* methods on an instance (TestAutolink.new.auto_link(...)).
class TestAutolink
|
4
|
+
include Twitter::Autolink
|
5
|
+
end
|
6
|
+
|
7
|
+
describe Twitter::Autolink do
|
8
|
+
def original_text; end
|
9
|
+
def url; end
|
10
|
+
|
11
|
+
describe "auto_link_custom" do
|
12
|
+
before do
|
13
|
+
@autolinked_text = TestAutolink.new.auto_link(original_text) if original_text
|
14
|
+
end
|
15
|
+
|
16
|
+
describe "username autolinking" do
|
17
|
+
context "username preceded by a space" do
|
18
|
+
def original_text; "hello @jacob"; end
|
19
|
+
|
20
|
+
it "should be linked" do
|
21
|
+
@autolinked_text.should link_to_screen_name('jacob')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
context "username at beginning of line" do
|
26
|
+
def original_text; "@jacob you're cool"; end
|
27
|
+
|
28
|
+
it "should be linked" do
|
29
|
+
@autolinked_text.should link_to_screen_name('jacob')
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context "username preceded by word character" do
|
34
|
+
def original_text; "meet@the beach"; end
|
35
|
+
|
36
|
+
it "should not be linked" do
|
37
|
+
Hpricot(@autolinked_text).search('a').should be_blank
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "username preceded by non-word character" do
|
42
|
+
def original_text; "great.@jacob"; end
|
43
|
+
|
44
|
+
it "should be linked" do
|
45
|
+
@autolinked_text.should link_to_screen_name('jacob')
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
context "username containing non-word characters" do
  def original_text; "@zach&^$%^"; end

  # The expectation is that a link IS produced, for the leading "zach" only;
  # the example name states that so spec output matches what is asserted.
  it "should be linked up to the first non-word character" do
    @autolinked_text.should link_to_screen_name('zach')
  end
end
|
56
|
+
|
57
|
+
context "username over twenty characters" do
  # Builds "@" + a 20-character name + one extra character. The 20-character
  # part is kept in an instance variable so the example can refer to it.
  def original_text
    @twenty_character_username = "zach" * 5
    "@" + @twenty_character_username + "1"
  end

  # A link is expected, covering only the first twenty characters.
  it "should be linked up to the twentieth character" do
    @autolinked_text.should link_to_screen_name(@twenty_character_username)
  end
end
|
67
|
+
|
68
|
+
context "username followed by japanese" do
|
69
|
+
def original_text; "@jacobの"; end
|
70
|
+
|
71
|
+
it "should be linked" do
|
72
|
+
@autolinked_text.should link_to_screen_name('jacob')
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context "username preceded by japanese" do
|
77
|
+
def original_text; "あ@matz"; end
|
78
|
+
|
79
|
+
it "should be linked" do
|
80
|
+
@autolinked_text.should link_to_screen_name('matz')
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
context "username surrounded by japanese" do
|
85
|
+
def original_text; "あ@yoshimiの"; end
|
86
|
+
|
87
|
+
it "should be linked" do
|
88
|
+
@autolinked_text.should link_to_screen_name('yoshimi')
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
context "username using full-width at-sign" do
|
93
|
+
def original_text
|
94
|
+
"#{[0xFF20].pack('U')}jacob"
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should be linked" do
|
98
|
+
@autolinked_text.should link_to_screen_name('jacob')
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe "list path autolinking" do
|
104
|
+
|
105
|
+
context "when List is not available" do
|
106
|
+
it "should not be linked" do
|
107
|
+
@autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
|
108
|
+
@autolinked_text.should_not link_to_list_path('jacob/my-list')
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
context "slug preceded by a space" do
|
113
|
+
def original_text; "hello @jacob/my-list"; end
|
114
|
+
|
115
|
+
it "should be linked" do
|
116
|
+
@autolinked_text.should link_to_list_path('jacob/my-list')
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
context "username followed by a slash but no list" do
|
121
|
+
def original_text; "hello @jacob/ my-list"; end
|
122
|
+
|
123
|
+
it "should NOT be linked" do
|
124
|
+
@autolinked_text.should_not link_to_list_path('jacob/my-list')
|
125
|
+
@autolinked_text.should link_to_screen_name('jacob')
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
context "empty username followed by a list" do
|
130
|
+
def original_text; "hello @/my-list"; end
|
131
|
+
|
132
|
+
it "should NOT be linked" do
|
133
|
+
Hpricot(@autolinked_text).search('a').should be_blank
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
context "list slug at beginning of line" do
|
138
|
+
def original_text; "@jacob/my-list"; end
|
139
|
+
|
140
|
+
it "should be linked" do
|
141
|
+
@autolinked_text.should link_to_list_path('jacob/my-list')
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
context "username preceded by alpha-numeric character" do
|
146
|
+
def original_text; "meet@the/beach"; end
|
147
|
+
|
148
|
+
it "should not be linked" do
|
149
|
+
Hpricot(@autolinked_text).search('a').should be_blank
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
context "username preceded by non-word character" do
  def original_text; "great.@jacob/my-list"; end

  # @autolinked_text is already populated from original_text by the enclosing
  # before hook, so the example only needs to assert on it.
  it "should be linked" do
    @autolinked_text.should link_to_list_path('jacob/my-list')
  end
end
|
161
|
+
|
162
|
+
context "username containing non-word characters" do
|
163
|
+
def original_text; "@zach/test&^$%^"; end
|
164
|
+
|
165
|
+
it "should be linked" do
|
166
|
+
@autolinked_text.should link_to_list_path('zach/test')
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
context "list slug over eighty characters" do
  # Builds "@jack/" + an 80-character slug + five extra characters. The
  # "jack/<80 chars>" part is kept so the example can refer to it.
  def original_text
    @eighty_character_list = "jack/" + ("a" * 80)
    "@#{@eighty_character_list}12345"
  end

  # A link is expected, covering only the first eighty slug characters.
  it "should be linked up to the eightieth slug character" do
    @autolinked_text.should link_to_list_path(@eighty_character_list)
  end
end
|
180
|
+
end
|
181
|
+
|
182
|
+
describe "hashtag autolinking" do
|
183
|
+
context "with an all numeric hashtag" do
|
184
|
+
def original_text; "#123"; end
|
185
|
+
|
186
|
+
it "should not be linked" do
|
187
|
+
@autolinked_text.should_not have_autolinked_hashtag('#123')
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
context "with a hashtag with alphanumeric characters" do
|
192
|
+
def original_text; "#ab1d"; end
|
193
|
+
|
194
|
+
it "should be linked" do
|
195
|
+
@autolinked_text.should have_autolinked_hashtag('#ab1d')
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
context "with a hashtag with underscores" do
|
200
|
+
def original_text; "#a_b_c_d"; end
|
201
|
+
|
202
|
+
it "should be linked" do
|
203
|
+
@autolinked_text.should have_autolinked_hashtag(original_text)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
context "with a hashtag that is preceded by a word character" do
  def original_text; "ab#cd"; end

  # Assert that no anchor is produced at all. Checking only for a hashtag link
  # whose text is the whole input ("ab#cd") may not notice a link on "#cd".
  # NOTE(review): assumes Hpricot and be_blank are available here as in the
  # username specs of this file — confirm.
  it "should not be linked" do
    Hpricot(@autolinked_text).search('a').should be_blank
  end
end
|
214
|
+
|
215
|
+
context "with a page anchor in a url" do
|
216
|
+
def original_text; "Here's my url: http://foobar.com/#home"; end
|
217
|
+
|
218
|
+
it "should not link the hashtag" do
|
219
|
+
@autolinked_text.should_not have_autolinked_hashtag('#home')
|
220
|
+
end
|
221
|
+
|
222
|
+
it "should link the url" do
|
223
|
+
@autolinked_text.should have_autolinked_url('http://foobar.com/#home')
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
context "with a hashtag that starts with a number but has word characters" do
|
228
|
+
def original_text; "#2ab"; end
|
229
|
+
|
230
|
+
it "should be linked" do
|
231
|
+
@autolinked_text.should have_autolinked_hashtag(original_text)
|
232
|
+
end
|
233
|
+
end
|
234
|
+
|
235
|
+
context "with multiple valid hashtags" do
|
236
|
+
def original_text; "I'm frickin' awesome #ab #cd #ef"; end
|
237
|
+
|
238
|
+
it "links each hashtag" do
|
239
|
+
@autolinked_text.should have_autolinked_hashtag('#ab')
|
240
|
+
@autolinked_text.should have_autolinked_hashtag('#cd')
|
241
|
+
@autolinked_text.should have_autolinked_hashtag('#ef')
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
context "with a hashtag preceded by a ." do
|
246
|
+
def original_text; "ok, great.#abc"; end
|
247
|
+
|
248
|
+
it "should be linked" do
|
249
|
+
@autolinked_text.should have_autolinked_hashtag('#abc')
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
context "with a hashtag preceded by a &" do
  def original_text; "&#nbsp;"; end

  # Assert that no anchor is produced at all. Checking only for a hashtag link
  # on "#nbsp;" (with the semicolon) may not notice a link on "#nbsp".
  # NOTE(review): assumes Hpricot and be_blank are available here as in the
  # username specs of this file — confirm.
  it "should not be linked" do
    Hpricot(@autolinked_text).search('a').should be_blank
  end
end
|
260
|
+
|
261
|
+
context "with a hashtag that ends in an !" do
|
262
|
+
def original_text; "#great!"; end
|
263
|
+
|
264
|
+
it "should be linked, but should not include the !" do
|
265
|
+
@autolinked_text.should have_autolinked_hashtag('#great')
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
context "with a hashtag preceded by Japanese" do
|
270
|
+
def original_text; "の#twj_dev"; end
|
271
|
+
|
272
|
+
it "should be linked" do
|
273
|
+
@autolinked_text.should have_autolinked_hashtag('#twj_dev')
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
context "with a hashtag followed by Japanese" do
|
278
|
+
def original_text; "#twj_devの"; end
|
279
|
+
|
280
|
+
it "should be linked" do
|
281
|
+
@autolinked_text.should have_autolinked_hashtag('#twj_dev')
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
context "with a hashtag preceded by a full-width space" do
|
286
|
+
def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
|
287
|
+
|
288
|
+
it "should be linked" do
|
289
|
+
@autolinked_text.should have_autolinked_hashtag('#twj_dev')
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
context "with a hashtag followed by a full-width space" do
|
294
|
+
def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
|
295
|
+
|
296
|
+
it "should be linked" do
|
297
|
+
@autolinked_text.should have_autolinked_hashtag('#twj_dev')
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
context "with a hashtag using full-width hash" do
|
302
|
+
def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
|
303
|
+
|
304
|
+
it "should be linked" do
|
305
|
+
link = Hpricot(@autolinked_text).at('a')
|
306
|
+
link.inner_text.should == "#{[0xFF03].pack('U')}twj_dev"
|
307
|
+
link['href'].should == 'http://twitter.com/search?q=%23twj_dev'
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
end
|
312
|
+
|
313
|
+
describe "URL autolinking" do
|
314
|
+
def url; "http://www.google.com"; end
|
315
|
+
|
316
|
+
context "when embedded in plain text" do
|
317
|
+
def original_text; "On my search engine #{url} I found good links."; end
|
318
|
+
|
319
|
+
it "should be linked" do
|
320
|
+
@autolinked_text.should have_autolinked_url(url)
|
321
|
+
end
|
322
|
+
end
|
323
|
+
|
324
|
+
context "when surrounded by Japanese;" do
|
325
|
+
def original_text; "いまなにしてる#{url}いまなにしてる"; end
|
326
|
+
|
327
|
+
it "should be linked" do
|
328
|
+
@autolinked_text.should have_autolinked_url(url)
|
329
|
+
end
|
330
|
+
end
|
331
|
+
|
332
|
+
context "when surrounded by parentheses;" do
|
333
|
+
def original_text; "I found a neatness (#{url})"; end
|
334
|
+
|
335
|
+
it "should be linked" do
|
336
|
+
@autolinked_text.should have_autolinked_url(url)
|
337
|
+
end
|
338
|
+
|
339
|
+
context "when the URL ends with a slash;" do
|
340
|
+
def url; "http://www.google.com/"; end
|
341
|
+
|
342
|
+
it "should be linked" do
|
343
|
+
pending # our support for Wikipedia URLS containing parens breaks this corner case
|
344
|
+
@autolinked_text.should have_autolinked_url(url)
|
345
|
+
end
|
346
|
+
end
|
347
|
+
end
|
348
|
+
|
349
|
+
context "with a URL ending in allowed punctuation" do
|
350
|
+
it "does not consume ending punctuation" do
|
351
|
+
matcher = TestAutolink.new
|
352
|
+
%w| ? ! , . : ; ] ) } = \ ' |.each do |char|
|
353
|
+
matcher.auto_link("#{url}#{char}").should have_autolinked_url(url)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
end
|
357
|
+
|
358
|
+
context "with a URL preceded in forbidden characters" do
|
359
|
+
it "should not be linked" do
|
360
|
+
matcher = TestAutolink.new
|
361
|
+
%w| \ ' / : ! = |.each do |char|
|
362
|
+
matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url)
|
363
|
+
end
|
364
|
+
end
|
365
|
+
end
|
366
|
+
|
367
|
+
context "when embedded in a link tag" do
|
368
|
+
def original_text; "<link rel='true'>#{url}</link>"; end
|
369
|
+
|
370
|
+
it "should be linked" do
|
371
|
+
@autolinked_text.should have_autolinked_url(url)
|
372
|
+
end
|
373
|
+
end
|
374
|
+
|
375
|
+
context "with multiple URLs" do
|
376
|
+
def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
|
377
|
+
|
378
|
+
it "should autolink each one" do
|
379
|
+
@autolinked_text.should have_autolinked_url('http://www.links.org')
|
380
|
+
@autolinked_text.should have_autolinked_url('http://www.foo.org')
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
context "with multiple URLs in different formats" do
|
385
|
+
def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
|
386
|
+
|
387
|
+
it "should autolink each one, in the proper order" do
|
388
|
+
@autolinked_text.should have_autolinked_url('http://foo.com')
|
389
|
+
@autolinked_text.should have_autolinked_url('https://bar.com')
|
390
|
+
@autolinked_text.should have_autolinked_url('http://mail.foobar.org')
|
391
|
+
end
|
392
|
+
end
|
393
|
+
|
394
|
+
context "with a URL having a long TLD" do
|
395
|
+
def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
|
396
|
+
|
397
|
+
it "should autolink it" do
|
398
|
+
@autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html')
|
399
|
+
end
|
400
|
+
end
|
401
|
+
|
402
|
+
context "with a url lacking the protocol" do
|
403
|
+
def original_text; "I like www.foobar.com dudes"; end
|
404
|
+
|
405
|
+
it "links to the original text with the full href" do
|
406
|
+
link = Hpricot(@autolinked_text).at('a')
|
407
|
+
link.inner_text.should == 'www.foobar.com'
|
408
|
+
link['href'].should == 'http://www.foobar.com'
|
409
|
+
end
|
410
|
+
end
|
411
|
+
|
412
|
+
end
|
413
|
+
|
414
|
+
describe "Autolink all" do
|
415
|
+
before do
|
416
|
+
@linker = TestAutolink.new
|
417
|
+
end
|
418
|
+
|
419
|
+
it "should allow url/hashtag overlap" do
|
420
|
+
auto_linked = @linker.auto_link("http://twitter.com/#search")
|
421
|
+
auto_linked.should have_autolinked_url('http://twitter.com/#search')
|
422
|
+
end
|
423
|
+
|
424
|
+
end
|
425
|
+
end
|
426
|
+
|
427
|
+
end
|