redaranj-twitter-text 1.0.4.191

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+ if ("".respond_to?(:encoding) && "".encoding.name == "UTF-*") || (!"".respond_to?(:encoding) && !['u','UTF8'].include?($KCODE))
3
+ raise("twitter-text requires the $KCODE variable be set to 'UTF8' or 'u'")
4
+ end
5
+
6
+ # Needed for auto-linking
7
+ require 'action_view'
8
+
9
+ require 'regex'
10
+ require 'autolink'
11
+ require 'extractor'
12
+ require 'unicode'
13
+ require 'validation'
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+ module Twitter
3
+ # This module lazily defines constants of the form Uxxxx for all Unicode
4
+ # codepoints from U0000 to U10FFFF. The value of each constant is the
5
+ # UTF-8 string for the codepoint.
6
+ # Examples:
7
+ # copyright = Unicode::U00A9
8
+ # euro = Unicode::U20AC
9
+ # infinity = Unicode::U221E
10
+ # An underscore after the U is also accepted (e.g. U_00A9), per CODEPOINT_REGEX.
11
+ module Unicode
12
+ CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/
13
+
14
+ def self.const_missing(name)
15
+ # Check that the constant name is of the right form: U0000 to U10FFFF
16
+ if name.to_s =~ CODEPOINT_REGEX
17
+ # Convert the captured hex digits to an immutable (frozen) UTF-8 string,
18
+ # define a real constant for that value and return the value
19
+ # (once const_set has run, later lookups find the constant without const_missing)
20
+ const_set(name, [$1.to_i(16)].pack("U").freeze)
21
+ else # Names that are not codepoint constants raise NameError.
22
+ raise NameError, "Uninitialized constant: Unicode::#{name}"
23
+ end
24
+ end
25
+ end
26
+
27
+ end
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+ module Twitter
3
+ module Validation
4
+ MAX_LENGTH = 140
5
+
6
+ # Character not allowed in Tweets
7
+ INVALID_CHARACTERS = [
8
+ 0xFFFE, 0xFEFF, # BOM
9
+ 0xFFFF, # Special
10
+ 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change
11
+ ].map{|cp| [cp].pack('U') }.freeze
12
+
13
+ # Returns the length of the string as it would be displayed. This is equivilent to the length of the Unicode NFC
14
+ # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
15
+ # string no matter which actual form was transmitted. For example:
16
+ #
17
+ # U+0065 Latin Small Letter E
18
+ # + U+0301 Combining Acute Accent
19
+ # ----------
20
+ # = 2 bytes, 2 characters, displayed as é (1 visual glyph)
21
+ # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single chracter and a +display_length+ of 1
22
+ #
23
+ # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
24
+ #
25
+ def tweet_length(text)
26
+ ActiveSupport::Multibyte::Chars.new(text).normalize(:c).length
27
+ end
28
+
29
+ # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
30
+ # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
31
+ # will allow quicker feedback.
32
+ #
33
+ # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
34
+ #
35
+ # <tt>:too_long</tt>:: if the <tt>text</tt> is too long
36
+ # <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
37
+ # <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
38
+ def tweet_invalid?(text)
39
+ begin
40
+ return :empty if text.blank?
41
+ return :too_long if tweet_length(text) > MAX_LENGTH
42
+ return :invalid_characters if INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
43
+ return :invalid_characters if false
44
+ rescue ArgumentError, ActiveSupport::Multibyte::EncodingError => e
45
+ # non-Unicode value.
46
+ return :invalid_characters
47
+ end
48
+
49
+ return false
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,437 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ class TestAutolink # Bare host class so the specs can call the Twitter::Autolink mixin methods
5
+ include Twitter::Autolink
6
+ end
7
+
8
+ describe Twitter::Autolink do
9
+ def original_text; end
10
+ def url; end
11
+
12
+ describe "auto_link_custom" do
13
+ before do
14
+ @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text
15
+ end
16
+
17
+ describe "username autolinking" do
18
+ context "username preceded by a space" do
19
+ def original_text; "hello @jacob"; end
20
+
21
+ it "should be linked" do
22
+ @autolinked_text.should link_to_screen_name('jacob')
23
+ end
24
+ end
25
+
26
+ context "username at beginning of line" do
27
+ def original_text; "@jacob you're cool"; end
28
+
29
+ it "should be linked" do
30
+ @autolinked_text.should link_to_screen_name('jacob')
31
+ end
32
+ end
33
+
34
+ context "username preceded by word character" do
35
+ def original_text; "meet@the beach"; end
36
+
37
+ it "should not be linked" do
38
+ Hpricot(@autolinked_text).search('a').should be_blank
39
+ end
40
+ end
41
+
42
+ context "username preceded by non-word character" do
43
+ def original_text; "great.@jacob"; end
44
+
45
+ it "should be linked" do
46
+ @autolinked_text.should link_to_screen_name('jacob')
47
+ end
48
+ end
49
+
50
+ context "username containing non-word characters" do
51
+ def original_text; "@zach&^$%^"; end
52
+
53
+ it "should not be linked" do
54
+ @autolinked_text.should link_to_screen_name('zach')
55
+ end
56
+ end
57
+
58
+ context "username over twenty characters" do
59
+ def original_text
60
+ @twenty_character_username = "zach" * 5
61
+ "@" + @twenty_character_username + "1"
62
+ end
63
+
64
+ it "should not be linked" do
65
+ @autolinked_text.should link_to_screen_name(@twenty_character_username)
66
+ end
67
+ end
68
+
69
+ context "username followed by japanese" do
70
+ def original_text; "@jacobの"; end
71
+
72
+ it "should be linked" do
73
+ @autolinked_text.should link_to_screen_name('jacob')
74
+ end
75
+ end
76
+
77
+ context "username preceded by japanese" do
78
+ def original_text; "あ@matz"; end
79
+
80
+ it "should be linked" do
81
+ @autolinked_text.should link_to_screen_name('matz')
82
+ end
83
+ end
84
+
85
+ context "username surrounded by japanese" do
86
+ def original_text; "あ@yoshimiの"; end
87
+
88
+ it "should be linked" do
89
+ @autolinked_text.should link_to_screen_name('yoshimi')
90
+ end
91
+ end
92
+
93
+ context "username using full-width at-sign" do
94
+ def original_text
95
+ "#{[0xFF20].pack('U')}jacob"
96
+ end
97
+
98
+ it "should be linked" do
99
+ @autolinked_text.should link_to_screen_name('jacob')
100
+ end
101
+ end
102
+ end
103
+
104
+ describe "list path autolinking" do
105
+
106
+ context "when List is not available" do
107
+ it "should not be linked" do
108
+ @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
109
+ @autolinked_text.should_not link_to_list_path('jacob/my-list')
110
+ end
111
+ end
112
+
113
+ context "slug preceded by a space" do
114
+ def original_text; "hello @jacob/my-list"; end
115
+
116
+ it "should be linked" do
117
+ @autolinked_text.should link_to_list_path('jacob/my-list')
118
+ end
119
+ end
120
+
121
+ context "username followed by a slash but no list" do
122
+ def original_text; "hello @jacob/ my-list"; end
123
+
124
+ it "should NOT be linked" do
125
+ @autolinked_text.should_not link_to_list_path('jacob/my-list')
126
+ @autolinked_text.should link_to_screen_name('jacob')
127
+ end
128
+ end
129
+
130
+ context "empty username followed by a list" do
131
+ def original_text; "hello @/my-list"; end
132
+
133
+ it "should NOT be linked" do
134
+ Hpricot(@autolinked_text).search('a').should be_blank
135
+ end
136
+ end
137
+
138
+ context "list slug at beginning of line" do
139
+ def original_text; "@jacob/my-list"; end
140
+
141
+ it "should be linked" do
142
+ @autolinked_text.should link_to_list_path('jacob/my-list')
143
+ end
144
+ end
145
+
146
+ context "username preceded by alpha-numeric character" do
147
+ def original_text; "meet@the/beach"; end
148
+
149
+ it "should not be linked" do
150
+ Hpricot(@autolinked_text).search('a').should be_blank
151
+ end
152
+ end
153
+
154
+ context "username preceded by non-word character" do
155
+ def original_text; "great.@jacob/my-list"; end
156
+
157
+ it "should be linked" do
158
+ @autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list")
159
+ @autolinked_text.should link_to_list_path('jacob/my-list')
160
+ end
161
+ end
162
+
163
+ context "username containing non-word characters" do
164
+ def original_text; "@zach/test&^$%^"; end
165
+
166
+ it "should be linked" do
167
+ @autolinked_text.should link_to_list_path('zach/test')
168
+ end
169
+ end
170
+
171
+ context "username over twenty characters" do
172
+ def original_text
173
+ @eighty_character_list = "jack/" + ("a" * 80)
174
+ "@#{@eighty_character_list}12345"
175
+ end
176
+
177
+ it "should be linked" do
178
+ @autolinked_text.should link_to_list_path(@eighty_character_list)
179
+ end
180
+ end
181
+ end
182
+
183
+ describe "hashtag autolinking" do
184
+ context "with an all numeric hashtag" do
185
+ def original_text; "#123"; end
186
+
187
+ it "should not be linked" do
188
+ @autolinked_text.should_not have_autolinked_hashtag('#123')
189
+ end
190
+ end
191
+
192
+ context "with a hashtag with alphanumeric characters" do
193
+ def original_text; "#ab1d"; end
194
+
195
+ it "should be linked" do
196
+ @autolinked_text.should have_autolinked_hashtag('#ab1d')
197
+ end
198
+ end
199
+
200
+ context "with a hashtag with underscores" do
201
+ def original_text; "#a_b_c_d"; end
202
+
203
+ it "should be linked" do
204
+ @autolinked_text.should have_autolinked_hashtag(original_text)
205
+ end
206
+ end
207
+
208
+ context "with a hashtag that is preceded by a word character" do
209
+ def original_text; "ab#cd"; end
210
+
211
+ it "should not be linked" do
212
+ @autolinked_text.should_not have_autolinked_hashtag(original_text)
213
+ end
214
+ end
215
+
216
+ context "with a page anchor in a url" do
217
+ def original_text; "Here's my url: http://foobar.com/#home"; end
218
+
219
+ it "should not link the hashtag" do
220
+ @autolinked_text.should_not have_autolinked_hashtag('#home')
221
+ end
222
+
223
+ it "should link the url" do
224
+ @autolinked_text.should have_autolinked_url('http://foobar.com/#home')
225
+ end
226
+ end
227
+
228
+ context "with a hashtag that starts with a number but has word characters" do
229
+ def original_text; "#2ab"; end
230
+
231
+ it "should be linked" do
232
+ @autolinked_text.should have_autolinked_hashtag(original_text)
233
+ end
234
+ end
235
+
236
+ context "with multiple valid hashtags" do
237
+ def original_text; "I'm frickin' awesome #ab #cd #ef"; end
238
+
239
+ it "links each hashtag" do
240
+ @autolinked_text.should have_autolinked_hashtag('#ab')
241
+ @autolinked_text.should have_autolinked_hashtag('#cd')
242
+ @autolinked_text.should have_autolinked_hashtag('#ef')
243
+ end
244
+ end
245
+
246
+ context "with a hashtag preceded by a ." do
247
+ def original_text; "ok, great.#abc"; end
248
+
249
+ it "should be linked" do
250
+ @autolinked_text.should have_autolinked_hashtag('#abc')
251
+ end
252
+ end
253
+
254
+ context "with a hashtag preceded by a &" do
255
+ def original_text; "&#nbsp;"; end
256
+
257
+ it "should not be linked" do
258
+ @autolinked_text.should_not have_autolinked_hashtag('#nbsp;')
259
+ end
260
+ end
261
+
262
+ context "with a hashtag that ends in an !" do
263
+ def original_text; "#great!"; end
264
+
265
+ it "should be linked, but should not include the !" do
266
+ @autolinked_text.should have_autolinked_hashtag('#great')
267
+ end
268
+ end
269
+
270
+ context "with a hashtag preceded by Japanese" do
271
+ def original_text; "の#twj_dev"; end
272
+
273
+ it "should be linked" do
274
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
275
+ end
276
+ end
277
+
278
+ context "with a hashtag followed by Japanese" do
279
+ def original_text; "#twj_devの"; end
280
+
281
+ it "should be linked" do
282
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
283
+ end
284
+ end
285
+
286
+ context "with a hashtag preceded by a full-width space" do
287
+ def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
288
+
289
+ it "should be linked" do
290
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
291
+ end
292
+ end
293
+
294
+ context "with a hashtag followed by a full-width space" do
295
+ def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
296
+
297
+ it "should be linked" do
298
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
299
+ end
300
+ end
301
+
302
+ context "with a hashtag using full-width hash" do
303
+ def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
304
+
305
+ it "should be linked" do
306
+ pending
307
+ # link = Hpricot(@autolinked_text).at('a')
308
+ # link.inner_text.should == "#{[0xFF03].pack('U')}twj_dev"
309
+ # link['href'].should == 'http://twitter.com/search?q=%23twj_dev'
310
+ end
311
+ end
312
+
313
+ end
314
+
315
+ describe "URL autolinking" do
316
+ def url; "http://www.google.com"; end
317
+
318
+ context "when embedded in plain text" do
319
+ def original_text; "On my search engine #{url} I found good links."; end
320
+
321
+ it "should be linked" do
322
+ @autolinked_text.should have_autolinked_url(url)
323
+ end
324
+ end
325
+
326
+ context "when surrounded by Japanese;" do
327
+ def original_text; "いまなにしてる#{url}いまなにしてる"; end
328
+
329
+ it "should be linked" do
330
+ @autolinked_text.should have_autolinked_url(url)
331
+ end
332
+ end
333
+
334
+ context "when surrounded by parentheses;" do
335
+ def original_text; "I found a neatness (#{url})"; end
336
+
337
+ it "should be linked" do
338
+ @autolinked_text.should have_autolinked_url(url)
339
+ end
340
+
341
+ context "when the URL ends with a slash;" do
342
+ def url; "http://www.google.com/"; end
343
+
344
+ it "should be linked" do
345
+ pending # our support for Wikipedia URLS containing parens breaks this corner case
346
+ @autolinked_text.should have_autolinked_url(url)
347
+ end
348
+ end
349
+ end
350
+
351
+ context "when preceded by a :" do
352
+ def original_text; "Check this out @hoverbird:#{url}"; end
353
+
354
+ it "should be linked" do
355
+ @autolinked_text.should have_autolinked_url(url)
356
+ end
357
+ end
358
+
359
+ context "with a URL ending in allowed punctuation" do
360
+ it "does not consume ending punctuation" do
361
+ matcher = TestAutolink.new
362
+ %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
363
+ matcher.auto_link("#{url}#{char}").should have_autolinked_url(url)
364
+ end
365
+ end
366
+ end
367
+
368
+ context "with a URL preceded in forbidden characters" do
369
+ it "should not be linked" do
370
+ matcher = TestAutolink.new
371
+ %w| \ ' / ! = |.each do |char|
372
+ matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url)
373
+ end
374
+ end
375
+ end
376
+
377
+ context "when embedded in a link tag" do
378
+ def original_text; "<link rel='true'>#{url}</link>"; end
379
+
380
+ it "should be linked" do
381
+ @autolinked_text.should have_autolinked_url(url)
382
+ end
383
+ end
384
+
385
+ context "with multiple URLs" do
386
+ def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
387
+
388
+ it "should autolink each one" do
389
+ @autolinked_text.should have_autolinked_url('http://www.links.org')
390
+ @autolinked_text.should have_autolinked_url('http://www.foo.org')
391
+ end
392
+ end
393
+
394
+ context "with multiple URLs in different formats" do
395
+ def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
396
+
397
+ it "should autolink each one, in the proper order" do
398
+ @autolinked_text.should have_autolinked_url('http://foo.com')
399
+ @autolinked_text.should have_autolinked_url('https://bar.com')
400
+ @autolinked_text.should have_autolinked_url('http://mail.foobar.org')
401
+ end
402
+ end
403
+
404
+ context "with a URL having a long TLD" do
405
+ def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
406
+
407
+ it "should autolink it" do
408
+ @autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html')
409
+ end
410
+ end
411
+
412
+ context "with a url lacking the protocol" do
413
+ def original_text; "I like www.foobar.com dudes"; end
414
+
415
+ it "links to the original text with the full href" do
416
+ link = Hpricot(@autolinked_text).at('a')
417
+ link.inner_text.should == 'www.foobar.com'
418
+ link['href'].should == 'http://www.foobar.com'
419
+ end
420
+ end
421
+
422
+ end
423
+
424
+ describe "Autolink all" do
425
+ before do
426
+ @linker = TestAutolink.new
427
+ end
428
+
429
+ it "should allow url/hashtag overlap" do
430
+ auto_linked = @linker.auto_link("http://twitter.com/#search")
431
+ auto_linked.should have_autolinked_url('http://twitter.com/#search')
432
+ end
433
+
434
+ end
435
+ end
436
+
437
+ end