incollege-text 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
module Incollege
  # A module provides base methods to rewrite usernames, lists, hashtags and URLs.
  module Rewriter
    extend self

    # Rebuilds +text+ with every entity span replaced by the block's result.
    #
    # text     - source text; converted with +to_s+ and split into an array of
    #            characters via +to_char_a+ (presumably a String extension
    #            defined elsewhere in this gem -- confirm).
    # entities - collection of entities. Each one either responds to
    #            +indices+ or is indexable with +:indices+, yielding a
    #            [start, end] pair of character offsets. The character at
    #            +end+ is kept, i.e. the end offset is exclusive.
    #
    # Yields each entity (in ascending start order) together with the character
    # array; whatever the block returns is spliced in place of that span.
    #
    # Returns the rewritten String.
    def rewrite_entities(text, entities)
      chars = text.to_s.to_char_a
      indices_of = lambda do |entity|
        entity.respond_to?(:indices) ? entity.indices : entity[:indices]
      end

      # Process entities left to right so the untouched text between them can
      # be copied in order.
      ordered = entities.sort_by { |entity| indices_of.call(entity).first }

      result = []
      last_index = ordered.inject(0) do |index, entity|
        indices = indices_of.call(entity)
        result << chars[index...indices.first]
        result << yield(entity, chars)
        indices.last
      end
      # Copy whatever follows the final entity.
      result << chars[last_index..-1]

      result.flatten.join
    end

    # These methods are deprecated, will be removed in future.
    extend Deprecation

    # Applies the rewriters selected in +options+ one after another.
    #
    # text    - the text to rewrite.
    # options - Hash whose +:hashtags+, +:urls+ and +:usernames_or_lists+
    #           values, when present, are callables used as the block of the
    #           matching +rewrite_*+ method.
    #
    # Returns the text after every requested rewriter has run; the output of
    # each rewriter is the input of the next.
    def rewrite(text, options = {})
      # The block must take both the accumulator and the key: with a single
      # parameter it would receive the accumulated text, not the key.
      [:hashtags, :urls, :usernames_or_lists].inject(text) do |rewritten, key|
        options[key] ? send(:"rewrite_#{key}", rewritten, &options[key]) : rewritten
      end
    end
    deprecate :rewrite, :rewrite_entities

    # Rewrites each mention or list found by
    # +Extractor.extract_mentions_or_lists_with_indices+.
    #
    # Yields the character at the entity's start offset, the entity's
    # +:screen_name+, and its +:list_slug+ (nil when the slug is empty).
    #
    # Returns the rewritten String.
    def rewrite_usernames_or_lists(text)
      entities = Extractor.extract_mentions_or_lists_with_indices(text)
      rewrite_entities(text, entities) do |entity, chars|
        at = chars[entity[:indices].first]
        list_slug = entity[:list_slug]
        # Normalise "no list" to nil; also tolerate an entity without a slug.
        list_slug = nil if list_slug && list_slug.empty?
        yield(at, entity[:screen_name], list_slug)
      end
    end
    deprecate :rewrite_usernames_or_lists, :rewrite_entities

    # Rewrites each hashtag found by +Extractor.extract_hashtags_with_indices+.
    #
    # Yields the character at the entity's start offset and the entity's
    # +:hashtag+ value.
    #
    # Returns the rewritten String.
    def rewrite_hashtags(text)
      entities = Extractor.extract_hashtags_with_indices(text)
      rewrite_entities(text, entities) do |entity, chars|
        hash = chars[entity[:indices].first]
        yield(hash, entity[:hashtag])
      end
    end
    deprecate :rewrite_hashtags, :rewrite_entities

    # Rewrites each URL found by +Extractor.extract_urls_with_indices+ (called
    # with +:extract_url_without_protocol => false+).
    #
    # Yields the entity's +:url+ value.
    #
    # Returns the rewritten String.
    def rewrite_urls(text)
      entities = Extractor.extract_urls_with_indices(text, :extract_url_without_protocol => false)
      rewrite_entities(text, entities) do |entity, _chars|
        yield(entity[:url])
      end
    end
    deprecate :rewrite_urls, :rewrite_entities
  end
end
@@ -0,0 +1,26 @@
1
module Incollege
  # This module lazily defines constants of the form Uxxxx for all Unicode
  # codepoints from U0000 to U10FFFF. The value of each constant is the
  # UTF-8 string for the codepoint.
  # Examples:
  #   copyright = Unicode::U00A9
  #   euro = Unicode::U20AC
  #   infinity = Unicode::U221E
  #
  module Unicode
    # Matches a whole constant name: "U", an optional underscore, then 4-5 hex
    # digits or "10" followed by 4 hex digits. Anchored with \A and \z so the
    # entire name must match, not just one line of it.
    CODEPOINT_REGEX = /\AU_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})\z/

    # Called by Ruby when an undefined constant is referenced on this module.
    #
    # name - the missing constant's name.
    #
    # Returns the frozen UTF-8 String for the codepoint, after defining it as
    # a real constant so later references do not come back through here.
    # Raises NameError when the name is not of the codepoint form.
    def self.const_missing(name)
      # Check that the constant name is of the right form: U0000 to U10FFFF
      match = CODEPOINT_REGEX.match(name.to_s)
      unless match
        # Raise an error for constants that are not Unicode.
        raise NameError.new("Uninitialized constant: Unicode::#{name}", name)
      end

      # Convert the codepoint to an immutable UTF-8 string,
      # define a real constant for that value and return the value
      const_set(name, [match[1].to_i(16)].pack("U").freeze)
    end
  end
end
@@ -0,0 +1,113 @@
1
+ require 'unf'
2
+
3
module Incollege
  # Pre-validation helpers for Tweet text, usernames, lists, hashtags and URLs.
  module Validation
    extend self

    # Maximum allowed value of +tweet_length+ for a valid Tweet.
    MAX_LENGTH = 140

    # Default lengths substituted for URLs when computing +tweet_length+.
    DEFAULT_TCO_URL_LENGTHS = {
      :short_url_length => 23,
      :short_url_length_https => 23,
      :characters_reserved_per_media => 23
    }.freeze

    # Returns the length of the string as it would be displayed. This is equivalent to the length of the Unicode NFC
    # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a
    # string no matter which actual form was transmitted. For example:
    #
    #     U+0065  Latin Small Letter E
    # +   U+0301  Combining Acute Accent
    # ----------
    # =   3 bytes (UTF-8), 2 characters, displayed as é (1 visual glyph)
    #     … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single character and a +display_length+ of 1
    #
    # The string could also contain U+00E9 already, in which case the canonicalization will not change the value.
    #
    # text    - the String to measure.
    # options - overrides for DEFAULT_TCO_URL_LENGTHS.
    #
    # Every URL reported by the extractor is counted as +:short_url_length_https+
    # (when it starts with "https://", case-insensitively) or +:short_url_length+
    # instead of its own length.
    def tweet_length(text, options = {})
      options = DEFAULT_TCO_URL_LENGTHS.merge(options)

      length = text.to_nfc.unpack("U*").length

      Incollege::Extractor.extract_urls_with_indices(text) do |url, start_position, end_position|
        # Remove the URL's own length, then add the fixed shortened length.
        length += start_position - end_position
        length += url.downcase =~ /\Ahttps:\/\// ? options[:short_url_length_https] : options[:short_url_length]
      end

      length
    end

    # Check the <tt>text</tt> for any reason that it may not be valid as a Tweet. This is meant as a pre-validation
    # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation
    # will allow quicker feedback.
    #
    # Returns <tt>false</tt> if this <tt>text</tt> is valid. Otherwise one of the following Symbols will be returned:
    #
    #   <tt>:too_long</tt>:: if the <tt>text</tt> is too long
    #   <tt>:empty</tt>:: if the <tt>text</tt> is nil or empty
    #   <tt>:invalid_characters</tt>:: if the <tt>text</tt> contains non-Unicode or any of the disallowed Unicode characters
    def tweet_invalid?(text)
      return :empty if !text || text.empty?
      begin
        return :too_long if tweet_length(text) > MAX_LENGTH
        return :invalid_characters if Incollege::Regex::INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) }
      rescue ArgumentError
        # non-Unicode value.
        return :invalid_characters
      end

      return false
    end

    # Returns true when +tweet_invalid?+ finds nothing wrong with +text+.
    def valid_tweet_text?(text)
      !tweet_invalid?(text)
    end

    # Returns true when +username+ (including its leading @ sign) is extracted
    # as exactly one mentioned screen name covering the rest of the string.
    def valid_username?(username)
      return false if !username || username.empty?

      extracted = Incollege::Extractor.extract_mentioned_screen_names(username)
      # Should extract the username minus the @ sign, hence the [1..-1]
      extracted.size == 1 && extracted.first == username[1..-1]
    end

    VALID_LIST_RE = /\A#{Incollege::Regex[:valid_mention_or_list]}\z/o

    # Returns true when the whole of +username_list+ matches VALID_LIST_RE with
    # an empty first capture and a non-empty fourth capture. nil or empty input
    # returns false, like the other validators in this module.
    def valid_list?(username_list)
      return false if !username_list || username_list.empty?

      match = username_list.match(VALID_LIST_RE)
      # Must have matched and had nothing before or after
      !!(match && match[1] == "" && match[4] && !match[4].empty?)
    end

    # Returns true when +hashtag+ (including its leading # sign) is extracted
    # as exactly one hashtag covering the rest of the string.
    def valid_hashtag?(hashtag)
      return false if !hashtag || hashtag.empty?

      extracted = Incollege::Extractor.extract_hashtags(hashtag)
      # Should extract the hashtag minus the # sign, hence the [1..-1]
      extracted.size == 1 && extracted.first == hashtag[1..-1]
    end

    # Validates every component of +url+ against the matching
    # +Incollege::Regex+ pattern.
    #
    # url              - the String to validate.
    # unicode_domains  - when true the authority is checked against
    #                    +:validate_url_unicode_authority+, otherwise against
    #                    +:validate_url_authority+.
    # require_protocol - when true the scheme must be present and be http or
    #                    https (case-insensitive).
    #
    # Returns true or false.
    def valid_url?(url, unicode_domains=true, require_protocol=true)
      return false if !url || url.empty?

      url_parts = url.match(Incollege::Regex[:validate_url_unencoded])
      return false unless (url_parts && url_parts.to_s == url)

      scheme, authority, path, query, fragment = url_parts.captures

      # \z rather than \Z: a trailing newline must not be accepted as part of
      # the scheme.
      scheme_ok = !require_protocol ||
        (valid_match?(scheme, Incollege::Regex[:validate_url_scheme]) && !!scheme.match(/\Ahttps?\z/i))

      return false unless scheme_ok &&
        valid_match?(path, Incollege::Regex[:validate_url_path]) &&
        valid_match?(query, Incollege::Regex[:validate_url_query], true) &&
        valid_match?(fragment, Incollege::Regex[:validate_url_fragment], true)

      authority_regex = if unicode_domains
        Incollege::Regex[:validate_url_unicode_authority]
      else
        Incollege::Regex[:validate_url_authority]
      end
      valid_match?(authority, authority_regex)
    end

    private

    # Returns true when +regex+ matches the whole of +string+.
    #
    # A missing (nil) +string+ is acceptable only when +optional+ is true.
    # Always returns a strict boolean.
    def valid_match?(string, regex, optional=false)
      return optional unless string

      match = string.match(regex)
      !!match && match.to_s == string
    end
  end
end
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Hands the command-line arguments to RubiGen's Destroy script runner.

# Project root: the parent of the directory that contains this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load RubiGen directly; if that fails, load RubyGems first and try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before the arguments reach the runner.
ARGV.shift if %w[--help -h].include?(ARGV.first)

RubiGen::Base.use_component_sources!([:newgem_simple, :test_unit])
RubiGen::Scripts::Destroy.new.run(ARGV)
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Hands the command-line arguments to RubiGen's Generate script runner.

# Project root: the parent of the directory that contains this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load RubiGen directly; if that fails, load RubyGems first and try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before the arguments reach the runner.
ARGV.shift if %w[--help -h].include?(ARGV.first)

RubiGen::Base.use_component_sources!([:newgem_simple, :test_unit])
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,844 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
# Bare host class that mixes in Incollege::Autolink so the specs can call the
# module's methods on an instance.
class TestAutolink
  include ::Incollege::Autolink
end
7
+
8
+ describe Incollege::Autolink do
9
+ def original_text; end
10
+ def url; end
11
+
12
+ describe "auto_link_custom" do
13
+ before do
14
+ @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text
15
+ end
16
+
17
+ describe "username autolinking" do
18
+ context "username preceded by a space" do
19
+ def original_text; "hello @jacob"; end
20
+
21
+ it "should be linked" do
22
+ @autolinked_text.should link_to_screen_name('jacob')
23
+ end
24
+ end
25
+
26
+ context "username in camelCase" do
27
+ def original_text() "@jaCob iS cOoL" end
28
+
29
+ it "should be linked" do
30
+ @autolinked_text.should link_to_screen_name('jaCob')
31
+ end
32
+ end
33
+
34
+ context "username at beginning of line" do
35
+ def original_text; "@jacob you're cool"; end
36
+
37
+ it "should be linked" do
38
+ @autolinked_text.should link_to_screen_name('jacob')
39
+ end
40
+ end
41
+
42
+ context "username preceded by word character" do
43
+ def original_text; "meet@the beach"; end
44
+
45
+ it "should not be linked" do
46
+ Nokogiri::HTML(@autolinked_text).search('a').should be_empty
47
+ end
48
+ end
49
+
50
+ context "username preceded by non-word character" do
51
+ def original_text; "great.@jacob"; end
52
+
53
+ it "should be linked" do
54
+ @autolinked_text.should link_to_screen_name('jacob')
55
+ end
56
+ end
57
+
58
+ context "username containing non-word characters" do
59
+ def original_text; "@zach&^$%^"; end
60
+
61
+ it "should not be linked" do
62
+ @autolinked_text.should link_to_screen_name('zach')
63
+ end
64
+ end
65
+
66
+ context "username over twenty characters" do
67
+ def original_text
68
+ @twenty_character_username = "zach" * 5
69
+ "@" + @twenty_character_username + "1"
70
+ end
71
+
72
+ it "should not be linked" do
73
+ @autolinked_text.should link_to_screen_name(@twenty_character_username)
74
+ end
75
+ end
76
+
77
+ context "username followed by japanese" do
78
+ def original_text; "@jacobの"; end
79
+
80
+ it "should be linked" do
81
+ @autolinked_text.should link_to_screen_name('jacob')
82
+ end
83
+ end
84
+
85
+ context "username preceded by japanese" do
86
+ def original_text; "あ@matz"; end
87
+
88
+ it "should be linked" do
89
+ @autolinked_text.should link_to_screen_name('matz')
90
+ end
91
+ end
92
+
93
+ context "username surrounded by japanese" do
94
+ def original_text; "あ@yoshimiの"; end
95
+
96
+ it "should be linked" do
97
+ @autolinked_text.should link_to_screen_name('yoshimi')
98
+ end
99
+ end
100
+
101
+ context "username using full-width at-sign" do
102
+ def original_text
103
+ "#{[0xFF20].pack('U')}jacob"
104
+ end
105
+
106
+ it "should be linked" do
107
+ @autolinked_text.should link_to_screen_name('jacob')
108
+ end
109
+ end
110
+ end
111
+
112
+ describe "list path autolinking" do
113
+
114
+ context "when List is not available" do
115
+ it "should not be linked" do
116
+ @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true)
117
+ @autolinked_text.should_not link_to_list_path('jacob/my-list')
118
+ @autolinked_text.should include('my-list')
119
+ end
120
+ end
121
+
122
+ context "slug preceded by a space" do
123
+ def original_text; "hello @jacob/my-list"; end
124
+
125
+ it "should be linked" do
126
+ @autolinked_text.should link_to_list_path('jacob/my-list')
127
+ end
128
+ end
129
+
130
+ context "username followed by a slash but no list" do
131
+ def original_text; "hello @jacob/ my-list"; end
132
+
133
+ it "should NOT be linked" do
134
+ @autolinked_text.should_not link_to_list_path('jacob/my-list')
135
+ @autolinked_text.should link_to_screen_name('jacob')
136
+ end
137
+ end
138
+
139
+ context "empty username followed by a list" do
140
+ def original_text; "hello @/my-list"; end
141
+
142
+ it "should NOT be linked" do
143
+ Nokogiri::HTML(@autolinked_text).search('a').should be_empty
144
+ end
145
+ end
146
+
147
+ context "list slug at beginning of line" do
148
+ def original_text; "@jacob/my-list"; end
149
+
150
+ it "should be linked" do
151
+ @autolinked_text.should link_to_list_path('jacob/my-list')
152
+ end
153
+ end
154
+
155
+ context "username preceded by alpha-numeric character" do
156
+ def original_text; "meet@the/beach"; end
157
+
158
+ it "should not be linked" do
159
+ Nokogiri::HTML(@autolinked_text).search('a').should be_empty
160
+ end
161
+ end
162
+
163
+ context "username preceded by non-word character" do
164
+ def original_text; "great.@jacob/my-list"; end
165
+
166
+ it "should be linked" do
167
+ @autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list")
168
+ @autolinked_text.should link_to_list_path('jacob/my-list')
169
+ end
170
+ end
171
+
172
+ context "username containing non-word characters" do
173
+ def original_text; "@zach/test&^$%^"; end
174
+
175
+ it "should be linked" do
176
+ @autolinked_text.should link_to_list_path('zach/test')
177
+ end
178
+ end
179
+
180
+ context "username over twenty characters" do
181
+ def original_text
182
+ @twentyfive_character_list = "jack/" + ("a" * 25)
183
+ "@#{@twentyfive_character_list}12345"
184
+ end
185
+
186
+ it "should be linked" do
187
+ @autolinked_text.should link_to_list_path(@twentyfive_character_list)
188
+ end
189
+ end
190
+ end
191
+
192
+ describe "hashtag autolinking" do
193
+ context "with an all numeric hashtag" do
194
+ def original_text; "#123"; end
195
+
196
+ it "should not be linked" do
197
+ @autolinked_text.should_not have_autolinked_hashtag('#123')
198
+ end
199
+ end
200
+
201
+ context "with a hashtag with alphanumeric characters" do
202
+ def original_text; "#ab1d"; end
203
+
204
+ it "should be linked" do
205
+ @autolinked_text.should have_autolinked_hashtag('#ab1d')
206
+ end
207
+ end
208
+
209
+ context "with a hashtag with underscores" do
210
+ def original_text; "#a_b_c_d"; end
211
+
212
+ it "should be linked" do
213
+ @autolinked_text.should have_autolinked_hashtag(original_text)
214
+ end
215
+ end
216
+
217
+ context "with a hashtag that is preceded by a word character" do
218
+ def original_text; "ab#cd"; end
219
+
220
+ it "should not be linked" do
221
+ @autolinked_text.should_not have_autolinked_hashtag(original_text)
222
+ end
223
+ end
224
+
225
+ context "with a page anchor in a url" do
226
+ def original_text; "Here's my url: http://foobar.com/#home"; end
227
+
228
+ it "should not link the hashtag" do
229
+ @autolinked_text.should_not have_autolinked_hashtag('#home')
230
+ end
231
+
232
+ it "should link the url" do
233
+ @autolinked_text.should have_autolinked_url('http://foobar.com/#home')
234
+ end
235
+ end
236
+
237
+ context "with a hashtag that starts with a number but has word characters" do
238
+ def original_text; "#2ab"; end
239
+
240
+ it "should be linked" do
241
+ @autolinked_text.should have_autolinked_hashtag(original_text)
242
+ end
243
+ end
244
+
245
+ context "with multiple valid hashtags" do
246
+ def original_text; "I'm frickin' awesome #ab #cd #ef"; end
247
+
248
+ it "links each hashtag" do
249
+ @autolinked_text.should have_autolinked_hashtag('#ab')
250
+ @autolinked_text.should have_autolinked_hashtag('#cd')
251
+ @autolinked_text.should have_autolinked_hashtag('#ef')
252
+ end
253
+ end
254
+
255
+ context "with a hashtag preceded by a ." do
256
+ def original_text; "ok, great.#abc"; end
257
+
258
+ it "should be linked" do
259
+ @autolinked_text.should have_autolinked_hashtag('#abc')
260
+ end
261
+ end
262
+
263
+ context "with a hashtag preceded by a &" do
264
+ def original_text; "&#nbsp;"; end
265
+
266
+ it "should not be linked" do
267
+ @autolinked_text.should_not have_autolinked_hashtag('#nbsp;')
268
+ end
269
+ end
270
+
271
+ context "with a hashtag that ends in an !" do
272
+ def original_text; "#great!"; end
273
+
274
+ it "should be linked, but should not include the !" do
275
+ @autolinked_text.should have_autolinked_hashtag('#great')
276
+ end
277
+ end
278
+
279
+ context "with a hashtag followed by Japanese" do
280
+ def original_text; "#twj_devの"; end
281
+
282
+ it "should be linked" do
283
+ @autolinked_text.should have_autolinked_hashtag('#twj_devの')
284
+ end
285
+ end
286
+
287
+ context "with a hashtag preceded by a full-width space" do
288
+ def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
289
+
290
+ it "should be linked" do
291
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
292
+ end
293
+ end
294
+
295
+ context "with a hashtag followed by a full-width space" do
296
+ def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
297
+
298
+ it "should be linked" do
299
+ @autolinked_text.should have_autolinked_hashtag('#twj_dev')
300
+ end
301
+ end
302
+
303
+ context "with a hashtag using full-width hash" do
304
+ def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
305
+
306
+ it "should be linked" do
307
+ link = Nokogiri::HTML(@autolinked_text).search('a')
308
+ (link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev"
309
+ link.first['href'].should == 'https://twitter.com/#!/search?q=%23twj_dev'
310
+ end
311
+ end
312
+
313
+ context "with a hashtag containing an accented latin character" do
314
+ def original_text
315
+ # the hashtag is #éhashtag
316
+ "##{[0x00e9].pack('U')}hashtag"
317
+ end
318
+
319
+ it "should be linked" do
320
+ @autolinked_text.should == "<a class=\"tweet-url hashtag\" href=\"https://twitter.com/#!/search?q=%23éhashtag\" rel=\"nofollow\" title=\"#éhashtag\">#éhashtag</a>"
321
+ end
322
+ end
323
+
324
+ end
325
+
326
+ describe "URL autolinking" do
327
+ def url; "http://www.google.com"; end
328
+
329
+ context "when embedded in plain text" do
330
+ def original_text; "On my search engine #{url} I found good links."; end
331
+
332
+ it "should be linked" do
333
+ @autolinked_text.should have_autolinked_url(url)
334
+ end
335
+ end
336
+
337
+ context "when surrounded by Japanese;" do
338
+ def original_text; "いまなにしてる#{url}いまなにしてる"; end
339
+
340
+ it "should be linked" do
341
+ @autolinked_text.should have_autolinked_url(url)
342
+ end
343
+ end
344
+
345
+ context "with a path surrounded by parentheses;" do
346
+ def original_text; "I found a neatness (#{url})"; end
347
+
348
+ it "should be linked" do
349
+ @autolinked_text.should have_autolinked_url(url)
350
+ end
351
+
352
+ context "when the URL ends with a slash;" do
353
+ def url; "http://www.google.com/"; end
354
+
355
+ it "should be linked" do
356
+ @autolinked_text.should have_autolinked_url(url)
357
+ end
358
+ end
359
+
360
+ context "when the URL has a path;" do
361
+ def url; "http://www.google.com/fsdfasdf"; end
362
+
363
+ it "should be linked" do
364
+ @autolinked_text.should have_autolinked_url(url)
365
+ end
366
+ end
367
+ end
368
+
369
+ context "when path contains parens" do
370
+ def original_text; "I found a neatness (#{url})"; end
371
+
372
+ it "should be linked" do
373
+ @autolinked_text.should have_autolinked_url(url)
374
+ end
375
+
376
+ context "wikipedia" do
377
+ def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end
378
+
379
+ it "should be linked" do
380
+ @autolinked_text.should have_autolinked_url(url)
381
+ end
382
+ end
383
+
384
+ context "IIS session" do
385
+ def url; "http://msdn.com/S(deadbeef)/page.htm"; end
386
+
387
+ it "should be linked" do
388
+ @autolinked_text.should have_autolinked_url(url)
389
+ end
390
+ end
391
+
392
+ context "unbalanced parens" do
393
+ def url; "http://example.com/i_has_a_("; end
394
+
395
+ it "should be linked" do
396
+ @autolinked_text.should have_autolinked_url("http://example.com/i_has_a_")
397
+ end
398
+ end
399
+
400
+ context "balanced parens with a double quote inside" do
401
+ def url; "http://foo.com/foo_(\")_bar" end
402
+
403
+ it "should be linked" do
404
+ @autolinked_text.should have_autolinked_url("http://foo.com/foo_")
405
+ end
406
+ end
407
+
408
+ context "balanced parens hiding XSS" do
409
+ def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
410
+
411
+ it "should be linked" do
412
+ @autolinked_text.should have_autolinked_url("http://x.xx.com/")
413
+ end
414
+ end
415
+ end
416
+
417
+ context "when preceded by a :" do
418
+ def original_text; "Check this out @hoverbird:#{url}"; end
419
+
420
+ it "should be linked" do
421
+ @autolinked_text.should have_autolinked_url(url)
422
+ end
423
+ end
424
+
425
+ context "with a URL ending in allowed punctuation" do
426
+ it "does not consume ending punctuation" do
427
+ matcher = TestAutolink.new
428
+ %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
429
+ matcher.auto_link("#{url}#{char}").should have_autolinked_url(url)
430
+ end
431
+ end
432
+ end
433
+
434
+ context "with a URL preceded in forbidden characters" do
435
+ it "should be linked" do
436
+ matcher = TestAutolink.new
437
+ %w| \ ' / ! = |.each do |char|
438
+ matcher.auto_link("#{char}#{url}").should have_autolinked_url(url)
439
+ end
440
+ end
441
+ end
442
+
443
+ context "when embedded in a link tag" do
444
+ def original_text; "<link rel='true'>#{url}</link>"; end
445
+
446
+ it "should be linked" do
447
+ @autolinked_text.should have_autolinked_url(url)
448
+ end
449
+ end
450
+
451
+ context "with multiple URLs" do
452
+ def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
453
+
454
+ it "should autolink each one" do
455
+ @autolinked_text.should have_autolinked_url('http://www.links.org')
456
+ @autolinked_text.should have_autolinked_url('http://www.foo.org')
457
+ end
458
+ end
459
+
460
+ context "with multiple URLs in different formats" do
461
+ def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
462
+
463
+ it "should autolink each one, in the proper order" do
464
+ @autolinked_text.should have_autolinked_url('http://foo.com')
465
+ @autolinked_text.should have_autolinked_url('https://bar.com')
466
+ @autolinked_text.should have_autolinked_url('http://mail.foobar.org')
467
+ end
468
+ end
469
+
470
+ context "with a URL having a long TLD" do
471
+ def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
472
+
473
+ it "should autolink it" do
474
+ @autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html')
475
+ end
476
+ end
477
+
478
+ context "with a url lacking the protocol" do
479
+ def original_text; "I like www.foobar.com dudes"; end
480
+
481
+ it "does not link at all" do
482
+ link = Nokogiri::HTML(@autolinked_text).search('a')
483
+ link.should be_empty
484
+ end
485
+ end
486
+
487
+ context "with a @ in a URL" do
488
+ context "with XSS attack" do
489
+ def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
490
+
491
+ it "should not allow XSS follwing @" do
492
+ @autolinked_text.should have_autolinked_url('http://x.xx.com/')
493
+ end
494
+ end
495
+
496
+ context "with a username not followed by a /" do
497
+ def original_text; 'http://example.com/@foobar'; end
498
+
499
+ it "should link url" do
500
+ @autolinked_text.should have_autolinked_url('http://example.com/@foobar')
501
+ end
502
+ end
503
+
504
+ context "with a username followed by a /" do
505
+ def original_text; 'http://example.com/@foobar/'; end
506
+
507
+ it "should not link the username but link full url" do
508
+ @autolinked_text.should have_autolinked_url('http://example.com/@foobar/')
509
+ @autolinked_text.should_not link_to_screen_name('foobar')
510
+ end
511
+ end
512
+ end
513
+
514
+ context "regex engine quirks" do
515
+ context "does not spiral out of control on repeated periods" do
516
+ def original_text; "Test a ton of periods http://example.com/path.........................................."; end
517
+
518
+ it "should autolink" do
519
+ @autolinked_text.should have_autolinked_url('http://example.com/path')
520
+ end
521
+ end
522
+
523
+ context "does not spiral out of control on repeated dashes" do
524
+ def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"; end
525
+
526
+ it "should autolink" do
527
+ @autolinked_text.should have_autolinked_url('http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188')
528
+ end
529
+ end
530
+ end
531
+
532
+ end
533
+
534
+ describe "Autolink all" do
535
+ before do
536
+ @linker = TestAutolink.new
537
+ end
538
+
539
+ it "should allow url/hashtag overlap" do
540
+ auto_linked = @linker.auto_link("https://twitter.com/#search")
541
+ auto_linked.should have_autolinked_url('https://twitter.com/#search')
542
+ end
543
+
544
+ it "should not add invalid option in HTML tags" do
545
+ auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname')
546
+ auto_linked.should have_autolinked_url('https://twitter.com/')
547
+ auto_linked.should_not include('hashtag_class')
548
+ auto_linked.should_not include('hashtag_classname')
549
+ end
550
+
551
+ it "should autolink url/hashtag/mention in text with Unicode supplementary characters" do
552
+ auto_linked = @linker.auto_link("#{[0x10400].pack('U')} #hashtag #{[0x10400].pack('U')} @mention #{[0x10400].pack('U')} http://twitter.com/")
553
+ auto_linked.should have_autolinked_hashtag('#hashtag')
554
+ auto_linked.should link_to_screen_name('mention')
555
+ auto_linked.should have_autolinked_url('http://twitter.com/')
556
+ end
557
+ end
558
+
559
+ end
560
+
561
+ describe "autolinking options" do
562
+ before do
563
+ @linker = TestAutolink.new
564
+ end
565
+
566
+ it "should show display_url when :url_entities provided" do
567
+ linked = @linker.auto_link("http://t.co/0JG5Mcq", :url_entities => [{
568
+ "url" => "http://t.co/0JG5Mcq",
569
+ "display_url" => "blog.twitter.com/2011/05/twitte…",
570
+ "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
571
+ "indices" => [
572
+ 84,
573
+ 103
574
+ ]
575
+ }])
576
+ html = Nokogiri::HTML(linked)
577
+ html.search('a').should_not be_empty
578
+ html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty
579
+ html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte"
580
+ html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
581
+ html.search('span[@style="position:absolute;left:-9999px;"]').size.should == 4
582
+ end
583
+
584
+ it "should accept invisible_tag_attrs option" do
585
+ linked = @linker.auto_link("http://t.co/0JG5Mcq",
586
+ {
587
+ :url_entities => [{
588
+ "url" => "http://t.co/0JG5Mcq",
589
+ "display_url" => "blog.twitter.com/2011/05/twitte…",
590
+ "expanded_url" => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
591
+ "indices" => [
592
+ 0,
593
+ 19
594
+ ]
595
+ }],
596
+ :invisible_tag_attrs => "style='dummy;'"
597
+ })
598
+ html = Nokogiri::HTML(linked)
599
+ html.search('span[@style="dummy;"]').size.should == 4
600
+ end
601
+
602
+ it "should show display_url if available in entity" do
603
+ linked = @linker.auto_link_entities("http://t.co/0JG5Mcq",
604
+ [{
605
+ :url => "http://t.co/0JG5Mcq",
606
+ :display_url => "blog.twitter.com/2011/05/twitte…",
607
+ :expanded_url => "http://blog.twitter.com/2011/05/twitter-for-mac-update.html",
608
+ :indices => [0, 19]
609
+ }]
610
+ )
611
+ html = Nokogiri::HTML(linked)
612
+ html.search('a').should_not be_empty
613
+ html.search('a[@href="http://t.co/0JG5Mcq"]').should_not be_empty
614
+ html.search('span[@class=js-display-url]').inner_text.should == "blog.twitter.com/2011/05/twitte"
615
+ html.inner_text.should == " http://blog.twitter.com/2011/05/twitter-for-mac-update.html …"
616
+ end
617
+
618
+ it "should apply :class as a CSS class" do
619
+ linked = @linker.auto_link("http://example.com/", :class => 'myclass')
620
+ linked.should have_autolinked_url('http://example.com/')
621
+ linked.should match(/myclass/)
622
+ end
623
+
624
+ it "should apply :url_class only on URL" do
625
+ linked = @linker.auto_link("http://twitter.com")
626
+ linked.should have_autolinked_url('http://twitter.com')
627
+ linked.should_not match(/class/)
628
+
629
+ linked = @linker.auto_link("http://twitter.com", :url_class => 'testClass')
630
+ linked.should have_autolinked_url('http://twitter.com')
631
+ linked.should match(/class=\"testClass\"/)
632
+
633
+ linked = @linker.auto_link("#hash @tw", :url_class => 'testClass')
634
+ linked.should match(/class=\"tweet-url hashtag\"/)
635
+ linked.should match(/class=\"tweet-url username\"/)
636
+ linked.should_not match(/class=\"testClass\"/)
637
+ end
638
+
639
+ it "should add rel=nofollow by default" do
640
+ linked = @linker.auto_link("http://example.com/")
641
+ linked.should have_autolinked_url('http://example.com/')
642
+ linked.should match(/nofollow/)
643
+ end
644
+
645
+ it "should include the '@' symbol in a username when passed :username_include_symbol" do
646
+ linked = @linker.auto_link("@user", :username_include_symbol => true)
647
+ linked.should link_to_screen_name('user', '@user')
648
+ end
649
+
650
+ it "should include the '@' symbol in a list when passed :username_include_symbol" do
651
+ linked = @linker.auto_link("@user/list", :username_include_symbol => true)
652
+ linked.should link_to_list_path('user/list', '@user/list')
653
+ end
654
+
655
+ it "should not add rel=nofollow when passed :suppress_no_follow" do
656
+ linked = @linker.auto_link("http://example.com/", :suppress_no_follow => true)
657
+ linked.should have_autolinked_url('http://example.com/')
658
+ linked.should_not match(/nofollow/)
659
+ end
660
+
661
+ it "should not add a target attribute by default" do
662
+ linked = @linker.auto_link("http://example.com/")
663
+ linked.should have_autolinked_url('http://example.com/')
664
+ linked.should_not match(/target=/)
665
+ end
666
+
667
+ it "should respect the :target option" do
668
+ linked = @linker.auto_link("http://example.com/", :target => 'mywindow')
669
+ linked.should have_autolinked_url('http://example.com/')
670
+ linked.should match(/target="mywindow"/)
671
+ end
672
+
673
+ it "should customize href by username_url_block option" do
674
+ linked = @linker.auto_link("@test", :username_url_block => lambda{|a| "dummy"})
675
+ linked.should have_autolinked_url('dummy', 'test')
676
+ end
677
+
678
+ it "should customize href by list_url_block option" do
679
+ linked = @linker.auto_link("@test/list", :list_url_block => lambda{|a| "dummy"})
680
+ linked.should have_autolinked_url('dummy', 'test/list')
681
+ end
682
+
683
+ it "should customize href by hashtag_url_block option" do
684
+ linked = @linker.auto_link("#hashtag", :hashtag_url_block => lambda{|a| "dummy"})
685
+ linked.should have_autolinked_url('dummy', '#hashtag')
686
+ end
687
+
688
+ it "should customize href by cashtag_url_block option" do # Spec: the :cashtag_url_block lambda supplies the link target for a cashtag.
689
+ linked = @linker.auto_link("$CASH", :cashtag_url_block => lambda{|a| "dummy"}) # the lambda ignores its argument and always returns "dummy"
690
+ linked.should have_autolinked_url('dummy', '$CASH') # matcher defined outside this chunk; presumably (href, link text) - confirm
691
+ end
692
+
693
+ it "should customize href by link_url_block option" do # Spec: the :link_url_block lambda supplies the link target for a plain URL.
694
+ linked = @linker.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"}) # the lambda ignores its argument and always returns "dummy"
695
+ linked.should have_autolinked_url('dummy', 'http://example.com/') # matcher defined outside this chunk; presumably (href, link text) - confirm
696
+ end
697
+
698
+ it "should modify link attributes by link_attribute_block" do # Spec: :link_attribute_block receives (entity, attributes) and may add attributes per entity.
699
+ linked = @linker.auto_link("#hash @mention", # first scenario: one hashtag and one mention
700
+ :link_attribute_block => lambda{|entity, attributes|
701
+ attributes[:"dummy-hash-attr"] = "test" if entity[:hashtag] # only entities with a :hashtag value get the extra attribute
702
+ }
703
+ )
704
+ linked.should match(/<a[^>]+hashtag[^>]+dummy-hash-attr=\"test\"[^>]+>/) # an anchor tag mentioning "hashtag" carries the attribute
705
+ linked.should_not match(/<a[^>]+username[^>]+dummy-hash-attr=\"test\"[^>]+>/) # an anchor tag mentioning "username" must not carry it
706
+ linked.should_not match(/link_attribute_block/i) # the option name itself must not appear in the markup (case-insensitive)
707
+
708
+ linked = @linker.auto_link("@mention http://twitter.com/", # second scenario: one mention and one URL
709
+ :link_attribute_block => lambda{|entity, attributes|
710
+ attributes["dummy-url-attr"] = entity[:url] if entity[:url] # String key here, Symbol key in the first scenario
711
+ }
712
+ )
713
+ linked.should_not match(/<a[^>]+username[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"[^>]*>/) # the mention anchor must not receive the URL attribute
714
+ linked.should match(/<a[^>]+dummy-url-attr=\"http:\/\/twitter.com\/\"/) # some anchor carries the entity's URL as the attribute value
715
+ end
716
+
717
+ it "should modify link text by link_text_block" do # Spec: :link_text_block receives (entity, text) and its return value becomes the anchor text.
718
+ linked = @linker.auto_link("#hash @mention", # first scenario: no symbol/text tag options
719
+ :link_text_block => lambda{|entity, text|
720
+ entity[:hashtag] ? "#replaced" : "pre_#{text}_post" # hashtags get a fixed text, everything else is wrapped
721
+ }
722
+ )
723
+ linked.should match(/<a[^>]+>#replaced<\/a>/)
724
+ linked.should match(/<a[^>]+>pre_mention_post<\/a>/) # the text handed to the block for the mention is "mention" (no '@')
725
+
726
+ linked = @linker.auto_link("#hash @mention", { # second scenario: combined with :symbol_tag / :text_with_symbol_tag
727
+ :link_text_block => lambda{|entity, text|
728
+ "pre_#{text}_post"
729
+ },
730
+ :symbol_tag => "s", :text_with_symbol_tag => "b", :username_include_symbol => true
731
+ })
732
+ linked.should match(/<a[^>]+>pre_<s>#<\/s><b>hash<\/b>_post<\/a>/) # expected output shows the <s>/<b> markup inside the block's wrapping
733
+ linked.should match(/<a[^>]+>pre_<s>@<\/s><b>mention<\/b>_post<\/a>/) # '@' is inside the anchor because :username_include_symbol is true
734
+ end
735
+
736
+ it "should apply :url_target only to auto-linked URLs" do # Spec: :url_target must affect the URL anchor but not the hashtag or mention anchors.
737
+ auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:url_target => '_blank'})
738
+ auto_linked.should have_autolinked_hashtag('#hashtag') # the three have_/link_to_ matchers are defined outside this chunk
739
+ auto_linked.should link_to_screen_name('mention')
740
+ auto_linked.should have_autolinked_url('http://test.com/')
741
+ auto_linked.should_not match(/<a[^>]+hashtag[^>]+target[^>]+>/) # no target attribute on the hashtag anchor
742
+ auto_linked.should_not match(/<a[^>]+username[^>]+target[^>]+>/) # no target attribute on the mention anchor
743
+ auto_linked.should match(/<a[^>]+test.com[^>]+target=\"_blank\"[^>]*>/) # NOTE(review): '.' in test.com is unescaped, so it matches any character
744
+ end
745
+
746
+ it "should apply target='_blank' only to auto-linked URLs when :target_blank is set to true" do # NOTE(review): title says "only to auto-linked URLs", yet the assertions below require target="_blank" on the hashtag and mention anchors too - confirm the intended wording.
747
+ auto_linked = @linker.auto_link("#hashtag @mention http://test.com/", {:target_blank => true})
748
+ auto_linked.should have_autolinked_hashtag('#hashtag') # the three have_/link_to_ matchers are defined outside this chunk
749
+ auto_linked.should link_to_screen_name('mention')
750
+ auto_linked.should have_autolinked_url('http://test.com/')
751
+ auto_linked.should match(/<a[^>]+hashtag[^>]+target=\"_blank\"[^>]*>/) # hashtag anchor is expected to open in a new window
752
+ auto_linked.should match(/<a[^>]+username[^>]+target=\"_blank\"[^>]*>/) # mention anchor is expected to open in a new window
753
+ auto_linked.should match(/<a[^>]+test.com[^>]+target=\"_blank\"[^>]*>/) # URL anchor as well; '.' in test.com is unescaped and matches any character
754
+ end
755
+ end
756
+
757
+ describe "link_url_with_entity" do # Specs for the markup built from an entity's :display_url and :expanded_url.
758
+ before do
759
+ @linker = TestAutolink.new # TestAutolink is defined outside this chunk
760
+ end
761
+
762
+ it "should use display_url and expanded_url" do # display_url is the expanded_url minus "http://" and the trailing "/"; the expectation line ends with a stray ';'
763
+ @linker.send(:link_url_with_entity, # invoked through send; presumably the method is not public - confirm
764
+ {
765
+ :url => "http://t.co/abcde",
766
+ :display_url => "twitter.com",
767
+ :expanded_url => "http://twitter.com/"},
768
+ {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "<span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span><span class='invisible'>http://</span><span class='js-display-url'>twitter.com</span><span class='invisible'>/</span><span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span>";
769
+ end
770
+
771
+ it "should correctly handle display_url ending with '…'" do # expected: the ellipsis moves into the trailing tco-ellipsis span and "/abcdefg" becomes invisible; the expectation line ends with a stray ';'
772
+ @linker.send(:link_url_with_entity, # output double quotes are normalised to single quotes by gsub before comparing
773
+ {
774
+ :url => "http://t.co/abcde",
775
+ :display_url => "twitter.com…",
776
+ :expanded_url => "http://twitter.com/abcdefg"},
777
+ {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "<span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span><span class='invisible'>http://</span><span class='js-display-url'>twitter.com</span><span class='invisible'>/abcdefg</span><span class='tco-ellipsis'><span class='invisible'>&nbsp;</span>…</span>";
778
+ end
779
+
780
+ it "should correctly handle display_url starting with '…'" do # expected: the ellipsis moves into the leading tco-ellipsis span and "http://twi" becomes invisible
781
+ @linker.send(:link_url_with_entity, # output double quotes are normalised to single quotes by gsub before comparing
782
+ {
783
+ :url => "http://t.co/abcde",
784
+ :display_url => "…tter.com/abcdefg",
785
+ :expanded_url => "http://twitter.com/abcdefg"},
786
+ {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "<span class='tco-ellipsis'>…<span class='invisible'>&nbsp;</span></span><span class='invisible'>http://twi</span><span class='js-display-url'>tter.com/abcdefg</span><span class='invisible'></span><span class='tco-ellipsis'><span class='invisible'>&nbsp;</span></span>";
787
+ end
788
+
789
+ it "should not create spans if display_url and expanded_url are on different domains" do # expected: the bare display_url is returned with no span markup
790
+ @linker.send(:link_url_with_entity, # display_url host is pic.twitter.com while expanded_url host is twitter.com
791
+ {
792
+ :url => "http://t.co/abcde",
793
+ :display_url => "pic.twitter.com/xyz",
794
+ :expanded_url => "http://twitter.com/foo/statuses/123/photo/1"},
795
+ {:invisible_tag_attrs => "class='invisible'"}).gsub('"', "'").should == "pic.twitter.com/xyz"
796
+ end
797
+ end
798
+
799
+ describe "symbol_tag" do # Specs for the :symbol_tag and :text_with_symbol_tag options of auto_link.
800
+ before do
801
+ @linker = TestAutolink.new # TestAutolink is defined outside this chunk
802
+ end
803
+ it "should put :symbol_tag around symbol" do # the leading '@', '#' or '$' is expected inside the given tag
804
+ @linker.auto_link("@mention", {:symbol_tag => 's', :username_include_symbol=>true}).should match(/<s>@<\/s>mention/) # mention cases here also pass :username_include_symbol
805
+ @linker.auto_link("#hash", {:symbol_tag => 's'}).should match(/<s>#<\/s>hash/)
806
+ result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 'b', :username_include_symbol=>true}) # same check with a different tag name and all three entity kinds
807
+ result.should match(/<b>@<\/b>mention/)
808
+ result.should match(/<b>#<\/b>hash/)
809
+ result.should match(/<b>\$<\/b>CASH/)
810
+ end
811
+ it "should put :text_with_symbol_tag around text" do # the entity text without its symbol is expected inside the given tag
812
+ result = @linker.auto_link("@mention #hash $CASH", {:text_with_symbol_tag => 'b'})
813
+ result.should match(/<b>mention<\/b>/)
814
+ result.should match(/<b>hash<\/b>/)
815
+ result.should match(/<b>CASH<\/b>/)
816
+ end
817
+ it "should put :symbol_tag around symbol and :text_with_symbol_tag around text" do # both options together: tagged symbol immediately followed by tagged text
818
+ result = @linker.auto_link("@mention #hash $CASH", {:symbol_tag => 's', :text_with_symbol_tag => 'b', :username_include_symbol=>true})
819
+ result.should match(/<s>@<\/s><b>mention<\/b>/)
820
+ result.should match(/<s>#<\/s><b>hash<\/b>/)
821
+ result.should match(/<s>\$<\/s><b>CASH<\/b>/)
822
+ end
823
+ end
824
+
825
+ describe "html_escape" do # Specs for html_escape as exposed on TestAutolink.
826
+ before do
827
+ @linker = TestAutolink.new # TestAutolink is defined outside this chunk
828
+ end
829
+ it "should escape html entities properly" do # covers single characters, mixed strings, already-escaped input and nil
830
+ @linker.html_escape("&").should == "&amp;"
831
+ @linker.html_escape(">").should == "&gt;"
832
+ @linker.html_escape("<").should == "&lt;"
833
+ @linker.html_escape("\"").should == "&quot;"
834
+ @linker.html_escape("'").should == "&#39;" # single quote is expected as a numeric character reference
835
+ @linker.html_escape("&<>\"").should == "&amp;&lt;&gt;&quot;"
836
+ @linker.html_escape("<div>").should == "&lt;div&gt;"
837
+ @linker.html_escape("a&b").should == "a&amp;b"
838
+ @linker.html_escape("<a href=\"https://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;https://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
839
+ @linker.html_escape("&amp;").should == "&amp;amp;" # already-escaped input is escaped again (no double-escape protection expected)
840
+ @linker.html_escape(nil).should == nil # nil input is expected to come back as nil
841
+ end
842
+ end
843
+
844
+ end