twitter-text-kow 1.3.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,76 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+ require File.dirname(__FILE__) + '/spec_helper'
7
+
8
+ describe "Twitter::TwitterText::Regex regular expressions" do
9
# Generates two examples for every entry of TestUrls::VALID: one matching the
# URL on its own, one matching it when surrounded by other text.
describe "matching URLS" do
  TestUrls::VALID.each do |valid_url|
    it("should match the URL #{valid_url}") { expect(valid_url).to match_autolink_expression }

    it "should match the URL #{valid_url} when it's embedded in other text" do
      expect(valid_url).to match_autolink_expression_in("Sweet url: #{valid_url} I found. #awesome")
    end
  end
end
21
+
22
# One aggregate example: no entry of TestUrls::INVALID may satisfy the
# autolink matcher.
describe "invalid URLS" do
  it "does not link urls with invalid characters" do
    TestUrls::INVALID.each do |invalid_url|
      expect(invalid_url).not_to match_autolink_expression
    end
  end
end
27
+
28
# Exercises the :list_name pattern against a short and a long candidate name.
describe "matching List names" do
  it "should match if less than 25 characters" do
    short_name = "Shuffleboard Community"
    list_name_pattern = Twitter::TwitterText::Regex::REGEXEN[:list_name]

    expect(short_name.length).to be < 25
    expect(short_name).to match(list_name_pattern)
  end

  # NOTE(review): the title says "should not match", yet the expectation below
  # asserts a match. `match` only needs the pattern to hit somewhere in the
  # string, so presumably a prefix of the name matches — confirm whether an
  # anchored / length-bounded check was intended here.
  it "should not match if greater than 25 characters" do
    long_name = "Most Glorious Shady Meadows Shuffleboard Community"
    list_name_pattern = Twitter::TwitterText::Regex[:list_name]

    expect(long_name.length).to be > 25
    expect(long_name).to match(list_name_pattern)
  end
end
42
+
43
# Checks that emoji added in Unicode 10.0 are extracted from running text,
# including a skin-tone modifier sequence and a tag-sequence flag.
describe "matching Unicode 10.0 emoji" do
  it "should match new emoji" do
    # The England flag is U+1F3F4 followed by invisible tag characters
    # (g, b, e, n, g, cancel); spelling it by code point keeps it reviewable.
    england_flag = "\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}"
    input = "Unicode 10.0; grinning face with one large and one small eye: 🤪; woman with headscarf: 🧕; (fitzpatrick) woman with headscarf + medium-dark skin tone: 🧕🏾; flag (England): #{england_flag}"
    expected = ["🤪", "🧕", "🧕🏾", england_flag]
    entities = Twitter::TwitterText::Extractor.extract_emoji_with_indices(input)

    # Without this guard an empty or truncated result would skip some or all
    # of the per-entity checks below and the example would pass vacuously.
    expect(entities.length).to be == expected.length

    entities.each_with_index do |entity, i|
      expect(entity[:emoji]).to be_kind_of(String)
      expect(entity[:indices]).to be_kind_of(Array)
      entity[:indices].each do |position|
        expect(position).to be_kind_of(Integer)
      end
      expect(entity[:emoji]).to be == expected[i]
      expect(Twitter::TwitterText::Extractor.is_valid_emoji(entity[:emoji])).to be true
    end
  end
end
59
+
60
# Checks that emoji added in Unicode 9.0 are extracted from running text,
# including a skin-tone modifier sequence.
describe "matching Unicode 9.0 emoji" do
  it "should match new emoji" do
    input = "Unicode 9.0; face with cowboy hat: 🤠; woman dancing: 💃, woman dancing + medium-dark skin tone: 💃🏾"
    expected = ["🤠", "💃", "💃🏾"]
    entities = Twitter::TwitterText::Extractor.extract_emoji_with_indices(input)

    # Without this guard an empty or truncated result would skip some or all
    # of the per-entity checks below and the example would pass vacuously.
    expect(entities.length).to be == expected.length

    entities.each_with_index do |entity, i|
      expect(entity[:emoji]).to be_kind_of(String)
      expect(entity[:indices]).to be_kind_of(Array)
      entity[:indices].each do |position|
        expect(position).to be_kind_of(Integer)
      end
      expect(entity[:emoji]).to be == expected[i]
      expect(Twitter::TwitterText::Extractor.is_valid_emoji(entity[:emoji])).to be true
    end
  end
end
76
+ end
@@ -0,0 +1,553 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+ require File.dirname(__FILE__) + '/spec_helper'
7
+
8
+ describe Twitter::TwitterText::Rewriter do
9
# Default fixtures. Both return nil; nested contexts redefine them to supply
# the text (and URL) an example works with.
def original_text
  nil
end

def url
  nil
end

# Callback passed to the Rewriter methods. Records the arguments it was
# called with in @block_args and always returns the replacement text.
#
# After one call @block_args is that call's argument list; from the second
# call on it is a list of argument lists, one per call, in call order.
def block(*args)
  @block_args =
    if !@block_args.is_a?(Array)
      # First call: keep the flat argument list.
      args
    elsif @block_args.first.is_a?(Array)
      # Already a list of argument lists: append in place.
      @block_args << args
    else
      # Second call: promote the flat list to a list of lists.
      [@block_args, args]
    end
  "[rewritten]"
end
23
+
24
# Examples for Rewriter.rewrite_usernames_or_lists on plain @mentions.
# Each context overrides original_text; the before hook rewrites it with the
# recording callback, so examples can inspect @block_args and @rewritten_text.
describe "rewrite usernames" do #{{{
  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
  end

  context "username preceded by a space" do
    def original_text; "hello @jacob"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("hello [rewritten]")
    end
  end

  context "username at beginning of line" do
    def original_text; "@jacob you're cool"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten] you're cool")
    end
  end

  context "username preceded by word character" do
    def original_text; "meet@the beach"; end

    it "should not be rewritten" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("meet@the beach")
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("great.[rewritten]")
    end
  end

  context "username containing non-word characters" do
    def original_text; "@jacob&^$%^"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten]&^$%^")
    end
  end

  context "username over twenty characters" do
    # Stores the 20-character prefix so the example can compare against it;
    # the trailing "1" makes the candidate 21 characters long.
    def original_text
      @twenty_character_username = "zach" * 5
      "@" + @twenty_character_username + "1"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", @twenty_character_username, nil])
      expect(@rewritten_text).to eq("[rewritten]1")
    end
  end

  context "username followed by japanese" do
    def original_text; "@jacobの"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten]の")
    end
  end

  context "username preceded by japanese" do
    def original_text; "あ@jacob"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("あ[rewritten]")
    end
  end

  context "username surrounded by japanese" do
    def original_text; "あ@jacobの"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("あ[rewritten]の")
    end
  end

  context "username using full-width at-sign" do
    def original_text
      "#{[0xFF20].pack('U')}jacob"
    end

    it "should be rewritten" do
      # The input uses U+FF20, so the expected sigil is built from the same
      # code point rather than typed as a glyph that is easily confused with
      # (or normalised into) ASCII "@".
      # NOTE(review): assumes the Rewriter hands the at-sign to the callback
      # unnormalised — confirm against the Rewriter implementation.
      expect(@block_args).to eq([[0xFF20].pack('U'), "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end
end #}}}
124
+
125
# Examples for Rewriter.rewrite_usernames_or_lists on @user/list-slug text.
# Each context overrides original_text; the before hook rewrites it with the
# recording callback, so examples can inspect @block_args and @rewritten_text.
describe "rewrite lists" do #{{{
  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
  end

  context "slug preceded by a space" do
    def original_text; "hello @jacob/my-list"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", "/my-list"])
      expect(@rewritten_text).to eq("hello [rewritten]")
    end
  end

  context "username followed by a slash but no list" do
    def original_text; "hello @jacob/ my-list"; end

    # "not rewritten" refers to the list: the username alone is still
    # rewritten and the callback receives nil for the slug.
    it "should not be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("hello [rewritten]/ my-list")
    end
  end

  context "empty username followed by a list" do
    def original_text; "hello @/my-list"; end

    it "should not be rewritten" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("hello @/my-list")
    end
  end

  context "list slug at beginning of line" do
    def original_text; "@jacob/my-list"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", "/my-list"])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end

  context "username preceded by alpha-numeric character" do
    def original_text; "meet@jacob/my-list"; end

    it "should not be rewritten" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("meet@jacob/my-list")
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob/my-list"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", "/my-list"])
      expect(@rewritten_text).to eq("great.[rewritten]")
    end
  end

  context "username containing non-word characters" do
    def original_text; "@jacob/my-list&^$%^"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", "/my-list"])
      expect(@rewritten_text).to eq("[rewritten]&^$%^")
    end
  end

  # Despite the title, the over-long part here is the list slug: 25 "a"s
  # followed by "12345".
  context "username over twenty characters" do
    def original_text
      @twentyfive_character_list = "a" * 25
      "@jacob/#{@twentyfive_character_list}12345"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", "/#{@twentyfive_character_list}"])
      expect(@rewritten_text).to eq("[rewritten]12345")
    end
  end
end #}}}
205
+
206
# Examples for Rewriter.rewrite_hashtags.
# Each context overrides original_text; the before hook rewrites it with the
# recording callback, so examples can inspect @block_args and @rewritten_text.
#
# Expectations involving full-width or accented characters are built from
# code points, mirroring how the inputs are built, so they cannot be confused
# with (or normalised into) their ASCII look-alikes.
describe "rewrite hashtags" do #{{{
  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_hashtags(original_text, &method(:block))
  end

  context "with an all numeric hashtag" do
    def original_text; "#123"; end

    it "should not be rewritten" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("#123")
    end
  end

  context "with a hashtag with alphanumeric characters" do
    def original_text; "#ab1d"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["#", "ab1d"])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end

  context "with a hashtag with underscores" do
    def original_text; "#a_b_c_d"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["#", "a_b_c_d"])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end

  context "with a hashtag that is preceded by a word character" do
    def original_text; "ab#cd"; end

    it "should not be rewritten" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("ab#cd")
    end
  end

  context "with a hashtag that starts with a number but has word characters" do
    def original_text; "#2ab"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["#", "2ab"])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end

  context "with multiple valid hashtags" do
    def original_text; "I'm frickin' awesome #ab #cd #ef"; end

    it "rewrites each hashtag" do
      expect(@block_args).to eq([["#", "ab"], ["#", "cd"], ["#", "ef"]])
      expect(@rewritten_text).to eq("I'm frickin' awesome [rewritten] [rewritten] [rewritten]")
    end
  end

  context "with a hashtag preceded by a ." do
    def original_text; "ok, great.#abc"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["#", "abc"])
      expect(@rewritten_text).to eq("ok, great.[rewritten]")
    end
  end

  context "with a hashtag preceded by a &" do
    def original_text; "&#nbsp;"; end

    it "should not be rewritten" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("&#nbsp;")
    end
  end

  context "with a hashtag that ends in an !" do
    def original_text; "#great!"; end

    it "should be rewritten, but should not include the !" do
      expect(@block_args).to eq(["#", "great"])
      expect(@rewritten_text).to eq("[rewritten]!")
    end
  end

  context "with a hashtag followed by Japanese" do
    def original_text; "#twj_devの"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["#", "twj_devの"])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end

  context "with a hashtag preceded by a full-width space" do
    def original_text; "#{[0x3000].pack('U')}#twj_dev"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["#", "twj_dev"])
      # Only the hashtag is replaced, so the leading U+3000 must survive.
      expect(@rewritten_text).to eq("#{[0x3000].pack('U')}[rewritten]")
    end
  end

  context "with a hashtag followed by a full-width space" do
    def original_text; "#twj_dev#{[0x3000].pack('U')}"; end

    it "should be rewritten" do
      expect(@block_args).to eq(["#", "twj_dev"])
      # Only the hashtag is replaced, so the trailing U+3000 must survive.
      expect(@rewritten_text).to eq("[rewritten]#{[0x3000].pack('U')}")
    end
  end

  context "with a hashtag using full-width hash" do
    def original_text; "#{[0xFF03].pack('U')}twj_dev"; end

    it "should be rewritten" do
      # NOTE(review): assumes the Rewriter hands the hash sign to the callback
      # unnormalised (U+FF03) — confirm against the Rewriter implementation.
      expect(@block_args).to eq([[0xFF03].pack('U'), "twj_dev"])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end

  context "with a hashtag containing an accented latin character" do
    def original_text
      # the hashtag is #éhashtag (é as the single code point U+00E9)
      "##{[0x00e9].pack('U')}hashtag"
    end

    it "should be rewritten" do
      # Built from the same code point as the input so the comparison does
      # not depend on how an "é" literal happens to be normalised.
      expect(@block_args).to eq(["#", "#{[0x00e9].pack('U')}hashtag"])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end
end #}}}
340
+
341
# Examples for Rewriter.rewrite_urls.
# Contexts override original_text (and sometimes url); the before hook
# rewrites original_text with the recording callback, so examples can inspect
# @block_args and @rewritten_text. Two examples call rewrite_urls directly
# with their own block instead.
describe "rewrite urls" do #{{{
  def url; "http://www.google.com"; end

  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_urls(original_text, &method(:block))
  end

  context "when embedded in plain text" do
    def original_text; "On my search engine #{url} I found good links."; end

    it "should be rewritten" do
      expect(@block_args).to eq([url])
      expect(@rewritten_text).to eq("On my search engine [rewritten] I found good links.")
    end
  end

  context "when surrounded by Japanese;" do
    def original_text; "いまなにしてる#{url}いまなにしてる"; end

    it "should be rewritten" do
      expect(@block_args).to eq([url])
      expect(@rewritten_text).to eq("いまなにしてる[rewritten]いまなにしてる")
    end
  end

  context "with a path surrounded by parentheses;" do
    def original_text; "I found a neatness (#{url})"; end

    it "should be rewritten" do
      expect(@block_args).to eq([url])
      expect(@rewritten_text).to eq("I found a neatness ([rewritten])")
    end

    context "when the URL ends with a slash;" do
      def url; "http://www.google.com/"; end

      it "should be rewritten" do
        expect(@block_args).to eq([url])
        expect(@rewritten_text).to eq("I found a neatness ([rewritten])")
      end
    end

    context "when the URL has a path;" do
      def url; "http://www.google.com/fsdfasdf"; end

      it "should be rewritten" do
        expect(@block_args).to eq([url])
        expect(@rewritten_text).to eq("I found a neatness ([rewritten])")
      end
    end
  end

  context "when path contains parens" do
    def original_text; "I found a neatness (#{url})"; end

    it "should be rewritten" do
      expect(@block_args).to eq([url])
      expect(@rewritten_text).to eq("I found a neatness ([rewritten])")
    end

    context "wikipedia" do
      def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end

      it "should be rewritten" do
        expect(@block_args).to eq([url])
        expect(@rewritten_text).to eq("I found a neatness ([rewritten])")
      end
    end

    context "IIS session" do
      def url; "http://msdn.com/S(deadbeef)/page.htm"; end

      it "should be rewritten" do
        expect(@block_args).to eq([url])
        expect(@rewritten_text).to eq("I found a neatness ([rewritten])")
      end
    end

    context "unbalanced parens" do
      def url; "http://example.com/i_has_a_("; end

      it "should be rewritten" do
        expect(@block_args).to eq(["http://example.com/i_has_a_"])
        expect(@rewritten_text).to eq("I found a neatness ([rewritten]()")
      end
    end

    context "balanced parens with a double quote inside" do
      def url; "http://foo.bar.com/foo_(\")_bar"; end

      it "should be rewritten" do
        expect(@block_args).to eq(["http://foo.bar.com/foo_"])
        expect(@rewritten_text).to eq("I found a neatness ([rewritten](\")_bar)")
      end
    end

    context "balanced parens hiding XSS" do
      def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)'; end

      it "should be rewritten" do
        expect(@block_args).to eq(["http://x.xx.com/"])
        expect(@rewritten_text).to eq('I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))')
      end
    end
  end

  context "when preceded by a :" do
    def original_text; "Check this out @hoverbird:#{url}"; end

    it "should be rewritten" do
      expect(@block_args).to eq([url])
      expect(@rewritten_text).to eq("Check this out @hoverbird:[rewritten]")
    end
  end

  context "with a URL ending in allowed punctuation" do
    it "does not consume ending punctuation" do
      # NOTE(review): inside %w, "\ " is an escaped space, so the last element
      # is " '" (space + quote) and a lone backslash is never exercised.
      # Kept as-is to preserve behaviour; confirm what was intended.
      %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
        rewritten = Twitter::TwitterText::Rewriter.rewrite_urls("#{url}#{char}") do |matched_url|
          # The block parameter must not be named `url`: that would shadow
          # the fixture method and turn this check into a tautology.
          expect(matched_url).to eq(url)
          "[rewritten]"
        end
        expect(rewritten).to eq("[rewritten]#{char}")
      end
    end
  end

  context "with a URL preceded in forbidden characters" do
    it "should be rewritten" do
      # NOTE(review): as above, "\ '" yields the single element " '".
      %w| \ ' / ! = |.each do |char|
        # The assertion requires the URL to be rewritten after each of these
        # characters, i.e. the block is expected to run.
        rewritten = Twitter::TwitterText::Rewriter.rewrite_urls("#{char}#{url}") { |_matched_url| "[rewritten]" }
        expect(rewritten).to eq("#{char}[rewritten]")
      end
    end
  end

  context "when embedded in a link tag" do
    def original_text; "<link rel='true'>#{url}</link>"; end

    it "should be rewritten" do
      expect(@block_args).to eq([url])
      expect(@rewritten_text).to eq("<link rel='true'>[rewritten]</link>")
    end
  end

  context "with multiple URLs" do
    def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end

    it "should autolink each one" do
      expect(@block_args).to eq([["http://www.links.org"], ["http://www.foo.org"]])
      expect(@rewritten_text).to eq("[rewritten] link at start of page, link at end [rewritten]")
    end
  end

  context "with multiple URLs in different formats" do
    def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end

    it "should autolink each one, in the proper order" do
      expect(@block_args).to eq([["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]])
      expect(@rewritten_text).to eq("[rewritten] [rewritten] [rewritten]")
    end
  end

  context "with a URL having a long TLD" do
    def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end

    it "should autolink it" do
      expect(@block_args).to eq(["http://golem.mobi/0912/71607.html"])
      expect(@rewritten_text).to eq("Yahoo integriert Facebook [rewritten]")
    end
  end

  context "with a url lacking the protocol" do
    def original_text; "I like www.foobar.com dudes"; end

    it "does not link at all" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("I like www.foobar.com dudes")
    end
  end

  context "with a @ in a URL" do
    context "with XSS attack" do
      def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end

      it "should not allow XSS follwing @" do
        expect(@block_args).to eq(["http://x.xx.com/"])
        expect(@rewritten_text).to eq('[rewritten]@"style="color:pink"onmouseover=alert(1)//')
      end
    end

    context "with a username not followed by a /" do
      def original_text; "http://example.com/@foobar"; end

      it "should link url" do
        expect(@block_args).to eq(["http://example.com/@foobar"])
        expect(@rewritten_text).to eq("[rewritten]")
      end
    end

    context "with a username followed by a /" do
      def original_text; "http://example.com/@foobar/"; end

      it "should not link the username but link full url" do
        expect(@block_args).to eq(["http://example.com/@foobar/"])
        expect(@rewritten_text).to eq("[rewritten]")
      end
    end
  end
end #}}}
551
+ end
552
+
553
+ # vim: foldmethod=marker