twitter-text-kow 1.3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+ require File.dirname(__FILE__) + '/spec_helper'
7
+
8
describe "Twitter::TwitterText::Regex regular expressions" do
  # Extracts every emoji from +input+ and verifies the entities one by one:
  # each must expose a String :emoji and an Array of Integer :indices, equal
  # the emoji at the same position in +expected+, and be accepted by
  # Extractor.is_valid_emoji.
  def expect_emoji_entities(input, expected)
    entities = Twitter::TwitterText::Extractor.extract_emoji_with_indices(input)
    # Without this count check an empty or truncated result would pass
    # vacuously, because the loop below only visits what was extracted.
    expect(entities.length).to be == expected.length
    entities.each_with_index do |entity, i|
      expect(entity[:emoji]).to be_kind_of(String)
      expect(entity[:indices]).to be_kind_of(Array)
      entity[:indices].each do |position|
        expect(position).to be_kind_of(Integer)
      end
      expect(entity[:emoji]).to be == expected[i]
      expect(Twitter::TwitterText::Extractor.is_valid_emoji(entity[:emoji])).to be true
    end
  end

  describe "matching URLS" do
    # One pair of examples per known-good URL: bare, and embedded in a sentence.
    TestUrls::VALID.each do |url|
      it "should match the URL #{url}" do
        expect(url).to match_autolink_expression
      end

      it "should match the URL #{url} when it's embedded in other text" do
        text = "Sweet url: #{url} I found. #awesome"
        expect(url).to match_autolink_expression_in(text)
      end
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid characters" do
      TestUrls::INVALID.each { |url| expect(url).to_not match_autolink_expression }
    end
  end

  describe "matching List names" do
    it "should match if less than 25 characters" do
      name = "Shuffleboard Community"
      expect(name.length).to be < 25
      expect(name).to match(Twitter::TwitterText::Regex::REGEXEN[:list_name])
    end

    it "should not match if greater than 25 characters" do
      name = "Most Glorious Shady Meadows Shuffleboard Community"
      expect(name.length).to be > 25
      # NOTE(review): this example used to assert `to match`, contradicting its
      # own description. Presumably the pattern is unanchored and matches only a
      # prefix -- confirm. The check below holds either way: an over-long name
      # must never be matched in full (String#[] returns nil when nothing matches).
      matched = name[Twitter::TwitterText::Regex::REGEXEN[:list_name]]
      expect(matched).to_not eq(name)
    end
  end

  describe "matching Unicode 10.0 emoji" do
    it "should match new emoji" do
      input = "Unicode 10.0; grinning face with one large and one small eye: 🤪; woman with headscarf: 🧕; (fitzpatrick) woman with headscarf + medium-dark skin tone: 🧕🏾; flag (England): 🏴󠁧󠁢󠁥󠁮󠁧󠁿"
      expected = ["🤪", "🧕", "🧕🏾", "🏴󠁧󠁢󠁥󠁮󠁧󠁿"]
      expect_emoji_entities(input, expected)
    end
  end

  describe "matching Unicode 9.0 emoji" do
    it "should match new emoji" do
      input = "Unicode 9.0; face with cowboy hat: 🤠; woman dancing: 💃, woman dancing + medium-dark skin tone: 💃🏾"
      expected = ["🤠", "💃", "💃🏾"]
      expect_emoji_entities(input, expected)
    end
  end
end
@@ -0,0 +1,553 @@
1
+ # Copyright 2018 Twitter, Inc.
2
+ # Licensed under the Apache License, Version 2.0
3
+ # http://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ # encoding: utf-8
6
+ require File.dirname(__FILE__) + '/spec_helper'
7
+
8
+ describe Twitter::TwitterText::Rewriter do
9
+ def original_text; end
10
+ def url; end
11
+
12
+ def block(*args)
13
+ if Array === @block_args
14
+ unless Array === @block_args.first
15
+ @block_args = [@block_args]
16
+ end
17
+ @block_args << args
18
+ else
19
+ @block_args = args
20
+ end
21
+ "[rewritten]"
22
+ end
23
+
24
+ describe "rewrite usernames" do #{{{
25
+ before do
26
+ @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
27
+ end
28
+
29
+ context "username preceded by a space" do
30
+ def original_text; "hello @jacob"; end
31
+
32
+ it "should be rewritten" do
33
+ expect(@block_args).to be == ["@", "jacob", nil]
34
+ expect(@rewritten_text).to be == "hello [rewritten]"
35
+ end
36
+ end
37
+
38
+ context "username at beginning of line" do
39
+ def original_text; "@jacob you're cool"; end
40
+
41
+ it "should be rewritten" do
42
+ expect(@block_args).to be == ["@", "jacob", nil]
43
+ expect(@rewritten_text).to be == "[rewritten] you're cool"
44
+ end
45
+ end
46
+
47
+ context "username preceded by word character" do
48
+ def original_text; "meet@the beach"; end
49
+
50
+ it "should not be rewritten" do
51
+ expect(@block_args).to be nil
52
+ expect(@rewritten_text).to be == "meet@the beach"
53
+ end
54
+ end
55
+
56
+ context "username preceded by non-word character" do
57
+ def original_text; "great.@jacob"; end
58
+
59
+ it "should be rewritten" do
60
+ expect(@block_args).to be == ["@", "jacob", nil]
61
+ expect(@rewritten_text).to be == "great.[rewritten]"
62
+ end
63
+ end
64
+
65
+ context "username containing non-word characters" do
66
+ def original_text; "@jacob&^$%^"; end
67
+
68
+ it "should be rewritten" do
69
+ expect(@block_args).to be == ["@", "jacob", nil]
70
+ expect(@rewritten_text).to be == "[rewritten]&^$%^"
71
+ end
72
+ end
73
+
74
+ context "username over twenty characters" do
75
+ def original_text
76
+ @twenty_character_username = "zach" * 5
77
+ "@" + @twenty_character_username + "1"
78
+ end
79
+
80
+ it "should be rewritten" do
81
+ expect(@block_args).to be == ["@", @twenty_character_username, nil]
82
+ expect(@rewritten_text).to be == "[rewritten]1"
83
+ end
84
+ end
85
+
86
+ context "username followed by japanese" do
87
+ def original_text; "@jacobの"; end
88
+
89
+ it "should be rewritten" do
90
+ expect(@block_args).to be == ["@", "jacob", nil]
91
+ expect(@rewritten_text).to be == "[rewritten]の"
92
+ end
93
+ end
94
+
95
+ context "username preceded by japanese" do
96
+ def original_text; "あ@jacob"; end
97
+
98
+ it "should be rewritten" do
99
+ expect(@block_args).to be == ["@", "jacob", nil]
100
+ expect(@rewritten_text).to be == "あ[rewritten]"
101
+ end
102
+ end
103
+
104
+ context "username surrounded by japanese" do
105
+ def original_text; "あ@jacobの"; end
106
+
107
+ it "should be rewritten" do
108
+ expect(@block_args).to be == ["@", "jacob", nil]
109
+ expect(@rewritten_text).to be == "あ[rewritten]の"
110
+ end
111
+ end
112
+
113
+ context "username using full-width at-sign" do
114
+ def original_text
115
+ "#{[0xFF20].pack('U')}jacob"
116
+ end
117
+
118
+ it "should be rewritten" do
119
+ expect(@block_args).to be == ["@", "jacob", nil]
120
+ expect(@rewritten_text).to be == "[rewritten]"
121
+ end
122
+ end
123
+ end #}}}
124
+
125
+ describe "rewrite lists" do #{{{
126
+ before do
127
+ @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
128
+ end
129
+
130
+ context "slug preceded by a space" do
131
+ def original_text; "hello @jacob/my-list"; end
132
+
133
+ it "should be rewritten" do
134
+ expect(@block_args).to be == ["@", "jacob", "/my-list"]
135
+ expect(@rewritten_text).to be == "hello [rewritten]"
136
+ end
137
+ end
138
+
139
+ context "username followed by a slash but no list" do
140
+ def original_text; "hello @jacob/ my-list"; end
141
+
142
+ it "should not be rewritten" do
143
+ expect(@block_args).to be == ["@", "jacob", nil]
144
+ expect(@rewritten_text).to be == "hello [rewritten]/ my-list"
145
+ end
146
+ end
147
+
148
+ context "empty username followed by a list" do
149
+ def original_text; "hello @/my-list"; end
150
+
151
+ it "should not be rewritten" do
152
+ expect(@block_args).to be nil
153
+ expect(@rewritten_text).to be == "hello @/my-list"
154
+ end
155
+ end
156
+
157
+ context "list slug at beginning of line" do
158
+ def original_text; "@jacob/my-list"; end
159
+
160
+ it "should be rewritten" do
161
+ expect(@block_args).to be == ["@", "jacob", "/my-list"]
162
+ expect(@rewritten_text).to be == "[rewritten]"
163
+ end
164
+ end
165
+
166
+ context "username preceded by alpha-numeric character" do
167
+ def original_text; "meet@jacob/my-list"; end
168
+
169
+ it "should not be rewritten" do
170
+ expect(@block_args).to be nil
171
+ expect(@rewritten_text).to be == "meet@jacob/my-list"
172
+ end
173
+ end
174
+
175
+ context "username preceded by non-word character" do
176
+ def original_text; "great.@jacob/my-list"; end
177
+
178
+ it "should be rewritten" do
179
+ expect(@block_args).to be == ["@", "jacob", "/my-list"]
180
+ expect(@rewritten_text).to be == "great.[rewritten]"
181
+ end
182
+ end
183
+
184
+ context "username containing non-word characters" do
185
+ def original_text; "@jacob/my-list&^$%^"; end
186
+
187
+ it "should be rewritten" do
188
+ expect(@block_args).to be == ["@", "jacob", "/my-list"]
189
+ expect(@rewritten_text).to be == "[rewritten]&^$%^"
190
+ end
191
+ end
192
+
193
+ context "username over twenty characters" do
194
+ def original_text
195
+ @twentyfive_character_list = "a" * 25
196
+ "@jacob/#{@twentyfive_character_list}12345"
197
+ end
198
+
199
+ it "should be rewritten" do
200
+ expect(@block_args).to be == ["@", "jacob", "/#{@twentyfive_character_list}"]
201
+ expect(@rewritten_text).to be == "[rewritten]12345"
202
+ end
203
+ end
204
+ end #}}}
205
+
206
+ describe "rewrite hashtags" do #{{{
207
+ before do
208
+ @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_hashtags(original_text, &method(:block))
209
+ end
210
+
211
+ context "with an all numeric hashtag" do
212
+ def original_text; "#123"; end
213
+
214
+ it "should not be rewritten" do
215
+ expect(@block_args).to be nil
216
+ expect(@rewritten_text).to be == "#123"
217
+ end
218
+ end
219
+
220
+ context "with a hashtag with alphanumeric characters" do
221
+ def original_text; "#ab1d"; end
222
+
223
+ it "should be rewritten" do
224
+ expect(@block_args).to be == ["#", "ab1d"]
225
+ expect(@rewritten_text).to be == "[rewritten]"
226
+ end
227
+ end
228
+
229
+ context "with a hashtag with underscores" do
230
+ def original_text; "#a_b_c_d"; end
231
+
232
+ it "should be rewritten" do
233
+ expect(@block_args).to be == ["#", "a_b_c_d"]
234
+ expect(@rewritten_text).to be == "[rewritten]"
235
+ end
236
+ end
237
+
238
+ context "with a hashtag that is preceded by a word character" do
239
+ def original_text; "ab#cd"; end
240
+
241
+ it "should not be rewritten" do
242
+ expect(@block_args).to be nil
243
+ expect(@rewritten_text).to be == "ab#cd"
244
+ end
245
+ end
246
+
247
+ context "with a hashtag that starts with a number but has word characters" do
248
+ def original_text; "#2ab"; end
249
+
250
+ it "should be rewritten" do
251
+ expect(@block_args).to be == ["#", "2ab"]
252
+ expect(@rewritten_text).to be == "[rewritten]"
253
+ end
254
+ end
255
+
256
+ context "with multiple valid hashtags" do
257
+ def original_text; "I'm frickin' awesome #ab #cd #ef"; end
258
+
259
+ it "rewrites each hashtag" do
260
+ expect(@block_args).to be == [["#", "ab"], ["#", "cd"], ["#", "ef"]]
261
+ expect(@rewritten_text).to be == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]"
262
+ end
263
+ end
264
+
265
+ context "with a hashtag preceded by a ." do
266
+ def original_text; "ok, great.#abc"; end
267
+
268
+ it "should be rewritten" do
269
+ expect(@block_args).to be == ["#", "abc"]
270
+ expect(@rewritten_text).to be == "ok, great.[rewritten]"
271
+ end
272
+ end
273
+
274
+ context "with a hashtag preceded by a &" do
275
+ def original_text; "&#nbsp;"; end
276
+
277
+ it "should not be rewritten" do
278
+ expect(@block_args).to be nil
279
+ expect(@rewritten_text).to be == "&#nbsp;"
280
+ end
281
+ end
282
+
283
+ context "with a hashtag that ends in an !" do
284
+ def original_text; "#great!"; end
285
+
286
+ it "should be rewritten, but should not include the !" do
287
+ expect(@block_args).to be == ["#", "great"];
288
+ expect(@rewritten_text).to be == "[rewritten]!"
289
+ end
290
+ end
291
+
292
+ context "with a hashtag followed by Japanese" do
293
+ def original_text; "#twj_devの"; end
294
+
295
+ it "should be rewritten" do
296
+ expect(@block_args).to be == ["#", "twj_devの"];
297
+ expect(@rewritten_text).to be == "[rewritten]"
298
+ end
299
+ end
300
+
301
+ context "with a hashtag preceded by a full-width space" do
302
+ def original_text; "#{[0x3000].pack('U')}#twj_dev"; end
303
+
304
+ it "should be rewritten" do
305
+ expect(@block_args).to be == ["#", "twj_dev"];
306
+ expect(@rewritten_text).to be == " [rewritten]"
307
+ end
308
+ end
309
+
310
+ context "with a hashtag followed by a full-width space" do
311
+ def original_text; "#twj_dev#{[0x3000].pack('U')}"; end
312
+
313
+ it "should be rewritten" do
314
+ expect(@block_args).to be == ["#", "twj_dev"];
315
+ expect(@rewritten_text).to be == "[rewritten] "
316
+ end
317
+ end
318
+
319
+ context "with a hashtag using full-width hash" do
320
+ def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
321
+
322
+ it "should be rewritten" do
323
+ expect(@block_args).to be == ["#", "twj_dev"];
324
+ expect(@rewritten_text).to be == "[rewritten]"
325
+ end
326
+ end
327
+
328
+ context "with a hashtag containing an accented latin character" do
329
+ def original_text
330
+ # the hashtag is #éhashtag
331
+ "##{[0x00e9].pack('U')}hashtag"
332
+ end
333
+
334
+ it "should be rewritten" do
335
+ expect(@block_args).to be == ["#", "éhashtag"];
336
+ expect(@rewritten_text).to be == "[rewritten]"
337
+ end
338
+ end
339
+ end #}}}
340
+
341
+ describe "rewrite urls" do #{{{
342
+ def url; "http://www.google.com"; end
343
+
344
+ before do
345
+ @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_urls(original_text, &method(:block))
346
+ end
347
+
348
+ context "when embedded in plain text" do
349
+ def original_text; "On my search engine #{url} I found good links."; end
350
+
351
+ it "should be rewritten" do
352
+ expect(@block_args).to be == [url];
353
+ expect(@rewritten_text).to be == "On my search engine [rewritten] I found good links."
354
+ end
355
+ end
356
+
357
+ context "when surrounded by Japanese;" do
358
+ def original_text; "いまなにしてる#{url}いまなにしてる"; end
359
+
360
+ it "should be rewritten" do
361
+ expect(@block_args).to be == [url];
362
+ expect(@rewritten_text).to be == "いまなにしてる[rewritten]いまなにしてる"
363
+ end
364
+ end
365
+
366
+ context "with a path surrounded by parentheses;" do
367
+ def original_text; "I found a neatness (#{url})"; end
368
+
369
+ it "should be rewritten" do
370
+ expect(@block_args).to be == [url];
371
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
372
+ end
373
+
374
+ context "when the URL ends with a slash;" do
375
+ def url; "http://www.google.com/"; end
376
+
377
+ it "should be rewritten" do
378
+ expect(@block_args).to be == [url];
379
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
380
+ end
381
+ end
382
+
383
+ context "when the URL has a path;" do
384
+ def url; "http://www.google.com/fsdfasdf"; end
385
+
386
+ it "should be rewritten" do
387
+ expect(@block_args).to be == [url];
388
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
389
+ end
390
+ end
391
+ end
392
+
393
+ context "when path contains parens" do
394
+ def original_text; "I found a neatness (#{url})"; end
395
+
396
+ it "should be rewritten" do
397
+ expect(@block_args).to be == [url];
398
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
399
+ end
400
+
401
+ context "wikipedia" do
402
+ def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end
403
+
404
+ it "should be rewritten" do
405
+ expect(@block_args).to be == [url];
406
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
407
+ end
408
+ end
409
+
410
+ context "IIS session" do
411
+ def url; "http://msdn.com/S(deadbeef)/page.htm"; end
412
+
413
+ it "should be rewritten" do
414
+ expect(@block_args).to be == [url];
415
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
416
+ end
417
+ end
418
+
419
+ context "unbalanced parens" do
420
+ def url; "http://example.com/i_has_a_("; end
421
+
422
+ it "should be rewritten" do
423
+ expect(@block_args).to be == ["http://example.com/i_has_a_"];
424
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten]()"
425
+ end
426
+ end
427
+
428
+ context "balanced parens with a double quote inside" do
429
+ def url; "http://foo.bar.com/foo_(\")_bar" end
430
+
431
+ it "should be rewritten" do
432
+ expect(@block_args).to be == ["http://foo.bar.com/foo_"];
433
+ expect(@rewritten_text).to be == "I found a neatness ([rewritten](\")_bar)"
434
+ end
435
+ end
436
+
437
+ context "balanced parens hiding XSS" do
438
+ def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end
439
+
440
+ it "should be rewritten" do
441
+ expect(@block_args).to be == ["http://x.xx.com/"];
442
+ expect(@rewritten_text).to be == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
443
+ end
444
+ end
445
+ end
446
+
447
+ context "when preceded by a :" do
448
+ def original_text; "Check this out @hoverbird:#{url}"; end
449
+
450
+ it "should be rewritten" do
451
+ expect(@block_args).to be == [url];
452
+ expect(@rewritten_text).to be == "Check this out @hoverbird:[rewritten]"
453
+ end
454
+ end
455
+
456
+ context "with a URL ending in allowed punctuation" do
457
+ it "does not consume ending punctuation" do
458
+ %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
459
+ expect(Twitter::TwitterText::Rewriter.rewrite_urls("#{url}#{char}") do |url|
460
+ expect(url).to be == url
461
+ "[rewritten]"
462
+ end).to be == "[rewritten]#{char}"
463
+ end
464
+ end
465
+ end
466
+
467
+ context "with a URL preceded in forbidden characters" do
468
+ it "should be rewritten" do
469
+ %w| \ ' / ! = |.each do |char|
470
+ expect(Twitter::TwitterText::Rewriter.rewrite_urls("#{char}#{url}") do |url|
471
+ "[rewritten]" # should not be called here.
472
+ end).to be == "#{char}[rewritten]"
473
+ end
474
+ end
475
+ end
476
+
477
+ context "when embedded in a link tag" do
478
+ def original_text; "<link rel='true'>#{url}</link>"; end
479
+
480
+ it "should be rewritten" do
481
+ expect(@block_args).to be == [url];
482
+ expect(@rewritten_text).to be == "<link rel='true'>[rewritten]</link>"
483
+ end
484
+ end
485
+
486
+ context "with multiple URLs" do
487
+ def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end
488
+
489
+ it "should autolink each one" do
490
+ expect(@block_args).to be == [["http://www.links.org"], ["http://www.foo.org"]];
491
+ expect(@rewritten_text).to be == "[rewritten] link at start of page, link at end [rewritten]"
492
+ end
493
+ end
494
+
495
+ context "with multiple URLs in different formats" do
496
+ def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end
497
+
498
+ it "should autolink each one, in the proper order" do
499
+ expect(@block_args).to be == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]];
500
+ expect(@rewritten_text).to be == "[rewritten] [rewritten] [rewritten]"
501
+ end
502
+ end
503
+
504
+ context "with a URL having a long TLD" do
505
+ def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end
506
+
507
+ it "should autolink it" do
508
+ expect(@block_args).to be == ["http://golem.mobi/0912/71607.html"]
509
+ expect(@rewritten_text).to be == "Yahoo integriert Facebook [rewritten]"
510
+ end
511
+ end
512
+
513
+ context "with a url lacking the protocol" do
514
+ def original_text; "I like www.foobar.com dudes"; end
515
+
516
+ it "does not link at all" do
517
+ expect(@block_args).to be nil
518
+ expect(@rewritten_text).to be == "I like www.foobar.com dudes"
519
+ end
520
+ end
521
+
522
+ context "with a @ in a URL" do
523
+ context "with XSS attack" do
524
+ def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end
525
+
526
+ it "should not allow XSS follwing @" do
527
+ expect(@block_args).to be == ["http://x.xx.com/"]
528
+ expect(@rewritten_text).to be == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
529
+ end
530
+ end
531
+
532
+ context "with a username not followed by a /" do
533
+ def original_text; "http://example.com/@foobar"; end
534
+
535
+ it "should link url" do
536
+ expect(@block_args).to be == ["http://example.com/@foobar"]
537
+ expect(@rewritten_text).to be == "[rewritten]"
538
+ end
539
+ end
540
+
541
+ context "with a username followed by a /" do
542
+ def original_text; "http://example.com/@foobar/"; end
543
+
544
+ it "should not link the username but link full url" do
545
+ expect(@block_args).to be == ["http://example.com/@foobar/"]
546
+ expect(@rewritten_text).to be == "[rewritten]"
547
+ end
548
+ end
549
+ end
550
+ end #}}}
551
+ end
552
+
553
+ # vim: foldmethod=marker