twitter-text-simpleidn 3.0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gemtest +0 -0
- data/.gitignore +40 -0
- data/.gitmodules +3 -0
- data/.rspec +2 -0
- data/CHANGELOG.md +35 -0
- data/Gemfile +4 -0
- data/LICENSE +188 -0
- data/README.md +193 -0
- data/Rakefile +52 -0
- data/config/README.md +142 -0
- data/config/v1.json +8 -0
- data/config/v2.json +29 -0
- data/config/v3.json +30 -0
- data/lib/assets/tld_lib.yml +1571 -0
- data/lib/twitter-text.rb +29 -0
- data/lib/twitter-text/autolink.rb +453 -0
- data/lib/twitter-text/configuration.rb +68 -0
- data/lib/twitter-text/deprecation.rb +21 -0
- data/lib/twitter-text/emoji_regex.rb +27 -0
- data/lib/twitter-text/extractor.rb +388 -0
- data/lib/twitter-text/hash_helper.rb +27 -0
- data/lib/twitter-text/hit_highlighter.rb +92 -0
- data/lib/twitter-text/regex.rb +381 -0
- data/lib/twitter-text/rewriter.rb +69 -0
- data/lib/twitter-text/unicode.rb +31 -0
- data/lib/twitter-text/validation.rb +251 -0
- data/lib/twitter-text/weighted_range.rb +24 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/spec/autolinking_spec.rb +848 -0
- data/spec/configuration_spec.rb +136 -0
- data/spec/extractor_spec.rb +392 -0
- data/spec/hithighlighter_spec.rb +96 -0
- data/spec/regex_spec.rb +76 -0
- data/spec/rewriter_spec.rb +553 -0
- data/spec/spec_helper.rb +139 -0
- data/spec/test_urls.rb +90 -0
- data/spec/twitter_text_spec.rb +25 -0
- data/spec/unicode_spec.rb +35 -0
- data/spec/validation_spec.rb +87 -0
- data/test/conformance_test.rb +242 -0
- data/twitter-text.gemspec +35 -0
- metadata +229 -0
@@ -0,0 +1,96 @@
|
|
1
|
+
# Copyright 2018 Twitter, Inc.
|
2
|
+
# Licensed under the Apache License, Version 2.0
|
3
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
4
|
+
|
5
|
+
# encoding: utf-8
|
6
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
7
|
+
|
8
|
+
# Bare host for the specs in this file: the HitHighlighter module is mixed
# into an otherwise empty class so that #hit_highlight can be called on an
# instance.
TestHitHighlighter = Class.new do
  include Twitter::TwitterText::HitHighlighter
end
|
11
|
+
|
12
|
+
# Specs for HitHighlighter#hit_highlight, called through TestHitHighlighter.
#
# As the expectations below show, each hit is a [start, end] pair of character
# offsets with an exclusive end, and the offsets count visible text only:
# markup already present in the input (<a>...</a>) is skipped when locating a
# hit.
describe Twitter::TwitterText::HitHighlighter do
  describe "highlight" do
    before do
      @highlighter = TestHitHighlighter.new
    end

    context "with options" do
      before do
        @original = "Testing this hit highliter"
        @hits = [[13,16]]
      end

      it "should default to <em> tags" do
        expect(@highlighter.hit_highlight(@original, @hits)).to be == "Testing this <em>hit</em> highliter"
      end

      it "should allow tag override" do
        expect(@highlighter.hit_highlight(@original, @hits, :tag => 'b')).to be == "Testing this <b>hit</b> highliter"
      end
    end

    context "without links" do
      before do
        @original = "Hey! this is a test tweet"
      end

      it "should return original when no hits are provided" do
        expect(@highlighter.hit_highlight(@original)).to be == @original
      end

      # The hit lists are passed as plain literals; assigning them to a
      # throwaway local inside the argument list only produced an unused
      # variable.
      it "should highlight one hit" do
        expect(@highlighter.hit_highlight(@original, [[5, 9]])).to be == "Hey! <em>this</em> is a test tweet"
      end

      it "should highlight two hits" do
        expect(@highlighter.hit_highlight(@original, [[5, 9], [15, 19]])).to be == "Hey! <em>this</em> is a <em>test</em> tweet"
      end

      it "should correctly highlight first-word hits" do
        expect(@highlighter.hit_highlight(@original, [[0, 3]])).to be == "<em>Hey</em>! this is a test tweet"
      end

      it "should correctly highlight last-word hits" do
        expect(@highlighter.hit_highlight(@original, [[20, 25]])).to be == "Hey! this is a test <em>tweet</em>"
      end
    end

    context "with links" do
      it "should highlight with a single link" do
        expect(@highlighter.hit_highlight("@<a>bcherry</a> this was a test tweet", [[9, 13]])).to be == "@<a>bcherry</a> <em>this</em> was a test tweet"
      end

      it "should highlight with link at the end" do
        expect(@highlighter.hit_highlight("test test <a>test</a>", [[5, 9]])).to be == "test <em>test</em> <a>test</a>"
      end

      it "should highlight with a link at the beginning" do
        expect(@highlighter.hit_highlight("<a>test</a> test test", [[5, 9]])).to be == "<a>test</a> <em>test</em> test"
      end

      it "should highlight an entire link" do
        expect(@highlighter.hit_highlight("test <a>test</a> test", [[5, 9]])).to be == "test <a><em>test</em></a> test"
      end

      it "should highlight within a link" do
        expect(@highlighter.hit_highlight("test <a>test</a> test", [[6, 8]])).to be == "test <a>t<em>es</em>t</a> test"
      end

      it "should highlight around a link" do
        expect(@highlighter.hit_highlight("test <a>test</a> test", [[3, 11]])).to be == "tes<em>t <a>test</a> t</em>est"
      end

      # A hit that runs past the end of the text is expected to be clamped to
      # the text rather than raise.
      it "should fail gracefully with bad hits" do
        expect(@highlighter.hit_highlight("test test", [[5, 20]])).to be == "test <em>test</em>"
      end

      it "should not mess up with touching tags" do
        expect(@highlighter.hit_highlight("<a>foo</a><a>foo</a>", [[3,6]])).to be == "<a>foo</a><a><em>foo</em></a>"
      end

    end

  end

end
|
data/spec/regex_spec.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# Copyright 2018 Twitter, Inc.
|
2
|
+
# Licensed under the Apache License, Version 2.0
|
3
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
4
|
+
|
5
|
+
# encoding: utf-8
|
6
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
7
|
+
|
8
|
+
# Specs for the URL and list-name regular expressions and for emoji
# extraction. The match_autolink_expression matchers and the TestUrls fixtures
# are not defined in this file.
describe "Twitter::TwitterText::Regex regular expressions" do
  # Checks that extracting emoji from +input+ yields exactly +expected+, in
  # order, and that every entity is well formed: a String under :emoji, an
  # Array of Integers under :indices, and accepted by Extractor.is_valid_emoji.
  def expect_emoji_entities(input, expected)
    entities = Twitter::TwitterText::Extractor.extract_emoji_with_indices(input)

    # Without this check the loop below passes vacuously whenever the
    # extractor finds fewer emoji than expected (including none at all).
    expect(entities.size).to be == expected.size

    entities.each_with_index do |entity, i|
      expect(entity[:emoji]).to be_kind_of(String)
      expect(entity[:indices]).to be_kind_of(Array)
      entity[:indices].each do |position|
        expect(position).to be_kind_of(Integer)
      end
      expect(entity[:emoji]).to be == expected[i]
      expect(Twitter::TwitterText::Extractor.is_valid_emoji(entity[:emoji])).to be true
    end
  end

  describe "matching URLS" do
    # Two examples are generated per fixture URL: the URL on its own and the
    # same URL embedded in a sentence.
    TestUrls::VALID.each do |url|
      it "should match the URL #{url}" do
        expect(url).to match_autolink_expression
      end

      it "should match the URL #{url} when it's embedded in other text" do
        text = "Sweet url: #{url} I found. #awesome"
        expect(url).to match_autolink_expression_in(text)
      end
    end
  end

  describe "invalid URLS" do
    it "does not link urls with invalid characters" do
      TestUrls::INVALID.each {|url| expect(url).to_not match_autolink_expression}
    end
  end

  describe "matching List names" do
    it "should match if less than 25 characters" do
      name = "Shuffleboard Community"
      expect(name.length).to be < 25
      expect(name).to match(Twitter::TwitterText::Regex[:list_name])
    end

    # NOTE(review): the description says "should not match" but the assertion
    # is a positive `to match`. Presumably the pattern is unanchored and
    # matches a prefix of the name; confirm the intent against the
    # :list_name definition before changing either side.
    it "should not match if greater than 25 characters" do
      name = "Most Glorious Shady Meadows Shuffleboard Community"
      expect(name.length).to be > 25
      expect(name).to match(Twitter::TwitterText::Regex[:list_name])
    end

  end

  describe "matching Unicode 10.0 emoji" do
    # NOTE(review): the "flag (England)" sample is copied as it appears here.
    # A subdivision flag normally carries invisible tag characters after the
    # black flag; verify they are present in the checked-in file.
    it "should match new emoji" do
      input = "Unicode 10.0; grinning face with one large and one small eye: 🤪; woman with headscarf: 🧕; (fitzpatrick) woman with headscarf + medium-dark skin tone: 🧕🏾; flag (England): 🏴"
      expected = ["🤪", "🧕", "🧕🏾", "🏴"]
      expect_emoji_entities(input, expected)
    end
  end

  describe "matching Unicode 9.0 emoji" do
    it "should match new emoji" do
      input = "Unicode 9.0; face with cowboy hat: 🤠; woman dancing: 💃, woman dancing + medium-dark skin tone: 💃🏾"
      expected = ["🤠", "💃", "💃🏾"]
      expect_emoji_entities(input, expected)
    end
  end
end
|
@@ -0,0 +1,553 @@
|
|
1
|
+
# Copyright 2018 Twitter, Inc.
|
2
|
+
# Licensed under the Apache License, Version 2.0
|
3
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
4
|
+
|
5
|
+
# encoding: utf-8
|
6
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
7
|
+
|
8
|
+
describe Twitter::TwitterText::Rewriter do
|
9
|
+
# Text handed to the rewriter by the `before` hooks. This default is nil;
# contexts that need an input redefine it.
def original_text
  nil
end
|
10
|
+
# URL interpolated into the sample texts. This default is nil; the URL
# examples redefine it.
def url
  nil
end
|
11
|
+
|
12
|
+
# Callback handed to the Rewriter methods (as `&method(:block)`). It records
# the arguments it is called with in @block_args and always returns the fixed
# replacement text "[rewritten]".
#
# After a single call @block_args is that call's argument list. From the
# second call on it is a list of argument lists, in call order.
#
# Calls are counted explicitly instead of being inferred from the shape of
# @block_args, so a first call whose first argument is itself an Array is not
# mistaken for an already-nested list when the second call arrives.
def block(*args)
  @block_calls = (@block_calls || 0) + 1

  case @block_calls
  when 1 then @block_args = args
  when 2 then @block_args = [@block_args, args]
  else @block_args << args
  end

  "[rewritten]"
end
|
23
|
+
|
24
|
+
describe "rewrite usernames" do #{{{
  # Each example inspects the result of one rewrite of the context's
  # original_text; the #block helper records what the rewriter yielded.
  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
  end

  context "username preceded by a space" do
    def original_text
      "hello @jacob"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("hello [rewritten]")
    end
  end

  context "username at beginning of line" do
    def original_text
      "@jacob you're cool"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten] you're cool")
    end
  end

  context "username preceded by word character" do
    def original_text
      "meet@the beach"
    end

    it "should not be rewritten" do
      expect(@block_args).to be_nil
      expect(@rewritten_text).to eq("meet@the beach")
    end
  end

  context "username preceded by non-word character" do
    def original_text
      "great.@jacob"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("great.[rewritten]")
    end
  end

  context "username containing non-word characters" do
    def original_text
      "@jacob&^$%^"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten]&^$%^")
    end
  end

  context "username over twenty characters" do
    # Also stores the 20-character name so the example can compare against it.
    def original_text
      @twenty_character_username = "zach" * 5
      "@#{@twenty_character_username}1"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", @twenty_character_username, nil])
      expect(@rewritten_text).to eq("[rewritten]1")
    end
  end

  context "username followed by japanese" do
    def original_text
      "@jacobの"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten]の")
    end
  end

  context "username preceded by japanese" do
    def original_text
      "あ@jacob"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("あ[rewritten]")
    end
  end

  context "username surrounded by japanese" do
    def original_text
      "あ@jacobの"
    end

    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("あ[rewritten]の")
    end
  end

  context "username using full-width at-sign" do
    # U+FF20 is the full-width form of "@".
    def original_text
      "#{[0xFF20].pack('U')}jacob"
    end

    # NOTE(review): the expected sigil is reproduced as it appears here. If the
    # rewriter yields the at-sign exactly as written in the input, this should
    # be the full-width character instead; confirm against the implementation.
    it "should be rewritten" do
      expect(@block_args).to eq(["@", "jacob", nil])
      expect(@rewritten_text).to eq("[rewritten]")
    end
  end
end #}}}
|
124
|
+
|
125
|
+
describe "rewrite lists" do #{{{
  # Each example inspects the result of one rewrite of the context's
  # original_text. The third recorded argument is the list slug including its
  # leading "/", or nil when only a username was matched.
  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block))
  end

  context "slug preceded by a space" do
    def original_text; "hello @jacob/my-list"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["@", "jacob", "/my-list"]
      expect(@rewritten_text).to be == "hello [rewritten]"
    end
  end

  context "username followed by a slash but no list" do
    def original_text; "hello @jacob/ my-list"; end

    # Previously described as "should not be rewritten", which contradicted
    # the assertions: the username is rewritten, only the list part is not.
    it "should rewrite only the username" do
      expect(@block_args).to be == ["@", "jacob", nil]
      expect(@rewritten_text).to be == "hello [rewritten]/ my-list"
    end
  end

  context "empty username followed by a list" do
    def original_text; "hello @/my-list"; end

    it "should not be rewritten" do
      expect(@block_args).to be nil
      expect(@rewritten_text).to be == "hello @/my-list"
    end
  end

  context "list slug at beginning of line" do
    def original_text; "@jacob/my-list"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["@", "jacob", "/my-list"]
      expect(@rewritten_text).to be == "[rewritten]"
    end
  end

  context "username preceded by alpha-numeric character" do
    def original_text; "meet@jacob/my-list"; end

    it "should not be rewritten" do
      expect(@block_args).to be nil
      expect(@rewritten_text).to be == "meet@jacob/my-list"
    end
  end

  context "username preceded by non-word character" do
    def original_text; "great.@jacob/my-list"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["@", "jacob", "/my-list"]
      expect(@rewritten_text).to be == "great.[rewritten]"
    end
  end

  context "username containing non-word characters" do
    def original_text; "@jacob/my-list&^$%^"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["@", "jacob", "/my-list"]
      expect(@rewritten_text).to be == "[rewritten]&^$%^"
    end
  end

  # Previously titled "username over twenty characters"; the input actually
  # exercises a list slug longer than 25 characters.
  context "list slug over twenty-five characters" do
    # Also stores the 25-character slug so the example can compare against it.
    def original_text
      @twentyfive_character_list = "a" * 25
      "@jacob/#{@twentyfive_character_list}12345"
    end

    it "should be rewritten up to the twenty-fifth character" do
      expect(@block_args).to be == ["@", "jacob", "/#{@twentyfive_character_list}"]
      expect(@rewritten_text).to be == "[rewritten]12345"
    end
  end
end #}}}
|
205
|
+
|
206
|
+
describe "rewrite hashtags" do #{{{
  # Each example inspects the result of one rewrite of the context's
  # original_text; the #block helper records the yielded [sigil, hashtag].
  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_hashtags(original_text, &method(:block))
  end

  context "with an all numeric hashtag" do
    def original_text; "#123"; end

    it "should not be rewritten" do
      expect(@block_args).to be nil
      expect(@rewritten_text).to be == "#123"
    end
  end

  context "with a hashtag with alphanumeric characters" do
    def original_text; "#ab1d"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["#", "ab1d"]
      expect(@rewritten_text).to be == "[rewritten]"
    end
  end

  context "with a hashtag with underscores" do
    def original_text; "#a_b_c_d"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["#", "a_b_c_d"]
      expect(@rewritten_text).to be == "[rewritten]"
    end
  end

  context "with a hashtag that is preceded by a word character" do
    def original_text; "ab#cd"; end

    it "should not be rewritten" do
      expect(@block_args).to be nil
      expect(@rewritten_text).to be == "ab#cd"
    end
  end

  context "with a hashtag that starts with a number but has word characters" do
    def original_text; "#2ab"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["#", "2ab"]
      expect(@rewritten_text).to be == "[rewritten]"
    end
  end

  context "with multiple valid hashtags" do
    def original_text; "I'm frickin' awesome #ab #cd #ef"; end

    it "rewrites each hashtag" do
      expect(@block_args).to be == [["#", "ab"], ["#", "cd"], ["#", "ef"]]
      expect(@rewritten_text).to be == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]"
    end
  end

  context "with a hashtag preceded by a ." do
    def original_text; "ok, great.#abc"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["#", "abc"]
      expect(@rewritten_text).to be == "ok, great.[rewritten]"
    end
  end

  context "with a hashtag preceded by a &" do
    def original_text; "&#nbsp;"; end

    it "should not be rewritten" do
      expect(@block_args).to be nil
      expect(@rewritten_text).to be == "&#nbsp;"
    end
  end

  context "with a hashtag that ends in an !" do
    def original_text; "#great!"; end

    it "should be rewritten, but should not include the !" do
      expect(@block_args).to be == ["#", "great"]
      expect(@rewritten_text).to be == "[rewritten]!"
    end
  end

  context "with a hashtag followed by Japanese" do
    def original_text; "#twj_devの"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["#", "twj_devの"]
      expect(@rewritten_text).to be == "[rewritten]"
    end
  end

  context "with a hashtag preceded by a full-width space" do
    def original_text; "#{[0x3000].pack('U')}#twj_dev"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["#", "twj_dev"]
      # The text in front of the hashtag is left untouched, so the expected
      # prefix is the same U+3000 used in the input. It is built from the
      # code point because a literal full-width space is indistinguishable
      # from an ordinary space when read or copied.
      expect(@rewritten_text).to be == "#{[0x3000].pack('U')}[rewritten]"
    end
  end

  context "with a hashtag followed by a full-width space" do
    def original_text; "#twj_dev#{[0x3000].pack('U')}"; end

    it "should be rewritten" do
      expect(@block_args).to be == ["#", "twj_dev"]
      # Same reasoning as above: the trailing U+3000 is spelled out explicitly.
      expect(@rewritten_text).to be == "[rewritten]#{[0x3000].pack('U')}"
    end
  end

  context "with a hashtag using full-width hash" do
    # U+FF03 is the full-width form of "#".
    def original_text; "#{[0xFF03].pack('U')}twj_dev"; end

    # NOTE(review): the expected sigil is reproduced as it appears here. If the
    # rewriter yields the hash sign exactly as written in the input, this
    # should be the full-width character instead; confirm against the
    # implementation.
    it "should be rewritten" do
      expect(@block_args).to be == ["#", "twj_dev"]
      expect(@rewritten_text).to be == "[rewritten]"
    end
  end

  context "with a hashtag containing an accented latin character" do
    def original_text
      # the hashtag is #éhashtag
      "##{[0x00e9].pack('U')}hashtag"
    end

    it "should be rewritten" do
      # Built from the same code point as the input so the comparison does not
      # depend on how a literal "é" happens to be encoded in this file.
      expect(@block_args).to be == ["#", "#{[0x00e9].pack('U')}hashtag"]
      expect(@rewritten_text).to be == "[rewritten]"
    end
  end
end #}}}
|
340
|
+
|
341
|
+
describe "rewrite urls" do #{{{
  # Default URL for the examples; nested contexts override it where needed.
  def url; "http://www.google.com"; end

  # Each example that reads @block_args / @rewritten_text inspects the result
  # of this one rewrite of the context's original_text.
  before do
    @rewritten_text = Twitter::TwitterText::Rewriter.rewrite_urls(original_text, &method(:block))
  end

  context "when embedded in plain text" do
    def original_text; "On my search engine #{url} I found good links."; end

    it "should be rewritten" do
      expect(@block_args).to be == [url]
      expect(@rewritten_text).to be == "On my search engine [rewritten] I found good links."
    end
  end

  context "when surrounded by Japanese;" do
    def original_text; "いまなにしてる#{url}いまなにしてる"; end

    it "should be rewritten" do
      expect(@block_args).to be == [url]
      expect(@rewritten_text).to be == "いまなにしてる[rewritten]いまなにしてる"
    end
  end

  context "with a path surrounded by parentheses;" do
    def original_text; "I found a neatness (#{url})"; end

    it "should be rewritten" do
      expect(@block_args).to be == [url]
      expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
    end

    context "when the URL ends with a slash;" do
      def url; "http://www.google.com/"; end

      it "should be rewritten" do
        expect(@block_args).to be == [url]
        expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
      end
    end

    context "when the URL has a path;" do
      def url; "http://www.google.com/fsdfasdf"; end

      it "should be rewritten" do
        expect(@block_args).to be == [url]
        expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
      end
    end
  end

  context "when path contains parens" do
    def original_text; "I found a neatness (#{url})"; end

    it "should be rewritten" do
      expect(@block_args).to be == [url]
      expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
    end

    context "wikipedia" do
      def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end

      it "should be rewritten" do
        expect(@block_args).to be == [url]
        expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
      end
    end

    context "IIS session" do
      def url; "http://msdn.com/S(deadbeef)/page.htm"; end

      it "should be rewritten" do
        expect(@block_args).to be == [url]
        expect(@rewritten_text).to be == "I found a neatness ([rewritten])"
      end
    end

    context "unbalanced parens" do
      def url; "http://example.com/i_has_a_("; end

      # Only the part before the unmatched "(" is expected to be rewritten.
      it "should be rewritten" do
        expect(@block_args).to be == ["http://example.com/i_has_a_"]
        expect(@rewritten_text).to be == "I found a neatness ([rewritten]()"
      end
    end

    context "balanced parens with a double quote inside" do
      def url; "http://foo.bar.com/foo_(\")_bar" end

      it "should be rewritten" do
        expect(@block_args).to be == ["http://foo.bar.com/foo_"]
        expect(@rewritten_text).to be == "I found a neatness ([rewritten](\")_bar)"
      end
    end

    context "balanced parens hiding XSS" do
      def url; 'http://x.xx.com/("style="color:red"onmouseover="alert(1)' end

      # The attribute-injection payload must stay outside the rewritten URL.
      it "should be rewritten" do
        expect(@block_args).to be == ["http://x.xx.com/"]
        expect(@rewritten_text).to be == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))'
      end
    end
  end

  context "when preceded by a :" do
    def original_text; "Check this out @hoverbird:#{url}"; end

    it "should be rewritten" do
      expect(@block_args).to be == [url]
      expect(@rewritten_text).to be == "Check this out @hoverbird:[rewritten]"
    end
  end

  context "with a URL ending in allowed punctuation" do
    it "does not consume ending punctuation" do
      # NOTE(review): inside %w a backslash followed by a space is an escaped
      # space, so `\ '` yields the single element " '" rather than a backslash
      # and a quote. Confirm whether a backslash was meant to be covered.
      %w| ? ! , . : ; ] ) } = \ ' |.each do |char|
        # The block parameter is deliberately not called `url`: that name
        # would shadow the #url helper and turn the inner expectation into a
        # comparison of the yielded value with itself.
        expect(Twitter::TwitterText::Rewriter.rewrite_urls("#{url}#{char}") do |matched_url|
          expect(matched_url).to be == url
          "[rewritten]"
        end).to be == "[rewritten]#{char}"
      end
    end
  end

  context "with a URL preceded in forbidden characters" do
    it "should be rewritten" do
      # NOTE(review): as above, `\ '` inside %w is the single element " '".
      %w| \ ' / ! = |.each do |char|
        expect(Twitter::TwitterText::Rewriter.rewrite_urls("#{char}#{url}") do |_url|
          # The expectation below requires this block to be called: its return
          # value replaces the URL that follows the leading character.
          "[rewritten]"
        end).to be == "#{char}[rewritten]"
      end
    end
  end

  context "when embedded in a link tag" do
    def original_text; "<link rel='true'>#{url}</link>"; end

    it "should be rewritten" do
      expect(@block_args).to be == [url]
      expect(@rewritten_text).to be == "<link rel='true'>[rewritten]</link>"
    end
  end

  context "with multiple URLs" do
    def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end

    it "should autolink each one" do
      expect(@block_args).to be == [["http://www.links.org"], ["http://www.foo.org"]]
      expect(@rewritten_text).to be == "[rewritten] link at start of page, link at end [rewritten]"
    end
  end

  context "with multiple URLs in different formats" do
    def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end

    it "should autolink each one, in the proper order" do
      expect(@block_args).to be == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]]
      expect(@rewritten_text).to be == "[rewritten] [rewritten] [rewritten]"
    end
  end

  context "with a URL having a long TLD" do
    def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end

    it "should autolink it" do
      expect(@block_args).to be == ["http://golem.mobi/0912/71607.html"]
      expect(@rewritten_text).to be == "Yahoo integriert Facebook [rewritten]"
    end
  end

  context "with a url lacking the protocol" do
    def original_text; "I like www.foobar.com dudes"; end

    it "does not link at all" do
      expect(@block_args).to be nil
      expect(@rewritten_text).to be == "I like www.foobar.com dudes"
    end
  end

  context "with a @ in a URL" do
    context "with XSS attack" do
      def original_text; 'http://x.xx.com/@"style="color:pink"onmouseover=alert(1)//'; end

      it "should not allow XSS follwing @" do
        expect(@block_args).to be == ["http://x.xx.com/"]
        expect(@rewritten_text).to be == '[rewritten]@"style="color:pink"onmouseover=alert(1)//'
      end
    end

    context "with a username not followed by a /" do
      def original_text; "http://example.com/@foobar"; end

      it "should link url" do
        expect(@block_args).to be == ["http://example.com/@foobar"]
        expect(@rewritten_text).to be == "[rewritten]"
      end
    end

    context "with a username followed by a /" do
      def original_text; "http://example.com/@foobar/"; end

      it "should not link the username but link full url" do
        expect(@block_args).to be == ["http://example.com/@foobar/"]
        expect(@rewritten_text).to be == "[rewritten]"
      end
    end
  end
end #}}}
|
551
|
+
end
|
552
|
+
|
553
|
+
# vim: foldmethod=marker
|