twitter-text 1.14.7 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rspec +1 -1
- data/README.md +104 -33
- data/lib/assets/tld_lib.yml +1 -0
- data/lib/twitter-text.rb +2 -0
- data/lib/twitter-text/autolink.rb +4 -4
- data/lib/twitter-text/configuration.rb +53 -0
- data/lib/twitter-text/deprecation.rb +1 -1
- data/lib/twitter-text/extractor.rb +31 -1
- data/lib/twitter-text/regex.rb +13 -13
- data/lib/twitter-text/validation.rb +155 -43
- data/lib/twitter-text/weighted_range.rb +18 -0
- data/spec/autolinking_spec.rb +161 -161
- data/spec/configuration_spec.rb +91 -0
- data/spec/extractor_spec.rb +92 -72
- data/spec/hithighlighter_spec.rb +15 -15
- data/spec/regex_spec.rb +7 -7
- data/spec/rewriter_spec.rb +110 -109
- data/spec/spec_helper.rb +13 -15
- data/spec/test_urls.rb +6 -4
- data/spec/twitter_text_spec.rb +2 -2
- data/spec/unicode_spec.rb +10 -10
- data/spec/validation_spec.rb +35 -11
- data/test/conformance_test.rb +14 -0
- data/twitter-text.gemspec +11 -9
- metadata +53 -32
- data/lib/assets/tld_lib.yml +0 -1565
@@ -0,0 +1,91 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
3
|
+
|
4
|
+
describe Twitter::Configuration do
|
5
|
+
context "configuration" do
|
6
|
+
context "with invalid data" do
|
7
|
+
it "should raise an exception" do
|
8
|
+
invalid_hash = Twitter::Configuration.parse_string("{\"version\":2,\"maxWeightedTweetLength\":280,\"scale\":100,\"defaultWeight\":200,\"transformedURLLength\":23,\"ranges\":[{\"start\":0,\"end\":true,\"weight\":false},{\"start\":8192,\"end\":8205,\"weight\":100},{\"start\":8208,\"end\":8223,\"weight\":100},{\"start\":8242,\"end\":8247,\"weight\":100}]}")
|
9
|
+
expect { Twitter::Configuration.new(invalid_hash) }.to raise_error(ArgumentError)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
context "with defaults" do
|
14
|
+
before do
|
15
|
+
Twitter::Configuration.default_configuration = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V2)
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should define version constants" do
|
19
|
+
expect(Twitter::Configuration.const_defined?(:CONFIG_V1)).to be true
|
20
|
+
expect(Twitter::Configuration.const_defined?(:CONFIG_V2)).to be true
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should define a default configuration" do
|
24
|
+
expect(Twitter::Configuration.default_configuration).to_not be_nil
|
25
|
+
expect(Twitter::Configuration.default_configuration.version).to eq(2)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
context "with v1 configuration" do
|
30
|
+
before do
|
31
|
+
@config = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V1)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should have a version" do
|
35
|
+
expect(@config.version).to eq(1)
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should have a max_weighted_tweet_length" do
|
39
|
+
expect(@config.max_weighted_tweet_length).to eq(140)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should have a scale" do
|
43
|
+
expect(@config.scale).to eq(1)
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should have a default_weight" do
|
47
|
+
expect(@config.default_weight).to eq(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should have a transformed_url_length" do
|
51
|
+
expect(@config.transformed_url_length).to eq(23)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
context "with v2 configuration" do
|
56
|
+
before do
|
57
|
+
@config = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V2)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should have a version" do
|
61
|
+
expect(@config.version).to eq(2)
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should have a max_weighted_tweet_length" do
|
65
|
+
expect(@config.max_weighted_tweet_length).to eq(280)
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should have a scale" do
|
69
|
+
expect(@config.scale).to eq(100)
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should have a default_weight" do
|
73
|
+
expect(@config.default_weight).to eq(200)
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should have a transformed_url_length" do
|
77
|
+
expect(@config.transformed_url_length).to eq(23)
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should have a configured range" do
|
81
|
+
expect(@config.ranges).to be_kind_of(Array)
|
82
|
+
expect(@config.ranges.count).to be > 0
|
83
|
+
expect(@config.ranges[0]).to be_kind_of(Twitter::WeightedRange)
|
84
|
+
weighted_range = @config.ranges[0]
|
85
|
+
expect(weighted_range.start).to be_kind_of(Integer)
|
86
|
+
expect(weighted_range.end).to be_kind_of(Integer)
|
87
|
+
expect(weighted_range.weight).to be_kind_of(Integer)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
data/spec/extractor_spec.rb
CHANGED
@@ -13,37 +13,37 @@ describe Twitter::Extractor do
|
|
13
13
|
describe "mentions" do
|
14
14
|
context "single screen name alone " do
|
15
15
|
it "should be linked" do
|
16
|
-
@extractor.extract_mentioned_screen_names("@alice").
|
16
|
+
expect(@extractor.extract_mentioned_screen_names("@alice")).to be == ["alice"]
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should be linked with _" do
|
20
|
-
@extractor.extract_mentioned_screen_names("@alice_adams").
|
20
|
+
expect(@extractor.extract_mentioned_screen_names("@alice_adams")).to be == ["alice_adams"]
|
21
21
|
end
|
22
22
|
|
23
23
|
it "should be linked if numeric" do
|
24
|
-
@extractor.extract_mentioned_screen_names("@1234").
|
24
|
+
expect(@extractor.extract_mentioned_screen_names("@1234")).to be == ["1234"]
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
context "multiple screen names" do
|
29
29
|
it "should both be linked" do
|
30
|
-
@extractor.extract_mentioned_screen_names("@alice @bob").
|
30
|
+
expect(@extractor.extract_mentioned_screen_names("@alice @bob")).to be == ["alice", "bob"]
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
context "screen names embedded in text" do
|
35
35
|
it "should be linked in Latin text" do
|
36
|
-
@extractor.extract_mentioned_screen_names("waiting for @alice to arrive").
|
36
|
+
expect(@extractor.extract_mentioned_screen_names("waiting for @alice to arrive")).to be == ["alice"]
|
37
37
|
end
|
38
38
|
|
39
39
|
it "should be linked in Japanese text" do
|
40
|
-
@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").
|
40
|
+
expect(@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている")).to be == ["alice"]
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should ignore mentions preceded by !, @, #, $, %, & or *" do
|
44
44
|
invalid_chars = ['!', '@', '#', '$', '%', '&', '*']
|
45
45
|
invalid_chars.each do |c|
|
46
|
-
@extractor.extract_mentioned_screen_names("f#{c}@kn").
|
46
|
+
expect(@extractor.extract_mentioned_screen_names("f#{c}@kn")).to be == []
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
@@ -51,36 +51,36 @@ describe Twitter::Extractor do
|
|
51
51
|
it "should accept a block arugment and call it in order" do
|
52
52
|
needed = ["alice", "bob"]
|
53
53
|
@extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
|
54
|
-
sn.
|
54
|
+
expect(sn).to be == needed.shift
|
55
55
|
end
|
56
|
-
needed.
|
56
|
+
expect(needed).to be == []
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
60
60
|
describe "mentions with indices" do
|
61
61
|
context "single screen name alone " do
|
62
62
|
it "should be linked and the correct indices" do
|
63
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice").
|
63
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice")).to be == [{:screen_name => "alice", :indices => [0, 6]}]
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should be linked with _ and the correct indices" do
|
67
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice_adams").
|
67
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice_adams")).to be == [{:screen_name => "alice_adams", :indices => [0, 12]}]
|
68
68
|
end
|
69
69
|
|
70
70
|
it "should be linked if numeric and the correct indices" do
|
71
|
-
@extractor.extract_mentioned_screen_names_with_indices("@1234").
|
71
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@1234")).to be == [{:screen_name => "1234", :indices => [0, 5]}]
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
75
|
context "multiple screen names" do
|
76
76
|
it "should both be linked with the correct indices" do
|
77
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice @bob").
|
77
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice @bob")).to be ==
|
78
78
|
[{:screen_name => "alice", :indices => [0, 6]},
|
79
79
|
{:screen_name => "bob", :indices => [7, 11]}]
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should be linked with the correct indices even when repeated" do
|
83
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").
|
83
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob")).to be ==
|
84
84
|
[{:screen_name => "alice", :indices => [0, 6]},
|
85
85
|
{:screen_name => "alice", :indices => [7, 13]},
|
86
86
|
{:screen_name => "bob", :indices => [14, 18]}]
|
@@ -89,11 +89,11 @@ describe Twitter::Extractor do
|
|
89
89
|
|
90
90
|
context "screen names embedded in text" do
|
91
91
|
it "should be linked in Latin text with the correct indices" do
|
92
|
-
@extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").
|
92
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive")).to be == [{:screen_name => "alice", :indices => [12, 18]}]
|
93
93
|
end
|
94
94
|
|
95
95
|
it "should be linked in Japanese text with the correct indices" do
|
96
|
-
@extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").
|
96
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている")).to be == [{:screen_name => "alice", :indices => [1, 7]}]
|
97
97
|
end
|
98
98
|
end
|
99
99
|
|
@@ -101,45 +101,45 @@ describe Twitter::Extractor do
|
|
101
101
|
needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}]
|
102
102
|
@extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index|
|
103
103
|
data = needed.shift
|
104
|
-
sn.
|
105
|
-
start_index.
|
106
|
-
end_index.
|
104
|
+
expect(sn).to be == data[:screen_name]
|
105
|
+
expect(start_index).to be == data[:indices].first
|
106
|
+
expect(end_index).to be == data[:indices].last
|
107
107
|
end
|
108
|
-
needed.
|
108
|
+
expect(needed).to be == []
|
109
109
|
end
|
110
110
|
|
111
111
|
it "should extract screen name in text with supplementary character" do
|
112
|
-
@extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice").
|
112
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice")).to be == [{:screen_name => "alice", :indices => [2, 8]}]
|
113
113
|
end
|
114
114
|
end
|
115
115
|
|
116
116
|
describe "replies" do
|
117
117
|
context "should be extracted from" do
|
118
118
|
it "should extract from lone name" do
|
119
|
-
@extractor.extract_reply_screen_name("@alice").
|
119
|
+
expect(@extractor.extract_reply_screen_name("@alice")).to be == "alice"
|
120
120
|
end
|
121
121
|
|
122
122
|
it "should extract from the start" do
|
123
|
-
@extractor.extract_reply_screen_name("@alice reply text").
|
123
|
+
expect(@extractor.extract_reply_screen_name("@alice reply text")).to be == "alice"
|
124
124
|
end
|
125
125
|
|
126
126
|
it "should extract preceded by a space" do
|
127
|
-
@extractor.extract_reply_screen_name(" @alice reply text").
|
127
|
+
expect(@extractor.extract_reply_screen_name(" @alice reply text")).to be == "alice"
|
128
128
|
end
|
129
129
|
|
130
130
|
it "should extract preceded by a full-width space" do
|
131
|
-
@extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").
|
131
|
+
expect(@extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text")).to be == "alice"
|
132
132
|
end
|
133
133
|
end
|
134
134
|
|
135
135
|
context "should not be extracted from" do
|
136
136
|
it "should not be extracted when preceded by text" do
|
137
|
-
@extractor.extract_reply_screen_name("reply @alice text").
|
137
|
+
expect(@extractor.extract_reply_screen_name("reply @alice text")).to be == nil
|
138
138
|
end
|
139
139
|
|
140
140
|
it "should not be extracted when preceded by puctuation" do
|
141
141
|
%w(. / _ - + # ! @).each do |punct|
|
142
|
-
@extractor.extract_reply_screen_name("#{punct}@alice text").
|
142
|
+
expect(@extractor.extract_reply_screen_name("#{punct}@alice text")).to be == nil
|
143
143
|
end
|
144
144
|
end
|
145
145
|
end
|
@@ -147,7 +147,7 @@ describe Twitter::Extractor do
|
|
147
147
|
context "should accept a block arugment" do
|
148
148
|
it "should call the block on match" do
|
149
149
|
@extractor.extract_reply_screen_name("@alice") do |sn|
|
150
|
-
sn.
|
150
|
+
expect(sn).to be == "alice"
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
@@ -156,7 +156,7 @@ describe Twitter::Extractor do
|
|
156
156
|
@extractor.extract_reply_screen_name("not a reply") do |sn|
|
157
157
|
calls += 1
|
158
158
|
end
|
159
|
-
calls.
|
159
|
+
expect(calls).to be == 0
|
160
160
|
end
|
161
161
|
end
|
162
162
|
end
|
@@ -165,19 +165,21 @@ describe Twitter::Extractor do
|
|
165
165
|
describe "matching URLS" do
|
166
166
|
TestUrls::VALID.each do |url|
|
167
167
|
it "should extract the URL #{url} and prefix it with a protocol if missing" do
|
168
|
-
@extractor.extract_urls(url).first.
|
168
|
+
expect(@extractor.extract_urls(url).first).to include(url)
|
169
169
|
end
|
170
170
|
|
171
171
|
it "should match the URL #{url} when it's embedded in other text" do
|
172
172
|
text = "Sweet url: #{url} I found. #awesome"
|
173
|
-
@extractor.extract_urls(text).first.
|
173
|
+
expect(@extractor.extract_urls(text).first).to include(url)
|
174
174
|
end
|
175
175
|
end
|
176
176
|
end
|
177
177
|
|
178
178
|
describe "invalid URLS" do
|
179
|
-
|
180
|
-
|
179
|
+
TestUrls::INVALID.each do |url|
|
180
|
+
it "does not extract URL from #{url}" do
|
181
|
+
expect(@extractor.extract_urls(url).first).to be nil
|
182
|
+
end
|
181
183
|
end
|
182
184
|
end
|
183
185
|
|
@@ -185,19 +187,19 @@ describe Twitter::Extractor do
|
|
185
187
|
TestUrls::TCO.each do |url|
|
186
188
|
it "should only extract the t.co URL from the URL #{url}" do
|
187
189
|
extracted_urls = @extractor.extract_urls(url)
|
188
|
-
extracted_urls.size.
|
190
|
+
expect(extracted_urls.size).to be == 1
|
189
191
|
extracted_url = extracted_urls.first
|
190
|
-
extracted_url.
|
191
|
-
extracted_url.
|
192
|
+
expect(extracted_url).to_not be == url
|
193
|
+
expect(extracted_url).to be == url[0...20]
|
192
194
|
end
|
193
195
|
|
194
196
|
it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
|
195
197
|
text = "Sweet url: #{url} I found. #awesome"
|
196
198
|
extracted_urls = @extractor.extract_urls(text)
|
197
|
-
extracted_urls.size.
|
199
|
+
expect(extracted_urls.size).to be == 1
|
198
200
|
extracted_url = extracted_urls.first
|
199
|
-
extracted_url.
|
200
|
-
extracted_url.
|
201
|
+
expect(extracted_url).to_not be == url
|
202
|
+
expect(extracted_url).to be == url[0...20]
|
201
203
|
end
|
202
204
|
end
|
203
205
|
end
|
@@ -208,32 +210,50 @@ describe Twitter::Extractor do
|
|
208
210
|
TestUrls::VALID.each do |url|
|
209
211
|
it "should extract the URL #{url} and prefix it with a protocol if missing" do
|
210
212
|
extracted_urls = @extractor.extract_urls_with_indices(url)
|
211
|
-
extracted_urls.size.
|
213
|
+
expect(extracted_urls.size).to be == 1
|
212
214
|
extracted_url = extracted_urls.first
|
213
|
-
extracted_url[:url].
|
214
|
-
extracted_url[:indices].first.
|
215
|
-
extracted_url[:indices].last.
|
215
|
+
expect(extracted_url[:url]).to include(url)
|
216
|
+
expect(extracted_url[:indices].first).to be == 0
|
217
|
+
expect(extracted_url[:indices].last).to be == url.chars.to_a.size
|
216
218
|
end
|
217
219
|
|
218
220
|
it "should match the URL #{url} when it's embedded in other text" do
|
219
221
|
text = "Sweet url: #{url} I found. #awesome"
|
220
222
|
extracted_urls = @extractor.extract_urls_with_indices(text)
|
221
|
-
extracted_urls.size.
|
223
|
+
expect(extracted_urls.size).to be == 1
|
222
224
|
extracted_url = extracted_urls.first
|
223
|
-
extracted_url[:url].
|
224
|
-
extracted_url[:indices].first.
|
225
|
-
extracted_url[:indices].last.
|
225
|
+
expect(extracted_url[:url]).to include(url)
|
226
|
+
expect(extracted_url[:indices].first).to be == 11
|
227
|
+
expect(extracted_url[:indices].last).to be == 11 + url.chars.to_a.size
|
226
228
|
end
|
227
229
|
end
|
228
230
|
|
229
231
|
it "should extract URL in text with supplementary character" do
|
230
|
-
@extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com").
|
232
|
+
expect(@extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com")).to be == [{:url => "http://twitter.com", :indices => [2, 20]}]
|
231
233
|
end
|
232
234
|
end
|
233
235
|
|
234
236
|
describe "invalid URLS" do
|
235
237
|
it "does not link urls with invalid domains" do
|
236
|
-
@extractor.extract_urls_with_indices("http://tld-too-short.x").
|
238
|
+
expect(@extractor.extract_urls_with_indices("http://tld-too-short.x")).to be == []
|
239
|
+
end
|
240
|
+
|
241
|
+
it "does not consider a long URL with protocol to be valid" do
|
242
|
+
# maximum length of domain label is 32 chars.
|
243
|
+
url = ("a" * 31) + "."
|
244
|
+
url *= (Twitter::Extractor::MAX_URL_LENGTH / 32)
|
245
|
+
url = "https://" + url + "com" # longer than 4096 (MAX_URL_LENGTH) chars
|
246
|
+
expect(@extractor.is_valid_domain(url.length, url, true)).to be false
|
247
|
+
end
|
248
|
+
|
249
|
+
it "does not consider a long URL without protocol to be valid" do
|
250
|
+
# maximum length of domain label is 32 chars.
|
251
|
+
url = ("a" * 31) + "."
|
252
|
+
url *= ((Twitter::Extractor::MAX_URL_LENGTH / 32) - 1)
|
253
|
+
url = url + "com" # shorter than 4096 (MAX_URL_LENGTH) chars
|
254
|
+
expect(@extractor.is_valid_domain(url.length, url, false)).to be true
|
255
|
+
url = ("a" * (31 - "https://".length)) + "." + url
|
256
|
+
expect(@extractor.is_valid_domain(url.length, url, false)).to be false
|
237
257
|
end
|
238
258
|
end
|
239
259
|
|
@@ -241,23 +261,23 @@ describe Twitter::Extractor do
|
|
241
261
|
TestUrls::TCO.each do |url|
|
242
262
|
it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
|
243
263
|
extracted_urls = @extractor.extract_urls_with_indices(url)
|
244
|
-
extracted_urls.size.
|
264
|
+
expect(extracted_urls.size).to be == 1
|
245
265
|
extracted_url = extracted_urls.first
|
246
|
-
extracted_url[:url].
|
247
|
-
extracted_url[:url].
|
248
|
-
extracted_url[:indices].first.
|
249
|
-
extracted_url[:indices].last.
|
266
|
+
expect(extracted_url[:url]).to_not include(url)
|
267
|
+
expect(extracted_url[:url]).to include(url[0...20])
|
268
|
+
expect(extracted_url[:indices].first).to be == 0
|
269
|
+
expect(extracted_url[:indices].last).to be == 20
|
250
270
|
end
|
251
271
|
|
252
272
|
it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
|
253
273
|
text = "Sweet url: #{url} I found. #awesome"
|
254
274
|
extracted_urls = @extractor.extract_urls_with_indices(text)
|
255
|
-
extracted_urls.size.
|
275
|
+
expect(extracted_urls.size).to be == 1
|
256
276
|
extracted_url = extracted_urls.first
|
257
|
-
extracted_url[:url].
|
258
|
-
extracted_url[:url].
|
259
|
-
extracted_url[:indices].first.
|
260
|
-
extracted_url[:indices].last.
|
277
|
+
expect(extracted_url[:url]).to_not include(url)
|
278
|
+
expect(extracted_url[:url]).to include(url[0...20])
|
279
|
+
expect(extracted_url[:indices].first).to be == 11
|
280
|
+
expect(extracted_url[:indices].last).to be == 31
|
261
281
|
end
|
262
282
|
end
|
263
283
|
end
|
@@ -267,11 +287,11 @@ describe Twitter::Extractor do
|
|
267
287
|
context "extracts latin/numeric hashtags" do
|
268
288
|
%w(text text123 123text).each do |hashtag|
|
269
289
|
it "should extract ##{hashtag}" do
|
270
|
-
@extractor.extract_hashtags("##{hashtag}").
|
290
|
+
expect(@extractor.extract_hashtags("##{hashtag}")).to be == [hashtag]
|
271
291
|
end
|
272
292
|
|
273
293
|
it "should extract ##{hashtag} within text" do
|
274
|
-
@extractor.extract_hashtags("pre-text ##{hashtag} post-text").
|
294
|
+
expect(@extractor.extract_hashtags("pre-text ##{hashtag} post-text")).to be == [hashtag]
|
275
295
|
end
|
276
296
|
end
|
277
297
|
end
|
@@ -280,47 +300,47 @@ describe Twitter::Extractor do
|
|
280
300
|
context "should allow accents" do
|
281
301
|
%w(mañana café münchen).each do |hashtag|
|
282
302
|
it "should extract ##{hashtag}" do
|
283
|
-
@extractor.extract_hashtags("##{hashtag}").
|
303
|
+
expect(@extractor.extract_hashtags("##{hashtag}")).to be == [hashtag]
|
284
304
|
end
|
285
305
|
|
286
306
|
it "should extract ##{hashtag} within text" do
|
287
|
-
@extractor.extract_hashtags("pre-text ##{hashtag} post-text").
|
307
|
+
expect(@extractor.extract_hashtags("pre-text ##{hashtag} post-text")).to be == [hashtag]
|
288
308
|
end
|
289
309
|
end
|
290
310
|
|
291
311
|
it "should not allow the multiplication character" do
|
292
|
-
@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").
|
312
|
+
expect(@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post")).to be == ["pre"]
|
293
313
|
end
|
294
314
|
|
295
315
|
it "should not allow the division character" do
|
296
|
-
@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").
|
316
|
+
expect(@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post")).to be == ["pre"]
|
297
317
|
end
|
298
318
|
end
|
299
319
|
|
300
320
|
end
|
301
321
|
|
302
322
|
it "should not extract numeric hashtags" do
|
303
|
-
@extractor.extract_hashtags("#1234").
|
323
|
+
expect(@extractor.extract_hashtags("#1234")).to be == []
|
304
324
|
end
|
305
325
|
|
306
326
|
it "should extract hashtag followed by punctuations" do
|
307
|
-
@extractor.extract_hashtags("#test1: #test2; #test3\"").
|
327
|
+
expect(@extractor.extract_hashtags("#test1: #test2; #test3\"")).to be == ["test1", "test2" ,"test3"]
|
308
328
|
end
|
309
329
|
end
|
310
330
|
|
311
331
|
describe "hashtags with indices" do
|
312
332
|
def match_hashtag_in_text(hashtag, text, offset = 0)
|
313
333
|
extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
|
314
|
-
extracted_hashtags.size.
|
334
|
+
expect(extracted_hashtags.size).to be == 1
|
315
335
|
extracted_hashtag = extracted_hashtags.first
|
316
|
-
extracted_hashtag[:hashtag].
|
317
|
-
extracted_hashtag[:indices].first.
|
318
|
-
extracted_hashtag[:indices].last.
|
336
|
+
expect(extracted_hashtag[:hashtag]).to be == hashtag
|
337
|
+
expect(extracted_hashtag[:indices].first).to be == offset
|
338
|
+
expect(extracted_hashtag[:indices].last).to be == offset + hashtag.chars.to_a.size + 1
|
319
339
|
end
|
320
340
|
|
321
341
|
def not_match_hashtag_in_text(text)
|
322
342
|
extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
|
323
|
-
extracted_hashtags.size.
|
343
|
+
expect(extracted_hashtags.size).to be == 0
|
324
344
|
end
|
325
345
|
|
326
346
|
context "extracts latin/numeric hashtags" do
|