twitter-text 1.14.7 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -1
- data/README.md +104 -33
- data/lib/assets/tld_lib.yml +1 -0
- data/lib/twitter-text.rb +2 -0
- data/lib/twitter-text/autolink.rb +4 -4
- data/lib/twitter-text/configuration.rb +53 -0
- data/lib/twitter-text/deprecation.rb +1 -1
- data/lib/twitter-text/extractor.rb +31 -1
- data/lib/twitter-text/regex.rb +13 -13
- data/lib/twitter-text/validation.rb +155 -43
- data/lib/twitter-text/weighted_range.rb +18 -0
- data/spec/autolinking_spec.rb +161 -161
- data/spec/configuration_spec.rb +91 -0
- data/spec/extractor_spec.rb +92 -72
- data/spec/hithighlighter_spec.rb +15 -15
- data/spec/regex_spec.rb +7 -7
- data/spec/rewriter_spec.rb +110 -109
- data/spec/spec_helper.rb +13 -15
- data/spec/test_urls.rb +6 -4
- data/spec/twitter_text_spec.rb +2 -2
- data/spec/unicode_spec.rb +10 -10
- data/spec/validation_spec.rb +35 -11
- data/test/conformance_test.rb +14 -0
- data/twitter-text.gemspec +11 -9
- metadata +53 -32
- data/lib/assets/tld_lib.yml +0 -1565
@@ -0,0 +1,91 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
3
|
+
|
4
|
+
describe Twitter::Configuration do
|
5
|
+
context "configuration" do
|
6
|
+
context "with invalid data" do
|
7
|
+
it "should raise an exception" do
|
8
|
+
invalid_hash = Twitter::Configuration.parse_string("{\"version\":2,\"maxWeightedTweetLength\":280,\"scale\":100,\"defaultWeight\":200,\"transformedURLLength\":23,\"ranges\":[{\"start\":0,\"end\":true,\"weight\":false},{\"start\":8192,\"end\":8205,\"weight\":100},{\"start\":8208,\"end\":8223,\"weight\":100},{\"start\":8242,\"end\":8247,\"weight\":100}]}")
|
9
|
+
expect { Twitter::Configuration.new(invalid_hash) }.to raise_error(ArgumentError)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
context "with defaults" do
|
14
|
+
before do
|
15
|
+
Twitter::Configuration.default_configuration = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V2)
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should define version constants" do
|
19
|
+
expect(Twitter::Configuration.const_defined?(:CONFIG_V1)).to be true
|
20
|
+
expect(Twitter::Configuration.const_defined?(:CONFIG_V2)).to be true
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should define a default configuration" do
|
24
|
+
expect(Twitter::Configuration.default_configuration).to_not be_nil
|
25
|
+
expect(Twitter::Configuration.default_configuration.version).to eq(2)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
context "with v1 configuration" do
|
30
|
+
before do
|
31
|
+
@config = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V1)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should have a version" do
|
35
|
+
expect(@config.version).to eq(1)
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should have a max_weighted_tweet_length" do
|
39
|
+
expect(@config.max_weighted_tweet_length).to eq(140)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should have a scale" do
|
43
|
+
expect(@config.scale).to eq(1)
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should have a default_weight" do
|
47
|
+
expect(@config.default_weight).to eq(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
it "should have a transformed_url_length" do
|
51
|
+
expect(@config.transformed_url_length).to eq(23)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
context "with v2 configuration" do
|
56
|
+
before do
|
57
|
+
@config = Twitter::Configuration.configuration_from_file(Twitter::Configuration::CONFIG_V2)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should have a version" do
|
61
|
+
expect(@config.version).to eq(2)
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should have a max_weighted_tweet_length" do
|
65
|
+
expect(@config.max_weighted_tweet_length).to eq(280)
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should have a scale" do
|
69
|
+
expect(@config.scale).to eq(100)
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should have a default_weight" do
|
73
|
+
expect(@config.default_weight).to eq(200)
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should have a transformed_url_length" do
|
77
|
+
expect(@config.transformed_url_length).to eq(23)
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should have a configured range" do
|
81
|
+
expect(@config.ranges).to be_kind_of(Array)
|
82
|
+
expect(@config.ranges.count).to be > 0
|
83
|
+
expect(@config.ranges[0]).to be_kind_of(Twitter::WeightedRange)
|
84
|
+
weighted_range = @config.ranges[0]
|
85
|
+
expect(weighted_range.start).to be_kind_of(Integer)
|
86
|
+
expect(weighted_range.end).to be_kind_of(Integer)
|
87
|
+
expect(weighted_range.weight).to be_kind_of(Integer)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
data/spec/extractor_spec.rb
CHANGED
@@ -13,37 +13,37 @@ describe Twitter::Extractor do
|
|
13
13
|
describe "mentions" do
|
14
14
|
context "single screen name alone " do
|
15
15
|
it "should be linked" do
|
16
|
-
@extractor.extract_mentioned_screen_names("@alice").
|
16
|
+
expect(@extractor.extract_mentioned_screen_names("@alice")).to be == ["alice"]
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should be linked with _" do
|
20
|
-
@extractor.extract_mentioned_screen_names("@alice_adams").
|
20
|
+
expect(@extractor.extract_mentioned_screen_names("@alice_adams")).to be == ["alice_adams"]
|
21
21
|
end
|
22
22
|
|
23
23
|
it "should be linked if numeric" do
|
24
|
-
@extractor.extract_mentioned_screen_names("@1234").
|
24
|
+
expect(@extractor.extract_mentioned_screen_names("@1234")).to be == ["1234"]
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
context "multiple screen names" do
|
29
29
|
it "should both be linked" do
|
30
|
-
@extractor.extract_mentioned_screen_names("@alice @bob").
|
30
|
+
expect(@extractor.extract_mentioned_screen_names("@alice @bob")).to be == ["alice", "bob"]
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
context "screen names embedded in text" do
|
35
35
|
it "should be linked in Latin text" do
|
36
|
-
@extractor.extract_mentioned_screen_names("waiting for @alice to arrive").
|
36
|
+
expect(@extractor.extract_mentioned_screen_names("waiting for @alice to arrive")).to be == ["alice"]
|
37
37
|
end
|
38
38
|
|
39
39
|
it "should be linked in Japanese text" do
|
40
|
-
@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").
|
40
|
+
expect(@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている")).to be == ["alice"]
|
41
41
|
end
|
42
42
|
|
43
43
|
it "should ignore mentions preceded by !, @, #, $, %, & or *" do
|
44
44
|
invalid_chars = ['!', '@', '#', '$', '%', '&', '*']
|
45
45
|
invalid_chars.each do |c|
|
46
|
-
@extractor.extract_mentioned_screen_names("f#{c}@kn").
|
46
|
+
expect(@extractor.extract_mentioned_screen_names("f#{c}@kn")).to be == []
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
@@ -51,36 +51,36 @@ describe Twitter::Extractor do
|
|
51
51
|
it "should accept a block arugment and call it in order" do
|
52
52
|
needed = ["alice", "bob"]
|
53
53
|
@extractor.extract_mentioned_screen_names("@alice @bob") do |sn|
|
54
|
-
sn.
|
54
|
+
expect(sn).to be == needed.shift
|
55
55
|
end
|
56
|
-
needed.
|
56
|
+
expect(needed).to be == []
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
60
60
|
describe "mentions with indices" do
|
61
61
|
context "single screen name alone " do
|
62
62
|
it "should be linked and the correct indices" do
|
63
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice").
|
63
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice")).to be == [{:screen_name => "alice", :indices => [0, 6]}]
|
64
64
|
end
|
65
65
|
|
66
66
|
it "should be linked with _ and the correct indices" do
|
67
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice_adams").
|
67
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice_adams")).to be == [{:screen_name => "alice_adams", :indices => [0, 12]}]
|
68
68
|
end
|
69
69
|
|
70
70
|
it "should be linked if numeric and the correct indices" do
|
71
|
-
@extractor.extract_mentioned_screen_names_with_indices("@1234").
|
71
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@1234")).to be == [{:screen_name => "1234", :indices => [0, 5]}]
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
75
|
context "multiple screen names" do
|
76
76
|
it "should both be linked with the correct indices" do
|
77
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice @bob").
|
77
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice @bob")).to be ==
|
78
78
|
[{:screen_name => "alice", :indices => [0, 6]},
|
79
79
|
{:screen_name => "bob", :indices => [7, 11]}]
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should be linked with the correct indices even when repeated" do
|
83
|
-
@extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").
|
83
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob")).to be ==
|
84
84
|
[{:screen_name => "alice", :indices => [0, 6]},
|
85
85
|
{:screen_name => "alice", :indices => [7, 13]},
|
86
86
|
{:screen_name => "bob", :indices => [14, 18]}]
|
@@ -89,11 +89,11 @@ describe Twitter::Extractor do
|
|
89
89
|
|
90
90
|
context "screen names embedded in text" do
|
91
91
|
it "should be linked in Latin text with the correct indices" do
|
92
|
-
@extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").
|
92
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive")).to be == [{:screen_name => "alice", :indices => [12, 18]}]
|
93
93
|
end
|
94
94
|
|
95
95
|
it "should be linked in Japanese text with the correct indices" do
|
96
|
-
@extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").
|
96
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている")).to be == [{:screen_name => "alice", :indices => [1, 7]}]
|
97
97
|
end
|
98
98
|
end
|
99
99
|
|
@@ -101,45 +101,45 @@ describe Twitter::Extractor do
|
|
101
101
|
needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}]
|
102
102
|
@extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index|
|
103
103
|
data = needed.shift
|
104
|
-
sn.
|
105
|
-
start_index.
|
106
|
-
end_index.
|
104
|
+
expect(sn).to be == data[:screen_name]
|
105
|
+
expect(start_index).to be == data[:indices].first
|
106
|
+
expect(end_index).to be == data[:indices].last
|
107
107
|
end
|
108
|
-
needed.
|
108
|
+
expect(needed).to be == []
|
109
109
|
end
|
110
110
|
|
111
111
|
it "should extract screen name in text with supplementary character" do
|
112
|
-
@extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice").
|
112
|
+
expect(@extractor.extract_mentioned_screen_names_with_indices("#{[0x10400].pack('U')} @alice")).to be == [{:screen_name => "alice", :indices => [2, 8]}]
|
113
113
|
end
|
114
114
|
end
|
115
115
|
|
116
116
|
describe "replies" do
|
117
117
|
context "should be extracted from" do
|
118
118
|
it "should extract from lone name" do
|
119
|
-
@extractor.extract_reply_screen_name("@alice").
|
119
|
+
expect(@extractor.extract_reply_screen_name("@alice")).to be == "alice"
|
120
120
|
end
|
121
121
|
|
122
122
|
it "should extract from the start" do
|
123
|
-
@extractor.extract_reply_screen_name("@alice reply text").
|
123
|
+
expect(@extractor.extract_reply_screen_name("@alice reply text")).to be == "alice"
|
124
124
|
end
|
125
125
|
|
126
126
|
it "should extract preceded by a space" do
|
127
|
-
@extractor.extract_reply_screen_name(" @alice reply text").
|
127
|
+
expect(@extractor.extract_reply_screen_name(" @alice reply text")).to be == "alice"
|
128
128
|
end
|
129
129
|
|
130
130
|
it "should extract preceded by a full-width space" do
|
131
|
-
@extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").
|
131
|
+
expect(@extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text")).to be == "alice"
|
132
132
|
end
|
133
133
|
end
|
134
134
|
|
135
135
|
context "should not be extracted from" do
|
136
136
|
it "should not be extracted when preceded by text" do
|
137
|
-
@extractor.extract_reply_screen_name("reply @alice text").
|
137
|
+
expect(@extractor.extract_reply_screen_name("reply @alice text")).to be == nil
|
138
138
|
end
|
139
139
|
|
140
140
|
it "should not be extracted when preceded by puctuation" do
|
141
141
|
%w(. / _ - + # ! @).each do |punct|
|
142
|
-
@extractor.extract_reply_screen_name("#{punct}@alice text").
|
142
|
+
expect(@extractor.extract_reply_screen_name("#{punct}@alice text")).to be == nil
|
143
143
|
end
|
144
144
|
end
|
145
145
|
end
|
@@ -147,7 +147,7 @@ describe Twitter::Extractor do
|
|
147
147
|
context "should accept a block arugment" do
|
148
148
|
it "should call the block on match" do
|
149
149
|
@extractor.extract_reply_screen_name("@alice") do |sn|
|
150
|
-
sn.
|
150
|
+
expect(sn).to be == "alice"
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
@@ -156,7 +156,7 @@ describe Twitter::Extractor do
|
|
156
156
|
@extractor.extract_reply_screen_name("not a reply") do |sn|
|
157
157
|
calls += 1
|
158
158
|
end
|
159
|
-
calls.
|
159
|
+
expect(calls).to be == 0
|
160
160
|
end
|
161
161
|
end
|
162
162
|
end
|
@@ -165,19 +165,21 @@ describe Twitter::Extractor do
|
|
165
165
|
describe "matching URLS" do
|
166
166
|
TestUrls::VALID.each do |url|
|
167
167
|
it "should extract the URL #{url} and prefix it with a protocol if missing" do
|
168
|
-
@extractor.extract_urls(url).first.
|
168
|
+
expect(@extractor.extract_urls(url).first).to include(url)
|
169
169
|
end
|
170
170
|
|
171
171
|
it "should match the URL #{url} when it's embedded in other text" do
|
172
172
|
text = "Sweet url: #{url} I found. #awesome"
|
173
|
-
@extractor.extract_urls(text).first.
|
173
|
+
expect(@extractor.extract_urls(text).first).to include(url)
|
174
174
|
end
|
175
175
|
end
|
176
176
|
end
|
177
177
|
|
178
178
|
describe "invalid URLS" do
|
179
|
-
|
180
|
-
|
179
|
+
TestUrls::INVALID.each do |url|
|
180
|
+
it "does not extract URL from #{url}" do
|
181
|
+
expect(@extractor.extract_urls(url).first).to be nil
|
182
|
+
end
|
181
183
|
end
|
182
184
|
end
|
183
185
|
|
@@ -185,19 +187,19 @@ describe Twitter::Extractor do
|
|
185
187
|
TestUrls::TCO.each do |url|
|
186
188
|
it "should only extract the t.co URL from the URL #{url}" do
|
187
189
|
extracted_urls = @extractor.extract_urls(url)
|
188
|
-
extracted_urls.size.
|
190
|
+
expect(extracted_urls.size).to be == 1
|
189
191
|
extracted_url = extracted_urls.first
|
190
|
-
extracted_url.
|
191
|
-
extracted_url.
|
192
|
+
expect(extracted_url).to_not be == url
|
193
|
+
expect(extracted_url).to be == url[0...20]
|
192
194
|
end
|
193
195
|
|
194
196
|
it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
|
195
197
|
text = "Sweet url: #{url} I found. #awesome"
|
196
198
|
extracted_urls = @extractor.extract_urls(text)
|
197
|
-
extracted_urls.size.
|
199
|
+
expect(extracted_urls.size).to be == 1
|
198
200
|
extracted_url = extracted_urls.first
|
199
|
-
extracted_url.
|
200
|
-
extracted_url.
|
201
|
+
expect(extracted_url).to_not be == url
|
202
|
+
expect(extracted_url).to be == url[0...20]
|
201
203
|
end
|
202
204
|
end
|
203
205
|
end
|
@@ -208,32 +210,50 @@ describe Twitter::Extractor do
|
|
208
210
|
TestUrls::VALID.each do |url|
|
209
211
|
it "should extract the URL #{url} and prefix it with a protocol if missing" do
|
210
212
|
extracted_urls = @extractor.extract_urls_with_indices(url)
|
211
|
-
extracted_urls.size.
|
213
|
+
expect(extracted_urls.size).to be == 1
|
212
214
|
extracted_url = extracted_urls.first
|
213
|
-
extracted_url[:url].
|
214
|
-
extracted_url[:indices].first.
|
215
|
-
extracted_url[:indices].last.
|
215
|
+
expect(extracted_url[:url]).to include(url)
|
216
|
+
expect(extracted_url[:indices].first).to be == 0
|
217
|
+
expect(extracted_url[:indices].last).to be == url.chars.to_a.size
|
216
218
|
end
|
217
219
|
|
218
220
|
it "should match the URL #{url} when it's embedded in other text" do
|
219
221
|
text = "Sweet url: #{url} I found. #awesome"
|
220
222
|
extracted_urls = @extractor.extract_urls_with_indices(text)
|
221
|
-
extracted_urls.size.
|
223
|
+
expect(extracted_urls.size).to be == 1
|
222
224
|
extracted_url = extracted_urls.first
|
223
|
-
extracted_url[:url].
|
224
|
-
extracted_url[:indices].first.
|
225
|
-
extracted_url[:indices].last.
|
225
|
+
expect(extracted_url[:url]).to include(url)
|
226
|
+
expect(extracted_url[:indices].first).to be == 11
|
227
|
+
expect(extracted_url[:indices].last).to be == 11 + url.chars.to_a.size
|
226
228
|
end
|
227
229
|
end
|
228
230
|
|
229
231
|
it "should extract URL in text with supplementary character" do
|
230
|
-
@extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com").
|
232
|
+
expect(@extractor.extract_urls_with_indices("#{[0x10400].pack('U')} http://twitter.com")).to be == [{:url => "http://twitter.com", :indices => [2, 20]}]
|
231
233
|
end
|
232
234
|
end
|
233
235
|
|
234
236
|
describe "invalid URLS" do
|
235
237
|
it "does not link urls with invalid domains" do
|
236
|
-
@extractor.extract_urls_with_indices("http://tld-too-short.x").
|
238
|
+
expect(@extractor.extract_urls_with_indices("http://tld-too-short.x")).to be == []
|
239
|
+
end
|
240
|
+
|
241
|
+
it "does not consider a long URL with protocol to be valid" do
|
242
|
+
# maximum length of domain label is 32 chars.
|
243
|
+
url = ("a" * 31) + "."
|
244
|
+
url *= (Twitter::Extractor::MAX_URL_LENGTH / 32)
|
245
|
+
url = "https://" + url + "com" # longer than 4096 (MAX_URL_LENGTH) chars
|
246
|
+
expect(@extractor.is_valid_domain(url.length, url, true)).to be false
|
247
|
+
end
|
248
|
+
|
249
|
+
it "does not consider a long URL without protocol to be valid" do
|
250
|
+
# maximum length of domain label is 32 chars.
|
251
|
+
url = ("a" * 31) + "."
|
252
|
+
url *= ((Twitter::Extractor::MAX_URL_LENGTH / 32) - 1)
|
253
|
+
url = url + "com" # shorter than 4096 (MAX_URL_LENGTH) chars
|
254
|
+
expect(@extractor.is_valid_domain(url.length, url, false)).to be true
|
255
|
+
url = ("a" * (31 - "https://".length)) + "." + url
|
256
|
+
expect(@extractor.is_valid_domain(url.length, url, false)).to be false
|
237
257
|
end
|
238
258
|
end
|
239
259
|
|
@@ -241,23 +261,23 @@ describe Twitter::Extractor do
|
|
241
261
|
TestUrls::TCO.each do |url|
|
242
262
|
it "should only extract the t.co URL from the URL #{url} and adjust indices correctly" do
|
243
263
|
extracted_urls = @extractor.extract_urls_with_indices(url)
|
244
|
-
extracted_urls.size.
|
264
|
+
expect(extracted_urls.size).to be == 1
|
245
265
|
extracted_url = extracted_urls.first
|
246
|
-
extracted_url[:url].
|
247
|
-
extracted_url[:url].
|
248
|
-
extracted_url[:indices].first.
|
249
|
-
extracted_url[:indices].last.
|
266
|
+
expect(extracted_url[:url]).to_not include(url)
|
267
|
+
expect(extracted_url[:url]).to include(url[0...20])
|
268
|
+
expect(extracted_url[:indices].first).to be == 0
|
269
|
+
expect(extracted_url[:indices].last).to be == 20
|
250
270
|
end
|
251
271
|
|
252
272
|
it "should match the t.co URL from the URL #{url} when it's embedded in other text" do
|
253
273
|
text = "Sweet url: #{url} I found. #awesome"
|
254
274
|
extracted_urls = @extractor.extract_urls_with_indices(text)
|
255
|
-
extracted_urls.size.
|
275
|
+
expect(extracted_urls.size).to be == 1
|
256
276
|
extracted_url = extracted_urls.first
|
257
|
-
extracted_url[:url].
|
258
|
-
extracted_url[:url].
|
259
|
-
extracted_url[:indices].first.
|
260
|
-
extracted_url[:indices].last.
|
277
|
+
expect(extracted_url[:url]).to_not include(url)
|
278
|
+
expect(extracted_url[:url]).to include(url[0...20])
|
279
|
+
expect(extracted_url[:indices].first).to be == 11
|
280
|
+
expect(extracted_url[:indices].last).to be == 31
|
261
281
|
end
|
262
282
|
end
|
263
283
|
end
|
@@ -267,11 +287,11 @@ describe Twitter::Extractor do
|
|
267
287
|
context "extracts latin/numeric hashtags" do
|
268
288
|
%w(text text123 123text).each do |hashtag|
|
269
289
|
it "should extract ##{hashtag}" do
|
270
|
-
@extractor.extract_hashtags("##{hashtag}").
|
290
|
+
expect(@extractor.extract_hashtags("##{hashtag}")).to be == [hashtag]
|
271
291
|
end
|
272
292
|
|
273
293
|
it "should extract ##{hashtag} within text" do
|
274
|
-
@extractor.extract_hashtags("pre-text ##{hashtag} post-text").
|
294
|
+
expect(@extractor.extract_hashtags("pre-text ##{hashtag} post-text")).to be == [hashtag]
|
275
295
|
end
|
276
296
|
end
|
277
297
|
end
|
@@ -280,47 +300,47 @@ describe Twitter::Extractor do
|
|
280
300
|
context "should allow accents" do
|
281
301
|
%w(mañana café münchen).each do |hashtag|
|
282
302
|
it "should extract ##{hashtag}" do
|
283
|
-
@extractor.extract_hashtags("##{hashtag}").
|
303
|
+
expect(@extractor.extract_hashtags("##{hashtag}")).to be == [hashtag]
|
284
304
|
end
|
285
305
|
|
286
306
|
it "should extract ##{hashtag} within text" do
|
287
|
-
@extractor.extract_hashtags("pre-text ##{hashtag} post-text").
|
307
|
+
expect(@extractor.extract_hashtags("pre-text ##{hashtag} post-text")).to be == [hashtag]
|
288
308
|
end
|
289
309
|
end
|
290
310
|
|
291
311
|
it "should not allow the multiplication character" do
|
292
|
-
@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").
|
312
|
+
expect(@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post")).to be == ["pre"]
|
293
313
|
end
|
294
314
|
|
295
315
|
it "should not allow the division character" do
|
296
|
-
@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").
|
316
|
+
expect(@extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post")).to be == ["pre"]
|
297
317
|
end
|
298
318
|
end
|
299
319
|
|
300
320
|
end
|
301
321
|
|
302
322
|
it "should not extract numeric hashtags" do
|
303
|
-
@extractor.extract_hashtags("#1234").
|
323
|
+
expect(@extractor.extract_hashtags("#1234")).to be == []
|
304
324
|
end
|
305
325
|
|
306
326
|
it "should extract hashtag followed by punctuations" do
|
307
|
-
@extractor.extract_hashtags("#test1: #test2; #test3\"").
|
327
|
+
expect(@extractor.extract_hashtags("#test1: #test2; #test3\"")).to be == ["test1", "test2" ,"test3"]
|
308
328
|
end
|
309
329
|
end
|
310
330
|
|
311
331
|
describe "hashtags with indices" do
|
312
332
|
def match_hashtag_in_text(hashtag, text, offset = 0)
|
313
333
|
extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
|
314
|
-
extracted_hashtags.size.
|
334
|
+
expect(extracted_hashtags.size).to be == 1
|
315
335
|
extracted_hashtag = extracted_hashtags.first
|
316
|
-
extracted_hashtag[:hashtag].
|
317
|
-
extracted_hashtag[:indices].first.
|
318
|
-
extracted_hashtag[:indices].last.
|
336
|
+
expect(extracted_hashtag[:hashtag]).to be == hashtag
|
337
|
+
expect(extracted_hashtag[:indices].first).to be == offset
|
338
|
+
expect(extracted_hashtag[:indices].last).to be == offset + hashtag.chars.to_a.size + 1
|
319
339
|
end
|
320
340
|
|
321
341
|
def not_match_hashtag_in_text(text)
|
322
342
|
extracted_hashtags = @extractor.extract_hashtags_with_indices(text)
|
323
|
-
extracted_hashtags.size.
|
343
|
+
expect(extracted_hashtags.size).to be == 0
|
324
344
|
end
|
325
345
|
|
326
346
|
context "extracts latin/numeric hashtags" do
|