url_scrubber 0.7.20 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +13 -5
- data/lib/url_scrubber/version.rb +1 -1
- data/lib/url_scrubber.rb +53 -6
- data/spec/url_scrubber_spec.rb +205 -33
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YjU1ZDhmYTczNWEzMDk2MjQ4M2U2NDkzMGNiZmQ1Y2U2YTMxZDk0MQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NDE2YzlhOWFjMmU3NWFiNjIzNmE4OTJlYjQ3ZmYwMDExMTJjZGNmMw==
|
5
7
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDRlZTJlNzFiOTMwYWFkZWY4YzZiZWM5ZjAxMWVhMzMwZjNmYzg4ZmZkMjc2
|
10
|
+
NGRmZTc0Nzc0NGU4M2MzNTk2NGQ5YzhkODRhYTk1YTU3ZWE5YmY1MDMyMTQ4
|
11
|
+
ZjFiNGI4MTAyMmM0MzQ1Y2Y5NWMyMTYwMmQyZTQyNTE1ZTQ4ZDQ=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MTQ4MDcxMDFlM2QzM2Q2ODAwMWUxOGRhMzg2OTBmMjhiNTEzNmNmYTY5ZDg4
|
14
|
+
NjM4YjhmOGNkMTJmNzQ1ZGQxMDI3ODgxNDc0NGNiN2QyNmExMTk5NzRjOGUy
|
15
|
+
NWMzOWRjNTMwM2YxZjFjYjUzYmMyYjU1YjVkNGNjOWFiMTRkOTE=
|
data/lib/url_scrubber/version.rb
CHANGED
data/lib/url_scrubber.rb
CHANGED
@@ -12,8 +12,8 @@ module UrlScrubber
|
|
12
12
|
return nil unless m
|
13
13
|
|
14
14
|
url = m[1]
|
15
|
-
url.sub!(/^https
|
16
|
-
url.sub!(/^htp
|
15
|
+
url.sub!(/^https/i, 'http')
|
16
|
+
url.sub!(/^htp/i, 'http')
|
17
17
|
url.sub!(/\/+$/, '')
|
18
18
|
url.sub!(/;+$/, '')
|
19
19
|
url.sub!('#!/', '')
|
@@ -23,6 +23,8 @@ module UrlScrubber
|
|
23
23
|
remove_html_tags!(url)
|
24
24
|
url = drop_anchor!(special_cases(url))
|
25
25
|
url.chomp(',')
|
26
|
+
url.gsub!(/\/+$/, '') # remove any trailing slashes (/) in the resulting URL
|
27
|
+
return url
|
26
28
|
end
|
27
29
|
|
28
30
|
def self.service_of(url)
|
@@ -159,7 +161,7 @@ module UrlScrubber
|
|
159
161
|
when :google then return sc_google_plus(url)
|
160
162
|
when :flickr then return sc_flickr(url)
|
161
163
|
when :pinterest then return sc_pinterest(url)
|
162
|
-
when :vimeo
|
164
|
+
when :vimeo then return sc_vimeo(url)
|
163
165
|
when :yelp then return sc_yelp(url)
|
164
166
|
end
|
165
167
|
|
@@ -167,7 +169,9 @@ module UrlScrubber
|
|
167
169
|
end
|
168
170
|
|
169
171
|
def self.remove_www!(url)
|
170
|
-
url.sub!(%r{://www\d*\.}, '://')
|
172
|
+
# url.sub!(%r{://www\d*\.}, '://')
|
173
|
+
url.sub!(%r{^https?://www?w?\d*\.}i, 'http://')
|
174
|
+
url.sub!(%r{^https?://m\d*\.}i, 'http://')
|
171
175
|
url
|
172
176
|
end
|
173
177
|
|
@@ -228,21 +232,64 @@ module UrlScrubber
|
|
228
232
|
end
|
229
233
|
|
230
234
|
def self.sc_facebook(url)
|
235
|
+
# puts "sc_facebook: #{url}"
|
236
|
+
regex1 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/(pages\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
|
237
|
+
regex2 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
238
|
+
regex2a = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
239
|
+
regex3 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
240
|
+
regex4 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
|
241
|
+
|
231
242
|
if url.match("/media/albums") || url.match("/media/set")
|
243
|
+
# puts "media"
|
232
244
|
url = url.match('\&') ? url.split('&',2)[0] : url
|
233
245
|
elsif url.include?('facebook.com/groups/')
|
234
246
|
url = drop_url_query!(url)
|
247
|
+
elsif mdata = url.match(regex1)
|
248
|
+
# puts "regex1"
|
249
|
+
# "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
|
250
|
+
url = mdata[:url]
|
251
|
+
uname = mdata[:uname]
|
252
|
+
uid = mdata[:uid]
|
253
|
+
elsif mdata = url.match(regex2)
|
254
|
+
# puts "regex2"
|
255
|
+
# "https://www.facebook.com/profile.php?id=100009574328879"
|
256
|
+
url, http_response = check_for_redirection(mdata[:url])
|
257
|
+
uid = mdata[:uid]
|
258
|
+
elsif mdata = url.match(regex2a)
|
259
|
+
# puts "regex2a"
|
260
|
+
# "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
|
261
|
+
url = "http://facebook.com/profile.php?id=" + mdata[:uid]
|
262
|
+
url, http_response = check_for_redirection(url)
|
263
|
+
uid = mdata[:uid]
|
264
|
+
elsif mdata = url.match(regex4)
|
265
|
+
# puts "#{url} - #{mdata[:uname]}"
|
266
|
+
# "http://facebook.com/home.php?#!/person.name"
|
267
|
+
url = mdata[:url] + mdata[:uname]
|
268
|
+
url = drop_url_query!(url)
|
269
|
+
elsif mdata = url.match(regex3)
|
270
|
+
# puts "regex3"
|
271
|
+
# "http://facebook.com/TonyMollHomeLoans/timeline"
|
272
|
+
if mdata[:uname] != "pages"
|
273
|
+
url = mdata[:url]
|
274
|
+
uname = mdata[:uname]
|
275
|
+
end
|
276
|
+
url = drop_url_query!(url)
|
235
277
|
elsif url.include?("facebook.com/profile.php?id=")
|
278
|
+
# puts "profile.php"
|
236
279
|
# these were being truncated, they do redirect, but typically a 301 response is generated
|
237
280
|
# so the url is returned unchanged. Better than truncation.
|
238
281
|
url, http_response = check_for_redirection(url)
|
239
282
|
else
|
240
|
-
|
283
|
+
# puts "else"
|
241
284
|
url = drop_url_query!(url)
|
242
285
|
end
|
286
|
+
|
287
|
+
# Due to the redirection check, "https" and "www." can be re-introduced
|
288
|
+
url = url.sub(%r{^https?://www.}i, 'http://')
|
289
|
+
url = url.sub(/\?_rdr.*/, '')
|
243
290
|
url
|
244
291
|
end
|
245
|
-
|
292
|
+
|
246
293
|
def self.sc_linkedin(url)
|
247
294
|
|
248
295
|
url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
|
data/spec/url_scrubber_spec.rb
CHANGED
@@ -30,6 +30,7 @@ describe UrlScrubber do
|
|
30
30
|
UrlScrubber.scrub('http://www.example.com/page').should eq('http://example.com/page')
|
31
31
|
end
|
32
32
|
|
33
|
+
|
33
34
|
describe 'with youtube urls' do
|
34
35
|
it 'should drop /user/ if it exists' do
|
35
36
|
UrlScrubber.scrub('http://youtube.com/user/absolutely').should eq('http://youtube.com/absolutely')
|
@@ -44,6 +45,7 @@ describe UrlScrubber do
|
|
44
45
|
end
|
45
46
|
end
|
46
47
|
|
48
|
+
|
47
49
|
describe 'with twitter urls' do
|
48
50
|
it 'should drop @ from in front of username' do
|
49
51
|
UrlScrubber.scrub('http://twitter.com/@absolutely').should eq('http://twitter.com/absolutely')
|
@@ -69,6 +71,7 @@ describe UrlScrubber do
|
|
69
71
|
end
|
70
72
|
end
|
71
73
|
|
74
|
+
|
72
75
|
describe 'with facebook urls' do
|
73
76
|
it 'should drop /home.php?#!/ from the beginning of the path' do
|
74
77
|
UrlScrubber.scrub('http://facebook.com/home.php?#!/person.name').should eq('http://facebook.com/person.name')
|
@@ -82,11 +85,176 @@ describe UrlScrubber do
|
|
82
85
|
UrlScrubber.scrub('http://facebook.com/person.name?ref=pb').should eq('http://facebook.com/person.name')
|
83
86
|
end
|
84
87
|
|
88
|
+
# Vanity URL
|
89
|
+
|
90
|
+
it 'should not change a good vanity url' do
|
91
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'should only drop the trailing / on a good vanity url' do
|
95
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/').should eq('http://facebook.com/TonyMollHomeLoans')
|
96
|
+
end
|
97
|
+
|
98
|
+
it 'should only drop the trailing /about on a good vanity url' do
|
99
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/about').should eq('http://facebook.com/TonyMollHomeLoans')
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'should only drop the trailing /timeline on a good vanity url' do
|
103
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/timeline').should eq('http://facebook.com/TonyMollHomeLoans')
|
104
|
+
end
|
105
|
+
|
106
|
+
it 'should only drop the trailing ?_rdr=p on a good vanity url' do
|
107
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans?_rdr=p').should eq('http://facebook.com/TonyMollHomeLoans')
|
108
|
+
end
|
109
|
+
|
110
|
+
# Vanity URL with period in username
|
111
|
+
|
112
|
+
it 'should not change a good vanity url with a period in username' do
|
113
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling').should eq('http://facebook.com/jim.reischling')
|
114
|
+
end
|
115
|
+
|
116
|
+
it 'should only drop the trailing / on a good vanity url with a period in username' do
|
117
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling/').should eq('http://facebook.com/jim.reischling')
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'should only drop the trailing /about on a good vanity url with a period in username' do
|
121
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling/about').should eq('http://facebook.com/jim.reischling')
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'should only drop the trailing /timeline on a good vanity url with a period in username' do
|
125
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling/timeline').should eq('http://facebook.com/jim.reischling')
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'should only drop the trailing ?_rdr on a good vanity url with a period in username' do
|
129
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling?_rdr').should eq('http://facebook.com/jim.reischling')
|
130
|
+
end
|
131
|
+
|
132
|
+
# Vanity URL with two periods in username
|
133
|
+
|
134
|
+
it 'should not change a good vanity url two periods in username' do
|
135
|
+
UrlScrubber.scrub('http://facebook.com/sam.thwaite.5/').should eq('http://facebook.com/sam.thwaite.5')
|
136
|
+
end
|
137
|
+
|
138
|
+
# V1 Non-Vanity URLs
|
139
|
+
|
140
|
+
it 'should not change a good V1 non-vanity url without UID' do
|
141
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning')
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'should not change a good V1 non-vanity url without UID with trialing slash' do
|
145
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning')
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'should not change a good V1 non-vanity url with UID' do
|
149
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
150
|
+
end
|
151
|
+
|
152
|
+
it 'should only drop the trailing /about on a good V1 non-vanity url' do
|
153
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/about').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
154
|
+
end
|
155
|
+
|
156
|
+
it 'should only drop the trailing /info on a good V1 non-vanity url' do
|
157
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/info').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
158
|
+
end
|
159
|
+
|
160
|
+
it 'should only drop the trailing /timeline on a good V1 non-vanity url' do
|
161
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/timeline').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
162
|
+
end
|
163
|
+
|
164
|
+
it 'should only drop the trailing ?_rdr on a good V1 non-vanity url' do
|
165
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521?_rdr').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
166
|
+
end
|
167
|
+
|
168
|
+
# V2 Non-Vanity URLs
|
169
|
+
|
170
|
+
it 'should not change a good V2 non-vanity url without UID' do
|
171
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
172
|
+
end
|
173
|
+
|
174
|
+
it 'should not change a good V2 non-vanity url without UID with trialing slash' do
|
175
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
176
|
+
end
|
177
|
+
|
178
|
+
it 'should only drop the trailing /about on a good V2 non-vanity url' do
|
179
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/about').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
180
|
+
end
|
181
|
+
|
182
|
+
it 'should only drop the trailing /info on a good V2 non-vanity url' do
|
183
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/info').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
184
|
+
end
|
185
|
+
|
186
|
+
it 'should only drop the trailing /timeline on a good V2 non-vanity url' do
|
187
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/timeline').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
188
|
+
end
|
189
|
+
|
190
|
+
it 'should only drop the trailing ?_rdr on a good V2 non-vanity url' do
|
191
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414?_rdr').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
192
|
+
end
|
193
|
+
|
194
|
+
it 'should only drop the trailing app_267091300008193 on a good Vanity url' do
|
195
|
+
UrlScrubber.scrub('http://facebook.com/theloansisters/app_267091300008193').should eq('http://facebook.com/theloansisters')
|
196
|
+
end
|
197
|
+
|
198
|
+
it 'should handle profile URL with shorter UID' do
|
199
|
+
UrlScrubber.scrub('http://www.facebook.com/profile.php?id=1165522415').should eq('http://facebook.com/profile.php?id=1165522415')
|
200
|
+
end
|
201
|
+
|
202
|
+
it 'should handle profile URL with longer UID' do
|
203
|
+
UrlScrubber.scrub('http://www.facebook.com/profile.php?id=100004113611106').should eq('http://facebook.com/profile.php?id=100004113611106')
|
204
|
+
end
|
205
|
+
|
206
|
+
it 'should handle profile URL with longer UID and https' do
|
207
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100000735668376').should eq('http://facebook.com/profile.php?id=100000735668376')
|
208
|
+
end
|
209
|
+
|
210
|
+
it 'should handle profile URL with fref' do
|
211
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100007058896205&fref=ts').should eq('http://facebook.com/profile.php?id=100007058896205')
|
212
|
+
end
|
213
|
+
|
214
|
+
it 'should handle profile URL with &_rdr' do
|
215
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100008623904282&_rdr').should eq('http://facebook.com/profile.php?id=100008623904282')
|
216
|
+
end
|
217
|
+
|
218
|
+
it 'should handle profile URL with _rdr=p&' do
|
219
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?_rdr=p&id=100009574328879').should eq('http://facebook.com/profile.php?id=100009574328879')
|
220
|
+
end
|
221
|
+
|
222
|
+
it 'should handle uppercase HTTP' do
|
223
|
+
UrlScrubber.scrub('HTTP://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
224
|
+
end
|
225
|
+
|
226
|
+
it 'should handle uppercase HTTPS' do
|
227
|
+
UrlScrubber.scrub('HTTPS://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
228
|
+
end
|
229
|
+
|
230
|
+
it 'should handle ww.' do
|
231
|
+
UrlScrubber.scrub('http://ww.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
232
|
+
end
|
233
|
+
|
234
|
+
it 'should handle ww1.' do
|
235
|
+
UrlScrubber.scrub('http://ww1.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
236
|
+
end
|
237
|
+
|
238
|
+
it 'should handle www1.' do
|
239
|
+
UrlScrubber.scrub('http://ww1.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
240
|
+
end
|
241
|
+
|
242
|
+
it 'should handle wwww.' do
|
243
|
+
UrlScrubber.scrub('http://wwww.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
244
|
+
end
|
245
|
+
|
246
|
+
it 'should handle mobile urls (m.)' do
|
247
|
+
UrlScrubber.scrub('http://m.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
248
|
+
end
|
249
|
+
|
85
250
|
it 'should not kick in for a non-facebook url' do
|
86
251
|
UrlScrubber.scrub('http://example.com/home.php?#!/person.name').should_not eq('http://example.com/person.name')
|
87
252
|
end
|
88
253
|
end
|
89
254
|
|
255
|
+
|
256
|
+
# LinkedIn
|
257
|
+
|
90
258
|
describe 'with linkedin urls' do
|
91
259
|
it 'should change /companies/ to /company/' do
|
92
260
|
UrlScrubber.scrub('http://linkedin.com/companies/1337').should eq('http://linkedin.com/company/1337')
|
@@ -136,7 +304,9 @@ describe UrlScrubber do
|
|
136
304
|
end
|
137
305
|
|
138
306
|
describe 'with slideshare urls' do
|
139
|
-
|
307
|
+
skip "is not implemented yet" do
|
308
|
+
# pending
|
309
|
+
end
|
140
310
|
end
|
141
311
|
|
142
312
|
describe 'with flickr urls' do
|
@@ -174,7 +344,9 @@ describe UrlScrubber do
|
|
174
344
|
end
|
175
345
|
|
176
346
|
describe 'with vimeo urls' do
|
177
|
-
|
347
|
+
skip "is not implemented yet" do
|
348
|
+
# pending
|
349
|
+
end
|
178
350
|
end
|
179
351
|
|
180
352
|
describe 'with instagram urls' do
|
@@ -242,7 +414,7 @@ describe UrlScrubber do
|
|
242
414
|
end
|
243
415
|
|
244
416
|
it 'returns :tumblr for Tumblr urls' do
|
245
|
-
UrlScrubber.service_of('http://cisco.tumblr.com).should eq :tumblr
|
417
|
+
UrlScrubber.service_of('http://cisco.tumblr.com').should eq :tumblr
|
246
418
|
end
|
247
419
|
|
248
420
|
it 'returns :other for other urls' do
|
@@ -258,132 +430,132 @@ describe UrlScrubber do
|
|
258
430
|
|
259
431
|
describe 'for youtube' do
|
260
432
|
it 'returns true for apparent channel urls' do
|
261
|
-
UrlScrubber.ideal_form?('http://youtube.com/absolutely').should
|
433
|
+
UrlScrubber.ideal_form?('http://youtube.com/absolutely').should be_truthy
|
262
434
|
end
|
263
435
|
|
264
436
|
it 'returns false for videos' do
|
265
|
-
UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should
|
437
|
+
UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should be_falsey
|
266
438
|
end
|
267
439
|
end
|
268
440
|
|
269
441
|
describe 'for twitter' do
|
270
442
|
it 'returns true for apparent user urls' do
|
271
|
-
UrlScrubber.ideal_form?('http://twitter.com/absolutely').should
|
443
|
+
UrlScrubber.ideal_form?('http://twitter.com/absolutely').should be_truthy
|
272
444
|
end
|
273
445
|
|
274
446
|
it 'returns false for other pages' do
|
275
|
-
UrlScrubber.ideal_form?('http://twitter.com/').should
|
447
|
+
UrlScrubber.ideal_form?('http://twitter.com/').should be_falsey
|
276
448
|
end
|
277
449
|
end
|
278
450
|
|
279
451
|
describe 'for facebook' do
|
280
452
|
it 'returns true for apparent user urls' do
|
281
|
-
UrlScrubber.ideal_form?('http://facebook.com/person.name').should
|
282
|
-
UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should
|
453
|
+
UrlScrubber.ideal_form?('http://facebook.com/person.name').should be_truthy
|
454
|
+
UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should be_truthy
|
283
455
|
end
|
284
456
|
|
285
457
|
it 'returns false for other urls' do
|
286
|
-
UrlScrubber.ideal_form?('http://facebook.com/').should
|
458
|
+
UrlScrubber.ideal_form?('http://facebook.com/').should be_falsey
|
287
459
|
end
|
288
460
|
end
|
289
461
|
|
290
462
|
describe 'for linkedin' do
|
291
463
|
it 'returns true for apparent company urls' do
|
292
|
-
UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should
|
293
|
-
UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should
|
294
|
-
UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should
|
464
|
+
UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should be_truthy
|
465
|
+
UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should be_truthy
|
466
|
+
UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should be_truthy
|
295
467
|
end
|
296
468
|
|
297
469
|
it 'returns false for other urls' do
|
298
|
-
UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should
|
470
|
+
UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should be_falsey
|
299
471
|
end
|
300
472
|
end
|
301
473
|
|
302
474
|
describe 'for google plus' do
|
303
475
|
it 'returns true for apparent person urls' do
|
304
|
-
UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should
|
305
|
-
UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should
|
306
|
-
UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should
|
476
|
+
UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should be_truthy
|
477
|
+
UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should be_truthy
|
478
|
+
UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should be_truthy
|
307
479
|
end
|
308
480
|
|
309
481
|
it 'returns false for other urls' do
|
310
|
-
UrlScrubber.ideal_form?('http://plus.google.com/').should
|
482
|
+
UrlScrubber.ideal_form?('http://plus.google.com/').should be_falsey
|
311
483
|
end
|
312
484
|
end
|
313
485
|
|
314
486
|
describe 'for slideshare' do
|
315
487
|
it 'returns true for apparent user urls' do
|
316
|
-
UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should
|
488
|
+
UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should be_truthy
|
317
489
|
end
|
318
490
|
|
319
491
|
it 'returns false for other urls' do
|
320
|
-
UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should
|
492
|
+
UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should be_falsey
|
321
493
|
end
|
322
494
|
end
|
323
495
|
|
324
496
|
describe 'for flickr' do
|
325
497
|
it 'returns true for apparent user urls' do
|
326
|
-
UrlScrubber.ideal_form?('http://flickr.com/username').should
|
498
|
+
UrlScrubber.ideal_form?('http://flickr.com/username').should be_truthy
|
327
499
|
end
|
328
500
|
|
329
501
|
it 'returns false for other urls' do
|
330
|
-
UrlScrubber.ideal_form?('http://flickr.com/').should
|
502
|
+
UrlScrubber.ideal_form?('http://flickr.com/').should be_falsey
|
331
503
|
end
|
332
504
|
end
|
333
505
|
|
334
506
|
describe 'for pinterest' do
|
335
507
|
it 'returns true for apparent user urls' do
|
336
|
-
UrlScrubber.ideal_form?('http://pinterest.com/username').should
|
508
|
+
UrlScrubber.ideal_form?('http://pinterest.com/username').should be_truthy
|
337
509
|
end
|
338
510
|
|
339
511
|
it 'returns false for other urls' do
|
340
|
-
UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should
|
512
|
+
UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should be_falsey
|
341
513
|
end
|
342
514
|
end
|
343
515
|
|
344
516
|
describe 'for yelp' do
|
345
517
|
it 'returns true for apparent business urls' do
|
346
|
-
UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should
|
518
|
+
UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should be_truthy
|
347
519
|
end
|
348
520
|
|
349
521
|
it 'returns false for other urls' do
|
350
|
-
UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should
|
522
|
+
UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should be_falsey
|
351
523
|
end
|
352
524
|
end
|
353
525
|
|
354
526
|
describe 'for vimeo' do
|
355
527
|
it 'returns true for apparent user urls' do
|
356
|
-
UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should
|
528
|
+
UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should be_truthy
|
357
529
|
end
|
358
530
|
|
359
531
|
it 'returns false for video urls' do
|
360
|
-
UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should
|
532
|
+
UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should be_falsey
|
361
533
|
end
|
362
534
|
end
|
363
535
|
|
364
536
|
describe 'for instagram' do
|
365
537
|
it 'returns true for apparent user urls' do
|
366
|
-
UrlScrubber.ideal_form?('http://instagram.com/username').should
|
538
|
+
UrlScrubber.ideal_form?('http://instagram.com/username').should be_truthy
|
367
539
|
end
|
368
540
|
|
369
541
|
it 'returns false for other urls' do
|
370
|
-
UrlScrubber.ideal_form?('http://instagram.com/532513451524').should
|
542
|
+
UrlScrubber.ideal_form?('http://instagram.com/532513451524').should be_falsey
|
371
543
|
end
|
372
544
|
end
|
373
545
|
|
374
546
|
describe 'for tumblr' do
|
375
547
|
it 'returns true for apparent business urls' do
|
376
|
-
UrlScrubber.ideal_form?('http://cisco.tumblr.com').should
|
548
|
+
UrlScrubber.ideal_form?('http://cisco.tumblr.com').should be_truthy
|
377
549
|
end
|
378
550
|
|
379
551
|
it 'returns false for user urls' do
|
380
|
-
UrlScrubber.ideal_form?('http://tumblr.com/joe').should
|
552
|
+
UrlScrubber.ideal_form?('http://tumblr.com/joe').should be_falsey
|
381
553
|
end
|
382
554
|
end
|
383
555
|
|
384
556
|
describe 'for other sites' do
|
385
557
|
it 'returns true for any other site, really' do
|
386
|
-
UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should
|
558
|
+
UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_truthy
|
387
559
|
end
|
388
560
|
end
|
389
561
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.20
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Colin Langton
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2016-04-08 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|
@@ -124,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
124
|
version: '0'
|
125
125
|
requirements: []
|
126
126
|
rubyforge_project:
|
127
|
-
rubygems_version: 2.
|
127
|
+
rubygems_version: 2.4.8
|
128
128
|
signing_key:
|
129
129
|
specification_version: 4
|
130
130
|
summary: Clean up URLs.
|