url_scrubber 0.7.20 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +13 -5
- data/lib/url_scrubber/version.rb +1 -1
- data/lib/url_scrubber.rb +53 -6
- data/spec/url_scrubber_spec.rb +205 -33
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YjU1ZDhmYTczNWEzMDk2MjQ4M2U2NDkzMGNiZmQ1Y2U2YTMxZDk0MQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NDE2YzlhOWFjMmU3NWFiNjIzNmE4OTJlYjQ3ZmYwMDExMTJjZGNmMw==
|
5
7
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDRlZTJlNzFiOTMwYWFkZWY4YzZiZWM5ZjAxMWVhMzMwZjNmYzg4ZmZkMjc2
|
10
|
+
NGRmZTc0Nzc0NGU4M2MzNTk2NGQ5YzhkODRhYTk1YTU3ZWE5YmY1MDMyMTQ4
|
11
|
+
ZjFiNGI4MTAyMmM0MzQ1Y2Y5NWMyMTYwMmQyZTQyNTE1ZTQ4ZDQ=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MTQ4MDcxMDFlM2QzM2Q2ODAwMWUxOGRhMzg2OTBmMjhiNTEzNmNmYTY5ZDg4
|
14
|
+
NjM4YjhmOGNkMTJmNzQ1ZGQxMDI3ODgxNDc0NGNiN2QyNmExMTk5NzRjOGUy
|
15
|
+
NWMzOWRjNTMwM2YxZjFjYjUzYmMyYjU1YjVkNGNjOWFiMTRkOTE=
|
data/lib/url_scrubber/version.rb
CHANGED
data/lib/url_scrubber.rb
CHANGED
@@ -12,8 +12,8 @@ module UrlScrubber
|
|
12
12
|
return nil unless m
|
13
13
|
|
14
14
|
url = m[1]
|
15
|
-
url.sub!(/^https
|
16
|
-
url.sub!(/^htp
|
15
|
+
url.sub!(/^https/i, 'http')
|
16
|
+
url.sub!(/^htp/i, 'http')
|
17
17
|
url.sub!(/\/+$/, '')
|
18
18
|
url.sub!(/;+$/, '')
|
19
19
|
url.sub!('#!/', '')
|
@@ -23,6 +23,8 @@ module UrlScrubber
|
|
23
23
|
remove_html_tags!(url)
|
24
24
|
url = drop_anchor!(special_cases(url))
|
25
25
|
url.chomp(',')
|
26
|
+
url.gsub!(/\/+$/, '') # remove any trailing slashes (/) in the resulting URL
|
27
|
+
return url
|
26
28
|
end
|
27
29
|
|
28
30
|
def self.service_of(url)
|
@@ -159,7 +161,7 @@ module UrlScrubber
|
|
159
161
|
when :google then return sc_google_plus(url)
|
160
162
|
when :flickr then return sc_flickr(url)
|
161
163
|
when :pinterest then return sc_pinterest(url)
|
162
|
-
when :vimeo
|
164
|
+
when :vimeo then return sc_vimeo(url)
|
163
165
|
when :yelp then return sc_yelp(url)
|
164
166
|
end
|
165
167
|
|
@@ -167,7 +169,9 @@ module UrlScrubber
|
|
167
169
|
end
|
168
170
|
|
169
171
|
def self.remove_www!(url)
|
170
|
-
url.sub!(%r{://www\d*\.}, '://')
|
172
|
+
# url.sub!(%r{://www\d*\.}, '://')
|
173
|
+
url.sub!(%r{^https?://www?w?\d*\.}i, 'http://')
|
174
|
+
url.sub!(%r{^https?://m\d*\.}i, 'http://')
|
171
175
|
url
|
172
176
|
end
|
173
177
|
|
@@ -228,21 +232,64 @@ module UrlScrubber
|
|
228
232
|
end
|
229
233
|
|
230
234
|
def self.sc_facebook(url)
|
235
|
+
# puts "sc_facebook: #{url}"
|
236
|
+
regex1 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/(pages\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
|
237
|
+
regex2 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
238
|
+
regex2a = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
239
|
+
regex3 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
240
|
+
regex4 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
|
241
|
+
|
231
242
|
if url.match("/media/albums") || url.match("/media/set")
|
243
|
+
# puts "media"
|
232
244
|
url = url.match('\&') ? url.split('&',2)[0] : url
|
233
245
|
elsif url.include?('facebook.com/groups/')
|
234
246
|
url = drop_url_query!(url)
|
247
|
+
elsif mdata = url.match(regex1)
|
248
|
+
# puts "regex1"
|
249
|
+
# "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
|
250
|
+
url = mdata[:url]
|
251
|
+
uname = mdata[:uname]
|
252
|
+
uid = mdata[:uid]
|
253
|
+
elsif mdata = url.match(regex2)
|
254
|
+
# puts "regex2"
|
255
|
+
# "https://www.facebook.com/profile.php?id=100009574328879"
|
256
|
+
url, http_response = check_for_redirection(mdata[:url])
|
257
|
+
uid = mdata[:uid]
|
258
|
+
elsif mdata = url.match(regex2a)
|
259
|
+
# puts "regex2a"
|
260
|
+
# "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
|
261
|
+
url = "http://facebook.com/profile.php?id=" + mdata[:uid]
|
262
|
+
url, http_response = check_for_redirection(url)
|
263
|
+
uid = mdata[:uid]
|
264
|
+
elsif mdata = url.match(regex4)
|
265
|
+
# puts "#{url} - #{mdata[:uname]}"
|
266
|
+
# "http://facebook.com/home.php?#!/person.name"
|
267
|
+
url = mdata[:url] + mdata[:uname]
|
268
|
+
url = drop_url_query!(url)
|
269
|
+
elsif mdata = url.match(regex3)
|
270
|
+
# puts "regex3"
|
271
|
+
# "http://facebook.com/TonyMollHomeLoans/timeline"
|
272
|
+
if mdata[:uname] != "pages"
|
273
|
+
url = mdata[:url]
|
274
|
+
uname = mdata[:uname]
|
275
|
+
end
|
276
|
+
url = drop_url_query!(url)
|
235
277
|
elsif url.include?("facebook.com/profile.php?id=")
|
278
|
+
# puts "profile.php"
|
236
279
|
# these were being truncated, they do redirect, but typically a 301 response is generated
|
237
280
|
# so the url is returned unchanged. Better than truncation.
|
238
281
|
url, http_response = check_for_redirection(url)
|
239
282
|
else
|
240
|
-
|
283
|
+
# puts "else"
|
241
284
|
url = drop_url_query!(url)
|
242
285
|
end
|
286
|
+
|
287
|
+
# Due to the redirection check, "https" and "www." can be re-introduced
|
288
|
+
url = url.sub(%r{^https?://www.}i, 'http://')
|
289
|
+
url = url.sub(/\?_rdr.*/, '')
|
243
290
|
url
|
244
291
|
end
|
245
|
-
|
292
|
+
|
246
293
|
def self.sc_linkedin(url)
|
247
294
|
|
248
295
|
url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
|
data/spec/url_scrubber_spec.rb
CHANGED
@@ -30,6 +30,7 @@ describe UrlScrubber do
|
|
30
30
|
UrlScrubber.scrub('http://www.example.com/page').should eq('http://example.com/page')
|
31
31
|
end
|
32
32
|
|
33
|
+
|
33
34
|
describe 'with youtube urls' do
|
34
35
|
it 'should drop /user/ if it exists' do
|
35
36
|
UrlScrubber.scrub('http://youtube.com/user/absolutely').should eq('http://youtube.com/absolutely')
|
@@ -44,6 +45,7 @@ describe UrlScrubber do
|
|
44
45
|
end
|
45
46
|
end
|
46
47
|
|
48
|
+
|
47
49
|
describe 'with twitter urls' do
|
48
50
|
it 'should drop @ from in front of username' do
|
49
51
|
UrlScrubber.scrub('http://twitter.com/@absolutely').should eq('http://twitter.com/absolutely')
|
@@ -69,6 +71,7 @@ describe UrlScrubber do
|
|
69
71
|
end
|
70
72
|
end
|
71
73
|
|
74
|
+
|
72
75
|
describe 'with facebook urls' do
|
73
76
|
it 'should drop /home.php?#!/ from the beginning of the path' do
|
74
77
|
UrlScrubber.scrub('http://facebook.com/home.php?#!/person.name').should eq('http://facebook.com/person.name')
|
@@ -82,11 +85,176 @@ describe UrlScrubber do
|
|
82
85
|
UrlScrubber.scrub('http://facebook.com/person.name?ref=pb').should eq('http://facebook.com/person.name')
|
83
86
|
end
|
84
87
|
|
88
|
+
# Vanity URL
|
89
|
+
|
90
|
+
it 'should not change a good vanity url' do
|
91
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'should only drop the trailing / on a good vanity url' do
|
95
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/').should eq('http://facebook.com/TonyMollHomeLoans')
|
96
|
+
end
|
97
|
+
|
98
|
+
it 'should only drop the trailing /about on a good vanity url' do
|
99
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/about').should eq('http://facebook.com/TonyMollHomeLoans')
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'should only drop the trailing /timeline on a good vanity url' do
|
103
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/timeline').should eq('http://facebook.com/TonyMollHomeLoans')
|
104
|
+
end
|
105
|
+
|
106
|
+
it 'should only drop the trailing ?_rdr=p on a good vanity url' do
|
107
|
+
UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans?_rdr=p').should eq('http://facebook.com/TonyMollHomeLoans')
|
108
|
+
end
|
109
|
+
|
110
|
+
# Vanity URL with period in username
|
111
|
+
|
112
|
+
it 'should not change a good vanity url with a period in username' do
|
113
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling').should eq('http://facebook.com/jim.reischling')
|
114
|
+
end
|
115
|
+
|
116
|
+
it 'should only drop the trailing / on a good vanity url with a period in username' do
|
117
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling/').should eq('http://facebook.com/jim.reischling')
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'should only drop the trailing /about on a good vanity url with a period in username' do
|
121
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling/about').should eq('http://facebook.com/jim.reischling')
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'should only drop the trailing /timeline on a good vanity url with a period in username' do
|
125
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling/timeline').should eq('http://facebook.com/jim.reischling')
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'should only drop the trailing ?_rdr on a good vanity url with a period in username' do
|
129
|
+
UrlScrubber.scrub('http://facebook.com/jim.reischling?_rdr').should eq('http://facebook.com/jim.reischling')
|
130
|
+
end
|
131
|
+
|
132
|
+
# Vanity URL with two periods in username
|
133
|
+
|
134
|
+
it 'should not change a good vanity url two periods in username' do
|
135
|
+
UrlScrubber.scrub('http://facebook.com/sam.thwaite.5/').should eq('http://facebook.com/sam.thwaite.5')
|
136
|
+
end
|
137
|
+
|
138
|
+
# V1 Non-Vanity URLs
|
139
|
+
|
140
|
+
it 'should not change a good V1 non-vanity url without UID' do
|
141
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning')
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'should not change a good V1 non-vanity url without UID with trialing slash' do
|
145
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning')
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'should not change a good V1 non-vanity url with UID' do
|
149
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
150
|
+
end
|
151
|
+
|
152
|
+
it 'should only drop the trailing /about on a good V1 non-vanity url' do
|
153
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/about').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
154
|
+
end
|
155
|
+
|
156
|
+
it 'should only drop the trailing /info on a good V1 non-vanity url' do
|
157
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/info').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
158
|
+
end
|
159
|
+
|
160
|
+
it 'should only drop the trailing /timeline on a good V1 non-vanity url' do
|
161
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/timeline').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
162
|
+
end
|
163
|
+
|
164
|
+
it 'should only drop the trailing ?_rdr on a good V1 non-vanity url' do
|
165
|
+
UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521?_rdr').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
|
166
|
+
end
|
167
|
+
|
168
|
+
# V2 Non-Vanity URLs
|
169
|
+
|
170
|
+
it 'should not change a good V2 non-vanity url without UID' do
|
171
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
172
|
+
end
|
173
|
+
|
174
|
+
it 'should not change a good V2 non-vanity url without UID with trialing slash' do
|
175
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
176
|
+
end
|
177
|
+
|
178
|
+
it 'should only drop the trailing /about on a good V2 non-vanity url' do
|
179
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/about').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
180
|
+
end
|
181
|
+
|
182
|
+
it 'should only drop the trailing /info on a good V2 non-vanity url' do
|
183
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/info').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
184
|
+
end
|
185
|
+
|
186
|
+
it 'should only drop the trailing /timeline on a good V2 non-vanity url' do
|
187
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/timeline').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
188
|
+
end
|
189
|
+
|
190
|
+
it 'should only drop the trailing ?_rdr on a good V2 non-vanity url' do
|
191
|
+
UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414?_rdr').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
|
192
|
+
end
|
193
|
+
|
194
|
+
it 'should only drop the trailing app_267091300008193 on a good Vanity url' do
|
195
|
+
UrlScrubber.scrub('http://facebook.com/theloansisters/app_267091300008193').should eq('http://facebook.com/theloansisters')
|
196
|
+
end
|
197
|
+
|
198
|
+
it 'should handle profile URL with shorter UID' do
|
199
|
+
UrlScrubber.scrub('http://www.facebook.com/profile.php?id=1165522415').should eq('http://facebook.com/profile.php?id=1165522415')
|
200
|
+
end
|
201
|
+
|
202
|
+
it 'should handle profile URL with longer UID' do
|
203
|
+
UrlScrubber.scrub('http://www.facebook.com/profile.php?id=100004113611106').should eq('http://facebook.com/profile.php?id=100004113611106')
|
204
|
+
end
|
205
|
+
|
206
|
+
it 'should handle profile URL with longer UID and https' do
|
207
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100000735668376').should eq('http://facebook.com/profile.php?id=100000735668376')
|
208
|
+
end
|
209
|
+
|
210
|
+
it 'should handle profile URL with fref' do
|
211
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100007058896205&fref=ts').should eq('http://facebook.com/profile.php?id=100007058896205')
|
212
|
+
end
|
213
|
+
|
214
|
+
it 'should handle profile URL with &_rdr' do
|
215
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100008623904282&_rdr').should eq('http://facebook.com/profile.php?id=100008623904282')
|
216
|
+
end
|
217
|
+
|
218
|
+
it 'should handle profile URL with _rdr=p&' do
|
219
|
+
UrlScrubber.scrub('https://www.facebook.com/profile.php?_rdr=p&id=100009574328879').should eq('http://facebook.com/profile.php?id=100009574328879')
|
220
|
+
end
|
221
|
+
|
222
|
+
it 'should handle uppercase HTTP' do
|
223
|
+
UrlScrubber.scrub('HTTP://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
224
|
+
end
|
225
|
+
|
226
|
+
it 'should handle uppercase HTTPS' do
|
227
|
+
UrlScrubber.scrub('HTTPS://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
228
|
+
end
|
229
|
+
|
230
|
+
it 'should handle ww.' do
|
231
|
+
UrlScrubber.scrub('http://ww.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
232
|
+
end
|
233
|
+
|
234
|
+
it 'should handle ww1.' do
|
235
|
+
UrlScrubber.scrub('http://ww1.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
236
|
+
end
|
237
|
+
|
238
|
+
it 'should handle www1.' do
|
239
|
+
UrlScrubber.scrub('http://ww1.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
240
|
+
end
|
241
|
+
|
242
|
+
it 'should handle wwww.' do
|
243
|
+
UrlScrubber.scrub('http://wwww.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
244
|
+
end
|
245
|
+
|
246
|
+
it 'should handle mobile urls (m.)' do
|
247
|
+
UrlScrubber.scrub('http://m.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
|
248
|
+
end
|
249
|
+
|
85
250
|
it 'should not kick in for a non-facebook url' do
|
86
251
|
UrlScrubber.scrub('http://example.com/home.php?#!/person.name').should_not eq('http://example.com/person.name')
|
87
252
|
end
|
88
253
|
end
|
89
254
|
|
255
|
+
|
256
|
+
# LinkedIn
|
257
|
+
|
90
258
|
describe 'with linkedin urls' do
|
91
259
|
it 'should change /companies/ to /company/' do
|
92
260
|
UrlScrubber.scrub('http://linkedin.com/companies/1337').should eq('http://linkedin.com/company/1337')
|
@@ -136,7 +304,9 @@ describe UrlScrubber do
|
|
136
304
|
end
|
137
305
|
|
138
306
|
describe 'with slideshare urls' do
|
139
|
-
|
307
|
+
skip "is not implemented yet" do
|
308
|
+
# pending
|
309
|
+
end
|
140
310
|
end
|
141
311
|
|
142
312
|
describe 'with flickr urls' do
|
@@ -174,7 +344,9 @@ describe UrlScrubber do
|
|
174
344
|
end
|
175
345
|
|
176
346
|
describe 'with vimeo urls' do
|
177
|
-
|
347
|
+
skip "is not implemented yet" do
|
348
|
+
# pending
|
349
|
+
end
|
178
350
|
end
|
179
351
|
|
180
352
|
describe 'with instagram urls' do
|
@@ -242,7 +414,7 @@ describe UrlScrubber do
|
|
242
414
|
end
|
243
415
|
|
244
416
|
it 'returns :tumblr for Tumblr urls' do
|
245
|
-
UrlScrubber.service_of('http://cisco.tumblr.com).should eq :tumblr
|
417
|
+
UrlScrubber.service_of('http://cisco.tumblr.com').should eq :tumblr
|
246
418
|
end
|
247
419
|
|
248
420
|
it 'returns :other for other urls' do
|
@@ -258,132 +430,132 @@ describe UrlScrubber do
|
|
258
430
|
|
259
431
|
describe 'for youtube' do
|
260
432
|
it 'returns true for apparent channel urls' do
|
261
|
-
UrlScrubber.ideal_form?('http://youtube.com/absolutely').should
|
433
|
+
UrlScrubber.ideal_form?('http://youtube.com/absolutely').should be_truthy
|
262
434
|
end
|
263
435
|
|
264
436
|
it 'returns false for videos' do
|
265
|
-
UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should
|
437
|
+
UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should be_falsey
|
266
438
|
end
|
267
439
|
end
|
268
440
|
|
269
441
|
describe 'for twitter' do
|
270
442
|
it 'returns true for apparent user urls' do
|
271
|
-
UrlScrubber.ideal_form?('http://twitter.com/absolutely').should
|
443
|
+
UrlScrubber.ideal_form?('http://twitter.com/absolutely').should be_truthy
|
272
444
|
end
|
273
445
|
|
274
446
|
it 'returns false for other pages' do
|
275
|
-
UrlScrubber.ideal_form?('http://twitter.com/').should
|
447
|
+
UrlScrubber.ideal_form?('http://twitter.com/').should be_falsey
|
276
448
|
end
|
277
449
|
end
|
278
450
|
|
279
451
|
describe 'for facebook' do
|
280
452
|
it 'returns true for apparent user urls' do
|
281
|
-
UrlScrubber.ideal_form?('http://facebook.com/person.name').should
|
282
|
-
UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should
|
453
|
+
UrlScrubber.ideal_form?('http://facebook.com/person.name').should be_truthy
|
454
|
+
UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should be_truthy
|
283
455
|
end
|
284
456
|
|
285
457
|
it 'returns false for other urls' do
|
286
|
-
UrlScrubber.ideal_form?('http://facebook.com/').should
|
458
|
+
UrlScrubber.ideal_form?('http://facebook.com/').should be_falsey
|
287
459
|
end
|
288
460
|
end
|
289
461
|
|
290
462
|
describe 'for linkedin' do
|
291
463
|
it 'returns true for apparent company urls' do
|
292
|
-
UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should
|
293
|
-
UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should
|
294
|
-
UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should
|
464
|
+
UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should be_truthy
|
465
|
+
UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should be_truthy
|
466
|
+
UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should be_truthy
|
295
467
|
end
|
296
468
|
|
297
469
|
it 'returns false for other urls' do
|
298
|
-
UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should
|
470
|
+
UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should be_falsey
|
299
471
|
end
|
300
472
|
end
|
301
473
|
|
302
474
|
describe 'for google plus' do
|
303
475
|
it 'returns true for apparent person urls' do
|
304
|
-
UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should
|
305
|
-
UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should
|
306
|
-
UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should
|
476
|
+
UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should be_truthy
|
477
|
+
UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should be_truthy
|
478
|
+
UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should be_truthy
|
307
479
|
end
|
308
480
|
|
309
481
|
it 'returns false for other urls' do
|
310
|
-
UrlScrubber.ideal_form?('http://plus.google.com/').should
|
482
|
+
UrlScrubber.ideal_form?('http://plus.google.com/').should be_falsey
|
311
483
|
end
|
312
484
|
end
|
313
485
|
|
314
486
|
describe 'for slideshare' do
|
315
487
|
it 'returns true for apparent user urls' do
|
316
|
-
UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should
|
488
|
+
UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should be_truthy
|
317
489
|
end
|
318
490
|
|
319
491
|
it 'returns false for other urls' do
|
320
|
-
UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should
|
492
|
+
UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should be_falsey
|
321
493
|
end
|
322
494
|
end
|
323
495
|
|
324
496
|
describe 'for flickr' do
|
325
497
|
it 'returns true for apparent user urls' do
|
326
|
-
UrlScrubber.ideal_form?('http://flickr.com/username').should
|
498
|
+
UrlScrubber.ideal_form?('http://flickr.com/username').should be_truthy
|
327
499
|
end
|
328
500
|
|
329
501
|
it 'returns false for other urls' do
|
330
|
-
UrlScrubber.ideal_form?('http://flickr.com/').should
|
502
|
+
UrlScrubber.ideal_form?('http://flickr.com/').should be_falsey
|
331
503
|
end
|
332
504
|
end
|
333
505
|
|
334
506
|
describe 'for pinterest' do
|
335
507
|
it 'returns true for apparent user urls' do
|
336
|
-
UrlScrubber.ideal_form?('http://pinterest.com/username').should
|
508
|
+
UrlScrubber.ideal_form?('http://pinterest.com/username').should be_truthy
|
337
509
|
end
|
338
510
|
|
339
511
|
it 'returns false for other urls' do
|
340
|
-
UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should
|
512
|
+
UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should be_falsey
|
341
513
|
end
|
342
514
|
end
|
343
515
|
|
344
516
|
describe 'for yelp' do
|
345
517
|
it 'returns true for apparent business urls' do
|
346
|
-
UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should
|
518
|
+
UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should be_truthy
|
347
519
|
end
|
348
520
|
|
349
521
|
it 'returns false for other urls' do
|
350
|
-
UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should
|
522
|
+
UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should be_falsey
|
351
523
|
end
|
352
524
|
end
|
353
525
|
|
354
526
|
describe 'for vimeo' do
|
355
527
|
it 'returns true for apparent user urls' do
|
356
|
-
UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should
|
528
|
+
UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should be_truthy
|
357
529
|
end
|
358
530
|
|
359
531
|
it 'returns false for video urls' do
|
360
|
-
UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should
|
532
|
+
UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should be_falsey
|
361
533
|
end
|
362
534
|
end
|
363
535
|
|
364
536
|
describe 'for instagram' do
|
365
537
|
it 'returns true for apparent user urls' do
|
366
|
-
UrlScrubber.ideal_form?('http://instagram.com/username').should
|
538
|
+
UrlScrubber.ideal_form?('http://instagram.com/username').should be_truthy
|
367
539
|
end
|
368
540
|
|
369
541
|
it 'returns false for other urls' do
|
370
|
-
UrlScrubber.ideal_form?('http://instagram.com/532513451524').should
|
542
|
+
UrlScrubber.ideal_form?('http://instagram.com/532513451524').should be_falsey
|
371
543
|
end
|
372
544
|
end
|
373
545
|
|
374
546
|
describe 'for tumblr' do
|
375
547
|
it 'returns true for apparent business urls' do
|
376
|
-
UrlScrubber.ideal_form?('http://cisco.tumblr.com').should
|
548
|
+
UrlScrubber.ideal_form?('http://cisco.tumblr.com').should be_truthy
|
377
549
|
end
|
378
550
|
|
379
551
|
it 'returns false for user urls' do
|
380
|
-
UrlScrubber.ideal_form?('http://tumblr.com/joe').should
|
552
|
+
UrlScrubber.ideal_form?('http://tumblr.com/joe').should be_falsey
|
381
553
|
end
|
382
554
|
end
|
383
555
|
|
384
556
|
describe 'for other sites' do
|
385
557
|
it 'returns true for any other site, really' do
|
386
|
-
UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should
|
558
|
+
UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_truthy
|
387
559
|
end
|
388
560
|
end
|
389
561
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.20
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Colin Langton
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2016-04-08 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|
@@ -124,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
124
|
version: '0'
|
125
125
|
requirements: []
|
126
126
|
rubyforge_project:
|
127
|
-
rubygems_version: 2.
|
127
|
+
rubygems_version: 2.4.8
|
128
128
|
signing_key:
|
129
129
|
specification_version: 4
|
130
130
|
summary: Clean up URLs.
|