url_scrubber 0.7.20 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 743040fbabe8b162af5d06665e9d8b076916097a
4
- data.tar.gz: 3a620b4a6fff1a137093f2d1fb5be3c6b43ef84f
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YjU1ZDhmYTczNWEzMDk2MjQ4M2U2NDkzMGNiZmQ1Y2U2YTMxZDk0MQ==
5
+ data.tar.gz: !binary |-
6
+ NDE2YzlhOWFjMmU3NWFiNjIzNmE4OTJlYjQ3ZmYwMDExMTJjZGNmMw==
5
7
  SHA512:
6
- metadata.gz: 1641e258c6f4bccbe75de24bb7132697ed286b2f485cc1ef73ba0576307c15eda16b6e8be11e2cf91386b719c2f476ad04cb4e47a47b433df9e0355166fc5e3f
7
- data.tar.gz: 6e290b6607c0363c73a25328f8b217a56789ae90793930a11ca78dbe51676a90e3871d17ef878dc1f936fdc8c5ba2bb28f2cf90749afe3ebf57e52f231e40926
8
+ metadata.gz: !binary |-
9
+ MDRlZTJlNzFiOTMwYWFkZWY4YzZiZWM5ZjAxMWVhMzMwZjNmYzg4ZmZkMjc2
10
+ NGRmZTc0Nzc0NGU4M2MzNTk2NGQ5YzhkODRhYTk1YTU3ZWE5YmY1MDMyMTQ4
11
+ ZjFiNGI4MTAyMmM0MzQ1Y2Y5NWMyMTYwMmQyZTQyNTE1ZTQ4ZDQ=
12
+ data.tar.gz: !binary |-
13
+ MTQ4MDcxMDFlM2QzM2Q2ODAwMWUxOGRhMzg2OTBmMjhiNTEzNmNmYTY5ZDg4
14
+ NjM4YjhmOGNkMTJmNzQ1ZGQxMDI3ODgxNDc0NGNiN2QyNmExMTk5NzRjOGUy
15
+ NWMzOWRjNTMwM2YxZjFjYjUzYmMyYjU1YjVkNGNjOWFiMTRkOTE=
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.7.20"
2
+ VERSION = "0.8.0"
3
3
  end
data/lib/url_scrubber.rb CHANGED
@@ -12,8 +12,8 @@ module UrlScrubber
12
12
  return nil unless m
13
13
 
14
14
  url = m[1]
15
- url.sub!(/^https/, 'http')
16
- url.sub!(/^htp/, 'http')
15
+ url.sub!(/^https/i, 'http')
16
+ url.sub!(/^htp/i, 'http')
17
17
  url.sub!(/\/+$/, '')
18
18
  url.sub!(/;+$/, '')
19
19
  url.sub!('#!/', '')
@@ -23,6 +23,8 @@ module UrlScrubber
23
23
  remove_html_tags!(url)
24
24
  url = drop_anchor!(special_cases(url))
25
25
  url.chomp(',')
26
+ url.gsub!(/\/+$/, '') # remove any trailing slashes (/) in the resulting URL
27
+ return url
26
28
  end
27
29
 
28
30
  def self.service_of(url)
@@ -159,7 +161,7 @@ module UrlScrubber
159
161
  when :google then return sc_google_plus(url)
160
162
  when :flickr then return sc_flickr(url)
161
163
  when :pinterest then return sc_pinterest(url)
162
- when :vimeo then return sc_vimeo(url)
164
+ when :vimeo then return sc_vimeo(url)
163
165
  when :yelp then return sc_yelp(url)
164
166
  end
165
167
 
@@ -167,7 +169,9 @@ module UrlScrubber
167
169
  end
168
170
 
169
171
  def self.remove_www!(url)
170
- url.sub!(%r{://www\d*\.}, '://')
172
+ # url.sub!(%r{://www\d*\.}, '://')
173
+ url.sub!(%r{^https?://www?w?\d*\.}i, 'http://')
174
+ url.sub!(%r{^https?://m\d*\.}i, 'http://')
171
175
  url
172
176
  end
173
177
 
@@ -228,21 +232,64 @@ module UrlScrubber
228
232
  end
229
233
 
230
234
  def self.sc_facebook(url)
235
+ # puts "sc_facebook: #{url}"
236
+ regex1 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/(pages\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
237
+ regex2 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
238
+ regex2a = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
239
+ regex3 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
240
+ regex4 = /^(?<url>(https?:\/\/)(www\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
241
+
231
242
  if url.match("/media/albums") || url.match("/media/set")
243
+ # puts "media"
232
244
  url = url.match('\&') ? url.split('&',2)[0] : url
233
245
  elsif url.include?('facebook.com/groups/')
234
246
  url = drop_url_query!(url)
247
+ elsif mdata = url.match(regex1)
248
+ # puts "regex1"
249
+ # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
250
+ url = mdata[:url]
251
+ uname = mdata[:uname]
252
+ uid = mdata[:uid]
253
+ elsif mdata = url.match(regex2)
254
+ # puts "regex2"
255
+ # "https://www.facebook.com/profile.php?id=100009574328879"
256
+ url, http_response = check_for_redirection(mdata[:url])
257
+ uid = mdata[:uid]
258
+ elsif mdata = url.match(regex2a)
259
+ # puts "regex2a"
260
+ # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
261
+ url = "http://facebook.com/profile.php?id=" + mdata[:uid]
262
+ url, http_response = check_for_redirection(url)
263
+ uid = mdata[:uid]
264
+ elsif mdata = url.match(regex4)
265
+ # puts "#{url} - #{mdata[:uname]}"
266
+ # "http://facebook.com/home.php?#!/person.name"
267
+ url = mdata[:url] + mdata[:uname]
268
+ url = drop_url_query!(url)
269
+ elsif mdata = url.match(regex3)
270
+ # puts "regex3"
271
+ # "http://facebook.com/TonyMollHomeLoans/timeline"
272
+ if mdata[:uname] != "pages"
273
+ url = mdata[:url]
274
+ uname = mdata[:uname]
275
+ end
276
+ url = drop_url_query!(url)
235
277
  elsif url.include?("facebook.com/profile.php?id=")
278
+ # puts "profile.php"
236
279
  # these were being truncated, they do redirect, but typically a 301 response is generated
237
280
  # so the url is returned unchanged. Better than truncation.
238
281
  url, http_response = check_for_redirection(url)
239
282
  else
240
- url.sub!(/facebook\.com\/home\.php[\?#!\/]+/, 'facebook.com/')
283
+ # puts "else"
241
284
  url = drop_url_query!(url)
242
285
  end
286
+
287
+ # Due to the redirection check, "https" and "www." can be re-introduced
288
+ url = url.sub(%r{^https?://www.}i, 'http://')
289
+ url = url.sub(/\?_rdr.*/, '')
243
290
  url
244
291
  end
245
-
292
+
246
293
  def self.sc_linkedin(url)
247
294
 
248
295
  url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
@@ -30,6 +30,7 @@ describe UrlScrubber do
30
30
  UrlScrubber.scrub('http://www.example.com/page').should eq('http://example.com/page')
31
31
  end
32
32
 
33
+
33
34
  describe 'with youtube urls' do
34
35
  it 'should drop /user/ if it exists' do
35
36
  UrlScrubber.scrub('http://youtube.com/user/absolutely').should eq('http://youtube.com/absolutely')
@@ -44,6 +45,7 @@ describe UrlScrubber do
44
45
  end
45
46
  end
46
47
 
48
+
47
49
  describe 'with twitter urls' do
48
50
  it 'should drop @ from in front of username' do
49
51
  UrlScrubber.scrub('http://twitter.com/@absolutely').should eq('http://twitter.com/absolutely')
@@ -69,6 +71,7 @@ describe UrlScrubber do
69
71
  end
70
72
  end
71
73
 
74
+
72
75
  describe 'with facebook urls' do
73
76
  it 'should drop /home.php?#!/ from the beginning of the path' do
74
77
  UrlScrubber.scrub('http://facebook.com/home.php?#!/person.name').should eq('http://facebook.com/person.name')
@@ -82,11 +85,176 @@ describe UrlScrubber do
82
85
  UrlScrubber.scrub('http://facebook.com/person.name?ref=pb').should eq('http://facebook.com/person.name')
83
86
  end
84
87
 
88
+ # Vanity URL
89
+
90
+ it 'should not change a good vanity url' do
91
+ UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
92
+ end
93
+
94
+ it 'should only drop the trailing / on a good vanity url' do
95
+ UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/').should eq('http://facebook.com/TonyMollHomeLoans')
96
+ end
97
+
98
+ it 'should only drop the trailing /about on a good vanity url' do
99
+ UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/about').should eq('http://facebook.com/TonyMollHomeLoans')
100
+ end
101
+
102
+ it 'should only drop the trailing /timeline on a good vanity url' do
103
+ UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans/timeline').should eq('http://facebook.com/TonyMollHomeLoans')
104
+ end
105
+
106
+ it 'should only drop the trailing ?_rdr=p on a good vanity url' do
107
+ UrlScrubber.scrub('http://facebook.com/TonyMollHomeLoans?_rdr=p').should eq('http://facebook.com/TonyMollHomeLoans')
108
+ end
109
+
110
+ # Vanity URL with period in username
111
+
112
+ it 'should not change a good vanity url with a period in username' do
113
+ UrlScrubber.scrub('http://facebook.com/jim.reischling').should eq('http://facebook.com/jim.reischling')
114
+ end
115
+
116
+ it 'should only drop the trailing / on a good vanity url with a period in username' do
117
+ UrlScrubber.scrub('http://facebook.com/jim.reischling/').should eq('http://facebook.com/jim.reischling')
118
+ end
119
+
120
+ it 'should only drop the trailing /about on a good vanity url with a period in username' do
121
+ UrlScrubber.scrub('http://facebook.com/jim.reischling/about').should eq('http://facebook.com/jim.reischling')
122
+ end
123
+
124
+ it 'should only drop the trailing /timeline on a good vanity url with a period in username' do
125
+ UrlScrubber.scrub('http://facebook.com/jim.reischling/timeline').should eq('http://facebook.com/jim.reischling')
126
+ end
127
+
128
+ it 'should only drop the trailing ?_rdr on a good vanity url with a period in username' do
129
+ UrlScrubber.scrub('http://facebook.com/jim.reischling?_rdr').should eq('http://facebook.com/jim.reischling')
130
+ end
131
+
132
+ # Vanity URL with two periods in username
133
+
134
+ it 'should not change a good vanity url two periods in username' do
135
+ UrlScrubber.scrub('http://facebook.com/sam.thwaite.5/').should eq('http://facebook.com/sam.thwaite.5')
136
+ end
137
+
138
+ # V1 Non-Vanity URLs
139
+
140
+ it 'should not change a good V1 non-vanity url without UID' do
141
+ UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning')
142
+ end
143
+
144
+ it 'should not change a good V1 non-vanity url without UID with trialing slash' do
145
+ UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning')
146
+ end
147
+
148
+ it 'should not change a good V1 non-vanity url with UID' do
149
+ UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
150
+ end
151
+
152
+ it 'should only drop the trailing /about on a good V1 non-vanity url' do
153
+ UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/about').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
154
+ end
155
+
156
+ it 'should only drop the trailing /info on a good V1 non-vanity url' do
157
+ UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/info').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
158
+ end
159
+
160
+ it 'should only drop the trailing /timeline on a good V1 non-vanity url' do
161
+ UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521/timeline').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
162
+ end
163
+
164
+ it 'should only drop the trailing ?_rdr on a good V1 non-vanity url' do
165
+ UrlScrubber.scrub('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521?_rdr').should eq('http://facebook.com/pages/Chris-Stopp-NMLS-257248-Alpine-Mortgage-Planning/152622158131521')
166
+ end
167
+
168
+ # V2 Non-Vanity URLs
169
+
170
+ it 'should not change a good V2 non-vanity url without UID' do
171
+ UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
172
+ end
173
+
174
+ it 'should not change a good V2 non-vanity url without UID with trialing slash' do
175
+ UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
176
+ end
177
+
178
+ it 'should only drop the trailing /about on a good V2 non-vanity url' do
179
+ UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/about').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
180
+ end
181
+
182
+ it 'should only drop the trailing /info on a good V2 non-vanity url' do
183
+ UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/info').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
184
+ end
185
+
186
+ it 'should only drop the trailing /timeline on a good V2 non-vanity url' do
187
+ UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414/timeline').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
188
+ end
189
+
190
+ it 'should only drop the trailing ?_rdr on a good V2 non-vanity url' do
191
+ UrlScrubber.scrub('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414?_rdr').should eq('http://facebook.com/Pinnacle-Capital-Mortgage-Lynette-Lea-Jorden-Reverse-Mortgage-Specialist-702124366560414')
192
+ end
193
+
194
+ it 'should only drop the trailing app_267091300008193 on a good Vanity url' do
195
+ UrlScrubber.scrub('http://facebook.com/theloansisters/app_267091300008193').should eq('http://facebook.com/theloansisters')
196
+ end
197
+
198
+ it 'should handle profile URL with shorter UID' do
199
+ UrlScrubber.scrub('http://www.facebook.com/profile.php?id=1165522415').should eq('http://facebook.com/profile.php?id=1165522415')
200
+ end
201
+
202
+ it 'should handle profile URL with longer UID' do
203
+ UrlScrubber.scrub('http://www.facebook.com/profile.php?id=100004113611106').should eq('http://facebook.com/profile.php?id=100004113611106')
204
+ end
205
+
206
+ it 'should handle profile URL with longer UID and https' do
207
+ UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100000735668376').should eq('http://facebook.com/profile.php?id=100000735668376')
208
+ end
209
+
210
+ it 'should handle profile URL with fref' do
211
+ UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100007058896205&fref=ts').should eq('http://facebook.com/profile.php?id=100007058896205')
212
+ end
213
+
214
+ it 'should handle profile URL with &_rdr' do
215
+ UrlScrubber.scrub('https://www.facebook.com/profile.php?id=100008623904282&_rdr').should eq('http://facebook.com/profile.php?id=100008623904282')
216
+ end
217
+
218
+ it 'should handle profile URL with _rdr=p&' do
219
+ UrlScrubber.scrub('https://www.facebook.com/profile.php?_rdr=p&id=100009574328879').should eq('http://facebook.com/profile.php?id=100009574328879')
220
+ end
221
+
222
+ it 'should handle uppercase HTTP' do
223
+ UrlScrubber.scrub('HTTP://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
224
+ end
225
+
226
+ it 'should handle uppercase HTTPS' do
227
+ UrlScrubber.scrub('HTTPS://facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
228
+ end
229
+
230
+ it 'should handle ww.' do
231
+ UrlScrubber.scrub('http://ww.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
232
+ end
233
+
234
+ it 'should handle ww1.' do
235
+ UrlScrubber.scrub('http://ww1.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
236
+ end
237
+
238
+ it 'should handle www1.' do
239
+ UrlScrubber.scrub('http://ww1.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
240
+ end
241
+
242
+ it 'should handle wwww.' do
243
+ UrlScrubber.scrub('http://wwww.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
244
+ end
245
+
246
+ it 'should handle mobile urls (m.)' do
247
+ UrlScrubber.scrub('http://m.facebook.com/TonyMollHomeLoans').should eq('http://facebook.com/TonyMollHomeLoans')
248
+ end
249
+
85
250
  it 'should not kick in for a non-facebook url' do
86
251
  UrlScrubber.scrub('http://example.com/home.php?#!/person.name').should_not eq('http://example.com/person.name')
87
252
  end
88
253
  end
89
254
 
255
+
256
+ # LinkedIn
257
+
90
258
  describe 'with linkedin urls' do
91
259
  it 'should change /companies/ to /company/' do
92
260
  UrlScrubber.scrub('http://linkedin.com/companies/1337').should eq('http://linkedin.com/company/1337')
@@ -136,7 +304,9 @@ describe UrlScrubber do
136
304
  end
137
305
 
138
306
  describe 'with slideshare urls' do
139
- pending
307
+ skip "is not implemented yet" do
308
+ # pending
309
+ end
140
310
  end
141
311
 
142
312
  describe 'with flickr urls' do
@@ -174,7 +344,9 @@ describe UrlScrubber do
174
344
  end
175
345
 
176
346
  describe 'with vimeo urls' do
177
- pending
347
+ skip "is not implemented yet" do
348
+ # pending
349
+ end
178
350
  end
179
351
 
180
352
  describe 'with instagram urls' do
@@ -242,7 +414,7 @@ describe UrlScrubber do
242
414
  end
243
415
 
244
416
  it 'returns :tumblr for Tumblr urls' do
245
- UrlScrubber.service_of('http://cisco.tumblr.com).should eq :tumblr
417
+ UrlScrubber.service_of('http://cisco.tumblr.com').should eq :tumblr
246
418
  end
247
419
 
248
420
  it 'returns :other for other urls' do
@@ -258,132 +430,132 @@ describe UrlScrubber do
258
430
 
259
431
  describe 'for youtube' do
260
432
  it 'returns true for apparent channel urls' do
261
- UrlScrubber.ideal_form?('http://youtube.com/absolutely').should be_true
433
+ UrlScrubber.ideal_form?('http://youtube.com/absolutely').should be_truthy
262
434
  end
263
435
 
264
436
  it 'returns false for videos' do
265
- UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should be_false
437
+ UrlScrubber.ideal_form?('http://youtube.com/watch?v=vRGMAW1wzQ8').should be_falsey
266
438
  end
267
439
  end
268
440
 
269
441
  describe 'for twitter' do
270
442
  it 'returns true for apparent user urls' do
271
- UrlScrubber.ideal_form?('http://twitter.com/absolutely').should be_true
443
+ UrlScrubber.ideal_form?('http://twitter.com/absolutely').should be_truthy
272
444
  end
273
445
 
274
446
  it 'returns false for other pages' do
275
- UrlScrubber.ideal_form?('http://twitter.com/').should be_false
447
+ UrlScrubber.ideal_form?('http://twitter.com/').should be_falsey
276
448
  end
277
449
  end
278
450
 
279
451
  describe 'for facebook' do
280
452
  it 'returns true for apparent user urls' do
281
- UrlScrubber.ideal_form?('http://facebook.com/person.name').should be_true
282
- UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should be_true
453
+ UrlScrubber.ideal_form?('http://facebook.com/person.name').should be_truthy
454
+ UrlScrubber.ideal_form?('http://facebook.com/profile.php?id=543123521').should be_truthy
283
455
  end
284
456
 
285
457
  it 'returns false for other urls' do
286
- UrlScrubber.ideal_form?('http://facebook.com/').should be_false
458
+ UrlScrubber.ideal_form?('http://facebook.com/').should be_falsey
287
459
  end
288
460
  end
289
461
 
290
462
  describe 'for linkedin' do
291
463
  it 'returns true for apparent company urls' do
292
- UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should be_true
293
- UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should be_true
294
- UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should be_true
464
+ UrlScrubber.ideal_form?('http://linkedin.com/company/1337').should be_truthy
465
+ UrlScrubber.ideal_form?('http://www.linkedin.com/profile/view?id=12341324').should be_truthy
466
+ UrlScrubber.ideal_form?('http://linkedin.com/company/brand-name').should be_truthy
295
467
  end
296
468
 
297
469
  it 'returns false for other urls' do
298
- UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should be_false
470
+ UrlScrubber.ideal_form?('http://linkedin.com/jobs/c-Cisco').should be_falsey
299
471
  end
300
472
  end
301
473
 
302
474
  describe 'for google plus' do
303
475
  it 'returns true for apparent person urls' do
304
- UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should be_true
305
- UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should be_true
306
- UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should be_true
476
+ UrlScrubber.ideal_form?('http://plus.google.com/+SomeName').should be_truthy
477
+ UrlScrubber.ideal_form?('http://plus.google.com/u/0/b/111111111111111111111').should be_truthy
478
+ UrlScrubber.ideal_form?('https://plus.google.com/u/0/5432123454135/posts').should be_truthy
307
479
  end
308
480
 
309
481
  it 'returns false for other urls' do
310
- UrlScrubber.ideal_form?('http://plus.google.com/').should be_false
482
+ UrlScrubber.ideal_form?('http://plus.google.com/').should be_falsey
311
483
  end
312
484
  end
313
485
 
314
486
  describe 'for slideshare' do
315
487
  it 'returns true for apparent user urls' do
316
- UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should be_true
488
+ UrlScrubber.ideal_form?('http://slideshare.net/absolutely').should be_truthy
317
489
  end
318
490
 
319
491
  it 'returns false for other urls' do
320
- UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should be_false
492
+ UrlScrubber.ideal_form?('http://slideshare.net/absolutely/how-to-create-great-slides-for-presentations').should be_falsey
321
493
  end
322
494
  end
323
495
 
324
496
  describe 'for flickr' do
325
497
  it 'returns true for apparent user urls' do
326
- UrlScrubber.ideal_form?('http://flickr.com/username').should be_true
498
+ UrlScrubber.ideal_form?('http://flickr.com/username').should be_truthy
327
499
  end
328
500
 
329
501
  it 'returns false for other urls' do
330
- UrlScrubber.ideal_form?('http://flickr.com/').should be_false
502
+ UrlScrubber.ideal_form?('http://flickr.com/').should be_falsey
331
503
  end
332
504
  end
333
505
 
334
506
  describe 'for pinterest' do
335
507
  it 'returns true for apparent user urls' do
336
- UrlScrubber.ideal_form?('http://pinterest.com/username').should be_true
508
+ UrlScrubber.ideal_form?('http://pinterest.com/username').should be_truthy
337
509
  end
338
510
 
339
511
  it 'returns false for other urls' do
340
- UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should be_false
512
+ UrlScrubber.ideal_form?('http://pinterest.com/pin/532412513451524').should be_falsey
341
513
  end
342
514
  end
343
515
 
344
516
  describe 'for yelp' do
345
517
  it 'returns true for apparent business urls' do
346
- UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should be_true
518
+ UrlScrubber.ideal_form?('http://yelp.com/very-important-business').should be_truthy
347
519
  end
348
520
 
349
521
  it 'returns false for other urls' do
350
- UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should be_false
522
+ UrlScrubber.ideal_form?('http://yelp.com/user_details?userid=Aheunaobuh-huoanuhbAU').should be_falsey
351
523
  end
352
524
  end
353
525
 
354
526
  describe 'for vimeo' do
355
527
  it 'returns true for apparent user urls' do
356
- UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should be_true
528
+ UrlScrubber.ideal_form?('http://vimeo.com/absolutely').should be_truthy
357
529
  end
358
530
 
359
531
  it 'returns false for video urls' do
360
- UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should be_false
532
+ UrlScrubber.ideal_form?('http://vimeo.com/45453874578').should be_falsey
361
533
  end
362
534
  end
363
535
 
364
536
  describe 'for instagram' do
365
537
  it 'returns true for apparent user urls' do
366
- UrlScrubber.ideal_form?('http://instagram.com/username').should be_true
538
+ UrlScrubber.ideal_form?('http://instagram.com/username').should be_truthy
367
539
  end
368
540
 
369
541
  it 'returns false for other urls' do
370
- UrlScrubber.ideal_form?('http://instagram.com/532513451524').should be_false
542
+ UrlScrubber.ideal_form?('http://instagram.com/532513451524').should be_falsey
371
543
  end
372
544
  end
373
545
 
374
546
  describe 'for tumblr' do
375
547
  it 'returns true for apparent business urls' do
376
- UrlScrubber.ideal_form?('http://cisco.tumblr.com').should be_true
548
+ UrlScrubber.ideal_form?('http://cisco.tumblr.com').should be_truthy
377
549
  end
378
550
 
379
551
  it 'returns false for user urls' do
380
- UrlScrubber.ideal_form?('http://tumblr.com/joe').should be_false
552
+ UrlScrubber.ideal_form?('http://tumblr.com/joe').should be_falsey
381
553
  end
382
554
  end
383
555
 
384
556
  describe 'for other sites' do
385
557
  it 'returns true for any other site, really' do
386
- UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_true
558
+ UrlScrubber.ideal_form?('http://example.com/absolutely/anything').should be_truthy
387
559
  end
388
560
  end
389
561
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.20
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Colin Langton
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2014-06-12 00:00:00.000000000 Z
14
+ date: 2016-04-08 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec
@@ -124,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  version: '0'
125
125
  requirements: []
126
126
  rubyforge_project:
127
- rubygems_version: 2.1.10
127
+ rubygems_version: 2.4.8
128
128
  signing_key:
129
129
  specification_version: 4
130
130
  summary: Clean up URLs.