url_scrubber 0.8.16 → 0.8.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 455a7dccc82ca65d302205f525100c2c59700cbf41950533cae59269e47355c0
4
- data.tar.gz: 558a14779d4edcb5766896cab6ed21b1b7ad2f5cc924d30a007aa670d4fe7bd7
3
+ metadata.gz: dfe7609c65d93e0b93cdc01f8b9ff08b6abbd417a4be183a19993f2df17a5451
4
+ data.tar.gz: 7f85340db035fa6330cc894ae0290261633d3c626ba76210ddea2be1c0a57352
5
5
  SHA512:
6
- metadata.gz: fba24e3059a04408972ef8fc52013b726aff9b2995adc1bf153bb9723673b65f9add9334a1c4fd2f93c7a92fa9db960b1e9469b81590f20a60a4c830e92632ea
7
- data.tar.gz: 2d30bdcaf8d61e516cacc933dc2cfd2dd8010cea0edaab786a6d8b2c4a0f94c657d97c6b63ace038cc0f12a14e0ee0d2979e8bda43d9d814b74a56d0f7e3ae02
6
+ metadata.gz: 2751402307e2edb719e12279c11301d3a7084920e19dc7dc7ec18b37bf19a684b69b5ad8f7dcca7867667a463012d6f08c339fa4842458edc5c8003d303d69e1
7
+ data.tar.gz: d9156d4cb46a3a232b4e80df2759bba624d2366ec691cece5f0c025a24ccc4faf5706a7e48f6ef35d4bde8b50b354fb3965ff54f056cca59f29beac926f71a73
data/lib/url_scrubber.rb CHANGED
@@ -12,7 +12,7 @@ module UrlScrubber
12
12
 
13
13
  url = url.clone # don't modify the original argument
14
14
 
15
- m = url.match(/(htt?ps?:\/\/\S*)/i)
15
+ m = url.match(/(htt?ps?:\/\/\S+)/i)
16
16
  return nil unless m
17
17
 
18
18
  url = m[1]
@@ -179,7 +179,11 @@ module UrlScrubber
179
179
 
180
180
  def self.downcase_domain(url)
181
181
  domain_match = url.match(%r{http://[^/]+}i)
182
- domain_match[0].downcase + domain_match.post_match
182
+ if domain_match
183
+ domain_match[0].downcase + domain_match.post_match
184
+ else
185
+ url
186
+ end
183
187
  end
184
188
 
185
189
 
@@ -285,13 +289,15 @@ module UrlScrubber
285
289
 
286
290
  # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
287
291
  def self.sc_facebook(url)
288
- #puts "sc_facebook: #{url}"
289
- regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
292
+
293
+ url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
294
+
295
+ regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
290
296
  regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
291
- regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
292
- regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
297
+ regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
293
298
  regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
294
299
  regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
300
+ regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
295
301
 
296
302
  # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
297
303
  # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -300,10 +306,7 @@ module UrlScrubber
300
306
  end
301
307
 
302
308
  if url.match("/media/albums") || url.match("/media/set")
303
- # puts "media"
304
309
  url = url.match('\&') ? url.split('&',2)[0] : url
305
- elsif url.include?('facebook.com/groups/')
306
- url = drop_url_query!(url)
307
310
  elsif mdata = url.match(regex1)
308
311
  # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
309
312
  url = mdata[:url]
@@ -313,11 +316,6 @@ module UrlScrubber
313
316
  # "https://www.facebook.com/profile.php?id=100009574328879"
314
317
  url, http_response = check_for_facebook_redirection(mdata[:url])
315
318
  uid = mdata[:uid]
316
- elsif mdata = url.match(regex2a)
317
- # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
318
- url = "http://facebook.com/profile.php?id=" + mdata[:uid]
319
- url, http_response = check_for_facebook_redirection(url)
320
- uid = mdata[:uid]
321
319
  elsif mdata = url.match(regex4)
322
320
  # "http://facebook.com/home.php?#!/person.name"
323
321
  url = mdata[:url] + mdata[:uname]
@@ -326,11 +324,16 @@ module UrlScrubber
326
324
  # "https://www.facebook.com/100009574328879"
327
325
  url = "http://facebook.com/" + mdata[:uid]
328
326
  uid = mdata[:uid]
327
+ elsif mdata = url.match(regex6)
328
+ # "http://business.facebook.com/home/accounts?business_id=1145724702268347"
329
+ url = mdata[:url]
330
+ uid = mdata[:uid]
329
331
  elsif mdata = url.match(regex3)
330
332
  # "http://facebook.com/TonyMollHomeLoans/timeline"
331
333
  # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
332
- if ["page", "pages", "pg"].exclude?(mdata[:uname])
333
- url = "http://facebook.com/" + mdata[:uname]
334
+ # "https://www.facebook.com/groups/practicewithclaritygroup"
335
+ if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
336
+ url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
334
337
  uname = mdata[:uname]
335
338
  end
336
339
  url = drop_url_query!(url)
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.8.16"
2
+ VERSION = "0.8.21"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.16
4
+ version: 0.8.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Colin Langton
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2019-02-12 00:00:00.000000000 Z
15
+ date: 2021-01-08 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  - !ruby/object:Gem::Version
126
126
  version: '0'
127
127
  requirements: []
128
- rubyforge_project:
129
- rubygems_version: 2.7.8
128
+ rubygems_version: 3.0.6
130
129
  signing_key:
131
130
  specification_version: 4
132
131
  summary: Clean up URLs.