url_scrubber 0.8.16 → 0.8.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 455a7dccc82ca65d302205f525100c2c59700cbf41950533cae59269e47355c0
4
- data.tar.gz: 558a14779d4edcb5766896cab6ed21b1b7ad2f5cc924d30a007aa670d4fe7bd7
3
+ metadata.gz: 0ff75d42c034a2c7e8ac8bc09bcc8dc35ab42138cb81b4899137779a71da6d7f
4
+ data.tar.gz: 1f27e292990f3b2bbeb08aeaface0aa1b5a27adb04228d7c8a4dca20b9e22a8b
5
5
  SHA512:
6
- metadata.gz: fba24e3059a04408972ef8fc52013b726aff9b2995adc1bf153bb9723673b65f9add9334a1c4fd2f93c7a92fa9db960b1e9469b81590f20a60a4c830e92632ea
7
- data.tar.gz: 2d30bdcaf8d61e516cacc933dc2cfd2dd8010cea0edaab786a6d8b2c4a0f94c657d97c6b63ace038cc0f12a14e0ee0d2979e8bda43d9d814b74a56d0f7e3ae02
6
+ metadata.gz: da302452052addacc609a5e2a7459e809e328d40de859da654541ba470280dc4d541f3d15ed8512329c5e4fbf5b8b0995f3bb33c90054a593ed5e25681d31b88
7
+ data.tar.gz: 593d8c9861a6f5aa58bf57191e7fcc363510a4da68663e43212b08072a6d82eb0f6cb0fafb13f27cb07aa748399cd57a0145fbd426914bf76046c0f040cb02a0
data/lib/url_scrubber/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.8.16"
2
+ VERSION = "0.8.17"
3
3
  end
data/lib/url_scrubber.rb CHANGED
@@ -285,13 +285,15 @@ module UrlScrubber
285
285
 
286
286
  # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
287
287
  def self.sc_facebook(url)
288
- #puts "sc_facebook: #{url}"
289
- regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
288
+
289
+ url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
290
+
291
+ regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
290
292
  regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
291
- regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
292
- regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
293
+ regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
293
294
  regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
294
295
  regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
296
+ regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
295
297
 
296
298
  # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
297
299
  # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -300,10 +302,7 @@ module UrlScrubber
300
302
  end
301
303
 
302
304
  if url.match("/media/albums") || url.match("/media/set")
303
- # puts "media"
304
305
  url = url.match('\&') ? url.split('&',2)[0] : url
305
- elsif url.include?('facebook.com/groups/')
306
- url = drop_url_query!(url)
307
306
  elsif mdata = url.match(regex1)
308
307
  # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
309
308
  url = mdata[:url]
@@ -313,11 +312,6 @@ module UrlScrubber
313
312
  # "https://www.facebook.com/profile.php?id=100009574328879"
314
313
  url, http_response = check_for_facebook_redirection(mdata[:url])
315
314
  uid = mdata[:uid]
316
- elsif mdata = url.match(regex2a)
317
- # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
318
- url = "http://facebook.com/profile.php?id=" + mdata[:uid]
319
- url, http_response = check_for_facebook_redirection(url)
320
- uid = mdata[:uid]
321
315
  elsif mdata = url.match(regex4)
322
316
  # "http://facebook.com/home.php?#!/person.name"
323
317
  url = mdata[:url] + mdata[:uname]
@@ -326,11 +320,16 @@ module UrlScrubber
326
320
  # "https://www.facebook.com/100009574328879"
327
321
  url = "http://facebook.com/" + mdata[:uid]
328
322
  uid = mdata[:uid]
323
+ elsif mdata = url.match(regex6)
324
+ # "http://business.facebook.com/home/accounts?business_id=1145724702268347"
325
+ url = mdata[:url]
326
+ uid = mdata[:uid]
329
327
  elsif mdata = url.match(regex3)
330
328
  # "http://facebook.com/TonyMollHomeLoans/timeline"
331
329
  # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
332
- if ["page", "pages", "pg"].exclude?(mdata[:uname])
333
- url = "http://facebook.com/" + mdata[:uname]
330
+ # "https://www.facebook.com/groups/practicewithclaritygroup"
331
+ if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
332
+ url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
334
333
  uname = mdata[:uname]
335
334
  end
336
335
  url = drop_url_query!(url)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.16
4
+ version: 0.8.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Colin Langton
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2019-02-12 00:00:00.000000000 Z
15
+ date: 2019-03-16 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec
@@ -126,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  version: '0'
127
127
  requirements: []
128
128
  rubyforge_project:
129
- rubygems_version: 2.7.8
129
+ rubygems_version: 2.7.7
130
130
  signing_key:
131
131
  specification_version: 4
132
132
  summary: Clean up URLs.