RubyGems - url_scrubber - Versions diffs - 0.8.16 → 0.8.21 - Mend

url_scrubber 0.8.16 → 0.8.21

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 455a7dccc82ca65d302205f525100c2c59700cbf41950533cae59269e47355c0
-  data.tar.gz: 558a14779d4edcb5766896cab6ed21b1b7ad2f5cc924d30a007aa670d4fe7bd7
+  metadata.gz: dfe7609c65d93e0b93cdc01f8b9ff08b6abbd417a4be183a19993f2df17a5451
+  data.tar.gz: 7f85340db035fa6330cc894ae0290261633d3c626ba76210ddea2be1c0a57352
 SHA512:
-  metadata.gz: fba24e3059a04408972ef8fc52013b726aff9b2995adc1bf153bb9723673b65f9add9334a1c4fd2f93c7a92fa9db960b1e9469b81590f20a60a4c830e92632ea
-  data.tar.gz: 2d30bdcaf8d61e516cacc933dc2cfd2dd8010cea0edaab786a6d8b2c4a0f94c657d97c6b63ace038cc0f12a14e0ee0d2979e8bda43d9d814b74a56d0f7e3ae02
+  metadata.gz: 2751402307e2edb719e12279c11301d3a7084920e19dc7dc7ec18b37bf19a684b69b5ad8f7dcca7867667a463012d6f08c339fa4842458edc5c8003d303d69e1
+  data.tar.gz: d9156d4cb46a3a232b4e80df2759bba624d2366ec691cece5f0c025a24ccc4faf5706a7e48f6ef35d4bde8b50b354fb3965ff54f056cca59f29beac926f71a73

data/lib/url_scrubber.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module UrlScrubber
     url = url.clone # don't modify the original argument
-    m = url.match(/(htt?ps?:\/\/\S*)/i)
+    m = url.match(/(htt?ps?:\/\/\S+)/i)
     return nil unless m
     url = m[1]
@@ -179,7 +179,11 @@ module UrlScrubber
   def self.downcase_domain(url)
     domain_match = url.match(%r{http://[^/]+}i)
-    domain_match[0].downcase + domain_match.post_match
+    if domain_match
+      domain_match[0].downcase + domain_match.post_match
+    else
+      url
+    end
   end
@@ -285,13 +289,15 @@ module UrlScrubber
   # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
   def self.sc_facebook(url)
-    #puts "sc_facebook: #{url}"
-    regex1  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
+    url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
+    regex1  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
     regex2  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
-    regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
-    regex3  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
+    regex3  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
     regex4  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
     regex5  = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
+    regex6  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
     # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
     # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -300,10 +306,7 @@ module UrlScrubber
     end
     if url.match("/media/albums") || url.match("/media/set")
-      # puts "media"
       url = url.match('\&') ? url.split('&',2)[0] : url
-    elsif url.include?('facebook.com/groups/')
-      url = drop_url_query!(url)
     elsif mdata = url.match(regex1)
       # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
       url = mdata[:url]
@@ -313,11 +316,6 @@ module UrlScrubber
       # "https://www.facebook.com/profile.php?id=100009574328879"
       url, http_response = check_for_facebook_redirection(mdata[:url])
       uid = mdata[:uid]
-    elsif mdata = url.match(regex2a)
-      # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
-      url = "http://facebook.com/profile.php?id=" + mdata[:uid]
-      url, http_response = check_for_facebook_redirection(url)
-      uid = mdata[:uid]
     elsif mdata = url.match(regex4)
       # "http://facebook.com/home.php?#!/person.name"
         url = mdata[:url] + mdata[:uname]
@@ -326,11 +324,16 @@ module UrlScrubber
         # "https://www.facebook.com/100009574328879"
         url = "http://facebook.com/" + mdata[:uid]
         uid = mdata[:uid]
+      elsif mdata = url.match(regex6)
+        # "http://business.facebook.com/home/accounts?business_id=1145724702268347"
+        url = mdata[:url]
+        uid = mdata[:uid]
     elsif mdata = url.match(regex3)
       # "http://facebook.com/TonyMollHomeLoans/timeline"
       # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
-      if ["page", "pages", "pg"].exclude?(mdata[:uname])
-        url = "http://facebook.com/" + mdata[:uname]
+      # "https://www.facebook.com/groups/practicewithclaritygroup"
+      if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
+        url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
         uname = mdata[:uname]
       end
       url = drop_url_query!(url)

data/lib/url_scrubber/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module UrlScrubber
-  VERSION = "0.8.16"
+  VERSION = "0.8.21"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: url_scrubber
 version: !ruby/object:Gem::Version
-  version: 0.8.16
+  version: 0.8.21
 platform: ruby
 authors:
 - Colin Langton
@@ -12,7 +12,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-02-12 00:00:00.000000000 Z
+date: 2021-01-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.7.8
+rubygems_version: 3.0.6
 signing_key:
 specification_version: 4
 summary: Clean up URLs.