RubyGems - url_scrubber - Versions diffs - 0.8.15 → 0.8.20 - Mend

url_scrubber 0.8.15 → 0.8.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 608b40e9de605ac987e39f8fa5b1640d6543c8dc8a83553f2689f2fbe716b50a
-  data.tar.gz: d6d1c7905a4875ef9fb6f2ce7ab03f18ff31e524b180c3e13bd10467513c79b8
+  metadata.gz: 5392c3c6b9af7ba8d175315c846027174b8801d0a8371525a6c7eb1eb0e115be
+  data.tar.gz: dc671761ac0e450afd851a1c8890db403b68a47af341485408ba10f01e24f056
 SHA512:
-  metadata.gz: f6f43dd74cec24acd3f7e2a376b89476f00eb09e627a96c55ba5ec0e97c7ccc994385858183c13230a98066d2b687a462225a3347f6aa27d36f8a668eaef087c
-  data.tar.gz: 395a1561434cc85197f10211393cfdaabea3e35fb50a2559f0c0c3dd669aea3cd13b8e68192a4eda105a2abce1c7476bde167ad00c8704d0e62052ce92ca94ba
+  metadata.gz: 6d0b8e970868607dea54065027ef662a59780b88db21cfb36e933cff0086ecaedac63e1f23e588e6a7f9d0bcb649950385b5351e354116e825ff65649adad1a1
+  data.tar.gz: 70fcdd156dc4aa2aae15a0b16e0a8e70fdd14808003840719ed9cbda415113fcf94ca26facff5d34a4fa26ef7ce568037a945d09f22c7ececf64dec075e6529b

data/lib/url_scrubber.rb CHANGED

@@ -12,7 +12,7 @@ module UrlScrubber
     url = url.clone # don't modify the original argument
-    m = url.match(/(htt?ps?:\/\/\S*)/i)
+    m = url.match(/(htt?ps?:\/\/\S+)/i)
     return nil unless m
     url = m[1]
@@ -48,6 +48,8 @@ module UrlScrubber
       when 'tumblr'             then return :tumblr
       when 'twitter'            then return :twitter
       when 'vimeo'              then return :vimeo
+      when 'vk'                 then return :vkontakte
+      when 'weibo'              then return :weibo
       when 'yelp'               then return :yelp
       when 'youtube'            then return :youtube
       end
@@ -69,6 +71,10 @@ module UrlScrubber
     return false unless url
     case service_of(url)
+    when :vkontakte
+      !!url.match(%r{^http://vk\.com/[\w_]+$})
+    when :weibo
+      !!url.match(%r{^http://weibo\.com/[\w_-]+$})
     when :youtube
       !!url.match(%r{^http://youtube\.com/[\w_-]+$})
     when :twitter
@@ -279,13 +285,15 @@ module UrlScrubber
   # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
   def self.sc_facebook(url)
-    #puts "sc_facebook: #{url}"
-    regex1  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
+    url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
+    regex1  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
     regex2  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
-    regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
-    regex3  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
+    regex3  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
     regex4  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
     regex5  = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
+    regex6  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
     # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
     # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -294,10 +302,7 @@ module UrlScrubber
     end
     if url.match("/media/albums") || url.match("/media/set")
-      # puts "media"
       url = url.match('\&') ? url.split('&',2)[0] : url
-    elsif url.include?('facebook.com/groups/')
-      url = drop_url_query!(url)
     elsif mdata = url.match(regex1)
       # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
       url = mdata[:url]
@@ -307,11 +312,6 @@ module UrlScrubber
       # "https://www.facebook.com/profile.php?id=100009574328879"
       url, http_response = check_for_facebook_redirection(mdata[:url])
       uid = mdata[:uid]
-    elsif mdata = url.match(regex2a)
-      # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
-      url = "http://facebook.com/profile.php?id=" + mdata[:uid]
-      url, http_response = check_for_facebook_redirection(url)
-      uid = mdata[:uid]
     elsif mdata = url.match(regex4)
       # "http://facebook.com/home.php?#!/person.name"
         url = mdata[:url] + mdata[:uname]
@@ -320,11 +320,16 @@ module UrlScrubber
         # "https://www.facebook.com/100009574328879"
         url = "http://facebook.com/" + mdata[:uid]
         uid = mdata[:uid]
+      elsif mdata = url.match(regex6)
+        # "http://business.facebook.com/home/accounts?business_id=1145724702268347"
+        url = mdata[:url]
+        uid = mdata[:uid]
     elsif mdata = url.match(regex3)
       # "http://facebook.com/TonyMollHomeLoans/timeline"
       # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
-      if ["page", "pages", "pg"].exclude?(mdata[:uname])
-        url = "http://facebook.com/" + mdata[:uname]
+      # "https://www.facebook.com/groups/practicewithclaritygroup"
+      if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
+        url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
         uname = mdata[:uname]
       end
       url = drop_url_query!(url)

data/lib/url_scrubber/version.rb CHANGED

@@ -1,3 +1,3 @@
 module UrlScrubber
-  VERSION = "0.8.15"
+  VERSION = "0.8.20"
 end

data/url_scrubber.gemspec CHANGED

@@ -7,6 +7,7 @@ Gem::Specification.new do |gem|
   gem.description   = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
   gem.summary       = %q{Clean up URLs.}
   gem.homepage      = "http://brandle.net"
+  gem.license       = "MIT"
   gem.files         = `git ls-files`.split($\)
   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: url_scrubber
 version: !ruby/object:Gem::Version
-  version: 0.8.15
+  version: 0.8.20
 platform: ruby
 authors:
 - Colin Langton
@@ -12,7 +12,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-11-22 00:00:00.000000000 Z
+date: 2020-08-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -97,7 +97,6 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
-- ".rvmrc"
 - Gemfile
 - Guardfile
 - README.md
@@ -108,7 +107,8 @@ files:
 - spec/url_scrubber_spec.rb
 - url_scrubber.gemspec
 homepage: http://brandle.net
-licenses: []
+licenses:
+- MIT
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.7.7
+rubygems_version: 3.0.6
 signing_key:
 specification_version: 4
 summary: Clean up URLs.

data/.rvmrc DELETED

@@ -1 +0,0 @@
-rvm use 1.9.3@url_scrubber --create