RubyGems - url_scrubber - Version diff - 0.8.13 → 0.8.15 - Mend

url_scrubber 0.8.13 → 0.8.15

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (5)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5b0e2a469b840847dde026eaaeb18f48938e49080a737ba2f70b53333f889aeb
-  data.tar.gz: df9c7c316151a830d000f1adffcf043a95fe46e71822824a9526a9a309522b11
+  metadata.gz: 608b40e9de605ac987e39f8fa5b1640d6543c8dc8a83553f2689f2fbe716b50a
+  data.tar.gz: d6d1c7905a4875ef9fb6f2ce7ab03f18ff31e524b180c3e13bd10467513c79b8
 SHA512:
-  metadata.gz: 0c45b738609ad89ffbf7e69cb46378710fe7fdb69f88abcb61df71e112a02194493eb1175650a446d81a51601ee36f9acabf8d5f68853f561ead495025eb9e3f
-  data.tar.gz: 8a293e4f32b7cf355bd3a2e8570ac1b5e5569b116ab92e0b98c6eeff92d028ac25952c8df4d8a49a1d0f829919b9fdc716fbade35063922f3157c1f26710b45b
+  metadata.gz: f6f43dd74cec24acd3f7e2a376b89476f00eb09e627a96c55ba5ec0e97c7ccc994385858183c13230a98066d2b687a462225a3347f6aa27d36f8a668eaef087c
+  data.tar.gz: 395a1561434cc85197f10211393cfdaabea3e35fb50a2559f0c0c3dd669aea3cd13b8e68192a4eda105a2abce1c7476bde167ad00c8704d0e62052ce92ca94ba

data/lib/url_scrubber/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module UrlScrubber
-  VERSION = "0.8.13"
+  VERSION = "0.8.15"
 end

data/lib/url_scrubber.rb CHANGED Viewed

@@ -34,39 +34,30 @@ module UrlScrubber
   def self.service_of(url)
-    domain_match = url.match(%r{https?://([^/]+)})
-    if domain_match
-      domain = domain_match[1]
-      first_dot = domain.index(".")
-  	  #first_dot_position = domain.index(".")
-  	  #first_dot_position += 1 if first_dot_position
-  	  #Rails.logger.debug "domain = #{domain}, first dot = #{first_dot ? first_dot : 'none'}, first dot 1= #{first_dot ? domain[first_dot+1..domain.size] : 'NIL'}"
-  	  if first_dot
-        # tumblr is a unique format
-        return :tumblr if domain[first_dot+1..domain.size].index("tumblr.com") == 0
+    url_parts = Domainatrix.parse(url)
+    if url_parts.host.present?
+      case url_parts.domain
+      when 'facebook'           then return :facebook
+      when 'fb'                 then return :facebook
+      when 'flickr'             then return :flickr
+      when 'instagram'          then return :instagram
+      when 'linkedin'           then return :linkedin
+      when 'pinterest'          then return :pinterest
+      when 'slideshare'         then return :slideshare
+      when 'tumblr'             then return :tumblr
+      when 'twitter'            then return :twitter
+      when 'vimeo'              then return :vimeo
+      when 'yelp'               then return :yelp
+      when 'youtube'            then return :youtube
       end
-      case domain
-      when /\byoutube\.com$/            then return :youtube
-      when /\btwitter\.com$/            then return :twitter
-      when /\bfacebook\.com$/           then return :facebook
-      when /\bbusiness.facebook\.com$/  then return :facebook
-      when /\blinkedin\.com$/           then return :linkedin
-      when /\bplus\.google\.com$/       then return :google
-      when /\bbusiness\.google\.com$/   then return :google
-      when /\bslideshare\.net$/         then return :slideshare
-      when /\bflickr\.com$/             then return :flickr
-      when /\bpinterest\.com$/          then return :pinterest
-      when /\bvimeo\.com$/              then return :vimeo
-      when /\binstagram\.com$/          then return :instagram
-      when /\byelp\.com$/               then return :yelp
+      case url_parts.host
+      when /\bplus\.google\.com$/ then return :google
       end
     else
-  	  Rails.logger.debug "No Domain Match"
+      Rails.logger.debug "No Domain Match"
     end
     :other
@@ -286,6 +277,7 @@ module UrlScrubber
   end
+  # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
   def self.sc_facebook(url)
     #puts "sc_facebook: #{url}"
     regex1  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
@@ -293,6 +285,7 @@ module UrlScrubber
     regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
     regex3  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
     regex4  = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
+    regex5  = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
     # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
     # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -306,32 +299,31 @@ module UrlScrubber
     elsif url.include?('facebook.com/groups/')
       url = drop_url_query!(url)
     elsif mdata = url.match(regex1)
-      # puts "regex1"
       # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
       url = mdata[:url]
       uname = mdata[:uname]
       uid = mdata[:uid]
     elsif mdata = url.match(regex2)
-      # puts "regex2"
       # "https://www.facebook.com/profile.php?id=100009574328879"
       url, http_response = check_for_facebook_redirection(mdata[:url])
       uid = mdata[:uid]
     elsif mdata = url.match(regex2a)
-      # puts "regex2a"
       # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
       url = "http://facebook.com/profile.php?id=" + mdata[:uid]
       url, http_response = check_for_facebook_redirection(url)
       uid = mdata[:uid]
     elsif mdata = url.match(regex4)
-      # puts "#{url} - #{mdata[:uname]}"
       # "http://facebook.com/home.php?#!/person.name"
         url = mdata[:url] + mdata[:uname]
         url = drop_url_query!(url)
+      elsif mdata = url.match(regex5)
+        # "https://www.facebook.com/100009574328879"
+        url = "http://facebook.com/" + mdata[:uid]
+        uid = mdata[:uid]
     elsif mdata = url.match(regex3)
-      # puts "regex3"
       # "http://facebook.com/TonyMollHomeLoans/timeline"
       # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
-      if ["pages", "pg"].exclude?(mdata[:uname])
+      if ["page", "pages", "pg"].exclude?(mdata[:uname])
         url = "http://facebook.com/" + mdata[:uname]
         uname = mdata[:uname]
       end
@@ -353,6 +345,7 @@ module UrlScrubber
   end
+  # TODO This needs to be rewritten to be independent of the LinkedIn domain and public suffix used: e.g. linkedin.com vs lnkd.in vs linkedin.ca
   def self.sc_linkedin(url)
     url.sub!('linkedin.com/companies/', 'linkedin.com/company/')
     if !!url.match(%r{com/company/})

data/url_scrubber.gemspec CHANGED Viewed

@@ -2,8 +2,8 @@
 require File.expand_path('../lib/url_scrubber/version', __FILE__)
 Gem::Specification.new do |gem|
-  gem.authors       = ["Colin Langton", "Christopher Maujean", "David Hillard", "Edgar Abadines"]
-  gem.email         = ["colin@hoteldelta.net", "cmaujean@brandle.net", "dhillard@brandle.net", "ed@brandle.net"]
+  gem.authors       = ["Colin Langton", "Christopher Maujean", "David Hillard", "Edgar Abadines", "Chip Roberson"]
+  gem.email         = ["colin@hoteldelta.net", "cmaujean@brandle.net", "dhillard@brandle.net", "ed@brandle.net", "chip@brandle.net"]
   gem.description   = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
   gem.summary       = %q{Clean up URLs.}
   gem.homepage      = "http://brandle.net"

metadata CHANGED Viewed

@@ -1,17 +1,18 @@
 --- !ruby/object:Gem::Specification
 name: url_scrubber
 version: !ruby/object:Gem::Version
-  version: 0.8.13
+  version: 0.8.15
 platform: ruby
 authors:
 - Colin Langton
 - Christopher Maujean
 - David Hillard
 - Edgar Abadines
+- Chip Roberson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-11-20 00:00:00.000000000 Z
+date: 2018-11-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -90,6 +91,7 @@ email:
 - cmaujean@brandle.net
 - dhillard@brandle.net
 - ed@brandle.net
+- chip@brandle.net
 executables: []
 extensions: []
 extra_rdoc_files: []