url_scrubber 0.8.16 → 0.8.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 455a7dccc82ca65d302205f525100c2c59700cbf41950533cae59269e47355c0
4
- data.tar.gz: 558a14779d4edcb5766896cab6ed21b1b7ad2f5cc924d30a007aa670d4fe7bd7
3
+ metadata.gz: 0ff75d42c034a2c7e8ac8bc09bcc8dc35ab42138cb81b4899137779a71da6d7f
4
+ data.tar.gz: 1f27e292990f3b2bbeb08aeaface0aa1b5a27adb04228d7c8a4dca20b9e22a8b
5
5
  SHA512:
6
- metadata.gz: fba24e3059a04408972ef8fc52013b726aff9b2995adc1bf153bb9723673b65f9add9334a1c4fd2f93c7a92fa9db960b1e9469b81590f20a60a4c830e92632ea
7
- data.tar.gz: 2d30bdcaf8d61e516cacc933dc2cfd2dd8010cea0edaab786a6d8b2c4a0f94c657d97c6b63ace038cc0f12a14e0ee0d2979e8bda43d9d814b74a56d0f7e3ae02
6
+ metadata.gz: da302452052addacc609a5e2a7459e809e328d40de859da654541ba470280dc4d541f3d15ed8512329c5e4fbf5b8b0995f3bb33c90054a593ed5e25681d31b88
7
+ data.tar.gz: 593d8c9861a6f5aa58bf57191e7fcc363510a4da68663e43212b08072a6d82eb0f6cb0fafb13f27cb07aa748399cd57a0145fbd426914bf76046c0f040cb02a0
data/lib/url_scrubber/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.8.16"
2
+ VERSION = "0.8.17"
3
3
  end
data/lib/url_scrubber.rb CHANGED
@@ -285,13 +285,15 @@ module UrlScrubber
285
285
 
286
286
  # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
287
287
  def self.sc_facebook(url)
288
- #puts "sc_facebook: #{url}"
289
- regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
288
+
289
+ url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
290
+
291
+ regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
290
292
  regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
291
- regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
292
- regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
293
+ regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
293
294
  regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
294
295
  regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
296
+ regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
295
297
 
296
298
  # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
297
299
  # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -300,10 +302,7 @@ module UrlScrubber
300
302
  end
301
303
 
302
304
  if url.match("/media/albums") || url.match("/media/set")
303
- # puts "media"
304
305
  url = url.match('\&') ? url.split('&',2)[0] : url
305
- elsif url.include?('facebook.com/groups/')
306
- url = drop_url_query!(url)
307
306
  elsif mdata = url.match(regex1)
308
307
  # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
309
308
  url = mdata[:url]
@@ -313,11 +312,6 @@ module UrlScrubber
313
312
  # "https://www.facebook.com/profile.php?id=100009574328879"
314
313
  url, http_response = check_for_facebook_redirection(mdata[:url])
315
314
  uid = mdata[:uid]
316
- elsif mdata = url.match(regex2a)
317
- # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
318
- url = "http://facebook.com/profile.php?id=" + mdata[:uid]
319
- url, http_response = check_for_facebook_redirection(url)
320
- uid = mdata[:uid]
321
315
  elsif mdata = url.match(regex4)
322
316
  # "http://facebook.com/home.php?#!/person.name"
323
317
  url = mdata[:url] + mdata[:uname]
@@ -326,11 +320,16 @@ module UrlScrubber
326
320
  # "https://www.facebook.com/100009574328879"
327
321
  url = "http://facebook.com/" + mdata[:uid]
328
322
  uid = mdata[:uid]
323
+ elsif mdata = url.match(regex6)
324
+ # "http://business.facebook.com/home/accounts?business_id=1145724702268347"
325
+ url = mdata[:url]
326
+ uid = mdata[:uid]
329
327
  elsif mdata = url.match(regex3)
330
328
  # "http://facebook.com/TonyMollHomeLoans/timeline"
331
329
  # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
332
- if ["page", "pages", "pg"].exclude?(mdata[:uname])
333
- url = "http://facebook.com/" + mdata[:uname]
330
+ # "https://www.facebook.com/groups/practicewithclaritygroup"
331
+ if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
332
+ url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
334
333
  uname = mdata[:uname]
335
334
  end
336
335
  url = drop_url_query!(url)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.16
4
+ version: 0.8.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Colin Langton
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2019-02-12 00:00:00.000000000 Z
15
+ date: 2019-03-16 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec
@@ -126,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  version: '0'
127
127
  requirements: []
128
128
  rubyforge_project:
129
- rubygems_version: 2.7.8
129
+ rubygems_version: 2.7.7
130
130
  signing_key:
131
131
  specification_version: 4
132
132
  summary: Clean up URLs.