url_scrubber 0.8.16 → 0.8.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/url_scrubber/version.rb +1 -1
- data/lib/url_scrubber.rb +13 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ff75d42c034a2c7e8ac8bc09bcc8dc35ab42138cb81b4899137779a71da6d7f
|
4
|
+
data.tar.gz: 1f27e292990f3b2bbeb08aeaface0aa1b5a27adb04228d7c8a4dca20b9e22a8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da302452052addacc609a5e2a7459e809e328d40de859da654541ba470280dc4d541f3d15ed8512329c5e4fbf5b8b0995f3bb33c90054a593ed5e25681d31b88
|
7
|
+
data.tar.gz: 593d8c9861a6f5aa58bf57191e7fcc363510a4da68663e43212b08072a6d82eb0f6cb0fafb13f27cb07aa748399cd57a0145fbd426914bf76046c0f040cb02a0
|
data/lib/url_scrubber/version.rb
CHANGED
data/lib/url_scrubber.rb
CHANGED
@@ -285,13 +285,15 @@ module UrlScrubber
|
|
285
285
|
|
286
286
|
# TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
|
287
287
|
def self.sc_facebook(url)
|
288
|
-
|
289
|
-
|
288
|
+
|
289
|
+
url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
|
290
|
+
|
291
|
+
regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
|
290
292
|
regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
291
|
-
|
292
|
-
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
293
|
+
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
293
294
|
regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
|
294
295
|
regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
296
|
+
regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
295
297
|
|
296
298
|
# If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
|
297
299
|
# then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
|
@@ -300,10 +302,7 @@ module UrlScrubber
|
|
300
302
|
end
|
301
303
|
|
302
304
|
if url.match("/media/albums") || url.match("/media/set")
|
303
|
-
# puts "media"
|
304
305
|
url = url.match('\&') ? url.split('&',2)[0] : url
|
305
|
-
elsif url.include?('facebook.com/groups/')
|
306
|
-
url = drop_url_query!(url)
|
307
306
|
elsif mdata = url.match(regex1)
|
308
307
|
# "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
|
309
308
|
url = mdata[:url]
|
@@ -313,11 +312,6 @@ module UrlScrubber
|
|
313
312
|
# "https://www.facebook.com/profile.php?id=100009574328879"
|
314
313
|
url, http_response = check_for_facebook_redirection(mdata[:url])
|
315
314
|
uid = mdata[:uid]
|
316
|
-
elsif mdata = url.match(regex2a)
|
317
|
-
# "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
|
318
|
-
url = "http://facebook.com/profile.php?id=" + mdata[:uid]
|
319
|
-
url, http_response = check_for_facebook_redirection(url)
|
320
|
-
uid = mdata[:uid]
|
321
315
|
elsif mdata = url.match(regex4)
|
322
316
|
# "http://facebook.com/home.php?#!/person.name"
|
323
317
|
url = mdata[:url] + mdata[:uname]
|
@@ -326,11 +320,16 @@ module UrlScrubber
|
|
326
320
|
# "https://www.facebook.com/100009574328879"
|
327
321
|
url = "http://facebook.com/" + mdata[:uid]
|
328
322
|
uid = mdata[:uid]
|
323
|
+
elsif mdata = url.match(regex6)
|
324
|
+
# "http://business.facebook.com/home/accounts?business_id=1145724702268347"
|
325
|
+
url = mdata[:url]
|
326
|
+
uid = mdata[:uid]
|
329
327
|
elsif mdata = url.match(regex3)
|
330
328
|
# "http://facebook.com/TonyMollHomeLoans/timeline"
|
331
329
|
# "http://facebook.com/pg/TonyMollHomeLoans/timeline"
|
332
|
-
|
333
|
-
|
330
|
+
# "https://www.facebook.com/groups/practicewithclaritygroup"
|
331
|
+
if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
|
332
|
+
url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
|
334
333
|
uname = mdata[:uname]
|
335
334
|
end
|
336
335
|
url = drop_url_query!(url)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.16
|
4
|
+
version: 0.8.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Colin Langton
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2019-
|
15
|
+
date: 2019-03-16 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
@@ -126,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
126
|
version: '0'
|
127
127
|
requirements: []
|
128
128
|
rubyforge_project:
|
129
|
-
rubygems_version: 2.7.
|
129
|
+
rubygems_version: 2.7.7
|
130
130
|
signing_key:
|
131
131
|
specification_version: 4
|
132
132
|
summary: Clean up URLs.
|