url_scrubber 0.8.16 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/url_scrubber/version.rb +1 -1
- data/lib/url_scrubber.rb +13 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ff75d42c034a2c7e8ac8bc09bcc8dc35ab42138cb81b4899137779a71da6d7f
|
4
|
+
data.tar.gz: 1f27e292990f3b2bbeb08aeaface0aa1b5a27adb04228d7c8a4dca20b9e22a8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da302452052addacc609a5e2a7459e809e328d40de859da654541ba470280dc4d541f3d15ed8512329c5e4fbf5b8b0995f3bb33c90054a593ed5e25681d31b88
|
7
|
+
data.tar.gz: 593d8c9861a6f5aa58bf57191e7fcc363510a4da68663e43212b08072a6d82eb0f6cb0fafb13f27cb07aa748399cd57a0145fbd426914bf76046c0f040cb02a0
|
data/lib/url_scrubber/version.rb
CHANGED
data/lib/url_scrubber.rb
CHANGED
@@ -285,13 +285,15 @@ module UrlScrubber
|
|
285
285
|
|
286
286
|
# TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
|
287
287
|
def self.sc_facebook(url)
|
288
|
-
|
289
|
-
|
288
|
+
|
289
|
+
url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
|
290
|
+
|
291
|
+
regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
|
290
292
|
regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
291
|
-
|
292
|
-
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
293
|
+
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
293
294
|
regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
|
294
295
|
regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
296
|
+
regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
295
297
|
|
296
298
|
# If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
|
297
299
|
# then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
|
@@ -300,10 +302,7 @@ module UrlScrubber
|
|
300
302
|
end
|
301
303
|
|
302
304
|
if url.match("/media/albums") || url.match("/media/set")
|
303
|
-
# puts "media"
|
304
305
|
url = url.match('\&') ? url.split('&',2)[0] : url
|
305
|
-
elsif url.include?('facebook.com/groups/')
|
306
|
-
url = drop_url_query!(url)
|
307
306
|
elsif mdata = url.match(regex1)
|
308
307
|
# "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
|
309
308
|
url = mdata[:url]
|
@@ -313,11 +312,6 @@ module UrlScrubber
|
|
313
312
|
# "https://www.facebook.com/profile.php?id=100009574328879"
|
314
313
|
url, http_response = check_for_facebook_redirection(mdata[:url])
|
315
314
|
uid = mdata[:uid]
|
316
|
-
elsif mdata = url.match(regex2a)
|
317
|
-
# "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
|
318
|
-
url = "http://facebook.com/profile.php?id=" + mdata[:uid]
|
319
|
-
url, http_response = check_for_facebook_redirection(url)
|
320
|
-
uid = mdata[:uid]
|
321
315
|
elsif mdata = url.match(regex4)
|
322
316
|
# "http://facebook.com/home.php?#!/person.name"
|
323
317
|
url = mdata[:url] + mdata[:uname]
|
@@ -326,11 +320,16 @@ module UrlScrubber
|
|
326
320
|
# "https://www.facebook.com/100009574328879"
|
327
321
|
url = "http://facebook.com/" + mdata[:uid]
|
328
322
|
uid = mdata[:uid]
|
323
|
+
elsif mdata = url.match(regex6)
|
324
|
+
# "http://business.facebook.com/home/accounts?business_id=1145724702268347"
|
325
|
+
url = mdata[:url]
|
326
|
+
uid = mdata[:uid]
|
329
327
|
elsif mdata = url.match(regex3)
|
330
328
|
# "http://facebook.com/TonyMollHomeLoans/timeline"
|
331
329
|
# "http://facebook.com/pg/TonyMollHomeLoans/timeline"
|
332
|
-
|
333
|
-
|
330
|
+
# "https://www.facebook.com/groups/practicewithclaritygroup"
|
331
|
+
if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
|
332
|
+
url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
|
334
333
|
uname = mdata[:uname]
|
335
334
|
end
|
336
335
|
url = drop_url_query!(url)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.16
|
4
|
+
version: 0.8.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Colin Langton
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2019-
|
15
|
+
date: 2019-03-16 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
@@ -126,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
126
|
version: '0'
|
127
127
|
requirements: []
|
128
128
|
rubyforge_project:
|
129
|
-
rubygems_version: 2.7.
|
129
|
+
rubygems_version: 2.7.7
|
130
130
|
signing_key:
|
131
131
|
specification_version: 4
|
132
132
|
summary: Clean up URLs.
|