url_scrubber 0.8.16 → 0.8.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/url_scrubber.rb +19 -16
- data/lib/url_scrubber/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dfe7609c65d93e0b93cdc01f8b9ff08b6abbd417a4be183a19993f2df17a5451
|
4
|
+
data.tar.gz: 7f85340db035fa6330cc894ae0290261633d3c626ba76210ddea2be1c0a57352
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2751402307e2edb719e12279c11301d3a7084920e19dc7dc7ec18b37bf19a684b69b5ad8f7dcca7867667a463012d6f08c339fa4842458edc5c8003d303d69e1
|
7
|
+
data.tar.gz: d9156d4cb46a3a232b4e80df2759bba624d2366ec691cece5f0c025a24ccc4faf5706a7e48f6ef35d4bde8b50b354fb3965ff54f056cca59f29beac926f71a73
|
data/lib/url_scrubber.rb
CHANGED
@@ -12,7 +12,7 @@ module UrlScrubber
|
|
12
12
|
|
13
13
|
url = url.clone # don't modify the original argument
|
14
14
|
|
15
|
-
m = url.match(/(htt?ps?:\/\/\S
|
15
|
+
m = url.match(/(htt?ps?:\/\/\S+)/i)
|
16
16
|
return nil unless m
|
17
17
|
|
18
18
|
url = m[1]
|
@@ -179,7 +179,11 @@ module UrlScrubber
|
|
179
179
|
|
180
180
|
def self.downcase_domain(url)
|
181
181
|
domain_match = url.match(%r{http://[^/]+}i)
|
182
|
-
|
182
|
+
if domain_match
|
183
|
+
domain_match[0].downcase + domain_match.post_match
|
184
|
+
else
|
185
|
+
url
|
186
|
+
end
|
183
187
|
end
|
184
188
|
|
185
189
|
|
@@ -285,13 +289,15 @@ module UrlScrubber
|
|
285
289
|
|
286
290
|
# TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
|
287
291
|
def self.sc_facebook(url)
|
288
|
-
|
289
|
-
|
292
|
+
|
293
|
+
url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
|
294
|
+
|
295
|
+
regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
|
290
296
|
regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
291
|
-
|
292
|
-
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
297
|
+
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
293
298
|
regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
|
294
299
|
regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
300
|
+
regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
295
301
|
|
296
302
|
# If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
|
297
303
|
# then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
|
@@ -300,10 +306,7 @@ module UrlScrubber
|
|
300
306
|
end
|
301
307
|
|
302
308
|
if url.match("/media/albums") || url.match("/media/set")
|
303
|
-
# puts "media"
|
304
309
|
url = url.match('\&') ? url.split('&',2)[0] : url
|
305
|
-
elsif url.include?('facebook.com/groups/')
|
306
|
-
url = drop_url_query!(url)
|
307
310
|
elsif mdata = url.match(regex1)
|
308
311
|
# "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
|
309
312
|
url = mdata[:url]
|
@@ -313,11 +316,6 @@ module UrlScrubber
|
|
313
316
|
# "https://www.facebook.com/profile.php?id=100009574328879"
|
314
317
|
url, http_response = check_for_facebook_redirection(mdata[:url])
|
315
318
|
uid = mdata[:uid]
|
316
|
-
elsif mdata = url.match(regex2a)
|
317
|
-
# "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
|
318
|
-
url = "http://facebook.com/profile.php?id=" + mdata[:uid]
|
319
|
-
url, http_response = check_for_facebook_redirection(url)
|
320
|
-
uid = mdata[:uid]
|
321
319
|
elsif mdata = url.match(regex4)
|
322
320
|
# "http://facebook.com/home.php?#!/person.name"
|
323
321
|
url = mdata[:url] + mdata[:uname]
|
@@ -326,11 +324,16 @@ module UrlScrubber
|
|
326
324
|
# "https://www.facebook.com/100009574328879"
|
327
325
|
url = "http://facebook.com/" + mdata[:uid]
|
328
326
|
uid = mdata[:uid]
|
327
|
+
elsif mdata = url.match(regex6)
|
328
|
+
# "http://business.facebook.com/home/accounts?business_id=1145724702268347"
|
329
|
+
url = mdata[:url]
|
330
|
+
uid = mdata[:uid]
|
329
331
|
elsif mdata = url.match(regex3)
|
330
332
|
# "http://facebook.com/TonyMollHomeLoans/timeline"
|
331
333
|
# "http://facebook.com/pg/TonyMollHomeLoans/timeline"
|
332
|
-
|
333
|
-
|
334
|
+
# "https://www.facebook.com/groups/practicewithclaritygroup"
|
335
|
+
if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
|
336
|
+
url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
|
334
337
|
uname = mdata[:uname]
|
335
338
|
end
|
336
339
|
url = drop_url_query!(url)
|
data/lib/url_scrubber/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.16
|
4
|
+
version: 0.8.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Colin Langton
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2021-01-08 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
125
|
- !ruby/object:Gem::Version
|
126
126
|
version: '0'
|
127
127
|
requirements: []
|
128
|
-
|
129
|
-
rubygems_version: 2.7.8
|
128
|
+
rubygems_version: 3.0.6
|
130
129
|
signing_key:
|
131
130
|
specification_version: 4
|
132
131
|
summary: Clean up URLs.
|