url_scrubber 0.8.15 → 0.8.20
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/url_scrubber.rb +20 -15
- data/lib/url_scrubber/version.rb +1 -1
- data/url_scrubber.gemspec +1 -0
- metadata +5 -6
- data/.rvmrc +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5392c3c6b9af7ba8d175315c846027174b8801d0a8371525a6c7eb1eb0e115be
|
4
|
+
data.tar.gz: dc671761ac0e450afd851a1c8890db403b68a47af341485408ba10f01e24f056
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d0b8e970868607dea54065027ef662a59780b88db21cfb36e933cff0086ecaedac63e1f23e588e6a7f9d0bcb649950385b5351e354116e825ff65649adad1a1
|
7
|
+
data.tar.gz: 70fcdd156dc4aa2aae15a0b16e0a8e70fdd14808003840719ed9cbda415113fcf94ca26facff5d34a4fa26ef7ce568037a945d09f22c7ececf64dec075e6529b
|
data/lib/url_scrubber.rb
CHANGED
@@ -12,7 +12,7 @@ module UrlScrubber
|
|
12
12
|
|
13
13
|
url = url.clone # don't modify the original argument
|
14
14
|
|
15
|
-
m = url.match(/(htt?ps?:\/\/\S
|
15
|
+
m = url.match(/(htt?ps?:\/\/\S+)/i)
|
16
16
|
return nil unless m
|
17
17
|
|
18
18
|
url = m[1]
|
@@ -48,6 +48,8 @@ module UrlScrubber
|
|
48
48
|
when 'tumblr' then return :tumblr
|
49
49
|
when 'twitter' then return :twitter
|
50
50
|
when 'vimeo' then return :vimeo
|
51
|
+
when 'vk' then return :vkontakte
|
52
|
+
when 'weibo' then return :weibo
|
51
53
|
when 'yelp' then return :yelp
|
52
54
|
when 'youtube' then return :youtube
|
53
55
|
end
|
@@ -69,6 +71,10 @@ module UrlScrubber
|
|
69
71
|
return false unless url
|
70
72
|
|
71
73
|
case service_of(url)
|
74
|
+
when :vkontakte
|
75
|
+
!!url.match(%r{^http://vk\.com/[\w_]+$})
|
76
|
+
when :weibo
|
77
|
+
!!url.match(%r{^http://weibo\.com/[\w_-]+$})
|
72
78
|
when :youtube
|
73
79
|
!!url.match(%r{^http://youtube\.com/[\w_-]+$})
|
74
80
|
when :twitter
|
@@ -279,13 +285,15 @@ module UrlScrubber
|
|
279
285
|
|
280
286
|
# TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
|
281
287
|
def self.sc_facebook(url)
|
282
|
-
|
283
|
-
|
288
|
+
|
289
|
+
url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
|
290
|
+
|
291
|
+
regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
|
284
292
|
regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
285
|
-
|
286
|
-
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
293
|
+
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
287
294
|
regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
|
288
295
|
regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
296
|
+
regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
289
297
|
|
290
298
|
# If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
|
291
299
|
# then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
|
@@ -294,10 +302,7 @@ module UrlScrubber
|
|
294
302
|
end
|
295
303
|
|
296
304
|
if url.match("/media/albums") || url.match("/media/set")
|
297
|
-
# puts "media"
|
298
305
|
url = url.match('\&') ? url.split('&',2)[0] : url
|
299
|
-
elsif url.include?('facebook.com/groups/')
|
300
|
-
url = drop_url_query!(url)
|
301
306
|
elsif mdata = url.match(regex1)
|
302
307
|
# "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
|
303
308
|
url = mdata[:url]
|
@@ -307,11 +312,6 @@ module UrlScrubber
|
|
307
312
|
# "https://www.facebook.com/profile.php?id=100009574328879"
|
308
313
|
url, http_response = check_for_facebook_redirection(mdata[:url])
|
309
314
|
uid = mdata[:uid]
|
310
|
-
elsif mdata = url.match(regex2a)
|
311
|
-
# "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
|
312
|
-
url = "http://facebook.com/profile.php?id=" + mdata[:uid]
|
313
|
-
url, http_response = check_for_facebook_redirection(url)
|
314
|
-
uid = mdata[:uid]
|
315
315
|
elsif mdata = url.match(regex4)
|
316
316
|
# "http://facebook.com/home.php?#!/person.name"
|
317
317
|
url = mdata[:url] + mdata[:uname]
|
@@ -320,11 +320,16 @@ module UrlScrubber
|
|
320
320
|
# "https://www.facebook.com/100009574328879"
|
321
321
|
url = "http://facebook.com/" + mdata[:uid]
|
322
322
|
uid = mdata[:uid]
|
323
|
+
elsif mdata = url.match(regex6)
|
324
|
+
# "http://business.facebook.com/home/accounts?business_id=1145724702268347"
|
325
|
+
url = mdata[:url]
|
326
|
+
uid = mdata[:uid]
|
323
327
|
elsif mdata = url.match(regex3)
|
324
328
|
# "http://facebook.com/TonyMollHomeLoans/timeline"
|
325
329
|
# "http://facebook.com/pg/TonyMollHomeLoans/timeline"
|
326
|
-
|
327
|
-
|
330
|
+
# "https://www.facebook.com/groups/practicewithclaritygroup"
|
331
|
+
if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
|
332
|
+
url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
|
328
333
|
uname = mdata[:uname]
|
329
334
|
end
|
330
335
|
url = drop_url_query!(url)
|
data/lib/url_scrubber/version.rb
CHANGED
data/url_scrubber.gemspec
CHANGED
@@ -7,6 +7,7 @@ Gem::Specification.new do |gem|
|
|
7
7
|
gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
|
8
8
|
gem.summary = %q{Clean up URLs.}
|
9
9
|
gem.homepage = "http://brandle.net"
|
10
|
+
gem.license = "MIT"
|
10
11
|
|
11
12
|
gem.files = `git ls-files`.split($\)
|
12
13
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Colin Langton
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2020-08-28 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
@@ -97,7 +97,6 @@ extensions: []
|
|
97
97
|
extra_rdoc_files: []
|
98
98
|
files:
|
99
99
|
- ".gitignore"
|
100
|
-
- ".rvmrc"
|
101
100
|
- Gemfile
|
102
101
|
- Guardfile
|
103
102
|
- README.md
|
@@ -108,7 +107,8 @@ files:
|
|
108
107
|
- spec/url_scrubber_spec.rb
|
109
108
|
- url_scrubber.gemspec
|
110
109
|
homepage: http://brandle.net
|
111
|
-
licenses:
|
110
|
+
licenses:
|
111
|
+
- MIT
|
112
112
|
metadata: {}
|
113
113
|
post_install_message:
|
114
114
|
rdoc_options: []
|
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
125
|
- !ruby/object:Gem::Version
|
126
126
|
version: '0'
|
127
127
|
requirements: []
|
128
|
-
|
129
|
-
rubygems_version: 2.7.7
|
128
|
+
rubygems_version: 3.0.6
|
130
129
|
signing_key:
|
131
130
|
specification_version: 4
|
132
131
|
summary: Clean up URLs.
|
data/.rvmrc
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
rvm use 1.9.3@url_scrubber --create
|