url_scrubber 0.8.15 → 0.8.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/url_scrubber.rb +20 -15
- data/lib/url_scrubber/version.rb +1 -1
- data/url_scrubber.gemspec +1 -0
- metadata +5 -6
- data/.rvmrc +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5392c3c6b9af7ba8d175315c846027174b8801d0a8371525a6c7eb1eb0e115be
|
4
|
+
data.tar.gz: dc671761ac0e450afd851a1c8890db403b68a47af341485408ba10f01e24f056
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6d0b8e970868607dea54065027ef662a59780b88db21cfb36e933cff0086ecaedac63e1f23e588e6a7f9d0bcb649950385b5351e354116e825ff65649adad1a1
|
7
|
+
data.tar.gz: 70fcdd156dc4aa2aae15a0b16e0a8e70fdd14808003840719ed9cbda415113fcf94ca26facff5d34a4fa26ef7ce568037a945d09f22c7ececf64dec075e6529b
|
data/lib/url_scrubber.rb
CHANGED
@@ -12,7 +12,7 @@ module UrlScrubber
|
|
12
12
|
|
13
13
|
url = url.clone # don't modify the original argument
|
14
14
|
|
15
|
-
m = url.match(/(htt?ps?:\/\/\S
|
15
|
+
m = url.match(/(htt?ps?:\/\/\S+)/i)
|
16
16
|
return nil unless m
|
17
17
|
|
18
18
|
url = m[1]
|
@@ -48,6 +48,8 @@ module UrlScrubber
|
|
48
48
|
when 'tumblr' then return :tumblr
|
49
49
|
when 'twitter' then return :twitter
|
50
50
|
when 'vimeo' then return :vimeo
|
51
|
+
when 'vk' then return :vkontakte
|
52
|
+
when 'weibo' then return :weibo
|
51
53
|
when 'yelp' then return :yelp
|
52
54
|
when 'youtube' then return :youtube
|
53
55
|
end
|
@@ -69,6 +71,10 @@ module UrlScrubber
|
|
69
71
|
return false unless url
|
70
72
|
|
71
73
|
case service_of(url)
|
74
|
+
when :vkontakte
|
75
|
+
!!url.match(%r{^http://vk\.com/[\w_]+$})
|
76
|
+
when :weibo
|
77
|
+
!!url.match(%r{^http://weibo\.com/[\w_-]+$})
|
72
78
|
when :youtube
|
73
79
|
!!url.match(%r{^http://youtube\.com/[\w_-]+$})
|
74
80
|
when :twitter
|
@@ -279,13 +285,15 @@ module UrlScrubber
|
|
279
285
|
|
280
286
|
# TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
|
281
287
|
def self.sc_facebook(url)
|
282
|
-
|
283
|
-
|
288
|
+
|
289
|
+
url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
|
290
|
+
|
291
|
+
regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
|
284
292
|
regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
285
|
-
|
286
|
-
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
293
|
+
regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
|
287
294
|
regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
|
288
295
|
regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
296
|
+
regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
|
289
297
|
|
290
298
|
# If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
|
291
299
|
# then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
|
@@ -294,10 +302,7 @@ module UrlScrubber
|
|
294
302
|
end
|
295
303
|
|
296
304
|
if url.match("/media/albums") || url.match("/media/set")
|
297
|
-
# puts "media"
|
298
305
|
url = url.match('\&') ? url.split('&',2)[0] : url
|
299
|
-
elsif url.include?('facebook.com/groups/')
|
300
|
-
url = drop_url_query!(url)
|
301
306
|
elsif mdata = url.match(regex1)
|
302
307
|
# "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
|
303
308
|
url = mdata[:url]
|
@@ -307,11 +312,6 @@ module UrlScrubber
|
|
307
312
|
# "https://www.facebook.com/profile.php?id=100009574328879"
|
308
313
|
url, http_response = check_for_facebook_redirection(mdata[:url])
|
309
314
|
uid = mdata[:uid]
|
310
|
-
elsif mdata = url.match(regex2a)
|
311
|
-
# "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
|
312
|
-
url = "http://facebook.com/profile.php?id=" + mdata[:uid]
|
313
|
-
url, http_response = check_for_facebook_redirection(url)
|
314
|
-
uid = mdata[:uid]
|
315
315
|
elsif mdata = url.match(regex4)
|
316
316
|
# "http://facebook.com/home.php?#!/person.name"
|
317
317
|
url = mdata[:url] + mdata[:uname]
|
@@ -320,11 +320,16 @@ module UrlScrubber
|
|
320
320
|
# "https://www.facebook.com/100009574328879"
|
321
321
|
url = "http://facebook.com/" + mdata[:uid]
|
322
322
|
uid = mdata[:uid]
|
323
|
+
elsif mdata = url.match(regex6)
|
324
|
+
# "http://business.facebook.com/home/accounts?business_id=1145724702268347"
|
325
|
+
url = mdata[:url]
|
326
|
+
uid = mdata[:uid]
|
323
327
|
elsif mdata = url.match(regex3)
|
324
328
|
# "http://facebook.com/TonyMollHomeLoans/timeline"
|
325
329
|
# "http://facebook.com/pg/TonyMollHomeLoans/timeline"
|
326
|
-
|
327
|
-
|
330
|
+
# "https://www.facebook.com/groups/practicewithclaritygroup"
|
331
|
+
if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
|
332
|
+
url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
|
328
333
|
uname = mdata[:uname]
|
329
334
|
end
|
330
335
|
url = drop_url_query!(url)
|
data/lib/url_scrubber/version.rb
CHANGED
data/url_scrubber.gemspec
CHANGED
@@ -7,6 +7,7 @@ Gem::Specification.new do |gem|
|
|
7
7
|
gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
|
8
8
|
gem.summary = %q{Clean up URLs.}
|
9
9
|
gem.homepage = "http://brandle.net"
|
10
|
+
gem.license = "MIT"
|
10
11
|
|
11
12
|
gem.files = `git ls-files`.split($\)
|
12
13
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_scrubber
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.15
|
4
|
+
version: 0.8.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Colin Langton
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2020-08-28 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: rspec
|
@@ -97,7 +97,6 @@ extensions: []
|
|
97
97
|
extra_rdoc_files: []
|
98
98
|
files:
|
99
99
|
- ".gitignore"
|
100
|
-
- ".rvmrc"
|
101
100
|
- Gemfile
|
102
101
|
- Guardfile
|
103
102
|
- README.md
|
@@ -108,7 +107,8 @@ files:
|
|
108
107
|
- spec/url_scrubber_spec.rb
|
109
108
|
- url_scrubber.gemspec
|
110
109
|
homepage: http://brandle.net
|
111
|
-
licenses:
|
110
|
+
licenses:
|
111
|
+
- MIT
|
112
112
|
metadata: {}
|
113
113
|
post_install_message:
|
114
114
|
rdoc_options: []
|
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
125
|
- !ruby/object:Gem::Version
|
126
126
|
version: '0'
|
127
127
|
requirements: []
|
128
|
-
|
129
|
-
rubygems_version: 2.7.7
|
128
|
+
rubygems_version: 3.0.6
|
130
129
|
signing_key:
|
131
130
|
specification_version: 4
|
132
131
|
summary: Clean up URLs.
|
data/.rvmrc
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
rvm use 1.9.3@url_scrubber --create
|