url_scrubber 0.8.15 → 0.8.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 608b40e9de605ac987e39f8fa5b1640d6543c8dc8a83553f2689f2fbe716b50a
4
- data.tar.gz: d6d1c7905a4875ef9fb6f2ce7ab03f18ff31e524b180c3e13bd10467513c79b8
3
+ metadata.gz: 5392c3c6b9af7ba8d175315c846027174b8801d0a8371525a6c7eb1eb0e115be
4
+ data.tar.gz: dc671761ac0e450afd851a1c8890db403b68a47af341485408ba10f01e24f056
5
5
  SHA512:
6
- metadata.gz: f6f43dd74cec24acd3f7e2a376b89476f00eb09e627a96c55ba5ec0e97c7ccc994385858183c13230a98066d2b687a462225a3347f6aa27d36f8a668eaef087c
7
- data.tar.gz: 395a1561434cc85197f10211393cfdaabea3e35fb50a2559f0c0c3dd669aea3cd13b8e68192a4eda105a2abce1c7476bde167ad00c8704d0e62052ce92ca94ba
6
+ metadata.gz: 6d0b8e970868607dea54065027ef662a59780b88db21cfb36e933cff0086ecaedac63e1f23e588e6a7f9d0bcb649950385b5351e354116e825ff65649adad1a1
7
+ data.tar.gz: 70fcdd156dc4aa2aae15a0b16e0a8e70fdd14808003840719ed9cbda415113fcf94ca26facff5d34a4fa26ef7ce568037a945d09f22c7ececf64dec075e6529b
@@ -12,7 +12,7 @@ module UrlScrubber
12
12
 
13
13
  url = url.clone # don't modify the original argument
14
14
 
15
- m = url.match(/(htt?ps?:\/\/\S*)/i)
15
+ m = url.match(/(htt?ps?:\/\/\S+)/i)
16
16
  return nil unless m
17
17
 
18
18
  url = m[1]
@@ -48,6 +48,8 @@ module UrlScrubber
48
48
  when 'tumblr' then return :tumblr
49
49
  when 'twitter' then return :twitter
50
50
  when 'vimeo' then return :vimeo
51
+ when 'vk' then return :vkontakte
52
+ when 'weibo' then return :weibo
51
53
  when 'yelp' then return :yelp
52
54
  when 'youtube' then return :youtube
53
55
  end
@@ -69,6 +71,10 @@ module UrlScrubber
69
71
  return false unless url
70
72
 
71
73
  case service_of(url)
74
+ when :vkontakte
75
+ !!url.match(%r{^http://vk\.com/[\w_]+$})
76
+ when :weibo
77
+ !!url.match(%r{^http://weibo\.com/[\w_-]+$})
72
78
  when :youtube
73
79
  !!url.match(%r{^http://youtube\.com/[\w_-]+$})
74
80
  when :twitter
@@ -279,13 +285,15 @@ module UrlScrubber
279
285
 
280
286
  # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
281
287
  def self.sc_facebook(url)
282
- #puts "sc_facebook: #{url}"
283
- regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
288
+
289
+ url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
290
+
291
+ regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
284
292
  regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
285
- regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
286
- regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
293
+ regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
287
294
  regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
288
295
  regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
296
+ regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
289
297
 
290
298
  # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
291
299
  # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -294,10 +302,7 @@ module UrlScrubber
294
302
  end
295
303
 
296
304
  if url.match("/media/albums") || url.match("/media/set")
297
- # puts "media"
298
305
  url = url.match('\&') ? url.split('&',2)[0] : url
299
- elsif url.include?('facebook.com/groups/')
300
- url = drop_url_query!(url)
301
306
  elsif mdata = url.match(regex1)
302
307
  # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
303
308
  url = mdata[:url]
@@ -307,11 +312,6 @@ module UrlScrubber
307
312
  # "https://www.facebook.com/profile.php?id=100009574328879"
308
313
  url, http_response = check_for_facebook_redirection(mdata[:url])
309
314
  uid = mdata[:uid]
310
- elsif mdata = url.match(regex2a)
311
- # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
312
- url = "http://facebook.com/profile.php?id=" + mdata[:uid]
313
- url, http_response = check_for_facebook_redirection(url)
314
- uid = mdata[:uid]
315
315
  elsif mdata = url.match(regex4)
316
316
  # "http://facebook.com/home.php?#!/person.name"
317
317
  url = mdata[:url] + mdata[:uname]
@@ -320,11 +320,16 @@ module UrlScrubber
320
320
  # "https://www.facebook.com/100009574328879"
321
321
  url = "http://facebook.com/" + mdata[:uid]
322
322
  uid = mdata[:uid]
323
+ elsif mdata = url.match(regex6)
324
+ # "http://business.facebook.com/home/accounts?business_id=1145724702268347"
325
+ url = mdata[:url]
326
+ uid = mdata[:uid]
323
327
  elsif mdata = url.match(regex3)
324
328
  # "http://facebook.com/TonyMollHomeLoans/timeline"
325
329
  # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
326
- if ["page", "pages", "pg"].exclude?(mdata[:uname])
327
- url = "http://facebook.com/" + mdata[:uname]
330
+ # "https://www.facebook.com/groups/practicewithclaritygroup"
331
+ if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
332
+ url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
328
333
  uname = mdata[:uname]
329
334
  end
330
335
  url = drop_url_query!(url)
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.8.15"
2
+ VERSION = "0.8.20"
3
3
  end
@@ -7,6 +7,7 @@ Gem::Specification.new do |gem|
7
7
  gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
8
8
  gem.summary = %q{Clean up URLs.}
9
9
  gem.homepage = "http://brandle.net"
10
+ gem.license = "MIT"
10
11
 
11
12
  gem.files = `git ls-files`.split($\)
12
13
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.15
4
+ version: 0.8.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Colin Langton
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2018-11-22 00:00:00.000000000 Z
15
+ date: 2020-08-28 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec
@@ -97,7 +97,6 @@ extensions: []
97
97
  extra_rdoc_files: []
98
98
  files:
99
99
  - ".gitignore"
100
- - ".rvmrc"
101
100
  - Gemfile
102
101
  - Guardfile
103
102
  - README.md
@@ -108,7 +107,8 @@ files:
108
107
  - spec/url_scrubber_spec.rb
109
108
  - url_scrubber.gemspec
110
109
  homepage: http://brandle.net
111
- licenses: []
110
+ licenses:
111
+ - MIT
112
112
  metadata: {}
113
113
  post_install_message:
114
114
  rdoc_options: []
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  - !ruby/object:Gem::Version
126
126
  version: '0'
127
127
  requirements: []
128
- rubyforge_project:
129
- rubygems_version: 2.7.7
128
+ rubygems_version: 3.0.6
130
129
  signing_key:
131
130
  specification_version: 4
132
131
  summary: Clean up URLs.
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm use 1.9.3@url_scrubber --create