url_scrubber 0.8.15 → 0.8.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 608b40e9de605ac987e39f8fa5b1640d6543c8dc8a83553f2689f2fbe716b50a
4
- data.tar.gz: d6d1c7905a4875ef9fb6f2ce7ab03f18ff31e524b180c3e13bd10467513c79b8
3
+ metadata.gz: 5392c3c6b9af7ba8d175315c846027174b8801d0a8371525a6c7eb1eb0e115be
4
+ data.tar.gz: dc671761ac0e450afd851a1c8890db403b68a47af341485408ba10f01e24f056
5
5
  SHA512:
6
- metadata.gz: f6f43dd74cec24acd3f7e2a376b89476f00eb09e627a96c55ba5ec0e97c7ccc994385858183c13230a98066d2b687a462225a3347f6aa27d36f8a668eaef087c
7
- data.tar.gz: 395a1561434cc85197f10211393cfdaabea3e35fb50a2559f0c0c3dd669aea3cd13b8e68192a4eda105a2abce1c7476bde167ad00c8704d0e62052ce92ca94ba
6
+ metadata.gz: 6d0b8e970868607dea54065027ef662a59780b88db21cfb36e933cff0086ecaedac63e1f23e588e6a7f9d0bcb649950385b5351e354116e825ff65649adad1a1
7
+ data.tar.gz: 70fcdd156dc4aa2aae15a0b16e0a8e70fdd14808003840719ed9cbda415113fcf94ca26facff5d34a4fa26ef7ce568037a945d09f22c7ececf64dec075e6529b
@@ -12,7 +12,7 @@ module UrlScrubber
12
12
 
13
13
  url = url.clone # don't modify the original argument
14
14
 
15
- m = url.match(/(htt?ps?:\/\/\S*)/i)
15
+ m = url.match(/(htt?ps?:\/\/\S+)/i)
16
16
  return nil unless m
17
17
 
18
18
  url = m[1]
@@ -48,6 +48,8 @@ module UrlScrubber
48
48
  when 'tumblr' then return :tumblr
49
49
  when 'twitter' then return :twitter
50
50
  when 'vimeo' then return :vimeo
51
+ when 'vk' then return :vkontakte
52
+ when 'weibo' then return :weibo
51
53
  when 'yelp' then return :yelp
52
54
  when 'youtube' then return :youtube
53
55
  end
@@ -69,6 +71,10 @@ module UrlScrubber
69
71
  return false unless url
70
72
 
71
73
  case service_of(url)
74
+ when :vkontakte
75
+ !!url.match(%r{^http://vk\.com/[\w_]+$})
76
+ when :weibo
77
+ !!url.match(%r{^http://weibo\.com/[\w_-]+$})
72
78
  when :youtube
73
79
  !!url.match(%r{^http://youtube\.com/[\w_-]+$})
74
80
  when :twitter
@@ -279,13 +285,15 @@ module UrlScrubber
279
285
 
280
286
  # TODO This needs to be rewritten to be independent of the Facebook domain and public suffix used: e.g. facebook.com vs fb.com vs. fb.me
281
287
  def self.sc_facebook(url)
282
- #puts "sc_facebook: #{url}"
283
- regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
288
+
289
+ url = url.gsub(/(_rdr=.+&)|(&_rdr=.+$)/,"")
290
+
291
+ regex1 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>.*)[\/-](?<uid>[0-9]+))($|\/|\/(about|timeline|info|app_)?)/i
284
292
  regex2 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
285
- regex2a = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/profile.php\?_rdr=p&id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
286
- regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/((pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
293
+ regex3 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/(((?<group>groups?)|pages?|pg)\/)*(?<uname>[^\?\/]*))($|\/$|\/(about|timeline|info|app_.*)?)/i
287
294
  regex4 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/)(?<php>home.php\?([#!]+\/)*)(?<uname>.*)/i
288
295
  regex5 = /^(?<url>(https?:\/\/)((business|www)\.)?facebook\.com\/(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
296
+ regex6 = /^(?<url>(https?:\/\/)((www|business)\.)?facebook\.com\/home\/accounts\?business_id=(?<uid>[0-9]+))($|\/|\/.*|&.*)/i
289
297
 
290
298
  # If the user gives us a path to a Post, "http://facebook.com/LoansByJanet/posts/1691075027771418"
291
299
  # then drop the post part, "/posts/1691075027771418" to get the base url, "http://facebook.com/LoansByJanet/"
@@ -294,10 +302,7 @@ module UrlScrubber
294
302
  end
295
303
 
296
304
  if url.match("/media/albums") || url.match("/media/set")
297
- # puts "media"
298
305
  url = url.match('\&') ? url.split('&',2)[0] : url
299
- elsif url.include?('facebook.com/groups/')
300
- url = drop_url_query!(url)
301
306
  elsif mdata = url.match(regex1)
302
307
  # "http://facebook.com/pages/Command-Canada/1434248516885065/timeline"
303
308
  url = mdata[:url]
@@ -307,11 +312,6 @@ module UrlScrubber
307
312
  # "https://www.facebook.com/profile.php?id=100009574328879"
308
313
  url, http_response = check_for_facebook_redirection(mdata[:url])
309
314
  uid = mdata[:uid]
310
- elsif mdata = url.match(regex2a)
311
- # "https://www.facebook.com/profile.php?_rdr=p&id=100009574328879"
312
- url = "http://facebook.com/profile.php?id=" + mdata[:uid]
313
- url, http_response = check_for_facebook_redirection(url)
314
- uid = mdata[:uid]
315
315
  elsif mdata = url.match(regex4)
316
316
  # "http://facebook.com/home.php?#!/person.name"
317
317
  url = mdata[:url] + mdata[:uname]
@@ -320,11 +320,16 @@ module UrlScrubber
320
320
  # "https://www.facebook.com/100009574328879"
321
321
  url = "http://facebook.com/" + mdata[:uid]
322
322
  uid = mdata[:uid]
323
+ elsif mdata = url.match(regex6)
324
+ # "http://business.facebook.com/home/accounts?business_id=1145724702268347"
325
+ url = mdata[:url]
326
+ uid = mdata[:uid]
323
327
  elsif mdata = url.match(regex3)
324
328
  # "http://facebook.com/TonyMollHomeLoans/timeline"
325
329
  # "http://facebook.com/pg/TonyMollHomeLoans/timeline"
326
- if ["page", "pages", "pg"].exclude?(mdata[:uname])
327
- url = "http://facebook.com/" + mdata[:uname]
330
+ # "https://www.facebook.com/groups/practicewithclaritygroup"
331
+ if ["group", "groups", "page", "pages", "pg"].exclude?(mdata[:uname])
332
+ url = (mdata[:group] ? "http://facebook.com/groups/" : "http://facebook.com/") + mdata[:uname]
328
333
  uname = mdata[:uname]
329
334
  end
330
335
  url = drop_url_query!(url)
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.8.15"
2
+ VERSION = "0.8.20"
3
3
  end
@@ -7,6 +7,7 @@ Gem::Specification.new do |gem|
7
7
  gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
8
8
  gem.summary = %q{Clean up URLs.}
9
9
  gem.homepage = "http://brandle.net"
10
+ gem.license = "MIT"
10
11
 
11
12
  gem.files = `git ls-files`.split($\)
12
13
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.15
4
+ version: 0.8.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Colin Langton
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2018-11-22 00:00:00.000000000 Z
15
+ date: 2020-08-28 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: rspec
@@ -97,7 +97,6 @@ extensions: []
97
97
  extra_rdoc_files: []
98
98
  files:
99
99
  - ".gitignore"
100
- - ".rvmrc"
101
100
  - Gemfile
102
101
  - Guardfile
103
102
  - README.md
@@ -108,7 +107,8 @@ files:
108
107
  - spec/url_scrubber_spec.rb
109
108
  - url_scrubber.gemspec
110
109
  homepage: http://brandle.net
111
- licenses: []
110
+ licenses:
111
+ - MIT
112
112
  metadata: {}
113
113
  post_install_message:
114
114
  rdoc_options: []
@@ -125,8 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  - !ruby/object:Gem::Version
126
126
  version: '0'
127
127
  requirements: []
128
- rubyforge_project:
129
- rubygems_version: 2.7.7
128
+ rubygems_version: 3.0.6
130
129
  signing_key:
131
130
  specification_version: 4
132
131
  summary: Clean up URLs.
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm use 1.9.3@url_scrubber --create