url_scrubber 0.7.10 → 0.7.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dc1cea704c5440cf1114fe0ab1a013963bb0184f
4
+ data.tar.gz: 15e202c908bc49f2dd65aa1ab8bdb482bdc0fa71
5
+ SHA512:
6
+ metadata.gz: e12356b808e46f5e2bb292105eb69b001b9a7ef42c65908351bfac100fd5bcbc29cffe8740ad75e0e55ea1b2fd0a8981f101333ad3198718ce743a3fb2721f7f
7
+ data.tar.gz: 4b0961d123a5962dc027e82e11b43ac6135bebf94ac92e6462af1634e4734fb90d63dea6edd146ec09f76903416522d535a2f8f3845500b71b1bc213c0d21e0c
data/lib/url_scrubber.rb CHANGED
@@ -112,6 +112,18 @@ module UrlScrubber
112
112
  end
113
113
  end
114
114
 
115
+ # Requirements:
116
+ # 1. must have http/https scheme
117
+ # 2. no "@" anywhere in the passed-in url string
118
+ # 3. valid uri as determined by URI.parse
119
+ def self.valid_url?(url)
120
+ schemes = %w(http https)
121
+ parsed = URI.parse(url) or return false
122
+ schemes.include?(parsed.scheme) && !url.include?("@")
123
+ rescue URI::InvalidURIError
124
+ false
125
+ end
126
+
115
127
  private
116
128
 
117
129
  def self.downcase_domain(url)
@@ -238,68 +250,66 @@ module UrlScrubber
238
250
  url
239
251
  end
240
252
 
241
- private
242
-
243
- def self.check_for_redirection(uri_str, limit = 5)
244
- login_patterns = [
245
- # pages that require user logins
246
- %r{^.*/login[^/]*$}
247
- ]
248
-
249
- failure_patterns = [
250
- # pages that give 200 codes but actually indicate a not found
251
- %r{linkedin\.com/home\?report%2Efailure}i
252
- ]
253
-
254
- raise 'Too many HTTP redirects' if limit == 0
255
-
256
- begin
257
- url = URI.parse(uri_str)
258
- rescue URI::InvalidURIError => e
259
- return [uri_str, CustomError.new(786, "Invalid URI #{uri_str} : #{e.message}") ]
260
- end
253
+ def self.check_for_redirection(uri_str, limit = 5)
254
+ login_patterns = [
255
+ # pages that require user logins
256
+ %r{^.*/login[^/]*$}
257
+ ]
258
+
259
+ failure_patterns = [
260
+ # pages that give 200 codes but actually indicate a not found
261
+ %r{linkedin\.com/home\?report%2Efailure}i
262
+ ]
263
+
264
+ raise 'Too many HTTP redirects' if limit == 0
265
+
266
+ begin
267
+ url = URI.parse(uri_str)
268
+ rescue URI::InvalidURIError => e
269
+ return [uri_str, CustomError.new(786, "Invalid URI #{uri_str} : #{e.message}") ]
270
+ end
261
271
 
262
- http = Net::HTTP.new(url.host, url.port)
263
- if url.port == 443
264
- http.use_ssl = true
265
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
266
- else
267
- http.use_ssl = false
272
+ http = Net::HTTP.new(url.host, url.port)
273
+ if url.port == 443
274
+ http.use_ssl = true
275
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
276
+ else
277
+ http.use_ssl = false
278
+ end
279
+ request = Net::HTTP::Get.new(url.request_uri, { 'User-Agent' => USER_AGENT })
280
+
281
+ begin
282
+ response = http.request(request)
283
+ rescue Exception => e
284
+ failure_response = Net::HTTPClientError.new('1.1', '404', 'Not Found')
285
+ return [uri_str, failure_response]
268
286
  end
269
- request = Net::HTTP::Get.new(url.request_uri, { 'User-Agent' => USER_AGENT })
270
287
 
271
- begin
272
- response = http.request(request)
273
- rescue Exception => e
274
- failure_response = Net::HTTPClientError.new('1.1', '404', 'Not Found')
275
- return [uri_str, failure_response]
288
+ if response.is_a? Net::HTTPRedirection
289
+ if response['location'][0,4] == "http"
290
+ if failure_patterns.any? { |pattern| response['location'].match(pattern) }
291
+ # got redirected to a page indicating failure, so act like it's a 404
292
+ failure_response = Net::HTTPClientError.new('1.1', '404', 'Not Found')
293
+ return [uri_str, failure_response]
276
294
  end
277
295
 
278
- if response.is_a? Net::HTTPRedirection
279
- if response['location'][0,4] == "http"
280
- if failure_patterns.any? { |pattern| response['location'].match(pattern) }
281
- # got redirected to a page indicating failure, so act like it's a 404
282
- failure_response = Net::HTTPClientError.new('1.1', '404', 'Not Found')
283
- return [uri_str, failure_response]
284
- end
285
-
286
- redirected_url, base_response = check_for_redirection(response['location'], limit - 1)
287
-
288
- if login_patterns.any? { |pattern| redirected_url.match(pattern) }
289
- # got redirected to a login page. return the ultimate response, but the previous url
290
- return [uri_str, base_response]
291
- else
292
- return [redirected_url, base_response]
293
- end
296
+ redirected_url, base_response = check_for_redirection(response['location'], limit - 1)
294
297
 
298
+ if login_patterns.any? { |pattern| redirected_url.match(pattern) }
299
+ # got redirected to a login page. return the ultimate response, but the previous url
300
+ return [uri_str, base_response]
295
301
  else
296
- redir_url = "http://#{url.host}#{response['location']}"
297
- redirected_url, base_response = check_for_redirection(redir_url, limit - 1)
298
302
  return [redirected_url, base_response]
299
303
  end
304
+
300
305
  else
301
- return [uri_str, response]
306
+ redir_url = "http://#{url.host}#{response['location']}"
307
+ redirected_url, base_response = check_for_redirection(redir_url, limit - 1)
308
+ return [redirected_url, base_response]
302
309
  end
310
+ else
311
+ return [uri_str, response]
303
312
  end
313
+ end
304
314
 
305
315
  end
@@ -1,3 +1,3 @@
1
1
  module UrlScrubber
2
- VERSION = "0.7.10"
2
+ VERSION = "0.7.11"
3
3
  end
data/url_scrubber.gemspec CHANGED
@@ -2,8 +2,8 @@
2
2
  require File.expand_path('../lib/url_scrubber/version', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
- gem.authors = ["Colin Langton", "Christopher Maujean", "David Hillard"]
6
- gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net", "dhillard@brandle.net"]
5
+ gem.authors = ["Colin Langton", "Christopher Maujean", "David Hillard", "Edgar Abadines"]
6
+ gem.email = ["colin@hoteldelta.net", "cmaujean@brandle.net", "dhillard@brandle.net", "ed@brandle.net"]
7
7
  gem.description = %q{Remove extraneous bits from URLs, follow redirects, identify social media urls, etc.}
8
8
  gem.summary = %q{Clean up URLs.}
9
9
  gem.homepage = "http://brandle.net"
metadata CHANGED
@@ -1,22 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_scrubber
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10
5
- prerelease:
4
+ version: 0.7.11
6
5
  platform: ruby
7
6
  authors:
8
7
  - Colin Langton
9
8
  - Christopher Maujean
10
9
  - David Hillard
10
+ - Edgar Abadines
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-09-27 00:00:00.000000000 Z
14
+ date: 2014-02-03 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec
18
18
  requirement: !ruby/object:Gem::Requirement
19
- none: false
20
19
  requirements:
21
20
  - - ~>
22
21
  - !ruby/object:Gem::Version
@@ -24,7 +23,6 @@ dependencies:
24
23
  type: :development
25
24
  prerelease: false
26
25
  version_requirements: !ruby/object:Gem::Requirement
27
- none: false
28
26
  requirements:
29
27
  - - ~>
30
28
  - !ruby/object:Gem::Version
@@ -32,7 +30,6 @@ dependencies:
32
30
  - !ruby/object:Gem::Dependency
33
31
  name: guard-bundler
34
32
  requirement: !ruby/object:Gem::Requirement
35
- none: false
36
33
  requirements:
37
34
  - - ~>
38
35
  - !ruby/object:Gem::Version
@@ -40,7 +37,6 @@ dependencies:
40
37
  type: :development
41
38
  prerelease: false
42
39
  version_requirements: !ruby/object:Gem::Requirement
43
- none: false
44
40
  requirements:
45
41
  - - ~>
46
42
  - !ruby/object:Gem::Version
@@ -48,7 +44,6 @@ dependencies:
48
44
  - !ruby/object:Gem::Dependency
49
45
  name: guard-rspec
50
46
  requirement: !ruby/object:Gem::Requirement
51
- none: false
52
47
  requirements:
53
48
  - - ~>
54
49
  - !ruby/object:Gem::Version
@@ -56,7 +51,6 @@ dependencies:
56
51
  type: :development
57
52
  prerelease: false
58
53
  version_requirements: !ruby/object:Gem::Requirement
59
- none: false
60
54
  requirements:
61
55
  - - ~>
62
56
  - !ruby/object:Gem::Version
@@ -64,7 +58,6 @@ dependencies:
64
58
  - !ruby/object:Gem::Dependency
65
59
  name: terminal-notifier-guard
66
60
  requirement: !ruby/object:Gem::Requirement
67
- none: false
68
61
  requirements:
69
62
  - - ! '>='
70
63
  - !ruby/object:Gem::Version
@@ -72,7 +65,6 @@ dependencies:
72
65
  type: :development
73
66
  prerelease: false
74
67
  version_requirements: !ruby/object:Gem::Requirement
75
- none: false
76
68
  requirements:
77
69
  - - ! '>='
78
70
  - !ruby/object:Gem::Version
@@ -80,7 +72,6 @@ dependencies:
80
72
  - !ruby/object:Gem::Dependency
81
73
  name: rb-fsevent
82
74
  requirement: !ruby/object:Gem::Requirement
83
- none: false
84
75
  requirements:
85
76
  - - ~>
86
77
  - !ruby/object:Gem::Version
@@ -88,7 +79,6 @@ dependencies:
88
79
  type: :development
89
80
  prerelease: false
90
81
  version_requirements: !ruby/object:Gem::Requirement
91
- none: false
92
82
  requirements:
93
83
  - - ~>
94
84
  - !ruby/object:Gem::Version
@@ -99,6 +89,7 @@ email:
99
89
  - colin@hoteldelta.net
100
90
  - cmaujean@brandle.net
101
91
  - dhillard@brandle.net
92
+ - ed@brandle.net
102
93
  executables: []
103
94
  extensions: []
104
95
  extra_rdoc_files: []
@@ -116,27 +107,26 @@ files:
116
107
  - url_scrubber.gemspec
117
108
  homepage: http://brandle.net
118
109
  licenses: []
110
+ metadata: {}
119
111
  post_install_message:
120
112
  rdoc_options: []
121
113
  require_paths:
122
114
  - lib
123
115
  required_ruby_version: !ruby/object:Gem::Requirement
124
- none: false
125
116
  requirements:
126
117
  - - ! '>='
127
118
  - !ruby/object:Gem::Version
128
119
  version: '0'
129
120
  required_rubygems_version: !ruby/object:Gem::Requirement
130
- none: false
131
121
  requirements:
132
122
  - - ! '>='
133
123
  - !ruby/object:Gem::Version
134
124
  version: '0'
135
125
  requirements: []
136
126
  rubyforge_project:
137
- rubygems_version: 1.8.21
127
+ rubygems_version: 2.1.10
138
128
  signing_key:
139
- specification_version: 3
129
+ specification_version: 4
140
130
  summary: Clean up URLs.
141
131
  test_files:
142
132
  - spec/spec_helper.rb