http_url_validation_improved 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -20,6 +20,7 @@ a specified type.}
20
20
  gem.email = "walter@katipo.co.nz"
21
21
  gem.homepage = "http://github.com/kete/http_url_validation_improved"
22
22
  gem.authors = ["Erik Gregg", "Walter McGinnis", "Kieran Pilkington"]
23
+ gem.add_dependency "addressable"
23
24
  gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
24
25
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
25
26
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.2.0
1
+ 1.3.0
@@ -1,5 +1,5 @@
1
1
  require 'net/http'
2
- require 'uri'
2
+ require 'addressable/uri'
3
3
  require 'socket'
4
4
 
5
5
  module ActiveRecord
@@ -26,15 +26,20 @@ module ActiveRecord
26
26
  moved_retry ||= false
27
27
  not_allowed_retry ||= false
28
28
  retry_without_headers ||= false
29
+ # some domains will block requests that come in more frequently than 1 per second
30
+ sleepy_domains = ['wikipedia.org']
31
+ sleep_interval = 2 # 2 to be on the safe side
32
+ must_sleep ||= false
29
33
  response = nil
30
34
 
31
35
  # resolve to url escaped version of URL
32
- # TODO: at some point hopefully URI lib
33
- # be updated to allow unicode values
34
- # escape for now
35
- value = URI.escape(value)
36
+ # value = URI.escape(value)
37
+ # updated to allow unicode values
38
+ # escaping shouldn't be necessary
39
+ must_sleep = sleepy_domains.select { |d| value.include?(d) }.size > 0
36
40
 
37
- url = URI.parse(value)
41
+
42
+ url = Addressable::URI.parse(value)
38
43
 
39
44
  # Check Formatting
40
45
  # moved to use the URI library's logic
@@ -50,23 +55,17 @@ module ActiveRecord
50
55
  http.use_ssl = true
51
56
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
52
57
  end
53
- headers = Object.const_defined?('SITE_URL') ? { "User-Agent" => "#{SITE_URL} link checking mechanism via Ruby Net/HTTP" } : { "User-Agent" => "Ruby Net/HTTp used for link checking mechanism" }
58
+ headers = Object.const_defined?('SITE_URL') ? { "User-Agent" => "#{SITE_URL} link checking mechanism (http://github.com/kete/http_url_validation_improved) via Ruby Net/HTTP" } : { "User-Agent" => "Ruby Net/HTTp used for link checking mechanism (http://github.com/kete/http_url_validation_improved)" }
54
59
  response = if not_allowed_retry
60
+ sleep sleep_interval if must_sleep
61
+
55
62
  if retry_without_headers
56
63
  http.request_get(url.path) {|r|}
57
64
  else
58
65
  http.request_get(url.path, headers) {|r|}
59
66
  end
60
67
  else
61
- # we know that *.wikipedia.org don't like the headers
62
- # and will treat 3 requests to get to the point
63
- # where we normally try without headers as DoS
64
- # if not wikipedia, try with headers
65
- if value.include?('wikipedia.org')
66
- http.request_head(url.path)
67
- else
68
- http.request_head(url.path, headers)
69
- end
68
+ http.request_head(url.path, headers)
70
69
  end
71
70
  # response = not_allowed_retry ? http.request_get(url.path) {|r|} : http.request_head(url.path)
72
71
  # Comment out as you need to
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_url_validation_improved
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 27
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
- - 2
8
+ - 3
9
9
  - 0
10
- version: 1.2.0
10
+ version: 1.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Erik Gregg
@@ -17,11 +17,11 @@ autorequire:
17
17
  bindir: bin
18
18
  cert_chain: []
19
19
 
20
- date: 2010-06-15 00:00:00 +12:00
20
+ date: 2011-02-04 00:00:00 +13:00
21
21
  default_executable:
22
22
  dependencies:
23
23
  - !ruby/object:Gem::Dependency
24
- name: thoughtbot-shoulda
24
+ name: addressable
25
25
  prerelease: false
26
26
  requirement: &id001 !ruby/object:Gem::Requirement
27
27
  none: false
@@ -32,8 +32,22 @@ dependencies:
32
32
  segments:
33
33
  - 0
34
34
  version: "0"
35
- type: :development
35
+ type: :runtime
36
36
  version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: thoughtbot-shoulda
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ type: :development
50
+ version_requirements: *id002
37
51
  description: |-
38
52
  a Rails gem that allows you to validate a URL
39
53
  entered in a form. It validates if the URL exists by hitting it with a HEAD