http_url_validation_improved 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/http_url_validation_improved.rb +15 -16
- metadata +20 -6
data/Rakefile
CHANGED
@@ -20,6 +20,7 @@ a specified type.}
|
|
20
20
|
gem.email = "walter@katipo.co.nz"
|
21
21
|
gem.homepage = "http://github.com/kete/http_url_validation_improved"
|
22
22
|
gem.authors = ["Erik Gregg", "Walter McGinnis", "Kieran Pilkington"]
|
23
|
+
gem.add_dependency "addressable"
|
23
24
|
gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
|
24
25
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
25
26
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.2.0
|
1
|
+
1.3.0
|
data/lib/http_url_validation_improved.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'net/http'
|
2
|
-
require 'uri'
|
2
|
+
require 'addressable/uri'
|
3
3
|
require 'socket'
|
4
4
|
|
5
5
|
module ActiveRecord
|
@@ -26,15 +26,20 @@ module ActiveRecord
|
|
26
26
|
moved_retry ||= false
|
27
27
|
not_allowed_retry ||= false
|
28
28
|
retry_without_headers ||= false
|
29
|
+
# some domains will block requests that come in more frequently than 1 per second
|
30
|
+
sleepy_domains = ['wikipedia.org']
|
31
|
+
sleep_interval = 2 # 2 to be on the safe side
|
32
|
+
must_sleep ||= false
|
29
33
|
response = nil
|
30
34
|
|
31
35
|
# resolve to url escaped version of URL
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
|
36
|
+
# value = URI.escape(value)
|
37
|
+
# updated to allow unicode values
|
38
|
+
# escaping shouldn't be necessary
|
39
|
+
must_sleep = sleepy_domains.select { |d| value.include?(d) }.size > 0
|
36
40
|
|
37
|
-
|
41
|
+
|
42
|
+
url = Addressable::URI.parse(value)
|
38
43
|
|
39
44
|
# Check Formatting
|
40
45
|
# moved to use the URI library's logic
|
@@ -50,23 +55,17 @@ module ActiveRecord
|
|
50
55
|
http.use_ssl = true
|
51
56
|
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
52
57
|
end
|
53
|
-
headers = Object.const_defined?('SITE_URL') ? { "User-Agent" => "#{SITE_URL} link checking mechanism via Ruby Net/HTTP" } : { "User-Agent" => "Ruby Net/HTTp used for link checking mechanism" }
|
58
|
+
headers = Object.const_defined?('SITE_URL') ? { "User-Agent" => "#{SITE_URL} link checking mechanism (http://github.com/kete/http_url_validation_improved) via Ruby Net/HTTP" } : { "User-Agent" => "Ruby Net/HTTp used for link checking mechanism (http://github.com/kete/http_url_validation_improved)" }
|
54
59
|
response = if not_allowed_retry
|
60
|
+
sleep sleep_interval if must_sleep
|
61
|
+
|
55
62
|
if retry_without_headers
|
56
63
|
http.request_get(url.path) {|r|}
|
57
64
|
else
|
58
65
|
http.request_get(url.path, headers) {|r|}
|
59
66
|
end
|
60
67
|
else
|
61
|
-
|
62
|
-
# and will treat 3 requests to get to the point
|
63
|
-
# where we normally try without headers as DoS
|
64
|
-
# if not wikipedia, try with headers
|
65
|
-
if value.include?('wikipedia.org')
|
66
|
-
http.request_head(url.path)
|
67
|
-
else
|
68
|
-
http.request_head(url.path, headers)
|
69
|
-
end
|
68
|
+
http.request_head(url.path, headers)
|
70
69
|
end
|
71
70
|
# response = not_allowed_retry ? http.request_get(url.path) {|r|} : http.request_head(url.path)
|
72
71
|
# Comment out as you need to
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_url_validation_improved
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
|
-
- 2
|
8
|
+
- 3
|
9
9
|
- 0
|
10
|
-
version: 1.2.0
|
10
|
+
version: 1.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Erik Gregg
|
@@ -17,11 +17,11 @@ autorequire:
|
|
17
17
|
bindir: bin
|
18
18
|
cert_chain: []
|
19
19
|
|
20
|
-
date:
|
20
|
+
date: 2011-02-04 00:00:00 +13:00
|
21
21
|
default_executable:
|
22
22
|
dependencies:
|
23
23
|
- !ruby/object:Gem::Dependency
|
24
|
-
name: thoughtbot-shoulda
|
24
|
+
name: addressable
|
25
25
|
prerelease: false
|
26
26
|
requirement: &id001 !ruby/object:Gem::Requirement
|
27
27
|
none: false
|
@@ -32,8 +32,22 @@ dependencies:
|
|
32
32
|
segments:
|
33
33
|
- 0
|
34
34
|
version: "0"
|
35
|
-
type: :development
|
35
|
+
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: thoughtbot-shoulda
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 3
|
46
|
+
segments:
|
47
|
+
- 0
|
48
|
+
version: "0"
|
49
|
+
type: :development
|
50
|
+
version_requirements: *id002
|
37
51
|
description: |-
|
38
52
|
a Rails gem that allows you to validate a URL
|
39
53
|
entered in a form. It validates if the URL exists by hitting it with a HEAD
|