kauperts_link_checker 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
@@ -0,0 +1,25 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ kauperts_link_checker (0.4.0)
5
+ i18n
6
+ simpleidn
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ activesupport (2.3.11)
12
+ i18n (0.6.0)
13
+ mocha (0.9.12)
14
+ redgreen (1.2.2)
15
+ simpleidn (0.0.3)
16
+
17
+ PLATFORMS
18
+ ruby
19
+
20
+ DEPENDENCIES
21
+ activesupport
22
+ bundler
23
+ kauperts_link_checker!
24
+ mocha
25
+ redgreen
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2011, kaupert media gmbh
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+ * Neither the name of the original author / copyright holder nor the
12
+ names of its contributors may be used to endorse or promote products
13
+ derived from this software without specific prior written permission.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+
@@ -0,0 +1,40 @@
1
+ == Kauperts::LinkChecker
2
+
3
+ <b>Kauperts::LinkChecker</b> is a simple library to check for the well-being of URLs. It supports HTTPS and IDN URIs.
4
+
5
+ === Installation
6
+ The gem is not published yet and can be installed from github:
7
+ git clone git://github.com/kaupertmedia/kauperts_link_checker.git
8
+ cd kauperts_link_checker
9
+ gem build kauperts_link_checker.gemspec
10
+ gem install kauperts_link_checker-<gemversion>.gem
11
+
12
+ Much easier using bundler:
13
+ # Gemfile
14
+ gem 'kauperts_link_checker', :git => "git://github.com/kaupertmedia/kauperts_link_checker"
15
+
16
+ === Usage
17
+ It will check any object that responds to +url+:
18
+ status = Kauperts::LinkChecker.check!(object_responding_to_url)
19
+ unless status.ok?
20
+ puts status
21
+ end
22
+
23
+ You can ignore 301 permanent redirects that only add a trailing slash like this:
24
+ status = Kauperts::LinkChecker.check!(object_responding_to_url, :ignore_trailing_slash_redirects => true)
25
+ unless status.ok?
26
+ # A redirect from http://example.com/foo to http://example.com/foo/ will be considered ok
27
+ end
28
+
29
+ === I18n
30
+ The following keys are used to translate error messages using the I18n gem:
31
+ * <tt>kauperts.link_checker.errors.timeout</tt>: message when rescuing from Timeout::Error
32
+ * <tt>kauperts.link_checker.errors.generic_network</tt>: message when (currently) rescuing from all other exceptions
33
+
34
+ === Credits
35
+ Kauperts::LinkChecker is extracted from a maintenance task made for
36
+ {berlin.kauperts.de}[http://berlin.kauperts.de] by {kaupert media gmbh}[http://kaupertmedia.de].
37
+
38
+ === License
39
+ Kauperts::LinkChecker is released under a 3-clause BSD license. See the LICENSE file for details.
40
+
@@ -0,0 +1,23 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ Bundler::GemHelper.install_tasks
4
+ require 'rake/testtask'
5
+ require 'rake/rdoctask'
6
+
7
+ desc "Default: run unit tests."
8
+ task :default => :test
9
+
10
+ Rake::TestTask.new(:test) do |t|
11
+ t.libs << 'lib'
12
+ t.libs << 'test'
13
+ t.pattern = 'test/**/*_test.rb'
14
+ t.verbose = true
15
+ end
16
+
17
+ Rake::RDocTask.new(:rdoc) do |rdoc|
18
+ rdoc.rdoc_dir = 'rdoc'
19
+ rdoc.title = 'Kauperts::LinkChecker'
20
+ rdoc.options << '--line-numbers' << '--inline-source'
21
+ rdoc.rdoc_files.include('README*')
22
+ rdoc.rdoc_files.include('lib/**/*.rb')
23
+ end
@@ -0,0 +1,25 @@
1
+ $:.push File.expand_path("../lib/", __FILE__)
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = "kauperts_link_checker"
5
+ s.version = "0.4.0"
6
+ s.platform = Gem::Platform::RUBY
7
+ s.authors = ["Wolfgang Vogl", "Carsten Zimmermann"]
8
+ s.email = ["carp@hacksocke.de"]
9
+ s.homepage = ""
10
+ s.summary = "A simple library to check for the well-being of an URL"
11
+ s.description = "A simple library to check for the well-being of an URL"
12
+
13
+ s.files = `git ls-files`.split("\n")
14
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
15
+ s.require_paths = ["lib"]
16
+
17
+ s.add_dependency "i18n"
18
+ s.add_dependency "simpleidn"
19
+
20
+ s.add_development_dependency 'mocha'
21
+ s.add_development_dependency 'bundler'
22
+ s.add_development_dependency 'redgreen'
23
+ s.add_development_dependency 'activesupport'
24
+ end
25
+
@@ -0,0 +1,92 @@
1
+ require "net/https"
2
+ require "simpleidn"
3
module Kauperts

  # Checks the status of an object which responds to +url+. The returned
  # status can be accessed via +status+. It contains either a string
  # representation of a numeric http status code or an error message.
  #
  # Supports HTTPS and IDN-domains.
  #
  # The following keys are used to translate messages using the I18n gem:
  # * <tt>kauperts.link_checker.errors.timeout</tt>: used when rescuing from Timeout::Error
  # * <tt>kauperts.link_checker.errors.generic_network</tt>: used when rescuing from any
  #   other exception (except SystemExit, signals and NoMemoryError, which are re-raised)
  # * <tt>kauperts.link_checker.status.redirect_permanently</tt>: translation for 301 permanent redirects
  class LinkChecker

    attr_reader :configuration, :object, :status

    # Per-instance settings; one member per supported option.
    Configuration = Struct.new(:ignore_trailing_slash_redirects)

    # Option values applied when the caller does not override them.
    DEFAULT_OPTIONS = { :ignore_trailing_slash_redirects => false }.freeze

    # === Parameters
    # * +object+: an arbitrary object which responds to +url+.
    # * +options+: optional configuration parameters, see below.
    #
    # === Available Options
    # * +ignore_trailing_slash_redirects+: ignores redirects to the same URI but only with an added trailing slash (default: false)
    #
    # === Raises
    # * +ArgumentError+ if +object+ does not respond to +url+.
    # * +NoMethodError+ if +options+ contains a key Configuration has no writer for.
    def initialize(object, options = {})
      raise ArgumentError, "object doesn't respond to url" unless object.respond_to?(:url)

      @object = object
      @status = nil
      @redirect_with_trailing_slash_only = false

      # Assign config variables
      @configuration = Configuration.new
      DEFAULT_OPTIONS.merge(options || {}).each do |key, val|
        @configuration.send(:"#{key}=", val)
      end
    end

    # Checks the associated url object. Sets and returns +status+: the
    # response code as a string, a translated "moved permanently" message
    # including the target location for 301 responses, or a translated
    # error message when the request failed.
    def check!
      # Forget the outcome of a previous run so ok? never reports a stale redirect.
      @redirect_with_trailing_slash_only = false

      @status = begin
        uri = parsed_uri(@object.url)
        response = fetch_response(uri)
        if response.code == '301'
          location = response['location']
          @redirect_with_trailing_slash_only = ("#{uri}/" == location)
          "#{I18n.t :"kauperts.link_checker.status.redirect_permanently", :default => "Moved permanently"} (#{location})"
        else
          response.code
        end
      rescue Timeout::Error => e
        "#{I18n.t :"kauperts.link_checker.errors.timeout", :default => "Timeout"} (#{e.message})"
      rescue SystemExit, SignalException, NoMemoryError
        # Process-level conditions are not network problems; let them propagate.
        raise
      rescue Exception => e
        # Deliberately broad: every remaining failure is reported via +status+
        # instead of being raised to the caller.
        "#{I18n.t :"kauperts.link_checker.errors.generic_network", :default => "Generic network error"} (#{e.message})"
      end
    end

    # Returns true if a check has been run and the return code was '200 OK'
    # or if a 301 permanent redirect only added a trailing slash
    # while +ignore_trailing_slash_redirects+ has been set to a truthy value.
    # Returns false otherwise (including before any check has been run).
    def ok?
      return true if @status == '200'

      @redirect_with_trailing_slash_only && configuration.ignore_trailing_slash_redirects ? true : false
    end

    # Immediately checks +object+ and returns the LinkChecker instance.
    # +options+ are passed on to +new+ unchanged.
    def self.check!(object, options = {})
      checker = new(object, options)
      checker.check!
      checker
    end

    protected

    # Performs the GET request for +uri+ and returns the response object.
    # HTTPS URIs are requested over SSL on the port given in the URI.
    def fetch_response(uri)
      return Net::HTTP.get_response(uri) unless uri.scheme == 'https'

      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      # NOTE(review): certificate verification is switched off, so a host with
      # an invalid certificate is still checked. Confirm this is intended.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      http.start { http.get2(uri.request_uri) }
    end

    # Transforms a possible IDN within +url+ into ASCII and returns
    # a parsed URI instance. Only the host part directly following the
    # scheme is converted; path, query and fragment are left untouched.
    #
    # Raises URI::InvalidURIError if +url+ is not an http(s) URL.
    def parsed_uri(url)
      parts = %r{\A(https?://)([^/?#]+)(.*)\z}m.match(url.to_s)
      raise URI::InvalidURIError, "not an http(s) URL: #{url.inspect}" unless parts

      prefix, domain, remainder = parts.captures
      URI.parse("#{prefix}#{SimpleIDN.to_ascii(domain)}#{remainder}")
    end

  end
end
@@ -0,0 +1,204 @@
1
+ require 'test_helper'
2
+
3
+ class LinkCheckerTest < ActiveSupport::TestCase
4
+
5
+ test "should accept objects responding to url" do
6
+ assert_nothing_raised do
7
+ assert checker.new(url_object)
8
+ end
9
+ assert_raises ArgumentError do
10
+ checker.new(Object.new)
11
+ end
12
+ end
13
+
14
+ test "should instantiate with optional configuration hash" do
15
+ assert defined?(Kauperts::LinkChecker::Configuration)
16
+
17
+ obj = checker.new(url_object)
18
+ assert_respond_to obj, :configuration
19
+
20
+ assert_respond_to obj.configuration, :ignore_trailing_slash_redirects
21
+ assert !obj.configuration.ignore_trailing_slash_redirects
22
+
23
+ obj = checker.new(url_object, :ignore_trailing_slash_redirects => true)
24
+
25
+ assert_respond_to obj.configuration, :ignore_trailing_slash_redirects
26
+ assert_equal true, obj.configuration.ignore_trailing_slash_redirects
27
+ end
28
+
29
+ test "should expose object" do
30
+ obj = checker.new(url_object)
31
+ assert_respond_to obj, :object
32
+ end
33
+
34
+ test "should have check! method" do
35
+ obj = checker.new(url_object)
36
+ assert_respond_to obj, :check!
37
+ end
38
+
39
+ test "should return status array with 200" do
40
+ stub_net_http!
41
+ url = url_object
42
+ obj = checker.new(url)
43
+ a = obj.check!
44
+ assert_equal "200", obj.check!
45
+ end
46
+
47
+ test "should ignore permanent redirects with trailing slash only if told so" do
48
+ url = url_object("http://www.example.com/foo")
49
+ location = url.url + "/"
50
+ stub_net_http_redirect!("301", location)
51
+
52
+ obj = checker.new(url)
53
+ obj.check!
54
+ assert_equal false, obj.ok?
55
+
56
+ obj = checker.new(url, :ignore_trailing_slash_redirects => true)
57
+ obj.check!
58
+ assert_equal true, obj.ok?
59
+ end
60
+
61
+ test "should return status array with 404" do
62
+ stub_net_http!("404")
63
+ url = url_object
64
+ obj = checker.new(url)
65
+ assert_equal "404", obj.check!
66
+ end
67
+
68
+ test "should handle time out exceptions" do
69
+ stub_net_http_error!(Timeout::Error, "Takes way too long")
70
+ url = url_object
71
+ obj = checker.new(url)
72
+ assert_nothing_raised do
73
+ status = obj.check!
74
+ assert_kind_of String, status
75
+ assert_match /Timeout (.+)/, status
76
+ end
77
+ end
78
+
79
+ test "should handle generic network problem" do
80
+ class GenericNetworkException < Exception; end
81
+ stub_net_http_error!(GenericNetworkException, "Somehow broken")
82
+ url = url_object
83
+ obj = checker.new(url)
84
+ assert_nothing_raised do
85
+ status = obj.check!
86
+ assert_kind_of String, status
87
+ assert_match /Generic network error (.+)/, status
88
+ end
89
+ end
90
+
91
+ test "should handle domain with umlauts" do
92
+ SimpleIDN.expects(:to_ascii).returns('www.xn--trotzkpfchen-9ib.de').at_least(1)
93
+ stub_net_http!
94
+ url = url_object('http://www.trotzköpfchen.de')
95
+ obj = checker.new(url)
96
+ assert_equal "200", obj.check!
97
+ end
98
+
99
+ test "should handle ssl protocol" do
100
+ stub_net_https!
101
+ url = url_object(nil, "https")
102
+ obj = checker.new(url)
103
+ assert_equal "200", obj.check!
104
+ end
105
+
106
+ test "should have status" do
107
+ stub_net_http!
108
+ url = url_object
109
+ obj = checker.new(url)
110
+ assert_respond_to obj, :status
111
+ assert_nil obj.status
112
+ obj.check!
113
+ assert_not_nil obj.status
114
+ end
115
+
116
+ test "should have ok? method" do
117
+ stub_net_http!
118
+ url = url_object
119
+ obj = checker.new(url)
120
+ assert_respond_to obj, :ok?
121
+ assert !obj.ok?
122
+ obj.check!
123
+ assert obj.ok?
124
+ end
125
+
126
+ test "should check directly when called from class" do
127
+ stub_net_http!
128
+ url = url_object
129
+ assert_respond_to checker, :check!
130
+ assert_raises ArgumentError do
131
+ checker.check!
132
+ end
133
+ assert_kind_of checker, checker.check!(url)
134
+ end
135
+
136
+ test "should support I18n message for timeout error" do
137
+ I18n.expects(:t).with(:"kauperts.link_checker.errors.timeout", :default => "Timeout").returns('Zeitüberschreitung')
138
+ stub_net_http_error!(Timeout::Error, "Dauert zu lange")
139
+ url = url_object
140
+ assert_match /Zeitüberschreitung (.+)/, checker.check!(url).status
141
+ end
142
+
143
+ test "should support I18n message for generic network error" do
144
+ I18n.expects(:t).with(:"kauperts.link_checker.errors.generic_network", :default => "Generic network error").returns('Netzwerkfehler')
145
+ class GenericNetworkException < Exception; end
146
+ stub_net_http_error!(GenericNetworkException, "Irgendwie kaputt")
147
+ url = url_object
148
+ assert_match /Netzwerkfehler (.+)/, checker.check!(url).status
149
+ end
150
+
151
+ test "should return redirection url" do
152
+ stub_net_http_redirect!
153
+ url = url_object
154
+ assert_match /auenland.de/, checker.check!(url).status
155
+ end
156
+
157
+ test "should support I18n message for 301 permanent redirects" do
158
+ I18n.expects(:t).with(:"kauperts.link_checker.status.redirect_permanently", :default => "Moved permanently").returns('Umgezogen')
159
+ location = "http://auenland.de"
160
+ stub_net_http_redirect!(301, location)
161
+ url = url_object
162
+ assert_match /Umgezogen \(#{location}\)/, checker.check!(url).status
163
+ end
164
+
165
+
166
+ protected
167
+
168
+ def checker
169
+ Kauperts::LinkChecker
170
+ end
171
+
172
+ def stub_net_http!(return_code = "200")
173
+ return_code = return_code.to_s
174
+ mock_response = mock('response')
175
+ mock_response.stubs(:code).returns(return_code)
176
+ Net::HTTP.stubs(:get_response).returns(mock_response)
177
+ end
178
+
179
+ def stub_net_https!(return_code = "200")
180
+ return_code = return_code.to_s
181
+ mock_response = mock('sslresponse')
182
+ mock_response.stubs(:code).returns(return_code)
183
+ Net::HTTP.any_instance.stubs(:start).returns(mock_response)
184
+ end
185
+
186
+ def stub_net_http_error!(exception, message)
187
+ Net::HTTP.stubs(:get_response).raises(exception, message)
188
+ end
189
+
190
+ def stub_net_http_redirect!(return_code = '301', location ="http://auenland.de")
191
+ return_code = return_code.to_s
192
+ mock_response = {'location' => location}
193
+ mock_response.stubs(:code).returns(return_code)
194
+ Net::HTTP.stubs(:get_response).returns(mock_response)
195
+ end
196
+
197
+ def url_object(url = nil, protocol = 'http')
198
+ obj = mock('url_object')
199
+ url ||= "#{protocol}://www.google.com"
200
+ obj.stubs(:url).returns(url)
201
+ obj
202
+ end
203
+
204
+ end
@@ -0,0 +1,8 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require 'test/unit'
5
+ require 'active_support/test_case'
6
+ Bundler.require(:default, :development)
7
+
8
+ require 'kauperts_link_checker'
metadata ADDED
@@ -0,0 +1,161 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kauperts_link_checker
3
+ version: !ruby/object:Gem::Version
4
+ hash: 15
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 4
9
+ - 0
10
+ version: 0.4.0
11
+ platform: ruby
12
+ authors:
13
+ - Wolfgang Vogl
14
+ - Carsten Zimmermann
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2012-04-05 00:00:00 +02:00
20
+ default_executable:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: i18n
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: simpleidn
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: mocha
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ type: :development
63
+ version_requirements: *id003
64
+ - !ruby/object:Gem::Dependency
65
+ name: bundler
66
+ prerelease: false
67
+ requirement: &id004 !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ type: :development
77
+ version_requirements: *id004
78
+ - !ruby/object:Gem::Dependency
79
+ name: redgreen
80
+ prerelease: false
81
+ requirement: &id005 !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ hash: 3
87
+ segments:
88
+ - 0
89
+ version: "0"
90
+ type: :development
91
+ version_requirements: *id005
92
+ - !ruby/object:Gem::Dependency
93
+ name: activesupport
94
+ prerelease: false
95
+ requirement: &id006 !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ hash: 3
101
+ segments:
102
+ - 0
103
+ version: "0"
104
+ type: :development
105
+ version_requirements: *id006
106
+ description: A simple library to check for the well-being of an URL
107
+ email:
108
+ - carp@hacksocke.de
109
+ executables: []
110
+
111
+ extensions: []
112
+
113
+ extra_rdoc_files: []
114
+
115
+ files:
116
+ - Gemfile
117
+ - Gemfile.lock
118
+ - LICENSE
119
+ - README.rdoc
120
+ - Rakefile
121
+ - kauperts_link_checker.gemspec
122
+ - lib/kauperts_link_checker.rb
123
+ - test/link_checker_test.rb
124
+ - test/test_helper.rb
125
+ has_rdoc: true
126
+ homepage: ""
127
+ licenses: []
128
+
129
+ post_install_message:
130
+ rdoc_options: []
131
+
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ none: false
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ hash: 3
140
+ segments:
141
+ - 0
142
+ version: "0"
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ none: false
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ hash: 3
149
+ segments:
150
+ - 0
151
+ version: "0"
152
+ requirements: []
153
+
154
+ rubyforge_project:
155
+ rubygems_version: 1.4.2
156
+ signing_key:
157
+ specification_version: 3
158
+ summary: A simple library to check for the well-being of an URL
159
+ test_files:
160
+ - test/link_checker_test.rb
161
+ - test/test_helper.rb