kauperts_link_checker 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over TLS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in the gemspec.
gemspec
@@ -0,0 +1,25 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ kauperts_link_checker (0.4.0)
5
+ i18n
6
+ simpleidn
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ activesupport (2.3.11)
12
+ i18n (0.6.0)
13
+ mocha (0.9.12)
14
+ redgreen (1.2.2)
15
+ simpleidn (0.0.3)
16
+
17
+ PLATFORMS
18
+ ruby
19
+
20
+ DEPENDENCIES
21
+ activesupport
22
+ bundler
23
+ kauperts_link_checker!
24
+ mocha
25
+ redgreen
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ Copyright (c) 2011, kaupert media gmbh
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+ * Redistributions of source code must retain the above copyright
7
+ notice, this list of conditions and the following disclaimer.
8
+ * Redistributions in binary form must reproduce the above copyright
9
+ notice, this list of conditions and the following disclaimer in the
10
+ documentation and/or other materials provided with the distribution.
11
+ * Neither the name of the original author / copyright holder nor the
12
+ names of its contributors may be used to endorse or promote products
13
+ derived from this software without specific prior written permission.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+
@@ -0,0 +1,40 @@
1
+ == Kauperts::LinkChecker
2
+
3
+ <b>Kauperts::LinkChecker</b> is a simple library to check for the well-being of URLs. It supports HTTPS and IDN URIs.
4
+
5
+ === Installation
6
+ The gem is not published yet and can be installed from github:
7
+ git clone git://github.com/kaupertmedia/kauperts_link_checker.git
8
+ cd kauperts_link_checker
9
+ gem build kauperts_link_checker.gemspec
10
+ gem install kauperts_link_checker-<gemversion>.gem
11
+
12
+ Much easier using bundler:
13
+ # Gemfile
14
+ gem 'kauperts_link_checker', :git => "git://github.com/kaupertmedia/kauperts_link_checker"
15
+
16
+ === Usage
17
+ It will check any object that responds to +url+:
18
+ status = Kauperts::LinkChecker.check!(object_responding_to_url)
19
+ unless status.ok?
20
+ puts status
21
+ end
22
+
23
+ You can ignore 301 permanent redirects that only add a trailing slash like this:
24
+ status = Kauperts::LinkChecker.check!(object_responding_to_url, :ignore_trailing_slash_redirects => true)
25
+ unless status.ok?
26
+ # A redirect from http://example.com/foo to http://example.com/foo/ will be considered ok
27
+ end
28
+
29
+ === I18n
30
+ The following keys are used to translate error messages using the I18n gem:
31
+ * <tt>kauperts.link_checker.errors.timeout</tt>: message when rescuing from Timeout::Error
32
+ * <tt>kauperts.link_checker.errors.generic_network</tt>: message when (currently) rescuing from all other exceptions
33
+
34
+ === Credits
35
+ Kauperts::LinkChecker is extracted from a maintenance task made for
36
+ {berlin.kauperts.de}[http://berlin.kauperts.de] by {kaupert media gmbh}[http://kaupertmedia.de].
37
+
38
+ === License
39
+ Kauperts::LinkChecker is released under a 3-clause BSD license. See the LICENSE file for details.
40
+
@@ -0,0 +1,23 @@
1
# Rake tasks for the gem: Bundler's build/install/release helpers, the unit
# test runner (default task) and RDoc generation.
require 'rubygems'
require 'bundler'
Bundler::GemHelper.install_tasks
require 'rake/testtask'

# rake/rdoctask is no longer shipped with later Rake releases; RDoc provides
# the replacement task itself. Prefer that one and only fall back to the
# legacy task on installations whose RDoc does not offer rdoc/task.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

desc "Default: run unit tests."
task :default => :test

# Runs every test/**/*_test.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

# Generates API documentation from the README and the library sources into rdoc/.
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'Kauperts::LinkChecker'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,25 @@
1
# Gem specification for kauperts_link_checker.
$:.push File.expand_path("../lib/", __FILE__)

Gem::Specification.new do |s|
  s.name        = "kauperts_link_checker"
  s.version     = "0.4.0"
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Wolfgang Vogl", "Carsten Zimmermann"]
  s.email       = ["carp@hacksocke.de"]
  # Repository named in the README's installation instructions; an empty
  # homepage leaves users of the packaged gem without a pointer to the project.
  s.homepage    = "https://github.com/kaupertmedia/kauperts_link_checker"
  # The shipped LICENSE file is a 3-clause BSD license.
  s.license     = "BSD-3-Clause"
  s.summary     = "A simple library to check for the well-being of an URL"
  s.description = "A simple library to check for the well-being of an URL"

  # The packaged file list is whatever git tracks, so building the gem
  # requires a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency "i18n"
  s.add_dependency "simpleidn"

  # Development / test dependencies
  s.add_development_dependency 'mocha'
  s.add_development_dependency 'bundler'
  s.add_development_dependency 'redgreen'
  s.add_development_dependency 'activesupport'
end
25
+
@@ -0,0 +1,92 @@
1
+ require "net/https"
2
+ require "simpleidn"
3
module Kauperts

  # Checks the status of an object which responds to +url+. The returned
  # status can be accessed via +status+. It contains either a string
  # representation of a numeric http status code or an error message.
  #
  # Supports HTTPS and IDN-domains.
  #
  # The following keys are used to translate messages using the I18n gem:
  # * <tt>kauperts.link_checker.errors.timeout</tt>: rescues from Timeout::Error
  # * <tt>kauperts.link_checker.errors.generic_network</tt>: (currently) rescues from all other exceptions
  # * <tt>kauperts.link_checker.status.redirect_permanently</tt>: translation for 301 permanent redirects
  class LinkChecker

    # Option values used when the caller does not override them.
    DEFAULT_OPTIONS = { :ignore_trailing_slash_redirects => false }.freeze

    # Holds the options a checker instance was created with.
    Configuration = Struct.new(:ignore_trailing_slash_redirects)

    attr_reader :configuration, :object, :status

    # === Parameters
    # * +object+: an arbitrary object which responds to +url+.
    # * +options+: optional configuration parameters, see below.
    #
    # === Available Options
    # * +ignore_trailing_slash_redirects+: ignores redirects to the same URI but only with an added trailing slash (default: false)
    #
    # Raises ArgumentError if +object+ does not respond to +url+ and
    # NoMethodError if +options+ contains a key Configuration does not know.
    def initialize(object, options = {})
      raise ArgumentError, "object doesn't respond to url" unless object.respond_to?(:url)

      @object = object
      @redirect_with_trailing_slash_only = false

      # Assign config variables
      @configuration = Configuration.new
      DEFAULT_OPTIONS.merge(options).each do |key, val|
        @configuration.send(:"#{key}=", val)
      end
    end

    # Checks the associated url object. Sets and returns +status+: the
    # response code as a string, a "moved permanently" message including the
    # target location for 301 responses, or an error message when the request
    # could not be completed.
    def check!
      # Forget the redirect outcome of an earlier run; otherwise a later
      # failure could still be reported as ok by +ok?+.
      @redirect_with_trailing_slash_only = false

      @status = begin
        uri = parsed_uri(@object.url)
        response = fetch_response(uri)
        if response.code == '301'
          location = response['location']
          @redirect_with_trailing_slash_only = "#{uri}/" == location
          "#{I18n.t :"kauperts.link_checker.status.redirect_permanently", :default => "Moved permanently"} (#{location})"
        else
          response.code
        end
      rescue Timeout::Error => e
        "#{I18n.t :"kauperts.link_checker.errors.timeout", :default => "Timeout"} (#{e.message})"
      rescue Exception => e
        # Deliberately broad: every failure is turned into a status message
        # rather than raised to the caller.
        # NOTE(review): this also swallows Interrupt/SystemExit; narrowing it
        # would change the documented "all other exceptions" behaviour.
        "#{I18n.t :"kauperts.link_checker.errors.generic_network", :default => "Generic network error"} (#{e.message})"
      end
    end

    # Returns if a check has been run and the return code was '200 OK'
    # or if a 301 permanent redirect only added a trailing slash
    # while +ignore_trailing_slash_redirects+ has been set to true
    def ok?
      @status == '200' || (@redirect_with_trailing_slash_only && configuration.ignore_trailing_slash_redirects)
    end

    # Immediately checks +object+ and returns the LinkChecker instance.
    # +options+ are passed on to +new+ unchanged.
    def self.check!(object, options = {})
      checker = new(object, options)
      checker.check!
      checker
    end

    protected

    # Performs the GET request for +uri+ and returns the response object.
    # HTTPS URIs are requested via an SSL-enabled connection on the URI's own
    # port, everything else via Net::HTTP.get_response.
    def fetch_response(uri)
      return Net::HTTP.get_response(uri) unless uri.scheme == 'https'

      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      # NOTE(review): certificate verification is switched off, so an invalid
      # or self-signed certificate does not fail the check -- confirm that
      # this is intended before relying on the result for anything but
      # reachability.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      http.start { http.get2(uri.to_s) }
    end

    # Transforms a possible IDN within +url+ into ASCII and returns
    # a parsed URI instance. Only the host part directly following the
    # scheme is converted; the rest of the URL is left untouched.
    #
    # Raises URI::InvalidURIError if +url+ is not an http(s) URL with a host.
    def parsed_uri(url)
      match = /\A(https?:\/\/)([^\/]+)(.*)\z/m.match(url.to_s)
      raise URI::InvalidURIError, "not an http(s) URL: #{url.inspect}" unless match

      scheme, domain, rest = match.captures
      URI.parse("#{scheme}#{SimpleIDN.to_ascii(domain)}#{rest}")
    end

  end
end
@@ -0,0 +1,204 @@
1
+ require 'test_helper'
2
+
3
# Unit tests for Kauperts::LinkChecker. Every request is replaced by a Mocha
# stub on Net::HTTP, so no test touches the network.
class LinkCheckerTest < ActiveSupport::TestCase

  # Stand-in for an arbitrary failure raised while requesting a URL. It
  # derives from Exception rather than StandardError to assert that the
  # checker turns even such errors into a status message.
  class GenericNetworkException < Exception; end

  test "should accept objects responding to url" do
    assert_nothing_raised do
      assert checker.new(url_object)
    end
    assert_raises ArgumentError do
      checker.new(Object.new)
    end
  end

  test "should instantiate with optional configuration hash" do
    assert defined?(Kauperts::LinkChecker::Configuration)

    obj = checker.new(url_object)
    assert_respond_to obj, :configuration

    assert_respond_to obj.configuration, :ignore_trailing_slash_redirects
    assert !obj.configuration.ignore_trailing_slash_redirects

    obj = checker.new(url_object, :ignore_trailing_slash_redirects => true)

    assert_respond_to obj.configuration, :ignore_trailing_slash_redirects
    assert_equal true, obj.configuration.ignore_trailing_slash_redirects
  end

  test "should expose object" do
    obj = checker.new(url_object)
    assert_respond_to obj, :object
  end

  test "should have check! method" do
    obj = checker.new(url_object)
    assert_respond_to obj, :check!
  end

  test "should return status array with 200" do
    stub_net_http!
    url = url_object
    obj = checker.new(url)
    assert_equal "200", obj.check!
  end

  test "should ignore permanent redirects with trailing slash only if told so" do
    url = url_object("http://www.example.com/foo")
    location = url.url + "/"
    stub_net_http_redirect!("301", location)

    obj = checker.new(url)
    obj.check!
    assert_equal false, obj.ok?

    obj = checker.new(url, :ignore_trailing_slash_redirects => true)
    obj.check!
    assert_equal true, obj.ok?
  end

  test "should return status array with 404" do
    stub_net_http!("404")
    url = url_object
    obj = checker.new(url)
    assert_equal "404", obj.check!
  end

  test "should handle time out exceptions" do
    stub_net_http_error!(Timeout::Error, "Takes way too long")
    url = url_object
    obj = checker.new(url)
    assert_nothing_raised do
      status = obj.check!
      assert_kind_of String, status
      assert_match(/Timeout (.+)/, status)
    end
  end

  test "should handle generic network problem" do
    stub_net_http_error!(GenericNetworkException, "Somehow broken")
    url = url_object
    obj = checker.new(url)
    assert_nothing_raised do
      status = obj.check!
      assert_kind_of String, status
      assert_match(/Generic network error (.+)/, status)
    end
  end

  test "should handle domain with umlauts" do
    SimpleIDN.expects(:to_ascii).returns('www.xn--trotzkpfchen-9ib.de').at_least(1)
    stub_net_http!
    url = url_object('http://www.trotzköpfchen.de')
    obj = checker.new(url)
    assert_equal "200", obj.check!
  end

  test "should handle ssl protocol" do
    stub_net_https!
    url = url_object(nil, "https")
    obj = checker.new(url)
    assert_equal "200", obj.check!
  end

  test "should have status" do
    stub_net_http!
    url = url_object
    obj = checker.new(url)
    assert_respond_to obj, :status
    assert_nil obj.status
    obj.check!
    assert_not_nil obj.status
  end

  test "should have ok? method" do
    stub_net_http!
    url = url_object
    obj = checker.new(url)
    assert_respond_to obj, :ok?
    assert !obj.ok?
    obj.check!
    assert obj.ok?
  end

  test "should check directly when called from class" do
    stub_net_http!
    url = url_object
    assert_respond_to checker, :check!
    assert_raises ArgumentError do
      checker.check!
    end
    assert_kind_of checker, checker.check!(url)
  end

  test "should support I18n message for timeout error" do
    I18n.expects(:t).with(:"kauperts.link_checker.errors.timeout", :default => "Timeout").returns('Zeitüberschreitung')
    stub_net_http_error!(Timeout::Error, "Dauert zu lange")
    url = url_object
    assert_match(/Zeitüberschreitung (.+)/, checker.check!(url).status)
  end

  test "should support I18n message for generic network error" do
    I18n.expects(:t).with(:"kauperts.link_checker.errors.generic_network", :default => "Generic network error").returns('Netzwerkfehler')
    stub_net_http_error!(GenericNetworkException, "Irgendwie kaputt")
    url = url_object
    assert_match(/Netzwerkfehler (.+)/, checker.check!(url).status)
  end

  test "should return redirection url" do
    stub_net_http_redirect!
    url = url_object
    assert_match(/auenland.de/, checker.check!(url).status)
  end

  test "should support I18n message for 301 permanent redirects" do
    I18n.expects(:t).with(:"kauperts.link_checker.status.redirect_permanently", :default => "Moved permanently").returns('Umgezogen')
    location = "http://auenland.de"
    stub_net_http_redirect!(301, location)
    url = url_object
    assert_match(/Umgezogen \(#{location}\)/, checker.check!(url).status)
  end


  protected

  # The class under test.
  def checker
    Kauperts::LinkChecker
  end

  # Makes Net::HTTP.get_response return a response mock whose +code+ is
  # +return_code+ (converted to a string).
  def stub_net_http!(return_code = "200")
    return_code = return_code.to_s
    mock_response = mock('response')
    mock_response.stubs(:code).returns(return_code)
    Net::HTTP.stubs(:get_response).returns(mock_response)
  end

  # Same as stub_net_http!, but stubs +start+ on every Net::HTTP instance,
  # which is the call the checker uses for https URLs.
  def stub_net_https!(return_code = "200")
    return_code = return_code.to_s
    mock_response = mock('sslresponse')
    mock_response.stubs(:code).returns(return_code)
    Net::HTTP.any_instance.stubs(:start).returns(mock_response)
  end

  # Makes Net::HTTP.get_response raise +exception+ with +message+.
  def stub_net_http_error!(exception, message)
    Net::HTTP.stubs(:get_response).raises(exception, message)
  end

  # Makes Net::HTTP.get_response return a redirect response. A Hash serves as
  # the response so that the 'location' header can be read via [], while
  # +code+ is stubbed onto it.
  def stub_net_http_redirect!(return_code = '301', location ="http://auenland.de")
    return_code = return_code.to_s
    mock_response = {'location' => location}
    mock_response.stubs(:code).returns(return_code)
    Net::HTTP.stubs(:get_response).returns(mock_response)
  end

  # Builds a mock responding to +url+. Without an explicit +url+ it returns
  # "<protocol>://www.google.com".
  def url_object(url = nil, protocol = 'http')
    obj = mock('url_object')
    url ||= "#{protocol}://www.google.com"
    obj.stubs(:url).returns(url)
    obj
  end

end
@@ -0,0 +1,8 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require 'test/unit'
5
+ require 'active_support/test_case'
6
+ Bundler.require(:default, :development)
7
+
8
+ require 'kauperts_link_checker'
metadata ADDED
@@ -0,0 +1,161 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kauperts_link_checker
3
+ version: !ruby/object:Gem::Version
4
+ hash: 15
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 4
9
+ - 0
10
+ version: 0.4.0
11
+ platform: ruby
12
+ authors:
13
+ - Wolfgang Vogl
14
+ - Carsten Zimmermann
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2012-04-05 00:00:00 +02:00
20
+ default_executable:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: i18n
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: simpleidn
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: mocha
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ type: :development
63
+ version_requirements: *id003
64
+ - !ruby/object:Gem::Dependency
65
+ name: bundler
66
+ prerelease: false
67
+ requirement: &id004 !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ type: :development
77
+ version_requirements: *id004
78
+ - !ruby/object:Gem::Dependency
79
+ name: redgreen
80
+ prerelease: false
81
+ requirement: &id005 !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ hash: 3
87
+ segments:
88
+ - 0
89
+ version: "0"
90
+ type: :development
91
+ version_requirements: *id005
92
+ - !ruby/object:Gem::Dependency
93
+ name: activesupport
94
+ prerelease: false
95
+ requirement: &id006 !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ hash: 3
101
+ segments:
102
+ - 0
103
+ version: "0"
104
+ type: :development
105
+ version_requirements: *id006
106
+ description: A simple library to check for the well-being of an URL
107
+ email:
108
+ - carp@hacksocke.de
109
+ executables: []
110
+
111
+ extensions: []
112
+
113
+ extra_rdoc_files: []
114
+
115
+ files:
116
+ - Gemfile
117
+ - Gemfile.lock
118
+ - LICENSE
119
+ - README.rdoc
120
+ - Rakefile
121
+ - kauperts_link_checker.gemspec
122
+ - lib/kauperts_link_checker.rb
123
+ - test/link_checker_test.rb
124
+ - test/test_helper.rb
125
+ has_rdoc: true
126
+ homepage: ""
127
+ licenses: []
128
+
129
+ post_install_message:
130
+ rdoc_options: []
131
+
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ none: false
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ hash: 3
140
+ segments:
141
+ - 0
142
+ version: "0"
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ none: false
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ hash: 3
149
+ segments:
150
+ - 0
151
+ version: "0"
152
+ requirements: []
153
+
154
+ rubyforge_project:
155
+ rubygems_version: 1.4.2
156
+ signing_key:
157
+ specification_version: 3
158
+ summary: A simple library to check for the well-being of an URL
159
+ test_files:
160
+ - test/link_checker_test.rb
161
+ - test/test_helper.rb