bad_link_finder 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ N2ZkYWM1MjViZmM0M2E3ZDUyZDQ1NmU2MjQ4NDU5Yzk4YjMwZTY3Yg==
5
+ data.tar.gz: !binary |-
6
+ MTljZDc2YzUxNmFkZmZjMWNhMzlkYWE1MDg1OWQ2YzU2OWFkYzUzNw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NjM3MjRkOTVlMGRlYWI0NTliMDViMDI2ZjlkZjI1MGZjNDhjYTMwYWIyNjUy
10
+ OGJjODRiZmJlYzMzMzI2NzcyZGJhNjE4ZmY4ZjQzZjFlZTMyOWQ4ZDk5MzZm
11
+ MmRiZWYxYmViYmMwYzJjMjdmNTQyNWU2MTIzMDUzYWE0MmFiYmY=
12
+ data.tar.gz: !binary |-
13
+ MTBlYjc4OTU5OWFkOWM5YTE4MDVhNjgzNDI0ZmQxNmQwODcwZWU3NzhkMTlh
14
+ MzQ0YjhhMzExY2JmZGFmN2M2YmYxOTNhNmRmNDg3M2I3MTQ3NjljMmU2NTg4
15
+ NmNlZWNkYzIwOTc1ZWRlMjU4ODE2MDI3NDgxODc4NTZkODA4YTc=
data/LICENCE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Elliot Crosby-McCullough
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,13 @@
1
+ # Bad link finder
2
+
3
+ Crawls a mirrored site and checks that all links either return a successful response or redirect to somewhere that does.
4
+
5
+ ## Usage
6
+
7
+ Set environment variables:
8
+
9
+ - `MIRROR_DIR`, to the location of your mirrored site.
10
+ - `REPORT_OUTPUT_FILE`, to the location you'd like the CSV saved to.
11
+ - `SITE_HOST`, to the full host of the live site you'd like to test against, including protocol. For example, `https://www.example.com`
12
+
13
+ Then execute `bad_link_finder`.
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bad_link_finder'
4
+
5
# Run the link check; a misconfigured environment is reported as a short
# message on stderr (with a non-zero exit status) instead of a stack trace.
begin
  BadLinkFinder.run
rescue BadLinkFinder::EnvironmentVariableError => error
  abort("Please check your environment variables: #{error.message}")
end
@@ -0,0 +1,22 @@
1
+ require 'csv'
2
+
3
module BadLinkFinder
  # Flattens a map of page URLs to broken links into CSV text.
  class CSVBuilder
    # @param bad_link_map [Hash] page URL => collection of link objects. Each
    #   link object must respond to +link+, +error_message+ and +exception+.
    def initialize(bad_link_map)
      @bad_link_map = bad_link_map
    end

    # The CSV is generated on the first call and the same string is returned
    # on every later call.
    #
    # @return [String] a header row followed by one row per broken link
    def to_s
      @to_s ||= CSV.generate(encoding: 'UTF-8') do |csv|
        csv << ['page_url', 'link', 'error_message', 'raw_error_message']

        @bad_link_map.each do |page_url, bad_links|
          bad_links.each { |bad_link| csv << row_for(page_url, bad_link) }
        end
      end
    end

    private

    # One CSV row; the raw exception message column is left empty when the
    # link carries no exception.
    def row_for(page_url, bad_link)
      raw_message = bad_link.exception.message if bad_link.exception
      [page_url, bad_link.link, bad_link.error_message, raw_message]
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'mechanize'
2
+
3
module BadLinkFinder
  # A single link found on a page. Constructing a Link resolves the raw href
  # against the page URL, requests it, and records a human-readable error
  # message (plus the underlying exception) if the request fails.
  class Link
    attr_reader :link, :url, :error_message, :exception

    # @param page_url [String] absolute URL of the page the link appears on
    # @param link [String] raw href value as found in the page
    def initialize(page_url, link)
      @page_url = page_url
      @link = link
      @url = get_url_from_link(link)

      validate_with_request

    rescue URI::InvalidURIError => exception
      record_error("This link is in a bad format", exception)
    rescue Mechanize::UnauthorizedError => exception
      # Must come before ResponseCodeError: UnauthorizedError is a subclass of
      # it, so listing it afterwards would make this branch unreachable.
      record_error("This link requires authorisation", exception)
    rescue Mechanize::ResponseCodeError => exception
      if exception.response_code.to_i == 405 && !@head_unsupported
        # The server rejected HEAD; re-run once using GET instead. The flag
        # bounds the retry to a single attempt.
        @head_unsupported = true
        retry
      else
        record_error("This request returned a #{exception.response_code}", exception)
      end
    rescue Mechanize::UnsupportedSchemeError => exception
      record_error("This link has a scheme we can't load (should be http or https)", exception)
    rescue Mechanize::RedirectLimitReachedError => exception
      record_error("This link might be in a redirect loop", exception)
    rescue Mechanize::RobotsDisallowedError => exception
      record_error("This link is blocked by robots.txt or nofollow attributes", exception)
    rescue Mechanize::Error, Net::HTTP::Persistent::Error, Timeout::Error, Errno::EINVAL,
           Errno::ECONNRESET, EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
           Net::ProtocolError, OpenSSL::SSL::SSLError, SocketError => exception # Thanks Net::HTTP
      record_error("The server failed to serve this page properly", exception)
    end

    # @return [Boolean] true when no error was recorded for this link
    def valid?
      @error_message.nil?
    end

    protected

    # Requests the resolved URL with HEAD, or with GET once a previous attempt
    # has shown that HEAD is not supported.
    def validate_with_request
      puts "-- testing link #{@link} using #{@url}"
      sleep 0.1 # Recommended pause for gov.uk rate limiting

      browser = Mechanize.new
      browser.user_agent = 'GOV.UK link checker'

      if @head_unsupported
        browser.get(@url)
      else
        browser.head(@url)
      end
    end

    # Resolves the raw href against the page URL.
    # Raises URI::InvalidURIError for malformed links.
    def get_url_from_link(link)
      URI.join(@page_url, link).to_s
    end

    # Stores the failure details and logs them to stdout.
    def record_error(message, exception = nil)
      @error_message = message
      @exception = exception

      puts "---- found broken link #{@url}: #{message}: #{exception.message if exception}"
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'nokogiri'
2
+
3
module BadLinkFinder
  # A mirrored HTML file: exposes the site-relative path it represents and the
  # links found in it, with mirror-style `index.html` / `.html` endings removed.
  class Page
    attr_reader :path, :links

    # @param mirror_dir [Pathname] root directory of the mirrored site
    #   (must support `+` and the result must respond to `read`)
    # @param path [String] file path relative to +mirror_dir+
    def initialize(mirror_dir, path)
      @path = strip_html_ending(path)

      file = mirror_dir + path
      doc = Nokogiri::HTML(file.read)
      @links = doc.css('a').map do |a|
        strip_html_ending(a['href']) unless ignore_link?(a['href'])
      end.compact
    end

    protected

    # Removes the first `index.html` or `.html` ending from an internal link,
    # keeping whatever follows it (query string, anchor).
    #
    # Absolute http(s) URLs and protocol-relative `//host/...` URLs are
    # returned untouched. A relative file whose name merely begins with "http"
    # (e.g. `http-guide.html`) is still treated as internal.
    def strip_html_ending(href)
      return href if href =~ %r{\A(?:https?:)?//}i

      # `index.html` is only dropped when it is a whole path segment (at the
      # start or right after a "/"), so `myindex.html` becomes `myindex`
      # rather than `my`. `.html` directly after "?" is left alone.
      href.sub(%r{(?:(?<![^/])index\.html|(?<!\?)\.html)(.*)}, '\1')
    end

    # Links with no href, in-page anchors and mailto links are never checked.
    def ignore_link?(href)
      href.nil? || href.start_with?('#', 'mailto:')
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'bad_link_finder/link'
2
+
3
module BadLinkFinder
  # Checks every link of one page against the live host, reusing results for
  # URLs that have already been checked.
  class PageChecker
    attr_reader :page_url

    # @param host [String] live site host including protocol
    # @param page [#path, #links] the mirrored page to check
    # @param result_cache [#fetch, #store] cache shared between pages
    def initialize(host, page, result_cache)
      host = host.chomp('/') + '/' # exactly one trailing slash before joining
      @page = page
      @page_url = URI.join(host, page.path).to_s
      @result_cache = result_cache
    end

    # @return [Array] the link objects on this page that are not valid
    def bad_links
      @bad_links ||= @page.links.map do |raw_link|
        link = checked_link(raw_link)

        link unless link.valid?
      end.compact
    end

    private

    # Returns the cached result for the link's target, checking it first if
    # it has not been seen yet.
    def checked_link(raw_link)
      key = cache_key(raw_link)
      @result_cache.fetch(key) || @result_cache.store(key, BadLinkFinder::Link.new(@page_url, raw_link))
    end

    # The cache is keyed on the resolved absolute URL rather than the raw
    # href: the same relative href on pages in different directories points
    # at different URLs and must not share a result. Malformed hrefs cannot
    # be resolved, so they fall back to the raw text.
    # NOTE: a cached link object keeps the raw href of the page that first
    # referenced the URL.
    def cache_key(raw_link)
      URI.join(@page_url, raw_link).to_s
    rescue URI::Error
      raw_link
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # If/when the bad link finder is converted to a set of parallel processes
2
+ # this cache will need to be backed by something threadsafe.
3
+
4
module BadLinkFinder
  # In-memory store of link check results. Keys that differ only by their
  # anchor (everything from the first "#") share one entry.
  class ResultCache
    def initialize
      @cache = {}
    end

    # @param key [String] link or URL the result belongs to
    # @param link [Object] the result to remember
    # @return [Object] the stored result
    def store(key, link)
      @cache[stripped_key(key)] = link
    end

    # @param key [String] link or URL to look up
    # @return [Object, nil] the stored result, or nil on a miss
    def fetch(key)
      @cache[stripped_key(key)]
    end

    protected

    # Drops the fragment from the key. `\z` with the multiline flag removes
    # everything from the first "#" to the end of the string, even when the
    # key contains a newline; `$` would stop at the end of the first line.
    def stripped_key(key)
      key.sub(/#.*\z/m, '')
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'pathname'
2
+ require 'bad_link_finder/page'
3
+
4
module BadLinkFinder
  # Enumerates every file in a mirrored site directory as a Page.
  class Site
    include Enumerable

    # @param mirror_dir [String, Pathname] root of the mirrored site. It is
    #   expanded to an absolute path up front, because #each changes the
    #   working directory and a relative path would then resolve against the
    #   wrong location when pages are read.
    def initialize(mirror_dir)
      @mirror_dir = Pathname.new(mirror_dir.to_s).expand_path
    end

    # Yields a Page for each non-directory entry found by the `**/*` glob
    # below the mirror directory.
    #
    # The process working directory is the mirror directory while the block
    # runs.
    #
    # @return [Enumerator] when called without a block
    def each
      return enum_for(:each) unless block_given?

      Dir.chdir(@mirror_dir.to_s) do
        Dir.glob('**/*').each do |path|
          next if File.directory?(path)
          yield BadLinkFinder::Page.new(@mirror_dir, path)
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'bad_link_finder/site'
2
+ require 'bad_link_finder/result_cache'
3
+ require 'bad_link_finder/page_checker'
4
+
5
module BadLinkFinder
  # Checks every page of a mirrored site and collects the broken links.
  class SiteChecker
    # @param mirror_dir [String] directory holding the mirrored site
    # @param host [String] live site host to test links against
    def initialize(mirror_dir, host)
      @mirror_dir = File.expand_path(mirror_dir)
      @host = host
      @result_cache = BadLinkFinder::ResultCache.new
    end

    # Walks the site page by page, logging progress to stdout.
    #
    # @return [Hash] page URL => bad links, containing only pages that have
    #   at least one bad link
    def run
      BadLinkFinder::Site.new(@mirror_dir).each_with_object({}) do |page, bad_link_map|
        page_checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache)
        puts "Checking page #{page.path} as #{page_checker.page_url}"

        bad_links = page_checker.bad_links
        bad_link_map[page_checker.page_url] = bad_links if bad_links.any?
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module BadLinkFinder
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,24 @@
1
+ require 'bad_link_finder/site_checker'
2
+ require 'bad_link_finder/csv_builder'
3
+ require 'pathname'
4
+
5
module BadLinkFinder
  # Raised when a required environment variable is missing, blank, or points
  # at something unusable.
  class EnvironmentVariableError < ArgumentError; end

  # Command-line entry point: validates the environment, checks the mirrored
  # site against the live host and writes the CSV report.
  #
  # Reads MIRROR_DIR, REPORT_OUTPUT_FILE and SITE_HOST from ENV. Parent
  # directories of the report file are created if necessary.
  #
  # @raise [EnvironmentVariableError] if a variable is unset or blank, or if
  #   MIRROR_DIR is not an existing directory
  def self.run
    ['MIRROR_DIR', 'REPORT_OUTPUT_FILE', 'SITE_HOST'].each do |var|
      # A variable set to an empty or whitespace-only string is as unusable
      # as an unset one, so both are rejected here.
      raise EnvironmentVariableError, "Missing environment variable #{var}" if ENV[var].to_s.strip.empty?
    end

    mirror_dir = ENV['MIRROR_DIR']
    raise EnvironmentVariableError, "MIRROR_DIR '#{mirror_dir}' does not exist" unless Dir.exist?(mirror_dir)

    bad_link_map = BadLinkFinder::SiteChecker.new(mirror_dir, ENV['SITE_HOST']).run
    csv_builder = CSVBuilder.new(bad_link_map)

    report_path = Pathname.new(ENV['REPORT_OUTPUT_FILE'])
    report_path.parent.mkpath
    report_path.open('w') do |file|
      file.write(csv_builder.to_s)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><title>Example site</title></head>
4
+ <body>
5
+ <a href='relative-example.html'>Relative example</a>
6
+ <a href='relative-example.html'>Relative example</a>
7
+ <a href='https://www.example.net/external-example.html'>External example</a>
8
+ </body>
9
+ </html>
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><title>Example site</title></head>
4
+ <body>
5
+ <!-- Included -->
6
+ <a href='/example/index.html?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1'></a>
7
+ <a href=''></a>
8
+
9
+ <!-- Excluded -->
10
+ <a></a>
11
+ <a href='#section-2'></a>
12
+ <a href='mailto:test@example.com'></a>
13
+ </body>
14
+ </html>
@@ -0,0 +1,7 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><title>Example site</title></head>
4
+ <body>
5
+ <a href='/example/index.html'>Example 1</a>
6
+ </body>
7
+ </html>
@@ -0,0 +1,44 @@
1
+ require 'test_helper'
2
+ require 'webmock/minitest'
3
+ require 'bad_link_finder'
4
+
5
+ describe BadLinkFinder do
6
+
7
+ before do
8
+ stub_request(:any, 'http://www.example.com/example/').to_return(status: 200)
9
+ stub_request(:any, 'http://www.example.com/example/relative-example').to_return(status: 302, headers: {'Location' => 'http://www.example.com/example/'})
10
+ stub_request(:any, 'https://www.example.net/external-example.html').to_return(status: 500)
11
+ stub_request(:any, 'http://www.example.com/example/?test=true&redirect=http://www.example.com/in-param-url/index.html').to_return(status: 404)
12
+
13
+ ENV['MIRROR_DIR'] = (FIXTURES_ROOT+'www.example.com').to_s
14
+ ENV['REPORT_OUTPUT_FILE'] = (TMP_ROOT+'bad_links.csv').to_s
15
+ ENV['SITE_HOST'] = 'http://www.example.com/'
16
+ end
17
+
18
+ it "finds all broken links and exports to a CSV" do
19
+ BadLinkFinder.run
20
+
21
+ csv_string = File.read(ENV['REPORT_OUTPUT_FILE'])
22
+
23
+ assert_match 'http://www.example.com/example/', csv_string
24
+ end
25
+
26
+ it "complains if key variables are missing" do
27
+ ['MIRROR_DIR', 'REPORT_OUTPUT_FILE', 'SITE_HOST'].each do |var|
28
+ ENV.delete(var)
29
+
30
+ assert_raises(BadLinkFinder::EnvironmentVariableError) do
31
+ BadLinkFinder.run
32
+ end
33
+ end
34
+ end
35
+
36
+ it "complains if the MIRROR_DIR does not exist" do
37
+ ENV['MIRROR_DIR'] = (FIXTURES_ROOT+'this_does_not_exist').to_s
38
+
39
+ assert_raises(BadLinkFinder::EnvironmentVariableError) do
40
+ BadLinkFinder.run
41
+ end
42
+ end
43
+
44
+ end
@@ -0,0 +1,5 @@
1
require 'set' # Array#to_set is only available without this on Ruby >= 3.2

class Minitest::Test
  # Asserts that two collections contain the same elements, ignoring order
  # and duplicates (both sides are compared as sets).
  def assert_same_elements(array1, array2)
    assert_equal array1.to_set, array2.to_set, "Different elements in #{array1.inspect} and #{array2.inspect}"
  end
end
@@ -0,0 +1,11 @@
1
+ require 'bundler/setup'
2
+ $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
3
+ $LOAD_PATH.unshift File.expand_path("..", __FILE__)
4
+
5
+ require 'minitest/autorun'
6
+ require 'support/matchers'
7
+
8
+ require 'pathname'
9
+ APP_ROOT = Pathname.new(File.join(File.dirname(__FILE__), '..'))
10
+ FIXTURES_ROOT = APP_ROOT+'test/fixtures'
11
+ TMP_ROOT = APP_ROOT+'tmp'
@@ -0,0 +1,45 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/csv_builder'
3
+ require 'ostruct'
4
+ require 'csv'
5
+
6
+ describe BadLinkFinder::CSVBuilder do
7
+
8
+ it "flattens out the bad links map into a CSV structure" do
9
+ bad_link_map = {
10
+ 'http://www.example.com/example/' => [
11
+ mock_link(link: 'https://www.example.net/external-example.html', error_message: "This link returned a 404", exception: TestException.new('404 not found')),
12
+ mock_link(link: 'relative-example', error_message: "Nope")
13
+ ],
14
+ 'http://www.example.com/example/relative-example' => [
15
+ mock_link(
16
+ link: '/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1',
17
+ error_message: "What even is this?",
18
+ exception: TestException.new('Test exception')
19
+ )
20
+ ]
21
+ }
22
+
23
+ csv_builder = BadLinkFinder::CSVBuilder.new(bad_link_map)
24
+
25
+ parsed_csv = CSV.parse(csv_builder.to_s)
26
+
27
+ headers = parsed_csv.shift
28
+ assert_equal ['page_url', 'link', 'error_message', 'raw_error_message'], headers
29
+
30
+ assert_equal bad_link_map.values.flatten.count, parsed_csv.count
31
+
32
+ bad_link_map.each do |page_url, links|
33
+ links.each do |link|
34
+ assert parsed_csv.include?([page_url, link.link, link.error_message, (link.exception.message if link.exception)])
35
+ end
36
+ end
37
+ end
38
+
39
+ def mock_link(attrs)
40
+ OpenStruct.new(attrs)
41
+ end
42
+
43
+ class TestException < Exception; end
44
+
45
+ end
@@ -0,0 +1,70 @@
1
+ require 'test_helper'
2
+ require 'webmock/minitest'
3
+ require 'bad_link_finder/link'
4
+
5
+ describe BadLinkFinder::Link do
6
+
7
+ describe '#valid?' do
8
+ it "approves fully qualified urls which get a good response" do
9
+ stub_url("http://www.example.com", 200)
10
+ link = build_link('http://www.example.com')
11
+
12
+ assert link.valid?
13
+ end
14
+
15
+ it "approves relative paths which get a good response" do
16
+ stub_url("http://www.example.com/somewhere/an-example-path", 200)
17
+ link = build_link('an-example-path', page_url: 'http://www.example.com/somewhere/')
18
+
19
+ assert link.valid?
20
+ end
21
+
22
+ it "approves absolute paths which get a good response" do
23
+ stub_url("http://www.example.com/an-example-path", 200)
24
+ link = build_link('/an-example-path', page_url: 'http://www.example.com/somewhere/')
25
+
26
+ assert link.valid?
27
+ end
28
+
29
+ it "reports malformed links without checking the internet" do
30
+ link = build_link('htt[]://{an-example-path}')
31
+
32
+ refute link.valid?
33
+ assert_equal "This link is in a bad format", link.error_message
34
+ end
35
+
36
+ it "reports links returning failure status codes" do
37
+ stub_url("http://www.example.com/an-example-path", 404)
38
+ link = build_link('/an-example-path')
39
+
40
+ refute link.valid?
41
+ assert_equal "This request returned a 404", link.error_message
42
+ end
43
+
44
+ it "reports URLs returning failure status codes" do
45
+ stub_url("https://www.example.net/an-external-failure", 500)
46
+ link = build_link('https://www.example.net/an-external-failure')
47
+
48
+ refute link.valid?
49
+ assert_equal "This request returned a 500", link.error_message
50
+ end
51
+
52
+ it "retries 405s as GET requests" do
53
+ stub_request(:head, "http://www.example.com/an-example-path").to_return(status: 405)
54
+ stub_request(:get, "http://www.example.com/an-example-path").to_return(status: 200)
55
+ link = build_link('/an-example-path')
56
+
57
+ assert link.valid?
58
+ end
59
+ end
60
+
61
+ def stub_url(url, status)
62
+ stub_request(:any, url).to_return(status: status)
63
+ end
64
+
65
+ def build_link(link_path, opts = {})
66
+ page_url = opts[:page_url] || 'http://www.example.com'
67
+ BadLinkFinder::Link.new(page_url, link_path)
68
+ end
69
+
70
+ end
@@ -0,0 +1,22 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/page_checker'
3
+ require 'bad_link_finder/page'
4
+ require 'bad_link_finder/result_cache'
5
+
6
+ describe BadLinkFinder::PageChecker do
7
+
8
+ describe "#page_url" do
9
+ it "correctly merges the host with the page path" do
10
+ assert_equal 'http://www.example.com/', build_page_checker('index.html').page_url.to_s
11
+ assert_equal 'http://www.example.com/example/', build_page_checker('example/index.html').page_url.to_s
12
+ assert_equal 'http://www.example.com/example/relative-example', build_page_checker('example/relative-example.html').page_url.to_s
13
+ end
14
+ end
15
+
16
+ def build_page_checker(path)
17
+ site_mirror = FIXTURES_ROOT+'www.example.com'
18
+ page = BadLinkFinder::Page.new(site_mirror, path)
19
+ BadLinkFinder::PageChecker.new('http://www.example.com/', page, BadLinkFinder::ResultCache.new)
20
+ end
21
+
22
+ end
@@ -0,0 +1,50 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/page'
3
+
4
+ describe BadLinkFinder::Page do
5
+
6
+ it "strips index.html and .html from the page path" do
7
+ assert_equal '', build_page('index.html').path.to_s
8
+ assert_equal 'example/', build_page('example/index.html').path.to_s
9
+ assert_equal 'example/relative-example', build_page('example/relative-example.html').path.to_s
10
+ end
11
+
12
+ it "finds absolute paths, stripping index.html and .html" do
13
+ assert_equal ['/example/'], build_page('index.html').links.map(&:to_s)
14
+ end
15
+
16
+ it "finds relative paths, stripping index.html and .html" do
17
+ assert build_page('example/index.html').links.map(&:to_s).include?('relative-example')
18
+ end
19
+
20
+ it "finds and preserves external URLs" do
21
+ assert build_page('example/index.html').links.map(&:to_s).include?('https://www.example.net/external-example.html')
22
+ end
23
+
24
+ it "preserves params and anchors on internal links" do
25
+ page = build_page('example/relative-example.html')
26
+ assert page.links.map(&:to_s).include?('/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1')
27
+ end
28
+
29
+ it "includes links with empty href" do
30
+ assert build_page('example/relative-example.html').links.map(&:to_s).include?('')
31
+ end
32
+
33
+ it "excludes links with no href" do
34
+ refute build_page('example/relative-example.html').links.include?(nil)
35
+ end
36
+
37
+ it "excludes links with an href containing only an anchor reference" do
38
+ refute build_page('example/relative-example.html').links.map(&:to_s).include?('#section-2')
39
+ end
40
+
41
+ it "excludes mailto links" do
42
+ refute build_page('example/relative-example.html').links.map(&:to_s).include?('mailto:test@example.com')
43
+ end
44
+
45
+ def build_page(path)
46
+ site_mirror = FIXTURES_ROOT+'www.example.com'
47
+ BadLinkFinder::Page.new(site_mirror, path)
48
+ end
49
+
50
+ end
@@ -0,0 +1,38 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/result_cache'
3
+
4
+ describe BadLinkFinder::ResultCache do
5
+
6
+ before do
7
+ @cache = BadLinkFinder::ResultCache.new
8
+ end
9
+
10
+ it "returns a cache hit for URLs which differ only by anchor" do
11
+ @cache.store('http://www.example.com#test123', 'value')
12
+ assert_equal 'value', @cache.fetch('http://www.example.com#test567')
13
+
14
+ @cache.store('http://www.example.com?test=true#test123', 'value')
15
+ assert_equal 'value', @cache.fetch('http://www.example.com?test=true#test567')
16
+
17
+ @cache.store('http://www.example.com?test=true#test123', 'value')
18
+ refute_equal 'value', @cache.fetch('http://www.example.com?test=false#test567')
19
+ end
20
+
21
+ describe "#store" do
22
+ it "returns the item stored" do
23
+ assert_equal 'value', @cache.store('key', 'value')
24
+ end
25
+ end
26
+
27
+ describe "#fetch" do
28
+ it "returns fetched items on a hit" do
29
+ @cache.store('key', 'value')
30
+ assert_equal 'value', @cache.fetch('key')
31
+ end
32
+
33
+ it "returns nil on a miss" do
34
+ assert_nil @cache.fetch('missing-key')
35
+ end
36
+ end
37
+
38
+ end
@@ -0,0 +1,22 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/site'
3
+
4
+ describe BadLinkFinder::Site do
5
+
6
+ before do
7
+ @site_mirror = FIXTURES_ROOT+'www.example.com'
8
+ end
9
+
10
+ describe '#each' do
11
+ it "loads all files from a directory and passes on the host" do
12
+ site_map = [
13
+ '',
14
+ 'example/',
15
+ 'example/relative-example'
16
+ ]
17
+
18
+ assert_same_elements site_map, BadLinkFinder::Site.new(@site_mirror).map { |page| page.path.to_s }
19
+ end
20
+ end
21
+
22
+ end
metadata ADDED
@@ -0,0 +1,166 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bad_link_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Elliot Crosby-McCullough
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: webmock
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: gem_publisher
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 1.3.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 1.3.0
97
+ description: Crawls a static site mirror testing all links. Reports links which don't
98
+ return 200 or redirect to a 200.
99
+ email:
100
+ - elliot.cm@gmail.com
101
+ executables:
102
+ - bad_link_finder
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - bin/bad_link_finder
107
+ - lib/bad_link_finder/csv_builder.rb
108
+ - lib/bad_link_finder/link.rb
109
+ - lib/bad_link_finder/page.rb
110
+ - lib/bad_link_finder/page_checker.rb
111
+ - lib/bad_link_finder/result_cache.rb
112
+ - lib/bad_link_finder/site.rb
113
+ - lib/bad_link_finder/site_checker.rb
114
+ - lib/bad_link_finder/version.rb
115
+ - lib/bad_link_finder.rb
116
+ - README.md
117
+ - LICENCE.txt
118
+ - test/fixtures/www.example.com/example/index.html
119
+ - test/fixtures/www.example.com/example/relative-example.html
120
+ - test/fixtures/www.example.com/index.html
121
+ - test/integration/bad_link_finder_test.rb
122
+ - test/support/matchers.rb
123
+ - test/test_helper.rb
124
+ - test/unit/csv_builder_test.rb
125
+ - test/unit/link_test.rb
126
+ - test/unit/page_checker_test.rb
127
+ - test/unit/page_test.rb
128
+ - test/unit/result_cache_test.rb
129
+ - test/unit/site_test.rb
130
+ homepage: http://github.com/alphagov/bad_link_finder
131
+ licenses:
132
+ - MIT
133
+ metadata: {}
134
+ post_install_message:
135
+ rdoc_options: []
136
+ require_paths:
137
+ - lib
138
+ required_ruby_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ! '>='
146
+ - !ruby/object:Gem::Version
147
+ version: 2.1.11
148
+ requirements: []
149
+ rubyforge_project:
150
+ rubygems_version: 2.1.11
151
+ signing_key:
152
+ specification_version: 4
153
+ summary: Tests links in static site mirrors
154
+ test_files:
155
+ - test/fixtures/www.example.com/example/index.html
156
+ - test/fixtures/www.example.com/example/relative-example.html
157
+ - test/fixtures/www.example.com/index.html
158
+ - test/integration/bad_link_finder_test.rb
159
+ - test/support/matchers.rb
160
+ - test/test_helper.rb
161
+ - test/unit/csv_builder_test.rb
162
+ - test/unit/link_test.rb
163
+ - test/unit/page_checker_test.rb
164
+ - test/unit/page_test.rb
165
+ - test/unit/result_cache_test.rb
166
+ - test/unit/site_test.rb