bad_link_finder 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ N2ZkYWM1MjViZmM0M2E3ZDUyZDQ1NmU2MjQ4NDU5Yzk4YjMwZTY3Yg==
5
+ data.tar.gz: !binary |-
6
+ MTljZDc2YzUxNmFkZmZjMWNhMzlkYWE1MDg1OWQ2YzU2OWFkYzUzNw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NjM3MjRkOTVlMGRlYWI0NTliMDViMDI2ZjlkZjI1MGZjNDhjYTMwYWIyNjUy
10
+ OGJjODRiZmJlYzMzMzI2NzcyZGJhNjE4ZmY4ZjQzZjFlZTMyOWQ4ZDk5MzZm
11
+ MmRiZWYxYmViYmMwYzJjMjdmNTQyNWU2MTIzMDUzYWE0MmFiYmY=
12
+ data.tar.gz: !binary |-
13
+ MTBlYjc4OTU5OWFkOWM5YTE4MDVhNjgzNDI0ZmQxNmQwODcwZWU3NzhkMTlh
14
+ MzQ0YjhhMzExY2JmZGFmN2M2YmYxOTNhNmRmNDg3M2I3MTQ3NjljMmU2NTg4
15
+ NmNlZWNkYzIwOTc1ZWRlMjU4ODE2MDI3NDgxODc4NTZkODA4YTc=
data/LICENCE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Elliot Crosby-McCullough
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,13 @@
1
+ # Bad link finder
2
+
3
+ Crawls a mirrored site and checks that all links either return a successful response or redirect to somewhere that does.
4
+
5
+ ## Usage
6
+
7
+ Set environment variables:
8
+
9
+ - `MIRROR_DIR`, to the location of your mirrored site.
10
+ - `REPORT_OUTPUT_FILE`, to the location you'd like the CSV saved to.
11
+ - `SITE_HOST`, to the full host of the live site you'd like to test against, including protocol. For example, `https://www.example.com`
12
+
13
+ Then execute `bad_link_finder`.
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bad_link_finder'
4
+
5
# Run the checker. A configuration problem is reported as a one-line message
# via Kernel#abort instead of surfacing as an unhandled exception.
begin
  BadLinkFinder.run
rescue BadLinkFinder::EnvironmentVariableError => error
  abort("Please check your environment variables: #{error.message}")
end
@@ -0,0 +1,22 @@
1
+ require 'csv'
2
+
3
module BadLinkFinder
  # Renders a map of page URL => bad links as CSV text.
  class CSVBuilder
    # bad_link_map - Hash whose keys are page URLs and whose values are
    #                collections of objects responding to #link,
    #                #error_message and #exception.
    def initialize(bad_link_map)
      @bad_link_map = bad_link_map
    end

    # Returns the report as a String: a header row followed by one row per
    # bad link. The text is generated once and memoised.
    def to_s
      @to_s ||= CSV.generate(encoding: 'UTF-8') do |report|
        report << ['page_url', 'link', 'error_message', 'raw_error_message']

        @bad_link_map.each do |source_url, broken_links|
          broken_links.each { |broken| report << row_for(source_url, broken) }
        end
      end
    end

    private

    # Builds one CSV row; the last column is left nil when the link carries
    # no underlying exception.
    def row_for(source_url, broken)
      raw_message = broken.exception ? broken.exception.message : nil
      [source_url, broken.link, broken.error_message, raw_message]
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require 'mechanize'
2
+
3
module BadLinkFinder
  # A single link found on a page. The link is resolved against the page URL
  # and requested as soon as the object is built; any failure is recorded on
  # the instance instead of being raised.
  class Link
    attr_reader :link, :url, :error_message, :exception

    # page_url - absolute URL of the page the link appeared on.
    # link     - the href exactly as it was found on the page.
    #
    # Never raises for the error classes listed below: each is translated into
    # an #error_message (plus the original #exception).
    def initialize(page_url, link)
      @page_url = page_url
      @link = link
      @url = get_url_from_link(link)

      validate_with_request

    rescue URI::InvalidURIError => exception
      record_error("This link is in a bad format", exception)
    rescue Mechanize::UnauthorizedError => exception
      # Mechanize::UnauthorizedError is a subclass of
      # Mechanize::ResponseCodeError, so it has to be rescued first or this
      # branch can never run.
      record_error("This link requires authorisation", exception)
    rescue Mechanize::ResponseCodeError => exception
      if exception.response_code.to_i == 405 && !@head_unsupported
        # The server refused HEAD; re-run the method once using GET. The flag
        # bounds the retry to a single extra attempt.
        @head_unsupported = true
        retry
      else
        record_error("This request returned a #{exception.response_code}", exception)
      end
    rescue Mechanize::UnsupportedSchemeError => exception
      record_error("This link has a scheme we can't load (should be http or https)", exception)
    rescue Mechanize::RedirectLimitReachedError => exception
      record_error("This link might be in a redirect loop", exception)
    rescue Mechanize::RobotsDisallowedError => exception
      record_error("This link is blocked by robots.txt or nofollow attributes", exception)
    rescue Mechanize::Error, Net::HTTP::Persistent::Error, Timeout::Error, Errno::EINVAL,
      Errno::ECONNRESET, EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
      Net::ProtocolError, OpenSSL::SSL::SSLError, SocketError => exception # Thanks Net::HTTP
      record_error("The server failed to serve this page properly", exception)
    end

    # Returns true when no error was recorded while checking the link.
    def valid?
      @error_message.nil?
    end

    protected

    # Requests @url with HEAD, or with GET once a 405 has shown that HEAD is
    # not supported. Raises whatever the HTTP client raises.
    def validate_with_request
      puts "-- testing link #{@link} using #{@url}"
      sleep 0.1 # Recommended pause for gov.uk rate limiting

      browser = Mechanize.new
      browser.user_agent = 'GOV.UK link checker'

      if @head_unsupported
        browser.get(@url)
      else
        browser.head(@url)
      end
    end

    # Resolves the href against the page URL and returns the result as a String.
    def get_url_from_link(link)
      URI.join(@page_url, link).to_s
    end

    # Stores the human-readable message and the underlying exception, and
    # logs the failure to stdout.
    def record_error(message, exception = nil)
      @error_message = message
      @exception = exception

      puts "---- found broken link #{@url}: #{message}: #{exception.message if exception}"
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'nokogiri'
2
+
3
module BadLinkFinder
  # One file of the mirrored site: its path as it would be requested on the
  # live site, and the hrefs of the anchors it contains.
  class Page
    # hrefs that do not point into the mirrored tree: anything carrying a URI
    # scheme ("https:", "ftp:", ...) or a protocol-relative "//host" prefix.
    EXTERNAL_LINK = %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i

    # A trailing ".html", optionally preceded by an "index" that is a whole
    # path segment. The captured "/" (if any) is kept by the substitution.
    HTML_ENDING = %r{(?:(\A|/)index)?\.html\z}

    # mirror_dir - directory of the mirror; must support `+ path` and the
    #              result must respond to #read (a Pathname does).
    # path       - path of the file relative to mirror_dir.
    def initialize(mirror_dir, path)
      @path = strip_html_ending(path)

      file = mirror_dir + path
      doc = Nokogiri::HTML(file.read)
      @links = doc.css('a').map do |a|
        strip_html_ending(a['href']) unless ignore_link?(a['href'])
      end.compact
    end

    attr_reader :path, :links

    protected

    # Removes a trailing "index.html" or ".html" from the path part of an
    # internal href. Only the path is touched: the query string and fragment
    # are passed through untouched, "index" is only dropped when it is the
    # whole final segment (so "reindex.html" becomes "reindex"), and external
    # URLs are returned unchanged.
    def strip_html_ending(href)
      return href if href =~ EXTERNAL_LINK

      path = href[/\A[^?#]*/]
      suffix = href[path.length..-1]
      path.sub(HTML_ENDING, '\1') + suffix
    end

    # True for anchors that have no href, that only reference a fragment of
    # the current page, or that are mailto: links.
    def ignore_link?(href)
      href.nil? || href.start_with?('#', 'mailto:')
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'bad_link_finder/link'
2
+
3
module BadLinkFinder
  # Checks every link of one page against the live site, reusing results
  # already held in the shared cache.
  class PageChecker
    # host         - base URL of the live site, with or without a trailing "/".
    # page         - object responding to #path and #links.
    # result_cache - object responding to #fetch(key) and #store(key, value).
    def initialize(host, page, result_cache)
      host = host.chomp('/') + '/'
      @page = page
      @page_url = URI.join(host, page.path).to_s
      @result_cache = result_cache
    end

    attr_reader :page_url

    # Returns the links on this page that failed their check (memoised).
    def bad_links
      @bad_links ||= @page.links.map { |raw_link| checked_link(raw_link) }.reject(&:valid?)
    end

    protected

    # Returns the cached result for the link's target, checking it first if
    # it has not been seen yet.
    #
    # NOTE: a cached result keeps the raw href of the page it was first found
    # on, so #link on a reused result may differ textually from the href on
    # this page even though both resolve to the same URL.
    def checked_link(raw_link)
      key = cache_key(raw_link)
      @result_cache.fetch(key) || @result_cache.store(key, BadLinkFinder::Link.new(@page_url, raw_link))
    end

    # The cache is keyed on the resolved URL rather than the raw href: the
    # same relative href on pages in different directories points at
    # different URLs and must not share a result. Hrefs that cannot be
    # resolved fall back to their raw text.
    def cache_key(raw_link)
      URI.join(@page_url, raw_link).to_s
    rescue URI::Error
      raw_link
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # If/when the bad link finder is converted to a set of parallel processes
2
+ # this cache will need to be backed by something threadsafe.
3
+
4
module BadLinkFinder
  # Hash-backed store of link results. Keys are compared with any "#..."
  # anchor removed, so links differing only by anchor share one entry.
  class ResultCache
    # Everything from the first "#" to the end of the line.
    ANCHOR = /#.*$/

    def initialize
      @cache = Hash.new
    end

    # Saves +link+ under the anchor-less form of +key+ and returns +link+.
    def store(key, link)
      @cache.store(without_anchor(key), link)
    end

    # Returns the entry saved for the anchor-less form of +key+, or nil.
    def fetch(key)
      @cache[without_anchor(key)]
    end

    protected

    def without_anchor(key)
      key.sub(ANCHOR, '')
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'pathname'
2
+ require 'bad_link_finder/page'
3
+
4
module BadLinkFinder
  # Enumerates every file below a mirrored site directory as a
  # BadLinkFinder::Page.
  class Site
    include Enumerable

    # mirror_dir - String or Pathname of the mirror directory. It is resolved
    #              to an absolute path immediately, so a relative directory
    #              keeps pointing at the same place whatever the working
    #              directory is later.
    def initialize(mirror_dir)
      @mirror_dir = Pathname.new(mirror_dir).expand_path
    end

    # Yields a Page for each non-directory entry matched by '**/*', in sorted
    # path order. Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?

      mirrored_files.each do |path|
        yield BadLinkFinder::Page.new(@mirror_dir, path)
      end
    end

    protected

    # Relative paths of all files in the mirror. The working directory is
    # only changed while globbing and is restored before any page is yielded,
    # so the caller's block never runs inside the mirror directory.
    def mirrored_files
      Dir.chdir(@mirror_dir) do
        Dir.glob('**/*').reject { |path| File.directory?(path) }.sort
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'bad_link_finder/site'
2
+ require 'bad_link_finder/result_cache'
3
+ require 'bad_link_finder/page_checker'
4
+
5
module BadLinkFinder
  # Walks every page of a mirrored site and collects the links that fail
  # when checked against the live host.
  class SiteChecker
    # mirror_dir - directory of the mirror; expanded to an absolute path.
    # host       - base URL of the live site, handed on to each PageChecker.
    def initialize(mirror_dir, host)
      @mirror_dir = File.expand_path(mirror_dir)
      @host = host
      @result_cache = BadLinkFinder::ResultCache.new
    end

    # Returns a Hash of page URL => bad links, containing only the pages that
    # have at least one bad link. Progress is printed to stdout.
    def run
      BadLinkFinder::Site.new(@mirror_dir).each_with_object({}) do |page, failures|
        checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache)
        puts "Checking page #{page.path} as #{checker.page_url}"

        broken = checker.bad_links
        failures[checker.page_url] = broken if broken.any?
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module BadLinkFinder
  # Release number of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,24 @@
1
+ require 'bad_link_finder/site_checker'
2
+ require 'bad_link_finder/csv_builder'
3
+ require 'pathname'
4
+
5
module BadLinkFinder
  # Raised when the environment the checker is configured through is
  # incomplete or unusable.
  class EnvironmentVariableError < ArgumentError; end

  # Checks the mirror named by MIRROR_DIR against the live site SITE_HOST and
  # writes the CSV report to REPORT_OUTPUT_FILE, creating the report's parent
  # directory if needed.
  #
  # Raises EnvironmentVariableError when a variable is missing or blank, when
  # MIRROR_DIR is not an existing directory, or when SITE_HOST does not start
  # with http:// or https://.
  def self.run
    ['MIRROR_DIR', 'REPORT_OUTPUT_FILE', 'SITE_HOST'].each do |var|
      # A variable that is set but blank is as unusable as an unset one.
      if ENV[var].to_s.strip.empty?
        raise EnvironmentVariableError, "Missing environment variable #{var}"
      end
    end

    unless Dir.exist?(ENV['MIRROR_DIR'])
      raise EnvironmentVariableError, "MIRROR_DIR '#{ENV['MIRROR_DIR']}' does not exist"
    end

    # Page URLs are built by joining paths onto the host, which only works
    # for an absolute URL; reject anything else up front with a clear message.
    unless ENV['SITE_HOST'] =~ %r{\Ahttps?://}i
      raise EnvironmentVariableError,
            "SITE_HOST '#{ENV['SITE_HOST']}' must include the protocol, for example https://www.example.com"
    end

    bad_link_map = BadLinkFinder::SiteChecker.new(ENV['MIRROR_DIR'], ENV['SITE_HOST']).run
    csv_builder = CSVBuilder.new(bad_link_map)

    report_path = Pathname.new(ENV['REPORT_OUTPUT_FILE'])
    report_path.parent.mkpath
    report_path.open('w') do |file|
      file.write(csv_builder.to_s)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><title>Example site</title></head>
4
+ <body>
5
+ <a href='relative-example.html'>Relative example</a>
6
+ <a href='relative-example.html'>Relative example</a>
7
+ <a href='https://www.example.net/external-example.html'>External example</a>
8
+ </body>
9
+ </html>
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><title>Example site</title></head>
4
+ <body>
5
+ <!-- Included -->
6
+ <a href='/example/index.html?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1'></a>
7
+ <a href=''></a>
8
+
9
+ <!-- Excluded -->
10
+ <a></a>
11
+ <a href='#section-2'></a>
12
+ <a href='mailto:test@example.com'></a>
13
+ </body>
14
+ </html>
@@ -0,0 +1,7 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head><title>Example site</title></head>
4
+ <body>
5
+ <a href='/example/index.html'>Example 1</a>
6
+ </body>
7
+ </html>
@@ -0,0 +1,44 @@
1
+ require 'test_helper'
2
+ require 'webmock/minitest'
3
+ require 'bad_link_finder'
4
+
5
# End-to-end check of BadLinkFinder.run against the fixture mirror, with all
# HTTP traffic stubbed.
describe BadLinkFinder do

  before do
    stub_request(:any, 'http://www.example.com/example/').to_return(status: 200)
    stub_request(:any, 'http://www.example.com/example/relative-example').to_return(status: 302, headers: {'Location' => 'http://www.example.com/example/'})
    stub_request(:any, 'https://www.example.net/external-example.html').to_return(status: 500)
    stub_request(:any, 'http://www.example.com/example/?test=true&redirect=http://www.example.com/in-param-url/index.html').to_return(status: 404)

    ENV['MIRROR_DIR'] = (FIXTURES_ROOT+'www.example.com').to_s
    ENV['REPORT_OUTPUT_FILE'] = (TMP_ROOT+'bad_links.csv').to_s
    ENV['SITE_HOST'] = 'http://www.example.com/'
  end

  it "finds all broken links and exports to a CSV" do
    BadLinkFinder.run

    csv_string = File.read(ENV['REPORT_OUTPUT_FILE'])

    assert_match 'http://www.example.com/example/', csv_string
    # The external link is stubbed to return a 500, so it must be reported.
    assert_match 'https://www.example.net/external-example.html', csv_string
  end

  it "complains if key variables are missing" do
    ['MIRROR_DIR', 'REPORT_OUTPUT_FILE', 'SITE_HOST'].each do |var|
      # Remove one variable at a time and put it back afterwards; otherwise
      # every iteration after the first would still fail on the first
      # variable and the others would never really be exercised.
      original_value = ENV.delete(var)

      begin
        error = assert_raises(BadLinkFinder::EnvironmentVariableError) do
          BadLinkFinder.run
        end
        assert_match var, error.message
      ensure
        ENV[var] = original_value
      end
    end
  end

  it "complains if the MIRROR_DIR does not exist" do
    ENV['MIRROR_DIR'] = (FIXTURES_ROOT+'this_does_not_exist').to_s

    assert_raises(BadLinkFinder::EnvironmentVariableError) do
      BadLinkFinder.run
    end
  end

end
@@ -0,0 +1,5 @@
1
require 'set' # provides #to_set; not loaded automatically on older Rubies

class Minitest::Test
  # Passes when both collections contain the same elements, ignoring order
  # and duplicates (each side is compared as a Set).
  def assert_same_elements(array1, array2)
    assert_equal array1.to_set, array2.to_set, "Different elements in #{array1.inspect} and #{array2.inspect}"
  end
end
@@ -0,0 +1,11 @@
1
+ require 'bundler/setup'
2
+ $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
3
+ $LOAD_PATH.unshift File.expand_path("..", __FILE__)
4
+
5
+ require 'minitest/autorun'
6
+ require 'support/matchers'
7
+
8
+ require 'pathname'
9
# Project root: the parent of the directory holding this file. Expanded to an
# absolute, normalised path so the fixture and tmp locations stay valid even
# if this file was loaded through a relative path or the working directory
# changes during a test.
APP_ROOT = Pathname.new(File.expand_path('..', File.dirname(__FILE__)))
FIXTURES_ROOT = APP_ROOT + 'test/fixtures'
TMP_ROOT = APP_ROOT + 'tmp'
@@ -0,0 +1,45 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/csv_builder'
3
+ require 'ostruct'
4
+ require 'csv'
5
+
6
describe BadLinkFinder::CSVBuilder do

  it "flattens out the bad links map into a CSV structure" do
    bad_link_map = {
      'http://www.example.com/example/' => [
        mock_link(link: 'https://www.example.net/external-example.html', error_message: "This link returned a 404", exception: TestException.new('404 not found')),
        mock_link(link: 'relative-example', error_message: "Nope")
      ],
      'http://www.example.com/example/relative-example' => [
        mock_link(
          link: '/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1',
          error_message: "What even is this?",
          exception: TestException.new('Test exception')
        )
      ]
    }

    csv_builder = BadLinkFinder::CSVBuilder.new(bad_link_map)

    parsed_csv = CSV.parse(csv_builder.to_s)

    headers = parsed_csv.shift
    assert_equal ['page_url', 'link', 'error_message', 'raw_error_message'], headers

    # One row per bad link across all pages.
    assert_equal bad_link_map.values.flatten.count, parsed_csv.count

    bad_link_map.each do |page_url, links|
      links.each do |link|
        assert parsed_csv.include?([page_url, link.link, link.error_message, (link.exception.message if link.exception)])
      end
    end
  end

  # Stand-in for a link object: exposes whichever attributes are given and
  # returns nil for the rest.
  def mock_link(attrs)
    OpenStruct.new(attrs)
  end

  # Derives from StandardError rather than Exception, which is the base for
  # application-level errors and is what a bare `rescue` handles.
  class TestException < StandardError; end

end
@@ -0,0 +1,70 @@
1
+ require 'test_helper'
2
+ require 'webmock/minitest'
3
+ require 'bad_link_finder/link'
4
+
5
describe BadLinkFinder::Link do

  # Stubs every HTTP method on +url+ to answer with +status+.
  def stub_url(url, status)
    stub_request(:any, url).to_return(status: status)
  end

  # Builds a link found on opts[:page_url] (the example.com root by default).
  def build_link(link_path, opts = {})
    BadLinkFinder::Link.new(opts[:page_url] || 'http://www.example.com', link_path)
  end

  describe '#valid?' do
    it "approves fully qualified urls which get a good response" do
      stub_url("http://www.example.com", 200)

      assert build_link('http://www.example.com').valid?
    end

    it "approves relative paths which get a good response" do
      stub_url("http://www.example.com/somewhere/an-example-path", 200)
      checked = build_link('an-example-path', page_url: 'http://www.example.com/somewhere/')

      assert checked.valid?
    end

    it "approves absolute paths which get a good response" do
      stub_url("http://www.example.com/an-example-path", 200)
      checked = build_link('/an-example-path', page_url: 'http://www.example.com/somewhere/')

      assert checked.valid?
    end

    it "reports malformed links without checking the internet" do
      checked = build_link('htt[]://{an-example-path}')

      refute checked.valid?
      assert_equal "This link is in a bad format", checked.error_message
    end

    it "reports links returning failure status codes" do
      stub_url("http://www.example.com/an-example-path", 404)
      checked = build_link('/an-example-path')

      refute checked.valid?
      assert_equal "This request returned a 404", checked.error_message
    end

    it "reports URLs returning failure status codes" do
      stub_url("https://www.example.net/an-external-failure", 500)
      checked = build_link('https://www.example.net/an-external-failure')

      refute checked.valid?
      assert_equal "This request returned a 500", checked.error_message
    end

    it "retries 405s as GET requests" do
      target = "http://www.example.com/an-example-path"
      stub_request(:head, target).to_return(status: 405)
      stub_request(:get, target).to_return(status: 200)

      assert build_link('/an-example-path').valid?
    end
  end

end
@@ -0,0 +1,22 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/page_checker'
3
+ require 'bad_link_finder/page'
4
+ require 'bad_link_finder/result_cache'
5
+
6
describe BadLinkFinder::PageChecker do

  # Builds a checker for the fixture page at +path+ with a fresh cache.
  def build_page_checker(path)
    fixture_page = BadLinkFinder::Page.new(FIXTURES_ROOT+'www.example.com', path)
    BadLinkFinder::PageChecker.new('http://www.example.com/', fixture_page, BadLinkFinder::ResultCache.new)
  end

  describe "#page_url" do
    it "correctly merges the host with the page path" do
      # fixture file => URL it should be served from
      expectations = [
        ['index.html', 'http://www.example.com/'],
        ['example/index.html', 'http://www.example.com/example/'],
        ['example/relative-example.html', 'http://www.example.com/example/relative-example']
      ]

      expectations.each do |fixture_path, expected_url|
        assert_equal expected_url, build_page_checker(fixture_path).page_url.to_s
      end
    end
  end

end
@@ -0,0 +1,50 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/page'
3
+
4
describe BadLinkFinder::Page do

  # Loads the fixture page at +path+ from the example.com mirror.
  def build_page(path)
    BadLinkFinder::Page.new(FIXTURES_ROOT+'www.example.com', path)
  end

  # The links found on the fixture page at +path+, as strings.
  def hrefs_on(path)
    build_page(path).links.map(&:to_s)
  end

  it "strips index.html and .html from the page path" do
    [
      ['index.html', ''],
      ['example/index.html', 'example/'],
      ['example/relative-example.html', 'example/relative-example']
    ].each do |fixture_path, expected_path|
      assert_equal expected_path, build_page(fixture_path).path.to_s
    end
  end

  it "finds absolute paths, stripping index.html and .html" do
    assert_equal ['/example/'], hrefs_on('index.html')
  end

  it "finds relative paths, stripping index.html and .html" do
    assert hrefs_on('example/index.html').include?('relative-example')
  end

  it "finds and preserves external URLs" do
    assert hrefs_on('example/index.html').include?('https://www.example.net/external-example.html')
  end

  it "preserves params and anchors on internal links" do
    found = hrefs_on('example/relative-example.html')
    assert found.include?('/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1')
  end

  it "includes links with empty href" do
    assert hrefs_on('example/relative-example.html').include?('')
  end

  it "excludes links with no href" do
    # Checked on the raw links: converting to strings would turn nil into ''.
    refute build_page('example/relative-example.html').links.include?(nil)
  end

  it "excludes links with an href containing only an anchor reference" do
    refute hrefs_on('example/relative-example.html').include?('#section-2')
  end

  it "excludes mailto links" do
    refute hrefs_on('example/relative-example.html').include?('mailto:test@example.com')
  end

end
@@ -0,0 +1,38 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/result_cache'
3
+
4
describe BadLinkFinder::ResultCache do

  # A fresh, empty cache for every example.
  let(:cache) { BadLinkFinder::ResultCache.new }

  it "returns a cache hit for URLs which differ only by anchor" do
    cache.store('http://www.example.com#test123', 'value')
    assert_equal 'value', cache.fetch('http://www.example.com#test567')

    cache.store('http://www.example.com?test=true#test123', 'value')
    assert_equal 'value', cache.fetch('http://www.example.com?test=true#test567')

    # A different query string is a different resource, anchor or not.
    cache.store('http://www.example.com?test=true#test123', 'value')
    refute_equal 'value', cache.fetch('http://www.example.com?test=false#test567')
  end

  describe "#store" do
    it "returns the item stored" do
      stored = cache.store('key', 'value')
      assert_equal 'value', stored
    end
  end

  describe "#fetch" do
    it "returns fetched items on a hit" do
      cache.store('key', 'value')
      assert_equal 'value', cache.fetch('key')
    end

    it "returns nil on a miss" do
      assert_nil cache.fetch('missing-key')
    end
  end

end
@@ -0,0 +1,22 @@
1
+ require 'test_helper'
2
+ require 'bad_link_finder/site'
3
+
4
describe BadLinkFinder::Site do

  # Location of the example.com fixture mirror.
  let(:site_mirror) { FIXTURES_ROOT+'www.example.com' }

  describe '#each' do
    it "loads all files from a directory and passes on the host" do
      expected_paths = ['', 'example/', 'example/relative-example']
      found_paths = BadLinkFinder::Site.new(site_mirror).map { |page| page.path.to_s }

      # Order depends on directory traversal, so compare as sets.
      assert_same_elements expected_paths, found_paths
    end
  end

end
metadata ADDED
@@ -0,0 +1,166 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bad_link_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Elliot Crosby-McCullough
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: webmock
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: gem_publisher
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 1.3.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 1.3.0
97
+ description: Crawls a static site mirror testing all links. Reports links which don't
98
+ return 200 or redirect to a 200.
99
+ email:
100
+ - elliot.cm@gmail.com
101
+ executables:
102
+ - bad_link_finder
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - bin/bad_link_finder
107
+ - lib/bad_link_finder/csv_builder.rb
108
+ - lib/bad_link_finder/link.rb
109
+ - lib/bad_link_finder/page.rb
110
+ - lib/bad_link_finder/page_checker.rb
111
+ - lib/bad_link_finder/result_cache.rb
112
+ - lib/bad_link_finder/site.rb
113
+ - lib/bad_link_finder/site_checker.rb
114
+ - lib/bad_link_finder/version.rb
115
+ - lib/bad_link_finder.rb
116
+ - README.md
117
+ - LICENCE.txt
118
+ - test/fixtures/www.example.com/example/index.html
119
+ - test/fixtures/www.example.com/example/relative-example.html
120
+ - test/fixtures/www.example.com/index.html
121
+ - test/integration/bad_link_finder_test.rb
122
+ - test/support/matchers.rb
123
+ - test/test_helper.rb
124
+ - test/unit/csv_builder_test.rb
125
+ - test/unit/link_test.rb
126
+ - test/unit/page_checker_test.rb
127
+ - test/unit/page_test.rb
128
+ - test/unit/result_cache_test.rb
129
+ - test/unit/site_test.rb
130
+ homepage: http://github.com/alphagov/bad_link_finder
131
+ licenses:
132
+ - MIT
133
+ metadata: {}
134
+ post_install_message:
135
+ rdoc_options: []
136
+ require_paths:
137
+ - lib
138
+ required_ruby_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ required_rubygems_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ! '>='
146
+ - !ruby/object:Gem::Version
147
+ version: 2.1.11
148
+ requirements: []
149
+ rubyforge_project:
150
+ rubygems_version: 2.1.11
151
+ signing_key:
152
+ specification_version: 4
153
+ summary: Tests links in static site mirrors
154
+ test_files:
155
+ - test/fixtures/www.example.com/example/index.html
156
+ - test/fixtures/www.example.com/example/relative-example.html
157
+ - test/fixtures/www.example.com/index.html
158
+ - test/integration/bad_link_finder_test.rb
159
+ - test/support/matchers.rb
160
+ - test/test_helper.rb
161
+ - test/unit/csv_builder_test.rb
162
+ - test/unit/link_test.rb
163
+ - test/unit/page_checker_test.rb
164
+ - test/unit/page_test.rb
165
+ - test/unit/result_cache_test.rb
166
+ - test/unit/site_test.rb