bad_link_finder 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bad_link_finder.rb +11 -2
- data/lib/bad_link_finder/link.rb +4 -3
- data/lib/bad_link_finder/page_checker.rb +9 -13
- data/lib/bad_link_finder/site_checker.rb +5 -4
- data/lib/bad_link_finder/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7336841a6833dbf1369e624892d2c59fc75a782c
|
4
|
+
data.tar.gz: 744b7a4514a0ced554df8d9ae5030ad058ac882f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4646a14c3f8960d4cbb38e5aa9cebfe0552b5c0a516b4696b785db99beb393584fb88a3ee963dee6af5b15a82638870ca71aca56beda1eab75cee36b680c4ea
|
7
|
+
data.tar.gz: 798a38b30d5874655b16e64df255dccc50878eb4f9d15c8290794edf29794498e5661423556627bbfda5b41a558df9da9931b4d596a012ae40964ecb35f2f806
|
data/lib/bad_link_finder.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'bad_link_finder/site_checker'
|
2
2
|
require 'bad_link_finder/csv_builder'
|
3
3
|
require 'pathname'
|
4
|
+
require 'logger'
|
4
5
|
|
5
6
|
module BadLinkFinder
|
6
|
-
def self.run
|
7
|
+
def self.run(logger = NullLogger.new)
|
7
8
|
['MIRROR_DIR', 'REPORT_OUTPUT_FILE', 'SITE_HOST'].each do |var|
|
8
9
|
raise EnvironmentVariableError.new("Missing environment variable #{var}") unless ENV.has_key?(var)
|
9
10
|
end
|
@@ -16,7 +17,7 @@ module BadLinkFinder
|
|
16
17
|
csv_file = report_path.open('w')
|
17
18
|
csv_builder = BadLinkFinder::CSVBuilder.new(csv_file)
|
18
19
|
|
19
|
-
BadLinkFinder::SiteChecker.new(ENV['MIRROR_DIR'], ENV['SITE_HOST'], csv_builder, ENV['START_FROM']).run
|
20
|
+
BadLinkFinder::SiteChecker.new(ENV['MIRROR_DIR'], ENV['SITE_HOST'], csv_builder, ENV['START_FROM'], logger).run
|
20
21
|
|
21
22
|
csv_file.close
|
22
23
|
|
@@ -24,4 +25,12 @@ module BadLinkFinder
|
|
24
25
|
end
|
25
26
|
|
26
27
|
class EnvironmentVariableError < ArgumentError; end
|
28
|
+
|
29
|
+
class NullLogger < Logger
|
30
|
+
def initialize(*args)
|
31
|
+
end
|
32
|
+
|
33
|
+
def add(*args, &block)
|
34
|
+
end
|
35
|
+
end
|
27
36
|
end
|
data/lib/bad_link_finder/link.rb
CHANGED
@@ -5,7 +5,8 @@ module BadLinkFinder
|
|
5
5
|
class Link
|
6
6
|
attr_reader :link, :url, :error_message, :exception
|
7
7
|
|
8
|
-
def initialize(page_url, link)
|
8
|
+
def initialize(page_url, link, logger = BadLinkFinder::NullLogger.new)
|
9
|
+
@logger = logger
|
9
10
|
@page_url = page_url
|
10
11
|
@link = link
|
11
12
|
@url = get_url_from_link(link)
|
@@ -45,7 +46,7 @@ module BadLinkFinder
|
|
45
46
|
protected
|
46
47
|
|
47
48
|
def validate_with_request
|
48
|
-
|
49
|
+
@logger.info "-- testing link #{@link} using #{@url}"
|
49
50
|
sleep 0.1 # Recommended pause for gov.uk rate limiting
|
50
51
|
|
51
52
|
browser = Mechanize.new
|
@@ -76,7 +77,7 @@ module BadLinkFinder
|
|
76
77
|
@error_message = message
|
77
78
|
@exception = exception
|
78
79
|
|
79
|
-
|
80
|
+
@logger.info "---- found broken link #{@url}: #{message}: #{exception.message if exception}"
|
80
81
|
end
|
81
82
|
end
|
82
83
|
end
|
data/lib/bad_link_finder/page_checker.rb
CHANGED
@@ -2,28 +2,24 @@ require 'bad_link_finder/link'
|
|
2
2
|
|
3
3
|
module BadLinkFinder
|
4
4
|
class PageChecker
|
5
|
-
def initialize(host, page, result_cache)
|
5
|
+
def initialize(host, page, result_cache, logger = BadLinkFinder::NullLogger.new)
|
6
6
|
host = host.chomp('/') + '/'
|
7
7
|
@page = page
|
8
8
|
@page_url = URI.join(host, page.path).to_s
|
9
9
|
@result_cache = result_cache
|
10
|
+
@logger = logger
|
10
11
|
end
|
11
12
|
|
12
13
|
attr_reader :page_url
|
13
14
|
|
14
|
-
def
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
link = @result_cache.fetch(raw_link) || @result_cache.store(raw_link, BadLinkFinder::Link.new(@page_url, raw_link))
|
15
|
+
def bad_links
|
16
|
+
@bad_links ||= @page.links.map { |link| fetch_or_build(link) }.reject(&:valid?)
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
next link
|
24
|
-
end
|
25
|
-
end.compact
|
26
|
-
end
|
21
|
+
def fetch_or_build(link)
|
22
|
+
@result_cache.fetch(link) || @result_cache.store(link, BadLinkFinder::Link.new(@page_url, link, @logger))
|
27
23
|
end
|
28
24
|
end
|
29
25
|
end
|
data/lib/bad_link_finder/site_checker.rb
CHANGED
@@ -4,20 +4,21 @@ require 'bad_link_finder/page_checker'
|
|
4
4
|
|
5
5
|
module BadLinkFinder
|
6
6
|
class SiteChecker
|
7
|
-
def initialize(mirror_dir, host, csv_builder, start_from = nil)
|
7
|
+
def initialize(mirror_dir, host, csv_builder, start_from = nil, logger = BadLinkFinder::NullLogger.new)
|
8
8
|
@mirror_dir = File.expand_path(mirror_dir)
|
9
9
|
@host = host
|
10
10
|
@csv_builder = csv_builder
|
11
11
|
@start_from = start_from
|
12
12
|
@result_cache = BadLinkFinder::ResultCache.new
|
13
|
+
@logger = logger
|
13
14
|
end
|
14
15
|
|
15
16
|
def run
|
16
17
|
BadLinkFinder::Site.new(@mirror_dir, @start_from).each do |page|
|
17
|
-
page_checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache)
|
18
|
-
|
18
|
+
page_checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache, @logger)
|
19
|
+
@logger.info "Checking page #{page.path} as #{page_checker.page_url}"
|
19
20
|
|
20
|
-
page_checker.
|
21
|
+
page_checker.bad_links.each do |link|
|
21
22
|
@csv_builder << {
|
22
23
|
url: page_checker.page_url,
|
23
24
|
id: page.id,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bad_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elliot Crosby-McCullough
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|