bad_link_finder 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bad_link_finder.rb +11 -2
- data/lib/bad_link_finder/link.rb +4 -3
- data/lib/bad_link_finder/page_checker.rb +9 -13
- data/lib/bad_link_finder/site_checker.rb +5 -4
- data/lib/bad_link_finder/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7336841a6833dbf1369e624892d2c59fc75a782c
|
4
|
+
data.tar.gz: 744b7a4514a0ced554df8d9ae5030ad058ac882f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4646a14c3f8960d4cbb38e5aa9cebfe0552b5c0a516b4696b785db99beb393584fb88a3ee963dee6af5b15a82638870ca71aca56beda1eab75cee36b680c4ea
|
7
|
+
data.tar.gz: 798a38b30d5874655b16e64df255dccc50878eb4f9d15c8290794edf29794498e5661423556627bbfda5b41a558df9da9931b4d596a012ae40964ecb35f2f806
|
data/lib/bad_link_finder.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'bad_link_finder/site_checker'
|
2
2
|
require 'bad_link_finder/csv_builder'
|
3
3
|
require 'pathname'
|
4
|
+
require 'logger'
|
4
5
|
|
5
6
|
module BadLinkFinder
|
6
|
-
def self.run
|
7
|
+
def self.run(logger = NullLogger.new)
|
7
8
|
['MIRROR_DIR', 'REPORT_OUTPUT_FILE', 'SITE_HOST'].each do |var|
|
8
9
|
raise EnvironmentVariableError.new("Missing environment variable #{var}") unless ENV.has_key?(var)
|
9
10
|
end
|
@@ -16,7 +17,7 @@ module BadLinkFinder
|
|
16
17
|
csv_file = report_path.open('w')
|
17
18
|
csv_builder = BadLinkFinder::CSVBuilder.new(csv_file)
|
18
19
|
|
19
|
-
BadLinkFinder::SiteChecker.new(ENV['MIRROR_DIR'], ENV['SITE_HOST'], csv_builder, ENV['START_FROM']).run
|
20
|
+
BadLinkFinder::SiteChecker.new(ENV['MIRROR_DIR'], ENV['SITE_HOST'], csv_builder, ENV['START_FROM'], logger).run
|
20
21
|
|
21
22
|
csv_file.close
|
22
23
|
|
@@ -24,4 +25,12 @@ module BadLinkFinder
|
|
24
25
|
end
|
25
26
|
|
26
27
|
class EnvironmentVariableError < ArgumentError; end
|
28
|
+
|
29
|
+
class NullLogger < Logger
|
30
|
+
def initialize(*args)
|
31
|
+
end
|
32
|
+
|
33
|
+
def add(*args, &block)
|
34
|
+
end
|
35
|
+
end
|
27
36
|
end
|
data/lib/bad_link_finder/link.rb
CHANGED
@@ -5,7 +5,8 @@ module BadLinkFinder
|
|
5
5
|
class Link
|
6
6
|
attr_reader :link, :url, :error_message, :exception
|
7
7
|
|
8
|
-
def initialize(page_url, link)
|
8
|
+
def initialize(page_url, link, logger = BadLinkFinder::NullLogger.new)
|
9
|
+
@logger = logger
|
9
10
|
@page_url = page_url
|
10
11
|
@link = link
|
11
12
|
@url = get_url_from_link(link)
|
@@ -45,7 +46,7 @@ module BadLinkFinder
|
|
45
46
|
protected
|
46
47
|
|
47
48
|
def validate_with_request
|
48
|
-
|
49
|
+
@logger.info "-- testing link #{@link} using #{@url}"
|
49
50
|
sleep 0.1 # Recommended pause for gov.uk rate limiting
|
50
51
|
|
51
52
|
browser = Mechanize.new
|
@@ -76,7 +77,7 @@ module BadLinkFinder
|
|
76
77
|
@error_message = message
|
77
78
|
@exception = exception
|
78
79
|
|
79
|
-
|
80
|
+
@logger.info "---- found broken link #{@url}: #{message}: #{exception.message if exception}"
|
80
81
|
end
|
81
82
|
end
|
82
83
|
end
|
data/lib/bad_link_finder/page_checker.rb
CHANGED
@@ -2,28 +2,24 @@ require 'bad_link_finder/link'
|
|
2
2
|
|
3
3
|
module BadLinkFinder
|
4
4
|
class PageChecker
|
5
|
-
def initialize(host, page, result_cache)
|
5
|
+
def initialize(host, page, result_cache, logger = BadLinkFinder::NullLogger.new)
|
6
6
|
host = host.chomp('/') + '/'
|
7
7
|
@page = page
|
8
8
|
@page_url = URI.join(host, page.path).to_s
|
9
9
|
@result_cache = result_cache
|
10
|
+
@logger = logger
|
10
11
|
end
|
11
12
|
|
12
13
|
attr_reader :page_url
|
13
14
|
|
14
|
-
def
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
link = @result_cache.fetch(raw_link) || @result_cache.store(raw_link, BadLinkFinder::Link.new(@page_url, raw_link))
|
15
|
+
def bad_links
|
16
|
+
@bad_links ||= @page.links.map { |link| fetch_or_build(link) }.reject(&:valid?)
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
next link
|
24
|
-
end
|
25
|
-
end.compact
|
26
|
-
end
|
21
|
+
def fetch_or_build(link)
|
22
|
+
@result_cache.fetch(link) || @result_cache.store(link, BadLinkFinder::Link.new(@page_url, link, @logger))
|
27
23
|
end
|
28
24
|
end
|
29
25
|
end
|
data/lib/bad_link_finder/site_checker.rb
CHANGED
@@ -4,20 +4,21 @@ require 'bad_link_finder/page_checker'
|
|
4
4
|
|
5
5
|
module BadLinkFinder
|
6
6
|
class SiteChecker
|
7
|
-
def initialize(mirror_dir, host, csv_builder, start_from = nil)
|
7
|
+
def initialize(mirror_dir, host, csv_builder, start_from = nil, logger = BadLinkFinder::NullLogger.new)
|
8
8
|
@mirror_dir = File.expand_path(mirror_dir)
|
9
9
|
@host = host
|
10
10
|
@csv_builder = csv_builder
|
11
11
|
@start_from = start_from
|
12
12
|
@result_cache = BadLinkFinder::ResultCache.new
|
13
|
+
@logger = logger
|
13
14
|
end
|
14
15
|
|
15
16
|
def run
|
16
17
|
BadLinkFinder::Site.new(@mirror_dir, @start_from).each do |page|
|
17
|
-
page_checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache)
|
18
|
-
|
18
|
+
page_checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache, @logger)
|
19
|
+
@logger.info "Checking page #{page.path} as #{page_checker.page_url}"
|
19
20
|
|
20
|
-
page_checker.
|
21
|
+
page_checker.bad_links.each do |link|
|
21
22
|
@csv_builder << {
|
22
23
|
url: page_checker.page_url,
|
23
24
|
id: page.id,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bad_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elliot Crosby-McCullough
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|