bad_link_finder 0.3.2 → 0.3.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eb79f23c7fabe10bbd9654a4eadfffde34ed2cac
4
- data.tar.gz: bf8da7c36a356b4bf2281d78a872a897afc2d94a
3
+ metadata.gz: 7336841a6833dbf1369e624892d2c59fc75a782c
4
+ data.tar.gz: 744b7a4514a0ced554df8d9ae5030ad058ac882f
5
5
  SHA512:
6
- metadata.gz: 97e222a53897c800715cec1c16c322213c9c563cde70aa483abda47aaf99e1ae8c8eefa2659969fc4260f9c8cc9999affdac0d2ff89ad18ff5f0de430653b664
7
- data.tar.gz: fb7db07dbf3b04e1804c9e3ea555796fa47734dc2e1c2613bab2d7df3a338c9221dae8ce059a253e1f907217a18fdc5f7fade1e15f6736252b0dd01b07c9b10b
6
+ metadata.gz: f4646a14c3f8960d4cbb38e5aa9cebfe0552b5c0a516b4696b785db99beb393584fb88a3ee963dee6af5b15a82638870ca71aca56beda1eab75cee36b680c4ea
7
+ data.tar.gz: 798a38b30d5874655b16e64df255dccc50878eb4f9d15c8290794edf29794498e5661423556627bbfda5b41a558df9da9931b4d596a012ae40964ecb35f2f806
@@ -1,9 +1,10 @@
1
1
  require 'bad_link_finder/site_checker'
2
2
  require 'bad_link_finder/csv_builder'
3
3
  require 'pathname'
4
+ require 'logger'
4
5
 
5
6
  module BadLinkFinder
6
- def self.run
7
+ def self.run(logger = NullLogger.new)
7
8
  ['MIRROR_DIR', 'REPORT_OUTPUT_FILE', 'SITE_HOST'].each do |var|
8
9
  raise EnvironmentVariableError.new("Missing environment variable #{var}") unless ENV.has_key?(var)
9
10
  end
@@ -16,7 +17,7 @@ module BadLinkFinder
16
17
  csv_file = report_path.open('w')
17
18
  csv_builder = BadLinkFinder::CSVBuilder.new(csv_file)
18
19
 
19
- BadLinkFinder::SiteChecker.new(ENV['MIRROR_DIR'], ENV['SITE_HOST'], csv_builder, ENV['START_FROM']).run
20
+ BadLinkFinder::SiteChecker.new(ENV['MIRROR_DIR'], ENV['SITE_HOST'], csv_builder, ENV['START_FROM'], logger).run
20
21
 
21
22
  csv_file.close
22
23
 
@@ -24,4 +25,12 @@ module BadLinkFinder
24
25
  end
25
26
 
26
27
  class EnvironmentVariableError < ArgumentError; end
28
+
29
+ class NullLogger < Logger
30
+ def initialize(*args)
31
+ end
32
+
33
+ def add(*args, &block)
34
+ end
35
+ end
27
36
  end
@@ -5,7 +5,8 @@ module BadLinkFinder
5
5
  class Link
6
6
  attr_reader :link, :url, :error_message, :exception
7
7
 
8
- def initialize(page_url, link)
8
+ def initialize(page_url, link, logger = BadLinkFinder::NullLogger.new)
9
+ @logger = logger
9
10
  @page_url = page_url
10
11
  @link = link
11
12
  @url = get_url_from_link(link)
@@ -45,7 +46,7 @@ module BadLinkFinder
45
46
  protected
46
47
 
47
48
  def validate_with_request
48
- puts "-- testing link #{@link} using #{@url}"
49
+ @logger.info "-- testing link #{@link} using #{@url}"
49
50
  sleep 0.1 # Recommended pause for gov.uk rate limiting
50
51
 
51
52
  browser = Mechanize.new
@@ -76,7 +77,7 @@ module BadLinkFinder
76
77
  @error_message = message
77
78
  @exception = exception
78
79
 
79
- puts "---- found broken link #{@url}: #{message}: #{exception.message if exception}"
80
+ @logger.info "---- found broken link #{@url}: #{message}: #{exception.message if exception}"
80
81
  end
81
82
  end
82
83
  end
@@ -2,28 +2,24 @@ require 'bad_link_finder/link'
2
2
 
3
3
  module BadLinkFinder
4
4
  class PageChecker
5
- def initialize(host, page, result_cache)
5
+ def initialize(host, page, result_cache, logger = BadLinkFinder::NullLogger.new)
6
6
  host = host.chomp('/') + '/'
7
7
  @page = page
8
8
  @page_url = URI.join(host, page.path).to_s
9
9
  @result_cache = result_cache
10
+ @logger = logger
10
11
  end
11
12
 
12
13
  attr_reader :page_url
13
14
 
14
- def each_bad_link(&block)
15
- if @bad_links
16
- @bad_links.each(&block)
17
- else
18
- @bad_links = @page.links.map do |raw_link|
19
- link = @result_cache.fetch(raw_link) || @result_cache.store(raw_link, BadLinkFinder::Link.new(@page_url, raw_link))
15
+ def bad_links
16
+ @bad_links ||= @page.links.map { |link| fetch_or_build(link) }.reject(&:valid?)
17
+ end
18
+
19
+ private
20
20
 
21
- unless link.valid?
22
- yield link
23
- next link
24
- end
25
- end.compact
26
- end
21
+ def fetch_or_build(link)
22
+ @result_cache.fetch(link) || @result_cache.store(link, BadLinkFinder::Link.new(@page_url, link, @logger))
27
23
  end
28
24
  end
29
25
  end
@@ -4,20 +4,21 @@ require 'bad_link_finder/page_checker'
4
4
 
5
5
  module BadLinkFinder
6
6
  class SiteChecker
7
- def initialize(mirror_dir, host, csv_builder, start_from = nil)
7
+ def initialize(mirror_dir, host, csv_builder, start_from = nil, logger = BadLinkFinder::NullLogger.new)
8
8
  @mirror_dir = File.expand_path(mirror_dir)
9
9
  @host = host
10
10
  @csv_builder = csv_builder
11
11
  @start_from = start_from
12
12
  @result_cache = BadLinkFinder::ResultCache.new
13
+ @logger = logger
13
14
  end
14
15
 
15
16
  def run
16
17
  BadLinkFinder::Site.new(@mirror_dir, @start_from).each do |page|
17
- page_checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache)
18
- puts "Checking page #{page.path} as #{page_checker.page_url}"
18
+ page_checker = BadLinkFinder::PageChecker.new(@host, page, @result_cache, @logger)
19
+ @logger.info "Checking page #{page.path} as #{page_checker.page_url}"
19
20
 
20
- page_checker.each_bad_link do |link|
21
+ page_checker.bad_links.each do |link|
21
22
  @csv_builder << {
22
23
  url: page_checker.page_url,
23
24
  id: page.id,
@@ -1,3 +1,3 @@
1
1
  module BadLinkFinder
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bad_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elliot Crosby-McCullough
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-31 00:00:00.000000000 Z
11
+ date: 2014-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize