html-proofer 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproof +15 -22
- data/lib/html/proofer.rb +21 -38
- data/lib/html/proofer/cache.rb +132 -7
- data/lib/html/proofer/check_runner.rb +2 -1
- data/lib/html/proofer/checkable.rb +4 -0
- data/lib/html/proofer/checks/links.rb +1 -0
- data/lib/html/proofer/configuration.rb +48 -0
- data/lib/html/proofer/url_validator.rb +50 -7
- data/lib/html/proofer/utils.rb +22 -1
- data/lib/html/proofer/version.rb +1 -1
- metadata +32 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 904c91c8694ab71a3722677bb5e8be2c78074503
|
4
|
+
data.tar.gz: 8e8c720d05ac809b4b5628711a40516a88be8dfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e72778e7edd2f302a91b6727d4825ed64cc506cf473bc9575d0d79051ed39f56fca0bff8b79cba268c28a68b796f1c0495460e4c420d1f7f16439f84e7a94325
|
7
|
+
data.tar.gz: dd974ec72bf547882f85e59b51223b5ed2c54688288cf5265c34c01b872236daa4c1127c61ef4e25882f4dd340b74dc7c88370c31fcd603853b74f352b0e5213
|
data/bin/htmlproof
CHANGED
@@ -5,15 +5,6 @@ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w( .. lib ))
|
|
5
5
|
|
6
6
|
require 'html/proofer'
|
7
7
|
require 'mercenary'
|
8
|
-
require 'rubygems'
|
9
|
-
|
10
|
-
def to_regex?(item)
|
11
|
-
if item.start_with?('/') && item.end_with?('/')
|
12
|
-
Regexp.new item[1...-1]
|
13
|
-
else
|
14
|
-
item
|
15
|
-
end
|
16
|
-
end
|
17
8
|
|
18
9
|
Mercenary.program(:htmlproof) do |p|
|
19
10
|
p.version HTML::Proofer::VERSION
|
@@ -22,25 +13,27 @@ Mercenary.program(:htmlproof) do |p|
|
|
22
13
|
|
23
14
|
p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
|
24
15
|
|
16
|
+
p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, ignores the `href` `#`'
|
25
17
|
p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
|
26
|
-
p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, '
|
27
|
-
p.option 'empty_alt_ignore', '--empty-alt-ignore', 'Ignores images with empty alt tags.'
|
18
|
+
p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
|
28
19
|
p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
|
29
20
|
p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).'
|
30
21
|
p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
|
31
22
|
p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
|
32
23
|
p.option 'directory_index_file', '--directory-index-file', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
|
33
|
-
p.option 'disable_external', '--disable-external', '
|
34
|
-
p.option '
|
24
|
+
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
|
25
|
+
p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
|
26
|
+
p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `:path`, `:desc`, or `:status` (default: `path`).'
|
35
27
|
p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `false`).'
|
36
|
-
p.option 'ext', '--ext EXT', String, 'The extension of your HTML files (default: `.html`)'
|
37
|
-
p.option '
|
38
|
-
p.option '
|
39
|
-
p.option '
|
28
|
+
p.option 'ext', '--ext EXT', String, 'The extension of your HTML files including the dot. (default: `.html`)'
|
29
|
+
p.option 'external_only', '--external_only', 'Only checks problems with external references'
|
30
|
+
p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
31
|
+
p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. **Will be renamed in a future release.**'
|
32
|
+
p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. **Will be renamed in a future release.**'
|
40
33
|
p.option 'ignore_script_embeds', '--ignore-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
|
41
|
-
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the
|
42
|
-
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, '
|
43
|
-
p.option 'verbose', '--verbose', '
|
34
|
+
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
35
|
+
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
36
|
+
p.option 'verbose', '--verbose', 'If `true`, outputs extra information as the checking happens. Useful for debugging. **Will be deprecated in a future release.**'
|
44
37
|
p.option 'verbosity', '--verbosity', String, 'Sets the logging level, as determined by Yell'
|
45
38
|
|
46
39
|
p.action do |args, opts|
|
@@ -52,7 +45,7 @@ Mercenary.program(:htmlproof) do |p|
|
|
52
45
|
# prepare everything to go to proofer
|
53
46
|
p.options.select { |o| !opts[o.config_key].nil? }.each do |option|
|
54
47
|
if option.return_type.to_s == 'Array' # TODO: is_a? doesn't work here?
|
55
|
-
opts[option.config_key] = opts[option.config_key].map { |i| to_regex?(i) }
|
48
|
+
opts[option.config_key] = opts[option.config_key].map { |i| HTML::Proofer::Configuration.to_regex?(i) }
|
56
49
|
end
|
57
50
|
options[option.config_key.to_sym] = opts[option.config_key]
|
58
51
|
end
|
@@ -68,7 +61,7 @@ Mercenary.program(:htmlproof) do |p|
|
|
68
61
|
|
69
62
|
# check for ignore_scripts_embeds as it should be set in :validation
|
70
63
|
unless opts['ignore_script_embeds'].nil?
|
71
|
-
|
64
|
+
options[:validation] = { :ignore_script_embeds => true }
|
72
65
|
end
|
73
66
|
|
74
67
|
options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil?
|
data/lib/html/proofer.rb
CHANGED
@@ -8,30 +8,23 @@ end
|
|
8
8
|
require_all 'proofer'
|
9
9
|
require_all 'proofer/check_runner'
|
10
10
|
require_all 'proofer/checks'
|
11
|
-
require_relative './proofer/utils'
|
12
|
-
require_relative './proofer/xpathfunctions'
|
13
11
|
|
14
12
|
require 'parallel'
|
13
|
+
require 'fileutils'
|
15
14
|
|
16
15
|
begin
|
17
16
|
require 'awesome_print'
|
18
17
|
rescue LoadError; end
|
19
18
|
|
20
19
|
module HTML
|
21
|
-
|
22
20
|
class Proofer
|
23
21
|
include HTML::Proofer::Utils
|
24
22
|
|
25
23
|
attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts, :external_urls, :iterable_external_urls
|
26
24
|
|
27
|
-
TYPHOEUS_DEFAULTS = {
|
28
|
-
:followlocation => true,
|
29
|
-
:headers => {
|
30
|
-
'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{VERSION}; +https://github.com/gjtorikian/html-proofer)"
|
31
|
-
}
|
32
|
-
}
|
33
|
-
|
34
25
|
def initialize(src, opts = {})
|
26
|
+
FileUtils.mkdir_p(STORAGE_DIR) unless File.exist?(STORAGE_DIR)
|
27
|
+
|
35
28
|
@src = src
|
36
29
|
|
37
30
|
if opts[:verbose]
|
@@ -41,30 +34,12 @@ module HTML
|
|
41
34
|
warn '`@options[:href_ignore]` will be renamed in a future 3.x.x release: http://git.io/vGHHy'
|
42
35
|
end
|
43
36
|
|
44
|
-
@proofer_opts =
|
45
|
-
|
46
|
-
|
47
|
-
:href_swap => [],
|
48
|
-
:href_ignore => [],
|
49
|
-
:file_ignore => [],
|
50
|
-
:url_ignore => [],
|
51
|
-
:check_external_hash => false,
|
52
|
-
:alt_ignore => [],
|
53
|
-
:empty_alt_ignore => false,
|
54
|
-
:enforce_https => false,
|
55
|
-
:disable_external => false,
|
56
|
-
:verbose => false,
|
57
|
-
:only_4xx => false,
|
58
|
-
:directory_index_file => 'index.html',
|
59
|
-
:check_html => false,
|
60
|
-
:error_sort => :path,
|
61
|
-
:checks_to_ignore => []
|
62
|
-
}
|
63
|
-
|
64
|
-
@typhoeus_opts = TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
37
|
+
@proofer_opts = HTML::Proofer::Configuration::PROOFER_DEFAULTS
|
38
|
+
|
39
|
+
@typhoeus_opts = HTML::Proofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
65
40
|
opts.delete(:typhoeus)
|
66
41
|
|
67
|
-
@hydra_opts = opts[:hydra] || {}
|
42
|
+
@hydra_opts = HTML::Proofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
|
68
43
|
opts.delete(:hydra)
|
69
44
|
|
70
45
|
# fall back to parallel defaults
|
@@ -84,9 +59,7 @@ module HTML
|
|
84
59
|
end
|
85
60
|
|
86
61
|
def run
|
87
|
-
|
88
|
-
check_text = "#{checks} " << (count == 1 ? 'check' : 'checks')
|
89
|
-
logger.log :info, :blue, "Running #{check_text} on #{@src} on *#{@options[:ext]}... \n\n"
|
62
|
+
logger.log :info, :blue, "Running #{checks} on #{@src} on *#{@options[:ext]}... \n\n"
|
90
63
|
|
91
64
|
if @src.is_a?(Array) && !@options[:disable_external]
|
92
65
|
check_list_of_links
|
@@ -123,9 +96,19 @@ module HTML
|
|
123
96
|
@failed_tests.concat(item[:failed_tests])
|
124
97
|
end
|
125
98
|
|
126
|
-
|
99
|
+
# TODO: lazy. if we're checking only external links,
|
100
|
+
# we'll just trash all the failed tests. really, we should
|
101
|
+
# just not run those other checks at all.
|
102
|
+
if @options[:external_only]
|
103
|
+
@failed_tests = []
|
104
|
+
validate_urls
|
105
|
+
elsif !@options[:disable_external]
|
106
|
+
validate_urls
|
107
|
+
end
|
127
108
|
|
128
|
-
|
109
|
+
count = files.length
|
110
|
+
file_text = pluralize(count, 'file', 'files')
|
111
|
+
logger.log :info, :blue, "Ran on #{file_text}!\n\n"
|
129
112
|
end
|
130
113
|
|
131
114
|
# Walks over each implemented check and runs them on the files, in parallel.
|
@@ -195,7 +178,7 @@ module HTML
|
|
195
178
|
|
196
179
|
sorted_failures.sort_and_report
|
197
180
|
count = @failed_tests.length
|
198
|
-
failure_text =
|
181
|
+
failure_text = pluralize(count, 'failure', 'failures')
|
199
182
|
fail logger.colorize :red, "HTML-Proofer found #{failure_text}!"
|
200
183
|
end
|
201
184
|
end
|
data/lib/html/proofer/cache.rb
CHANGED
@@ -1,16 +1,141 @@
|
|
1
|
+
require_relative 'utils'
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'active_support/core_ext/string'
|
5
|
+
require 'active_support/core_ext/date'
|
6
|
+
require 'active_support/core_ext/numeric/time'
|
7
|
+
|
1
8
|
module HTML
|
2
9
|
class Proofer
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
10
|
+
class Cache
|
11
|
+
include HTML::Proofer::Utils
|
12
|
+
|
13
|
+
FILENAME = File.join(STORAGE_DIR, 'cache.log')
|
14
|
+
|
15
|
+
attr_accessor :exists, :load, :cache_log, :cache_time
|
16
|
+
|
17
|
+
def initialize(logger, options)
|
18
|
+
@logger = logger
|
19
|
+
@cache_log = {}
|
20
|
+
|
21
|
+
if options.nil? || options.empty?
|
22
|
+
@load = false
|
23
|
+
else
|
24
|
+
@load = true
|
25
|
+
@parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d')
|
26
|
+
end
|
27
|
+
@cache_time = Time.now
|
28
|
+
|
29
|
+
if File.exist?(FILENAME)
|
30
|
+
@exists = true
|
31
|
+
contents = File.read(FILENAME)
|
32
|
+
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
33
|
+
else
|
34
|
+
@exists = false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def within_timeframe?(time)
|
39
|
+
(@parsed_timeframe..@cache_time).cover?(time)
|
40
|
+
end
|
41
|
+
|
42
|
+
def urls
|
43
|
+
@cache_log['urls'] || []
|
44
|
+
end
|
45
|
+
|
46
|
+
def parsed_timeframe(timeframe)
|
47
|
+
time, date = timeframe.match(/(\d+)(\D)/).captures
|
48
|
+
time = time.to_f
|
49
|
+
case date
|
50
|
+
when 'M'
|
51
|
+
time.months.ago
|
52
|
+
when 'w'
|
53
|
+
time.weeks.ago
|
54
|
+
when 'd'
|
55
|
+
time.days.ago
|
56
|
+
when 'h'
|
57
|
+
time.hours.ago
|
7
58
|
else
|
8
|
-
|
59
|
+
fail ArgumentError, "#{date} is not a valid timeframe!"
|
9
60
|
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def add(url, filenames, status, msg = '')
|
64
|
+
data = {
|
65
|
+
:time => @cache_time,
|
66
|
+
:filenames => filenames,
|
67
|
+
:status => status,
|
68
|
+
:message => msg
|
69
|
+
}
|
70
|
+
|
71
|
+
@cache_log[clean_url(url)] = data
|
72
|
+
end
|
73
|
+
|
74
|
+
def detect_url_changes(found)
|
75
|
+
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
76
|
+
found_urls = found.keys.map { |url| clean_url(url) }
|
77
|
+
|
78
|
+
# prepare to add new URLs detected
|
79
|
+
additions = found.reject do |url, _|
|
80
|
+
url = clean_url(url)
|
81
|
+
if existing_urls.include?(url)
|
82
|
+
true
|
83
|
+
else
|
84
|
+
@logger.log :debug, :yellow, "Adding #{url} to cache check"
|
85
|
+
false
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
new_link_count = additions.length
|
90
|
+
new_link_text = pluralize(new_link_count, 'link', 'links')
|
91
|
+
@logger.log :info, :blue, "Adding #{new_link_text} to the cache..."
|
92
|
+
|
93
|
+
# remove from cache URLs that no longer exist
|
94
|
+
del = 0
|
95
|
+
@cache_log.delete_if do |url, _|
|
96
|
+
url = clean_url(url)
|
97
|
+
if !found_urls.include?(url)
|
98
|
+
@logger.log :debug, :yellow, "Removing #{url} from cache check"
|
99
|
+
del += 1
|
100
|
+
true
|
101
|
+
else
|
102
|
+
false
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
del_link_text = pluralize(del, 'link', 'links')
|
107
|
+
@logger.log :info, :blue, "Removing #{del_link_text} from the cache..."
|
108
|
+
|
109
|
+
additions
|
110
|
+
end
|
111
|
+
|
112
|
+
def write
|
113
|
+
File.write(FILENAME, @cache_log.to_json)
|
114
|
+
end
|
115
|
+
|
116
|
+
def load?
|
117
|
+
@load.nil?
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
# FIXME: there seems to be some discrepancy where Typhoeus occasionally adds
|
122
|
+
# a trailing slash to URL strings, which causes issues with the cache
|
123
|
+
def slashless_url(url)
|
124
|
+
url.chomp('/')
|
125
|
+
end
|
126
|
+
|
127
|
+
# FIXME: it seems that Typhoeus actually acts on escaped URLs,
|
128
|
+
# but there's no way to get at that information, and the cache
|
129
|
+
# stores unescaped URLs. Because of this, some links, such as
|
130
|
+
# github.com/search/issues?q=is:open+is:issue+fig are not matched
|
131
|
+
# as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
|
132
|
+
def unescape_url(url)
|
133
|
+
Addressable::URI.unescape(url)
|
134
|
+
end
|
10
135
|
|
11
|
-
|
136
|
+
def clean_url(url)
|
137
|
+
slashless_url(unescape_url(url))
|
12
138
|
end
|
13
|
-
module_function :create_nokogiri
|
14
139
|
end
|
15
140
|
end
|
16
141
|
end
|
@@ -7,7 +7,7 @@ module HTML
|
|
7
7
|
|
8
8
|
attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \
|
9
9
|
:validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, \
|
10
|
-
:empty_alt_ignore
|
10
|
+
:empty_alt_ignore, :allow_hash_href
|
11
11
|
|
12
12
|
def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts)
|
13
13
|
@src = src
|
@@ -23,6 +23,7 @@ module HTML
|
|
23
23
|
@url_ignores = @options[:url_ignore]
|
24
24
|
@alt_ignores = @options[:alt_ignore]
|
25
25
|
@empty_alt_ignore = @options[:empty_alt_ignore]
|
26
|
+
@allow_hash_href = @options[:allow_hash_href]
|
26
27
|
@external_urls = {}
|
27
28
|
end
|
28
29
|
|
@@ -32,6 +32,7 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
32
32
|
next if link.ignore?
|
33
33
|
next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
|
34
34
|
next if link.placeholder?
|
35
|
+
next if link.allow_hash_href? && link.href == '#'
|
35
36
|
|
36
37
|
# is it even a valid URL?
|
37
38
|
unless link.valid?
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module HTML
|
2
|
+
class Proofer
|
3
|
+
module Configuration
|
4
|
+
require_relative 'version'
|
5
|
+
|
6
|
+
PROOFER_DEFAULTS = {
|
7
|
+
:allow_hash_href => false,
|
8
|
+
:alt_ignore => [],
|
9
|
+
:check_external_hash => false,
|
10
|
+
:check_favicon => false,
|
11
|
+
:check_html => false,
|
12
|
+
:checks_to_ignore => [],
|
13
|
+
:directory_index_file => 'index.html',
|
14
|
+
:disable_external => false,
|
15
|
+
:empty_alt_ignore => false,
|
16
|
+
:enforce_https => false,
|
17
|
+
:error_sort => :path,
|
18
|
+
:ext => '.html',
|
19
|
+
:external_only => false,
|
20
|
+
:file_ignore => [],
|
21
|
+
:href_ignore => [],
|
22
|
+
:href_swap => [],
|
23
|
+
:only_4xx => false,
|
24
|
+
:url_ignore => [],
|
25
|
+
:verbose => false
|
26
|
+
}
|
27
|
+
|
28
|
+
TYPHOEUS_DEFAULTS = {
|
29
|
+
:followlocation => true,
|
30
|
+
:headers => {
|
31
|
+
'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTML::Proofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
HYDRA_DEFAULTS = {
|
36
|
+
:max_concurrency => 50
|
37
|
+
}
|
38
|
+
|
39
|
+
def self.to_regex?(item)
|
40
|
+
if item.start_with?('/') && item.end_with?('/')
|
41
|
+
Regexp.new item[1...-1]
|
42
|
+
else
|
43
|
+
item
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'typhoeus'
|
2
2
|
require 'uri'
|
3
3
|
require_relative './utils'
|
4
|
+
require_relative './cache'
|
4
5
|
|
5
6
|
module HTML
|
6
7
|
class Proofer
|
@@ -18,16 +19,40 @@ module HTML
|
|
18
19
|
@hydra = Typhoeus::Hydra.new(hydra_opts)
|
19
20
|
@typhoeus_opts = typhoeus_opts
|
20
21
|
@external_domain_paths_with_queries = {}
|
22
|
+
@cache = Cache.new(@logger, @options[:cache])
|
21
23
|
end
|
22
24
|
|
23
25
|
def run
|
24
26
|
@iterable_external_urls = remove_query_values
|
25
|
-
|
27
|
+
|
28
|
+
if @cache.exists && @cache.load
|
29
|
+
cache_count = @cache.cache_log.length
|
30
|
+
cache_text = pluralize(cache_count, 'link', 'links')
|
31
|
+
|
32
|
+
logger.log :info, :blue, "Found #{cache_text} in the cache..."
|
33
|
+
|
34
|
+
urls_to_check = @cache.detect_url_changes(@iterable_external_urls)
|
35
|
+
|
36
|
+
@cache.cache_log.each_pair do |url, cache|
|
37
|
+
if @cache.within_timeframe?(cache['time'])
|
38
|
+
next if cache['message'].empty? # these were successes to skip
|
39
|
+
urls_to_check[url] = cache['filenames'] # these are failures to retry
|
40
|
+
else
|
41
|
+
urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
external_link_checker(urls_to_check)
|
46
|
+
else
|
47
|
+
external_link_checker(@iterable_external_urls)
|
48
|
+
end
|
49
|
+
|
50
|
+
@cache.write
|
26
51
|
@failed_tests
|
27
52
|
end
|
28
53
|
|
29
54
|
def remove_query_values
|
30
|
-
return if @external_urls.nil?
|
55
|
+
return nil if @external_urls.nil?
|
31
56
|
iterable_external_urls = @external_urls.dup
|
32
57
|
@external_urls.keys.each do |url|
|
33
58
|
uri = begin
|
@@ -75,14 +100,16 @@ module HTML
|
|
75
100
|
external_urls = Hash[external_urls.sort]
|
76
101
|
|
77
102
|
count = external_urls.length
|
78
|
-
check_text =
|
103
|
+
check_text = pluralize(count, 'external link', 'external links')
|
79
104
|
logger.log :info, :blue, "Checking #{check_text}..."
|
80
105
|
|
81
106
|
Ethon.logger = logger # log from Typhoeus/Ethon
|
82
107
|
|
83
108
|
url_processor(external_urls)
|
84
109
|
|
85
|
-
logger.log :debug, :yellow, "Running requests for
|
110
|
+
logger.log :debug, :yellow, "Running requests for:"
|
111
|
+
logger.log :debug, :yellow, "###\n" + external_urls.keys.join("\n") + "\n###"
|
112
|
+
|
86
113
|
hydra.run
|
87
114
|
end
|
88
115
|
|
@@ -125,14 +152,19 @@ module HTML
|
|
125
152
|
|
126
153
|
if response_code.between?(200, 299)
|
127
154
|
check_hash_in_2xx_response(href, effective_url, response, filenames)
|
155
|
+
@cache.add(href, filenames, response_code)
|
128
156
|
elsif response.timed_out?
|
129
157
|
handle_timeout(href, filenames, response_code)
|
158
|
+
elsif response_code == 0
|
159
|
+
handle_failure(href, filenames, response_code)
|
130
160
|
elsif method == :head
|
131
161
|
queue_request(:get, href, filenames)
|
132
162
|
else
|
133
163
|
return if @options[:only_4xx] && !response_code.between?(400, 499)
|
134
164
|
# Received a non-successful http response.
|
135
|
-
|
165
|
+
msg = "External link #{href} failed: #{response_code} #{response.return_message}"
|
166
|
+
add_external_issue(filenames, msg, response_code)
|
167
|
+
@cache.add(href, filenames, response_code, msg)
|
136
168
|
end
|
137
169
|
end
|
138
170
|
|
@@ -153,12 +185,23 @@ module HTML
|
|
153
185
|
|
154
186
|
return unless body_doc.xpath(xpath).empty?
|
155
187
|
|
156
|
-
|
188
|
+
msg = "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not"
|
189
|
+
add_external_issue(filenames, msg, response.code)
|
190
|
+
@cache.add(href, filenames, response.code, msg)
|
157
191
|
end
|
158
192
|
|
159
193
|
def handle_timeout(href, filenames, response_code)
|
194
|
+
msg = "External link #{href} failed: got a time out (response code #{response_code})"
|
195
|
+
@cache.add(href, filenames, 0, msg)
|
196
|
+
return if @options[:only_4xx]
|
197
|
+
add_external_issue(filenames, msg, response_code)
|
198
|
+
end
|
199
|
+
|
200
|
+
def handle_failure(href, filenames, response_code)
|
201
|
+
msg = "External link #{href} failed: response code #{response_code} means something's wrong"
|
202
|
+
@cache.add(href, filenames, 0, msg)
|
160
203
|
return if @options[:only_4xx]
|
161
|
-
add_external_issue
|
204
|
+
add_external_issue(filenames, msg, response_code)
|
162
205
|
end
|
163
206
|
|
164
207
|
def add_external_issue(filenames, desc, status = nil)
|
data/lib/html/proofer/utils.rb
CHANGED
@@ -3,6 +3,12 @@ require 'nokogiri'
|
|
3
3
|
module HTML
|
4
4
|
class Proofer
|
5
5
|
module Utils
|
6
|
+
STORAGE_DIR = File.join('tmp', '.htmlproofer')
|
7
|
+
|
8
|
+
def pluralize(count, single, plural)
|
9
|
+
"#{count} " << (count == 1 ? single : plural)
|
10
|
+
end
|
11
|
+
|
6
12
|
def create_nokogiri(path)
|
7
13
|
if File.exist? path
|
8
14
|
content = File.open(path).read
|
@@ -10,7 +16,7 @@ module HTML
|
|
10
16
|
content = path
|
11
17
|
end
|
12
18
|
|
13
|
-
Nokogiri::HTML(content)
|
19
|
+
Nokogiri::HTML(clean_content(content))
|
14
20
|
end
|
15
21
|
module_function :create_nokogiri
|
16
22
|
|
@@ -21,6 +27,21 @@ module HTML
|
|
21
27
|
href
|
22
28
|
end
|
23
29
|
module_function :swap
|
30
|
+
|
31
|
+
# address a problem with Nokogiri's parsing URL entities
|
32
|
+
# problem from http://git.io/vBYU1
|
33
|
+
# solution from http://git.io/vBYUi
|
34
|
+
def clean_content(string)
|
35
|
+
matches = string.scan(%r{https?://([^>]+)}i)
|
36
|
+
|
37
|
+
matches.flatten.each do |url|
|
38
|
+
escaped_url = url.gsub(/&(?!amp;)/, '&amp;')
|
39
|
+
escaped_url = escaped_url.gsub(%r{/}, '/')
|
40
|
+
string.gsub!(url, escaped_url)
|
41
|
+
end
|
42
|
+
string
|
43
|
+
end
|
44
|
+
module_function :clean_content
|
24
45
|
end
|
25
46
|
end
|
26
47
|
end
|
data/lib/html/proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.5.2
|
4
|
+
version: 2.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mercenary
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '2.3'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: activesupport
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '4.2'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '4.2'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: redcarpet
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -178,6 +192,20 @@ dependencies:
|
|
178
192
|
- - "~>"
|
179
193
|
- !ruby/object:Gem::Version
|
180
194
|
version: '2.9'
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: timecop
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: '0.8'
|
202
|
+
type: :development
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0.8'
|
181
209
|
description: Test your rendered HTML files to make sure they're accurate.
|
182
210
|
email:
|
183
211
|
- gjtorikian@gmail.com
|
@@ -197,6 +225,7 @@ files:
|
|
197
225
|
- lib/html/proofer/checks/images.rb
|
198
226
|
- lib/html/proofer/checks/links.rb
|
199
227
|
- lib/html/proofer/checks/scripts.rb
|
228
|
+
- lib/html/proofer/configuration.rb
|
200
229
|
- lib/html/proofer/log.rb
|
201
230
|
- lib/html/proofer/url_validator.rb
|
202
231
|
- lib/html/proofer/utils.rb
|
@@ -222,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
222
251
|
version: '0'
|
223
252
|
requirements: []
|
224
253
|
rubyforge_project:
|
225
|
-
rubygems_version: 2.4.5
|
254
|
+
rubygems_version: 2.4.5.1
|
226
255
|
signing_key:
|
227
256
|
specification_version: 4
|
228
257
|
summary: A set of tests to validate your HTML output. These tests check if your image
|