html-proofer 3.15.0 → 3.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +5 -1
- data/lib/html-proofer.rb +1 -1
- data/lib/html-proofer/cache.rb +10 -10
- data/lib/html-proofer/check.rb +15 -7
- data/lib/html-proofer/check/html.rb +7 -1
- data/lib/html-proofer/check/links.rb +47 -21
- data/lib/html-proofer/check/opengraph.rb +4 -4
- data/lib/html-proofer/configuration.rb +4 -2
- data/lib/html-proofer/element.rb +22 -17
- data/lib/html-proofer/log.rb +15 -11
- data/lib/html-proofer/middleware.rb +4 -2
- data/lib/html-proofer/runner.rb +78 -12
- data/lib/html-proofer/url_validator.rb +21 -25
- data/lib/html-proofer/utils.rb +2 -2
- data/lib/html-proofer/version.rb +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b5a47189e872130e01a2080e6e8ddb1f7f22520098deaf941960194d2338b2b
|
4
|
+
data.tar.gz: 3543c42860956427e8c828861f76e1c5cd984d79054c091c6e5f97e0352b3137
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1defd1fb2ee651015231b0ed4d1407ed56e25a907e0602ee9e175b23f3b88938e020f438d2171ceea8d52bc58984901543c97184a3da13c9b08cdaf62200a4aa
|
7
|
+
data.tar.gz: 97eb6120db724822830398a85f14faa4415638e965d21aef0703991e7a14d335f32df65d72bb877be6f0706f24154deea3f08a6bbeb2cdf5d7c392d03ce2af23
|
data/bin/htmlproofer
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
$stdout.sync = true
|
5
5
|
|
6
6
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
7
7
|
|
@@ -41,6 +41,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
41
41
|
p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
|
42
42
|
p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
|
43
43
|
p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
|
44
|
+
p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
|
45
|
+
p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
|
44
46
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
45
47
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
46
48
|
p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
|
@@ -82,6 +84,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
82
84
|
options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
|
83
85
|
options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
|
84
86
|
options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
|
87
|
+
options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
|
88
|
+
options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
|
85
89
|
|
86
90
|
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
|
87
91
|
|
data/lib/html-proofer.rb
CHANGED
data/lib/html-proofer/cache.rb
CHANGED
@@ -59,6 +59,8 @@ module HTMLProofer
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def add(url, filenames, status, msg = '')
|
62
|
+
return unless use_cache?
|
63
|
+
|
62
64
|
data = {
|
63
65
|
time: @cache_time,
|
64
66
|
filenames: filenames,
|
@@ -92,12 +94,12 @@ module HTMLProofer
|
|
92
94
|
del = 0
|
93
95
|
@cache_log.delete_if do |url, _|
|
94
96
|
url = clean_url(url)
|
95
|
-
if
|
97
|
+
if found_urls.include?(url)
|
98
|
+
false
|
99
|
+
else
|
96
100
|
@logger.log :debug, "Removing #{url} from cache check"
|
97
101
|
del += 1
|
98
102
|
true
|
99
|
-
else
|
100
|
-
false
|
101
103
|
end
|
102
104
|
end
|
103
105
|
|
@@ -115,14 +117,12 @@ module HTMLProofer
|
|
115
117
|
@load.nil?
|
116
118
|
end
|
117
119
|
|
118
|
-
def retrieve_urls(
|
119
|
-
urls_to_check = detect_url_changes(
|
120
|
+
def retrieve_urls(urls)
|
121
|
+
urls_to_check = detect_url_changes(urls)
|
120
122
|
@cache_log.each_pair do |url, cache|
|
121
|
-
if within_timeframe?(cache['time'])
|
122
|
-
|
123
|
-
|
124
|
-
urls_to_check[url] = cache['filenames'] # recheck expired links
|
125
|
-
end
|
123
|
+
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
124
|
+
|
125
|
+
urls_to_check[url] = cache['filenames'] # recheck expired links
|
126
126
|
end
|
127
127
|
urls_to_check
|
128
128
|
end
|
data/lib/html-proofer/check.rb
CHANGED
@@ -3,20 +3,23 @@
|
|
3
3
|
module HTMLProofer
|
4
4
|
# Mostly handles issue management and collecting of external URLs.
|
5
5
|
class Check
|
6
|
-
attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
|
6
|
+
attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
|
7
7
|
|
8
|
-
def initialize(src, path, html, options)
|
8
|
+
def initialize(src, path, html, logger, cache, options)
|
9
9
|
@src = src
|
10
10
|
@path = path
|
11
11
|
@html = remove_ignored(html)
|
12
|
+
@logger = logger
|
13
|
+
@cache = cache
|
12
14
|
@options = options
|
13
15
|
@issues = []
|
16
|
+
@internal_urls = {}
|
14
17
|
@external_urls = {}
|
15
18
|
end
|
16
19
|
|
17
20
|
def create_element(node)
|
18
21
|
@node = node
|
19
|
-
Element.new(node, self)
|
22
|
+
Element.new(node, self, @logger)
|
20
23
|
end
|
21
24
|
|
22
25
|
def run
|
@@ -25,15 +28,20 @@ module HTMLProofer
|
|
25
28
|
|
26
29
|
def add_issue(desc, line: nil, status: -1, content: nil)
|
27
30
|
@issues << Issue.new(@path, desc, line: line, status: status, content: content)
|
31
|
+
false
|
32
|
+
end
|
33
|
+
|
34
|
+
def add_to_internal_urls(url, internal_url)
|
35
|
+
if @internal_urls[url]
|
36
|
+
@internal_urls[url] << internal_url
|
37
|
+
else
|
38
|
+
@internal_urls[url] = [internal_url]
|
39
|
+
end
|
28
40
|
end
|
29
41
|
|
30
42
|
def add_to_external_urls(url)
|
31
43
|
return if @external_urls[url]
|
32
44
|
|
33
|
-
add_path_for_url(url)
|
34
|
-
end
|
35
|
-
|
36
|
-
def add_path_for_url(url)
|
37
45
|
if @external_urls[url]
|
38
46
|
@external_urls[url] << @path
|
39
47
|
else
|
@@ -6,7 +6,9 @@ class HtmlCheck < ::HTMLProofer::Check
|
|
6
6
|
INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
|
7
7
|
INVALID_PREFIX = /Namespace prefix/.freeze
|
8
8
|
PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
|
9
|
-
DOCTYPE_MSG = /
|
9
|
+
DOCTYPE_MSG = /Expected a doctype token/.freeze
|
10
|
+
EOF_IN_TAG = /End of input in tag/.freeze
|
11
|
+
MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
|
10
12
|
|
11
13
|
def run
|
12
14
|
@html.errors.each do |error|
|
@@ -24,6 +26,10 @@ class HtmlCheck < ::HTMLProofer::Check
|
|
24
26
|
options[:validation][:report_missing_names]
|
25
27
|
when DOCTYPE_MSG
|
26
28
|
options[:validation][:report_missing_doctype]
|
29
|
+
when EOF_IN_TAG
|
30
|
+
options[:validation][:report_eof_tags]
|
31
|
+
when MISMATCHED_TAGS
|
32
|
+
options[:validation][:report_mismatched_tags]
|
27
33
|
else
|
28
34
|
true
|
29
35
|
end
|
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
|
|
4
4
|
include HTMLProofer::Utils
|
5
5
|
|
6
6
|
def missing_href?
|
7
|
+
return blank?(@link.src) if @node.name == 'source'
|
8
|
+
|
7
9
|
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
8
10
|
end
|
9
11
|
|
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
|
|
12
14
|
end
|
13
15
|
|
14
16
|
def run
|
15
|
-
@html.css('a, link').each do |node|
|
17
|
+
@html.css('a, link, source').each do |node|
|
16
18
|
@link = create_element(node)
|
17
19
|
line = node.line
|
18
20
|
content = node.to_s
|
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
|
|
49
51
|
# curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
|
50
52
|
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
|
51
53
|
|
52
|
-
add_to_external_urls(@link.href)
|
54
|
+
add_to_external_urls(@link.href || @link.src)
|
53
55
|
next
|
54
|
-
elsif @link.internal?
|
55
|
-
|
56
|
+
elsif @link.internal?
|
57
|
+
if @link.exists?
|
58
|
+
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
59
|
+
else
|
60
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
|
61
|
+
end
|
56
62
|
end
|
63
|
+
end
|
57
64
|
|
58
|
-
|
59
|
-
|
60
|
-
add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
|
61
|
-
next
|
62
|
-
end
|
65
|
+
external_urls
|
66
|
+
end
|
63
67
|
|
64
|
-
|
65
|
-
|
68
|
+
def check_internal_link(link, line, content)
|
69
|
+
# does the local directory have a trailing slash?
|
70
|
+
if link.unslashed_directory?(link.absolute_path)
|
71
|
+
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line: line, content: content)
|
72
|
+
return false
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
# verify the target hash
|
76
|
+
return handle_hash(link, line, content) if link.hash
|
77
|
+
|
78
|
+
true
|
69
79
|
end
|
70
80
|
|
71
81
|
def check_schemes(link, line, content)
|
@@ -94,23 +104,27 @@ class LinkCheck < ::HTMLProofer::Check
|
|
94
104
|
end
|
95
105
|
|
96
106
|
def handle_hash(link, line, content)
|
97
|
-
if link.internal? && !
|
98
|
-
add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
|
107
|
+
if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
|
108
|
+
return add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
|
99
109
|
elsif link.external?
|
100
|
-
external_link_check(link, line, content)
|
110
|
+
return external_link_check(link, line, content)
|
101
111
|
end
|
112
|
+
|
113
|
+
true
|
102
114
|
end
|
103
115
|
|
104
116
|
def external_link_check(link, line, content)
|
105
|
-
if
|
106
|
-
|
117
|
+
if link.exists? # rubocop:disable Style/GuardClause
|
118
|
+
target_html = create_nokogiri(link.absolute_path)
|
119
|
+
return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
|
107
120
|
else
|
108
|
-
|
109
|
-
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
|
121
|
+
return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
|
110
122
|
end
|
123
|
+
|
124
|
+
true
|
111
125
|
end
|
112
126
|
|
113
|
-
def
|
127
|
+
def hash_exists?(html, href_hash)
|
114
128
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
115
129
|
fragment_ids = [href_hash, decoded_href_hash]
|
116
130
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
|
|
148
162
|
|
149
163
|
class XpathFunctions
|
150
164
|
def case_sensitive_equals(node_set, str_to_match)
|
151
|
-
node_set.find_all { |node| node.to_s
|
165
|
+
node_set.find_all { |node| node.to_s.== str_to_match.to_s }
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
class InternalLink
|
170
|
+
attr_reader :link, :href, :path, :line, :content
|
171
|
+
|
172
|
+
def initialize(link, path, line, content)
|
173
|
+
@link = link
|
174
|
+
@href = @link.href
|
175
|
+
@path = path
|
176
|
+
@line = line
|
177
|
+
@content = content
|
152
178
|
end
|
153
179
|
end
|
154
180
|
end
|
@@ -3,12 +3,12 @@
|
|
3
3
|
class OpenGraphElement < ::HTMLProofer::Element
|
4
4
|
attr_reader :src
|
5
5
|
|
6
|
-
def initialize(obj, check)
|
7
|
-
super(obj, check)
|
6
|
+
def initialize(obj, check, logger)
|
7
|
+
super(obj, check, logger)
|
8
8
|
# Fake up src from the content attribute
|
9
9
|
instance_variable_set('@src', @content)
|
10
10
|
|
11
|
-
@src.insert 0, 'http:' if
|
11
|
+
@src.insert 0, 'http:' if %r{^//}.match?(@src)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -23,7 +23,7 @@ class OpenGraphCheck < ::HTMLProofer::Check
|
|
23
23
|
|
24
24
|
def run
|
25
25
|
@html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
|
26
|
-
@opengraph = OpenGraphElement.new(m, self)
|
26
|
+
@opengraph = OpenGraphElement.new(m, self, @logger)
|
27
27
|
|
28
28
|
next if @opengraph.ignore?
|
29
29
|
|
@@ -52,7 +52,9 @@ module HTMLProofer
|
|
52
52
|
report_script_embeds: false,
|
53
53
|
report_missing_names: false,
|
54
54
|
report_invalid_tags: false,
|
55
|
-
report_missing_doctype: false
|
55
|
+
report_missing_doctype: false,
|
56
|
+
report_eof_tags: false,
|
57
|
+
report_mismatched_tags: false
|
56
58
|
}.freeze
|
57
59
|
|
58
60
|
CACHE_DEFAULTS = {}.freeze
|
@@ -78,7 +80,7 @@ module HTMLProofer
|
|
78
80
|
begin
|
79
81
|
JSON.parse(config)
|
80
82
|
rescue StandardError
|
81
|
-
raise ArgumentError, "Option '
|
83
|
+
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
|
82
84
|
end
|
83
85
|
end
|
84
86
|
end
|
data/lib/html-proofer/element.rb
CHANGED
@@ -10,12 +10,18 @@ module HTMLProofer
|
|
10
10
|
|
11
11
|
attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
|
12
12
|
|
13
|
-
def initialize(obj, check)
|
13
|
+
def initialize(obj, check, logger)
|
14
|
+
@logger = logger
|
14
15
|
# Construct readable ivars for every element
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
begin
|
17
|
+
obj.attributes.each_pair do |attribute, value|
|
18
|
+
name = attribute.tr('-:.;@', '_').to_s.to_sym
|
19
|
+
(class << self; self; end).send(:attr_reader, name)
|
20
|
+
instance_variable_set("@#{name}", value.value)
|
21
|
+
end
|
22
|
+
rescue NameError => e
|
23
|
+
@logger.log :error, "Attribute set `#{obj}` contains an error!"
|
24
|
+
raise e
|
19
25
|
end
|
20
26
|
|
21
27
|
@aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
|
@@ -36,19 +42,19 @@ module HTMLProofer
|
|
36
42
|
|
37
43
|
# fix up missing protocols
|
38
44
|
if defined?(@href)
|
39
|
-
@href.insert(0, 'http:') if
|
45
|
+
@href.insert(0, 'http:') if %r{^//}.match?(@href)
|
40
46
|
else
|
41
47
|
@href = nil
|
42
48
|
end
|
43
49
|
|
44
50
|
if defined?(@src)
|
45
|
-
@src.insert(0, 'http:') if
|
51
|
+
@src.insert(0, 'http:') if %r{^//}.match?(@src)
|
46
52
|
else
|
47
53
|
@src = nil
|
48
54
|
end
|
49
55
|
|
50
56
|
if defined?(@srcset)
|
51
|
-
@srcset.insert(0, 'http:') if
|
57
|
+
@srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
|
52
58
|
else
|
53
59
|
@srcset = nil
|
54
60
|
end
|
@@ -99,12 +105,10 @@ module HTMLProofer
|
|
99
105
|
return true if @data_proofer_ignore
|
100
106
|
return true if @parent_ignorable
|
101
107
|
|
102
|
-
return true if
|
108
|
+
return true if /^javascript:/.match?(url)
|
103
109
|
|
104
110
|
# ignore base64 encoded images
|
105
|
-
if %w[ImageCheck FaviconCheck].include?
|
106
|
-
return true if url =~ /^data:image/
|
107
|
-
end
|
111
|
+
return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
|
108
112
|
|
109
113
|
# ignore user defined URLs
|
110
114
|
return true if ignores_pattern_check(@check.options[:url_ignore])
|
@@ -172,7 +176,7 @@ module HTMLProofer
|
|
172
176
|
|
173
177
|
path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
|
174
178
|
|
175
|
-
if
|
179
|
+
if %r{^/}.match?(path) # path relative to root
|
176
180
|
if File.directory?(@check.src)
|
177
181
|
base = @check.src
|
178
182
|
else
|
@@ -181,7 +185,7 @@ module HTMLProofer
|
|
181
185
|
end
|
182
186
|
elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
|
183
187
|
base = File.dirname @check.path
|
184
|
-
elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
|
188
|
+
elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
|
185
189
|
base = File.dirname @check.path
|
186
190
|
else # relative link, path is a directory
|
187
191
|
base = @check.path
|
@@ -211,10 +215,11 @@ module HTMLProofer
|
|
211
215
|
|
212
216
|
def ignores_pattern_check(links)
|
213
217
|
links.each do |ignore|
|
214
|
-
|
218
|
+
case ignore
|
219
|
+
when String
|
215
220
|
return true if ignore == url
|
216
|
-
|
217
|
-
return true if ignore
|
221
|
+
when Regexp
|
222
|
+
return true if ignore&.match?(url)
|
218
223
|
end
|
219
224
|
end
|
220
225
|
|
data/lib/html-proofer/log.rb
CHANGED
@@ -7,16 +7,27 @@ module HTMLProofer
|
|
7
7
|
class Log
|
8
8
|
include Yell::Loggable
|
9
9
|
|
10
|
+
STDOUT_LEVELS = %i[debug info warn].freeze
|
11
|
+
STDERR_LEVELS = %i[error fatal].freeze
|
12
|
+
|
10
13
|
def initialize(log_level)
|
11
14
|
@logger = Yell.new(format: false, \
|
12
15
|
name: 'HTMLProofer', \
|
13
16
|
level: "gte.#{log_level}") do |l|
|
14
|
-
l.adapter :stdout, level:
|
15
|
-
l.adapter :stderr, level:
|
17
|
+
l.adapter :stdout, level: 'lte.warn'
|
18
|
+
l.adapter :stderr, level: 'gte.error'
|
16
19
|
end
|
17
20
|
end
|
18
21
|
|
19
22
|
def log(level, message)
|
23
|
+
log_with_color(level, message)
|
24
|
+
end
|
25
|
+
|
26
|
+
def log_with_color(level, message)
|
27
|
+
@logger.send level, colorize(level, message)
|
28
|
+
end
|
29
|
+
|
30
|
+
def colorize(level, message)
|
20
31
|
color = case level
|
21
32
|
when :debug
|
22
33
|
:cyan
|
@@ -28,15 +39,8 @@ module HTMLProofer
|
|
28
39
|
:red
|
29
40
|
end
|
30
41
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
def log_with_color(level, color, message)
|
35
|
-
@logger.send level, colorize(color, message)
|
36
|
-
end
|
37
|
-
|
38
|
-
def colorize(color, message)
|
39
|
-
if $stdout.isatty && $stderr.isatty
|
42
|
+
if (STDOUT_LEVELS.include?(level) && $stdout.isatty) || \
|
43
|
+
(STDERR_LEVELS.include?(level) && $stderr.isatty)
|
40
44
|
Rainbow(message).send(color)
|
41
45
|
else
|
42
46
|
message
|
@@ -6,6 +6,7 @@ module HTMLProofer
|
|
6
6
|
|
7
7
|
class InvalidHtmlError < StandardError
|
8
8
|
def initialize(failures)
|
9
|
+
super
|
9
10
|
@failures = failures
|
10
11
|
end
|
11
12
|
|
@@ -21,7 +22,8 @@ module HTMLProofer
|
|
21
22
|
allow_hash_href: true,
|
22
23
|
check_external_hash: true,
|
23
24
|
check_html: true,
|
24
|
-
url_ignore: [/.*/] # Don't try to check local files exist
|
25
|
+
url_ignore: [/.*/], # Don't try to check if local files exist
|
26
|
+
validation: { report_eof_tags: true }
|
25
27
|
}
|
26
28
|
end
|
27
29
|
|
@@ -52,7 +54,7 @@ module HTMLProofer
|
|
52
54
|
def call(env)
|
53
55
|
result = @app.call(env)
|
54
56
|
return result if env['REQUEST_METHOD'] != 'GET'
|
55
|
-
return result if env['QUERY_STRING']
|
57
|
+
return result if /proofer-ignore/.match?(env['QUERY_STRING'])
|
56
58
|
return result if result.first != 200
|
57
59
|
|
58
60
|
body = []
|
data/lib/html-proofer/runner.rb
CHANGED
@@ -4,7 +4,7 @@ module HTMLProofer
|
|
4
4
|
class Runner
|
5
5
|
include HTMLProofer::Utils
|
6
6
|
|
7
|
-
attr_reader :options, :external_urls, :failures
|
7
|
+
attr_reader :options, :internal_urls, :external_urls, :failures
|
8
8
|
|
9
9
|
def initialize(src, opts = {})
|
10
10
|
@src = src
|
@@ -20,6 +20,8 @@ module HTMLProofer
|
|
20
20
|
|
21
21
|
@type = @options.delete(:type)
|
22
22
|
@logger = HTMLProofer::Log.new(@options[:log_level])
|
23
|
+
@cache = Cache.new(@logger, @options[:cache])
|
24
|
+
@internal_link_checks = nil
|
23
25
|
|
24
26
|
# Add swap patterns for internal domains
|
25
27
|
unless @options[:internal_domains].empty?
|
@@ -30,7 +32,11 @@ module HTMLProofer
|
|
30
32
|
end
|
31
33
|
end
|
32
34
|
|
35
|
+
@internal_urls = {}
|
36
|
+
@internal_urls_to_paths = {}
|
37
|
+
@external_urls = {}
|
33
38
|
@failures = []
|
39
|
+
@before_request = []
|
34
40
|
end
|
35
41
|
|
36
42
|
def run
|
@@ -45,7 +51,7 @@ module HTMLProofer
|
|
45
51
|
end
|
46
52
|
|
47
53
|
if @failures.empty?
|
48
|
-
@logger.
|
54
|
+
@logger.log :info, 'HTML-Proofer finished successfully.'
|
49
55
|
else
|
50
56
|
print_failed_tests
|
51
57
|
end
|
@@ -58,15 +64,13 @@ module HTMLProofer
|
|
58
64
|
end
|
59
65
|
end
|
60
66
|
@external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
|
61
|
-
|
67
|
+
validate_external_urls
|
62
68
|
end
|
63
69
|
|
64
70
|
# Collects any external URLs found in a directory of files. Also collectes
|
65
71
|
# every failed test from process_files.
|
66
72
|
# Sends the external URLs to Typhoeus for batch processing.
|
67
73
|
def check_files
|
68
|
-
@external_urls = {}
|
69
|
-
|
70
74
|
process_files.each do |item|
|
71
75
|
@external_urls.merge!(item[:external_urls])
|
72
76
|
@failures.concat(item[:failures])
|
@@ -77,9 +81,12 @@ module HTMLProofer
|
|
77
81
|
# just not run those other checks at all.
|
78
82
|
if @options[:external_only]
|
79
83
|
@failures = []
|
80
|
-
|
84
|
+
validate_external_urls
|
81
85
|
elsif !@options[:disable_external]
|
82
|
-
|
86
|
+
validate_external_urls
|
87
|
+
validate_internal_urls
|
88
|
+
else
|
89
|
+
validate_internal_urls
|
83
90
|
end
|
84
91
|
end
|
85
92
|
|
@@ -100,8 +107,21 @@ module HTMLProofer
|
|
100
107
|
@src.each do |src|
|
101
108
|
checks.each do |klass|
|
102
109
|
@logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
|
103
|
-
check = Object.const_get(klass).new(src, path, html, @options)
|
110
|
+
check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
|
104
111
|
check.run
|
112
|
+
|
113
|
+
if klass == 'LinkCheck'
|
114
|
+
@internal_link_checks = check
|
115
|
+
check.internal_urls.each_pair do |url, internal_urls|
|
116
|
+
if @internal_urls_to_paths[url]
|
117
|
+
@internal_urls_to_paths[url].concat(internal_urls.map(&:path))
|
118
|
+
else
|
119
|
+
@internal_urls_to_paths[url] = internal_urls.map(&:path)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
@internal_urls.merge!(check.internal_urls)
|
123
|
+
end
|
124
|
+
|
105
125
|
external_urls = check.external_urls
|
106
126
|
external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
|
107
127
|
result[:external_urls].merge!(external_urls)
|
@@ -112,15 +132,35 @@ module HTMLProofer
|
|
112
132
|
end
|
113
133
|
|
114
134
|
def check_path(path)
|
115
|
-
check_parsed
|
135
|
+
check_parsed(create_nokogiri(path), path)
|
116
136
|
end
|
117
137
|
|
118
|
-
def
|
119
|
-
url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
|
138
|
+
def validate_external_urls
|
139
|
+
url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
|
140
|
+
url_validator.before_request = @before_request
|
120
141
|
@failures.concat(url_validator.run)
|
121
142
|
@external_urls = url_validator.external_urls
|
122
143
|
end
|
123
144
|
|
145
|
+
def validate_internal_urls
|
146
|
+
if @cache.use_cache?
|
147
|
+
urls_to_check = load_internal_cache
|
148
|
+
|
149
|
+
urls_to_check.each_pair do |url, internal_urls|
|
150
|
+
result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.line, internal_urls.first.content)
|
151
|
+
code = result ? 200 : 404
|
152
|
+
@cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
|
153
|
+
end
|
154
|
+
@cache.write
|
155
|
+
else
|
156
|
+
@internal_urls.values.flatten.each do |internal_url|
|
157
|
+
@internal_link_checks.check_internal_link(internal_url.link, internal_url.line, internal_url.content)
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
@failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
|
162
|
+
end
|
163
|
+
|
124
164
|
def files
|
125
165
|
@files ||= if @type == :directory
|
126
166
|
@src.map do |src|
|
@@ -147,6 +187,8 @@ module HTMLProofer
|
|
147
187
|
def checks
|
148
188
|
return @checks if defined?(@checks) && !@checks.nil?
|
149
189
|
|
190
|
+
return (@checks = ['LinkCheck']) if @type == :links
|
191
|
+
|
150
192
|
@checks = HTMLProofer::Check.subchecks.map(&:name)
|
151
193
|
@checks.delete('FaviconCheck') unless @options[:check_favicon]
|
152
194
|
@checks.delete('HtmlCheck') unless @options[:check_html]
|
@@ -169,7 +211,31 @@ module HTMLProofer
|
|
169
211
|
sorted_failures.sort_and_report
|
170
212
|
count = @failures.length
|
171
213
|
failure_text = pluralize(count, 'failure', 'failures')
|
172
|
-
raise @logger.colorize :
|
214
|
+
raise @logger.colorize :fatal, "HTML-Proofer found #{failure_text}!"
|
215
|
+
end
|
216
|
+
|
217
|
+
# Set before_request callback.
|
218
|
+
#
|
219
|
+
# @example Set before_request.
|
220
|
+
# request.before_request { |request| p "yay" }
|
221
|
+
#
|
222
|
+
# @param [ Block ] block The block to execute.
|
223
|
+
#
|
224
|
+
# @yield [ Typhoeus::Request ]
|
225
|
+
#
|
226
|
+
# @return [ Array<Block> ] All before_request blocks.
|
227
|
+
def before_request(&block)
|
228
|
+
@before_request ||= []
|
229
|
+
@before_request << block if block_given?
|
230
|
+
@before_request
|
231
|
+
end
|
232
|
+
|
233
|
+
def load_internal_cache
|
234
|
+
urls_to_check = @cache.retrieve_urls(@internal_urls)
|
235
|
+
cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
|
236
|
+
@logger.log :info, "Found #{cache_text} in the cache..."
|
237
|
+
|
238
|
+
urls_to_check
|
173
239
|
end
|
174
240
|
end
|
175
241
|
end
|
@@ -10,21 +10,23 @@ module HTMLProofer
|
|
10
10
|
include HTMLProofer::Utils
|
11
11
|
|
12
12
|
attr_reader :external_urls
|
13
|
+
attr_writer :before_request
|
13
14
|
|
14
|
-
def initialize(logger, external_urls, options)
|
15
|
+
def initialize(logger, cache, external_urls, options)
|
15
16
|
@logger = logger
|
16
17
|
@external_urls = external_urls
|
17
18
|
@failed_tests = []
|
18
19
|
@options = options
|
19
20
|
@hydra = Typhoeus::Hydra.new(@options[:hydra])
|
20
|
-
@cache =
|
21
|
+
@cache = cache
|
22
|
+
@before_request = []
|
21
23
|
end
|
22
24
|
|
23
25
|
def run
|
24
26
|
@external_urls = remove_query_values
|
25
27
|
|
26
28
|
if @cache.use_cache?
|
27
|
-
urls_to_check =
|
29
|
+
urls_to_check = @cache.retrieve_urls(@external_urls)
|
28
30
|
external_link_checker(urls_to_check)
|
29
31
|
@cache.write
|
30
32
|
else
|
@@ -41,11 +43,11 @@ module HTMLProofer
|
|
41
43
|
iterable_external_urls = @external_urls.dup
|
42
44
|
@external_urls.each_key do |url|
|
43
45
|
uri = begin
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
Addressable::URI.parse(url)
|
47
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
48
|
+
@logger.log :error, "#{url} is an invalid URL"
|
49
|
+
nil
|
50
|
+
end
|
49
51
|
next if uri.nil? || uri.query.nil?
|
50
52
|
|
51
53
|
iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
|
@@ -72,15 +74,6 @@ module HTMLProofer
|
|
72
74
|
uri.host + uri.path
|
73
75
|
end
|
74
76
|
|
75
|
-
def load_cache
|
76
|
-
cache_count = @cache.size
|
77
|
-
cache_text = pluralize(cache_count, 'link', 'links')
|
78
|
-
|
79
|
-
@logger.log :info, "Found #{cache_text} in the cache..."
|
80
|
-
|
81
|
-
@cache.retrieve_urls(@external_urls)
|
82
|
-
end
|
83
|
-
|
84
77
|
# Proofer runs faster if we pull out all the external URLs and run the checks
|
85
78
|
# at the end. Otherwise, we're halting the consuming process for every file during
|
86
79
|
# `process_files`.
|
@@ -109,11 +102,11 @@ module HTMLProofer
|
|
109
102
|
def establish_queue(external_urls)
|
110
103
|
external_urls.each_pair do |url, filenames|
|
111
104
|
url = begin
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
105
|
+
clean_url(url)
|
106
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
107
|
+
add_external_issue(filenames, "#{url} is an invalid URL")
|
108
|
+
next
|
109
|
+
end
|
117
110
|
|
118
111
|
method = if hash?(url) && @options[:check_external_hash]
|
119
112
|
:get
|
@@ -127,16 +120,19 @@ module HTMLProofer
|
|
127
120
|
def clean_url(href)
|
128
121
|
# catch any obvious issues, like strings in port numbers
|
129
122
|
parsed = Addressable::URI.parse(href)
|
130
|
-
if href
|
131
|
-
parsed.normalize
|
132
|
-
else
|
123
|
+
if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
133
124
|
href
|
125
|
+
else
|
126
|
+
parsed.normalize
|
134
127
|
end
|
135
128
|
end
|
136
129
|
|
137
130
|
def queue_request(method, href, filenames)
|
138
131
|
opts = @options[:typhoeus].merge(method: method)
|
139
132
|
request = Typhoeus::Request.new(href, opts)
|
133
|
+
@before_request.each do |callback|
|
134
|
+
callback.call(request)
|
135
|
+
end
|
140
136
|
request.on_complete { |response| response_handler(response, filenames) }
|
141
137
|
@hydra.queue request
|
142
138
|
end
|
data/lib/html-proofer/utils.rb
CHANGED
@@ -5,7 +5,7 @@ require 'nokogumbo'
|
|
5
5
|
module HTMLProofer
|
6
6
|
module Utils
|
7
7
|
def pluralize(count, single, plural)
|
8
|
-
"#{count} #{
|
8
|
+
"#{count} #{count == 1 ? single : plural}"
|
9
9
|
end
|
10
10
|
|
11
11
|
def create_nokogiri(path)
|
@@ -15,7 +15,7 @@ module HTMLProofer
|
|
15
15
|
path
|
16
16
|
end
|
17
17
|
|
18
|
-
Nokogiri::HTML5(content)
|
18
|
+
Nokogiri::HTML5(content, max_errors: -1)
|
19
19
|
end
|
20
20
|
|
21
21
|
def swap(href, replacement)
|
data/lib/html-proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -293,23 +293,23 @@ homepage: https://github.com/gjtorikian/html-proofer
|
|
293
293
|
licenses:
|
294
294
|
- MIT
|
295
295
|
metadata: {}
|
296
|
-
post_install_message:
|
296
|
+
post_install_message:
|
297
297
|
rdoc_options: []
|
298
298
|
require_paths:
|
299
299
|
- lib
|
300
300
|
required_ruby_version: !ruby/object:Gem::Requirement
|
301
301
|
requirements:
|
302
|
-
- - "
|
302
|
+
- - "~>"
|
303
303
|
- !ruby/object:Gem::Version
|
304
|
-
version: '
|
304
|
+
version: '2.4'
|
305
305
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
306
306
|
requirements:
|
307
307
|
- - ">="
|
308
308
|
- !ruby/object:Gem::Version
|
309
309
|
version: '0'
|
310
310
|
requirements: []
|
311
|
-
rubygems_version: 3.
|
312
|
-
signing_key:
|
311
|
+
rubygems_version: 3.1.2
|
312
|
+
signing_key:
|
313
313
|
specification_version: 4
|
314
314
|
summary: A set of tests to validate your HTML output. These tests check if your image
|
315
315
|
references are legitimate, if they have alt tags, if your internal links are working,
|