html-proofer 3.15.1 → 3.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +6 -2
- data/lib/html-proofer/cache.rb +10 -10
- data/lib/html-proofer/check.rb +15 -7
- data/lib/html-proofer/check/html.rb +7 -1
- data/lib/html-proofer/check/links.rb +47 -21
- data/lib/html-proofer/check/opengraph.rb +4 -4
- data/lib/html-proofer/configuration.rb +4 -2
- data/lib/html-proofer/element.rb +38 -34
- data/lib/html-proofer/middleware.rb +4 -2
- data/lib/html-proofer/runner.rb +76 -10
- data/lib/html-proofer/url_validator.rb +21 -25
- data/lib/html-proofer/utils.rb +2 -2
- data/lib/html-proofer/version.rb +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8be1f60d1495959b468c2cfad3a4d659a5817b934bb2011906540d296e2a062
|
4
|
+
data.tar.gz: 39ecb6a6899913c4745289443ff77cf1483d3578689fc616099bb00df90a4f16
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ce1749adb1022b2a3245396c28b990d4de6bfe368fb5944cddf81b48822f54dff8c744b847e65bcd4cb040f2e7f63a1cbdcccebda943380ab767ecbd96161c8a
|
7
|
+
data.tar.gz: 0ffc3e5095dbf40272113991b521a0909408a40715efb2119bfd8f4a310b62b6884fe2566beb198e873f781d6d0ebefed06a966a91a4772309c53835c66ce483
|
data/bin/htmlproofer
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
$stdout.sync = true
|
5
5
|
|
6
6
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
7
7
|
|
@@ -41,6 +41,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
41
41
|
p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
|
42
42
|
p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
|
43
43
|
p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
|
44
|
+
p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
|
45
|
+
p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
|
44
46
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
45
47
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
46
48
|
p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
|
@@ -48,7 +50,7 @@ Mercenary.program(:htmlproofer) do |p|
|
|
48
50
|
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
49
51
|
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
50
52
|
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
51
|
-
p.option 'root_dir', '--root-
|
53
|
+
p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
|
52
54
|
|
53
55
|
p.action do |args, opts|
|
54
56
|
args = ['.'] if args.empty?
|
@@ -82,6 +84,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
82
84
|
options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
|
83
85
|
options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
|
84
86
|
options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
|
87
|
+
options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
|
88
|
+
options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
|
85
89
|
|
86
90
|
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
|
87
91
|
|
data/lib/html-proofer/cache.rb
CHANGED
@@ -59,6 +59,8 @@ module HTMLProofer
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def add(url, filenames, status, msg = '')
|
62
|
+
return unless use_cache?
|
63
|
+
|
62
64
|
data = {
|
63
65
|
time: @cache_time,
|
64
66
|
filenames: filenames,
|
@@ -92,12 +94,12 @@ module HTMLProofer
|
|
92
94
|
del = 0
|
93
95
|
@cache_log.delete_if do |url, _|
|
94
96
|
url = clean_url(url)
|
95
|
-
if
|
97
|
+
if found_urls.include?(url)
|
98
|
+
false
|
99
|
+
else
|
96
100
|
@logger.log :debug, "Removing #{url} from cache check"
|
97
101
|
del += 1
|
98
102
|
true
|
99
|
-
else
|
100
|
-
false
|
101
103
|
end
|
102
104
|
end
|
103
105
|
|
@@ -115,14 +117,12 @@ module HTMLProofer
|
|
115
117
|
@load.nil?
|
116
118
|
end
|
117
119
|
|
118
|
-
def retrieve_urls(
|
119
|
-
urls_to_check = detect_url_changes(
|
120
|
+
def retrieve_urls(urls)
|
121
|
+
urls_to_check = detect_url_changes(urls)
|
120
122
|
@cache_log.each_pair do |url, cache|
|
121
|
-
if within_timeframe?(cache['time'])
|
122
|
-
|
123
|
-
|
124
|
-
urls_to_check[url] = cache['filenames'] # recheck expired links
|
125
|
-
end
|
123
|
+
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
124
|
+
|
125
|
+
urls_to_check[url] = cache['filenames'] # recheck expired links
|
126
126
|
end
|
127
127
|
urls_to_check
|
128
128
|
end
|
data/lib/html-proofer/check.rb
CHANGED
@@ -3,20 +3,23 @@
|
|
3
3
|
module HTMLProofer
|
4
4
|
# Mostly handles issue management and collecting of external URLs.
|
5
5
|
class Check
|
6
|
-
attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
|
6
|
+
attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
|
7
7
|
|
8
|
-
def initialize(src, path, html, options)
|
8
|
+
def initialize(src, path, html, logger, cache, options)
|
9
9
|
@src = src
|
10
10
|
@path = path
|
11
11
|
@html = remove_ignored(html)
|
12
|
+
@logger = logger
|
13
|
+
@cache = cache
|
12
14
|
@options = options
|
13
15
|
@issues = []
|
16
|
+
@internal_urls = {}
|
14
17
|
@external_urls = {}
|
15
18
|
end
|
16
19
|
|
17
20
|
def create_element(node)
|
18
21
|
@node = node
|
19
|
-
Element.new(node, self)
|
22
|
+
Element.new(node, self, @logger)
|
20
23
|
end
|
21
24
|
|
22
25
|
def run
|
@@ -25,15 +28,20 @@ module HTMLProofer
|
|
25
28
|
|
26
29
|
def add_issue(desc, line: nil, status: -1, content: nil)
|
27
30
|
@issues << Issue.new(@path, desc, line: line, status: status, content: content)
|
31
|
+
false
|
32
|
+
end
|
33
|
+
|
34
|
+
def add_to_internal_urls(url, internal_url)
|
35
|
+
if @internal_urls[url]
|
36
|
+
@internal_urls[url] << internal_url
|
37
|
+
else
|
38
|
+
@internal_urls[url] = [internal_url]
|
39
|
+
end
|
28
40
|
end
|
29
41
|
|
30
42
|
def add_to_external_urls(url)
|
31
43
|
return if @external_urls[url]
|
32
44
|
|
33
|
-
add_path_for_url(url)
|
34
|
-
end
|
35
|
-
|
36
|
-
def add_path_for_url(url)
|
37
45
|
if @external_urls[url]
|
38
46
|
@external_urls[url] << @path
|
39
47
|
else
|
@@ -6,7 +6,9 @@ class HtmlCheck < ::HTMLProofer::Check
|
|
6
6
|
INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
|
7
7
|
INVALID_PREFIX = /Namespace prefix/.freeze
|
8
8
|
PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
|
9
|
-
DOCTYPE_MSG = /
|
9
|
+
DOCTYPE_MSG = /Expected a doctype token/.freeze
|
10
|
+
EOF_IN_TAG = /End of input in tag/.freeze
|
11
|
+
MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
|
10
12
|
|
11
13
|
def run
|
12
14
|
@html.errors.each do |error|
|
@@ -24,6 +26,10 @@ class HtmlCheck < ::HTMLProofer::Check
|
|
24
26
|
options[:validation][:report_missing_names]
|
25
27
|
when DOCTYPE_MSG
|
26
28
|
options[:validation][:report_missing_doctype]
|
29
|
+
when EOF_IN_TAG
|
30
|
+
options[:validation][:report_eof_tags]
|
31
|
+
when MISMATCHED_TAGS
|
32
|
+
options[:validation][:report_mismatched_tags]
|
27
33
|
else
|
28
34
|
true
|
29
35
|
end
|
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
|
|
4
4
|
include HTMLProofer::Utils
|
5
5
|
|
6
6
|
def missing_href?
|
7
|
+
return blank?(@link.src) if @node.name == 'source'
|
8
|
+
|
7
9
|
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
8
10
|
end
|
9
11
|
|
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
|
|
12
14
|
end
|
13
15
|
|
14
16
|
def run
|
15
|
-
@html.css('a, link').each do |node|
|
17
|
+
@html.css('a, link, source').each do |node|
|
16
18
|
@link = create_element(node)
|
17
19
|
line = node.line
|
18
20
|
content = node.to_s
|
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
|
|
49
51
|
# curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
|
50
52
|
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
|
51
53
|
|
52
|
-
add_to_external_urls(@link.href)
|
54
|
+
add_to_external_urls(@link.href || @link.src)
|
53
55
|
next
|
54
|
-
elsif @link.internal?
|
55
|
-
|
56
|
+
elsif @link.internal?
|
57
|
+
if @link.exists?
|
58
|
+
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
59
|
+
else
|
60
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
|
61
|
+
end
|
56
62
|
end
|
63
|
+
end
|
57
64
|
|
58
|
-
|
59
|
-
|
60
|
-
add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
|
61
|
-
next
|
62
|
-
end
|
65
|
+
external_urls
|
66
|
+
end
|
63
67
|
|
64
|
-
|
65
|
-
|
68
|
+
def check_internal_link(link, line, content)
|
69
|
+
# does the local directory have a trailing slash?
|
70
|
+
if link.unslashed_directory?(link.absolute_path)
|
71
|
+
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line: line, content: content)
|
72
|
+
return false
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
# verify the target hash
|
76
|
+
return handle_hash(link, line, content) if link.hash
|
77
|
+
|
78
|
+
true
|
69
79
|
end
|
70
80
|
|
71
81
|
def check_schemes(link, line, content)
|
@@ -94,23 +104,27 @@ class LinkCheck < ::HTMLProofer::Check
|
|
94
104
|
end
|
95
105
|
|
96
106
|
def handle_hash(link, line, content)
|
97
|
-
if link.internal? && !
|
98
|
-
add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
|
107
|
+
if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
|
108
|
+
return add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
|
99
109
|
elsif link.external?
|
100
|
-
external_link_check(link, line, content)
|
110
|
+
return external_link_check(link, line, content)
|
101
111
|
end
|
112
|
+
|
113
|
+
true
|
102
114
|
end
|
103
115
|
|
104
116
|
def external_link_check(link, line, content)
|
105
|
-
if
|
106
|
-
|
117
|
+
if link.exists? # rubocop:disable Style/GuardClause
|
118
|
+
target_html = create_nokogiri(link.absolute_path)
|
119
|
+
return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
|
107
120
|
else
|
108
|
-
|
109
|
-
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
|
121
|
+
return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
|
110
122
|
end
|
123
|
+
|
124
|
+
true
|
111
125
|
end
|
112
126
|
|
113
|
-
def
|
127
|
+
def hash_exists?(html, href_hash)
|
114
128
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
115
129
|
fragment_ids = [href_hash, decoded_href_hash]
|
116
130
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
|
|
148
162
|
|
149
163
|
class XpathFunctions
|
150
164
|
def case_sensitive_equals(node_set, str_to_match)
|
151
|
-
node_set.find_all { |node| node.to_s
|
165
|
+
node_set.find_all { |node| node.to_s.== str_to_match.to_s }
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
class InternalLink
|
170
|
+
attr_reader :link, :href, :path, :line, :content
|
171
|
+
|
172
|
+
def initialize(link, path, line, content)
|
173
|
+
@link = link
|
174
|
+
@href = @link.href
|
175
|
+
@path = path
|
176
|
+
@line = line
|
177
|
+
@content = content
|
152
178
|
end
|
153
179
|
end
|
154
180
|
end
|
@@ -3,12 +3,12 @@
|
|
3
3
|
class OpenGraphElement < ::HTMLProofer::Element
|
4
4
|
attr_reader :src
|
5
5
|
|
6
|
-
def initialize(obj, check)
|
7
|
-
super(obj, check)
|
6
|
+
def initialize(obj, check, logger)
|
7
|
+
super(obj, check, logger)
|
8
8
|
# Fake up src from the content attribute
|
9
9
|
instance_variable_set('@src', @content)
|
10
10
|
|
11
|
-
@src.insert 0, 'http:' if
|
11
|
+
@src.insert 0, 'http:' if %r{^//}.match?(@src)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -23,7 +23,7 @@ class OpenGraphCheck < ::HTMLProofer::Check
|
|
23
23
|
|
24
24
|
def run
|
25
25
|
@html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
|
26
|
-
@opengraph = OpenGraphElement.new(m, self)
|
26
|
+
@opengraph = OpenGraphElement.new(m, self, @logger)
|
27
27
|
|
28
28
|
next if @opengraph.ignore?
|
29
29
|
|
@@ -52,7 +52,9 @@ module HTMLProofer
|
|
52
52
|
report_script_embeds: false,
|
53
53
|
report_missing_names: false,
|
54
54
|
report_invalid_tags: false,
|
55
|
-
report_missing_doctype: false
|
55
|
+
report_missing_doctype: false,
|
56
|
+
report_eof_tags: false,
|
57
|
+
report_mismatched_tags: false
|
56
58
|
}.freeze
|
57
59
|
|
58
60
|
CACHE_DEFAULTS = {}.freeze
|
@@ -78,7 +80,7 @@ module HTMLProofer
|
|
78
80
|
begin
|
79
81
|
JSON.parse(config)
|
80
82
|
rescue StandardError
|
81
|
-
raise ArgumentError, "Option '
|
83
|
+
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
|
82
84
|
end
|
83
85
|
end
|
84
86
|
end
|
data/lib/html-proofer/element.rb
CHANGED
@@ -10,12 +10,18 @@ module HTMLProofer
|
|
10
10
|
|
11
11
|
attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
|
12
12
|
|
13
|
-
def initialize(obj, check)
|
13
|
+
def initialize(obj, check, logger)
|
14
|
+
@logger = logger
|
14
15
|
# Construct readable ivars for every element
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
begin
|
17
|
+
obj.attributes.each_pair do |attribute, value|
|
18
|
+
name = attribute.tr('-:.;@', '_').to_s.to_sym
|
19
|
+
(class << self; self; end).send(:attr_reader, name)
|
20
|
+
instance_variable_set("@#{name}", value.value)
|
21
|
+
end
|
22
|
+
rescue NameError => e
|
23
|
+
@logger.log :error, "Attribute set `#{obj}` contains an error!"
|
24
|
+
raise e
|
19
25
|
end
|
20
26
|
|
21
27
|
@aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
|
@@ -36,19 +42,19 @@ module HTMLProofer
|
|
36
42
|
|
37
43
|
# fix up missing protocols
|
38
44
|
if defined?(@href)
|
39
|
-
@href.insert(0, 'http:') if
|
45
|
+
@href.insert(0, 'http:') if %r{^//}.match?(@href)
|
40
46
|
else
|
41
47
|
@href = nil
|
42
48
|
end
|
43
49
|
|
44
50
|
if defined?(@src)
|
45
|
-
@src.insert(0, 'http:') if
|
51
|
+
@src.insert(0, 'http:') if %r{^//}.match?(@src)
|
46
52
|
else
|
47
53
|
@src = nil
|
48
54
|
end
|
49
55
|
|
50
56
|
if defined?(@srcset)
|
51
|
-
@srcset.insert(0, 'http:') if
|
57
|
+
@srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
|
52
58
|
else
|
53
59
|
@srcset = nil
|
54
60
|
end
|
@@ -99,12 +105,10 @@ module HTMLProofer
|
|
99
105
|
return true if @data_proofer_ignore
|
100
106
|
return true if @parent_ignorable
|
101
107
|
|
102
|
-
return true if
|
108
|
+
return true if /^javascript:/.match?(url)
|
103
109
|
|
104
110
|
# ignore base64 encoded images
|
105
|
-
if %w[ImageCheck FaviconCheck].include?
|
106
|
-
return true if url =~ /^data:image/
|
107
|
-
end
|
111
|
+
return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
|
108
112
|
|
109
113
|
# ignore user defined URLs
|
110
114
|
return true if ignores_pattern_check(@check.options[:url_ignore])
|
@@ -165,6 +169,10 @@ module HTMLProofer
|
|
165
169
|
url.start_with?('?')
|
166
170
|
end
|
167
171
|
|
172
|
+
def absolute_path?(path)
|
173
|
+
path.start_with?('/')
|
174
|
+
end
|
175
|
+
|
168
176
|
def file_path
|
169
177
|
return if path.nil? || path.empty?
|
170
178
|
|
@@ -172,22 +180,16 @@ module HTMLProofer
|
|
172
180
|
|
173
181
|
path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
|
174
182
|
|
175
|
-
if path
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
base = File.dirname @check.path
|
186
|
-
else # relative link, path is a directory
|
187
|
-
base = @check.path
|
188
|
-
end
|
189
|
-
|
190
|
-
file = File.join base, path
|
183
|
+
base = if absolute_path?(path) # path relative to root
|
184
|
+
@check.options[:root_dir] || File.dirname(@check.src)
|
185
|
+
elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
|
186
|
+
File.dirname(@check.path)
|
187
|
+
elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
|
188
|
+
File.dirname(@check.path)
|
189
|
+
else # relative link, path is a directory
|
190
|
+
@check.path
|
191
|
+
end
|
192
|
+
file = File.join(base, path)
|
191
193
|
if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
|
192
194
|
file = "#{file}#{@check.options[:extension]}"
|
193
195
|
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
@@ -199,22 +201,24 @@ module HTMLProofer
|
|
199
201
|
|
200
202
|
# checks if a file exists relative to the current pwd
|
201
203
|
def exists?
|
202
|
-
return @checked_paths[absolute_path] if @checked_paths.key?
|
204
|
+
return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
|
203
205
|
|
204
|
-
@checked_paths[absolute_path] = File.exist?
|
206
|
+
@checked_paths[absolute_path] = File.exist?(absolute_path)
|
205
207
|
end
|
206
208
|
|
207
209
|
def absolute_path
|
208
210
|
path = file_path || @check.path
|
209
|
-
|
211
|
+
|
212
|
+
File.expand_path(path, Dir.pwd)
|
210
213
|
end
|
211
214
|
|
212
215
|
def ignores_pattern_check(links)
|
213
216
|
links.each do |ignore|
|
214
|
-
|
217
|
+
case ignore
|
218
|
+
when String
|
215
219
|
return true if ignore == url
|
216
|
-
|
217
|
-
return true if ignore
|
220
|
+
when Regexp
|
221
|
+
return true if ignore&.match?(url)
|
218
222
|
end
|
219
223
|
end
|
220
224
|
|
@@ -6,6 +6,7 @@ module HTMLProofer
|
|
6
6
|
|
7
7
|
class InvalidHtmlError < StandardError
|
8
8
|
def initialize(failures)
|
9
|
+
super
|
9
10
|
@failures = failures
|
10
11
|
end
|
11
12
|
|
@@ -21,7 +22,8 @@ module HTMLProofer
|
|
21
22
|
allow_hash_href: true,
|
22
23
|
check_external_hash: true,
|
23
24
|
check_html: true,
|
24
|
-
url_ignore: [/.*/] # Don't try to check local files exist
|
25
|
+
url_ignore: [/.*/], # Don't try to check if local files exist
|
26
|
+
validation: { report_eof_tags: true }
|
25
27
|
}
|
26
28
|
end
|
27
29
|
|
@@ -52,7 +54,7 @@ module HTMLProofer
|
|
52
54
|
def call(env)
|
53
55
|
result = @app.call(env)
|
54
56
|
return result if env['REQUEST_METHOD'] != 'GET'
|
55
|
-
return result if env['QUERY_STRING']
|
57
|
+
return result if /proofer-ignore/.match?(env['QUERY_STRING'])
|
56
58
|
return result if result.first != 200
|
57
59
|
|
58
60
|
body = []
|
data/lib/html-proofer/runner.rb
CHANGED
@@ -4,7 +4,7 @@ module HTMLProofer
|
|
4
4
|
class Runner
|
5
5
|
include HTMLProofer::Utils
|
6
6
|
|
7
|
-
attr_reader :options, :external_urls, :failures
|
7
|
+
attr_reader :options, :internal_urls, :external_urls, :failures
|
8
8
|
|
9
9
|
def initialize(src, opts = {})
|
10
10
|
@src = src
|
@@ -20,6 +20,8 @@ module HTMLProofer
|
|
20
20
|
|
21
21
|
@type = @options.delete(:type)
|
22
22
|
@logger = HTMLProofer::Log.new(@options[:log_level])
|
23
|
+
@cache = Cache.new(@logger, @options[:cache])
|
24
|
+
@internal_link_checks = nil
|
23
25
|
|
24
26
|
# Add swap patterns for internal domains
|
25
27
|
unless @options[:internal_domains].empty?
|
@@ -30,7 +32,11 @@ module HTMLProofer
|
|
30
32
|
end
|
31
33
|
end
|
32
34
|
|
35
|
+
@internal_urls = {}
|
36
|
+
@internal_urls_to_paths = {}
|
37
|
+
@external_urls = {}
|
33
38
|
@failures = []
|
39
|
+
@before_request = []
|
34
40
|
end
|
35
41
|
|
36
42
|
def run
|
@@ -58,15 +64,13 @@ module HTMLProofer
|
|
58
64
|
end
|
59
65
|
end
|
60
66
|
@external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
|
61
|
-
|
67
|
+
validate_external_urls
|
62
68
|
end
|
63
69
|
|
64
70
|
# Collects any external URLs found in a directory of files. Also collectes
|
65
71
|
# every failed test from process_files.
|
66
72
|
# Sends the external URLs to Typhoeus for batch processing.
|
67
73
|
def check_files
|
68
|
-
@external_urls = {}
|
69
|
-
|
70
74
|
process_files.each do |item|
|
71
75
|
@external_urls.merge!(item[:external_urls])
|
72
76
|
@failures.concat(item[:failures])
|
@@ -77,9 +81,12 @@ module HTMLProofer
|
|
77
81
|
# just not run those other checks at all.
|
78
82
|
if @options[:external_only]
|
79
83
|
@failures = []
|
80
|
-
|
84
|
+
validate_external_urls
|
81
85
|
elsif !@options[:disable_external]
|
82
|
-
|
86
|
+
validate_external_urls
|
87
|
+
validate_internal_urls
|
88
|
+
else
|
89
|
+
validate_internal_urls
|
83
90
|
end
|
84
91
|
end
|
85
92
|
|
@@ -100,8 +107,21 @@ module HTMLProofer
|
|
100
107
|
@src.each do |src|
|
101
108
|
checks.each do |klass|
|
102
109
|
@logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
|
103
|
-
check = Object.const_get(klass).new(src, path, html, @options)
|
110
|
+
check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
|
104
111
|
check.run
|
112
|
+
|
113
|
+
if klass == 'LinkCheck'
|
114
|
+
@internal_link_checks = check
|
115
|
+
check.internal_urls.each_pair do |url, internal_urls|
|
116
|
+
if @internal_urls_to_paths[url]
|
117
|
+
@internal_urls_to_paths[url].concat(internal_urls.map(&:path))
|
118
|
+
else
|
119
|
+
@internal_urls_to_paths[url] = internal_urls.map(&:path)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
@internal_urls.merge!(check.internal_urls)
|
123
|
+
end
|
124
|
+
|
105
125
|
external_urls = check.external_urls
|
106
126
|
external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
|
107
127
|
result[:external_urls].merge!(external_urls)
|
@@ -112,15 +132,35 @@ module HTMLProofer
|
|
112
132
|
end
|
113
133
|
|
114
134
|
def check_path(path)
|
115
|
-
check_parsed
|
135
|
+
check_parsed(create_nokogiri(path), path)
|
116
136
|
end
|
117
137
|
|
118
|
-
def
|
119
|
-
url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
|
138
|
+
def validate_external_urls
|
139
|
+
url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
|
140
|
+
url_validator.before_request = @before_request
|
120
141
|
@failures.concat(url_validator.run)
|
121
142
|
@external_urls = url_validator.external_urls
|
122
143
|
end
|
123
144
|
|
145
|
+
def validate_internal_urls
|
146
|
+
if @cache.use_cache?
|
147
|
+
urls_to_check = load_internal_cache
|
148
|
+
|
149
|
+
urls_to_check.each_pair do |url, internal_urls|
|
150
|
+
result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.line, internal_urls.first.content)
|
151
|
+
code = result ? 200 : 404
|
152
|
+
@cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
|
153
|
+
end
|
154
|
+
@cache.write
|
155
|
+
else
|
156
|
+
@internal_urls.values.flatten.each do |internal_url|
|
157
|
+
@internal_link_checks.check_internal_link(internal_url.link, internal_url.line, internal_url.content)
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
@failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
|
162
|
+
end
|
163
|
+
|
124
164
|
def files
|
125
165
|
@files ||= if @type == :directory
|
126
166
|
@src.map do |src|
|
@@ -147,6 +187,8 @@ module HTMLProofer
|
|
147
187
|
def checks
|
148
188
|
return @checks if defined?(@checks) && !@checks.nil?
|
149
189
|
|
190
|
+
return (@checks = ['LinkCheck']) if @type == :links
|
191
|
+
|
150
192
|
@checks = HTMLProofer::Check.subchecks.map(&:name)
|
151
193
|
@checks.delete('FaviconCheck') unless @options[:check_favicon]
|
152
194
|
@checks.delete('HtmlCheck') unless @options[:check_html]
|
@@ -171,5 +213,29 @@ module HTMLProofer
|
|
171
213
|
failure_text = pluralize(count, 'failure', 'failures')
|
172
214
|
raise @logger.colorize :fatal, "HTML-Proofer found #{failure_text}!"
|
173
215
|
end
|
216
|
+
|
217
|
+
# Set before_request callback.
|
218
|
+
#
|
219
|
+
# @example Set before_request.
|
220
|
+
# request.before_request { |request| p "yay" }
|
221
|
+
#
|
222
|
+
# @param [ Block ] block The block to execute.
|
223
|
+
#
|
224
|
+
# @yield [ Typhoeus::Request ]
|
225
|
+
#
|
226
|
+
# @return [ Array<Block> ] All before_request blocks.
|
227
|
+
def before_request(&block)
|
228
|
+
@before_request ||= []
|
229
|
+
@before_request << block if block
|
230
|
+
@before_request
|
231
|
+
end
|
232
|
+
|
233
|
+
def load_internal_cache
|
234
|
+
urls_to_check = @cache.retrieve_urls(@internal_urls)
|
235
|
+
cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
|
236
|
+
@logger.log :info, "Found #{cache_text} in the cache..."
|
237
|
+
|
238
|
+
urls_to_check
|
239
|
+
end
|
174
240
|
end
|
175
241
|
end
|
@@ -10,21 +10,23 @@ module HTMLProofer
|
|
10
10
|
include HTMLProofer::Utils
|
11
11
|
|
12
12
|
attr_reader :external_urls
|
13
|
+
attr_writer :before_request
|
13
14
|
|
14
|
-
def initialize(logger, external_urls, options)
|
15
|
+
def initialize(logger, cache, external_urls, options)
|
15
16
|
@logger = logger
|
16
17
|
@external_urls = external_urls
|
17
18
|
@failed_tests = []
|
18
19
|
@options = options
|
19
20
|
@hydra = Typhoeus::Hydra.new(@options[:hydra])
|
20
|
-
@cache =
|
21
|
+
@cache = cache
|
22
|
+
@before_request = []
|
21
23
|
end
|
22
24
|
|
23
25
|
def run
|
24
26
|
@external_urls = remove_query_values
|
25
27
|
|
26
28
|
if @cache.use_cache?
|
27
|
-
urls_to_check =
|
29
|
+
urls_to_check = @cache.retrieve_urls(@external_urls)
|
28
30
|
external_link_checker(urls_to_check)
|
29
31
|
@cache.write
|
30
32
|
else
|
@@ -41,11 +43,11 @@ module HTMLProofer
|
|
41
43
|
iterable_external_urls = @external_urls.dup
|
42
44
|
@external_urls.each_key do |url|
|
43
45
|
uri = begin
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
Addressable::URI.parse(url)
|
47
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
48
|
+
@logger.log :error, "#{url} is an invalid URL"
|
49
|
+
nil
|
50
|
+
end
|
49
51
|
next if uri.nil? || uri.query.nil?
|
50
52
|
|
51
53
|
iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
|
@@ -72,15 +74,6 @@ module HTMLProofer
|
|
72
74
|
uri.host + uri.path
|
73
75
|
end
|
74
76
|
|
75
|
-
def load_cache
|
76
|
-
cache_count = @cache.size
|
77
|
-
cache_text = pluralize(cache_count, 'link', 'links')
|
78
|
-
|
79
|
-
@logger.log :info, "Found #{cache_text} in the cache..."
|
80
|
-
|
81
|
-
@cache.retrieve_urls(@external_urls)
|
82
|
-
end
|
83
|
-
|
84
77
|
# Proofer runs faster if we pull out all the external URLs and run the checks
|
85
78
|
# at the end. Otherwise, we're halting the consuming process for every file during
|
86
79
|
# `process_files`.
|
@@ -109,11 +102,11 @@ module HTMLProofer
|
|
109
102
|
def establish_queue(external_urls)
|
110
103
|
external_urls.each_pair do |url, filenames|
|
111
104
|
url = begin
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
105
|
+
clean_url(url)
|
106
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
107
|
+
add_external_issue(filenames, "#{url} is an invalid URL")
|
108
|
+
next
|
109
|
+
end
|
117
110
|
|
118
111
|
method = if hash?(url) && @options[:check_external_hash]
|
119
112
|
:get
|
@@ -127,16 +120,19 @@ module HTMLProofer
|
|
127
120
|
def clean_url(href)
|
128
121
|
# catch any obvious issues, like strings in port numbers
|
129
122
|
parsed = Addressable::URI.parse(href)
|
130
|
-
if href
|
131
|
-
parsed.normalize
|
132
|
-
else
|
123
|
+
if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
133
124
|
href
|
125
|
+
else
|
126
|
+
parsed.normalize
|
134
127
|
end
|
135
128
|
end
|
136
129
|
|
137
130
|
def queue_request(method, href, filenames)
|
138
131
|
opts = @options[:typhoeus].merge(method: method)
|
139
132
|
request = Typhoeus::Request.new(href, opts)
|
133
|
+
@before_request.each do |callback|
|
134
|
+
callback.call(request)
|
135
|
+
end
|
140
136
|
request.on_complete { |response| response_handler(response, filenames) }
|
141
137
|
@hydra.queue request
|
142
138
|
end
|
data/lib/html-proofer/utils.rb
CHANGED
@@ -5,7 +5,7 @@ require 'nokogumbo'
|
|
5
5
|
module HTMLProofer
|
6
6
|
module Utils
|
7
7
|
def pluralize(count, single, plural)
|
8
|
-
"#{count} #{
|
8
|
+
"#{count} #{count == 1 ? single : plural}"
|
9
9
|
end
|
10
10
|
|
11
11
|
def create_nokogiri(path)
|
@@ -15,7 +15,7 @@ module HTMLProofer
|
|
15
15
|
path
|
16
16
|
end
|
17
17
|
|
18
|
-
Nokogiri::HTML5(content)
|
18
|
+
Nokogiri::HTML5(content, max_errors: -1)
|
19
19
|
end
|
20
20
|
|
21
21
|
def swap(href, replacement)
|
data/lib/html-proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.17.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -293,23 +293,23 @@ homepage: https://github.com/gjtorikian/html-proofer
|
|
293
293
|
licenses:
|
294
294
|
- MIT
|
295
295
|
metadata: {}
|
296
|
-
post_install_message:
|
296
|
+
post_install_message:
|
297
297
|
rdoc_options: []
|
298
298
|
require_paths:
|
299
299
|
- lib
|
300
300
|
required_ruby_version: !ruby/object:Gem::Requirement
|
301
301
|
requirements:
|
302
|
-
- - "
|
302
|
+
- - "~>"
|
303
303
|
- !ruby/object:Gem::Version
|
304
|
-
version: '
|
304
|
+
version: '2.4'
|
305
305
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
306
306
|
requirements:
|
307
307
|
- - ">="
|
308
308
|
- !ruby/object:Gem::Version
|
309
309
|
version: '0'
|
310
310
|
requirements: []
|
311
|
-
rubygems_version: 3.
|
312
|
-
signing_key:
|
311
|
+
rubygems_version: 3.1.2
|
312
|
+
signing_key:
|
313
313
|
specification_version: 4
|
314
314
|
summary: A set of tests to validate your HTML output. These tests check if your image
|
315
315
|
references are legitimate, if they have alt tags, if your internal links are working,
|