html-proofer 4.1.0 → 4.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproofer +19 -10
- data/lib/html_proofer/attribute/url.rb +9 -2
- data/lib/html_proofer/cache.rb +42 -29
- data/lib/html_proofer/check/images.rb +2 -12
- data/lib/html_proofer/configuration.rb +1 -0
- data/lib/html_proofer/runner.rb +0 -2
- data/lib/html_proofer/url_validator/external.rb +10 -11
- data/lib/html_proofer/url_validator/internal.rb +7 -6
- data/lib/html_proofer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 285bb041bc95bc47d55de09a492e5d802a4c2a5dadf539bb7742f0d8138e2e23
|
4
|
+
data.tar.gz: dddc7a66aab15685917621d28600133b4824ce0c27ad204d7750252f717fe33e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd983c2df18c4def1ded8f6caadc5917142d34021b9e873091a7040bd596b8efc05cee56b3fb3330ceab441f601d7c305e6b7e28a821fbb8d20ee7a85ccbf7a4
|
7
|
+
data.tar.gz: 966a38245a5bb725838b5c63214b5abc133c53e90fb927118b64a4da2a383d3c993e32994fd7eff0df473e15e8d23788737d716b00c8931977e82acb33114e58
|
data/bin/htmlproofer
CHANGED
@@ -15,21 +15,22 @@ Mercenary.program(:htmlproofer) do |p|
|
|
15
15
|
|
16
16
|
p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
|
17
17
|
|
18
|
-
p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, assumes `href="#"` anchors are valid'
|
19
|
-
p.option 'allow_missing_href', '--allow-missing-href', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error.'
|
18
|
+
p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
|
19
|
+
p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
|
20
20
|
p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
|
21
21
|
p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
|
22
22
|
p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
|
23
|
-
p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
|
24
|
-
p.option '
|
23
|
+
p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
|
24
|
+
p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
|
25
|
+
p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
|
25
26
|
p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
|
26
|
-
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
|
27
|
-
p.option 'enforce_https', '--enforce-https
|
27
|
+
p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
|
28
|
+
p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
28
29
|
p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
|
29
|
-
p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
|
30
|
+
p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
|
31
|
+
p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
|
30
32
|
p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
31
|
-
p.option '
|
32
|
-
p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
|
33
|
+
p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
|
33
34
|
p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
|
34
35
|
p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
|
35
36
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
@@ -67,7 +68,15 @@ Mercenary.program(:htmlproofer) do |p|
|
|
67
68
|
end
|
68
69
|
end
|
69
70
|
|
70
|
-
|
71
|
+
# check booleans
|
72
|
+
[:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
|
73
|
+
next if (val = opts[option.to_s]).nil?
|
74
|
+
if val == "false"
|
75
|
+
options[option] = false
|
76
|
+
else
|
77
|
+
options[option] = true
|
78
|
+
end
|
79
|
+
end
|
71
80
|
|
72
81
|
options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
|
73
82
|
|
@@ -3,18 +3,20 @@
|
|
3
3
|
module HTMLProofer
|
4
4
|
class Attribute
|
5
5
|
class Url < HTMLProofer::Attribute
|
6
|
-
attr_reader :url
|
6
|
+
attr_reader :url, :size
|
7
7
|
|
8
8
|
REMOTE_SCHEMES = ["http", "https"].freeze
|
9
9
|
|
10
|
-
def initialize(runner, link_attribute, base_url: nil)
|
10
|
+
def initialize(runner, link_attribute, base_url: nil, extract_size: false)
|
11
11
|
super
|
12
12
|
|
13
13
|
if @raw_attribute.nil?
|
14
14
|
@url = nil
|
15
15
|
else
|
16
16
|
@url = @raw_attribute.delete("\u200b").strip
|
17
|
+
@url, @size = @url.split(/\s+/) if extract_size
|
17
18
|
@url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
|
19
|
+
@url = "" if @url.nil?
|
18
20
|
|
19
21
|
swap_urls!
|
20
22
|
clean_url!
|
@@ -30,6 +32,7 @@ module HTMLProofer
|
|
30
32
|
|
31
33
|
def known_extension?
|
32
34
|
return true if hash_link?
|
35
|
+
return true if path.end_with?("/")
|
33
36
|
|
34
37
|
ext = File.extname(path)
|
35
38
|
|
@@ -201,6 +204,10 @@ module HTMLProofer
|
|
201
204
|
url.start_with?("#")
|
202
205
|
end
|
203
206
|
|
207
|
+
def has_hash?
|
208
|
+
url.include?("#")
|
209
|
+
end
|
210
|
+
|
204
211
|
def param_link?
|
205
212
|
url.start_with?("?")
|
206
213
|
end
|
data/lib/html_proofer/cache.rb
CHANGED
@@ -26,12 +26,17 @@ module HTMLProofer
|
|
26
26
|
|
27
27
|
if blank?(options)
|
28
28
|
define_singleton_method(:enabled?) { false }
|
29
|
+
define_singleton_method(:external_enabled?) { false }
|
30
|
+
define_singleton_method(:internal_enabled?) { false }
|
29
31
|
else
|
32
|
+
# we still consider the cache as enabled, regardless of the specific timeframes
|
30
33
|
define_singleton_method(:enabled?) { true }
|
31
34
|
setup_cache!(options)
|
32
35
|
|
33
36
|
@external_timeframe = parsed_timeframe(options[:timeframe][:external])
|
37
|
+
define_singleton_method(:external_enabled?) { !@external_timeframe.nil? }
|
34
38
|
@internal_timeframe = parsed_timeframe(options[:timeframe][:internal])
|
39
|
+
define_singleton_method(:internal_enabled?) { !@internal_timeframe.nil? }
|
35
40
|
end
|
36
41
|
end
|
37
42
|
|
@@ -55,17 +60,15 @@ module HTMLProofer
|
|
55
60
|
end
|
56
61
|
|
57
62
|
def add_internal(url, metadata, found)
|
58
|
-
return unless
|
63
|
+
return unless internal_enabled?
|
59
64
|
|
60
65
|
@cache_log[:internal][url] = { time: @cache_time, metadata: [] } if @cache_log[:internal][url].nil?
|
61
66
|
|
62
67
|
@cache_log[:internal][url][:metadata] << construct_internal_link_metadata(metadata, found)
|
63
68
|
end
|
64
69
|
|
65
|
-
def add_external(url, filenames, status_code, msg)
|
66
|
-
return unless
|
67
|
-
|
68
|
-
found = status_code.between?(200, 299)
|
70
|
+
def add_external(url, filenames, status_code, msg, found)
|
71
|
+
return unless external_enabled?
|
69
72
|
|
70
73
|
clean_url = cleaned_url(url)
|
71
74
|
@cache_log[:external][clean_url] =
|
@@ -73,10 +76,10 @@ module HTMLProofer
|
|
73
76
|
end
|
74
77
|
|
75
78
|
def detect_url_changes(urls_detected, type)
|
76
|
-
additions = determine_additions(urls_detected, type)
|
77
|
-
|
78
79
|
determine_deletions(urls_detected, type)
|
79
80
|
|
81
|
+
additions = determine_additions(urls_detected, type)
|
82
|
+
|
80
83
|
additions
|
81
84
|
end
|
82
85
|
|
@@ -96,13 +99,6 @@ module HTMLProofer
|
|
96
99
|
|
97
100
|
urls_to_check = detect_url_changes(urls_detected, type)
|
98
101
|
|
99
|
-
@cache_log[type].each_pair do |url, cache|
|
100
|
-
within_timeframe = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])
|
101
|
-
next if within_timeframe
|
102
|
-
|
103
|
-
urls_to_check[url] = cache[:metadata] # recheck expired links
|
104
|
-
end
|
105
|
-
|
106
102
|
urls_to_check
|
107
103
|
end
|
108
104
|
|
@@ -146,7 +142,11 @@ module HTMLProofer
|
|
146
142
|
private def determine_external_additions(urls_detected)
|
147
143
|
urls_detected.reject do |url, _metadata|
|
148
144
|
if @cache_log[:external].include?(url)
|
149
|
-
@cache_log[:external][url][:found] # if this is false, we're trying again
|
145
|
+
found = @cache_log[:external][url][:found] # if this is false, we're trying again
|
146
|
+
unless found
|
147
|
+
@logger.log(:debug, "Adding #{url} to external cache (not found)")
|
148
|
+
end
|
149
|
+
found
|
150
150
|
else
|
151
151
|
@logger.log(:debug, "Adding #{url} to external cache")
|
152
152
|
false
|
@@ -155,28 +155,36 @@ module HTMLProofer
|
|
155
155
|
end
|
156
156
|
|
157
157
|
private def determine_internal_additions(urls_detected)
|
158
|
-
urls_detected.each_with_object({}) do |(url,
|
158
|
+
urls_detected.each_with_object({}) do |(url, detected_metadata), hsh|
|
159
159
|
# url is not even in cache
|
160
160
|
if @cache_log[:internal][url].nil?
|
161
|
-
|
161
|
+
@logger.log(:debug, "Adding #{url} to internal cache")
|
162
|
+
hsh[url] = detected_metadata
|
162
163
|
next
|
163
164
|
end
|
164
165
|
|
166
|
+
# detect metadata additions
|
167
|
+
# NOTE: the time-stamp for the whole url key will not be updated,
|
168
|
+
# so that it reflects the earliest time any of the metadata was checked
|
165
169
|
cache_metadata = @cache_log[:internal][url][:metadata]
|
166
|
-
|
167
|
-
existing_cache_metadata = cache_metadata.find { |
|
168
|
-
|
170
|
+
metadata_additions = detected_metadata.reject do |detected|
|
171
|
+
existing_cache_metadata = cache_metadata.find { |cached, _| cached[:filename] == detected[:filename] }
|
169
172
|
# cache for this url, from an existing path, exists as found
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
+
found = !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
|
174
|
+
unless found
|
175
|
+
@logger.log(:debug, "Adding #{detected} to internal cache for #{url}")
|
173
176
|
end
|
177
|
+
found
|
178
|
+
end
|
174
179
|
|
175
|
-
|
176
|
-
|
180
|
+
if metadata_additions.empty?
|
181
|
+
next
|
177
182
|
end
|
178
183
|
|
179
|
-
hsh[url] =
|
184
|
+
hsh[url] = metadata_additions
|
185
|
+
# remove from the cache the detected metadata additions as they correspond to failures to be rechecked
|
186
|
+
# (this works assuming the detected url metadata have "found" set to false)
|
187
|
+
@cache_log[:internal][url][:metadata] = cache_metadata.difference(metadata_additions)
|
180
188
|
end
|
181
189
|
end
|
182
190
|
|
@@ -184,11 +192,16 @@ module HTMLProofer
|
|
184
192
|
private def determine_deletions(urls_detected, type)
|
185
193
|
deletions = 0
|
186
194
|
|
187
|
-
@cache_log[type].delete_if do |url,
|
188
|
-
|
195
|
+
@cache_log[type].delete_if do |url, cache|
|
196
|
+
expired_timeframe = type == :external ? !within_external_timeframe?(cache[:time]) : !within_internal_timeframe?(cache[:time])
|
197
|
+
if expired_timeframe
|
198
|
+
@logger.log(:debug, "Removing #{url} from #{type} cache (expired timeframe)")
|
199
|
+
deletions += 1
|
200
|
+
true
|
201
|
+
elsif urls_detected.include?(url)
|
189
202
|
false
|
190
203
|
elsif url_matches_type?(url, type)
|
191
|
-
@logger.log(:debug, "Removing #{url} from #{type} cache")
|
204
|
+
@logger.log(:debug, "Removing #{url} from #{type} cache (not detected anymore)")
|
192
205
|
deletions += 1
|
193
206
|
true
|
194
207
|
end
|
@@ -23,19 +23,9 @@ module HTMLProofer
|
|
23
23
|
elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
|
24
24
|
add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
|
25
25
|
content: @img.content)
|
26
|
-
elsif @img.multiple_srcsets?
|
27
|
-
@img.srcsets.each do |srcset|
|
28
|
-
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
|
29
|
-
|
30
|
-
if srcset_url.remote?
|
31
|
-
add_to_external_urls(srcset_url.url, @img.line)
|
32
|
-
elsif !srcset_url.exists?
|
33
|
-
add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
elsif @img.multiple_sizes?
|
26
|
+
elsif @img.multiple_srcsets? || @img.multiple_sizes?
|
37
27
|
@img.srcsets_wo_sizes.each do |srcset|
|
38
|
-
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
|
28
|
+
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
|
39
29
|
|
40
30
|
if srcset_url.remote?
|
41
31
|
add_to_external_urls(srcset_url.url, @img.line)
|
data/lib/html_proofer/runner.rb
CHANGED
@@ -231,8 +231,6 @@ module HTMLProofer
|
|
231
231
|
@logger.log(:debug, "Found #{cache_text} in the cache")
|
232
232
|
|
233
233
|
urls_to_check = @cache.retrieve_urls(ivar, type)
|
234
|
-
urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
|
235
|
-
@logger.log(:info, "Checking #{urls_detected}")
|
236
234
|
|
237
235
|
urls_to_check
|
238
236
|
end
|
@@ -22,12 +22,11 @@ module HTMLProofer
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def validate
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
end
|
25
|
+
urls_to_check = @cache.external_enabled? ? @runner.load_external_cache : @external_urls
|
26
|
+
urls_detected = pluralize(urls_to_check.count, "external link", "external links")
|
27
|
+
@logger.log(:info, "Checking #{urls_detected}")
|
28
|
+
|
29
|
+
run_external_link_checker(urls_to_check)
|
31
30
|
|
32
31
|
@failed_checks
|
33
32
|
end
|
@@ -89,7 +88,7 @@ module HTMLProofer
|
|
89
88
|
return if @runner.options[:ignore_status_codes].include?(response_code)
|
90
89
|
|
91
90
|
if response_code.between?(200, 299)
|
92
|
-
@cache.add_external(href, filenames, response_code, "OK") unless check_hash_in_2xx_response(href, url,
|
91
|
+
@cache.add_external(href, filenames, response_code, "OK", true) unless check_hash_in_2xx_response(href, url,
|
93
92
|
response, filenames)
|
94
93
|
elsif response.timed_out?
|
95
94
|
handle_timeout(href, filenames, response_code)
|
@@ -104,7 +103,7 @@ module HTMLProofer
|
|
104
103
|
status_message = blank?(response.status_message) ? "" : ": #{response.status_message}"
|
105
104
|
msg = "External link #{href} failed#{status_message}"
|
106
105
|
add_failure(filenames, msg, response_code)
|
107
|
-
@cache.add_external(href, filenames, response_code, msg)
|
106
|
+
@cache.add_external(href, filenames, response_code, msg, false)
|
108
107
|
end
|
109
108
|
end
|
110
109
|
|
@@ -133,13 +132,13 @@ module HTMLProofer
|
|
133
132
|
|
134
133
|
msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
|
135
134
|
add_failure(filenames, msg, response.code)
|
136
|
-
@cache.add_external(href, filenames, response.code, msg)
|
135
|
+
@cache.add_external(href, filenames, response.code, msg, false)
|
137
136
|
true
|
138
137
|
end
|
139
138
|
|
140
139
|
def handle_timeout(href, filenames, response_code)
|
141
140
|
msg = "External link #{href} failed: got a time out (response code #{response_code})"
|
142
|
-
@cache.add_external(href, filenames, 0, msg)
|
141
|
+
@cache.add_external(href, filenames, 0, msg, false)
|
143
142
|
return if @runner.options[:only_4xx]
|
144
143
|
|
145
144
|
add_failure(filenames, msg, response_code)
|
@@ -157,7 +156,7 @@ module HTMLProofer
|
|
157
156
|
|
158
157
|
msg = msgs.join("\n").chomp
|
159
158
|
|
160
|
-
@cache.add_external(href, metadata, 0, msg)
|
159
|
+
@cache.add_external(href, metadata, 0, msg, false)
|
161
160
|
return if @runner.options[:only_4xx]
|
162
161
|
|
163
162
|
add_failure(metadata, msg, response_code)
|
@@ -12,12 +12,11 @@ module HTMLProofer
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def validate
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
end
|
15
|
+
urls_to_check = @cache.internal_enabled? ? @runner.load_internal_cache : @internal_urls
|
16
|
+
urls_detected = pluralize(urls_to_check.count, "internal link", "internal links")
|
17
|
+
@logger.log(:info, "Checking #{urls_detected}")
|
18
|
+
|
19
|
+
run_internal_link_checker(urls_to_check)
|
21
20
|
|
22
21
|
@failed_checks
|
23
22
|
end
|
@@ -68,9 +67,11 @@ module HTMLProofer
|
|
68
67
|
private def hash_exists?(url)
|
69
68
|
href_hash = url.hash
|
70
69
|
return true if blank?(href_hash)
|
70
|
+
return true unless @runner.options[:check_internal_hash]
|
71
71
|
|
72
72
|
# prevents searching files we didn't ask about
|
73
73
|
return false unless url.known_extension?
|
74
|
+
return false unless url.has_hash?
|
74
75
|
|
75
76
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
76
77
|
fragment_ids = [href_hash, decoded_href_hash]
|
data/lib/html_proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.0
|
4
|
+
version: 4.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|