html-proofer 4.2.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 916656620e369529ae71be63c4316ba627afcf97d2f60b4ce963e375f772a7b8
4
- data.tar.gz: 83e751f1ec8bf453c2612cf1d34ef24e3de1d7620d740be1f8c392627f39cc85
3
+ metadata.gz: e8afbf9c7f55055c38b77971166e852d6f7180481d1e9fb27f84e071c582d1a7
4
+ data.tar.gz: 88db5f8e0bb8059ff3f6c50c8fcb4320bf0f6a4e1ef6fc053671591ff44a529f
5
5
  SHA512:
6
- metadata.gz: c396512e2262ba0bbeab69ee602c42c522cdec4f7b9036e81808011e4b6d47e06188baeaa86524361166af90ce1751b3f08e4075b437b28eab8c4c6bec065630
7
- data.tar.gz: e2e660641cca01f1501b69dda405808624ec03b4ebaa0a9c12618f82e77314f94adbc459ec343898f548be5676f763d11d69bf5cbbebf154148490e49b304f83
6
+ metadata.gz: 3f765689616f7884c1a33685dceb08a0786c851839d4ec612023d3ed746ed7c7b7e9087bd91d8360d795fd54a2077fdbf9af9df2c0b25d246a9cc0a064579501
7
+ data.tar.gz: 441a4f2b13e237505f9402a53fee5713a68d680824e6a976da42f26bfe1f57a0bdf661f6c3cafc45cb0fa28623c56d679c75044b10dd69db64ac824b7d911885
data/bin/htmlproofer CHANGED
@@ -15,22 +15,22 @@ Mercenary.program(:htmlproofer) do |p|
15
15
 
16
16
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
17
17
 
18
- p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, assumes `href="#"` anchors are valid'
19
- p.option 'allow_missing_href', '--allow-missing-href', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error.'
18
+ p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
19
+ p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
21
  p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
22
22
  p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
23
- p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
- p.option 'check_internal_hash', '--check-internal-hash', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
25
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
23
+ p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
+ p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
25
+ p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
26
26
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
27
- p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
28
- p.option 'enforce_https', '--enforce-https <false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
27
+ p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
28
+ p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
29
29
  p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
30
- p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
30
+ p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
31
+ p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
31
32
  p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
32
- p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
33
- p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
33
+ p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
34
34
  p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
35
35
  p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
36
36
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
@@ -68,7 +68,15 @@ Mercenary.program(:htmlproofer) do |p|
68
68
  end
69
69
  end
70
70
 
71
- options[:enforce_https] = false if opts['enforce_https'] == "false"
71
+ # check booleans
72
+ [:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
73
+ next if (val = opts[option.to_s]).nil?
74
+ if val == "false"
75
+ options[option] = false
76
+ else
77
+ options[option] = true
78
+ end
79
+ end
72
80
 
73
81
  options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
74
82
 
@@ -3,18 +3,20 @@
3
3
  module HTMLProofer
4
4
  class Attribute
5
5
  class Url < HTMLProofer::Attribute
6
- attr_reader :url
6
+ attr_reader :url, :size
7
7
 
8
8
  REMOTE_SCHEMES = ["http", "https"].freeze
9
9
 
10
- def initialize(runner, link_attribute, base_url: nil)
10
+ def initialize(runner, link_attribute, base_url: nil, extract_size: false)
11
11
  super
12
12
 
13
13
  if @raw_attribute.nil?
14
14
  @url = nil
15
15
  else
16
16
  @url = @raw_attribute.delete("\u200b").strip
17
+ @url, @size = @url.split(/\s+/) if extract_size
17
18
  @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
19
+ @url = "" if @url.nil?
18
20
 
19
21
  swap_urls!
20
22
  clean_url!
@@ -30,6 +32,7 @@ module HTMLProofer
30
32
 
31
33
  def known_extension?
32
34
  return true if hash_link?
35
+ return true if path.end_with?("/")
33
36
 
34
37
  ext = File.extname(path)
35
38
 
@@ -201,6 +204,10 @@ module HTMLProofer
201
204
  url.start_with?("#")
202
205
  end
203
206
 
207
+ def has_hash?
208
+ url.include?("#")
209
+ end
210
+
204
211
  def param_link?
205
212
  url.start_with?("?")
206
213
  end
@@ -26,12 +26,17 @@ module HTMLProofer
26
26
 
27
27
  if blank?(options)
28
28
  define_singleton_method(:enabled?) { false }
29
+ define_singleton_method(:external_enabled?) { false }
30
+ define_singleton_method(:internal_enabled?) { false }
29
31
  else
32
+ # we still consider the cache as enabled, regardless of the specic timeframes
30
33
  define_singleton_method(:enabled?) { true }
31
34
  setup_cache!(options)
32
35
 
33
36
  @external_timeframe = parsed_timeframe(options[:timeframe][:external])
37
+ define_singleton_method(:external_enabled?) { !@external_timeframe.nil? }
34
38
  @internal_timeframe = parsed_timeframe(options[:timeframe][:internal])
39
+ define_singleton_method(:internal_enabled?) { !@internal_timeframe.nil? }
35
40
  end
36
41
  end
37
42
 
@@ -55,17 +60,15 @@ module HTMLProofer
55
60
  end
56
61
 
57
62
  def add_internal(url, metadata, found)
58
- return unless enabled?
63
+ return unless internal_enabled?
59
64
 
60
65
  @cache_log[:internal][url] = { time: @cache_time, metadata: [] } if @cache_log[:internal][url].nil?
61
66
 
62
67
  @cache_log[:internal][url][:metadata] << construct_internal_link_metadata(metadata, found)
63
68
  end
64
69
 
65
- def add_external(url, filenames, status_code, msg)
66
- return unless enabled?
67
-
68
- found = status_code.between?(200, 299)
70
+ def add_external(url, filenames, status_code, msg, found)
71
+ return unless external_enabled?
69
72
 
70
73
  clean_url = cleaned_url(url)
71
74
  @cache_log[:external][clean_url] =
@@ -73,10 +76,10 @@ module HTMLProofer
73
76
  end
74
77
 
75
78
  def detect_url_changes(urls_detected, type)
76
- additions = determine_additions(urls_detected, type)
77
-
78
79
  determine_deletions(urls_detected, type)
79
80
 
81
+ additions = determine_additions(urls_detected, type)
82
+
80
83
  additions
81
84
  end
82
85
 
@@ -96,13 +99,6 @@ module HTMLProofer
96
99
 
97
100
  urls_to_check = detect_url_changes(urls_detected, type)
98
101
 
99
- @cache_log[type].each_pair do |url, cache|
100
- within_timeframe = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])
101
- next if within_timeframe
102
-
103
- urls_to_check[url] = cache[:metadata] # recheck expired links
104
- end
105
-
106
102
  urls_to_check
107
103
  end
108
104
 
@@ -146,7 +142,11 @@ module HTMLProofer
146
142
  private def determine_external_additions(urls_detected)
147
143
  urls_detected.reject do |url, _metadata|
148
144
  if @cache_log[:external].include?(url)
149
- @cache_log[:external][url][:found] # if this is false, we're trying again
145
+ found = @cache_log[:external][url][:found] # if this is false, we're trying again
146
+ unless found
147
+ @logger.log(:debug, "Adding #{url} to external cache (not found)")
148
+ end
149
+ found
150
150
  else
151
151
  @logger.log(:debug, "Adding #{url} to external cache")
152
152
  false
@@ -155,28 +155,36 @@ module HTMLProofer
155
155
  end
156
156
 
157
157
  private def determine_internal_additions(urls_detected)
158
- urls_detected.each_with_object({}) do |(url, metadata), hsh|
158
+ urls_detected.each_with_object({}) do |(url, detected_metadata), hsh|
159
159
  # url is not even in cache
160
160
  if @cache_log[:internal][url].nil?
161
- hsh[url] = metadata
161
+ @logger.log(:debug, "Adding #{url} to internal cache")
162
+ hsh[url] = detected_metadata
162
163
  next
163
164
  end
164
165
 
166
+ # detect metadata additions
167
+ # NOTE: the time-stamp for the whole url key will not be updated,
168
+ # so that it reflects the earliest time any of the metadata was checked
165
169
  cache_metadata = @cache_log[:internal][url][:metadata]
166
- incoming_metadata = urls_detected[url].each_with_object([]) do |incoming_url, arr|
167
- existing_cache_metadata = cache_metadata.find { |k, _| k[:filename] == incoming_url[:filename] }
168
-
170
+ metadata_additions = detected_metadata.reject do |detected|
171
+ existing_cache_metadata = cache_metadata.find { |cached, _| cached[:filename] == detected[:filename] }
169
172
  # cache for this url, from an existing path, exists as found
170
- if !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
171
- metadata.find { |m| m[:filename] == existing_cache_metadata[:filename] }[:found] = true
172
- next
173
+ found = !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
174
+ unless found
175
+ @logger.log(:debug, "Adding #{detected} to internal cache for #{url}")
173
176
  end
177
+ found
178
+ end
174
179
 
175
- @logger.log(:debug, "Adding #{incoming_url} to internal cache")
176
- arr << incoming_url
180
+ if metadata_additions.empty?
181
+ next
177
182
  end
178
183
 
179
- hsh[url] = incoming_metadata
184
+ hsh[url] = metadata_additions
185
+ # remove from the cache the detected metadata additions as they correspond to failures to be rechecked
186
+ # (this works assuming the detected url metadata have "found" set to false)
187
+ @cache_log[:internal][url][:metadata] = cache_metadata.difference(metadata_additions)
180
188
  end
181
189
  end
182
190
 
@@ -184,11 +192,16 @@ module HTMLProofer
184
192
  private def determine_deletions(urls_detected, type)
185
193
  deletions = 0
186
194
 
187
- @cache_log[type].delete_if do |url, _|
188
- if urls_detected.include?(url)
195
+ @cache_log[type].delete_if do |url, cache|
196
+ expired_timeframe = type == :external ? !within_external_timeframe?(cache[:time]) : !within_internal_timeframe?(cache[:time])
197
+ if expired_timeframe
198
+ @logger.log(:debug, "Removing #{url} from #{type} cache (expired timeframe)")
199
+ deletions += 1
200
+ true
201
+ elsif urls_detected.include?(url)
189
202
  false
190
203
  elsif url_matches_type?(url, type)
191
- @logger.log(:debug, "Removing #{url} from #{type} cache")
204
+ @logger.log(:debug, "Removing #{url} from #{type} cache (not detected anymore)")
192
205
  deletions += 1
193
206
  true
194
207
  end
@@ -25,7 +25,7 @@ module HTMLProofer
25
25
  content: @img.content)
26
26
  elsif @img.multiple_srcsets?
27
27
  @img.srcsets.each do |srcset|
28
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
28
+ srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
29
29
 
30
30
  if srcset_url.remote?
31
31
  add_to_external_urls(srcset_url.url, @img.line)
@@ -35,7 +35,7 @@ module HTMLProofer
35
35
  end
36
36
  elsif @img.multiple_sizes?
37
37
  @img.srcsets_wo_sizes.each do |srcset|
38
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
38
+ srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
39
39
 
40
40
  if srcset_url.remote?
41
41
  add_to_external_urls(srcset_url.url, @img.line)
@@ -231,8 +231,6 @@ module HTMLProofer
231
231
  @logger.log(:debug, "Found #{cache_text} in the cache")
232
232
 
233
233
  urls_to_check = @cache.retrieve_urls(ivar, type)
234
- urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
235
- @logger.log(:info, "Checking #{urls_detected}")
236
234
 
237
235
  urls_to_check
238
236
  end
@@ -22,12 +22,11 @@ module HTMLProofer
22
22
  end
23
23
 
24
24
  def validate
25
- if @cache.enabled?
26
- urls_to_check = @runner.load_external_cache
27
- run_external_link_checker(urls_to_check)
28
- else
29
- run_external_link_checker(@external_urls)
30
- end
25
+ urls_to_check = @cache.external_enabled? ? @runner.load_external_cache : @external_urls
26
+ urls_detected = pluralize(urls_to_check.count, "external link", "external links")
27
+ @logger.log(:info, "Checking #{urls_detected}")
28
+
29
+ run_external_link_checker(urls_to_check)
31
30
 
32
31
  @failed_checks
33
32
  end
@@ -89,7 +88,7 @@ module HTMLProofer
89
88
  return if @runner.options[:ignore_status_codes].include?(response_code)
90
89
 
91
90
  if response_code.between?(200, 299)
92
- @cache.add_external(href, filenames, response_code, "OK") unless check_hash_in_2xx_response(href, url,
91
+ @cache.add_external(href, filenames, response_code, "OK", true) unless check_hash_in_2xx_response(href, url,
93
92
  response, filenames)
94
93
  elsif response.timed_out?
95
94
  handle_timeout(href, filenames, response_code)
@@ -104,7 +103,7 @@ module HTMLProofer
104
103
  status_message = blank?(response.status_message) ? "" : ": #{response.status_message}"
105
104
  msg = "External link #{href} failed#{status_message}"
106
105
  add_failure(filenames, msg, response_code)
107
- @cache.add_external(href, filenames, response_code, msg)
106
+ @cache.add_external(href, filenames, response_code, msg, false)
108
107
  end
109
108
  end
110
109
 
@@ -133,13 +132,13 @@ module HTMLProofer
133
132
 
134
133
  msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
135
134
  add_failure(filenames, msg, response.code)
136
- @cache.add_external(href, filenames, response.code, msg)
135
+ @cache.add_external(href, filenames, response.code, msg, false)
137
136
  true
138
137
  end
139
138
 
140
139
  def handle_timeout(href, filenames, response_code)
141
140
  msg = "External link #{href} failed: got a time out (response code #{response_code})"
142
- @cache.add_external(href, filenames, 0, msg)
141
+ @cache.add_external(href, filenames, 0, msg, false)
143
142
  return if @runner.options[:only_4xx]
144
143
 
145
144
  add_failure(filenames, msg, response_code)
@@ -157,7 +156,7 @@ module HTMLProofer
157
156
 
158
157
  msg = msgs.join("\n").chomp
159
158
 
160
- @cache.add_external(href, metadata, 0, msg)
159
+ @cache.add_external(href, metadata, 0, msg, false)
161
160
  return if @runner.options[:only_4xx]
162
161
 
163
162
  add_failure(metadata, msg, response_code)
@@ -12,12 +12,11 @@ module HTMLProofer
12
12
  end
13
13
 
14
14
  def validate
15
- if @cache.enabled?
16
- urls_to_check = @runner.load_internal_cache
17
- run_internal_link_checker(urls_to_check)
18
- else
19
- run_internal_link_checker(@internal_urls)
20
- end
15
+ urls_to_check = @cache.internal_enabled? ? @runner.load_internal_cache : @internal_urls
16
+ urls_detected = pluralize(urls_to_check.count, "internal link", "internal links")
17
+ @logger.log(:info, "Checking #{urls_detected}")
18
+
19
+ run_internal_link_checker(urls_to_check)
21
20
 
22
21
  @failed_checks
23
22
  end
@@ -72,6 +71,7 @@ module HTMLProofer
72
71
 
73
72
  # prevents searching files we didn't ask about
74
73
  return false unless url.known_extension?
74
+ return false unless url.has_hash?
75
75
 
76
76
  decoded_href_hash = Addressable::URI.unescape(href_hash)
77
77
  fragment_ids = [href_hash, decoded_href_hash]
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = "4.2.0"
4
+ VERSION = "4.3.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.0
4
+ version: 4.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-16 00:00:00.000000000 Z
11
+ date: 2022-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable