html-proofer 4.2.0 → 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 916656620e369529ae71be63c4316ba627afcf97d2f60b4ce963e375f772a7b8
4
- data.tar.gz: 83e751f1ec8bf453c2612cf1d34ef24e3de1d7620d740be1f8c392627f39cc85
3
+ metadata.gz: e8afbf9c7f55055c38b77971166e852d6f7180481d1e9fb27f84e071c582d1a7
4
+ data.tar.gz: 88db5f8e0bb8059ff3f6c50c8fcb4320bf0f6a4e1ef6fc053671591ff44a529f
5
5
  SHA512:
6
- metadata.gz: c396512e2262ba0bbeab69ee602c42c522cdec4f7b9036e81808011e4b6d47e06188baeaa86524361166af90ce1751b3f08e4075b437b28eab8c4c6bec065630
7
- data.tar.gz: e2e660641cca01f1501b69dda405808624ec03b4ebaa0a9c12618f82e77314f94adbc459ec343898f548be5676f763d11d69bf5cbbebf154148490e49b304f83
6
+ metadata.gz: 3f765689616f7884c1a33685dceb08a0786c851839d4ec612023d3ed746ed7c7b7e9087bd91d8360d795fd54a2077fdbf9af9df2c0b25d246a9cc0a064579501
7
+ data.tar.gz: 441a4f2b13e237505f9402a53fee5713a68d680824e6a976da42f26bfe1f57a0bdf661f6c3cafc45cb0fa28623c56d679c75044b10dd69db64ac824b7d911885
data/bin/htmlproofer CHANGED
@@ -15,22 +15,22 @@ Mercenary.program(:htmlproofer) do |p|
15
15
 
16
16
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
17
17
 
18
- p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, assumes `href="#"` anchors are valid'
19
- p.option 'allow_missing_href', '--allow-missing-href', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error.'
18
+ p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
19
+ p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
21
  p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
22
22
  p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
23
- p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
- p.option 'check_internal_hash', '--check-internal-hash', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
25
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
23
+ p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
+ p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
25
+ p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
26
26
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
27
- p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
28
- p.option 'enforce_https', '--enforce-https <false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
27
+ p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
28
+ p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
29
29
  p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
30
- p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
30
+ p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
31
+ p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
31
32
  p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
32
- p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
33
- p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
33
+ p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
34
34
  p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
35
35
  p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
36
36
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
@@ -68,7 +68,15 @@ Mercenary.program(:htmlproofer) do |p|
68
68
  end
69
69
  end
70
70
 
71
- options[:enforce_https] = false if opts['enforce_https'] == "false"
71
+ # check booleans
72
+ [:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
73
+ next if (val = opts[option.to_s]).nil?
74
+ if val == "false"
75
+ options[option] = false
76
+ else
77
+ options[option] = true
78
+ end
79
+ end
72
80
 
73
81
  options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
74
82
 
@@ -3,18 +3,20 @@
3
3
  module HTMLProofer
4
4
  class Attribute
5
5
  class Url < HTMLProofer::Attribute
6
- attr_reader :url
6
+ attr_reader :url, :size
7
7
 
8
8
  REMOTE_SCHEMES = ["http", "https"].freeze
9
9
 
10
- def initialize(runner, link_attribute, base_url: nil)
10
+ def initialize(runner, link_attribute, base_url: nil, extract_size: false)
11
11
  super
12
12
 
13
13
  if @raw_attribute.nil?
14
14
  @url = nil
15
15
  else
16
16
  @url = @raw_attribute.delete("\u200b").strip
17
+ @url, @size = @url.split(/\s+/) if extract_size
17
18
  @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
19
+ @url = "" if @url.nil?
18
20
 
19
21
  swap_urls!
20
22
  clean_url!
@@ -30,6 +32,7 @@ module HTMLProofer
30
32
 
31
33
  def known_extension?
32
34
  return true if hash_link?
35
+ return true if path.end_with?("/")
33
36
 
34
37
  ext = File.extname(path)
35
38
 
@@ -201,6 +204,10 @@ module HTMLProofer
201
204
  url.start_with?("#")
202
205
  end
203
206
 
207
+ def has_hash?
208
+ url.include?("#")
209
+ end
210
+
204
211
  def param_link?
205
212
  url.start_with?("?")
206
213
  end
@@ -26,12 +26,17 @@ module HTMLProofer
26
26
 
27
27
  if blank?(options)
28
28
  define_singleton_method(:enabled?) { false }
29
+ define_singleton_method(:external_enabled?) { false }
30
+ define_singleton_method(:internal_enabled?) { false }
29
31
  else
32
+ # we still consider the cache as enabled, regardless of the specific timeframes
30
33
  define_singleton_method(:enabled?) { true }
31
34
  setup_cache!(options)
32
35
 
33
36
  @external_timeframe = parsed_timeframe(options[:timeframe][:external])
37
+ define_singleton_method(:external_enabled?) { !@external_timeframe.nil? }
34
38
  @internal_timeframe = parsed_timeframe(options[:timeframe][:internal])
39
+ define_singleton_method(:internal_enabled?) { !@internal_timeframe.nil? }
35
40
  end
36
41
  end
37
42
 
@@ -55,17 +60,15 @@ module HTMLProofer
55
60
  end
56
61
 
57
62
  def add_internal(url, metadata, found)
58
- return unless enabled?
63
+ return unless internal_enabled?
59
64
 
60
65
  @cache_log[:internal][url] = { time: @cache_time, metadata: [] } if @cache_log[:internal][url].nil?
61
66
 
62
67
  @cache_log[:internal][url][:metadata] << construct_internal_link_metadata(metadata, found)
63
68
  end
64
69
 
65
- def add_external(url, filenames, status_code, msg)
66
- return unless enabled?
67
-
68
- found = status_code.between?(200, 299)
70
+ def add_external(url, filenames, status_code, msg, found)
71
+ return unless external_enabled?
69
72
 
70
73
  clean_url = cleaned_url(url)
71
74
  @cache_log[:external][clean_url] =
@@ -73,10 +76,10 @@ module HTMLProofer
73
76
  end
74
77
 
75
78
  def detect_url_changes(urls_detected, type)
76
- additions = determine_additions(urls_detected, type)
77
-
78
79
  determine_deletions(urls_detected, type)
79
80
 
81
+ additions = determine_additions(urls_detected, type)
82
+
80
83
  additions
81
84
  end
82
85
 
@@ -96,13 +99,6 @@ module HTMLProofer
96
99
 
97
100
  urls_to_check = detect_url_changes(urls_detected, type)
98
101
 
99
- @cache_log[type].each_pair do |url, cache|
100
- within_timeframe = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])
101
- next if within_timeframe
102
-
103
- urls_to_check[url] = cache[:metadata] # recheck expired links
104
- end
105
-
106
102
  urls_to_check
107
103
  end
108
104
 
@@ -146,7 +142,11 @@ module HTMLProofer
146
142
  private def determine_external_additions(urls_detected)
147
143
  urls_detected.reject do |url, _metadata|
148
144
  if @cache_log[:external].include?(url)
149
- @cache_log[:external][url][:found] # if this is false, we're trying again
145
+ found = @cache_log[:external][url][:found] # if this is false, we're trying again
146
+ unless found
147
+ @logger.log(:debug, "Adding #{url} to external cache (not found)")
148
+ end
149
+ found
150
150
  else
151
151
  @logger.log(:debug, "Adding #{url} to external cache")
152
152
  false
@@ -155,28 +155,36 @@ module HTMLProofer
155
155
  end
156
156
 
157
157
  private def determine_internal_additions(urls_detected)
158
- urls_detected.each_with_object({}) do |(url, metadata), hsh|
158
+ urls_detected.each_with_object({}) do |(url, detected_metadata), hsh|
159
159
  # url is not even in cache
160
160
  if @cache_log[:internal][url].nil?
161
- hsh[url] = metadata
161
+ @logger.log(:debug, "Adding #{url} to internal cache")
162
+ hsh[url] = detected_metadata
162
163
  next
163
164
  end
164
165
 
166
+ # detect metadata additions
167
+ # NOTE: the time-stamp for the whole url key will not be updated,
168
+ # so that it reflects the earliest time any of the metadata was checked
165
169
  cache_metadata = @cache_log[:internal][url][:metadata]
166
- incoming_metadata = urls_detected[url].each_with_object([]) do |incoming_url, arr|
167
- existing_cache_metadata = cache_metadata.find { |k, _| k[:filename] == incoming_url[:filename] }
168
-
170
+ metadata_additions = detected_metadata.reject do |detected|
171
+ existing_cache_metadata = cache_metadata.find { |cached, _| cached[:filename] == detected[:filename] }
169
172
  # cache for this url, from an existing path, exists as found
170
- if !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
171
- metadata.find { |m| m[:filename] == existing_cache_metadata[:filename] }[:found] = true
172
- next
173
+ found = !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
174
+ unless found
175
+ @logger.log(:debug, "Adding #{detected} to internal cache for #{url}")
173
176
  end
177
+ found
178
+ end
174
179
 
175
- @logger.log(:debug, "Adding #{incoming_url} to internal cache")
176
- arr << incoming_url
180
+ if metadata_additions.empty?
181
+ next
177
182
  end
178
183
 
179
- hsh[url] = incoming_metadata
184
+ hsh[url] = metadata_additions
185
+ # remove from the cache the detected metadata additions as they correspond to failures to be rechecked
186
+ # (this works assuming the detected url metadata have "found" set to false)
187
+ @cache_log[:internal][url][:metadata] = cache_metadata.difference(metadata_additions)
180
188
  end
181
189
  end
182
190
 
@@ -184,11 +192,16 @@ module HTMLProofer
184
192
  private def determine_deletions(urls_detected, type)
185
193
  deletions = 0
186
194
 
187
- @cache_log[type].delete_if do |url, _|
188
- if urls_detected.include?(url)
195
+ @cache_log[type].delete_if do |url, cache|
196
+ expired_timeframe = type == :external ? !within_external_timeframe?(cache[:time]) : !within_internal_timeframe?(cache[:time])
197
+ if expired_timeframe
198
+ @logger.log(:debug, "Removing #{url} from #{type} cache (expired timeframe)")
199
+ deletions += 1
200
+ true
201
+ elsif urls_detected.include?(url)
189
202
  false
190
203
  elsif url_matches_type?(url, type)
191
- @logger.log(:debug, "Removing #{url} from #{type} cache")
204
+ @logger.log(:debug, "Removing #{url} from #{type} cache (not detected anymore)")
192
205
  deletions += 1
193
206
  true
194
207
  end
@@ -25,7 +25,7 @@ module HTMLProofer
25
25
  content: @img.content)
26
26
  elsif @img.multiple_srcsets?
27
27
  @img.srcsets.each do |srcset|
28
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
28
+ srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
29
29
 
30
30
  if srcset_url.remote?
31
31
  add_to_external_urls(srcset_url.url, @img.line)
@@ -35,7 +35,7 @@ module HTMLProofer
35
35
  end
36
36
  elsif @img.multiple_sizes?
37
37
  @img.srcsets_wo_sizes.each do |srcset|
38
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
38
+ srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
39
39
 
40
40
  if srcset_url.remote?
41
41
  add_to_external_urls(srcset_url.url, @img.line)
@@ -231,8 +231,6 @@ module HTMLProofer
231
231
  @logger.log(:debug, "Found #{cache_text} in the cache")
232
232
 
233
233
  urls_to_check = @cache.retrieve_urls(ivar, type)
234
- urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
235
- @logger.log(:info, "Checking #{urls_detected}")
236
234
 
237
235
  urls_to_check
238
236
  end
@@ -22,12 +22,11 @@ module HTMLProofer
22
22
  end
23
23
 
24
24
  def validate
25
- if @cache.enabled?
26
- urls_to_check = @runner.load_external_cache
27
- run_external_link_checker(urls_to_check)
28
- else
29
- run_external_link_checker(@external_urls)
30
- end
25
+ urls_to_check = @cache.external_enabled? ? @runner.load_external_cache : @external_urls
26
+ urls_detected = pluralize(urls_to_check.count, "external link", "external links")
27
+ @logger.log(:info, "Checking #{urls_detected}")
28
+
29
+ run_external_link_checker(urls_to_check)
31
30
 
32
31
  @failed_checks
33
32
  end
@@ -89,7 +88,7 @@ module HTMLProofer
89
88
  return if @runner.options[:ignore_status_codes].include?(response_code)
90
89
 
91
90
  if response_code.between?(200, 299)
92
- @cache.add_external(href, filenames, response_code, "OK") unless check_hash_in_2xx_response(href, url,
91
+ @cache.add_external(href, filenames, response_code, "OK", true) unless check_hash_in_2xx_response(href, url,
93
92
  response, filenames)
94
93
  elsif response.timed_out?
95
94
  handle_timeout(href, filenames, response_code)
@@ -104,7 +103,7 @@ module HTMLProofer
104
103
  status_message = blank?(response.status_message) ? "" : ": #{response.status_message}"
105
104
  msg = "External link #{href} failed#{status_message}"
106
105
  add_failure(filenames, msg, response_code)
107
- @cache.add_external(href, filenames, response_code, msg)
106
+ @cache.add_external(href, filenames, response_code, msg, false)
108
107
  end
109
108
  end
110
109
 
@@ -133,13 +132,13 @@ module HTMLProofer
133
132
 
134
133
  msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
135
134
  add_failure(filenames, msg, response.code)
136
- @cache.add_external(href, filenames, response.code, msg)
135
+ @cache.add_external(href, filenames, response.code, msg, false)
137
136
  true
138
137
  end
139
138
 
140
139
  def handle_timeout(href, filenames, response_code)
141
140
  msg = "External link #{href} failed: got a time out (response code #{response_code})"
142
- @cache.add_external(href, filenames, 0, msg)
141
+ @cache.add_external(href, filenames, 0, msg, false)
143
142
  return if @runner.options[:only_4xx]
144
143
 
145
144
  add_failure(filenames, msg, response_code)
@@ -157,7 +156,7 @@ module HTMLProofer
157
156
 
158
157
  msg = msgs.join("\n").chomp
159
158
 
160
- @cache.add_external(href, metadata, 0, msg)
159
+ @cache.add_external(href, metadata, 0, msg, false)
161
160
  return if @runner.options[:only_4xx]
162
161
 
163
162
  add_failure(metadata, msg, response_code)
@@ -12,12 +12,11 @@ module HTMLProofer
12
12
  end
13
13
 
14
14
  def validate
15
- if @cache.enabled?
16
- urls_to_check = @runner.load_internal_cache
17
- run_internal_link_checker(urls_to_check)
18
- else
19
- run_internal_link_checker(@internal_urls)
20
- end
15
+ urls_to_check = @cache.internal_enabled? ? @runner.load_internal_cache : @internal_urls
16
+ urls_detected = pluralize(urls_to_check.count, "internal link", "internal links")
17
+ @logger.log(:info, "Checking #{urls_detected}")
18
+
19
+ run_internal_link_checker(urls_to_check)
21
20
 
22
21
  @failed_checks
23
22
  end
@@ -72,6 +71,7 @@ module HTMLProofer
72
71
 
73
72
  # prevents searching files we didn't ask about
74
73
  return false unless url.known_extension?
74
+ return false unless url.has_hash?
75
75
 
76
76
  decoded_href_hash = Addressable::URI.unescape(href_hash)
77
77
  fragment_ids = [href_hash, decoded_href_hash]
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = "4.2.0"
4
+ VERSION = "4.3.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.0
4
+ version: 4.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-16 00:00:00.000000000 Z
11
+ date: 2022-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable