html-proofer 4.1.0 → 4.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c827c0b89b31ad51e1a4b177f190bdcbbb8f8fd933f235420be60da28e43cfb6
4
- data.tar.gz: e47a4bf7944efc46622762720036c846c1ccb82acf339d99a93d7fade05eb1d9
3
+ metadata.gz: 285bb041bc95bc47d55de09a492e5d802a4c2a5dadf539bb7742f0d8138e2e23
4
+ data.tar.gz: dddc7a66aab15685917621d28600133b4824ce0c27ad204d7750252f717fe33e
5
5
  SHA512:
6
- metadata.gz: c4b46613b516b0fbbaf52747d812dd390598b2bd3db6d45e342d2946ed012a7da2d47f36a0bae9a7c8b09fd484d7d0ecf2e4d12714f1044bb0ef2cfd5bd1d6da
7
- data.tar.gz: '005058a0df180ce5619c2733438189842645840e1b94b6209bd3b2b6751b6cdd5f30b2adaa4aaed1e797e1332cfaf98388d54e06bf4497f5eb6f9796be71a1d8'
6
+ metadata.gz: bd983c2df18c4def1ded8f6caadc5917142d34021b9e873091a7040bd596b8efc05cee56b3fb3330ceab441f601d7c305e6b7e28a821fbb8d20ee7a85ccbf7a4
7
+ data.tar.gz: 966a38245a5bb725838b5c63214b5abc133c53e90fb927118b64a4da2a383d3c993e32994fd7eff0df473e15e8d23788737d716b00c8931977e82acb33114e58
data/bin/htmlproofer CHANGED
@@ -15,21 +15,22 @@ Mercenary.program(:htmlproofer) do |p|
15
15
 
16
16
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
17
17
 
18
- p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, assumes `href="#"` anchors are valid'
19
- p.option 'allow_missing_href', '--allow-missing-href', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error.'
18
+ p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
19
+ p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
21
  p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
22
22
  p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
23
- p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
23
+ p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
+ p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
25
+ p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
25
26
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
26
- p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
27
- p.option 'enforce_https', '--enforce-https <false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
27
+ p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
28
+ p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
28
29
  p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
29
- p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
30
+ p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
31
+ p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
30
32
  p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
31
- p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
32
- p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
33
+ p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
33
34
  p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
34
35
  p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
35
36
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
@@ -67,7 +68,15 @@ Mercenary.program(:htmlproofer) do |p|
67
68
  end
68
69
  end
69
70
 
70
- options[:enforce_https] = false if opts['enforce_https'] == "false"
71
+ # check booleans
72
+ [:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
73
+ next if (val = opts[option.to_s]).nil?
74
+ if val == "false"
75
+ options[option] = false
76
+ else
77
+ options[option] = true
78
+ end
79
+ end
71
80
 
72
81
  options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
73
82
 
@@ -3,18 +3,20 @@
3
3
  module HTMLProofer
4
4
  class Attribute
5
5
  class Url < HTMLProofer::Attribute
6
- attr_reader :url
6
+ attr_reader :url, :size
7
7
 
8
8
  REMOTE_SCHEMES = ["http", "https"].freeze
9
9
 
10
- def initialize(runner, link_attribute, base_url: nil)
10
+ def initialize(runner, link_attribute, base_url: nil, extract_size: false)
11
11
  super
12
12
 
13
13
  if @raw_attribute.nil?
14
14
  @url = nil
15
15
  else
16
16
  @url = @raw_attribute.delete("\u200b").strip
17
+ @url, @size = @url.split(/\s+/) if extract_size
17
18
  @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
19
+ @url = "" if @url.nil?
18
20
 
19
21
  swap_urls!
20
22
  clean_url!
@@ -30,6 +32,7 @@ module HTMLProofer
30
32
 
31
33
  def known_extension?
32
34
  return true if hash_link?
35
+ return true if path.end_with?("/")
33
36
 
34
37
  ext = File.extname(path)
35
38
 
@@ -201,6 +204,10 @@ module HTMLProofer
201
204
  url.start_with?("#")
202
205
  end
203
206
 
207
+ def has_hash?
208
+ url.include?("#")
209
+ end
210
+
204
211
  def param_link?
205
212
  url.start_with?("?")
206
213
  end
@@ -26,12 +26,17 @@ module HTMLProofer
26
26
 
27
27
  if blank?(options)
28
28
  define_singleton_method(:enabled?) { false }
29
+ define_singleton_method(:external_enabled?) { false }
30
+ define_singleton_method(:internal_enabled?) { false }
29
31
  else
32
+ # we still consider the cache as enabled, regardless of the specific timeframes
30
33
  define_singleton_method(:enabled?) { true }
31
34
  setup_cache!(options)
32
35
 
33
36
  @external_timeframe = parsed_timeframe(options[:timeframe][:external])
37
+ define_singleton_method(:external_enabled?) { !@external_timeframe.nil? }
34
38
  @internal_timeframe = parsed_timeframe(options[:timeframe][:internal])
39
+ define_singleton_method(:internal_enabled?) { !@internal_timeframe.nil? }
35
40
  end
36
41
  end
37
42
 
@@ -55,17 +60,15 @@ module HTMLProofer
55
60
  end
56
61
 
57
62
  def add_internal(url, metadata, found)
58
- return unless enabled?
63
+ return unless internal_enabled?
59
64
 
60
65
  @cache_log[:internal][url] = { time: @cache_time, metadata: [] } if @cache_log[:internal][url].nil?
61
66
 
62
67
  @cache_log[:internal][url][:metadata] << construct_internal_link_metadata(metadata, found)
63
68
  end
64
69
 
65
- def add_external(url, filenames, status_code, msg)
66
- return unless enabled?
67
-
68
- found = status_code.between?(200, 299)
70
+ def add_external(url, filenames, status_code, msg, found)
71
+ return unless external_enabled?
69
72
 
70
73
  clean_url = cleaned_url(url)
71
74
  @cache_log[:external][clean_url] =
@@ -73,10 +76,10 @@ module HTMLProofer
73
76
  end
74
77
 
75
78
  def detect_url_changes(urls_detected, type)
76
- additions = determine_additions(urls_detected, type)
77
-
78
79
  determine_deletions(urls_detected, type)
79
80
 
81
+ additions = determine_additions(urls_detected, type)
82
+
80
83
  additions
81
84
  end
82
85
 
@@ -96,13 +99,6 @@ module HTMLProofer
96
99
 
97
100
  urls_to_check = detect_url_changes(urls_detected, type)
98
101
 
99
- @cache_log[type].each_pair do |url, cache|
100
- within_timeframe = type == :external ? within_external_timeframe?(cache[:time]) : within_internal_timeframe?(cache[:time])
101
- next if within_timeframe
102
-
103
- urls_to_check[url] = cache[:metadata] # recheck expired links
104
- end
105
-
106
102
  urls_to_check
107
103
  end
108
104
 
@@ -146,7 +142,11 @@ module HTMLProofer
146
142
  private def determine_external_additions(urls_detected)
147
143
  urls_detected.reject do |url, _metadata|
148
144
  if @cache_log[:external].include?(url)
149
- @cache_log[:external][url][:found] # if this is false, we're trying again
145
+ found = @cache_log[:external][url][:found] # if this is false, we're trying again
146
+ unless found
147
+ @logger.log(:debug, "Adding #{url} to external cache (not found)")
148
+ end
149
+ found
150
150
  else
151
151
  @logger.log(:debug, "Adding #{url} to external cache")
152
152
  false
@@ -155,28 +155,36 @@ module HTMLProofer
155
155
  end
156
156
 
157
157
  private def determine_internal_additions(urls_detected)
158
- urls_detected.each_with_object({}) do |(url, metadata), hsh|
158
+ urls_detected.each_with_object({}) do |(url, detected_metadata), hsh|
159
159
  # url is not even in cache
160
160
  if @cache_log[:internal][url].nil?
161
- hsh[url] = metadata
161
+ @logger.log(:debug, "Adding #{url} to internal cache")
162
+ hsh[url] = detected_metadata
162
163
  next
163
164
  end
164
165
 
166
+ # detect metadata additions
167
+ # NOTE: the time-stamp for the whole url key will not be updated,
168
+ # so that it reflects the earliest time any of the metadata was checked
165
169
  cache_metadata = @cache_log[:internal][url][:metadata]
166
- incoming_metadata = urls_detected[url].each_with_object([]) do |incoming_url, arr|
167
- existing_cache_metadata = cache_metadata.find { |k, _| k[:filename] == incoming_url[:filename] }
168
-
170
+ metadata_additions = detected_metadata.reject do |detected|
171
+ existing_cache_metadata = cache_metadata.find { |cached, _| cached[:filename] == detected[:filename] }
169
172
  # cache for this url, from an existing path, exists as found
170
- if !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
171
- metadata.find { |m| m[:filename] == existing_cache_metadata[:filename] }[:found] = true
172
- next
173
+ found = !existing_cache_metadata.nil? && !existing_cache_metadata.empty? && existing_cache_metadata[:found]
174
+ unless found
175
+ @logger.log(:debug, "Adding #{detected} to internal cache for #{url}")
173
176
  end
177
+ found
178
+ end
174
179
 
175
- @logger.log(:debug, "Adding #{incoming_url} to internal cache")
176
- arr << incoming_url
180
+ if metadata_additions.empty?
181
+ next
177
182
  end
178
183
 
179
- hsh[url] = incoming_metadata
184
+ hsh[url] = metadata_additions
185
+ # remove from the cache the detected metadata additions as they correspond to failures to be rechecked
186
+ # (this works assuming the detected url metadata have "found" set to false)
187
+ @cache_log[:internal][url][:metadata] = cache_metadata.difference(metadata_additions)
180
188
  end
181
189
  end
182
190
 
@@ -184,11 +192,16 @@ module HTMLProofer
184
192
  private def determine_deletions(urls_detected, type)
185
193
  deletions = 0
186
194
 
187
- @cache_log[type].delete_if do |url, _|
188
- if urls_detected.include?(url)
195
+ @cache_log[type].delete_if do |url, cache|
196
+ expired_timeframe = type == :external ? !within_external_timeframe?(cache[:time]) : !within_internal_timeframe?(cache[:time])
197
+ if expired_timeframe
198
+ @logger.log(:debug, "Removing #{url} from #{type} cache (expired timeframe)")
199
+ deletions += 1
200
+ true
201
+ elsif urls_detected.include?(url)
189
202
  false
190
203
  elsif url_matches_type?(url, type)
191
- @logger.log(:debug, "Removing #{url} from #{type} cache")
204
+ @logger.log(:debug, "Removing #{url} from #{type} cache (not detected anymore)")
192
205
  deletions += 1
193
206
  true
194
207
  end
@@ -23,19 +23,9 @@ module HTMLProofer
23
23
  elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
24
24
  add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line,
25
25
  content: @img.content)
26
- elsif @img.multiple_srcsets?
27
- @img.srcsets.each do |srcset|
28
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
29
-
30
- if srcset_url.remote?
31
- add_to_external_urls(srcset_url.url, @img.line)
32
- elsif !srcset_url.exists?
33
- add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
34
- end
35
- end
36
- elsif @img.multiple_sizes?
26
+ elsif @img.multiple_srcsets? || @img.multiple_sizes?
37
27
  @img.srcsets_wo_sizes.each do |srcset|
38
- srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url)
28
+ srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
39
29
 
40
30
  if srcset_url.remote?
41
31
  add_to_external_urls(srcset_url.url, @img.line)
@@ -9,6 +9,7 @@ module HTMLProofer
9
9
  allow_missing_href: false,
10
10
  assume_extension: ".html",
11
11
  check_external_hash: true,
12
+ check_internal_hash: true,
12
13
  checks: DEFAULT_TESTS,
13
14
  directory_index_file: "index.html",
14
15
  disable_external: false,
@@ -231,8 +231,6 @@ module HTMLProofer
231
231
  @logger.log(:debug, "Found #{cache_text} in the cache")
232
232
 
233
233
  urls_to_check = @cache.retrieve_urls(ivar, type)
234
- urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
235
- @logger.log(:info, "Checking #{urls_detected}")
236
234
 
237
235
  urls_to_check
238
236
  end
@@ -22,12 +22,11 @@ module HTMLProofer
22
22
  end
23
23
 
24
24
  def validate
25
- if @cache.enabled?
26
- urls_to_check = @runner.load_external_cache
27
- run_external_link_checker(urls_to_check)
28
- else
29
- run_external_link_checker(@external_urls)
30
- end
25
+ urls_to_check = @cache.external_enabled? ? @runner.load_external_cache : @external_urls
26
+ urls_detected = pluralize(urls_to_check.count, "external link", "external links")
27
+ @logger.log(:info, "Checking #{urls_detected}")
28
+
29
+ run_external_link_checker(urls_to_check)
31
30
 
32
31
  @failed_checks
33
32
  end
@@ -89,7 +88,7 @@ module HTMLProofer
89
88
  return if @runner.options[:ignore_status_codes].include?(response_code)
90
89
 
91
90
  if response_code.between?(200, 299)
92
- @cache.add_external(href, filenames, response_code, "OK") unless check_hash_in_2xx_response(href, url,
91
+ @cache.add_external(href, filenames, response_code, "OK", true) unless check_hash_in_2xx_response(href, url,
93
92
  response, filenames)
94
93
  elsif response.timed_out?
95
94
  handle_timeout(href, filenames, response_code)
@@ -104,7 +103,7 @@ module HTMLProofer
104
103
  status_message = blank?(response.status_message) ? "" : ": #{response.status_message}"
105
104
  msg = "External link #{href} failed#{status_message}"
106
105
  add_failure(filenames, msg, response_code)
107
- @cache.add_external(href, filenames, response_code, msg)
106
+ @cache.add_external(href, filenames, response_code, msg, false)
108
107
  end
109
108
  end
110
109
 
@@ -133,13 +132,13 @@ module HTMLProofer
133
132
 
134
133
  msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
135
134
  add_failure(filenames, msg, response.code)
136
- @cache.add_external(href, filenames, response.code, msg)
135
+ @cache.add_external(href, filenames, response.code, msg, false)
137
136
  true
138
137
  end
139
138
 
140
139
  def handle_timeout(href, filenames, response_code)
141
140
  msg = "External link #{href} failed: got a time out (response code #{response_code})"
142
- @cache.add_external(href, filenames, 0, msg)
141
+ @cache.add_external(href, filenames, 0, msg, false)
143
142
  return if @runner.options[:only_4xx]
144
143
 
145
144
  add_failure(filenames, msg, response_code)
@@ -157,7 +156,7 @@ module HTMLProofer
157
156
 
158
157
  msg = msgs.join("\n").chomp
159
158
 
160
- @cache.add_external(href, metadata, 0, msg)
159
+ @cache.add_external(href, metadata, 0, msg, false)
161
160
  return if @runner.options[:only_4xx]
162
161
 
163
162
  add_failure(metadata, msg, response_code)
@@ -12,12 +12,11 @@ module HTMLProofer
12
12
  end
13
13
 
14
14
  def validate
15
- if @cache.enabled?
16
- urls_to_check = @runner.load_internal_cache
17
- run_internal_link_checker(urls_to_check)
18
- else
19
- run_internal_link_checker(@internal_urls)
20
- end
15
+ urls_to_check = @cache.internal_enabled? ? @runner.load_internal_cache : @internal_urls
16
+ urls_detected = pluralize(urls_to_check.count, "internal link", "internal links")
17
+ @logger.log(:info, "Checking #{urls_detected}")
18
+
19
+ run_internal_link_checker(urls_to_check)
21
20
 
22
21
  @failed_checks
23
22
  end
@@ -68,9 +67,11 @@ module HTMLProofer
68
67
  private def hash_exists?(url)
69
68
  href_hash = url.hash
70
69
  return true if blank?(href_hash)
70
+ return true unless @runner.options[:check_internal_hash]
71
71
 
72
72
  # prevents searching files we didn't ask about
73
73
  return false unless url.known_extension?
74
+ return false unless url.has_hash?
74
75
 
75
76
  decoded_href_hash = Addressable::URI.unescape(href_hash)
76
77
  fragment_ids = [href_hash, decoded_href_hash]
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = "4.1.0"
4
+ VERSION = "4.3.1"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.0
4
+ version: 4.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-15 00:00:00.000000000 Z
11
+ date: 2022-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable