html-proofer 3.16.0 → 3.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39ffac5e6c23ab7b75d99dd69779be2109ed8b02dc4a5cb0a33740119cb66766
4
- data.tar.gz: cdaf142dfde936fa1d88d410499c04fdbbb3b9814bc00ba6800ff3edb8acee50
3
+ metadata.gz: 9b5a47189e872130e01a2080e6e8ddb1f7f22520098deaf941960194d2338b2b
4
+ data.tar.gz: 3543c42860956427e8c828861f76e1c5cd984d79054c091c6e5f97e0352b3137
5
5
  SHA512:
6
- metadata.gz: 34a4253b2eb9674060583c0f9006f9c36204bfd8bbb3e347293874e8db3248153e1efcdb13cbb9a5fc7967aabe478773a6a33c3c2372b4e819ecc0c41af43f36
7
- data.tar.gz: 34530aeb5fb2836568aa5f7ebea7f154ef8e58c82ac0803aeba5632f704e7aed6c8111a095546c56b1331fa3dc1b563b08eac7cff0205aee4056815ebd8a6478
6
+ metadata.gz: 1defd1fb2ee651015231b0ed4d1407ed56e25a907e0602ee9e175b23f3b88938e020f438d2171ceea8d52bc58984901543c97184a3da13c9b08cdaf62200a4aa
7
+ data.tar.gz: 97eb6120db724822830398a85f14faa4415638e965d21aef0703991e7a14d335f32df65d72bb877be6f0706f24154deea3f08a6bbeb2cdf5d7c392d03ce2af23
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- STDOUT.sync = true
4
+ $stdout.sync = true
5
5
 
6
6
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
7
7
 
@@ -59,6 +59,8 @@ module HTMLProofer
59
59
  end
60
60
 
61
61
  def add(url, filenames, status, msg = '')
62
+ return unless use_cache?
63
+
62
64
  data = {
63
65
  time: @cache_time,
64
66
  filenames: filenames,
@@ -92,12 +94,12 @@ module HTMLProofer
92
94
  del = 0
93
95
  @cache_log.delete_if do |url, _|
94
96
  url = clean_url(url)
95
- if !found_urls.include?(url)
97
+ if found_urls.include?(url)
98
+ false
99
+ else
96
100
  @logger.log :debug, "Removing #{url} from cache check"
97
101
  del += 1
98
102
  true
99
- else
100
- false
101
103
  end
102
104
  end
103
105
 
@@ -115,12 +117,11 @@ module HTMLProofer
115
117
  @load.nil?
116
118
  end
117
119
 
118
- def retrieve_urls(external_urls)
119
- urls_to_check = detect_url_changes(external_urls)
120
+ def retrieve_urls(urls)
121
+ urls_to_check = detect_url_changes(urls)
120
122
  @cache_log.each_pair do |url, cache|
121
- if within_timeframe?(cache['time'])
122
- next if cache['message'].empty? # these were successes to skip
123
- end
123
+ next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
124
+
124
125
  urls_to_check[url] = cache['filenames'] # recheck expired links
125
126
  end
126
127
  urls_to_check
@@ -3,15 +3,17 @@
3
3
  module HTMLProofer
4
4
  # Mostly handles issue management and collecting of external URLs.
5
5
  class Check
6
- attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
6
+ attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
7
7
 
8
- def initialize(src, path, html, logger, options)
8
+ def initialize(src, path, html, logger, cache, options)
9
9
  @src = src
10
10
  @path = path
11
11
  @html = remove_ignored(html)
12
12
  @logger = logger
13
+ @cache = cache
13
14
  @options = options
14
15
  @issues = []
16
+ @internal_urls = {}
15
17
  @external_urls = {}
16
18
  end
17
19
 
@@ -26,15 +28,20 @@ module HTMLProofer
26
28
 
27
29
  def add_issue(desc, line: nil, status: -1, content: nil)
28
30
  @issues << Issue.new(@path, desc, line: line, status: status, content: content)
31
+ false
32
+ end
33
+
34
+ def add_to_internal_urls(url, internal_url)
35
+ if @internal_urls[url]
36
+ @internal_urls[url] << internal_url
37
+ else
38
+ @internal_urls[url] = [internal_url]
39
+ end
29
40
  end
30
41
 
31
42
  def add_to_external_urls(url)
32
43
  return if @external_urls[url]
33
44
 
34
- add_path_for_url(url)
35
- end
36
-
37
- def add_path_for_url(url)
38
45
  if @external_urls[url]
39
46
  @external_urls[url] << @path
40
47
  else
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
4
4
  include HTMLProofer::Utils
5
5
 
6
6
  def missing_href?
7
+ return blank?(@link.src) if @node.name == 'source'
8
+
7
9
  blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
8
10
  end
9
11
 
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
12
14
  end
13
15
 
14
16
  def run
15
- @html.css('a, link').each do |node|
17
+ @html.css('a, link, source').each do |node|
16
18
  @link = create_element(node)
17
19
  line = node.line
18
20
  content = node.to_s
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
49
51
  # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
50
52
  next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
51
53
 
52
- add_to_external_urls(@link.href)
54
+ add_to_external_urls(@link.href || @link.src)
53
55
  next
54
- elsif @link.internal? && !@link.exists?
55
- add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
56
+ elsif @link.internal?
57
+ if @link.exists?
58
+ add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
59
+ else
60
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
61
+ end
56
62
  end
63
+ end
57
64
 
58
- # does the local directory have a trailing slash?
59
- if @link.unslashed_directory? @link.absolute_path
60
- add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
61
- next
62
- end
65
+ external_urls
66
+ end
63
67
 
64
- # verify the target hash
65
- handle_hash(@link, line, content) if @link.hash
68
+ def check_internal_link(link, line, content)
69
+ # does the local directory have a trailing slash?
70
+ if link.unslashed_directory?(link.absolute_path)
71
+ add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line: line, content: content)
72
+ return false
66
73
  end
67
74
 
68
- external_urls
75
+ # verify the target hash
76
+ return handle_hash(link, line, content) if link.hash
77
+
78
+ true
69
79
  end
70
80
 
71
81
  def check_schemes(link, line, content)
@@ -94,23 +104,27 @@ class LinkCheck < ::HTMLProofer::Check
94
104
  end
95
105
 
96
106
  def handle_hash(link, line, content)
97
- if link.internal? && !hash_check(link.html, link.hash)
98
- add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
107
+ if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
108
+ return add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
99
109
  elsif link.external?
100
- external_link_check(link, line, content)
110
+ return external_link_check(link, line, content)
101
111
  end
112
+
113
+ true
102
114
  end
103
115
 
104
116
  def external_link_check(link, line, content)
105
- if !link.exists?
106
- add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
117
+ if link.exists? # rubocop:disable Style/GuardClause
118
+ target_html = create_nokogiri(link.absolute_path)
119
+ return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
107
120
  else
108
- target_html = create_nokogiri link.absolute_path
109
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
121
+ return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
110
122
  end
123
+
124
+ true
111
125
  end
112
126
 
113
- def hash_check(html, href_hash)
127
+ def hash_exists?(html, href_hash)
114
128
  decoded_href_hash = Addressable::URI.unescape(href_hash)
115
129
  fragment_ids = [href_hash, decoded_href_hash]
116
130
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
148
162
 
149
163
  class XpathFunctions
150
164
  def case_sensitive_equals(node_set, str_to_match)
151
- node_set.find_all { |node| node.to_s. == str_to_match.to_s }
165
+ node_set.find_all { |node| node.to_s.== str_to_match.to_s }
166
+ end
167
+ end
168
+
169
+ class InternalLink
170
+ attr_reader :link, :href, :path, :line, :content
171
+
172
+ def initialize(link, path, line, content)
173
+ @link = link
174
+ @href = @link.href
175
+ @path = path
176
+ @line = line
177
+ @content = content
152
178
  end
153
179
  end
154
180
  end
@@ -80,7 +80,7 @@ module HTMLProofer
80
80
  begin
81
81
  JSON.parse(config)
82
82
  rescue StandardError
83
- raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
83
+ raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
84
84
  end
85
85
  end
86
86
  end
@@ -108,9 +108,7 @@ module HTMLProofer
108
108
  return true if /^javascript:/.match?(url)
109
109
 
110
110
  # ignore base64 encoded images
111
- if %w[ImageCheck FaviconCheck].include? @type
112
- return true if /^data:image/.match?(url)
113
- end
111
+ return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
114
112
 
115
113
  # ignore user defined URLs
116
114
  return true if ignores_pattern_check(@check.options[:url_ignore])
@@ -187,7 +185,7 @@ module HTMLProofer
187
185
  end
188
186
  elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
189
187
  base = File.dirname @check.path
190
- elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
188
+ elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
191
189
  base = File.dirname @check.path
192
190
  else # relative link, path is a directory
193
191
  base = @check.path
@@ -6,6 +6,7 @@ module HTMLProofer
6
6
 
7
7
  class InvalidHtmlError < StandardError
8
8
  def initialize(failures)
9
+ super
9
10
  @failures = failures
10
11
  end
11
12
 
@@ -4,7 +4,7 @@ module HTMLProofer
4
4
  class Runner
5
5
  include HTMLProofer::Utils
6
6
 
7
- attr_reader :options, :external_urls, :failures
7
+ attr_reader :options, :internal_urls, :external_urls, :failures
8
8
 
9
9
  def initialize(src, opts = {})
10
10
  @src = src
@@ -20,6 +20,8 @@ module HTMLProofer
20
20
 
21
21
  @type = @options.delete(:type)
22
22
  @logger = HTMLProofer::Log.new(@options[:log_level])
23
+ @cache = Cache.new(@logger, @options[:cache])
24
+ @internal_link_checks = nil
23
25
 
24
26
  # Add swap patterns for internal domains
25
27
  unless @options[:internal_domains].empty?
@@ -30,6 +32,9 @@ module HTMLProofer
30
32
  end
31
33
  end
32
34
 
35
+ @internal_urls = {}
36
+ @internal_urls_to_paths = {}
37
+ @external_urls = {}
33
38
  @failures = []
34
39
  @before_request = []
35
40
  end
@@ -59,15 +64,13 @@ module HTMLProofer
59
64
  end
60
65
  end
61
66
  @external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
62
- validate_urls
67
+ validate_external_urls
63
68
  end
64
69
 
65
70
  # Collects any external URLs found in a directory of files. Also collects
66
71
  # every failed test from process_files.
67
72
  # Sends the external URLs to Typhoeus for batch processing.
68
73
  def check_files
69
- @external_urls = {}
70
-
71
74
  process_files.each do |item|
72
75
  @external_urls.merge!(item[:external_urls])
73
76
  @failures.concat(item[:failures])
@@ -78,9 +81,12 @@ module HTMLProofer
78
81
  # just not run those other checks at all.
79
82
  if @options[:external_only]
80
83
  @failures = []
81
- validate_urls
84
+ validate_external_urls
82
85
  elsif !@options[:disable_external]
83
- validate_urls
86
+ validate_external_urls
87
+ validate_internal_urls
88
+ else
89
+ validate_internal_urls
84
90
  end
85
91
  end
86
92
 
@@ -101,8 +107,21 @@ module HTMLProofer
101
107
  @src.each do |src|
102
108
  checks.each do |klass|
103
109
  @logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
104
- check = Object.const_get(klass).new(src, path, html, @logger, @options)
110
+ check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
105
111
  check.run
112
+
113
+ if klass == 'LinkCheck'
114
+ @internal_link_checks = check
115
+ check.internal_urls.each_pair do |url, internal_urls|
116
+ if @internal_urls_to_paths[url]
117
+ @internal_urls_to_paths[url].concat(internal_urls.map(&:path))
118
+ else
119
+ @internal_urls_to_paths[url] = internal_urls.map(&:path)
120
+ end
121
+ end
122
+ @internal_urls.merge!(check.internal_urls)
123
+ end
124
+
106
125
  external_urls = check.external_urls
107
126
  external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
108
127
  result[:external_urls].merge!(external_urls)
@@ -113,16 +132,35 @@ module HTMLProofer
113
132
  end
114
133
 
115
134
  def check_path(path)
116
- check_parsed create_nokogiri(path), path
135
+ check_parsed(create_nokogiri(path), path)
117
136
  end
118
137
 
119
- def validate_urls
120
- url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
138
+ def validate_external_urls
139
+ url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
121
140
  url_validator.before_request = @before_request
122
141
  @failures.concat(url_validator.run)
123
142
  @external_urls = url_validator.external_urls
124
143
  end
125
144
 
145
+ def validate_internal_urls
146
+ if @cache.use_cache?
147
+ urls_to_check = load_internal_cache
148
+
149
+ urls_to_check.each_pair do |url, internal_urls|
150
+ result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.line, internal_urls.first.content)
151
+ code = result ? 200 : 404
152
+ @cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
153
+ end
154
+ @cache.write
155
+ else
156
+ @internal_urls.values.flatten.each do |internal_url|
157
+ @internal_link_checks.check_internal_link(internal_url.link, internal_url.line, internal_url.content)
158
+ end
159
+ end
160
+
161
+ @failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
162
+ end
163
+
126
164
  def files
127
165
  @files ||= if @type == :directory
128
166
  @src.map do |src|
@@ -191,5 +229,13 @@ module HTMLProofer
191
229
  @before_request << block if block_given?
192
230
  @before_request
193
231
  end
232
+
233
+ def load_internal_cache
234
+ urls_to_check = @cache.retrieve_urls(@internal_urls)
235
+ cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
236
+ @logger.log :info, "Found #{cache_text} in the cache..."
237
+
238
+ urls_to_check
239
+ end
194
240
  end
195
241
  end
@@ -12,13 +12,13 @@ module HTMLProofer
12
12
  attr_reader :external_urls
13
13
  attr_writer :before_request
14
14
 
15
- def initialize(logger, external_urls, options)
15
+ def initialize(logger, cache, external_urls, options)
16
16
  @logger = logger
17
17
  @external_urls = external_urls
18
18
  @failed_tests = []
19
19
  @options = options
20
20
  @hydra = Typhoeus::Hydra.new(@options[:hydra])
21
- @cache = Cache.new(@logger, @options[:cache])
21
+ @cache = cache
22
22
  @before_request = []
23
23
  end
24
24
 
@@ -26,7 +26,7 @@ module HTMLProofer
26
26
  @external_urls = remove_query_values
27
27
 
28
28
  if @cache.use_cache?
29
- urls_to_check = load_cache
29
+ urls_to_check = @cache.retrieve_urls(@external_urls)
30
30
  external_link_checker(urls_to_check)
31
31
  @cache.write
32
32
  else
@@ -43,11 +43,11 @@ module HTMLProofer
43
43
  iterable_external_urls = @external_urls.dup
44
44
  @external_urls.each_key do |url|
45
45
  uri = begin
46
- Addressable::URI.parse(url)
47
- rescue URI::Error, Addressable::URI::InvalidURIError
48
- @logger.log :error, "#{url} is an invalid URL"
49
- nil
50
- end
46
+ Addressable::URI.parse(url)
47
+ rescue URI::Error, Addressable::URI::InvalidURIError
48
+ @logger.log :error, "#{url} is an invalid URL"
49
+ nil
50
+ end
51
51
  next if uri.nil? || uri.query.nil?
52
52
 
53
53
  iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
@@ -74,15 +74,6 @@ module HTMLProofer
74
74
  uri.host + uri.path
75
75
  end
76
76
 
77
- def load_cache
78
- cache_count = @cache.size
79
- cache_text = pluralize(cache_count, 'link', 'links')
80
-
81
- @logger.log :info, "Found #{cache_text} in the cache..."
82
-
83
- @cache.retrieve_urls(@external_urls)
84
- end
85
-
86
77
  # Proofer runs faster if we pull out all the external URLs and run the checks
87
78
  # at the end. Otherwise, we're halting the consuming process for every file during
88
79
  # `process_files`.
@@ -111,11 +102,11 @@ module HTMLProofer
111
102
  def establish_queue(external_urls)
112
103
  external_urls.each_pair do |url, filenames|
113
104
  url = begin
114
- clean_url(url)
115
- rescue URI::Error, Addressable::URI::InvalidURIError
116
- add_external_issue(filenames, "#{url} is an invalid URL")
117
- next
118
- end
105
+ clean_url(url)
106
+ rescue URI::Error, Addressable::URI::InvalidURIError
107
+ add_external_issue(filenames, "#{url} is an invalid URL")
108
+ next
109
+ end
119
110
 
120
111
  method = if hash?(url) && @options[:check_external_hash]
121
112
  :get
@@ -129,10 +120,10 @@ module HTMLProofer
129
120
  def clean_url(href)
130
121
  # catch any obvious issues, like strings in port numbers
131
122
  parsed = Addressable::URI.parse(href)
132
- if href !~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
133
- parsed.normalize
134
- else
123
+ if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
135
124
  href
125
+ else
126
+ parsed.normalize
136
127
  end
137
128
  end
138
129
 
@@ -5,7 +5,7 @@ require 'nokogumbo'
5
5
  module HTMLProofer
6
6
  module Utils
7
7
  def pluralize(count, single, plural)
8
- "#{count} #{(count == 1 ? single : plural)}"
8
+ "#{count} #{count == 1 ? single : plural}"
9
9
  end
10
10
 
11
11
  def create_nokogiri(path)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.16.0'
4
+ VERSION = '3.17.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.16.0
4
+ version: 3.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-11 00:00:00.000000000 Z
11
+ date: 2020-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -293,15 +293,15 @@ homepage: https://github.com/gjtorikian/html-proofer
293
293
  licenses:
294
294
  - MIT
295
295
  metadata: {}
296
- post_install_message:
296
+ post_install_message:
297
297
  rdoc_options: []
298
298
  require_paths:
299
299
  - lib
300
300
  required_ruby_version: !ruby/object:Gem::Requirement
301
301
  requirements:
302
- - - ">="
302
+ - - "~>"
303
303
  - !ruby/object:Gem::Version
304
- version: '0'
304
+ version: '2.4'
305
305
  required_rubygems_version: !ruby/object:Gem::Requirement
306
306
  requirements:
307
307
  - - ">="
@@ -309,7 +309,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
309
309
  version: '0'
310
310
  requirements: []
311
311
  rubygems_version: 3.1.2
312
- signing_key:
312
+ signing_key:
313
313
  specification_version: 4
314
314
  summary: A set of tests to validate your HTML output. These tests check if your image
315
315
  references are legitimate, if they have alt tags, if your internal links are working,