html-proofer 3.16.0 → 3.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39ffac5e6c23ab7b75d99dd69779be2109ed8b02dc4a5cb0a33740119cb66766
4
- data.tar.gz: cdaf142dfde936fa1d88d410499c04fdbbb3b9814bc00ba6800ff3edb8acee50
3
+ metadata.gz: 9b5a47189e872130e01a2080e6e8ddb1f7f22520098deaf941960194d2338b2b
4
+ data.tar.gz: 3543c42860956427e8c828861f76e1c5cd984d79054c091c6e5f97e0352b3137
5
5
  SHA512:
6
- metadata.gz: 34a4253b2eb9674060583c0f9006f9c36204bfd8bbb3e347293874e8db3248153e1efcdb13cbb9a5fc7967aabe478773a6a33c3c2372b4e819ecc0c41af43f36
7
- data.tar.gz: 34530aeb5fb2836568aa5f7ebea7f154ef8e58c82ac0803aeba5632f704e7aed6c8111a095546c56b1331fa3dc1b563b08eac7cff0205aee4056815ebd8a6478
6
+ metadata.gz: 1defd1fb2ee651015231b0ed4d1407ed56e25a907e0602ee9e175b23f3b88938e020f438d2171ceea8d52bc58984901543c97184a3da13c9b08cdaf62200a4aa
7
+ data.tar.gz: 97eb6120db724822830398a85f14faa4415638e965d21aef0703991e7a14d335f32df65d72bb877be6f0706f24154deea3f08a6bbeb2cdf5d7c392d03ce2af23
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- STDOUT.sync = true
4
+ $stdout.sync = true
5
5
 
6
6
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
7
7
 
@@ -59,6 +59,8 @@ module HTMLProofer
59
59
  end
60
60
 
61
61
  def add(url, filenames, status, msg = '')
62
+ return unless use_cache?
63
+
62
64
  data = {
63
65
  time: @cache_time,
64
66
  filenames: filenames,
@@ -92,12 +94,12 @@ module HTMLProofer
92
94
  del = 0
93
95
  @cache_log.delete_if do |url, _|
94
96
  url = clean_url(url)
95
- if !found_urls.include?(url)
97
+ if found_urls.include?(url)
98
+ false
99
+ else
96
100
  @logger.log :debug, "Removing #{url} from cache check"
97
101
  del += 1
98
102
  true
99
- else
100
- false
101
103
  end
102
104
  end
103
105
 
@@ -115,12 +117,11 @@ module HTMLProofer
115
117
  @load.nil?
116
118
  end
117
119
 
118
- def retrieve_urls(external_urls)
119
- urls_to_check = detect_url_changes(external_urls)
120
+ def retrieve_urls(urls)
121
+ urls_to_check = detect_url_changes(urls)
120
122
  @cache_log.each_pair do |url, cache|
121
- if within_timeframe?(cache['time'])
122
- next if cache['message'].empty? # these were successes to skip
123
- end
123
+ next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
124
+
124
125
  urls_to_check[url] = cache['filenames'] # recheck expired links
125
126
  end
126
127
  urls_to_check
@@ -3,15 +3,17 @@
3
3
  module HTMLProofer
4
4
  # Mostly handles issue management and collecting of external URLs.
5
5
  class Check
6
- attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
6
+ attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
7
7
 
8
- def initialize(src, path, html, logger, options)
8
+ def initialize(src, path, html, logger, cache, options)
9
9
  @src = src
10
10
  @path = path
11
11
  @html = remove_ignored(html)
12
12
  @logger = logger
13
+ @cache = cache
13
14
  @options = options
14
15
  @issues = []
16
+ @internal_urls = {}
15
17
  @external_urls = {}
16
18
  end
17
19
 
@@ -26,15 +28,20 @@ module HTMLProofer
26
28
 
27
29
  def add_issue(desc, line: nil, status: -1, content: nil)
28
30
  @issues << Issue.new(@path, desc, line: line, status: status, content: content)
31
+ false
32
+ end
33
+
34
+ def add_to_internal_urls(url, internal_url)
35
+ if @internal_urls[url]
36
+ @internal_urls[url] << internal_url
37
+ else
38
+ @internal_urls[url] = [internal_url]
39
+ end
29
40
  end
30
41
 
31
42
  def add_to_external_urls(url)
32
43
  return if @external_urls[url]
33
44
 
34
- add_path_for_url(url)
35
- end
36
-
37
- def add_path_for_url(url)
38
45
  if @external_urls[url]
39
46
  @external_urls[url] << @path
40
47
  else
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
4
4
  include HTMLProofer::Utils
5
5
 
6
6
  def missing_href?
7
+ return blank?(@link.src) if @node.name == 'source'
8
+
7
9
  blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
8
10
  end
9
11
 
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
12
14
  end
13
15
 
14
16
  def run
15
- @html.css('a, link').each do |node|
17
+ @html.css('a, link, source').each do |node|
16
18
  @link = create_element(node)
17
19
  line = node.line
18
20
  content = node.to_s
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
49
51
  # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
50
52
  next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
51
53
 
52
- add_to_external_urls(@link.href)
54
+ add_to_external_urls(@link.href || @link.src)
53
55
  next
54
- elsif @link.internal? && !@link.exists?
55
- add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
56
+ elsif @link.internal?
57
+ if @link.exists?
58
+ add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
59
+ else
60
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
61
+ end
56
62
  end
63
+ end
57
64
 
58
- # does the local directory have a trailing slash?
59
- if @link.unslashed_directory? @link.absolute_path
60
- add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
61
- next
62
- end
65
+ external_urls
66
+ end
63
67
 
64
- # verify the target hash
65
- handle_hash(@link, line, content) if @link.hash
68
+ def check_internal_link(link, line, content)
69
+ # does the local directory have a trailing slash?
70
+ if link.unslashed_directory?(link.absolute_path)
71
+ add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line: line, content: content)
72
+ return false
66
73
  end
67
74
 
68
- external_urls
75
+ # verify the target hash
76
+ return handle_hash(link, line, content) if link.hash
77
+
78
+ true
69
79
  end
70
80
 
71
81
  def check_schemes(link, line, content)
@@ -94,23 +104,27 @@ class LinkCheck < ::HTMLProofer::Check
94
104
  end
95
105
 
96
106
  def handle_hash(link, line, content)
97
- if link.internal? && !hash_check(link.html, link.hash)
98
- add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
107
+ if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
108
+ return add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
99
109
  elsif link.external?
100
- external_link_check(link, line, content)
110
+ return external_link_check(link, line, content)
101
111
  end
112
+
113
+ true
102
114
  end
103
115
 
104
116
  def external_link_check(link, line, content)
105
- if !link.exists?
106
- add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
117
+ if link.exists? # rubocop:disable Style/GuardClause
118
+ target_html = create_nokogiri(link.absolute_path)
119
+ return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
107
120
  else
108
- target_html = create_nokogiri link.absolute_path
109
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
121
+ return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
110
122
  end
123
+
124
+ true
111
125
  end
112
126
 
113
- def hash_check(html, href_hash)
127
+ def hash_exists?(html, href_hash)
114
128
  decoded_href_hash = Addressable::URI.unescape(href_hash)
115
129
  fragment_ids = [href_hash, decoded_href_hash]
116
130
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
148
162
 
149
163
  class XpathFunctions
150
164
  def case_sensitive_equals(node_set, str_to_match)
151
- node_set.find_all { |node| node.to_s. == str_to_match.to_s }
165
+ node_set.find_all { |node| node.to_s.== str_to_match.to_s }
166
+ end
167
+ end
168
+
169
+ class InternalLink
170
+ attr_reader :link, :href, :path, :line, :content
171
+
172
+ def initialize(link, path, line, content)
173
+ @link = link
174
+ @href = @link.href
175
+ @path = path
176
+ @line = line
177
+ @content = content
152
178
  end
153
179
  end
154
180
  end
@@ -80,7 +80,7 @@ module HTMLProofer
80
80
  begin
81
81
  JSON.parse(config)
82
82
  rescue StandardError
83
- raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
83
+ raise ArgumentError, "Option '#{option_name}' did not contain valid JSON."
84
84
  end
85
85
  end
86
86
  end
@@ -108,9 +108,7 @@ module HTMLProofer
108
108
  return true if /^javascript:/.match?(url)
109
109
 
110
110
  # ignore base64 encoded images
111
- if %w[ImageCheck FaviconCheck].include? @type
112
- return true if /^data:image/.match?(url)
113
- end
111
+ return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
114
112
 
115
113
  # ignore user defined URLs
116
114
  return true if ignores_pattern_check(@check.options[:url_ignore])
@@ -187,7 +185,7 @@ module HTMLProofer
187
185
  end
188
186
  elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
189
187
  base = File.dirname @check.path
190
- elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
188
+ elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
191
189
  base = File.dirname @check.path
192
190
  else # relative link, path is a directory
193
191
  base = @check.path
@@ -6,6 +6,7 @@ module HTMLProofer
6
6
 
7
7
  class InvalidHtmlError < StandardError
8
8
  def initialize(failures)
9
+ super
9
10
  @failures = failures
10
11
  end
11
12
 
@@ -4,7 +4,7 @@ module HTMLProofer
4
4
  class Runner
5
5
  include HTMLProofer::Utils
6
6
 
7
- attr_reader :options, :external_urls, :failures
7
+ attr_reader :options, :internal_urls, :external_urls, :failures
8
8
 
9
9
  def initialize(src, opts = {})
10
10
  @src = src
@@ -20,6 +20,8 @@ module HTMLProofer
20
20
 
21
21
  @type = @options.delete(:type)
22
22
  @logger = HTMLProofer::Log.new(@options[:log_level])
23
+ @cache = Cache.new(@logger, @options[:cache])
24
+ @internal_link_checks = nil
23
25
 
24
26
  # Add swap patterns for internal domains
25
27
  unless @options[:internal_domains].empty?
@@ -30,6 +32,9 @@ module HTMLProofer
30
32
  end
31
33
  end
32
34
 
35
+ @internal_urls = {}
36
+ @internal_urls_to_paths = {}
37
+ @external_urls = {}
33
38
  @failures = []
34
39
  @before_request = []
35
40
  end
@@ -59,15 +64,13 @@ module HTMLProofer
59
64
  end
60
65
  end
61
66
  @external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
62
- validate_urls
67
+ validate_external_urls
63
68
  end
64
69
 
65
70
  # Collects any external URLs found in a directory of files. Also collects
66
71
  # every failed test from process_files.
67
72
  # Sends the external URLs to Typhoeus for batch processing.
68
73
  def check_files
69
- @external_urls = {}
70
-
71
74
  process_files.each do |item|
72
75
  @external_urls.merge!(item[:external_urls])
73
76
  @failures.concat(item[:failures])
@@ -78,9 +81,12 @@ module HTMLProofer
78
81
  # just not run those other checks at all.
79
82
  if @options[:external_only]
80
83
  @failures = []
81
- validate_urls
84
+ validate_external_urls
82
85
  elsif !@options[:disable_external]
83
- validate_urls
86
+ validate_external_urls
87
+ validate_internal_urls
88
+ else
89
+ validate_internal_urls
84
90
  end
85
91
  end
86
92
 
@@ -101,8 +107,21 @@ module HTMLProofer
101
107
  @src.each do |src|
102
108
  checks.each do |klass|
103
109
  @logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
104
- check = Object.const_get(klass).new(src, path, html, @logger, @options)
110
+ check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
105
111
  check.run
112
+
113
+ if klass == 'LinkCheck'
114
+ @internal_link_checks = check
115
+ check.internal_urls.each_pair do |url, internal_urls|
116
+ if @internal_urls_to_paths[url]
117
+ @internal_urls_to_paths[url].concat(internal_urls.map(&:path))
118
+ else
119
+ @internal_urls_to_paths[url] = internal_urls.map(&:path)
120
+ end
121
+ end
122
+ @internal_urls.merge!(check.internal_urls)
123
+ end
124
+
106
125
  external_urls = check.external_urls
107
126
  external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
108
127
  result[:external_urls].merge!(external_urls)
@@ -113,16 +132,35 @@ module HTMLProofer
113
132
  end
114
133
 
115
134
  def check_path(path)
116
- check_parsed create_nokogiri(path), path
135
+ check_parsed(create_nokogiri(path), path)
117
136
  end
118
137
 
119
- def validate_urls
120
- url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
138
+ def validate_external_urls
139
+ url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
121
140
  url_validator.before_request = @before_request
122
141
  @failures.concat(url_validator.run)
123
142
  @external_urls = url_validator.external_urls
124
143
  end
125
144
 
145
+ def validate_internal_urls
146
+ if @cache.use_cache?
147
+ urls_to_check = load_internal_cache
148
+
149
+ urls_to_check.each_pair do |url, internal_urls|
150
+ result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.line, internal_urls.first.content)
151
+ code = result ? 200 : 404
152
+ @cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
153
+ end
154
+ @cache.write
155
+ else
156
+ @internal_urls.values.flatten.each do |internal_url|
157
+ @internal_link_checks.check_internal_link(internal_url.link, internal_url.line, internal_url.content)
158
+ end
159
+ end
160
+
161
+ @failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
162
+ end
163
+
126
164
  def files
127
165
  @files ||= if @type == :directory
128
166
  @src.map do |src|
@@ -191,5 +229,13 @@ module HTMLProofer
191
229
  @before_request << block if block_given?
192
230
  @before_request
193
231
  end
232
+
233
+ def load_internal_cache
234
+ urls_to_check = @cache.retrieve_urls(@internal_urls)
235
+ cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
236
+ @logger.log :info, "Found #{cache_text} in the cache..."
237
+
238
+ urls_to_check
239
+ end
194
240
  end
195
241
  end
@@ -12,13 +12,13 @@ module HTMLProofer
12
12
  attr_reader :external_urls
13
13
  attr_writer :before_request
14
14
 
15
- def initialize(logger, external_urls, options)
15
+ def initialize(logger, cache, external_urls, options)
16
16
  @logger = logger
17
17
  @external_urls = external_urls
18
18
  @failed_tests = []
19
19
  @options = options
20
20
  @hydra = Typhoeus::Hydra.new(@options[:hydra])
21
- @cache = Cache.new(@logger, @options[:cache])
21
+ @cache = cache
22
22
  @before_request = []
23
23
  end
24
24
 
@@ -26,7 +26,7 @@ module HTMLProofer
26
26
  @external_urls = remove_query_values
27
27
 
28
28
  if @cache.use_cache?
29
- urls_to_check = load_cache
29
+ urls_to_check = @cache.retrieve_urls(@external_urls)
30
30
  external_link_checker(urls_to_check)
31
31
  @cache.write
32
32
  else
@@ -43,11 +43,11 @@ module HTMLProofer
43
43
  iterable_external_urls = @external_urls.dup
44
44
  @external_urls.each_key do |url|
45
45
  uri = begin
46
- Addressable::URI.parse(url)
47
- rescue URI::Error, Addressable::URI::InvalidURIError
48
- @logger.log :error, "#{url} is an invalid URL"
49
- nil
50
- end
46
+ Addressable::URI.parse(url)
47
+ rescue URI::Error, Addressable::URI::InvalidURIError
48
+ @logger.log :error, "#{url} is an invalid URL"
49
+ nil
50
+ end
51
51
  next if uri.nil? || uri.query.nil?
52
52
 
53
53
  iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
@@ -74,15 +74,6 @@ module HTMLProofer
74
74
  uri.host + uri.path
75
75
  end
76
76
 
77
- def load_cache
78
- cache_count = @cache.size
79
- cache_text = pluralize(cache_count, 'link', 'links')
80
-
81
- @logger.log :info, "Found #{cache_text} in the cache..."
82
-
83
- @cache.retrieve_urls(@external_urls)
84
- end
85
-
86
77
  # Proofer runs faster if we pull out all the external URLs and run the checks
87
78
  # at the end. Otherwise, we're halting the consuming process for every file during
88
79
  # `process_files`.
@@ -111,11 +102,11 @@ module HTMLProofer
111
102
  def establish_queue(external_urls)
112
103
  external_urls.each_pair do |url, filenames|
113
104
  url = begin
114
- clean_url(url)
115
- rescue URI::Error, Addressable::URI::InvalidURIError
116
- add_external_issue(filenames, "#{url} is an invalid URL")
117
- next
118
- end
105
+ clean_url(url)
106
+ rescue URI::Error, Addressable::URI::InvalidURIError
107
+ add_external_issue(filenames, "#{url} is an invalid URL")
108
+ next
109
+ end
119
110
 
120
111
  method = if hash?(url) && @options[:check_external_hash]
121
112
  :get
@@ -129,10 +120,10 @@ module HTMLProofer
129
120
  def clean_url(href)
130
121
  # catch any obvious issues, like strings in port numbers
131
122
  parsed = Addressable::URI.parse(href)
132
- if href !~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
133
- parsed.normalize
134
- else
123
+ if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
135
124
  href
125
+ else
126
+ parsed.normalize
136
127
  end
137
128
  end
138
129
 
@@ -5,7 +5,7 @@ require 'nokogumbo'
5
5
  module HTMLProofer
6
6
  module Utils
7
7
  def pluralize(count, single, plural)
8
- "#{count} #{(count == 1 ? single : plural)}"
8
+ "#{count} #{count == 1 ? single : plural}"
9
9
  end
10
10
 
11
11
  def create_nokogiri(path)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.16.0'
4
+ VERSION = '3.17.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.16.0
4
+ version: 3.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-11 00:00:00.000000000 Z
11
+ date: 2020-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -293,15 +293,15 @@ homepage: https://github.com/gjtorikian/html-proofer
293
293
  licenses:
294
294
  - MIT
295
295
  metadata: {}
296
- post_install_message:
296
+ post_install_message:
297
297
  rdoc_options: []
298
298
  require_paths:
299
299
  - lib
300
300
  required_ruby_version: !ruby/object:Gem::Requirement
301
301
  requirements:
302
- - - ">="
302
+ - - "~>"
303
303
  - !ruby/object:Gem::Version
304
- version: '0'
304
+ version: '2.4'
305
305
  required_rubygems_version: !ruby/object:Gem::Requirement
306
306
  requirements:
307
307
  - - ">="
@@ -309,7 +309,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
309
309
  version: '0'
310
310
  requirements: []
311
311
  rubygems_version: 3.1.2
312
- signing_key:
312
+ signing_key:
313
313
  specification_version: 4
314
314
  summary: A set of tests to validate your HTML output. These tests check if your image
315
315
  references are legitimate, if they have alt tags, if your internal links are working,