html-proofer 3.15.1 → 3.17.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9517668b83d7e09665e775b58869aad3688ffa0edf579cb3d9e9c519ca88ceb2
4
- data.tar.gz: c63e8d6bab0e5513f95b1315cee1d0f55a6326815a70105a115d8629d97dc0a8
3
+ metadata.gz: f8be1f60d1495959b468c2cfad3a4d659a5817b934bb2011906540d296e2a062
4
+ data.tar.gz: 39ecb6a6899913c4745289443ff77cf1483d3578689fc616099bb00df90a4f16
5
5
  SHA512:
6
- metadata.gz: efeffe5729c11c5bf129bf37c46388f4942158c0079fe36458d25719bb3264ef8421b54c1cafd458f41497756306de1fe0b663e903a6fec9480243f37aea3edf
7
- data.tar.gz: 69cd979675e246ba566393bae545e58205e6241d46031a69665c96d3ce84b8cce77acf622e544f238079b249a430935485a7cfd29d615c312c587e879233ba1f
6
+ metadata.gz: ce1749adb1022b2a3245396c28b990d4de6bfe368fb5944cddf81b48822f54dff8c744b847e65bcd4cb040f2e7f63a1cbdcccebda943380ab767ecbd96161c8a
7
+ data.tar.gz: 0ffc3e5095dbf40272113991b521a0909408a40715efb2119bfd8f4a310b62b6884fe2566beb198e873f781d6d0ebefed06a966a91a4772309c53835c66ce483
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- STDOUT.sync = true
4
+ $stdout.sync = true
5
5
 
6
6
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
7
7
 
@@ -41,6 +41,8 @@ Mercenary.program(:htmlproofer) do |p|
41
41
  p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
42
42
  p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
43
43
  p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
44
+ p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
45
+ p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
44
46
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
45
47
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
46
48
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
@@ -48,7 +50,7 @@ Mercenary.program(:htmlproofer) do |p|
48
50
  p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
49
51
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
50
52
  p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
51
- p.option 'root_dir', '--root-folder PATH', String, 'The absolute path to the directory serving your html-files. Used when running html-proofer on a file, rather than a directory.'
53
+ p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
52
54
 
53
55
  p.action do |args, opts|
54
56
  args = ['.'] if args.empty?
@@ -82,6 +84,8 @@ Mercenary.program(:htmlproofer) do |p|
82
84
  options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
83
85
  options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
84
86
  options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
87
+ options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
88
+ options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
85
89
 
86
90
  options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
87
91
 
@@ -59,6 +59,8 @@ module HTMLProofer
59
59
  end
60
60
 
61
61
  def add(url, filenames, status, msg = '')
62
+ return unless use_cache?
63
+
62
64
  data = {
63
65
  time: @cache_time,
64
66
  filenames: filenames,
@@ -92,12 +94,12 @@ module HTMLProofer
92
94
  del = 0
93
95
  @cache_log.delete_if do |url, _|
94
96
  url = clean_url(url)
95
- if !found_urls.include?(url)
97
+ if found_urls.include?(url)
98
+ false
99
+ else
96
100
  @logger.log :debug, "Removing #{url} from cache check"
97
101
  del += 1
98
102
  true
99
- else
100
- false
101
103
  end
102
104
  end
103
105
 
@@ -115,14 +117,12 @@ module HTMLProofer
115
117
  @load.nil?
116
118
  end
117
119
 
118
- def retrieve_urls(external_urls)
119
- urls_to_check = detect_url_changes(external_urls)
120
+ def retrieve_urls(urls)
121
+ urls_to_check = detect_url_changes(urls)
120
122
  @cache_log.each_pair do |url, cache|
121
- if within_timeframe?(cache['time'])
122
- next if cache['message'].empty? # these were successes to skip
123
- else
124
- urls_to_check[url] = cache['filenames'] # recheck expired links
125
- end
123
+ next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
124
+
125
+ urls_to_check[url] = cache['filenames'] # recheck expired links
126
126
  end
127
127
  urls_to_check
128
128
  end
@@ -3,20 +3,23 @@
3
3
  module HTMLProofer
4
4
  # Mostly handles issue management and collecting of external URLs.
5
5
  class Check
6
- attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
6
+ attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
7
7
 
8
- def initialize(src, path, html, options)
8
+ def initialize(src, path, html, logger, cache, options)
9
9
  @src = src
10
10
  @path = path
11
11
  @html = remove_ignored(html)
12
+ @logger = logger
13
+ @cache = cache
12
14
  @options = options
13
15
  @issues = []
16
+ @internal_urls = {}
14
17
  @external_urls = {}
15
18
  end
16
19
 
17
20
  def create_element(node)
18
21
  @node = node
19
- Element.new(node, self)
22
+ Element.new(node, self, @logger)
20
23
  end
21
24
 
22
25
  def run
@@ -25,15 +28,20 @@ module HTMLProofer
25
28
 
26
29
  def add_issue(desc, line: nil, status: -1, content: nil)
27
30
  @issues << Issue.new(@path, desc, line: line, status: status, content: content)
31
+ false
32
+ end
33
+
34
+ def add_to_internal_urls(url, internal_url)
35
+ if @internal_urls[url]
36
+ @internal_urls[url] << internal_url
37
+ else
38
+ @internal_urls[url] = [internal_url]
39
+ end
28
40
  end
29
41
 
30
42
  def add_to_external_urls(url)
31
43
  return if @external_urls[url]
32
44
 
33
- add_path_for_url(url)
34
- end
35
-
36
- def add_path_for_url(url)
37
45
  if @external_urls[url]
38
46
  @external_urls[url] << @path
39
47
  else
@@ -6,7 +6,9 @@ class HtmlCheck < ::HTMLProofer::Check
6
6
  INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
7
7
  INVALID_PREFIX = /Namespace prefix/.freeze
8
8
  PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
9
- DOCTYPE_MSG = /The doctype must be the first token in the document/.freeze
9
+ DOCTYPE_MSG = /Expected a doctype token/.freeze
10
+ EOF_IN_TAG = /End of input in tag/.freeze
11
+ MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
10
12
 
11
13
  def run
12
14
  @html.errors.each do |error|
@@ -24,6 +26,10 @@ class HtmlCheck < ::HTMLProofer::Check
24
26
  options[:validation][:report_missing_names]
25
27
  when DOCTYPE_MSG
26
28
  options[:validation][:report_missing_doctype]
29
+ when EOF_IN_TAG
30
+ options[:validation][:report_eof_tags]
31
+ when MISMATCHED_TAGS
32
+ options[:validation][:report_mismatched_tags]
27
33
  else
28
34
  true
29
35
  end
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
4
4
  include HTMLProofer::Utils
5
5
 
6
6
  def missing_href?
7
+ return blank?(@link.src) if @node.name == 'source'
8
+
7
9
  blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
8
10
  end
9
11
 
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
12
14
  end
13
15
 
14
16
  def run
15
- @html.css('a, link').each do |node|
17
+ @html.css('a, link, source').each do |node|
16
18
  @link = create_element(node)
17
19
  line = node.line
18
20
  content = node.to_s
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
49
51
  # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
50
52
  next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
51
53
 
52
- add_to_external_urls(@link.href)
54
+ add_to_external_urls(@link.href || @link.src)
53
55
  next
54
- elsif @link.internal? && !@link.exists?
55
- add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
56
+ elsif @link.internal?
57
+ if @link.exists?
58
+ add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
59
+ else
60
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
61
+ end
56
62
  end
63
+ end
57
64
 
58
- # does the local directory have a trailing slash?
59
- if @link.unslashed_directory? @link.absolute_path
60
- add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
61
- next
62
- end
65
+ external_urls
66
+ end
63
67
 
64
- # verify the target hash
65
- handle_hash(@link, line, content) if @link.hash
68
+ def check_internal_link(link, line, content)
69
+ # does the local directory have a trailing slash?
70
+ if link.unslashed_directory?(link.absolute_path)
71
+ add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line: line, content: content)
72
+ return false
66
73
  end
67
74
 
68
- external_urls
75
+ # verify the target hash
76
+ return handle_hash(link, line, content) if link.hash
77
+
78
+ true
69
79
  end
70
80
 
71
81
  def check_schemes(link, line, content)
@@ -94,23 +104,27 @@ class LinkCheck < ::HTMLProofer::Check
94
104
  end
95
105
 
96
106
  def handle_hash(link, line, content)
97
- if link.internal? && !hash_check(link.html, link.hash)
98
- add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
107
+ if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
108
+ return add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
99
109
  elsif link.external?
100
- external_link_check(link, line, content)
110
+ return external_link_check(link, line, content)
101
111
  end
112
+
113
+ true
102
114
  end
103
115
 
104
116
  def external_link_check(link, line, content)
105
- if !link.exists?
106
- add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
117
+ if link.exists? # rubocop:disable Style/GuardClause
118
+ target_html = create_nokogiri(link.absolute_path)
119
+ return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
107
120
  else
108
- target_html = create_nokogiri link.absolute_path
109
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
121
+ return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
110
122
  end
123
+
124
+ true
111
125
  end
112
126
 
113
- def hash_check(html, href_hash)
127
+ def hash_exists?(html, href_hash)
114
128
  decoded_href_hash = Addressable::URI.unescape(href_hash)
115
129
  fragment_ids = [href_hash, decoded_href_hash]
116
130
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
148
162
 
149
163
  class XpathFunctions
150
164
  def case_sensitive_equals(node_set, str_to_match)
151
- node_set.find_all { |node| node.to_s. == str_to_match.to_s }
165
+ node_set.find_all { |node| node.to_s.== str_to_match.to_s }
166
+ end
167
+ end
168
+
169
+ class InternalLink
170
+ attr_reader :link, :href, :path, :line, :content
171
+
172
+ def initialize(link, path, line, content)
173
+ @link = link
174
+ @href = @link.href
175
+ @path = path
176
+ @line = line
177
+ @content = content
152
178
  end
153
179
  end
154
180
  end
@@ -3,12 +3,12 @@
3
3
  class OpenGraphElement < ::HTMLProofer::Element
4
4
  attr_reader :src
5
5
 
6
- def initialize(obj, check)
7
- super(obj, check)
6
+ def initialize(obj, check, logger)
7
+ super(obj, check, logger)
8
8
  # Fake up src from the content attribute
9
9
  instance_variable_set('@src', @content)
10
10
 
11
- @src.insert 0, 'http:' if @src =~ %r{^//}
11
+ @src.insert 0, 'http:' if %r{^//}.match?(@src)
12
12
  end
13
13
  end
14
14
 
@@ -23,7 +23,7 @@ class OpenGraphCheck < ::HTMLProofer::Check
23
23
 
24
24
  def run
25
25
  @html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
26
- @opengraph = OpenGraphElement.new(m, self)
26
+ @opengraph = OpenGraphElement.new(m, self, @logger)
27
27
 
28
28
  next if @opengraph.ignore?
29
29
 
@@ -52,7 +52,9 @@ module HTMLProofer
52
52
  report_script_embeds: false,
53
53
  report_missing_names: false,
54
54
  report_invalid_tags: false,
55
- report_missing_doctype: false
55
+ report_missing_doctype: false,
56
+ report_eof_tags: false,
57
+ report_mismatched_tags: false
56
58
  }.freeze
57
59
 
58
60
  CACHE_DEFAULTS = {}.freeze
@@ -78,7 +80,7 @@ module HTMLProofer
78
80
  begin
79
81
  JSON.parse(config)
80
82
  rescue StandardError
81
- raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
83
+ raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
82
84
  end
83
85
  end
84
86
  end
@@ -10,12 +10,18 @@ module HTMLProofer
10
10
 
11
11
  attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
12
12
 
13
- def initialize(obj, check)
13
+ def initialize(obj, check, logger)
14
+ @logger = logger
14
15
  # Construct readable ivars for every element
15
- obj.attributes.each_pair do |attribute, value|
16
- name = attribute.tr('-:.', '_').to_s.to_sym
17
- (class << self; self; end).send(:attr_reader, name)
18
- instance_variable_set("@#{name}", value.value)
16
+ begin
17
+ obj.attributes.each_pair do |attribute, value|
18
+ name = attribute.tr('-:.;@', '_').to_s.to_sym
19
+ (class << self; self; end).send(:attr_reader, name)
20
+ instance_variable_set("@#{name}", value.value)
21
+ end
22
+ rescue NameError => e
23
+ @logger.log :error, "Attribute set `#{obj}` contains an error!"
24
+ raise e
19
25
  end
20
26
 
21
27
  @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
@@ -36,19 +42,19 @@ module HTMLProofer
36
42
 
37
43
  # fix up missing protocols
38
44
  if defined?(@href)
39
- @href.insert(0, 'http:') if @href =~ %r{^//}
45
+ @href.insert(0, 'http:') if %r{^//}.match?(@href)
40
46
  else
41
47
  @href = nil
42
48
  end
43
49
 
44
50
  if defined?(@src)
45
- @src.insert(0, 'http:') if @src =~ %r{^//}
51
+ @src.insert(0, 'http:') if %r{^//}.match?(@src)
46
52
  else
47
53
  @src = nil
48
54
  end
49
55
 
50
56
  if defined?(@srcset)
51
- @srcset.insert(0, 'http:') if @srcset =~ %r{^//}
57
+ @srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
52
58
  else
53
59
  @srcset = nil
54
60
  end
@@ -99,12 +105,10 @@ module HTMLProofer
99
105
  return true if @data_proofer_ignore
100
106
  return true if @parent_ignorable
101
107
 
102
- return true if url =~ /^javascript:/
108
+ return true if /^javascript:/.match?(url)
103
109
 
104
110
  # ignore base64 encoded images
105
- if %w[ImageCheck FaviconCheck].include? @type
106
- return true if url =~ /^data:image/
107
- end
111
+ return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
108
112
 
109
113
  # ignore user defined URLs
110
114
  return true if ignores_pattern_check(@check.options[:url_ignore])
@@ -165,6 +169,10 @@ module HTMLProofer
165
169
  url.start_with?('?')
166
170
  end
167
171
 
172
+ def absolute_path?(path)
173
+ path.start_with?('/')
174
+ end
175
+
168
176
  def file_path
169
177
  return if path.nil? || path.empty?
170
178
 
@@ -172,22 +180,16 @@ module HTMLProofer
172
180
 
173
181
  path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
174
182
 
175
- if path =~ %r{^/} # path relative to root
176
- if File.directory?(@check.src)
177
- base = @check.src
178
- else
179
- root_dir = @check.options[:root_dir]
180
- base = root_dir || File.dirname(@check.src)
181
- end
182
- elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
183
- base = File.dirname @check.path
184
- elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
185
- base = File.dirname @check.path
186
- else # relative link, path is a directory
187
- base = @check.path
188
- end
189
-
190
- file = File.join base, path
183
+ base = if absolute_path?(path) # path relative to root
184
+ @check.options[:root_dir] || File.dirname(@check.src)
185
+ elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
186
+ File.dirname(@check.path)
187
+ elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
188
+ File.dirname(@check.path)
189
+ else # relative link, path is a directory
190
+ @check.path
191
+ end
192
+ file = File.join(base, path)
191
193
  if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
192
194
  file = "#{file}#{@check.options[:extension]}"
193
195
  elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
@@ -199,22 +201,24 @@ module HTMLProofer
199
201
 
200
202
  # checks if a file exists relative to the current pwd
201
203
  def exists?
202
- return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
204
+ return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
203
205
 
204
- @checked_paths[absolute_path] = File.exist? absolute_path
206
+ @checked_paths[absolute_path] = File.exist?(absolute_path)
205
207
  end
206
208
 
207
209
  def absolute_path
208
210
  path = file_path || @check.path
209
- File.expand_path path, Dir.pwd
211
+
212
+ File.expand_path(path, Dir.pwd)
210
213
  end
211
214
 
212
215
  def ignores_pattern_check(links)
213
216
  links.each do |ignore|
214
- if ignore.is_a? String
217
+ case ignore
218
+ when String
215
219
  return true if ignore == url
216
- elsif ignore.is_a? Regexp
217
- return true if ignore =~ url
220
+ when Regexp
221
+ return true if ignore&.match?(url)
218
222
  end
219
223
  end
220
224
 
@@ -6,6 +6,7 @@ module HTMLProofer
6
6
 
7
7
  class InvalidHtmlError < StandardError
8
8
  def initialize(failures)
9
+ super
9
10
  @failures = failures
10
11
  end
11
12
 
@@ -21,7 +22,8 @@ module HTMLProofer
21
22
  allow_hash_href: true,
22
23
  check_external_hash: true,
23
24
  check_html: true,
24
- url_ignore: [/.*/] # Don't try to check local files exist
25
+ url_ignore: [/.*/], # Don't try to check if local files exist
26
+ validation: { report_eof_tags: true }
25
27
  }
26
28
  end
27
29
 
@@ -52,7 +54,7 @@ module HTMLProofer
52
54
  def call(env)
53
55
  result = @app.call(env)
54
56
  return result if env['REQUEST_METHOD'] != 'GET'
55
- return result if env['QUERY_STRING'] =~ /proofer-ignore/
57
+ return result if /proofer-ignore/.match?(env['QUERY_STRING'])
56
58
  return result if result.first != 200
57
59
 
58
60
  body = []
@@ -4,7 +4,7 @@ module HTMLProofer
4
4
  class Runner
5
5
  include HTMLProofer::Utils
6
6
 
7
- attr_reader :options, :external_urls, :failures
7
+ attr_reader :options, :internal_urls, :external_urls, :failures
8
8
 
9
9
  def initialize(src, opts = {})
10
10
  @src = src
@@ -20,6 +20,8 @@ module HTMLProofer
20
20
 
21
21
  @type = @options.delete(:type)
22
22
  @logger = HTMLProofer::Log.new(@options[:log_level])
23
+ @cache = Cache.new(@logger, @options[:cache])
24
+ @internal_link_checks = nil
23
25
 
24
26
  # Add swap patterns for internal domains
25
27
  unless @options[:internal_domains].empty?
@@ -30,7 +32,11 @@ module HTMLProofer
30
32
  end
31
33
  end
32
34
 
35
+ @internal_urls = {}
36
+ @internal_urls_to_paths = {}
37
+ @external_urls = {}
33
38
  @failures = []
39
+ @before_request = []
34
40
  end
35
41
 
36
42
  def run
@@ -58,15 +64,13 @@ module HTMLProofer
58
64
  end
59
65
  end
60
66
  @external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
61
- validate_urls
67
+ validate_external_urls
62
68
  end
63
69
 
64
70
  # Collects any external URLs found in a directory of files. Also collects
65
71
  # every failed test from process_files.
66
72
  # Sends the external URLs to Typhoeus for batch processing.
67
73
  def check_files
68
- @external_urls = {}
69
-
70
74
  process_files.each do |item|
71
75
  @external_urls.merge!(item[:external_urls])
72
76
  @failures.concat(item[:failures])
@@ -77,9 +81,12 @@ module HTMLProofer
77
81
  # just not run those other checks at all.
78
82
  if @options[:external_only]
79
83
  @failures = []
80
- validate_urls
84
+ validate_external_urls
81
85
  elsif !@options[:disable_external]
82
- validate_urls
86
+ validate_external_urls
87
+ validate_internal_urls
88
+ else
89
+ validate_internal_urls
83
90
  end
84
91
  end
85
92
 
@@ -100,8 +107,21 @@ module HTMLProofer
100
107
  @src.each do |src|
101
108
  checks.each do |klass|
102
109
  @logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
103
- check = Object.const_get(klass).new(src, path, html, @options)
110
+ check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
104
111
  check.run
112
+
113
+ if klass == 'LinkCheck'
114
+ @internal_link_checks = check
115
+ check.internal_urls.each_pair do |url, internal_urls|
116
+ if @internal_urls_to_paths[url]
117
+ @internal_urls_to_paths[url].concat(internal_urls.map(&:path))
118
+ else
119
+ @internal_urls_to_paths[url] = internal_urls.map(&:path)
120
+ end
121
+ end
122
+ @internal_urls.merge!(check.internal_urls)
123
+ end
124
+
105
125
  external_urls = check.external_urls
106
126
  external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
107
127
  result[:external_urls].merge!(external_urls)
@@ -112,15 +132,35 @@ module HTMLProofer
112
132
  end
113
133
 
114
134
  def check_path(path)
115
- check_parsed create_nokogiri(path), path
135
+ check_parsed(create_nokogiri(path), path)
116
136
  end
117
137
 
118
- def validate_urls
119
- url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
138
+ def validate_external_urls
139
+ url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
140
+ url_validator.before_request = @before_request
120
141
  @failures.concat(url_validator.run)
121
142
  @external_urls = url_validator.external_urls
122
143
  end
123
144
 
145
+ def validate_internal_urls
146
+ if @cache.use_cache?
147
+ urls_to_check = load_internal_cache
148
+
149
+ urls_to_check.each_pair do |url, internal_urls|
150
+ result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.line, internal_urls.first.content)
151
+ code = result ? 200 : 404
152
+ @cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
153
+ end
154
+ @cache.write
155
+ else
156
+ @internal_urls.values.flatten.each do |internal_url|
157
+ @internal_link_checks.check_internal_link(internal_url.link, internal_url.line, internal_url.content)
158
+ end
159
+ end
160
+
161
+ @failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
162
+ end
163
+
124
164
  def files
125
165
  @files ||= if @type == :directory
126
166
  @src.map do |src|
@@ -147,6 +187,8 @@ module HTMLProofer
147
187
  def checks
148
188
  return @checks if defined?(@checks) && !@checks.nil?
149
189
 
190
+ return (@checks = ['LinkCheck']) if @type == :links
191
+
150
192
  @checks = HTMLProofer::Check.subchecks.map(&:name)
151
193
  @checks.delete('FaviconCheck') unless @options[:check_favicon]
152
194
  @checks.delete('HtmlCheck') unless @options[:check_html]
@@ -171,5 +213,29 @@ module HTMLProofer
171
213
  failure_text = pluralize(count, 'failure', 'failures')
172
214
  raise @logger.colorize :fatal, "HTML-Proofer found #{failure_text}!"
173
215
  end
216
+
217
+ # Set before_request callback.
218
+ #
219
+ # @example Set before_request.
220
+ # request.before_request { |request| p "yay" }
221
+ #
222
+ # @param [ Block ] block The block to execute.
223
+ #
224
+ # @yield [ Typhoeus::Request ]
225
+ #
226
+ # @return [ Array<Block> ] All before_request blocks.
227
+ def before_request(&block)
228
+ @before_request ||= []
229
+ @before_request << block if block
230
+ @before_request
231
+ end
232
+
233
+ def load_internal_cache
234
+ urls_to_check = @cache.retrieve_urls(@internal_urls)
235
+ cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
236
+ @logger.log :info, "Found #{cache_text} in the cache..."
237
+
238
+ urls_to_check
239
+ end
174
240
  end
175
241
  end
@@ -10,21 +10,23 @@ module HTMLProofer
10
10
  include HTMLProofer::Utils
11
11
 
12
12
  attr_reader :external_urls
13
+ attr_writer :before_request
13
14
 
14
- def initialize(logger, external_urls, options)
15
+ def initialize(logger, cache, external_urls, options)
15
16
  @logger = logger
16
17
  @external_urls = external_urls
17
18
  @failed_tests = []
18
19
  @options = options
19
20
  @hydra = Typhoeus::Hydra.new(@options[:hydra])
20
- @cache = Cache.new(@logger, @options[:cache])
21
+ @cache = cache
22
+ @before_request = []
21
23
  end
22
24
 
23
25
  def run
24
26
  @external_urls = remove_query_values
25
27
 
26
28
  if @cache.use_cache?
27
- urls_to_check = load_cache
29
+ urls_to_check = @cache.retrieve_urls(@external_urls)
28
30
  external_link_checker(urls_to_check)
29
31
  @cache.write
30
32
  else
@@ -41,11 +43,11 @@ module HTMLProofer
41
43
  iterable_external_urls = @external_urls.dup
42
44
  @external_urls.each_key do |url|
43
45
  uri = begin
44
- Addressable::URI.parse(url)
45
- rescue URI::Error, Addressable::URI::InvalidURIError
46
- @logger.log :error, "#{url} is an invalid URL"
47
- nil
48
- end
46
+ Addressable::URI.parse(url)
47
+ rescue URI::Error, Addressable::URI::InvalidURIError
48
+ @logger.log :error, "#{url} is an invalid URL"
49
+ nil
50
+ end
49
51
  next if uri.nil? || uri.query.nil?
50
52
 
51
53
  iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
@@ -72,15 +74,6 @@ module HTMLProofer
72
74
  uri.host + uri.path
73
75
  end
74
76
 
75
- def load_cache
76
- cache_count = @cache.size
77
- cache_text = pluralize(cache_count, 'link', 'links')
78
-
79
- @logger.log :info, "Found #{cache_text} in the cache..."
80
-
81
- @cache.retrieve_urls(@external_urls)
82
- end
83
-
84
77
  # Proofer runs faster if we pull out all the external URLs and run the checks
85
78
  # at the end. Otherwise, we're halting the consuming process for every file during
86
79
  # `process_files`.
@@ -109,11 +102,11 @@ module HTMLProofer
109
102
  def establish_queue(external_urls)
110
103
  external_urls.each_pair do |url, filenames|
111
104
  url = begin
112
- clean_url(url)
113
- rescue URI::Error, Addressable::URI::InvalidURIError
114
- add_external_issue(filenames, "#{url} is an invalid URL")
115
- next
116
- end
105
+ clean_url(url)
106
+ rescue URI::Error, Addressable::URI::InvalidURIError
107
+ add_external_issue(filenames, "#{url} is an invalid URL")
108
+ next
109
+ end
117
110
 
118
111
  method = if hash?(url) && @options[:check_external_hash]
119
112
  :get
@@ -127,16 +120,19 @@ module HTMLProofer
127
120
  def clean_url(href)
128
121
  # catch any obvious issues, like strings in port numbers
129
122
  parsed = Addressable::URI.parse(href)
130
- if href !~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
131
- parsed.normalize
132
- else
123
+ if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
133
124
  href
125
+ else
126
+ parsed.normalize
134
127
  end
135
128
  end
136
129
 
137
130
  def queue_request(method, href, filenames)
138
131
  opts = @options[:typhoeus].merge(method: method)
139
132
  request = Typhoeus::Request.new(href, opts)
133
+ @before_request.each do |callback|
134
+ callback.call(request)
135
+ end
140
136
  request.on_complete { |response| response_handler(response, filenames) }
141
137
  @hydra.queue request
142
138
  end
@@ -5,7 +5,7 @@ require 'nokogumbo'
5
5
  module HTMLProofer
6
6
  module Utils
7
7
  def pluralize(count, single, plural)
8
- "#{count} #{(count == 1 ? single : plural)}"
8
+ "#{count} #{count == 1 ? single : plural}"
9
9
  end
10
10
 
11
11
  def create_nokogiri(path)
@@ -15,7 +15,7 @@ module HTMLProofer
15
15
  path
16
16
  end
17
17
 
18
- Nokogiri::HTML5(content)
18
+ Nokogiri::HTML5(content, max_errors: -1)
19
19
  end
20
20
 
21
21
  def swap(href, replacement)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.15.1'
4
+ VERSION = '3.17.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.15.1
4
+ version: 3.17.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-20 00:00:00.000000000 Z
11
+ date: 2020-11-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -293,23 +293,23 @@ homepage: https://github.com/gjtorikian/html-proofer
293
293
  licenses:
294
294
  - MIT
295
295
  metadata: {}
296
- post_install_message:
296
+ post_install_message:
297
297
  rdoc_options: []
298
298
  require_paths:
299
299
  - lib
300
300
  required_ruby_version: !ruby/object:Gem::Requirement
301
301
  requirements:
302
- - - ">="
302
+ - - "~>"
303
303
  - !ruby/object:Gem::Version
304
- version: '0'
304
+ version: '2.4'
305
305
  required_rubygems_version: !ruby/object:Gem::Requirement
306
306
  requirements:
307
307
  - - ">="
308
308
  - !ruby/object:Gem::Version
309
309
  version: '0'
310
310
  requirements: []
311
- rubygems_version: 3.0.6
312
- signing_key:
311
+ rubygems_version: 3.1.2
312
+ signing_key:
313
313
  specification_version: 4
314
314
  summary: A set of tests to validate your HTML output. These tests check if your image
315
315
  references are legitimate, if they have alt tags, if your internal links are working,