html-proofer 3.15.1 → 3.17.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9517668b83d7e09665e775b58869aad3688ffa0edf579cb3d9e9c519ca88ceb2
4
- data.tar.gz: c63e8d6bab0e5513f95b1315cee1d0f55a6326815a70105a115d8629d97dc0a8
3
+ metadata.gz: f8be1f60d1495959b468c2cfad3a4d659a5817b934bb2011906540d296e2a062
4
+ data.tar.gz: 39ecb6a6899913c4745289443ff77cf1483d3578689fc616099bb00df90a4f16
5
5
  SHA512:
6
- metadata.gz: efeffe5729c11c5bf129bf37c46388f4942158c0079fe36458d25719bb3264ef8421b54c1cafd458f41497756306de1fe0b663e903a6fec9480243f37aea3edf
7
- data.tar.gz: 69cd979675e246ba566393bae545e58205e6241d46031a69665c96d3ce84b8cce77acf622e544f238079b249a430935485a7cfd29d615c312c587e879233ba1f
6
+ metadata.gz: ce1749adb1022b2a3245396c28b990d4de6bfe368fb5944cddf81b48822f54dff8c744b847e65bcd4cb040f2e7f63a1cbdcccebda943380ab767ecbd96161c8a
7
+ data.tar.gz: 0ffc3e5095dbf40272113991b521a0909408a40715efb2119bfd8f4a310b62b6884fe2566beb198e873f781d6d0ebefed06a966a91a4772309c53835c66ce483
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- STDOUT.sync = true
4
+ $stdout.sync = true
5
5
 
6
6
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
7
7
 
@@ -41,6 +41,8 @@ Mercenary.program(:htmlproofer) do |p|
41
41
  p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
42
42
  p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
43
43
  p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
44
+ p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
45
+ p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
44
46
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
45
47
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
46
48
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
@@ -48,7 +50,7 @@ Mercenary.program(:htmlproofer) do |p|
48
50
  p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
49
51
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
50
52
  p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
51
- p.option 'root_dir', '--root-folder PATH', String, 'The absolute path to the directory serving your html-files. Used when running html-proofer on a file, rather than a directory.'
53
+ p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
52
54
 
53
55
  p.action do |args, opts|
54
56
  args = ['.'] if args.empty?
@@ -82,6 +84,8 @@ Mercenary.program(:htmlproofer) do |p|
82
84
  options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
83
85
  options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
84
86
  options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
87
+ options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
88
+ options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
85
89
 
86
90
  options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
87
91
 
@@ -59,6 +59,8 @@ module HTMLProofer
59
59
  end
60
60
 
61
61
  def add(url, filenames, status, msg = '')
62
+ return unless use_cache?
63
+
62
64
  data = {
63
65
  time: @cache_time,
64
66
  filenames: filenames,
@@ -92,12 +94,12 @@ module HTMLProofer
92
94
  del = 0
93
95
  @cache_log.delete_if do |url, _|
94
96
  url = clean_url(url)
95
- if !found_urls.include?(url)
97
+ if found_urls.include?(url)
98
+ false
99
+ else
96
100
  @logger.log :debug, "Removing #{url} from cache check"
97
101
  del += 1
98
102
  true
99
- else
100
- false
101
103
  end
102
104
  end
103
105
 
@@ -115,14 +117,12 @@ module HTMLProofer
115
117
  @load.nil?
116
118
  end
117
119
 
118
- def retrieve_urls(external_urls)
119
- urls_to_check = detect_url_changes(external_urls)
120
+ def retrieve_urls(urls)
121
+ urls_to_check = detect_url_changes(urls)
120
122
  @cache_log.each_pair do |url, cache|
121
- if within_timeframe?(cache['time'])
122
- next if cache['message'].empty? # these were successes to skip
123
- else
124
- urls_to_check[url] = cache['filenames'] # recheck expired links
125
- end
123
+ next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
124
+
125
+ urls_to_check[url] = cache['filenames'] # recheck expired links
126
126
  end
127
127
  urls_to_check
128
128
  end
@@ -3,20 +3,23 @@
3
3
  module HTMLProofer
4
4
  # Mostly handles issue management and collecting of external URLs.
5
5
  class Check
6
- attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
6
+ attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
7
7
 
8
- def initialize(src, path, html, options)
8
+ def initialize(src, path, html, logger, cache, options)
9
9
  @src = src
10
10
  @path = path
11
11
  @html = remove_ignored(html)
12
+ @logger = logger
13
+ @cache = cache
12
14
  @options = options
13
15
  @issues = []
16
+ @internal_urls = {}
14
17
  @external_urls = {}
15
18
  end
16
19
 
17
20
  def create_element(node)
18
21
  @node = node
19
- Element.new(node, self)
22
+ Element.new(node, self, @logger)
20
23
  end
21
24
 
22
25
  def run
@@ -25,15 +28,20 @@ module HTMLProofer
25
28
 
26
29
  def add_issue(desc, line: nil, status: -1, content: nil)
27
30
  @issues << Issue.new(@path, desc, line: line, status: status, content: content)
31
+ false
32
+ end
33
+
34
+ def add_to_internal_urls(url, internal_url)
35
+ if @internal_urls[url]
36
+ @internal_urls[url] << internal_url
37
+ else
38
+ @internal_urls[url] = [internal_url]
39
+ end
28
40
  end
29
41
 
30
42
  def add_to_external_urls(url)
31
43
  return if @external_urls[url]
32
44
 
33
- add_path_for_url(url)
34
- end
35
-
36
- def add_path_for_url(url)
37
45
  if @external_urls[url]
38
46
  @external_urls[url] << @path
39
47
  else
@@ -6,7 +6,9 @@ class HtmlCheck < ::HTMLProofer::Check
6
6
  INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
7
7
  INVALID_PREFIX = /Namespace prefix/.freeze
8
8
  PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
9
- DOCTYPE_MSG = /The doctype must be the first token in the document/.freeze
9
+ DOCTYPE_MSG = /Expected a doctype token/.freeze
10
+ EOF_IN_TAG = /End of input in tag/.freeze
11
+ MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
10
12
 
11
13
  def run
12
14
  @html.errors.each do |error|
@@ -24,6 +26,10 @@ class HtmlCheck < ::HTMLProofer::Check
24
26
  options[:validation][:report_missing_names]
25
27
  when DOCTYPE_MSG
26
28
  options[:validation][:report_missing_doctype]
29
+ when EOF_IN_TAG
30
+ options[:validation][:report_eof_tags]
31
+ when MISMATCHED_TAGS
32
+ options[:validation][:report_mismatched_tags]
27
33
  else
28
34
  true
29
35
  end
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
4
4
  include HTMLProofer::Utils
5
5
 
6
6
  def missing_href?
7
+ return blank?(@link.src) if @node.name == 'source'
8
+
7
9
  blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
8
10
  end
9
11
 
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
12
14
  end
13
15
 
14
16
  def run
15
- @html.css('a, link').each do |node|
17
+ @html.css('a, link, source').each do |node|
16
18
  @link = create_element(node)
17
19
  line = node.line
18
20
  content = node.to_s
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
49
51
  # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
50
52
  next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
51
53
 
52
- add_to_external_urls(@link.href)
54
+ add_to_external_urls(@link.href || @link.src)
53
55
  next
54
- elsif @link.internal? && !@link.exists?
55
- add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
56
+ elsif @link.internal?
57
+ if @link.exists?
58
+ add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
59
+ else
60
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
61
+ end
56
62
  end
63
+ end
57
64
 
58
- # does the local directory have a trailing slash?
59
- if @link.unslashed_directory? @link.absolute_path
60
- add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
61
- next
62
- end
65
+ external_urls
66
+ end
63
67
 
64
- # verify the target hash
65
- handle_hash(@link, line, content) if @link.hash
68
+ def check_internal_link(link, line, content)
69
+ # does the local directory have a trailing slash?
70
+ if link.unslashed_directory?(link.absolute_path)
71
+ add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line: line, content: content)
72
+ return false
66
73
  end
67
74
 
68
- external_urls
75
+ # verify the target hash
76
+ return handle_hash(link, line, content) if link.hash
77
+
78
+ true
69
79
  end
70
80
 
71
81
  def check_schemes(link, line, content)
@@ -94,23 +104,27 @@ class LinkCheck < ::HTMLProofer::Check
94
104
  end
95
105
 
96
106
  def handle_hash(link, line, content)
97
- if link.internal? && !hash_check(link.html, link.hash)
98
- add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
107
+ if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
108
+ return add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
99
109
  elsif link.external?
100
- external_link_check(link, line, content)
110
+ return external_link_check(link, line, content)
101
111
  end
112
+
113
+ true
102
114
  end
103
115
 
104
116
  def external_link_check(link, line, content)
105
- if !link.exists?
106
- add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
117
+ if link.exists? # rubocop:disable Style/GuardClause
118
+ target_html = create_nokogiri(link.absolute_path)
119
+ return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
107
120
  else
108
- target_html = create_nokogiri link.absolute_path
109
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
121
+ return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
110
122
  end
123
+
124
+ true
111
125
  end
112
126
 
113
- def hash_check(html, href_hash)
127
+ def hash_exists?(html, href_hash)
114
128
  decoded_href_hash = Addressable::URI.unescape(href_hash)
115
129
  fragment_ids = [href_hash, decoded_href_hash]
116
130
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
148
162
 
149
163
  class XpathFunctions
150
164
  def case_sensitive_equals(node_set, str_to_match)
151
- node_set.find_all { |node| node.to_s. == str_to_match.to_s }
165
+ node_set.find_all { |node| node.to_s.== str_to_match.to_s }
166
+ end
167
+ end
168
+
169
+ class InternalLink
170
+ attr_reader :link, :href, :path, :line, :content
171
+
172
+ def initialize(link, path, line, content)
173
+ @link = link
174
+ @href = @link.href
175
+ @path = path
176
+ @line = line
177
+ @content = content
152
178
  end
153
179
  end
154
180
  end
@@ -3,12 +3,12 @@
3
3
  class OpenGraphElement < ::HTMLProofer::Element
4
4
  attr_reader :src
5
5
 
6
- def initialize(obj, check)
7
- super(obj, check)
6
+ def initialize(obj, check, logger)
7
+ super(obj, check, logger)
8
8
  # Fake up src from the content attribute
9
9
  instance_variable_set('@src', @content)
10
10
 
11
- @src.insert 0, 'http:' if @src =~ %r{^//}
11
+ @src.insert 0, 'http:' if %r{^//}.match?(@src)
12
12
  end
13
13
  end
14
14
 
@@ -23,7 +23,7 @@ class OpenGraphCheck < ::HTMLProofer::Check
23
23
 
24
24
  def run
25
25
  @html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
26
- @opengraph = OpenGraphElement.new(m, self)
26
+ @opengraph = OpenGraphElement.new(m, self, @logger)
27
27
 
28
28
  next if @opengraph.ignore?
29
29
 
@@ -52,7 +52,9 @@ module HTMLProofer
52
52
  report_script_embeds: false,
53
53
  report_missing_names: false,
54
54
  report_invalid_tags: false,
55
- report_missing_doctype: false
55
+ report_missing_doctype: false,
56
+ report_eof_tags: false,
57
+ report_mismatched_tags: false
56
58
  }.freeze
57
59
 
58
60
  CACHE_DEFAULTS = {}.freeze
@@ -78,7 +80,7 @@ module HTMLProofer
78
80
  begin
79
81
  JSON.parse(config)
80
82
  rescue StandardError
81
- raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
83
+ raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
82
84
  end
83
85
  end
84
86
  end
@@ -10,12 +10,18 @@ module HTMLProofer
10
10
 
11
11
  attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
12
12
 
13
- def initialize(obj, check)
13
+ def initialize(obj, check, logger)
14
+ @logger = logger
14
15
  # Construct readable ivars for every element
15
- obj.attributes.each_pair do |attribute, value|
16
- name = attribute.tr('-:.', '_').to_s.to_sym
17
- (class << self; self; end).send(:attr_reader, name)
18
- instance_variable_set("@#{name}", value.value)
16
+ begin
17
+ obj.attributes.each_pair do |attribute, value|
18
+ name = attribute.tr('-:.;@', '_').to_s.to_sym
19
+ (class << self; self; end).send(:attr_reader, name)
20
+ instance_variable_set("@#{name}", value.value)
21
+ end
22
+ rescue NameError => e
23
+ @logger.log :error, "Attribute set `#{obj}` contains an error!"
24
+ raise e
19
25
  end
20
26
 
21
27
  @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
@@ -36,19 +42,19 @@ module HTMLProofer
36
42
 
37
43
  # fix up missing protocols
38
44
  if defined?(@href)
39
- @href.insert(0, 'http:') if @href =~ %r{^//}
45
+ @href.insert(0, 'http:') if %r{^//}.match?(@href)
40
46
  else
41
47
  @href = nil
42
48
  end
43
49
 
44
50
  if defined?(@src)
45
- @src.insert(0, 'http:') if @src =~ %r{^//}
51
+ @src.insert(0, 'http:') if %r{^//}.match?(@src)
46
52
  else
47
53
  @src = nil
48
54
  end
49
55
 
50
56
  if defined?(@srcset)
51
- @srcset.insert(0, 'http:') if @srcset =~ %r{^//}
57
+ @srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
52
58
  else
53
59
  @srcset = nil
54
60
  end
@@ -99,12 +105,10 @@ module HTMLProofer
99
105
  return true if @data_proofer_ignore
100
106
  return true if @parent_ignorable
101
107
 
102
- return true if url =~ /^javascript:/
108
+ return true if /^javascript:/.match?(url)
103
109
 
104
110
  # ignore base64 encoded images
105
- if %w[ImageCheck FaviconCheck].include? @type
106
- return true if url =~ /^data:image/
107
- end
111
+ return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
108
112
 
109
113
  # ignore user defined URLs
110
114
  return true if ignores_pattern_check(@check.options[:url_ignore])
@@ -165,6 +169,10 @@ module HTMLProofer
165
169
  url.start_with?('?')
166
170
  end
167
171
 
172
+ def absolute_path?(path)
173
+ path.start_with?('/')
174
+ end
175
+
168
176
  def file_path
169
177
  return if path.nil? || path.empty?
170
178
 
@@ -172,22 +180,16 @@ module HTMLProofer
172
180
 
173
181
  path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
174
182
 
175
- if path =~ %r{^/} # path relative to root
176
- if File.directory?(@check.src)
177
- base = @check.src
178
- else
179
- root_dir = @check.options[:root_dir]
180
- base = root_dir || File.dirname(@check.src)
181
- end
182
- elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
183
- base = File.dirname @check.path
184
- elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
185
- base = File.dirname @check.path
186
- else # relative link, path is a directory
187
- base = @check.path
188
- end
189
-
190
- file = File.join base, path
183
+ base = if absolute_path?(path) # path relative to root
184
+ @check.options[:root_dir] || File.dirname(@check.src)
185
+ elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
186
+ File.dirname(@check.path)
187
+ elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
188
+ File.dirname(@check.path)
189
+ else # relative link, path is a directory
190
+ @check.path
191
+ end
192
+ file = File.join(base, path)
191
193
  if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
192
194
  file = "#{file}#{@check.options[:extension]}"
193
195
  elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
@@ -199,22 +201,24 @@ module HTMLProofer
199
201
 
200
202
  # checks if a file exists relative to the current pwd
201
203
  def exists?
202
- return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
204
+ return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
203
205
 
204
- @checked_paths[absolute_path] = File.exist? absolute_path
206
+ @checked_paths[absolute_path] = File.exist?(absolute_path)
205
207
  end
206
208
 
207
209
  def absolute_path
208
210
  path = file_path || @check.path
209
- File.expand_path path, Dir.pwd
211
+
212
+ File.expand_path(path, Dir.pwd)
210
213
  end
211
214
 
212
215
  def ignores_pattern_check(links)
213
216
  links.each do |ignore|
214
- if ignore.is_a? String
217
+ case ignore
218
+ when String
215
219
  return true if ignore == url
216
- elsif ignore.is_a? Regexp
217
- return true if ignore =~ url
220
+ when Regexp
221
+ return true if ignore&.match?(url)
218
222
  end
219
223
  end
220
224
 
@@ -6,6 +6,7 @@ module HTMLProofer
6
6
 
7
7
  class InvalidHtmlError < StandardError
8
8
  def initialize(failures)
9
+ super
9
10
  @failures = failures
10
11
  end
11
12
 
@@ -21,7 +22,8 @@ module HTMLProofer
21
22
  allow_hash_href: true,
22
23
  check_external_hash: true,
23
24
  check_html: true,
24
- url_ignore: [/.*/] # Don't try to check local files exist
25
+ url_ignore: [/.*/], # Don't try to check if local files exist
26
+ validation: { report_eof_tags: true }
25
27
  }
26
28
  end
27
29
 
@@ -52,7 +54,7 @@ module HTMLProofer
52
54
  def call(env)
53
55
  result = @app.call(env)
54
56
  return result if env['REQUEST_METHOD'] != 'GET'
55
- return result if env['QUERY_STRING'] =~ /proofer-ignore/
57
+ return result if /proofer-ignore/.match?(env['QUERY_STRING'])
56
58
  return result if result.first != 200
57
59
 
58
60
  body = []
@@ -4,7 +4,7 @@ module HTMLProofer
4
4
  class Runner
5
5
  include HTMLProofer::Utils
6
6
 
7
- attr_reader :options, :external_urls, :failures
7
+ attr_reader :options, :internal_urls, :external_urls, :failures
8
8
 
9
9
  def initialize(src, opts = {})
10
10
  @src = src
@@ -20,6 +20,8 @@ module HTMLProofer
20
20
 
21
21
  @type = @options.delete(:type)
22
22
  @logger = HTMLProofer::Log.new(@options[:log_level])
23
+ @cache = Cache.new(@logger, @options[:cache])
24
+ @internal_link_checks = nil
23
25
 
24
26
  # Add swap patterns for internal domains
25
27
  unless @options[:internal_domains].empty?
@@ -30,7 +32,11 @@ module HTMLProofer
30
32
  end
31
33
  end
32
34
 
35
+ @internal_urls = {}
36
+ @internal_urls_to_paths = {}
37
+ @external_urls = {}
33
38
  @failures = []
39
+ @before_request = []
34
40
  end
35
41
 
36
42
  def run
@@ -58,15 +64,13 @@ module HTMLProofer
58
64
  end
59
65
  end
60
66
  @external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
61
- validate_urls
67
+ validate_external_urls
62
68
  end
63
69
 
64
70
  # Collects any external URLs found in a directory of files. Also collects
65
71
  # every failed test from process_files.
66
72
  # Sends the external URLs to Typhoeus for batch processing.
67
73
  def check_files
68
- @external_urls = {}
69
-
70
74
  process_files.each do |item|
71
75
  @external_urls.merge!(item[:external_urls])
72
76
  @failures.concat(item[:failures])
@@ -77,9 +81,12 @@ module HTMLProofer
77
81
  # just not run those other checks at all.
78
82
  if @options[:external_only]
79
83
  @failures = []
80
- validate_urls
84
+ validate_external_urls
81
85
  elsif !@options[:disable_external]
82
- validate_urls
86
+ validate_external_urls
87
+ validate_internal_urls
88
+ else
89
+ validate_internal_urls
83
90
  end
84
91
  end
85
92
 
@@ -100,8 +107,21 @@ module HTMLProofer
100
107
  @src.each do |src|
101
108
  checks.each do |klass|
102
109
  @logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
103
- check = Object.const_get(klass).new(src, path, html, @options)
110
+ check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
104
111
  check.run
112
+
113
+ if klass == 'LinkCheck'
114
+ @internal_link_checks = check
115
+ check.internal_urls.each_pair do |url, internal_urls|
116
+ if @internal_urls_to_paths[url]
117
+ @internal_urls_to_paths[url].concat(internal_urls.map(&:path))
118
+ else
119
+ @internal_urls_to_paths[url] = internal_urls.map(&:path)
120
+ end
121
+ end
122
+ @internal_urls.merge!(check.internal_urls)
123
+ end
124
+
105
125
  external_urls = check.external_urls
106
126
  external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
107
127
  result[:external_urls].merge!(external_urls)
@@ -112,15 +132,35 @@ module HTMLProofer
112
132
  end
113
133
 
114
134
  def check_path(path)
115
- check_parsed create_nokogiri(path), path
135
+ check_parsed(create_nokogiri(path), path)
116
136
  end
117
137
 
118
- def validate_urls
119
- url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
138
+ def validate_external_urls
139
+ url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
140
+ url_validator.before_request = @before_request
120
141
  @failures.concat(url_validator.run)
121
142
  @external_urls = url_validator.external_urls
122
143
  end
123
144
 
145
+ def validate_internal_urls
146
+ if @cache.use_cache?
147
+ urls_to_check = load_internal_cache
148
+
149
+ urls_to_check.each_pair do |url, internal_urls|
150
+ result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.line, internal_urls.first.content)
151
+ code = result ? 200 : 404
152
+ @cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
153
+ end
154
+ @cache.write
155
+ else
156
+ @internal_urls.values.flatten.each do |internal_url|
157
+ @internal_link_checks.check_internal_link(internal_url.link, internal_url.line, internal_url.content)
158
+ end
159
+ end
160
+
161
+ @failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
162
+ end
163
+
124
164
  def files
125
165
  @files ||= if @type == :directory
126
166
  @src.map do |src|
@@ -147,6 +187,8 @@ module HTMLProofer
147
187
  def checks
148
188
  return @checks if defined?(@checks) && !@checks.nil?
149
189
 
190
+ return (@checks = ['LinkCheck']) if @type == :links
191
+
150
192
  @checks = HTMLProofer::Check.subchecks.map(&:name)
151
193
  @checks.delete('FaviconCheck') unless @options[:check_favicon]
152
194
  @checks.delete('HtmlCheck') unless @options[:check_html]
@@ -171,5 +213,29 @@ module HTMLProofer
171
213
  failure_text = pluralize(count, 'failure', 'failures')
172
214
  raise @logger.colorize :fatal, "HTML-Proofer found #{failure_text}!"
173
215
  end
216
+
217
+ # Set before_request callback.
218
+ #
219
+ # @example Set before_request.
220
+ # request.before_request { |request| p "yay" }
221
+ #
222
+ # @param [ Block ] block The block to execute.
223
+ #
224
+ # @yield [ Typhoeus::Request ]
225
+ #
226
+ # @return [ Array<Block> ] All before_request blocks.
227
+ def before_request(&block)
228
+ @before_request ||= []
229
+ @before_request << block if block
230
+ @before_request
231
+ end
232
+
233
+ def load_internal_cache
234
+ urls_to_check = @cache.retrieve_urls(@internal_urls)
235
+ cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
236
+ @logger.log :info, "Found #{cache_text} in the cache..."
237
+
238
+ urls_to_check
239
+ end
174
240
  end
175
241
  end
@@ -10,21 +10,23 @@ module HTMLProofer
10
10
  include HTMLProofer::Utils
11
11
 
12
12
  attr_reader :external_urls
13
+ attr_writer :before_request
13
14
 
14
- def initialize(logger, external_urls, options)
15
+ def initialize(logger, cache, external_urls, options)
15
16
  @logger = logger
16
17
  @external_urls = external_urls
17
18
  @failed_tests = []
18
19
  @options = options
19
20
  @hydra = Typhoeus::Hydra.new(@options[:hydra])
20
- @cache = Cache.new(@logger, @options[:cache])
21
+ @cache = cache
22
+ @before_request = []
21
23
  end
22
24
 
23
25
  def run
24
26
  @external_urls = remove_query_values
25
27
 
26
28
  if @cache.use_cache?
27
- urls_to_check = load_cache
29
+ urls_to_check = @cache.retrieve_urls(@external_urls)
28
30
  external_link_checker(urls_to_check)
29
31
  @cache.write
30
32
  else
@@ -41,11 +43,11 @@ module HTMLProofer
41
43
  iterable_external_urls = @external_urls.dup
42
44
  @external_urls.each_key do |url|
43
45
  uri = begin
44
- Addressable::URI.parse(url)
45
- rescue URI::Error, Addressable::URI::InvalidURIError
46
- @logger.log :error, "#{url} is an invalid URL"
47
- nil
48
- end
46
+ Addressable::URI.parse(url)
47
+ rescue URI::Error, Addressable::URI::InvalidURIError
48
+ @logger.log :error, "#{url} is an invalid URL"
49
+ nil
50
+ end
49
51
  next if uri.nil? || uri.query.nil?
50
52
 
51
53
  iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
@@ -72,15 +74,6 @@ module HTMLProofer
72
74
  uri.host + uri.path
73
75
  end
74
76
 
75
- def load_cache
76
- cache_count = @cache.size
77
- cache_text = pluralize(cache_count, 'link', 'links')
78
-
79
- @logger.log :info, "Found #{cache_text} in the cache..."
80
-
81
- @cache.retrieve_urls(@external_urls)
82
- end
83
-
84
77
  # Proofer runs faster if we pull out all the external URLs and run the checks
85
78
  # at the end. Otherwise, we're halting the consuming process for every file during
86
79
  # `process_files`.
@@ -109,11 +102,11 @@ module HTMLProofer
109
102
  def establish_queue(external_urls)
110
103
  external_urls.each_pair do |url, filenames|
111
104
  url = begin
112
- clean_url(url)
113
- rescue URI::Error, Addressable::URI::InvalidURIError
114
- add_external_issue(filenames, "#{url} is an invalid URL")
115
- next
116
- end
105
+ clean_url(url)
106
+ rescue URI::Error, Addressable::URI::InvalidURIError
107
+ add_external_issue(filenames, "#{url} is an invalid URL")
108
+ next
109
+ end
117
110
 
118
111
  method = if hash?(url) && @options[:check_external_hash]
119
112
  :get
@@ -127,16 +120,19 @@ module HTMLProofer
127
120
  def clean_url(href)
128
121
  # catch any obvious issues, like strings in port numbers
129
122
  parsed = Addressable::URI.parse(href)
130
- if href !~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
131
- parsed.normalize
132
- else
123
+ if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
133
124
  href
125
+ else
126
+ parsed.normalize
134
127
  end
135
128
  end
136
129
 
137
130
  def queue_request(method, href, filenames)
138
131
  opts = @options[:typhoeus].merge(method: method)
139
132
  request = Typhoeus::Request.new(href, opts)
133
+ @before_request.each do |callback|
134
+ callback.call(request)
135
+ end
140
136
  request.on_complete { |response| response_handler(response, filenames) }
141
137
  @hydra.queue request
142
138
  end
@@ -5,7 +5,7 @@ require 'nokogumbo'
5
5
  module HTMLProofer
6
6
  module Utils
7
7
  def pluralize(count, single, plural)
8
- "#{count} #{(count == 1 ? single : plural)}"
8
+ "#{count} #{count == 1 ? single : plural}"
9
9
  end
10
10
 
11
11
  def create_nokogiri(path)
@@ -15,7 +15,7 @@ module HTMLProofer
15
15
  path
16
16
  end
17
17
 
18
- Nokogiri::HTML5(content)
18
+ Nokogiri::HTML5(content, max_errors: -1)
19
19
  end
20
20
 
21
21
  def swap(href, replacement)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.15.1'
4
+ VERSION = '3.17.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.15.1
4
+ version: 3.17.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-20 00:00:00.000000000 Z
11
+ date: 2020-11-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -293,23 +293,23 @@ homepage: https://github.com/gjtorikian/html-proofer
293
293
  licenses:
294
294
  - MIT
295
295
  metadata: {}
296
- post_install_message:
296
+ post_install_message:
297
297
  rdoc_options: []
298
298
  require_paths:
299
299
  - lib
300
300
  required_ruby_version: !ruby/object:Gem::Requirement
301
301
  requirements:
302
- - - ">="
302
+ - - "~>"
303
303
  - !ruby/object:Gem::Version
304
- version: '0'
304
+ version: '2.4'
305
305
  required_rubygems_version: !ruby/object:Gem::Requirement
306
306
  requirements:
307
307
  - - ">="
308
308
  - !ruby/object:Gem::Version
309
309
  version: '0'
310
310
  requirements: []
311
- rubygems_version: 3.0.6
312
- signing_key:
311
+ rubygems_version: 3.1.2
312
+ signing_key:
313
313
  specification_version: 4
314
314
  summary: A set of tests to validate your HTML output. These tests check if your image
315
315
  references are legitimate, if they have alt tags, if your internal links are working,