html-proofer 3.12.1 → 3.15.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04d2b18950f5d8c6b2bcfadb24dec58d50d7c2fd8997977506a79a22d24aeeef
4
- data.tar.gz: f33be2af98f5a199b563eb78aef31518c94b304c1fd120985dc3ee2febee233f
3
+ metadata.gz: 722b9c6547474c7a4654e46a3c7af273eaa8a77b7538592ab0df4b42dd787ea2
4
+ data.tar.gz: 240979d4d2d62dbdd76ab14d868c8c0ecf420e10c93ae1cdc5c8182106fef698
5
5
  SHA512:
6
- metadata.gz: 1e9fd7e64a8c76c74351128fd63a25c1c56ce11351fc8cd4ce0e91ee793586ab71609ae247d3fa66615b089546887a330cb2eeec9550888ebbf9f8abb59b628d
7
- data.tar.gz: bed4a30375077f6bfd35900f75976e9f960deb316421e113b65b33ad7d61f9084be427f5241c3700aa1d8fbd717d4104389855db71a43f60f846af58c3a6694b
6
+ metadata.gz: 483c149a8b502277cb3d5d02f3aa4e009c27b1ef1a0c9f775b27410515a6654a013f434d018933c8991227d3826648902f0831613efddf3523e031a49dd586f8
7
+ data.tar.gz: 823bc37d94226a5dfed9d0db49ae6b8803ff74a719e8d3332b08d934bdaaa2c83374b41b5591fd6a714b6f8bca41a65bc6fa0ea099f6eadb5b17176ed5f61c56
@@ -20,13 +20,13 @@ Mercenary.program(:htmlproofer) do |p|
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
21
  p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
22
22
  p.option 'assume_extension', '--assume-extension', 'Automatically add extension (e.g. `.html`) to file paths, to allow extensionless URLs (as supported by Jekyll 3 and GitHub Pages) (default: `false`).'
23
- p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
23
+ p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you do not want to run (default: `[]`)'
24
24
  p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists). This slows the checker down (default: `false`).'
25
25
  p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
26
- p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
26
+ p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
27
27
  p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
28
28
  p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
29
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources do use SRI (default: `false`).'
29
+ p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
30
30
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
31
31
  p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
32
32
  p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
@@ -37,9 +37,10 @@ Mercenary.program(:htmlproofer) do |p|
37
37
  p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
38
  p.option 'http_status_ignore', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
39
39
  p.option 'internal_domains', '--internal-domains domain1,[domain2,...]', Array, 'A comma-separated list of Strings containing domains that will be treated as internal urls.'
40
- p.option 'report_invalid_tags', '--report-invalid-tags', 'Ignore `check_html` errors associated with unknown markup (default: `false`)'
41
- p.option 'report_missing_names', '--report-missing-names', 'Ignore `check_html` errors associated with missing entities (default: `false`)'
42
- p.option 'report_script_embeds', '--report-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
40
+ p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
41
+ p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
42
+ p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
43
+ p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
43
44
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
44
45
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
45
46
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
@@ -47,6 +48,7 @@ Mercenary.program(:htmlproofer) do |p|
47
48
  p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
48
49
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
49
50
  p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
51
+ p.option 'root_dir', '--root-folder PATH', String, 'The absolute path to the directory serving your html-files. Used when running html-proofer on a file, rather than a directory.'
50
52
 
51
53
  p.action do |args, opts|
52
54
  args = ['.'] if args.empty?
@@ -56,9 +58,7 @@ Mercenary.program(:htmlproofer) do |p|
56
58
 
57
59
  # prepare everything to go to proofer
58
60
  p.options.reject { |o| opts[o.config_key].nil? }.each do |option|
59
- if opts[option.config_key].is_a?(Array)
60
- opts[option.config_key] = opts[option.config_key].map { |i| HTMLProofer::Configuration.to_regex?(i) }
61
- end
61
+ opts[option.config_key] = opts[option.config_key].map { |i| HTMLProofer::Configuration.to_regex?(i) } if opts[option.config_key].is_a?(Array)
62
62
  options[option.config_key.to_sym] = opts[option.config_key]
63
63
  end
64
64
 
@@ -81,10 +81,9 @@ Mercenary.program(:htmlproofer) do |p|
81
81
  options[:validation][:report_script_embeds] = opts['report_script_embeds'] unless opts['report_script_embeds'].nil?
82
82
  options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
83
83
  options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
84
+ options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
84
85
 
85
- unless opts['typhoeus_config'].nil?
86
- options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'])
87
- end
86
+ options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
88
87
 
89
88
  unless opts['timeframe'].nil?
90
89
  options[:cache] ||= {}
@@ -17,38 +17,38 @@ require 'fileutils'
17
17
  begin
18
18
  require 'awesome_print'
19
19
  require 'pry-byebug'
20
- rescue LoadError; end
20
+ rescue LoadError; end # rubocop:disable Lint/SuppressedException
21
21
  module HTMLProofer
22
- def check_file(file, options = {})
22
+ def self.check_file(file, options = {})
23
23
  raise ArgumentError unless file.is_a?(String)
24
24
  raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
25
+
25
26
  options[:type] = :file
26
27
  HTMLProofer::Runner.new(file, options)
27
28
  end
28
- module_function :check_file
29
29
 
30
- def check_directory(directory, options = {})
30
+ def self.check_directory(directory, options = {})
31
31
  raise ArgumentError unless directory.is_a?(String)
32
32
  raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
33
+
33
34
  options[:type] = :directory
34
35
  HTMLProofer::Runner.new([directory], options)
35
36
  end
36
- module_function :check_directory
37
37
 
38
- def check_directories(directories, options = {})
38
+ def self.check_directories(directories, options = {})
39
39
  raise ArgumentError unless directories.is_a?(Array)
40
+
40
41
  options[:type] = :directory
41
42
  directories.each do |directory|
42
43
  raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
43
44
  end
44
45
  HTMLProofer::Runner.new(directories, options)
45
46
  end
46
- module_function :check_directories
47
47
 
48
- def check_links(links, options = {})
48
+ def self.check_links(links, options = {})
49
49
  raise ArgumentError unless links.is_a?(Array)
50
+
50
51
  options[:type] = :links
51
52
  HTMLProofer::Runner.new(links, options)
52
53
  end
53
- module_function :check_links
54
54
  end
@@ -9,7 +9,7 @@ module HTMLProofer
9
9
  include HTMLProofer::Utils
10
10
 
11
11
  DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
12
- DEFAULT_CACHE_FILE_NAME = 'cache.log'.freeze
12
+ DEFAULT_CACHE_FILE_NAME = 'cache.log'
13
13
 
14
14
  attr_reader :exists, :cache_log, :storage_dir, :cache_file
15
15
 
@@ -120,9 +120,8 @@ module HTMLProofer
120
120
  @cache_log.each_pair do |url, cache|
121
121
  if within_timeframe?(cache['time'])
122
122
  next if cache['message'].empty? # these were successes to skip
123
- urls_to_check[url] = cache['filenames'] # these are failures to retry
124
123
  else
125
- urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
124
+ urls_to_check[url] = cache['filenames'] # recheck expired links
126
125
  end
127
126
  end
128
127
  urls_to_check
@@ -142,23 +141,16 @@ module HTMLProofer
142
141
  end
143
142
 
144
143
  def setup_cache!(options)
145
- @storage_dir = if options[:storage_dir]
146
- options[:storage_dir]
147
- else
148
- DEFAULT_STORAGE_DIR
149
- end
144
+ @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
150
145
 
151
146
  FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)
152
147
 
153
- cache_file_name = if options[:cache_file]
154
- options[:cache_file]
155
- else
156
- DEFAULT_CACHE_FILE_NAME
157
- end
148
+ cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME
158
149
 
159
150
  @cache_file = File.join(storage_dir, cache_file_name)
160
151
 
161
152
  return unless File.exist?(cache_file)
153
+
162
154
  contents = File.read(cache_file)
163
155
  @cache_log = contents.empty? ? {} : JSON.parse(contents)
164
156
  end
@@ -174,7 +166,7 @@ module HTMLProofer
174
166
  when :days
175
167
  @cache_datetime - measurement
176
168
  when :hours
177
- @cache_datetime - Rational(measurement/24.0)
169
+ @cache_datetime - Rational(measurement / 24.0)
178
170
  end.to_time
179
171
  end
180
172
  end
@@ -29,6 +29,7 @@ module HTMLProofer
29
29
 
30
30
  def add_to_external_urls(url)
31
31
  return if @external_urls[url]
32
+
32
33
  add_path_for_url(url)
33
34
  end
34
35
 
@@ -45,6 +46,7 @@ module HTMLProofer
45
46
 
46
47
  ObjectSpace.each_object(Class) do |c|
47
48
  next unless c.superclass == self
49
+
48
50
  classes << c
49
51
  end
50
52
 
@@ -6,22 +6,24 @@ class FaviconCheck < ::HTMLProofer::Check
6
6
  @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |node|
7
7
  favicon = create_element(node)
8
8
  next if favicon.ignore?
9
+
9
10
  found = true if favicon.rel.split(' ').last.eql? 'icon'
10
11
  break if found
11
12
  end
12
13
 
13
14
  return if found
14
15
 
15
- return if is_immediate_redirect?
16
+ return if immediate_redirect?
16
17
 
17
18
  add_issue('no favicon specified')
18
19
  end
19
20
 
20
21
  private
21
22
 
22
- def is_immediate_redirect?
23
- # allow any instant-redirect meta tag
24
- @html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;' rescue false
23
+ # allow any instant-redirect meta tag
24
+ def immediate_redirect?
25
+ @html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
26
+ rescue StandardError
27
+ false
25
28
  end
26
-
27
29
  end
@@ -1,28 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class HtmlCheck < ::HTMLProofer::Check
4
- SCRIPT_EMBEDS_MSG = /Element script embeds close tag/
5
- INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/
6
- INVALID_PREFIX = /Namespace prefix/
7
- PARSE_ENTITY_REF = /htmlParseEntityRef: no name/
4
+ # tags embedded in scripts are used in templating languages: http://git.io/vOovv
5
+ SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
6
+ INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
7
+ INVALID_PREFIX = /Namespace prefix/.freeze
8
+ PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
9
+ DOCTYPE_MSG = /The doctype must be the first token in the document/.freeze
8
10
 
9
11
  def run
10
12
  @html.errors.each do |error|
11
- message = error.message
12
- line = error.line
13
-
14
- if message =~ INVALID_TAG_MSG || message =~ INVALID_PREFIX
15
- next unless options[:validation][:report_invalid_tags]
16
- end
17
-
18
- if message =~ PARSE_ENTITY_REF
19
- next unless options[:validation][:report_missing_names]
20
- end
21
-
22
- # tags embedded in scripts are used in templating languages: http://git.io/vOovv
23
- next if !options[:validation][:report_script_embeds] && message =~ SCRIPT_EMBEDS_MSG
13
+ add_issue(error.message, line: error.line) if report?(error.message)
14
+ end
15
+ end
24
16
 
25
- add_issue(message, line: line)
17
+ def report?(message)
18
+ case message
19
+ when SCRIPT_EMBEDS_MSG
20
+ options[:validation][:report_script_embeds]
21
+ when INVALID_TAG_MSG, INVALID_PREFIX
22
+ options[:validation][:report_invalid_tags]
23
+ when PARSE_ENTITY_REF
24
+ options[:validation][:report_missing_names]
25
+ when DOCTYPE_MSG
26
+ options[:validation][:report_missing_doctype]
27
+ else
28
+ true
26
29
  end
27
30
  end
28
31
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ImageCheck < ::HTMLProofer::Check
4
- SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
4
+ SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
5
5
 
6
6
  def empty_alt_tag?
7
7
  @img.alt.nil? || @img.alt.strip.empty?
@@ -38,13 +38,9 @@ class ImageCheck < ::HTMLProofer::Check
38
38
  add_issue("internal image #{@img.url} does not exist", line: line, content: content)
39
39
  end
40
40
 
41
- if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
42
- add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content)
43
- end
41
+ add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content) if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
44
42
 
45
- if @img.check_img_http? && @img.scheme == 'http'
46
- add_issue("image #{@img.url} uses the http scheme", line: line, content: content)
47
- end
43
+ add_issue("image #{@img.url} uses the http scheme", line: line, content: content) if @img.check_img_http? && @img.scheme == 'http'
48
44
  end
49
45
 
50
46
  external_urls
@@ -34,7 +34,8 @@ class LinkCheck < ::HTMLProofer::Check
34
34
  if missing_href?
35
35
  next if @link.allow_missing_href?
36
36
  # HTML5 allows dropping the href: http://git.io/vBX0z
37
- next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
37
+ next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)
38
+
38
39
  add_issue('anchor has no href attribute', line: line, content: content)
39
40
  next
40
41
  end
@@ -47,9 +48,10 @@ class LinkCheck < ::HTMLProofer::Check
47
48
  # we need to skip these for now; although the domain main be valid,
48
49
  # curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
49
50
  next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
51
+
50
52
  add_to_external_urls(@link.href)
51
53
  next
52
- elsif !@link.internal? && !@link.exists?
54
+ elsif @link.internal? && !@link.exists?
53
55
  add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
54
56
  end
55
57
 
@@ -74,6 +76,7 @@ class LinkCheck < ::HTMLProofer::Check
74
76
  handle_tel(link, line, content)
75
77
  when 'http'
76
78
  return unless @options[:enforce_https]
79
+
77
80
  add_issue("#{link.href} is not an HTTPS link", line: line, content: content)
78
81
  end
79
82
  end
@@ -103,9 +106,7 @@ class LinkCheck < ::HTMLProofer::Check
103
106
  add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
104
107
  else
105
108
  target_html = create_nokogiri link.absolute_path
106
- unless hash_check target_html, link.hash
107
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content)
108
- end
109
+ add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
109
110
  end
110
111
  end
111
112
 
@@ -129,10 +130,13 @@ class LinkCheck < ::HTMLProofer::Check
129
130
  html.xpath(*xpaths)
130
131
  end
131
132
 
132
- IGNORABE_REL = %(canonical alternate next prev previous icon manifest apple-touch-icon)
133
+ # Whitelist for affected elements from Subresource Integrity specification
134
+ # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
135
+ SRI_REL_TYPES = %(stylesheet)
133
136
 
134
137
  def check_sri(line, content)
135
- return if IGNORABE_REL.include?(@link.rel)
138
+ return unless SRI_REL_TYPES.include?(@link.rel)
139
+
136
140
  if !defined?(@link.integrity) && !defined?(@link.crossorigin)
137
141
  add_issue("SRI and CORS not provided in: #{@link.src}", line: line, content: content)
138
142
  elsif !defined?(@link.integrity)
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
  # frozen_string_literal: true
3
2
 
4
3
  class OpenGraphElement < ::HTMLProofer::Element
@@ -51,7 +51,8 @@ module HTMLProofer
51
51
  VALIDATION_DEFAULTS = {
52
52
  report_script_embeds: false,
53
53
  report_missing_names: false,
54
- report_invalid_tags: false
54
+ report_invalid_tags: false,
55
+ report_missing_doctype: false
55
56
  }.freeze
56
57
 
57
58
  CACHE_DEFAULTS = {}.freeze
@@ -65,19 +66,19 @@ module HTMLProofer
65
66
  end
66
67
 
67
68
  def self.parse_json_option(option_name, config)
68
- raise ArgumentError.new('Must provide an option name in string format.') unless option_name.is_a?(String)
69
- raise ArgumentError.new('Must provide an option name in string format.') unless !option_name.strip.empty?
69
+ raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
70
+ raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
70
71
 
71
72
  return {} if config.nil?
72
73
 
73
- raise ArgumentError.new('Must provide a JSON configuration in string format.') unless config.is_a?(String)
74
+ raise ArgumentError, 'Must provide a JSON configuration in string format.' unless config.is_a?(String)
74
75
 
75
76
  return {} if config.strip.empty?
76
77
 
77
78
  begin
78
79
  JSON.parse(config)
79
- rescue
80
- raise ArgumentError.new("Option '" + option_name + "' did not contain valid JSON.")
80
+ rescue StandardError
81
+ raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
81
82
  end
82
83
  end
83
84
  end
@@ -18,7 +18,7 @@ module HTMLProofer
18
18
  instance_variable_set("@#{name}", value.value)
19
19
  end
20
20
 
21
- @aria_hidden = (defined?(@aria_hidden) && @aria_hidden == 'true') ? true : false
21
+ @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
22
22
 
23
23
  @data_proofer_ignore = defined?(@data_proofer_ignore)
24
24
 
@@ -56,9 +56,11 @@ module HTMLProofer
56
56
 
57
57
  def url
58
58
  return @url if defined?(@url)
59
+
59
60
  @url = (@src || @srcset || @href || '').delete("\u200b").strip
60
61
  @url = Addressable::URI.join(base.attr('href') || '', url).to_s if base
61
62
  return @url if @check.options[:url_swap].empty?
63
+
62
64
  @url = swap(@url, @check.options[:url_swap])
63
65
  end
64
66
 
@@ -77,11 +79,11 @@ module HTMLProofer
77
79
  end
78
80
 
79
81
  def hash
80
- parts.fragment unless parts.nil?
82
+ parts&.fragment
81
83
  end
82
84
 
83
85
  def scheme
84
- parts.scheme unless parts.nil?
86
+ parts&.scheme
85
87
  end
86
88
 
87
89
  # path is to an external server
@@ -137,9 +139,22 @@ module HTMLProofer
137
139
  !internal?
138
140
  end
139
141
 
140
- # path is an anchor or a query
141
142
  def internal?
142
- hash_link || param_link || slash_link
143
+ relative_link? || internal_absolute_link?
144
+ end
145
+
146
+ def internal_absolute_link?
147
+ url.start_with?('/')
148
+ end
149
+
150
+ def relative_link?
151
+ return false if remote?
152
+
153
+ hash_link || param_link || url.start_with?('.') || url =~ /^\S/
154
+ end
155
+
156
+ def link_points_to_same_page?
157
+ hash_link || param_link
143
158
  end
144
159
 
145
160
  def hash_link
@@ -150,21 +165,20 @@ module HTMLProofer
150
165
  url.start_with?('?')
151
166
  end
152
167
 
153
- def slash_link
154
- url.start_with?('|')
155
- end
156
-
157
168
  def file_path
158
- return if path.nil?
169
+ return if path.nil? || path.empty?
159
170
 
160
171
  path_dot_ext = ''
161
172
 
162
- if @check.options[:assume_extension]
163
- path_dot_ext = path + @check.options[:extension]
164
- end
173
+ path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
165
174
 
166
175
  if path =~ %r{^/} # path relative to root
167
- base = File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
176
+ if File.directory?(@check.src)
177
+ base = @check.src
178
+ else
179
+ root_dir = @check.options[:root_dir]
180
+ base = root_dir || File.dirname(@check.src)
181
+ end
168
182
  elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
169
183
  base = File.dirname @check.path
170
184
  elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
@@ -174,7 +188,6 @@ module HTMLProofer
174
188
  end
175
189
 
176
190
  file = File.join base, path
177
-
178
191
  if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
179
192
  file = "#{file}#{@check.options[:extension]}"
180
193
  elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
@@ -187,6 +200,7 @@ module HTMLProofer
187
200
  # checks if a file exists relative to the current pwd
188
201
  def exists?
189
202
  return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
203
+
190
204
  @checked_paths[absolute_path] = File.exist? absolute_path
191
205
  end
192
206
 
@@ -220,12 +234,14 @@ module HTMLProofer
220
234
  end
221
235
 
222
236
  def html
223
- # If link is on the same page, then URL is on the current page so can use the same HTML as for current page
224
- if (hash_link || param_link) && internal?
237
+ # If link is on the same page, then URL is on the current page. use the same HTML as for current page
238
+ if link_points_to_same_page?
225
239
  @html
226
- elsif slash_link && internal?
240
+ elsif internal?
227
241
  # link on another page, e.g. /about#Team - need to get HTML from the other page
228
242
  create_nokogiri(absolute_path)
243
+ else
244
+ raise NotImplementedError, 'HTMLProofer should not have gotten here. Please report this as a bug.'
229
245
  end
230
246
  end
231
247
  end
@@ -56,9 +56,7 @@ module HTMLProofer
56
56
  @logger.log :error, " * #{issue}"
57
57
  else
58
58
  msg = " * #{issue.send(second_report)}#{issue.line}"
59
- if !issue.content.nil? && !issue.content.empty?
60
- msg = "#{msg}\n #{issue.content}"
61
- end
59
+ msg = "#{msg}\n #{issue.content}" if !issue.content.nil? && !issue.content.empty?
62
60
  @logger.log(:error, msg)
63
61
  end
64
62
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module HTMLProofer
4
4
  class Middleware
5
+ include HTMLProofer::Utils
5
6
 
6
7
  class InvalidHtmlError < StandardError
7
8
  def initialize(failures)
@@ -9,18 +10,18 @@ module HTMLProofer
9
10
  end
10
11
 
11
12
  def message
12
- "HTML Validation errors (skip by adding `?proofer-ignore` to URL): \n#{@failures.join("\n")}"
13
+ "HTML Validation errors (skip by adding `?proofer-ignore` to URL): \n#{@failures.join("\n")}"
13
14
  end
14
15
  end
15
16
 
16
17
  def self.options
17
18
  @options ||= {
18
- type: :file,
19
- allow_missing_href: true, # Permitted in html5
20
- allow_hash_href: true,
19
+ type: :file,
20
+ allow_missing_href: true, # Permitted in html5
21
+ allow_hash_href: true,
21
22
  check_external_hash: true,
22
- check_html: true,
23
- url_ignore: [/.*/], # Don't try to check local files exist
23
+ check_html: true,
24
+ url_ignore: [/.*/] # Don't try to check local files exist
24
25
  }
25
26
  end
26
27
 
@@ -46,20 +47,21 @@ module HTMLProofer
46
47
  '<BR',
47
48
  '<P',
48
49
  '<!--'
49
- ]
50
+ ].freeze
50
51
 
51
52
  def call(env)
52
53
  result = @app.call(env)
53
54
  return result if env['REQUEST_METHOD'] != 'GET'
54
55
  return result if env['QUERY_STRING'] =~ /proofer-ignore/
55
56
  return result if result.first != 200
57
+
56
58
  body = []
57
59
  result.last.each { |e| body << e }
58
60
 
59
61
  body = body.join('')
60
62
  begin
61
63
  html = body.lstrip
62
- rescue
64
+ rescue StandardError
63
65
  return result # Invalid encoding; it's not gonna be html.
64
66
  end
65
67
  if HTML_SIGNATURE.any? { |sig| html.upcase.start_with? sig }
@@ -67,12 +69,10 @@ module HTMLProofer
67
69
  'response',
68
70
  Middleware.options
69
71
  ).check_parsed(
70
- Nokogiri::HTML(Utils.clean_content(html)), 'response'
72
+ Nokogiri::HTML5(html, max_errors: -1), 'response'
71
73
  )
72
74
 
73
- if parsed[:failures].length > 0
74
- raise InvalidHtmlError.new(parsed[:failures])
75
- end
75
+ raise InvalidHtmlError, parsed[:failures] unless parsed[:failures].empty?
76
76
  end
77
77
  result
78
78
  end
@@ -4,7 +4,7 @@ module HTMLProofer
4
4
  class Runner
5
5
  include HTMLProofer::Utils
6
6
 
7
- attr_reader :options, :external_urls
7
+ attr_reader :options, :external_urls, :failures
8
8
 
9
9
  def initialize(src, opts = {})
10
10
  @src = src
@@ -103,9 +103,7 @@ module HTMLProofer
103
103
  check = Object.const_get(klass).new(src, path, html, @options)
104
104
  check.run
105
105
  external_urls = check.external_urls
106
- if @options[:url_swap]
107
- external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }]
108
- end
106
+ external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
109
107
  result[:external_urls].merge!(external_urls)
110
108
  result[:failures].concat(check.issues)
111
109
  end
@@ -148,6 +146,7 @@ module HTMLProofer
148
146
 
149
147
  def checks
150
148
  return @checks if defined?(@checks) && !@checks.nil?
149
+
151
150
  @checks = HTMLProofer::Check.subchecks.map(&:name)
152
151
  @checks.delete('FaviconCheck') unless @options[:check_favicon]
153
152
  @checks.delete('HtmlCheck') unless @options[:check_html]
@@ -159,6 +158,7 @@ module HTMLProofer
159
158
  def failed_tests
160
159
  result = []
161
160
  return result if @failures.empty?
161
+
162
162
  @failures.each { |f| result << f.to_s }
163
163
  result
164
164
  end
@@ -36,6 +36,7 @@ module HTMLProofer
36
36
 
37
37
  def remove_query_values
38
38
  return nil if @external_urls.nil?
39
+
39
40
  paths_with_queries = {}
40
41
  iterable_external_urls = @external_urls.dup
41
42
  @external_urls.each_key do |url|
@@ -46,6 +47,7 @@ module HTMLProofer
46
47
  nil
47
48
  end
48
49
  next if uri.nil? || uri.query.nil?
50
+
49
51
  iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
50
52
  end
51
53
  iterable_external_urls
@@ -108,9 +110,9 @@ module HTMLProofer
108
110
  external_urls.each_pair do |url, filenames|
109
111
  url = begin
110
112
  clean_url(url)
111
- rescue URI::Error, Addressable::URI::InvalidURIError
112
- add_external_issue(filenames, "#{url} is an invalid URL")
113
- next
113
+ rescue URI::Error, Addressable::URI::InvalidURIError
114
+ add_external_issue(filenames, "#{url} is an invalid URL")
115
+ next
114
116
  end
115
117
 
116
118
  method = if hash?(url) && @options[:check_external_hash]
@@ -144,22 +146,20 @@ module HTMLProofer
144
146
  href = response.request.base_url.to_s
145
147
  method = response.request.options[:method]
146
148
  response_code = response.code
147
- response.body.gsub!("\x00", '')
149
+ response.body.delete!("\x00")
148
150
 
149
- if filenames.nil?
150
- debug_msg = "Received a #{response_code} for #{href}"
151
- else
152
- debug_msg = "Received a #{response_code} for #{href} in #{filenames.join(' ')}"
153
- end
151
+ debug_msg = if filenames.nil?
152
+ "Received a #{response_code} for #{href}"
153
+ else
154
+ "Received a #{response_code} for #{href} in #{filenames.join(' ')}"
155
+ end
154
156
 
155
157
  @logger.log :debug, debug_msg
156
158
 
157
159
  return if @options[:http_status_ignore].include?(response_code)
158
160
 
159
161
  if response_code.between?(200, 299)
160
- unless check_hash_in_2xx_response(href, effective_url, response, filenames)
161
- @cache.add(href, filenames, response_code)
162
- end
162
+ @cache.add(href, filenames, response_code) unless check_hash_in_2xx_response(href, effective_url, response, filenames)
163
163
  elsif response.timed_out?
164
164
  handle_timeout(href, filenames, response_code)
165
165
  elsif response_code.zero?
@@ -168,6 +168,7 @@ module HTMLProofer
168
168
  queue_request(:get, href, filenames)
169
169
  else
170
170
  return if @options[:only_4xx] && !response_code.between?(400, 499)
171
+
171
172
  # Received a non-successful http response.
172
173
  msg = "External link #{href} failed: #{response_code} #{response.return_message}"
173
174
  add_external_issue(filenames, msg, response_code)
@@ -191,9 +192,7 @@ module HTMLProofer
191
192
  xpath << [%(//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])]
192
193
  # when linking to a file on GitHub, like #L12-L34, only the first "L" portion
193
194
  # will be identified as a linkable portion
194
- if hash =~ /\A(L\d)+/
195
- xpath << [%(//td[@id="#{Regexp.last_match[1]}"])]
196
- end
195
+ xpath << [%(//td[@id="#{Regexp.last_match[1]}"])] if hash =~ /\A(L\d)+/
197
196
  end
198
197
 
199
198
  return unless body_doc.xpath(xpath.join('|')).empty?
@@ -208,6 +207,7 @@ module HTMLProofer
208
207
  msg = "External link #{href} failed: got a time out (response code #{response_code})"
209
208
  @cache.add(href, filenames, 0, msg)
210
209
  return if @options[:only_4xx]
210
+
211
211
  add_external_issue(filenames, msg, response_code)
212
212
  end
213
213
 
@@ -218,6 +218,7 @@ module HTMLProofer
218
218
  Either way, the return message (if any) from the server is: #{return_message}"
219
219
  @cache.add(href, filenames, 0, msg)
220
220
  return if @options[:only_4xx]
221
+
221
222
  add_external_issue(filenames, msg, response_code)
222
223
  end
223
224
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
3
+ require 'nokogumbo'
4
4
 
5
5
  module HTMLProofer
6
6
  module Utils
@@ -15,9 +15,8 @@ module HTMLProofer
15
15
  path
16
16
  end
17
17
 
18
- Nokogiri::HTML(clean_content(content))
18
+ Nokogiri::HTML5(content)
19
19
  end
20
- module_function :create_nokogiri
21
20
 
22
21
  def swap(href, replacement)
23
22
  replacement.each do |link, replace|
@@ -25,16 +24,5 @@ module HTMLProofer
25
24
  end
26
25
  href
27
26
  end
28
- module_function :swap
29
-
30
- # address a problem with Nokogiri's parsing URL entities
31
- # problem from http://git.io/vBYU1
32
- # solution from http://git.io/vBYUi
33
- def clean_content(string)
34
- string.gsub(%r{(?:https?:)?//([^>]+)}i) do |url|
35
- url.gsub(/&(?!amp;)/, '&amp;')
36
- end
37
- end
38
- module_function :clean_content
39
27
  end
40
28
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.12.1'.freeze
4
+ VERSION = '3.15.0'
5
5
  end
metadata CHANGED
@@ -1,59 +1,59 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.12.1
4
+ version: 3.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-07 00:00:00.000000000 Z
11
+ date: 2019-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: mercenary
14
+ name: addressable
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.3'
19
+ version: '2.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.3'
26
+ version: '2.3'
27
27
  - !ruby/object:Gem::Dependency
28
- name: nokogiri
28
+ name: mercenary
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.10'
33
+ version: '0.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.10'
40
+ version: '0.3'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rainbow
42
+ name: nokogumbo
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: '2.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: '2.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: typhoeus
56
+ name: parallel
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
@@ -67,21 +67,21 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.3'
69
69
  - !ruby/object:Gem::Dependency
70
- name: yell
70
+ name: rainbow
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '2.0'
75
+ version: '3.0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '2.0'
82
+ version: '3.0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: parallel
84
+ name: typhoeus
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
@@ -95,21 +95,21 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '1.3'
97
97
  - !ruby/object:Gem::Dependency
98
- name: addressable
98
+ name: yell
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '2.3'
103
+ version: '2.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '2.3'
110
+ version: '2.0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: redcarpet
112
+ name: awesome_print
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - ">="
@@ -123,7 +123,7 @@ dependencies:
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
- name: rubocop
126
+ name: codecov
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - ">="
@@ -137,7 +137,7 @@ dependencies:
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
- name: rubocop-standard
140
+ name: pry-byebug
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
143
  - - ">="
@@ -151,7 +151,7 @@ dependencies:
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  - !ruby/object:Gem::Dependency
154
- name: rubocop-performance
154
+ name: rake
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - ">="
@@ -165,7 +165,7 @@ dependencies:
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  - !ruby/object:Gem::Dependency
168
- name: codecov
168
+ name: redcarpet
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
171
  - - ">="
@@ -193,7 +193,7 @@ dependencies:
193
193
  - !ruby/object:Gem::Version
194
194
  version: '3.1'
195
195
  - !ruby/object:Gem::Dependency
196
- name: rake
196
+ name: rubocop
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - ">="
@@ -207,7 +207,7 @@ dependencies:
207
207
  - !ruby/object:Gem::Version
208
208
  version: '0'
209
209
  - !ruby/object:Gem::Dependency
210
- name: pry-byebug
210
+ name: rubocop-performance
211
211
  requirement: !ruby/object:Gem::Requirement
212
212
  requirements:
213
213
  - - ">="
@@ -221,7 +221,7 @@ dependencies:
221
221
  - !ruby/object:Gem::Version
222
222
  version: '0'
223
223
  - !ruby/object:Gem::Dependency
224
- name: awesome_print
224
+ name: rubocop-standard
225
225
  requirement: !ruby/object:Gem::Requirement
226
226
  requirements:
227
227
  - - ">="
@@ -235,33 +235,33 @@ dependencies:
235
235
  - !ruby/object:Gem::Version
236
236
  version: '0'
237
237
  - !ruby/object:Gem::Dependency
238
- name: vcr
238
+ name: timecop
239
239
  requirement: !ruby/object:Gem::Requirement
240
240
  requirements:
241
241
  - - "~>"
242
242
  - !ruby/object:Gem::Version
243
- version: '2.9'
243
+ version: '0.8'
244
244
  type: :development
245
245
  prerelease: false
246
246
  version_requirements: !ruby/object:Gem::Requirement
247
247
  requirements:
248
248
  - - "~>"
249
249
  - !ruby/object:Gem::Version
250
- version: '2.9'
250
+ version: '0.8'
251
251
  - !ruby/object:Gem::Dependency
252
- name: timecop
252
+ name: vcr
253
253
  requirement: !ruby/object:Gem::Requirement
254
254
  requirements:
255
255
  - - "~>"
256
256
  - !ruby/object:Gem::Version
257
- version: '0.8'
257
+ version: '2.9'
258
258
  type: :development
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
262
  - - "~>"
263
263
  - !ruby/object:Gem::Version
264
- version: '0.8'
264
+ version: '2.9'
265
265
  description: Test your rendered HTML files to make sure they're accurate.
266
266
  email:
267
267
  - gjtorikian@gmail.com