html-proofer 3.12.1 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04d2b18950f5d8c6b2bcfadb24dec58d50d7c2fd8997977506a79a22d24aeeef
4
- data.tar.gz: f33be2af98f5a199b563eb78aef31518c94b304c1fd120985dc3ee2febee233f
3
+ metadata.gz: 722b9c6547474c7a4654e46a3c7af273eaa8a77b7538592ab0df4b42dd787ea2
4
+ data.tar.gz: 240979d4d2d62dbdd76ab14d868c8c0ecf420e10c93ae1cdc5c8182106fef698
5
5
  SHA512:
6
- metadata.gz: 1e9fd7e64a8c76c74351128fd63a25c1c56ce11351fc8cd4ce0e91ee793586ab71609ae247d3fa66615b089546887a330cb2eeec9550888ebbf9f8abb59b628d
7
- data.tar.gz: bed4a30375077f6bfd35900f75976e9f960deb316421e113b65b33ad7d61f9084be427f5241c3700aa1d8fbd717d4104389855db71a43f60f846af58c3a6694b
6
+ metadata.gz: 483c149a8b502277cb3d5d02f3aa4e009c27b1ef1a0c9f775b27410515a6654a013f434d018933c8991227d3826648902f0831613efddf3523e031a49dd586f8
7
+ data.tar.gz: 823bc37d94226a5dfed9d0db49ae6b8803ff74a719e8d3332b08d934bdaaa2c83374b41b5591fd6a714b6f8bca41a65bc6fa0ea099f6eadb5b17176ed5f61c56
@@ -20,13 +20,13 @@ Mercenary.program(:htmlproofer) do |p|
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
21
  p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
22
22
  p.option 'assume_extension', '--assume-extension', 'Automatically add extension (e.g. `.html`) to file paths, to allow extensionless URLs (as supported by Jekyll 3 and GitHub Pages) (default: `false`).'
23
- p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
23
+ p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you do not want to run (default: `[]`)'
24
24
  p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists). This slows the checker down (default: `false`).'
25
25
  p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
26
- p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
26
+ p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
27
27
  p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
28
28
  p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
29
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources do use SRI (default: `false`).'
29
+ p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
30
30
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
31
31
  p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
32
32
  p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
@@ -37,9 +37,10 @@ Mercenary.program(:htmlproofer) do |p|
37
37
  p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
38
  p.option 'http_status_ignore', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
39
39
  p.option 'internal_domains', '--internal-domains domain1,[domain2,...]', Array, 'A comma-separated list of Strings containing domains that will be treated as internal urls.'
40
- p.option 'report_invalid_tags', '--report-invalid-tags', 'Ignore `check_html` errors associated with unknown markup (default: `false`)'
41
- p.option 'report_missing_names', '--report-missing-names', 'Ignore `check_html` errors associated with missing entities (default: `false`)'
42
- p.option 'report_script_embeds', '--report-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
40
+ p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
41
+ p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
42
+ p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
43
+ p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
43
44
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
44
45
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
45
46
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
@@ -47,6 +48,7 @@ Mercenary.program(:htmlproofer) do |p|
47
48
  p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
48
49
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
49
50
  p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
51
+ p.option 'root_dir', '--root-folder PATH', String, 'The absolute path to the directory serving your html-files. Used when running html-proofer on a file, rather than a directory.'
50
52
 
51
53
  p.action do |args, opts|
52
54
  args = ['.'] if args.empty?
@@ -56,9 +58,7 @@ Mercenary.program(:htmlproofer) do |p|
56
58
 
57
59
  # prepare everything to go to proofer
58
60
  p.options.reject { |o| opts[o.config_key].nil? }.each do |option|
59
- if opts[option.config_key].is_a?(Array)
60
- opts[option.config_key] = opts[option.config_key].map { |i| HTMLProofer::Configuration.to_regex?(i) }
61
- end
61
+ opts[option.config_key] = opts[option.config_key].map { |i| HTMLProofer::Configuration.to_regex?(i) } if opts[option.config_key].is_a?(Array)
62
62
  options[option.config_key.to_sym] = opts[option.config_key]
63
63
  end
64
64
 
@@ -81,10 +81,9 @@ Mercenary.program(:htmlproofer) do |p|
81
81
  options[:validation][:report_script_embeds] = opts['report_script_embeds'] unless opts['report_script_embeds'].nil?
82
82
  options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
83
83
  options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
84
+ options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
84
85
 
85
- unless opts['typhoeus_config'].nil?
86
- options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'])
87
- end
86
+ options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
88
87
 
89
88
  unless opts['timeframe'].nil?
90
89
  options[:cache] ||= {}
@@ -17,38 +17,38 @@ require 'fileutils'
17
17
  begin
18
18
  require 'awesome_print'
19
19
  require 'pry-byebug'
20
- rescue LoadError; end
20
+ rescue LoadError; end # rubocop:disable Lint/SuppressedException
21
21
  module HTMLProofer
22
- def check_file(file, options = {})
22
+ def self.check_file(file, options = {})
23
23
  raise ArgumentError unless file.is_a?(String)
24
24
  raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
25
+
25
26
  options[:type] = :file
26
27
  HTMLProofer::Runner.new(file, options)
27
28
  end
28
- module_function :check_file
29
29
 
30
- def check_directory(directory, options = {})
30
+ def self.check_directory(directory, options = {})
31
31
  raise ArgumentError unless directory.is_a?(String)
32
32
  raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
33
+
33
34
  options[:type] = :directory
34
35
  HTMLProofer::Runner.new([directory], options)
35
36
  end
36
- module_function :check_directory
37
37
 
38
- def check_directories(directories, options = {})
38
+ def self.check_directories(directories, options = {})
39
39
  raise ArgumentError unless directories.is_a?(Array)
40
+
40
41
  options[:type] = :directory
41
42
  directories.each do |directory|
42
43
  raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
43
44
  end
44
45
  HTMLProofer::Runner.new(directories, options)
45
46
  end
46
- module_function :check_directories
47
47
 
48
- def check_links(links, options = {})
48
+ def self.check_links(links, options = {})
49
49
  raise ArgumentError unless links.is_a?(Array)
50
+
50
51
  options[:type] = :links
51
52
  HTMLProofer::Runner.new(links, options)
52
53
  end
53
- module_function :check_links
54
54
  end
@@ -9,7 +9,7 @@ module HTMLProofer
9
9
  include HTMLProofer::Utils
10
10
 
11
11
  DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
12
- DEFAULT_CACHE_FILE_NAME = 'cache.log'.freeze
12
+ DEFAULT_CACHE_FILE_NAME = 'cache.log'
13
13
 
14
14
  attr_reader :exists, :cache_log, :storage_dir, :cache_file
15
15
 
@@ -120,9 +120,8 @@ module HTMLProofer
120
120
  @cache_log.each_pair do |url, cache|
121
121
  if within_timeframe?(cache['time'])
122
122
  next if cache['message'].empty? # these were successes to skip
123
- urls_to_check[url] = cache['filenames'] # these are failures to retry
124
123
  else
125
- urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
124
+ urls_to_check[url] = cache['filenames'] # recheck expired links
126
125
  end
127
126
  end
128
127
  urls_to_check
@@ -142,23 +141,16 @@ module HTMLProofer
142
141
  end
143
142
 
144
143
  def setup_cache!(options)
145
- @storage_dir = if options[:storage_dir]
146
- options[:storage_dir]
147
- else
148
- DEFAULT_STORAGE_DIR
149
- end
144
+ @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
150
145
 
151
146
  FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)
152
147
 
153
- cache_file_name = if options[:cache_file]
154
- options[:cache_file]
155
- else
156
- DEFAULT_CACHE_FILE_NAME
157
- end
148
+ cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME
158
149
 
159
150
  @cache_file = File.join(storage_dir, cache_file_name)
160
151
 
161
152
  return unless File.exist?(cache_file)
153
+
162
154
  contents = File.read(cache_file)
163
155
  @cache_log = contents.empty? ? {} : JSON.parse(contents)
164
156
  end
@@ -174,7 +166,7 @@ module HTMLProofer
174
166
  when :days
175
167
  @cache_datetime - measurement
176
168
  when :hours
177
- @cache_datetime - Rational(measurement/24.0)
169
+ @cache_datetime - Rational(measurement / 24.0)
178
170
  end.to_time
179
171
  end
180
172
  end
@@ -29,6 +29,7 @@ module HTMLProofer
29
29
 
30
30
  def add_to_external_urls(url)
31
31
  return if @external_urls[url]
32
+
32
33
  add_path_for_url(url)
33
34
  end
34
35
 
@@ -45,6 +46,7 @@ module HTMLProofer
45
46
 
46
47
  ObjectSpace.each_object(Class) do |c|
47
48
  next unless c.superclass == self
49
+
48
50
  classes << c
49
51
  end
50
52
 
@@ -6,22 +6,24 @@ class FaviconCheck < ::HTMLProofer::Check
6
6
  @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |node|
7
7
  favicon = create_element(node)
8
8
  next if favicon.ignore?
9
+
9
10
  found = true if favicon.rel.split(' ').last.eql? 'icon'
10
11
  break if found
11
12
  end
12
13
 
13
14
  return if found
14
15
 
15
- return if is_immediate_redirect?
16
+ return if immediate_redirect?
16
17
 
17
18
  add_issue('no favicon specified')
18
19
  end
19
20
 
20
21
  private
21
22
 
22
- def is_immediate_redirect?
23
- # allow any instant-redirect meta tag
24
- @html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;' rescue false
23
+ # allow any instant-redirect meta tag
24
+ def immediate_redirect?
25
+ @html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
26
+ rescue StandardError
27
+ false
25
28
  end
26
-
27
29
  end
@@ -1,28 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class HtmlCheck < ::HTMLProofer::Check
4
- SCRIPT_EMBEDS_MSG = /Element script embeds close tag/
5
- INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/
6
- INVALID_PREFIX = /Namespace prefix/
7
- PARSE_ENTITY_REF = /htmlParseEntityRef: no name/
4
+ # tags embedded in scripts are used in templating languages: http://git.io/vOovv
5
+ SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
6
+ INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
7
+ INVALID_PREFIX = /Namespace prefix/.freeze
8
+ PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
9
+ DOCTYPE_MSG = /The doctype must be the first token in the document/.freeze
8
10
 
9
11
  def run
10
12
  @html.errors.each do |error|
11
- message = error.message
12
- line = error.line
13
-
14
- if message =~ INVALID_TAG_MSG || message =~ INVALID_PREFIX
15
- next unless options[:validation][:report_invalid_tags]
16
- end
17
-
18
- if message =~ PARSE_ENTITY_REF
19
- next unless options[:validation][:report_missing_names]
20
- end
21
-
22
- # tags embedded in scripts are used in templating languages: http://git.io/vOovv
23
- next if !options[:validation][:report_script_embeds] && message =~ SCRIPT_EMBEDS_MSG
13
+ add_issue(error.message, line: error.line) if report?(error.message)
14
+ end
15
+ end
24
16
 
25
- add_issue(message, line: line)
17
+ def report?(message)
18
+ case message
19
+ when SCRIPT_EMBEDS_MSG
20
+ options[:validation][:report_script_embeds]
21
+ when INVALID_TAG_MSG, INVALID_PREFIX
22
+ options[:validation][:report_invalid_tags]
23
+ when PARSE_ENTITY_REF
24
+ options[:validation][:report_missing_names]
25
+ when DOCTYPE_MSG
26
+ options[:validation][:report_missing_doctype]
27
+ else
28
+ true
26
29
  end
27
30
  end
28
31
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ImageCheck < ::HTMLProofer::Check
4
- SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
4
+ SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
5
5
 
6
6
  def empty_alt_tag?
7
7
  @img.alt.nil? || @img.alt.strip.empty?
@@ -38,13 +38,9 @@ class ImageCheck < ::HTMLProofer::Check
38
38
  add_issue("internal image #{@img.url} does not exist", line: line, content: content)
39
39
  end
40
40
 
41
- if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
42
- add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content)
43
- end
41
+ add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content) if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
44
42
 
45
- if @img.check_img_http? && @img.scheme == 'http'
46
- add_issue("image #{@img.url} uses the http scheme", line: line, content: content)
47
- end
43
+ add_issue("image #{@img.url} uses the http scheme", line: line, content: content) if @img.check_img_http? && @img.scheme == 'http'
48
44
  end
49
45
 
50
46
  external_urls
@@ -34,7 +34,8 @@ class LinkCheck < ::HTMLProofer::Check
34
34
  if missing_href?
35
35
  next if @link.allow_missing_href?
36
36
  # HTML5 allows dropping the href: http://git.io/vBX0z
37
- next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
37
+ next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)
38
+
38
39
  add_issue('anchor has no href attribute', line: line, content: content)
39
40
  next
40
41
  end
@@ -47,9 +48,10 @@ class LinkCheck < ::HTMLProofer::Check
47
48
  # we need to skip these for now; although the domain main be valid,
48
49
  # curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
49
50
  next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
51
+
50
52
  add_to_external_urls(@link.href)
51
53
  next
52
- elsif !@link.internal? && !@link.exists?
54
+ elsif @link.internal? && !@link.exists?
53
55
  add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
54
56
  end
55
57
 
@@ -74,6 +76,7 @@ class LinkCheck < ::HTMLProofer::Check
74
76
  handle_tel(link, line, content)
75
77
  when 'http'
76
78
  return unless @options[:enforce_https]
79
+
77
80
  add_issue("#{link.href} is not an HTTPS link", line: line, content: content)
78
81
  end
79
82
  end
@@ -103,9 +106,7 @@ class LinkCheck < ::HTMLProofer::Check
103
106
  add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
104
107
  else
105
108
  target_html = create_nokogiri link.absolute_path
106
- unless hash_check target_html, link.hash
107
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content)
108
- end
109
+ add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
109
110
  end
110
111
  end
111
112
 
@@ -129,10 +130,13 @@ class LinkCheck < ::HTMLProofer::Check
129
130
  html.xpath(*xpaths)
130
131
  end
131
132
 
132
- IGNORABE_REL = %(canonical alternate next prev previous icon manifest apple-touch-icon)
133
+ # Whitelist for affected elements from Subresource Integrity specification
134
+ # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
135
+ SRI_REL_TYPES = %(stylesheet)
133
136
 
134
137
  def check_sri(line, content)
135
- return if IGNORABE_REL.include?(@link.rel)
138
+ return unless SRI_REL_TYPES.include?(@link.rel)
139
+
136
140
  if !defined?(@link.integrity) && !defined?(@link.crossorigin)
137
141
  add_issue("SRI and CORS not provided in: #{@link.src}", line: line, content: content)
138
142
  elsif !defined?(@link.integrity)
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
  # frozen_string_literal: true
3
2
 
4
3
  class OpenGraphElement < ::HTMLProofer::Element
@@ -51,7 +51,8 @@ module HTMLProofer
51
51
  VALIDATION_DEFAULTS = {
52
52
  report_script_embeds: false,
53
53
  report_missing_names: false,
54
- report_invalid_tags: false
54
+ report_invalid_tags: false,
55
+ report_missing_doctype: false
55
56
  }.freeze
56
57
 
57
58
  CACHE_DEFAULTS = {}.freeze
@@ -65,19 +66,19 @@ module HTMLProofer
65
66
  end
66
67
 
67
68
  def self.parse_json_option(option_name, config)
68
- raise ArgumentError.new('Must provide an option name in string format.') unless option_name.is_a?(String)
69
- raise ArgumentError.new('Must provide an option name in string format.') unless !option_name.strip.empty?
69
+ raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
70
+ raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
70
71
 
71
72
  return {} if config.nil?
72
73
 
73
- raise ArgumentError.new('Must provide a JSON configuration in string format.') unless config.is_a?(String)
74
+ raise ArgumentError, 'Must provide a JSON configuration in string format.' unless config.is_a?(String)
74
75
 
75
76
  return {} if config.strip.empty?
76
77
 
77
78
  begin
78
79
  JSON.parse(config)
79
- rescue
80
- raise ArgumentError.new("Option '" + option_name + "' did not contain valid JSON.")
80
+ rescue StandardError
81
+ raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
81
82
  end
82
83
  end
83
84
  end
@@ -18,7 +18,7 @@ module HTMLProofer
18
18
  instance_variable_set("@#{name}", value.value)
19
19
  end
20
20
 
21
- @aria_hidden = (defined?(@aria_hidden) && @aria_hidden == 'true') ? true : false
21
+ @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
22
22
 
23
23
  @data_proofer_ignore = defined?(@data_proofer_ignore)
24
24
 
@@ -56,9 +56,11 @@ module HTMLProofer
56
56
 
57
57
  def url
58
58
  return @url if defined?(@url)
59
+
59
60
  @url = (@src || @srcset || @href || '').delete("\u200b").strip
60
61
  @url = Addressable::URI.join(base.attr('href') || '', url).to_s if base
61
62
  return @url if @check.options[:url_swap].empty?
63
+
62
64
  @url = swap(@url, @check.options[:url_swap])
63
65
  end
64
66
 
@@ -77,11 +79,11 @@ module HTMLProofer
77
79
  end
78
80
 
79
81
  def hash
80
- parts.fragment unless parts.nil?
82
+ parts&.fragment
81
83
  end
82
84
 
83
85
  def scheme
84
- parts.scheme unless parts.nil?
86
+ parts&.scheme
85
87
  end
86
88
 
87
89
  # path is to an external server
@@ -137,9 +139,22 @@ module HTMLProofer
137
139
  !internal?
138
140
  end
139
141
 
140
- # path is an anchor or a query
141
142
  def internal?
142
- hash_link || param_link || slash_link
143
+ relative_link? || internal_absolute_link?
144
+ end
145
+
146
+ def internal_absolute_link?
147
+ url.start_with?('/')
148
+ end
149
+
150
+ def relative_link?
151
+ return false if remote?
152
+
153
+ hash_link || param_link || url.start_with?('.') || url =~ /^\S/
154
+ end
155
+
156
+ def link_points_to_same_page?
157
+ hash_link || param_link
143
158
  end
144
159
 
145
160
  def hash_link
@@ -150,21 +165,20 @@ module HTMLProofer
150
165
  url.start_with?('?')
151
166
  end
152
167
 
153
- def slash_link
154
- url.start_with?('|')
155
- end
156
-
157
168
  def file_path
158
- return if path.nil?
169
+ return if path.nil? || path.empty?
159
170
 
160
171
  path_dot_ext = ''
161
172
 
162
- if @check.options[:assume_extension]
163
- path_dot_ext = path + @check.options[:extension]
164
- end
173
+ path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
165
174
 
166
175
  if path =~ %r{^/} # path relative to root
167
- base = File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
176
+ if File.directory?(@check.src)
177
+ base = @check.src
178
+ else
179
+ root_dir = @check.options[:root_dir]
180
+ base = root_dir || File.dirname(@check.src)
181
+ end
168
182
  elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
169
183
  base = File.dirname @check.path
170
184
  elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
@@ -174,7 +188,6 @@ module HTMLProofer
174
188
  end
175
189
 
176
190
  file = File.join base, path
177
-
178
191
  if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
179
192
  file = "#{file}#{@check.options[:extension]}"
180
193
  elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
@@ -187,6 +200,7 @@ module HTMLProofer
187
200
  # checks if a file exists relative to the current pwd
188
201
  def exists?
189
202
  return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
203
+
190
204
  @checked_paths[absolute_path] = File.exist? absolute_path
191
205
  end
192
206
 
@@ -220,12 +234,14 @@ module HTMLProofer
220
234
  end
221
235
 
222
236
  def html
223
- # If link is on the same page, then URL is on the current page so can use the same HTML as for current page
224
- if (hash_link || param_link) && internal?
237
+ # If the link points to the same page, the URL is on the current page, so reuse the current page's HTML
238
+ if link_points_to_same_page?
225
239
  @html
226
- elsif slash_link && internal?
240
+ elsif internal?
227
241
  # link on another page, e.g. /about#Team - need to get HTML from the other page
228
242
  create_nokogiri(absolute_path)
243
+ else
244
+ raise NotImplementedError, 'HTMLProofer should not have gotten here. Please report this as a bug.'
229
245
  end
230
246
  end
231
247
  end
@@ -56,9 +56,7 @@ module HTMLProofer
56
56
  @logger.log :error, " * #{issue}"
57
57
  else
58
58
  msg = " * #{issue.send(second_report)}#{issue.line}"
59
- if !issue.content.nil? && !issue.content.empty?
60
- msg = "#{msg}\n #{issue.content}"
61
- end
59
+ msg = "#{msg}\n #{issue.content}" if !issue.content.nil? && !issue.content.empty?
62
60
  @logger.log(:error, msg)
63
61
  end
64
62
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  module HTMLProofer
4
4
  class Middleware
5
+ include HTMLProofer::Utils
5
6
 
6
7
  class InvalidHtmlError < StandardError
7
8
  def initialize(failures)
@@ -9,18 +10,18 @@ module HTMLProofer
9
10
  end
10
11
 
11
12
  def message
12
- "HTML Validation errors (skip by adding `?proofer-ignore` to URL): \n#{@failures.join("\n")}"
13
+ "HTML Validation errors (skip by adding `?proofer-ignore` to URL): \n#{@failures.join("\n")}"
13
14
  end
14
15
  end
15
16
 
16
17
  def self.options
17
18
  @options ||= {
18
- type: :file,
19
- allow_missing_href: true, # Permitted in html5
20
- allow_hash_href: true,
19
+ type: :file,
20
+ allow_missing_href: true, # Permitted in html5
21
+ allow_hash_href: true,
21
22
  check_external_hash: true,
22
- check_html: true,
23
- url_ignore: [/.*/], # Don't try to check local files exist
23
+ check_html: true,
24
+ url_ignore: [/.*/] # Don't try to check local files exist
24
25
  }
25
26
  end
26
27
 
@@ -46,20 +47,21 @@ module HTMLProofer
46
47
  '<BR',
47
48
  '<P',
48
49
  '<!--'
49
- ]
50
+ ].freeze
50
51
 
51
52
  def call(env)
52
53
  result = @app.call(env)
53
54
  return result if env['REQUEST_METHOD'] != 'GET'
54
55
  return result if env['QUERY_STRING'] =~ /proofer-ignore/
55
56
  return result if result.first != 200
57
+
56
58
  body = []
57
59
  result.last.each { |e| body << e }
58
60
 
59
61
  body = body.join('')
60
62
  begin
61
63
  html = body.lstrip
62
- rescue
64
+ rescue StandardError
63
65
  return result # Invalid encoding; it's not gonna be html.
64
66
  end
65
67
  if HTML_SIGNATURE.any? { |sig| html.upcase.start_with? sig }
@@ -67,12 +69,10 @@ module HTMLProofer
67
69
  'response',
68
70
  Middleware.options
69
71
  ).check_parsed(
70
- Nokogiri::HTML(Utils.clean_content(html)), 'response'
72
+ Nokogiri::HTML5(html, max_errors: -1), 'response'
71
73
  )
72
74
 
73
- if parsed[:failures].length > 0
74
- raise InvalidHtmlError.new(parsed[:failures])
75
- end
75
+ raise InvalidHtmlError, parsed[:failures] unless parsed[:failures].empty?
76
76
  end
77
77
  result
78
78
  end
@@ -4,7 +4,7 @@ module HTMLProofer
4
4
  class Runner
5
5
  include HTMLProofer::Utils
6
6
 
7
- attr_reader :options, :external_urls
7
+ attr_reader :options, :external_urls, :failures
8
8
 
9
9
  def initialize(src, opts = {})
10
10
  @src = src
@@ -103,9 +103,7 @@ module HTMLProofer
103
103
  check = Object.const_get(klass).new(src, path, html, @options)
104
104
  check.run
105
105
  external_urls = check.external_urls
106
- if @options[:url_swap]
107
- external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }]
108
- end
106
+ external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
109
107
  result[:external_urls].merge!(external_urls)
110
108
  result[:failures].concat(check.issues)
111
109
  end
@@ -148,6 +146,7 @@ module HTMLProofer
148
146
 
149
147
  def checks
150
148
  return @checks if defined?(@checks) && !@checks.nil?
149
+
151
150
  @checks = HTMLProofer::Check.subchecks.map(&:name)
152
151
  @checks.delete('FaviconCheck') unless @options[:check_favicon]
153
152
  @checks.delete('HtmlCheck') unless @options[:check_html]
@@ -159,6 +158,7 @@ module HTMLProofer
159
158
  def failed_tests
160
159
  result = []
161
160
  return result if @failures.empty?
161
+
162
162
  @failures.each { |f| result << f.to_s }
163
163
  result
164
164
  end
@@ -36,6 +36,7 @@ module HTMLProofer
36
36
 
37
37
  def remove_query_values
38
38
  return nil if @external_urls.nil?
39
+
39
40
  paths_with_queries = {}
40
41
  iterable_external_urls = @external_urls.dup
41
42
  @external_urls.each_key do |url|
@@ -46,6 +47,7 @@ module HTMLProofer
46
47
  nil
47
48
  end
48
49
  next if uri.nil? || uri.query.nil?
50
+
49
51
  iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
50
52
  end
51
53
  iterable_external_urls
@@ -108,9 +110,9 @@ module HTMLProofer
108
110
  external_urls.each_pair do |url, filenames|
109
111
  url = begin
110
112
  clean_url(url)
111
- rescue URI::Error, Addressable::URI::InvalidURIError
112
- add_external_issue(filenames, "#{url} is an invalid URL")
113
- next
113
+ rescue URI::Error, Addressable::URI::InvalidURIError
114
+ add_external_issue(filenames, "#{url} is an invalid URL")
115
+ next
114
116
  end
115
117
 
116
118
  method = if hash?(url) && @options[:check_external_hash]
@@ -144,22 +146,20 @@ module HTMLProofer
144
146
  href = response.request.base_url.to_s
145
147
  method = response.request.options[:method]
146
148
  response_code = response.code
147
- response.body.gsub!("\x00", '')
149
+ response.body.delete!("\x00")
148
150
 
149
- if filenames.nil?
150
- debug_msg = "Received a #{response_code} for #{href}"
151
- else
152
- debug_msg = "Received a #{response_code} for #{href} in #{filenames.join(' ')}"
153
- end
151
+ debug_msg = if filenames.nil?
152
+ "Received a #{response_code} for #{href}"
153
+ else
154
+ "Received a #{response_code} for #{href} in #{filenames.join(' ')}"
155
+ end
154
156
 
155
157
  @logger.log :debug, debug_msg
156
158
 
157
159
  return if @options[:http_status_ignore].include?(response_code)
158
160
 
159
161
  if response_code.between?(200, 299)
160
- unless check_hash_in_2xx_response(href, effective_url, response, filenames)
161
- @cache.add(href, filenames, response_code)
162
- end
162
+ @cache.add(href, filenames, response_code) unless check_hash_in_2xx_response(href, effective_url, response, filenames)
163
163
  elsif response.timed_out?
164
164
  handle_timeout(href, filenames, response_code)
165
165
  elsif response_code.zero?
@@ -168,6 +168,7 @@ module HTMLProofer
168
168
  queue_request(:get, href, filenames)
169
169
  else
170
170
  return if @options[:only_4xx] && !response_code.between?(400, 499)
171
+
171
172
  # Received a non-successful http response.
172
173
  msg = "External link #{href} failed: #{response_code} #{response.return_message}"
173
174
  add_external_issue(filenames, msg, response_code)
@@ -191,9 +192,7 @@ module HTMLProofer
191
192
  xpath << [%(//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])]
192
193
  # when linking to a file on GitHub, like #L12-L34, only the first "L" portion
193
194
  # will be identified as a linkable portion
194
- if hash =~ /\A(L\d)+/
195
- xpath << [%(//td[@id="#{Regexp.last_match[1]}"])]
196
- end
195
+ xpath << [%(//td[@id="#{Regexp.last_match[1]}"])] if hash =~ /\A(L\d)+/
197
196
  end
198
197
 
199
198
  return unless body_doc.xpath(xpath.join('|')).empty?
@@ -208,6 +207,7 @@ module HTMLProofer
208
207
  msg = "External link #{href} failed: got a time out (response code #{response_code})"
209
208
  @cache.add(href, filenames, 0, msg)
210
209
  return if @options[:only_4xx]
210
+
211
211
  add_external_issue(filenames, msg, response_code)
212
212
  end
213
213
 
@@ -218,6 +218,7 @@ module HTMLProofer
218
218
  Either way, the return message (if any) from the server is: #{return_message}"
219
219
  @cache.add(href, filenames, 0, msg)
220
220
  return if @options[:only_4xx]
221
+
221
222
  add_external_issue(filenames, msg, response_code)
222
223
  end
223
224
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
3
+ require 'nokogumbo'
4
4
 
5
5
  module HTMLProofer
6
6
  module Utils
@@ -15,9 +15,8 @@ module HTMLProofer
15
15
  path
16
16
  end
17
17
 
18
- Nokogiri::HTML(clean_content(content))
18
+ Nokogiri::HTML5(content)
19
19
  end
20
- module_function :create_nokogiri
21
20
 
22
21
  def swap(href, replacement)
23
22
  replacement.each do |link, replace|
@@ -25,16 +24,5 @@ module HTMLProofer
25
24
  end
26
25
  href
27
26
  end
28
- module_function :swap
29
-
30
- # address a problem with Nokogiri's parsing URL entities
31
- # problem from http://git.io/vBYU1
32
- # solution from http://git.io/vBYUi
33
- def clean_content(string)
34
- string.gsub(%r{(?:https?:)?//([^>]+)}i) do |url|
35
- url.gsub(/&(?!amp;)/, '&amp;')
36
- end
37
- end
38
- module_function :clean_content
39
27
  end
40
28
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.12.1'.freeze
4
+ VERSION = '3.15.0'
5
5
  end
metadata CHANGED
@@ -1,59 +1,59 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.12.1
4
+ version: 3.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-07 00:00:00.000000000 Z
11
+ date: 2019-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: mercenary
14
+ name: addressable
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.3'
19
+ version: '2.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.3'
26
+ version: '2.3'
27
27
  - !ruby/object:Gem::Dependency
28
- name: nokogiri
28
+ name: mercenary
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.10'
33
+ version: '0.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.10'
40
+ version: '0.3'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rainbow
42
+ name: nokogumbo
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: '2.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: '2.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: typhoeus
56
+ name: parallel
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
@@ -67,21 +67,21 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.3'
69
69
  - !ruby/object:Gem::Dependency
70
- name: yell
70
+ name: rainbow
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '2.0'
75
+ version: '3.0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '2.0'
82
+ version: '3.0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: parallel
84
+ name: typhoeus
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
@@ -95,21 +95,21 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '1.3'
97
97
  - !ruby/object:Gem::Dependency
98
- name: addressable
98
+ name: yell
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '2.3'
103
+ version: '2.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '2.3'
110
+ version: '2.0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: redcarpet
112
+ name: awesome_print
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - ">="
@@ -123,7 +123,7 @@ dependencies:
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
- name: rubocop
126
+ name: codecov
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - ">="
@@ -137,7 +137,7 @@ dependencies:
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
- name: rubocop-standard
140
+ name: pry-byebug
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
143
  - - ">="
@@ -151,7 +151,7 @@ dependencies:
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
153
  - !ruby/object:Gem::Dependency
154
- name: rubocop-performance
154
+ name: rake
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - ">="
@@ -165,7 +165,7 @@ dependencies:
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  - !ruby/object:Gem::Dependency
168
- name: codecov
168
+ name: redcarpet
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
171
  - - ">="
@@ -193,7 +193,7 @@ dependencies:
193
193
  - !ruby/object:Gem::Version
194
194
  version: '3.1'
195
195
  - !ruby/object:Gem::Dependency
196
- name: rake
196
+ name: rubocop
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - ">="
@@ -207,7 +207,7 @@ dependencies:
207
207
  - !ruby/object:Gem::Version
208
208
  version: '0'
209
209
  - !ruby/object:Gem::Dependency
210
- name: pry-byebug
210
+ name: rubocop-performance
211
211
  requirement: !ruby/object:Gem::Requirement
212
212
  requirements:
213
213
  - - ">="
@@ -221,7 +221,7 @@ dependencies:
221
221
  - !ruby/object:Gem::Version
222
222
  version: '0'
223
223
  - !ruby/object:Gem::Dependency
224
- name: awesome_print
224
+ name: rubocop-standard
225
225
  requirement: !ruby/object:Gem::Requirement
226
226
  requirements:
227
227
  - - ">="
@@ -235,33 +235,33 @@ dependencies:
235
235
  - !ruby/object:Gem::Version
236
236
  version: '0'
237
237
  - !ruby/object:Gem::Dependency
238
- name: vcr
238
+ name: timecop
239
239
  requirement: !ruby/object:Gem::Requirement
240
240
  requirements:
241
241
  - - "~>"
242
242
  - !ruby/object:Gem::Version
243
- version: '2.9'
243
+ version: '0.8'
244
244
  type: :development
245
245
  prerelease: false
246
246
  version_requirements: !ruby/object:Gem::Requirement
247
247
  requirements:
248
248
  - - "~>"
249
249
  - !ruby/object:Gem::Version
250
- version: '2.9'
250
+ version: '0.8'
251
251
  - !ruby/object:Gem::Dependency
252
- name: timecop
252
+ name: vcr
253
253
  requirement: !ruby/object:Gem::Requirement
254
254
  requirements:
255
255
  - - "~>"
256
256
  - !ruby/object:Gem::Version
257
- version: '0.8'
257
+ version: '2.9'
258
258
  type: :development
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
262
  - - "~>"
263
263
  - !ruby/object:Gem::Version
264
- version: '0.8'
264
+ version: '2.9'
265
265
  description: Test your rendered HTML files to make sure they're accurate.
266
266
  email:
267
267
  - gjtorikian@gmail.com