html-proofer 3.14.0 → 3.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf06ea39b2c106240c1d6c10261b50bab110fa5620451690970c8226bd0022cb
4
- data.tar.gz: 70446e5bf8162760c6583f53917c3774ad165d29af0bd01ae600c6069d09360b
3
+ metadata.gz: 40942bc9c00696cb5c3c02e6ca427c0adfd1f80dae7028a6a2d70992c29065a2
4
+ data.tar.gz: 1ef56b761178d31791dc2e457c8d4da0704c6845725e87eada63e8c6d6d01d84
5
5
  SHA512:
6
- metadata.gz: ab6ef8adc5d80cd409f1cd6248ce7b969f69c219b6878e8d4614852dfc8211430db2fd6863c5e5b03a42326dc315e5538d939f506f95a774b1f48df6a9c95b06
7
- data.tar.gz: 603ff3aad5001b0484a753dda3c9b72fa6bac15e94d767f8cb8aaf009810c21b2e7cf5ee039d9261b05c0bb888cad85aa9da769ff0fc13489234bb841c3ee858
6
+ metadata.gz: 10f6a27dc6c59b01dd3ff4aadf8d3c1fef3cd82e35ee604f180a9738094aae1be4748d78279e5abe5fdac2ed07f43a8efdac8cec4d1a3b24ba91a3125301bbe3
7
+ data.tar.gz: 33f427ea5661e13e301b06033f65d16623005cf4834b412729e8e04c13bbaecbc0d88cce2f7a20d06cedbb3b6bc6015bef0bd1ed370ba0edc6b18ba4d6117e03
@@ -20,13 +20,13 @@ Mercenary.program(:htmlproofer) do |p|
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
21
  p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
22
22
  p.option 'assume_extension', '--assume-extension', 'Automatically add extension (e.g. `.html`) to file paths, to allow extensionless URLs (as supported by Jekyll 3 and GitHub Pages) (default: `false`).'
23
- p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
23
+ p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you do not want to run (default: `[]`)'
24
24
  p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists). This slows the checker down (default: `false`).'
25
25
  p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
26
- p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
26
+ p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
27
27
  p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
28
28
  p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
29
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources do use SRI (default: `false`).'
29
+ p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
30
30
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
31
31
  p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
32
32
  p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
@@ -37,9 +37,12 @@ Mercenary.program(:htmlproofer) do |p|
37
37
  p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
38
  p.option 'http_status_ignore', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
39
39
  p.option 'internal_domains', '--internal-domains domain1,[domain2,...]', Array, 'A comma-separated list of Strings containing domains that will be treated as internal urls.'
40
- p.option 'report_invalid_tags', '--report-invalid-tags', 'Ignore `check_html` errors associated with unknown markup (default: `false`)'
41
- p.option 'report_missing_names', '--report-missing-names', 'Ignore `check_html` errors associated with missing entities (default: `false`)'
42
- p.option 'report_script_embeds', '--report-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
40
+ p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
41
+ p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
42
+ p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
43
+ p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
44
+ p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
45
+ p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
43
46
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
44
47
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
45
48
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
@@ -80,6 +83,9 @@ Mercenary.program(:htmlproofer) do |p|
80
83
  options[:validation][:report_script_embeds] = opts['report_script_embeds'] unless opts['report_script_embeds'].nil?
81
84
  options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
82
85
  options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
86
+ options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
87
+ options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
88
+ options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
83
89
 
84
90
  options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
85
91
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  def require_all(path)
4
4
  dir = File.join(File.dirname(__FILE__), path)
5
- Dir[File.join(dir, '*.rb')].each do |f|
5
+ Dir[File.join(dir, '*.rb')].sort.each do |f|
6
6
  require f
7
7
  end
8
8
  end
@@ -17,7 +17,7 @@ require 'fileutils'
17
17
  begin
18
18
  require 'awesome_print'
19
19
  require 'pry-byebug'
20
- rescue LoadError; end # rubocop:disable Lint/HandleExceptions
20
+ rescue LoadError; end # rubocop:disable Lint/SuppressedException
21
21
  module HTMLProofer
22
22
  def self.check_file(file, options = {})
23
23
  raise ArgumentError unless file.is_a?(String)
@@ -120,9 +120,8 @@ module HTMLProofer
120
120
  @cache_log.each_pair do |url, cache|
121
121
  if within_timeframe?(cache['time'])
122
122
  next if cache['message'].empty? # these were successes to skip
123
- else
124
- urls_to_check[url] = cache['filenames'] # recheck expired links
125
123
  end
124
+ urls_to_check[url] = cache['filenames'] # recheck expired links
126
125
  end
127
126
  urls_to_check
128
127
  end
@@ -5,10 +5,11 @@ module HTMLProofer
5
5
  class Check
6
6
  attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
7
7
 
8
- def initialize(src, path, html, options)
8
+ def initialize(src, path, html, logger, options)
9
9
  @src = src
10
10
  @path = path
11
11
  @html = remove_ignored(html)
12
+ @logger = logger
12
13
  @options = options
13
14
  @issues = []
14
15
  @external_urls = {}
@@ -16,7 +17,7 @@ module HTMLProofer
16
17
 
17
18
  def create_element(node)
18
19
  @node = node
19
- Element.new(node, self)
20
+ Element.new(node, self, @logger)
20
21
  end
21
22
 
22
23
  def run
@@ -1,28 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class HtmlCheck < ::HTMLProofer::Check
4
+ # tags embedded in scripts are used in templating languages: http://git.io/vOovv
4
5
  SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
5
6
  INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
6
7
  INVALID_PREFIX = /Namespace prefix/.freeze
7
8
  PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
9
+ DOCTYPE_MSG = /Expected a doctype token/.freeze
10
+ EOF_IN_TAG = /End of input in tag/.freeze
11
+ MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
8
12
 
9
13
  def run
10
14
  @html.errors.each do |error|
11
- message = error.message
12
- line = error.line
13
-
14
- if message =~ INVALID_TAG_MSG || message =~ INVALID_PREFIX
15
- next unless options[:validation][:report_invalid_tags]
16
- end
17
-
18
- if message =~ PARSE_ENTITY_REF
19
- next unless options[:validation][:report_missing_names]
20
- end
21
-
22
- # tags embedded in scripts are used in templating languages: http://git.io/vOovv
23
- next if !options[:validation][:report_script_embeds] && message =~ SCRIPT_EMBEDS_MSG
15
+ add_issue(error.message, line: error.line) if report?(error.message)
16
+ end
17
+ end
24
18
 
25
- add_issue(message, line: line)
19
+ def report?(message)
20
+ case message
21
+ when SCRIPT_EMBEDS_MSG
22
+ options[:validation][:report_script_embeds]
23
+ when INVALID_TAG_MSG, INVALID_PREFIX
24
+ options[:validation][:report_invalid_tags]
25
+ when PARSE_ENTITY_REF
26
+ options[:validation][:report_missing_names]
27
+ when DOCTYPE_MSG
28
+ options[:validation][:report_missing_doctype]
29
+ when EOF_IN_TAG
30
+ options[:validation][:report_eof_tags]
31
+ when MISMATCHED_TAGS
32
+ options[:validation][:report_mismatched_tags]
33
+ else
34
+ true
26
35
  end
27
36
  end
28
37
  end
@@ -34,7 +34,7 @@ class LinkCheck < ::HTMLProofer::Check
34
34
  if missing_href?
35
35
  next if @link.allow_missing_href?
36
36
  # HTML5 allows dropping the href: http://git.io/vBX0z
37
- next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
37
+ next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)
38
38
 
39
39
  add_issue('anchor has no href attribute', line: line, content: content)
40
40
  next
@@ -3,8 +3,8 @@
3
3
  class OpenGraphElement < ::HTMLProofer::Element
4
4
  attr_reader :src
5
5
 
6
- def initialize(obj, check)
7
- super(obj, check)
6
+ def initialize(obj, check, logger)
7
+ super(obj, check, logger)
8
8
  # Fake up src from the content attribute
9
9
  instance_variable_set('@src', @content)
10
10
 
@@ -23,7 +23,7 @@ class OpenGraphCheck < ::HTMLProofer::Check
23
23
 
24
24
  def run
25
25
  @html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
26
- @opengraph = OpenGraphElement.new(m, self)
26
+ @opengraph = OpenGraphElement.new(m, self, @logger)
27
27
 
28
28
  next if @opengraph.ignore?
29
29
 
@@ -51,7 +51,10 @@ module HTMLProofer
51
51
  VALIDATION_DEFAULTS = {
52
52
  report_script_embeds: false,
53
53
  report_missing_names: false,
54
- report_invalid_tags: false
54
+ report_invalid_tags: false,
55
+ report_missing_doctype: false,
56
+ report_eof_tags: false,
57
+ report_mismatched_tags: false
55
58
  }.freeze
56
59
 
57
60
  CACHE_DEFAULTS = {}.freeze
@@ -10,12 +10,18 @@ module HTMLProofer
10
10
 
11
11
  attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore
12
12
 
13
- def initialize(obj, check)
13
+ def initialize(obj, check, logger)
14
+ @logger = logger
14
15
  # Construct readable ivars for every element
15
- obj.attributes.each_pair do |attribute, value|
16
- name = attribute.tr('-:.', '_').to_s.to_sym
17
- (class << self; self; end).send(:attr_reader, name)
18
- instance_variable_set("@#{name}", value.value)
16
+ begin
17
+ obj.attributes.each_pair do |attribute, value|
18
+ name = attribute.tr('-:.;', '_').to_s.to_sym
19
+ (class << self; self; end).send(:attr_reader, name)
20
+ instance_variable_set("@#{name}", value.value)
21
+ end
22
+ rescue NameError => e
23
+ @logger.log :error, "Attribute set `#{obj}` contains an error!"
24
+ raise e
19
25
  end
20
26
 
21
27
  @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
@@ -150,7 +156,7 @@ module HTMLProofer
150
156
  def relative_link?
151
157
  return false if remote?
152
158
 
153
- hash_link || param_link || url.start_with?('.') || url =~ /^\w/
159
+ hash_link || param_link || url.start_with?('.') || url =~ /^\S/
154
160
  end
155
161
 
156
162
  def link_points_to_same_page?
@@ -234,12 +240,14 @@ module HTMLProofer
234
240
  end
235
241
 
236
242
  def html
237
- # If link is on the same page, then URL is on the current page so can use the same HTML as for current page
243
+ # If link is on the same page, then URL is on the current page. use the same HTML as for current page
238
244
  if link_points_to_same_page?
239
245
  @html
240
- elsif relative_link?
246
+ elsif internal?
241
247
  # link on another page, e.g. /about#Team - need to get HTML from the other page
242
248
  create_nokogiri(absolute_path)
249
+ else
250
+ raise NotImplementedError, 'HTMLProofer should not have gotten here. Please report this as a bug.'
243
251
  end
244
252
  end
245
253
  end
@@ -7,16 +7,27 @@ module HTMLProofer
7
7
  class Log
8
8
  include Yell::Loggable
9
9
 
10
+ STDOUT_LEVELS = %i[debug info warn].freeze
11
+ STDERR_LEVELS = %i[error fatal].freeze
12
+
10
13
  def initialize(log_level)
11
14
  @logger = Yell.new(format: false, \
12
15
  name: 'HTMLProofer', \
13
16
  level: "gte.#{log_level}") do |l|
14
- l.adapter :stdout, level: %i[debug info warn]
15
- l.adapter :stderr, level: %i[error fatal]
17
+ l.adapter :stdout, level: 'lte.warn'
18
+ l.adapter :stderr, level: 'gte.error'
16
19
  end
17
20
  end
18
21
 
19
22
  def log(level, message)
23
+ log_with_color(level, message)
24
+ end
25
+
26
+ def log_with_color(level, message)
27
+ @logger.send level, colorize(level, message)
28
+ end
29
+
30
+ def colorize(level, message)
20
31
  color = case level
21
32
  when :debug
22
33
  :cyan
@@ -28,15 +39,8 @@ module HTMLProofer
28
39
  :red
29
40
  end
30
41
 
31
- log_with_color(level, color, message)
32
- end
33
-
34
- def log_with_color(level, color, message)
35
- @logger.send level, colorize(color, message)
36
- end
37
-
38
- def colorize(color, message)
39
- if $stdout.isatty && $stderr.isatty
42
+ if (STDOUT_LEVELS.include?(level) && $stdout.isatty) || \
43
+ (STDERR_LEVELS.include?(level) && $stderr.isatty)
40
44
  Rainbow(message).send(color)
41
45
  else
42
46
  message
@@ -21,7 +21,8 @@ module HTMLProofer
21
21
  allow_hash_href: true,
22
22
  check_external_hash: true,
23
23
  check_html: true,
24
- url_ignore: [/.*/] # Don't try to check local files exist
24
+ url_ignore: [/.*/], # Don't try to check if local files exist
25
+ validation: { report_eof_tags: true }
25
26
  }
26
27
  end
27
28
 
@@ -69,7 +70,7 @@ module HTMLProofer
69
70
  'response',
70
71
  Middleware.options
71
72
  ).check_parsed(
72
- Nokogiri::HTML(clean_content(html)), 'response'
73
+ Nokogiri::HTML5(html, max_errors: -1), 'response'
73
74
  )
74
75
 
75
76
  raise InvalidHtmlError, parsed[:failures] unless parsed[:failures].empty?
@@ -45,7 +45,7 @@ module HTMLProofer
45
45
  end
46
46
 
47
47
  if @failures.empty?
48
- @logger.log_with_color :info, :green, 'HTML-Proofer finished successfully.'
48
+ @logger.log :info, 'HTML-Proofer finished successfully.'
49
49
  else
50
50
  print_failed_tests
51
51
  end
@@ -100,7 +100,7 @@ module HTMLProofer
100
100
  @src.each do |src|
101
101
  checks.each do |klass|
102
102
  @logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
103
- check = Object.const_get(klass).new(src, path, html, @options)
103
+ check = Object.const_get(klass).new(src, path, html, @logger, @options)
104
104
  check.run
105
105
  external_urls = check.external_urls
106
106
  external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
@@ -147,6 +147,8 @@ module HTMLProofer
147
147
  def checks
148
148
  return @checks if defined?(@checks) && !@checks.nil?
149
149
 
150
+ return (@checks = ['LinkCheck']) if @type == :links
151
+
150
152
  @checks = HTMLProofer::Check.subchecks.map(&:name)
151
153
  @checks.delete('FaviconCheck') unless @options[:check_favicon]
152
154
  @checks.delete('HtmlCheck') unless @options[:check_html]
@@ -169,7 +171,7 @@ module HTMLProofer
169
171
  sorted_failures.sort_and_report
170
172
  count = @failures.length
171
173
  failure_text = pluralize(count, 'failure', 'failures')
172
- raise @logger.colorize :red, "HTML-Proofer found #{failure_text}!"
174
+ raise @logger.colorize :fatal, "HTML-Proofer found #{failure_text}!"
173
175
  end
174
176
  end
175
177
  end
@@ -146,7 +146,7 @@ module HTMLProofer
146
146
  href = response.request.base_url.to_s
147
147
  method = response.request.options[:method]
148
148
  response_code = response.code
149
- response.body.gsub!("\x00", '')
149
+ response.body.delete!("\x00")
150
150
 
151
151
  debug_msg = if filenames.nil?
152
152
  "Received a #{response_code} for #{href}"
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
3
+ require 'nokogumbo'
4
4
 
5
5
  module HTMLProofer
6
6
  module Utils
@@ -15,7 +15,7 @@ module HTMLProofer
15
15
  path
16
16
  end
17
17
 
18
- Nokogiri::HTML(clean_content(content))
18
+ Nokogiri::HTML5(content, max_errors: -1)
19
19
  end
20
20
 
21
21
  def swap(href, replacement)
@@ -24,14 +24,5 @@ module HTMLProofer
24
24
  end
25
25
  href
26
26
  end
27
-
28
- # address a problem with Nokogiri's parsing URL entities
29
- # problem from http://git.io/vBYU1
30
- # solution from http://git.io/vBYUi
31
- def clean_content(string)
32
- string.gsub(%r{(?:https?:)?//([^>]+)}i) do |url|
33
- url.gsub(/&(?!amp;)/, '&amp;')
34
- end
35
- end
36
27
  end
37
28
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.14.0'
4
+ VERSION = '3.15.3'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.14.0
4
+ version: 3.15.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-17 00:00:00.000000000 Z
11
+ date: 2020-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -39,19 +39,19 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0.3'
41
41
  - !ruby/object:Gem::Dependency
42
- name: nokogiri
42
+ name: nokogumbo
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.10'
47
+ version: '2.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.10'
54
+ version: '2.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: parallel
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -308,7 +308,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
308
308
  - !ruby/object:Gem::Version
309
309
  version: '0'
310
310
  requirements: []
311
- rubygems_version: 3.0.6
311
+ rubygems_version: 3.1.2
312
312
  signing_key:
313
313
  specification_version: 4
314
314
  summary: A set of tests to validate your HTML output. These tests check if your image