html-proofer 3.14.1 → 3.15.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e652993805b4e62434a27fd09f20936c71d1abfa2588e1ea991ed00d5174cfa4
4
- data.tar.gz: 1e4feee6f61dbadd8ed0d91e25950c0f643e3f29ee1cf881fa9bde6f37854901
3
+ metadata.gz: 722b9c6547474c7a4654e46a3c7af273eaa8a77b7538592ab0df4b42dd787ea2
4
+ data.tar.gz: 240979d4d2d62dbdd76ab14d868c8c0ecf420e10c93ae1cdc5c8182106fef698
5
5
  SHA512:
6
- metadata.gz: a6955c42ec89662b66b83422555e0d43c837f2d11f9e67bf85f587b2301d7c24f6f0275e16b31c904ee9da2999dde8583994d532f5be7a2b2f838dda2dab6e1b
7
- data.tar.gz: d261d16b8fcd86ceeac73eac6594b3c30a589015b5400ad785e879ce8b77ad7793fd85609063b6b57ffb08333ac07e1982a1b7cae449ab3e7cc5e0f1c36d6e5d
6
+ metadata.gz: 483c149a8b502277cb3d5d02f3aa4e009c27b1ef1a0c9f775b27410515a6654a013f434d018933c8991227d3826648902f0831613efddf3523e031a49dd586f8
7
+ data.tar.gz: 823bc37d94226a5dfed9d0db49ae6b8803ff74a719e8d3332b08d934bdaaa2c83374b41b5591fd6a714b6f8bca41a65bc6fa0ea099f6eadb5b17176ed5f61c56
@@ -20,13 +20,13 @@ Mercenary.program(:htmlproofer) do |p|
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
21
  p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
22
22
  p.option 'assume_extension', '--assume-extension', 'Automatically add extension (e.g. `.html`) to file paths, to allow extensionless URLs (as supported by Jekyll 3 and GitHub Pages) (default: `false`).'
23
- p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
23
+ p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you do not want to run (default: `[]`)'
24
24
  p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists). This slows the checker down (default: `false`).'
25
25
  p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
26
- p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
26
+ p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
27
27
  p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
28
28
  p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
29
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources do use SRI (default: `false`).'
29
+ p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
30
30
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
31
31
  p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
32
32
  p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
@@ -37,9 +37,10 @@ Mercenary.program(:htmlproofer) do |p|
37
37
  p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
38
  p.option 'http_status_ignore', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
39
39
  p.option 'internal_domains', '--internal-domains domain1,[domain2,...]', Array, 'A comma-separated list of Strings containing domains that will be treated as internal urls.'
40
- p.option 'report_invalid_tags', '--report-invalid-tags', 'Ignore `check_html` errors associated with unknown markup (default: `false`)'
41
- p.option 'report_missing_names', '--report-missing-names', 'Ignore `check_html` errors associated with missing entities (default: `false`)'
42
- p.option 'report_script_embeds', '--report-script-embeds', 'Ignore `check_html` errors associated with `script`s (default: `false`)'
40
+ p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
41
+ p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
42
+ p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
43
+ p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
43
44
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
44
45
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
45
46
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
@@ -80,6 +81,7 @@ Mercenary.program(:htmlproofer) do |p|
80
81
  options[:validation][:report_script_embeds] = opts['report_script_embeds'] unless opts['report_script_embeds'].nil?
81
82
  options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
82
83
  options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
84
+ options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
83
85
 
84
86
  options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
85
87
 
@@ -17,7 +17,7 @@ require 'fileutils'
17
17
  begin
18
18
  require 'awesome_print'
19
19
  require 'pry-byebug'
20
- rescue LoadError; end # rubocop:disable Lint/HandleExceptions
20
+ rescue LoadError; end # rubocop:disable Lint/SuppressedException
21
21
  module HTMLProofer
22
22
  def self.check_file(file, options = {})
23
23
  raise ArgumentError unless file.is_a?(String)
@@ -1,28 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class HtmlCheck < ::HTMLProofer::Check
4
+ # tags embedded in scripts are used in templating languages: http://git.io/vOovv
4
5
  SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
5
6
  INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
6
7
  INVALID_PREFIX = /Namespace prefix/.freeze
7
8
  PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
9
+ DOCTYPE_MSG = /The doctype must be the first token in the document/.freeze
8
10
 
9
11
  def run
10
12
  @html.errors.each do |error|
11
- message = error.message
12
- line = error.line
13
-
14
- if message =~ INVALID_TAG_MSG || message =~ INVALID_PREFIX
15
- next unless options[:validation][:report_invalid_tags]
16
- end
17
-
18
- if message =~ PARSE_ENTITY_REF
19
- next unless options[:validation][:report_missing_names]
20
- end
21
-
22
- # tags embedded in scripts are used in templating languages: http://git.io/vOovv
23
- next if !options[:validation][:report_script_embeds] && message =~ SCRIPT_EMBEDS_MSG
13
+ add_issue(error.message, line: error.line) if report?(error.message)
14
+ end
15
+ end
24
16
 
25
- add_issue(message, line: line)
17
+ def report?(message)
18
+ case message
19
+ when SCRIPT_EMBEDS_MSG
20
+ options[:validation][:report_script_embeds]
21
+ when INVALID_TAG_MSG, INVALID_PREFIX
22
+ options[:validation][:report_invalid_tags]
23
+ when PARSE_ENTITY_REF
24
+ options[:validation][:report_missing_names]
25
+ when DOCTYPE_MSG
26
+ options[:validation][:report_missing_doctype]
27
+ else
28
+ true
26
29
  end
27
30
  end
28
31
  end
@@ -34,7 +34,7 @@ class LinkCheck < ::HTMLProofer::Check
34
34
  if missing_href?
35
35
  next if @link.allow_missing_href?
36
36
  # HTML5 allows dropping the href: http://git.io/vBX0z
37
- next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
37
+ next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)
38
38
 
39
39
  add_issue('anchor has no href attribute', line: line, content: content)
40
40
  next
@@ -51,7 +51,8 @@ module HTMLProofer
51
51
  VALIDATION_DEFAULTS = {
52
52
  report_script_embeds: false,
53
53
  report_missing_names: false,
54
- report_invalid_tags: false
54
+ report_invalid_tags: false,
55
+ report_missing_doctype: false
55
56
  }.freeze
56
57
 
57
58
  CACHE_DEFAULTS = {}.freeze
@@ -69,7 +69,7 @@ module HTMLProofer
69
69
  'response',
70
70
  Middleware.options
71
71
  ).check_parsed(
72
- Nokogiri::HTML(clean_content(html)), 'response'
72
+ Nokogiri::HTML5(html, max_errors: -1), 'response'
73
73
  )
74
74
 
75
75
  raise InvalidHtmlError, parsed[:failures] unless parsed[:failures].empty?
@@ -146,7 +146,7 @@ module HTMLProofer
146
146
  href = response.request.base_url.to_s
147
147
  method = response.request.options[:method]
148
148
  response_code = response.code
149
- response.body.gsub!("\x00", '')
149
+ response.body.delete!("\x00")
150
150
 
151
151
  debug_msg = if filenames.nil?
152
152
  "Received a #{response_code} for #{href}"
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
3
+ require 'nokogumbo'
4
4
 
5
5
  module HTMLProofer
6
6
  module Utils
@@ -15,7 +15,7 @@ module HTMLProofer
15
15
  path
16
16
  end
17
17
 
18
- Nokogiri::HTML(clean_content(content))
18
+ Nokogiri::HTML5(content)
19
19
  end
20
20
 
21
21
  def swap(href, replacement)
@@ -24,14 +24,5 @@ module HTMLProofer
24
24
  end
25
25
  href
26
26
  end
27
-
28
- # address a problem with Nokogiri's parsing URL entities
29
- # problem from http://git.io/vBYU1
30
- # solution from http://git.io/vBYUi
31
- def clean_content(string)
32
- string.gsub(%r{(?:https?:)?//([^>]+)}i) do |url|
33
- url.gsub(/&(?!amp;)/, '&amp;')
34
- end
35
- end
36
27
  end
37
28
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.14.1'
4
+ VERSION = '3.15.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.14.1
4
+ version: 3.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-17 00:00:00.000000000 Z
11
+ date: 2019-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -39,19 +39,19 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0.3'
41
41
  - !ruby/object:Gem::Dependency
42
- name: nokogiri
42
+ name: nokogumbo
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.10'
47
+ version: '2.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.10'
54
+ version: '2.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: parallel
57
57
  requirement: !ruby/object:Gem::Requirement