html-proofer 4.3.2 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f917378eca9c4bf25268ef711b047db8762a6eda7dd613bc75b66520f67cee0e
4
- data.tar.gz: 909509bebddb27a6857775642123f0cc2f97d8bf3982ae262e6ba70497f255a4
3
+ metadata.gz: 4043366fa1c0d7aaa58fc0b577d25d2d0fba7e72afca2b43ae688e90c55dddef
4
+ data.tar.gz: a93621d2169757d3665e5950e8c99d23a1b64cc9014fbc806a986fcdcb906e26
5
5
  SHA512:
6
- metadata.gz: 84f7ea7cf3eb93b39c0882fa8694027dce632b6e47a8e9fb194dbb901c64b14e82d33ff036f31f7786ab782671e9861c8a991688af7a2cbf5e67c50c591f5d91
7
- data.tar.gz: feb87b5fc56c88b44fc0924e1e7f3e3a9d96a580920cd07080b2633bc75c3fb99543fdf449e49b5975c3292ff250c39f029118141cf60bd72b0cd4ba01d07723
6
+ metadata.gz: 1636df520fa7d4035ca482eeafd0b5e059b0fc77a0219f340e7c47b8f2105c8dd107bf03e0d78ca582f5d7abef9d5853edae498a2133b75eab6f92b7e77b636f
7
+ data.tar.gz: 5310df2250a5db3c8864b12ae01e49a0a2d043fc15f5bece45615cbad1d2b9100c899fa35902db5536b2be85908f9208ebfe65e31318cf6a6db47e945a745f17
@@ -20,12 +20,13 @@ module HTMLProofer
20
20
 
21
21
  swap_urls!
22
22
  clean_url!
23
-
24
- # convert "//" links to "https://"
25
- @url.start_with?("//") ? @url = "https:#{@url}" : @url
26
23
  end
27
24
  end
28
25
 
26
+ def protocol_relative?
27
+ url.start_with?("//")
28
+ end
29
+
29
30
  def to_s
30
31
  @url
31
32
  end
@@ -16,7 +16,10 @@ module HTMLProofer
16
16
  return if immediate_redirect?
17
17
 
18
18
  if found
19
- if @favicon.url.remote?
19
+ if @favicon.url.protocol_relative?
20
+ add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
21
+ line: @favicon.line, content: @favicon.content)
22
+ elsif @favicon.url.remote?
20
23
  add_to_external_urls(@favicon.url, @favicon.line)
21
24
  elsif !@favicon.url.exists?
22
25
  add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
@@ -18,6 +18,9 @@ module HTMLProofer
18
18
  # does the image exist?
19
19
  if missing_src?
20
20
  add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
21
+ elsif @img.url.protocol_relative?
22
+ add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
23
+ line: @img.line, content: @img.content)
21
24
  elsif @img.url.remote?
22
25
  add_to_external_urls(@img.url, @img.line)
23
26
  elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
@@ -27,7 +30,10 @@ module HTMLProofer
27
30
  @img.srcsets_wo_sizes.each do |srcset|
28
31
  srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
29
32
 
30
- if srcset_url.remote?
33
+ if srcset_url.protocol_relative?
34
+ add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
35
+ line: @img.line, content: @img.content)
36
+ elsif srcset_url.remote?
31
37
  add_to_external_urls(srcset_url.url, @img.line)
32
38
  elsif !srcset_url.exists?
33
39
  add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
@@ -28,6 +28,12 @@ module HTMLProofer
28
28
  next
29
29
  end
30
30
 
31
+ if @link.url.protocol_relative?
32
+ add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
33
+ line: @link.line, content: @link.content)
34
+ next
35
+ end
36
+
31
37
  check_schemes
32
38
 
33
39
  # intentionally down here because we still want valid? & missing_href? to execute
@@ -16,6 +16,9 @@ module HTMLProofer
16
16
  add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
17
17
  elsif !@open_graph.url.valid?
18
18
  add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
19
+ elsif @open_graph.url.protocol_relative?
20
+ add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
21
+ line: @open_graph.line, content: @open_graph.content)
19
22
  elsif @open_graph.url.remote?
20
23
  add_to_external_urls(@open_graph.url, @open_graph.line)
21
24
  else
@@ -13,8 +13,11 @@ module HTMLProofer
13
13
  # does the script exist?
14
14
  if missing_src?
15
15
  add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
16
+ elsif @script.url.protocol_relative?
17
+ add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
18
+ line: @script.line, content: @script.content)
16
19
  elsif @script.url.remote?
17
- add_to_external_urls(@script.src, @script.line)
20
+ add_to_external_urls(@script.url, @script.line)
18
21
  check_sri if @runner.check_sri?
19
22
  elsif !@script.url.exists?
20
23
  add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
@@ -29,28 +29,10 @@ module HTMLProofer
29
29
  content: content)
30
30
  end
31
31
 
32
- def self.subchecks(runner_options)
33
- # grab all known checks
34
- checks = ObjectSpace.each_object(Class).select do |klass|
35
- klass < self
36
- end
37
-
38
- # remove any checks not explicitly included
39
- checks.each_with_object([]) do |check, arr|
40
- next unless runner_options[:checks].include?(check.short_name)
41
-
42
- arr << check
43
- end
44
- end
45
-
46
32
  def short_name
47
33
  self.class.name.split("::").last
48
34
  end
49
35
 
50
- def self.short_name
51
- name.split("::").last
52
- end
53
-
54
36
  def add_to_internal_urls(url, line)
55
37
  url_string = url.raw_attribute
56
38
 
@@ -74,6 +56,26 @@ module HTMLProofer
74
56
  @external_urls[url_string] << { filename: @runner.current_filename, line: line }
75
57
  end
76
58
 
59
+ class << self
60
+ def subchecks(runner_options)
61
+ # grab all known checks
62
+ checks = ObjectSpace.each_object(Class).select do |klass|
63
+ klass < self
64
+ end
65
+
66
+ # remove any checks not explicitly included
67
+ checks.each_with_object([]) do |check, arr|
68
+ next unless runner_options[:checks].include?(check.short_name)
69
+
70
+ arr << check
71
+ end
72
+ end
73
+
74
+ def short_name
75
+ name.split("::").last
76
+ end
77
+ end
78
+
77
79
  private def base_url
78
80
  return @base_url if defined?(@base_url)
79
81
 
@@ -47,42 +47,44 @@ module HTMLProofer
47
47
 
48
48
  CACHE_DEFAULTS = {}.freeze
49
49
 
50
- def self.generate_defaults(opts)
51
- options = PROOFER_DEFAULTS.merge(opts)
50
+ class << self
51
+ def generate_defaults(opts)
52
+ options = PROOFER_DEFAULTS.merge(opts)
52
53
 
53
- options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
54
- options[:hydra] = HTMLProofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
54
+ options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
55
+ options[:hydra] = HTMLProofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
55
56
 
56
- options[:parallel] = HTMLProofer::Configuration::PARALLEL_DEFAULTS.merge(opts[:parallel] || {})
57
- options[:cache] = HTMLProofer::Configuration::CACHE_DEFAULTS.merge(opts[:cache] || {})
57
+ options[:parallel] = HTMLProofer::Configuration::PARALLEL_DEFAULTS.merge(opts[:parallel] || {})
58
+ options[:cache] = HTMLProofer::Configuration::CACHE_DEFAULTS.merge(opts[:cache] || {})
58
59
 
59
- options.delete(:src)
60
+ options.delete(:src)
60
61
 
61
- options
62
- end
62
+ options
63
+ end
63
64
 
64
- def self.to_regex?(item)
65
- if item.start_with?("/") && item.end_with?("/")
66
- Regexp.new(item[1...-1])
67
- else
68
- item
65
+ def to_regex?(item)
66
+ if item.start_with?("/") && item.end_with?("/")
67
+ Regexp.new(item[1...-1])
68
+ else
69
+ item
70
+ end
69
71
  end
70
- end
71
72
 
72
- def self.parse_json_option(option_name, config, symbolize_names: true)
73
- raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
74
- raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
73
+ def parse_json_option(option_name, config, symbolize_names: true)
74
+ raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
75
+ raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
75
76
 
76
- return {} if config.nil?
77
+ return {} if config.nil?
77
78
 
78
- raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
79
+ raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
79
80
 
80
- return {} if config.strip.empty?
81
+ return {} if config.strip.empty?
81
82
 
82
- begin
83
- JSON.parse(config, { symbolize_names: symbolize_names })
84
- rescue StandardError
85
- raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
83
+ begin
84
+ JSON.parse(config, { symbolize_names: symbolize_names })
85
+ rescue StandardError
86
+ raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
87
+ end
86
88
  end
87
89
  end
88
90
  end
@@ -22,22 +22,39 @@ module HTMLProofer
22
22
  end
23
23
 
24
24
  def run_internal_link_checker(links)
25
+ # collect urls and metadata for hashes to be checked in the same target file
26
+ file_paths_hashes_to_check = {}
25
27
  to_add = []
26
- links.each_pair do |link, matched_files|
28
+ links.each_with_index do |(link, matched_files), i|
29
+ matched_count_to_log = pluralize(matched_files.count, "reference", "references")
30
+ @logger.log(:debug, "(#{i + 1} / #{links.count}) Internal link #{link}: Checking #{matched_count_to_log}")
27
31
  matched_files.each do |metadata|
28
32
  url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])
29
33
 
30
34
  @runner.current_source = metadata[:source]
31
35
  @runner.current_filename = metadata[:filename]
32
36
 
33
- unless file_exists?(url)
37
+ target_file_path = url.absolute_path
38
+ unless file_exists?(target_file_path)
34
39
  @failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
35
40
  "internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
36
41
  to_add << [url, metadata, false]
37
42
  next
38
43
  end
39
44
 
40
- unless hash_exists?(url)
45
+ hash_exists = hash_exists_for_url?(url)
46
+ if hash_exists.nil?
47
+ # the hash needs to be checked in the target file, we collect the url and metadata
48
+ unless file_paths_hashes_to_check.key?(target_file_path)
49
+ file_paths_hashes_to_check[target_file_path] = {}
50
+ end
51
+ unless file_paths_hashes_to_check[target_file_path].key?(url.hash)
52
+ file_paths_hashes_to_check[target_file_path][url.hash] = []
53
+ end
54
+ file_paths_hashes_to_check[target_file_path][url.hash] << [url, metadata]
55
+ next
56
+ end
57
+ unless hash_exists
41
58
  @failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
42
59
  "internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
43
60
  to_add << [url, metadata, false]
@@ -48,6 +65,24 @@ module HTMLProofer
48
65
  end
49
66
  end
50
67
 
68
+ # check hashes by target file
69
+ @logger.log(:info, "Checking internal link hashes in #{pluralize(file_paths_hashes_to_check.count, "file", "files")}")
70
+ file_paths_hashes_to_check.each_with_index do |(file_path, hashes_to_check), i|
71
+ hash_count_to_log = pluralize(hashes_to_check.count, "hash", "hashes")
72
+ @logger.log(:debug, "(#{i + 1} / #{file_paths_hashes_to_check.count}) Checking #{hash_count_to_log} in #{file_path}")
73
+ html = create_nokogiri(file_path)
74
+ hashes_to_check.each_pair do |href_hash, url_metadata|
75
+ exists = hash_exists_in_html?(href_hash, html)
76
+ url_metadata.each do |(url, metadata)|
77
+ unless exists
78
+ @failed_checks << Failure.new(metadata[:filename], "Links > Internal",
79
+ "internally linking to #{url}; the file exists, but the hash '#{href_hash}' does not", line: metadata[:line], status: nil, content: nil)
80
+ end
81
+ to_add << [url, metadata, exists]
82
+ end
83
+ end
84
+ end
85
+
51
86
  # adding directly to the cache above results in an endless loop
52
87
  to_add.each do |(url, metadata, exists)|
53
88
  @cache.add_internal(url.to_s, metadata, exists)
@@ -56,15 +91,15 @@ module HTMLProofer
56
91
  @failed_checks
57
92
  end
58
93
 
59
- private def file_exists?(url)
60
- absolute_path = url.absolute_path
61
- return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)
94
+ private def file_exists?(absolute_path)
95
+ return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
62
96
 
63
- @runner.checked_paths[url.absolute_path] = File.exist?(absolute_path)
97
+ @runner.checked_paths[absolute_path] = File.exist?(absolute_path)
64
98
  end
65
99
 
66
- # verify the target hash
67
- private def hash_exists?(url)
100
+ # verify the hash based on just the URL, w/o looking at the target file
101
+ # => returns nil if the hash could not be verified
102
+ private def hash_exists_for_url?(url)
68
103
  href_hash = url.hash
69
104
  return true if blank?(href_hash)
70
105
  return true unless @runner.options[:check_internal_hash]
@@ -76,10 +111,18 @@ module HTMLProofer
76
111
  decoded_href_hash = Addressable::URI.unescape(href_hash)
77
112
  fragment_ids = [href_hash, decoded_href_hash]
78
113
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
79
- fragment_ids.include?("top") || !find_fragments(fragment_ids, url).empty?
114
+ return true if fragment_ids.include?("top")
115
+
116
+ nil
117
+ end
118
+
119
+ private def hash_exists_in_html?(href_hash, html)
120
+ decoded_href_hash = Addressable::URI.unescape(href_hash)
121
+ fragment_ids = [href_hash, decoded_href_hash]
122
+ !find_fragments(fragment_ids, html).empty?
80
123
  end
81
124
 
82
- private def find_fragments(fragment_ids, url)
125
+ private def find_fragments(fragment_ids, html)
83
126
  xpaths = fragment_ids.uniq.flat_map do |frag_id|
84
127
  escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
85
128
  [
@@ -89,7 +132,6 @@ module HTMLProofer
89
132
  end
90
133
  xpaths << XpathFunctions.new
91
134
 
92
- html = create_nokogiri(url.absolute_path)
93
135
  html.xpath(*xpaths)
94
136
  end
95
137
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = "4.3.2"
4
+ VERSION = "4.4.1"
5
5
  end
data/lib/html_proofer.rb CHANGED
@@ -4,7 +4,7 @@ require "zeitwerk"
4
4
  lib_dir = File.join(File.dirname(__dir__), "lib")
5
5
  gem_loader = Zeitwerk::Loader.for_gem
6
6
  gem_loader.inflector.inflect(
7
- "html_proofer" => "HTMLProofer"
7
+ "html_proofer" => "HTMLProofer",
8
8
  )
9
9
  gem_loader.ignore(File.join(lib_dir, "html-proofer.rb"))
10
10
  gem_loader.setup
@@ -20,37 +20,39 @@ if ENV.fetch("DEBUG", false)
20
20
  end
21
21
 
22
22
  module HTMLProofer
23
- def self.check_file(file, options = {})
24
- raise ArgumentError unless file.is_a?(String)
25
- raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
23
+ class << self
24
+ def check_file(file, options = {})
25
+ raise ArgumentError unless file.is_a?(String)
26
+ raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
26
27
 
27
- options[:type] = :file
28
- HTMLProofer::Runner.new(file, options)
29
- end
28
+ options[:type] = :file
29
+ HTMLProofer::Runner.new(file, options)
30
+ end
30
31
 
31
- def self.check_directory(directory, options = {})
32
- raise ArgumentError unless directory.is_a?(String)
33
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
32
+ def check_directory(directory, options = {})
33
+ raise ArgumentError unless directory.is_a?(String)
34
+ raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
34
35
 
35
- options[:type] = :directory
36
- HTMLProofer::Runner.new([directory], options)
37
- end
36
+ options[:type] = :directory
37
+ HTMLProofer::Runner.new([directory], options)
38
+ end
38
39
 
39
- def self.check_directories(directories, options = {})
40
- raise ArgumentError unless directories.is_a?(Array)
40
+ def check_directories(directories, options = {})
41
+ raise ArgumentError unless directories.is_a?(Array)
41
42
 
42
- options[:type] = :directory
43
- directories.each do |directory|
44
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
43
+ options[:type] = :directory
44
+ directories.each do |directory|
45
+ raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
46
+ end
47
+ HTMLProofer::Runner.new(directories, options)
45
48
  end
46
- HTMLProofer::Runner.new(directories, options)
47
- end
48
49
 
49
- def self.check_links(links, options = {})
50
- raise ArgumentError unless links.is_a?(Array)
50
+ def check_links(links, options = {})
51
+ raise ArgumentError unless links.is_a?(Array)
51
52
 
52
- options[:type] = :links
53
- HTMLProofer::Runner.new(links, options)
53
+ options[:type] = :links
54
+ HTMLProofer::Runner.new(links, options)
55
+ end
54
56
  end
55
57
  end
56
58
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.3.2
4
+ version: 4.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-03 00:00:00.000000000 Z
11
+ date: 2022-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable