html-proofer 4.3.2 → 4.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f917378eca9c4bf25268ef711b047db8762a6eda7dd613bc75b66520f67cee0e
4
- data.tar.gz: 909509bebddb27a6857775642123f0cc2f97d8bf3982ae262e6ba70497f255a4
3
+ metadata.gz: 4043366fa1c0d7aaa58fc0b577d25d2d0fba7e72afca2b43ae688e90c55dddef
4
+ data.tar.gz: a93621d2169757d3665e5950e8c99d23a1b64cc9014fbc806a986fcdcb906e26
5
5
  SHA512:
6
- metadata.gz: 84f7ea7cf3eb93b39c0882fa8694027dce632b6e47a8e9fb194dbb901c64b14e82d33ff036f31f7786ab782671e9861c8a991688af7a2cbf5e67c50c591f5d91
7
- data.tar.gz: feb87b5fc56c88b44fc0924e1e7f3e3a9d96a580920cd07080b2633bc75c3fb99543fdf449e49b5975c3292ff250c39f029118141cf60bd72b0cd4ba01d07723
6
+ metadata.gz: 1636df520fa7d4035ca482eeafd0b5e059b0fc77a0219f340e7c47b8f2105c8dd107bf03e0d78ca582f5d7abef9d5853edae498a2133b75eab6f92b7e77b636f
7
+ data.tar.gz: 5310df2250a5db3c8864b12ae01e49a0a2d043fc15f5bece45615cbad1d2b9100c899fa35902db5536b2be85908f9208ebfe65e31318cf6a6db47e945a745f17
@@ -20,12 +20,13 @@ module HTMLProofer
20
20
 
21
21
  swap_urls!
22
22
  clean_url!
23
-
24
- # convert "//" links to "https://"
25
- @url.start_with?("//") ? @url = "https:#{@url}" : @url
26
23
  end
27
24
  end
28
25
 
26
+ def protocol_relative?
27
+ url.start_with?("//")
28
+ end
29
+
29
30
  def to_s
30
31
  @url
31
32
  end
@@ -16,7 +16,10 @@ module HTMLProofer
16
16
  return if immediate_redirect?
17
17
 
18
18
  if found
19
- if @favicon.url.remote?
19
+ if @favicon.url.protocol_relative?
20
+ add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
21
+ line: @favicon.line, content: @favicon.content)
22
+ elsif @favicon.url.remote?
20
23
  add_to_external_urls(@favicon.url, @favicon.line)
21
24
  elsif !@favicon.url.exists?
22
25
  add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
@@ -18,6 +18,9 @@ module HTMLProofer
18
18
  # does the image exist?
19
19
  if missing_src?
20
20
  add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
21
+ elsif @img.url.protocol_relative?
22
+ add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
23
+ line: @img.line, content: @img.content)
21
24
  elsif @img.url.remote?
22
25
  add_to_external_urls(@img.url, @img.line)
23
26
  elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
@@ -27,7 +30,10 @@ module HTMLProofer
27
30
  @img.srcsets_wo_sizes.each do |srcset|
28
31
  srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
29
32
 
30
- if srcset_url.remote?
33
+ if srcset_url.protocol_relative?
34
+ add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
35
+ line: @img.line, content: @img.content)
36
+ elsif srcset_url.remote?
31
37
  add_to_external_urls(srcset_url.url, @img.line)
32
38
  elsif !srcset_url.exists?
33
39
  add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
@@ -28,6 +28,12 @@ module HTMLProofer
28
28
  next
29
29
  end
30
30
 
31
+ if @link.url.protocol_relative?
32
+ add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
33
+ line: @link.line, content: @link.content)
34
+ next
35
+ end
36
+
31
37
  check_schemes
32
38
 
33
39
  # intentionally down here because we still want valid? & missing_href? to execute
@@ -16,6 +16,9 @@ module HTMLProofer
16
16
  add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
17
17
  elsif !@open_graph.url.valid?
18
18
  add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
19
+ elsif @open_graph.url.protocol_relative?
20
+ add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
21
+ line: @open_graph.line, content: @open_graph.content)
19
22
  elsif @open_graph.url.remote?
20
23
  add_to_external_urls(@open_graph.url, @open_graph.line)
21
24
  else
@@ -13,8 +13,11 @@ module HTMLProofer
13
13
  # does the script exist?
14
14
  if missing_src?
15
15
  add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
16
+ elsif @script.url.protocol_relative?
17
+ add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
18
+ line: @script.line, content: @script.content)
16
19
  elsif @script.url.remote?
17
- add_to_external_urls(@script.src, @script.line)
20
+ add_to_external_urls(@script.url, @script.line)
18
21
  check_sri if @runner.check_sri?
19
22
  elsif !@script.url.exists?
20
23
  add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
@@ -29,28 +29,10 @@ module HTMLProofer
29
29
  content: content)
30
30
  end
31
31
 
32
- def self.subchecks(runner_options)
33
- # grab all known checks
34
- checks = ObjectSpace.each_object(Class).select do |klass|
35
- klass < self
36
- end
37
-
38
- # remove any checks not explicitly included
39
- checks.each_with_object([]) do |check, arr|
40
- next unless runner_options[:checks].include?(check.short_name)
41
-
42
- arr << check
43
- end
44
- end
45
-
46
32
  def short_name
47
33
  self.class.name.split("::").last
48
34
  end
49
35
 
50
- def self.short_name
51
- name.split("::").last
52
- end
53
-
54
36
  def add_to_internal_urls(url, line)
55
37
  url_string = url.raw_attribute
56
38
 
@@ -74,6 +56,26 @@ module HTMLProofer
74
56
  @external_urls[url_string] << { filename: @runner.current_filename, line: line }
75
57
  end
76
58
 
59
+ class << self
60
+ def subchecks(runner_options)
61
+ # grab all known checks
62
+ checks = ObjectSpace.each_object(Class).select do |klass|
63
+ klass < self
64
+ end
65
+
66
+ # remove any checks not explicitly included
67
+ checks.each_with_object([]) do |check, arr|
68
+ next unless runner_options[:checks].include?(check.short_name)
69
+
70
+ arr << check
71
+ end
72
+ end
73
+
74
+ def short_name
75
+ name.split("::").last
76
+ end
77
+ end
78
+
77
79
  private def base_url
78
80
  return @base_url if defined?(@base_url)
79
81
 
@@ -47,42 +47,44 @@ module HTMLProofer
47
47
 
48
48
  CACHE_DEFAULTS = {}.freeze
49
49
 
50
- def self.generate_defaults(opts)
51
- options = PROOFER_DEFAULTS.merge(opts)
50
+ class << self
51
+ def generate_defaults(opts)
52
+ options = PROOFER_DEFAULTS.merge(opts)
52
53
 
53
- options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
54
- options[:hydra] = HTMLProofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
54
+ options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
55
+ options[:hydra] = HTMLProofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
55
56
 
56
- options[:parallel] = HTMLProofer::Configuration::PARALLEL_DEFAULTS.merge(opts[:parallel] || {})
57
- options[:cache] = HTMLProofer::Configuration::CACHE_DEFAULTS.merge(opts[:cache] || {})
57
+ options[:parallel] = HTMLProofer::Configuration::PARALLEL_DEFAULTS.merge(opts[:parallel] || {})
58
+ options[:cache] = HTMLProofer::Configuration::CACHE_DEFAULTS.merge(opts[:cache] || {})
58
59
 
59
- options.delete(:src)
60
+ options.delete(:src)
60
61
 
61
- options
62
- end
62
+ options
63
+ end
63
64
 
64
- def self.to_regex?(item)
65
- if item.start_with?("/") && item.end_with?("/")
66
- Regexp.new(item[1...-1])
67
- else
68
- item
65
+ def to_regex?(item)
66
+ if item.start_with?("/") && item.end_with?("/")
67
+ Regexp.new(item[1...-1])
68
+ else
69
+ item
70
+ end
69
71
  end
70
- end
71
72
 
72
- def self.parse_json_option(option_name, config, symbolize_names: true)
73
- raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
74
- raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
73
+ def parse_json_option(option_name, config, symbolize_names: true)
74
+ raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
75
+ raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
75
76
 
76
- return {} if config.nil?
77
+ return {} if config.nil?
77
78
 
78
- raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
79
+ raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
79
80
 
80
- return {} if config.strip.empty?
81
+ return {} if config.strip.empty?
81
82
 
82
- begin
83
- JSON.parse(config, { symbolize_names: symbolize_names })
84
- rescue StandardError
85
- raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
83
+ begin
84
+ JSON.parse(config, { symbolize_names: symbolize_names })
85
+ rescue StandardError
86
+ raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
87
+ end
86
88
  end
87
89
  end
88
90
  end
@@ -22,22 +22,39 @@ module HTMLProofer
22
22
  end
23
23
 
24
24
  def run_internal_link_checker(links)
25
+ # collect urls and metadata for hashes to be checked in the same target file
26
+ file_paths_hashes_to_check = {}
25
27
  to_add = []
26
- links.each_pair do |link, matched_files|
28
+ links.each_with_index do |(link, matched_files), i|
29
+ matched_count_to_log = pluralize(matched_files.count, "reference", "references")
30
+ @logger.log(:debug, "(#{i + 1} / #{links.count}) Internal link #{link}: Checking #{matched_count_to_log}")
27
31
  matched_files.each do |metadata|
28
32
  url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])
29
33
 
30
34
  @runner.current_source = metadata[:source]
31
35
  @runner.current_filename = metadata[:filename]
32
36
 
33
- unless file_exists?(url)
37
+ target_file_path = url.absolute_path
38
+ unless file_exists?(target_file_path)
34
39
  @failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
35
40
  "internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
36
41
  to_add << [url, metadata, false]
37
42
  next
38
43
  end
39
44
 
40
- unless hash_exists?(url)
45
+ hash_exists = hash_exists_for_url?(url)
46
+ if hash_exists.nil?
47
+ # the hash needs to be checked in the target file, we collect the url and metadata
48
+ unless file_paths_hashes_to_check.key?(target_file_path)
49
+ file_paths_hashes_to_check[target_file_path] = {}
50
+ end
51
+ unless file_paths_hashes_to_check[target_file_path].key?(url.hash)
52
+ file_paths_hashes_to_check[target_file_path][url.hash] = []
53
+ end
54
+ file_paths_hashes_to_check[target_file_path][url.hash] << [url, metadata]
55
+ next
56
+ end
57
+ unless hash_exists
41
58
  @failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
42
59
  "internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
43
60
  to_add << [url, metadata, false]
@@ -48,6 +65,24 @@ module HTMLProofer
48
65
  end
49
66
  end
50
67
 
68
+ # check hashes by target file
69
+ @logger.log(:info, "Checking internal link hashes in #{pluralize(file_paths_hashes_to_check.count, "file", "files")}")
70
+ file_paths_hashes_to_check.each_with_index do |(file_path, hashes_to_check), i|
71
+ hash_count_to_log = pluralize(hashes_to_check.count, "hash", "hashes")
72
+ @logger.log(:debug, "(#{i + 1} / #{file_paths_hashes_to_check.count}) Checking #{hash_count_to_log} in #{file_path}")
73
+ html = create_nokogiri(file_path)
74
+ hashes_to_check.each_pair do |href_hash, url_metadata|
75
+ exists = hash_exists_in_html?(href_hash, html)
76
+ url_metadata.each do |(url, metadata)|
77
+ unless exists
78
+ @failed_checks << Failure.new(metadata[:filename], "Links > Internal",
79
+ "internally linking to #{url}; the file exists, but the hash '#{href_hash}' does not", line: metadata[:line], status: nil, content: nil)
80
+ end
81
+ to_add << [url, metadata, exists]
82
+ end
83
+ end
84
+ end
85
+
51
86
  # adding directly to the cache above results in an endless loop
52
87
  to_add.each do |(url, metadata, exists)|
53
88
  @cache.add_internal(url.to_s, metadata, exists)
@@ -56,15 +91,15 @@ module HTMLProofer
56
91
  @failed_checks
57
92
  end
58
93
 
59
- private def file_exists?(url)
60
- absolute_path = url.absolute_path
61
- return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)
94
+ private def file_exists?(absolute_path)
95
+ return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
62
96
 
63
- @runner.checked_paths[url.absolute_path] = File.exist?(absolute_path)
97
+ @runner.checked_paths[absolute_path] = File.exist?(absolute_path)
64
98
  end
65
99
 
66
- # verify the target hash
67
- private def hash_exists?(url)
100
+ # verify the hash based only on the URL, w/o looking at the target file
101
+ # => returns nil if the hash could not be verified
102
+ private def hash_exists_for_url?(url)
68
103
  href_hash = url.hash
69
104
  return true if blank?(href_hash)
70
105
  return true unless @runner.options[:check_internal_hash]
@@ -76,10 +111,18 @@ module HTMLProofer
76
111
  decoded_href_hash = Addressable::URI.unescape(href_hash)
77
112
  fragment_ids = [href_hash, decoded_href_hash]
78
113
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
79
- fragment_ids.include?("top") || !find_fragments(fragment_ids, url).empty?
114
+ return true if fragment_ids.include?("top")
115
+
116
+ nil
117
+ end
118
+
119
+ private def hash_exists_in_html?(href_hash, html)
120
+ decoded_href_hash = Addressable::URI.unescape(href_hash)
121
+ fragment_ids = [href_hash, decoded_href_hash]
122
+ !find_fragments(fragment_ids, html).empty?
80
123
  end
81
124
 
82
- private def find_fragments(fragment_ids, url)
125
+ private def find_fragments(fragment_ids, html)
83
126
  xpaths = fragment_ids.uniq.flat_map do |frag_id|
84
127
  escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
85
128
  [
@@ -89,7 +132,6 @@ module HTMLProofer
89
132
  end
90
133
  xpaths << XpathFunctions.new
91
134
 
92
- html = create_nokogiri(url.absolute_path)
93
135
  html.xpath(*xpaths)
94
136
  end
95
137
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = "4.3.2"
4
+ VERSION = "4.4.1"
5
5
  end
data/lib/html_proofer.rb CHANGED
@@ -4,7 +4,7 @@ require "zeitwerk"
4
4
  lib_dir = File.join(File.dirname(__dir__), "lib")
5
5
  gem_loader = Zeitwerk::Loader.for_gem
6
6
  gem_loader.inflector.inflect(
7
- "html_proofer" => "HTMLProofer"
7
+ "html_proofer" => "HTMLProofer",
8
8
  )
9
9
  gem_loader.ignore(File.join(lib_dir, "html-proofer.rb"))
10
10
  gem_loader.setup
@@ -20,37 +20,39 @@ if ENV.fetch("DEBUG", false)
20
20
  end
21
21
 
22
22
  module HTMLProofer
23
- def self.check_file(file, options = {})
24
- raise ArgumentError unless file.is_a?(String)
25
- raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
23
+ class << self
24
+ def check_file(file, options = {})
25
+ raise ArgumentError unless file.is_a?(String)
26
+ raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
26
27
 
27
- options[:type] = :file
28
- HTMLProofer::Runner.new(file, options)
29
- end
28
+ options[:type] = :file
29
+ HTMLProofer::Runner.new(file, options)
30
+ end
30
31
 
31
- def self.check_directory(directory, options = {})
32
- raise ArgumentError unless directory.is_a?(String)
33
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
32
+ def check_directory(directory, options = {})
33
+ raise ArgumentError unless directory.is_a?(String)
34
+ raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
34
35
 
35
- options[:type] = :directory
36
- HTMLProofer::Runner.new([directory], options)
37
- end
36
+ options[:type] = :directory
37
+ HTMLProofer::Runner.new([directory], options)
38
+ end
38
39
 
39
- def self.check_directories(directories, options = {})
40
- raise ArgumentError unless directories.is_a?(Array)
40
+ def check_directories(directories, options = {})
41
+ raise ArgumentError unless directories.is_a?(Array)
41
42
 
42
- options[:type] = :directory
43
- directories.each do |directory|
44
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
43
+ options[:type] = :directory
44
+ directories.each do |directory|
45
+ raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
46
+ end
47
+ HTMLProofer::Runner.new(directories, options)
45
48
  end
46
- HTMLProofer::Runner.new(directories, options)
47
- end
48
49
 
49
- def self.check_links(links, options = {})
50
- raise ArgumentError unless links.is_a?(Array)
50
+ def check_links(links, options = {})
51
+ raise ArgumentError unless links.is_a?(Array)
51
52
 
52
- options[:type] = :links
53
- HTMLProofer::Runner.new(links, options)
53
+ options[:type] = :links
54
+ HTMLProofer::Runner.new(links, options)
55
+ end
54
56
  end
55
57
  end
56
58
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.3.2
4
+ version: 4.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-03 00:00:00.000000000 Z
11
+ date: 2022-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable