html-proofer 4.4.0 → 4.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 98bde6dd5e32f42e5983fd1aaa7bf02d3359b40f2b4a8b5360a5826abf2cc674
4
- data.tar.gz: 9b7d57fd18e625ab7cc91c46faceac510b05ba5290f665fb5dd333b92446b00b
3
+ metadata.gz: e54554dcf4b21e6724cff760438b0a116c2871922f7ae0373b9d234d8202b2af
4
+ data.tar.gz: 4ab71e2407af3f65381bd48fec4ef9bba19a9849265884adfa7d6a7d7dd797ac
5
5
  SHA512:
6
- metadata.gz: a666be806bbb70028488b3ef89f1325d7b5faaec9f48e105220db2d2f7dd022c12dabac828949ffd2100bdc2b4e3bd4c1f9ce9df48bab17b9106d9453eb49f01
7
- data.tar.gz: 8a58bb5b120ab3cf3eb0e692f3f52f4385bd5923d3a7c91e95dddd17d8b0e25d20dd207872099665ac37737afb6b4ed86d04d39bc7793c6af2abcd35390d2051
6
+ metadata.gz: 228448293b92c6e5e5938201278f963fc2573c28869721d90671f31e84747cde91c6cfc5059da3b8404c4de874e67c319391b270de24e1df5afcf20062bda698
7
+ data.tar.gz: da4e98326f717141c3c433cd9afd651987a8a091f5a07cd2cf95e0b5034e0a33214bf63cc1f85b234999b2eb6047afb926437ca4ab56d0eb8f81e5eaaa4aab2f
@@ -49,6 +49,8 @@ module HTMLProofer
49
49
 
50
50
  class << self
51
51
  def generate_defaults(opts)
52
+ validate_options(default_options, opts)
53
+
52
54
  options = PROOFER_DEFAULTS.merge(opts)
53
55
 
54
56
  options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
@@ -86,6 +88,24 @@ module HTMLProofer
86
88
  raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
87
89
  end
88
90
  end
91
+
92
+ private
93
+
94
+ def default_options
95
+ PROOFER_DEFAULTS.merge(typhoeus: TYPHOEUS_DEFAULTS).merge(hydra: HYDRA_DEFAULTS).merge(parallel: PARALLEL_DEFAULTS)
96
+ end
97
+
98
+ def validate_options(defaults, options)
99
+ defaults.each do |key, default_value|
100
+ next unless options.key?(key)
101
+
102
+ value = options[key]
103
+ raise TypeError, "Invalid value for '#{key}': '#{value}'. Expected #{default_value.class}." unless value.is_a?(default_value.class)
104
+
105
+ # Iterate over nested hashes
106
+ validate_options(default_value, value) if default_value.is_a?(Hash)
107
+ end
108
+ end
89
109
  end
90
110
  end
91
111
  end
@@ -22,22 +22,39 @@ module HTMLProofer
22
22
  end
23
23
 
24
24
  def run_internal_link_checker(links)
25
+ # collect urls and metadata for hashes to be checked in the same target file
26
+ file_paths_hashes_to_check = {}
25
27
  to_add = []
26
- links.each_pair do |link, matched_files|
28
+ links.each_with_index do |(link, matched_files), i|
29
+ matched_count_to_log = pluralize(matched_files.count, "reference", "references")
30
+ @logger.log(:debug, "(#{i + 1} / #{links.count}) Internal link #{link}: Checking #{matched_count_to_log}")
27
31
  matched_files.each do |metadata|
28
32
  url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])
29
33
 
30
34
  @runner.current_source = metadata[:source]
31
35
  @runner.current_filename = metadata[:filename]
32
36
 
33
- unless file_exists?(url)
37
+ target_file_path = url.absolute_path
38
+ unless file_exists?(target_file_path)
34
39
  @failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
35
40
  "internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
36
41
  to_add << [url, metadata, false]
37
42
  next
38
43
  end
39
44
 
40
- unless hash_exists?(url)
45
+ hash_exists = hash_exists_for_url?(url)
46
+ if hash_exists.nil?
47
+ # the hash needs to be checked in the target file, we collect the url and metadata
48
+ unless file_paths_hashes_to_check.key?(target_file_path)
49
+ file_paths_hashes_to_check[target_file_path] = {}
50
+ end
51
+ unless file_paths_hashes_to_check[target_file_path].key?(url.hash)
52
+ file_paths_hashes_to_check[target_file_path][url.hash] = []
53
+ end
54
+ file_paths_hashes_to_check[target_file_path][url.hash] << [url, metadata]
55
+ next
56
+ end
57
+ unless hash_exists
41
58
  @failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
42
59
  "internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
43
60
  to_add << [url, metadata, false]
@@ -48,6 +65,24 @@ module HTMLProofer
48
65
  end
49
66
  end
50
67
 
68
+ # check hashes by target file
69
+ @logger.log(:info, "Checking internal link hashes in #{pluralize(file_paths_hashes_to_check.count, "file", "files")}")
70
+ file_paths_hashes_to_check.each_with_index do |(file_path, hashes_to_check), i|
71
+ hash_count_to_log = pluralize(hashes_to_check.count, "hash", "hashes")
72
+ @logger.log(:debug, "(#{i + 1} / #{file_paths_hashes_to_check.count}) Checking #{hash_count_to_log} in #{file_path}")
73
+ html = create_nokogiri(file_path)
74
+ hashes_to_check.each_pair do |href_hash, url_metadata|
75
+ exists = hash_exists_in_html?(href_hash, html)
76
+ url_metadata.each do |(url, metadata)|
77
+ unless exists
78
+ @failed_checks << Failure.new(metadata[:filename], "Links > Internal",
79
+ "internally linking to #{url}; the file exists, but the hash '#{href_hash}' does not", line: metadata[:line], status: nil, content: nil)
80
+ end
81
+ to_add << [url, metadata, exists]
82
+ end
83
+ end
84
+ end
85
+
51
86
  # adding directly to the cache above results in an endless loop
52
87
  to_add.each do |(url, metadata, exists)|
53
88
  @cache.add_internal(url.to_s, metadata, exists)
@@ -56,15 +91,15 @@ module HTMLProofer
56
91
  @failed_checks
57
92
  end
58
93
 
59
- private def file_exists?(url)
60
- absolute_path = url.absolute_path
61
- return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)
94
+ private def file_exists?(absolute_path)
95
+ return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
62
96
 
63
- @runner.checked_paths[url.absolute_path] = File.exist?(absolute_path)
97
+ @runner.checked_paths[absolute_path] = File.exist?(absolute_path)
64
98
  end
65
99
 
66
- # verify the target hash
67
- private def hash_exists?(url)
100
+ # verify the hash based on the URL alone, without looking at the target file
101
+ # => returns nil if the hash could not be verified
102
+ private def hash_exists_for_url?(url)
68
103
  href_hash = url.hash
69
104
  return true if blank?(href_hash)
70
105
  return true unless @runner.options[:check_internal_hash]
@@ -76,10 +111,18 @@ module HTMLProofer
76
111
  decoded_href_hash = Addressable::URI.unescape(href_hash)
77
112
  fragment_ids = [href_hash, decoded_href_hash]
78
113
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
79
- fragment_ids.include?("top") || !find_fragments(fragment_ids, url).empty?
114
+ return true if fragment_ids.include?("top")
115
+
116
+ nil
117
+ end
118
+
119
+ private def hash_exists_in_html?(href_hash, html)
120
+ decoded_href_hash = Addressable::URI.unescape(href_hash)
121
+ fragment_ids = [href_hash, decoded_href_hash]
122
+ !find_fragments(fragment_ids, html).empty?
80
123
  end
81
124
 
82
- private def find_fragments(fragment_ids, url)
125
+ private def find_fragments(fragment_ids, html)
83
126
  xpaths = fragment_ids.uniq.flat_map do |frag_id|
84
127
  escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
85
128
  [
@@ -89,7 +132,6 @@ module HTMLProofer
89
132
  end
90
133
  xpaths << XpathFunctions.new
91
134
 
92
- html = create_nokogiri(url.absolute_path)
93
135
  html.xpath(*xpaths)
94
136
  end
95
137
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = "4.4.0"
4
+ VERSION = "4.4.2"
5
5
  end
data/lib/html_proofer.rb CHANGED
@@ -4,7 +4,7 @@ require "zeitwerk"
4
4
  lib_dir = File.join(File.dirname(__dir__), "lib")
5
5
  gem_loader = Zeitwerk::Loader.for_gem
6
6
  gem_loader.inflector.inflect(
7
- "html_proofer" => "HTMLProofer"
7
+ "html_proofer" => "HTMLProofer",
8
8
  )
9
9
  gem_loader.ignore(File.join(lib_dir, "html-proofer.rb"))
10
10
  gem_loader.setup
@@ -25,7 +25,7 @@ module HTMLProofer
25
25
  raise ArgumentError unless file.is_a?(String)
26
26
  raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
27
27
 
28
- options[:type] = :file
28
+ options = prepare_options(options, :file)
29
29
  HTMLProofer::Runner.new(file, options)
30
30
  end
31
31
 
@@ -33,14 +33,14 @@ module HTMLProofer
33
33
  raise ArgumentError unless directory.is_a?(String)
34
34
  raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
35
35
 
36
- options[:type] = :directory
36
+ options = prepare_options(options, :directory)
37
37
  HTMLProofer::Runner.new([directory], options)
38
38
  end
39
39
 
40
40
  def check_directories(directories, options = {})
41
41
  raise ArgumentError unless directories.is_a?(Array)
42
42
 
43
- options[:type] = :directory
43
+ options = prepare_options(options, :directory)
44
44
  directories.each do |directory|
45
45
  raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
46
46
  end
@@ -50,9 +50,20 @@ module HTMLProofer
50
50
  def check_links(links, options = {})
51
51
  raise ArgumentError unless links.is_a?(Array)
52
52
 
53
- options[:type] = :links
53
+ options = prepare_options(options, :links)
54
54
  HTMLProofer::Runner.new(links, options)
55
55
  end
56
+
57
+ private
58
+
59
+ def prepare_options(options, type)
60
+ options = {} if options.nil?
61
+
62
+ raise ArgumentError, "Options must be a Hash" unless options.is_a?(Hash)
63
+
64
+ options[:type] = type
65
+ options
66
+ end
56
67
  end
57
68
  end
58
69
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.4.0
4
+ version: 4.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-13 00:00:00.000000000 Z
11
+ date: 2022-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable