html-proofer 4.4.0 → 4.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html_proofer/configuration.rb +20 -0
- data/lib/html_proofer/url_validator/internal.rb +54 -12
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +16 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e54554dcf4b21e6724cff760438b0a116c2871922f7ae0373b9d234d8202b2af
|
4
|
+
data.tar.gz: 4ab71e2407af3f65381bd48fec4ef9bba19a9849265884adfa7d6a7d7dd797ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 228448293b92c6e5e5938201278f963fc2573c28869721d90671f31e84747cde91c6cfc5059da3b8404c4de874e67c319391b270de24e1df5afcf20062bda698
|
7
|
+
data.tar.gz: da4e98326f717141c3c433cd9afd651987a8a091f5a07cd2cf95e0b5034e0a33214bf63cc1f85b234999b2eb6047afb926437ca4ab56d0eb8f81e5eaaa4aab2f
|
@@ -49,6 +49,8 @@ module HTMLProofer
|
|
49
49
|
|
50
50
|
class << self
|
51
51
|
def generate_defaults(opts)
|
52
|
+
validate_options(default_options, opts)
|
53
|
+
|
52
54
|
options = PROOFER_DEFAULTS.merge(opts)
|
53
55
|
|
54
56
|
options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
@@ -86,6 +88,24 @@ module HTMLProofer
|
|
86
88
|
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
|
87
89
|
end
|
88
90
|
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
def default_options
|
95
|
+
PROOFER_DEFAULTS.merge(typhoeus: TYPHOEUS_DEFAULTS).merge(hydra: HYDRA_DEFAULTS).merge(parallel: PARALLEL_DEFAULTS)
|
96
|
+
end
|
97
|
+
|
98
|
+
def validate_options(defaults, options)
|
99
|
+
defaults.each do |key, default_value|
|
100
|
+
next unless options.key?(key)
|
101
|
+
|
102
|
+
value = options[key]
|
103
|
+
raise TypeError, "Invalid value for '#{key}': '#{value}'. Expected #{default_value.class}." unless value.is_a?(default_value.class)
|
104
|
+
|
105
|
+
# Iterate over nested hashes
|
106
|
+
validate_options(default_value, value) if default_value.is_a?(Hash)
|
107
|
+
end
|
108
|
+
end
|
89
109
|
end
|
90
110
|
end
|
91
111
|
end
|
@@ -22,22 +22,39 @@ module HTMLProofer
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def run_internal_link_checker(links)
|
25
|
+
# collect urls and metadata for hashes to be checked in the same target file
|
26
|
+
file_paths_hashes_to_check = {}
|
25
27
|
to_add = []
|
26
|
-
links.
|
28
|
+
links.each_with_index do |(link, matched_files), i|
|
29
|
+
matched_count_to_log = pluralize(matched_files.count, "reference", "references")
|
30
|
+
@logger.log(:debug, "(#{i + 1} / #{links.count}) Internal link #{link}: Checking #{matched_count_to_log}")
|
27
31
|
matched_files.each do |metadata|
|
28
32
|
url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])
|
29
33
|
|
30
34
|
@runner.current_source = metadata[:source]
|
31
35
|
@runner.current_filename = metadata[:filename]
|
32
36
|
|
33
|
-
|
37
|
+
target_file_path = url.absolute_path
|
38
|
+
unless file_exists?(target_file_path)
|
34
39
|
@failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
|
35
40
|
"internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
|
36
41
|
to_add << [url, metadata, false]
|
37
42
|
next
|
38
43
|
end
|
39
44
|
|
40
|
-
|
45
|
+
hash_exists = hash_exists_for_url?(url)
|
46
|
+
if hash_exists.nil?
|
47
|
+
# the hash needs to be checked in the target file, we collect the url and metadata
|
48
|
+
unless file_paths_hashes_to_check.key?(target_file_path)
|
49
|
+
file_paths_hashes_to_check[target_file_path] = {}
|
50
|
+
end
|
51
|
+
unless file_paths_hashes_to_check[target_file_path].key?(url.hash)
|
52
|
+
file_paths_hashes_to_check[target_file_path][url.hash] = []
|
53
|
+
end
|
54
|
+
file_paths_hashes_to_check[target_file_path][url.hash] << [url, metadata]
|
55
|
+
next
|
56
|
+
end
|
57
|
+
unless hash_exists
|
41
58
|
@failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
|
42
59
|
"internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
|
43
60
|
to_add << [url, metadata, false]
|
@@ -48,6 +65,24 @@ module HTMLProofer
|
|
48
65
|
end
|
49
66
|
end
|
50
67
|
|
68
|
+
# check hashes by target file
|
69
|
+
@logger.log(:info, "Checking internal link hashes in #{pluralize(file_paths_hashes_to_check.count, "file", "files")}")
|
70
|
+
file_paths_hashes_to_check.each_with_index do |(file_path, hashes_to_check), i|
|
71
|
+
hash_count_to_log = pluralize(hashes_to_check.count, "hash", "hashes")
|
72
|
+
@logger.log(:debug, "(#{i + 1} / #{file_paths_hashes_to_check.count}) Checking #{hash_count_to_log} in #{file_path}")
|
73
|
+
html = create_nokogiri(file_path)
|
74
|
+
hashes_to_check.each_pair do |href_hash, url_metadata|
|
75
|
+
exists = hash_exists_in_html?(href_hash, html)
|
76
|
+
url_metadata.each do |(url, metadata)|
|
77
|
+
unless exists
|
78
|
+
@failed_checks << Failure.new(metadata[:filename], "Links > Internal",
|
79
|
+
"internally linking to #{url}; the file exists, but the hash '#{href_hash}' does not", line: metadata[:line], status: nil, content: nil)
|
80
|
+
end
|
81
|
+
to_add << [url, metadata, exists]
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
51
86
|
# adding directly to the cache above results in an endless loop
|
52
87
|
to_add.each do |(url, metadata, exists)|
|
53
88
|
@cache.add_internal(url.to_s, metadata, exists)
|
@@ -56,15 +91,15 @@ module HTMLProofer
|
|
56
91
|
@failed_checks
|
57
92
|
end
|
58
93
|
|
59
|
-
private def file_exists?(
|
60
|
-
absolute_path
|
61
|
-
return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)
|
94
|
+
private def file_exists?(absolute_path)
|
95
|
+
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
62
96
|
|
63
|
-
@runner.checked_paths[
|
97
|
+
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
64
98
|
end
|
65
99
|
|
66
|
-
# verify the target
|
67
|
-
|
100
|
+
# verify the hash based only on the URL, w/o looking at the target file
|
101
|
+
# => returns nil if the hash could not be verified
|
102
|
+
private def hash_exists_for_url?(url)
|
68
103
|
href_hash = url.hash
|
69
104
|
return true if blank?(href_hash)
|
70
105
|
return true unless @runner.options[:check_internal_hash]
|
@@ -76,10 +111,18 @@ module HTMLProofer
|
|
76
111
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
77
112
|
fragment_ids = [href_hash, decoded_href_hash]
|
78
113
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
79
|
-
fragment_ids.include?("top")
|
114
|
+
return true if fragment_ids.include?("top")
|
115
|
+
|
116
|
+
nil
|
117
|
+
end
|
118
|
+
|
119
|
+
private def hash_exists_in_html?(href_hash, html)
|
120
|
+
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
121
|
+
fragment_ids = [href_hash, decoded_href_hash]
|
122
|
+
!find_fragments(fragment_ids, html).empty?
|
80
123
|
end
|
81
124
|
|
82
|
-
private def find_fragments(fragment_ids,
|
125
|
+
private def find_fragments(fragment_ids, html)
|
83
126
|
xpaths = fragment_ids.uniq.flat_map do |frag_id|
|
84
127
|
escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
|
85
128
|
[
|
@@ -89,7 +132,6 @@ module HTMLProofer
|
|
89
132
|
end
|
90
133
|
xpaths << XpathFunctions.new
|
91
134
|
|
92
|
-
html = create_nokogiri(url.absolute_path)
|
93
135
|
html.xpath(*xpaths)
|
94
136
|
end
|
95
137
|
end
|
data/lib/html_proofer/version.rb
CHANGED
data/lib/html_proofer.rb
CHANGED
@@ -4,7 +4,7 @@ require "zeitwerk"
|
|
4
4
|
lib_dir = File.join(File.dirname(__dir__), "lib")
|
5
5
|
gem_loader = Zeitwerk::Loader.for_gem
|
6
6
|
gem_loader.inflector.inflect(
|
7
|
-
"html_proofer" => "HTMLProofer"
|
7
|
+
"html_proofer" => "HTMLProofer",
|
8
8
|
)
|
9
9
|
gem_loader.ignore(File.join(lib_dir, "html-proofer.rb"))
|
10
10
|
gem_loader.setup
|
@@ -25,7 +25,7 @@ module HTMLProofer
|
|
25
25
|
raise ArgumentError unless file.is_a?(String)
|
26
26
|
raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
|
27
27
|
|
28
|
-
options
|
28
|
+
options = prepare_options(options, :file)
|
29
29
|
HTMLProofer::Runner.new(file, options)
|
30
30
|
end
|
31
31
|
|
@@ -33,14 +33,14 @@ module HTMLProofer
|
|
33
33
|
raise ArgumentError unless directory.is_a?(String)
|
34
34
|
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
35
35
|
|
36
|
-
options
|
36
|
+
options = prepare_options(options, :directory)
|
37
37
|
HTMLProofer::Runner.new([directory], options)
|
38
38
|
end
|
39
39
|
|
40
40
|
def check_directories(directories, options = {})
|
41
41
|
raise ArgumentError unless directories.is_a?(Array)
|
42
42
|
|
43
|
-
options
|
43
|
+
options = prepare_options(options, :directory)
|
44
44
|
directories.each do |directory|
|
45
45
|
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
46
46
|
end
|
@@ -50,9 +50,20 @@ module HTMLProofer
|
|
50
50
|
def check_links(links, options = {})
|
51
51
|
raise ArgumentError unless links.is_a?(Array)
|
52
52
|
|
53
|
-
options
|
53
|
+
options = prepare_options(options, :links)
|
54
54
|
HTMLProofer::Runner.new(links, options)
|
55
55
|
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def prepare_options(options, type)
|
60
|
+
options = {} if options.nil?
|
61
|
+
|
62
|
+
raise ArgumentError, "Options must be a Hash" unless options.is_a?(Hash)
|
63
|
+
|
64
|
+
options[:type] = type
|
65
|
+
options
|
66
|
+
end
|
56
67
|
end
|
57
68
|
end
|
58
69
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.4.
|
4
|
+
version: 4.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|