html-proofer 4.3.2 → 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html_proofer/attribute/url.rb +4 -3
- data/lib/html_proofer/check/favicon.rb +4 -1
- data/lib/html_proofer/check/images.rb +7 -1
- data/lib/html_proofer/check/links.rb +6 -0
- data/lib/html_proofer/check/open_graph.rb +3 -0
- data/lib/html_proofer/check/scripts.rb +4 -1
- data/lib/html_proofer/check.rb +20 -18
- data/lib/html_proofer/configuration.rb +27 -25
- data/lib/html_proofer/url_validator/internal.rb +54 -12
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +26 -24
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4043366fa1c0d7aaa58fc0b577d25d2d0fba7e72afca2b43ae688e90c55dddef
|
4
|
+
data.tar.gz: a93621d2169757d3665e5950e8c99d23a1b64cc9014fbc806a986fcdcb906e26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1636df520fa7d4035ca482eeafd0b5e059b0fc77a0219f340e7c47b8f2105c8dd107bf03e0d78ca582f5d7abef9d5853edae498a2133b75eab6f92b7e77b636f
|
7
|
+
data.tar.gz: 5310df2250a5db3c8864b12ae01e49a0a2d043fc15f5bece45615cbad1d2b9100c899fa35902db5536b2be85908f9208ebfe65e31318cf6a6db47e945a745f17
|
@@ -20,12 +20,13 @@ module HTMLProofer
|
|
20
20
|
|
21
21
|
swap_urls!
|
22
22
|
clean_url!
|
23
|
-
|
24
|
-
# convert "//" links to "https://"
|
25
|
-
@url.start_with?("//") ? @url = "https:#{@url}" : @url
|
26
23
|
end
|
27
24
|
end
|
28
25
|
|
26
|
+
def protocol_relative?
|
27
|
+
url.start_with?("//")
|
28
|
+
end
|
29
|
+
|
29
30
|
def to_s
|
30
31
|
@url
|
31
32
|
end
|
@@ -16,7 +16,10 @@ module HTMLProofer
|
|
16
16
|
return if immediate_redirect?
|
17
17
|
|
18
18
|
if found
|
19
|
-
if @favicon.url.
|
19
|
+
if @favicon.url.protocol_relative?
|
20
|
+
add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
|
21
|
+
line: @favicon.line, content: @favicon.content)
|
22
|
+
elsif @favicon.url.remote?
|
20
23
|
add_to_external_urls(@favicon.url, @favicon.line)
|
21
24
|
elsif !@favicon.url.exists?
|
22
25
|
add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
|
@@ -18,6 +18,9 @@ module HTMLProofer
|
|
18
18
|
# does the image exist?
|
19
19
|
if missing_src?
|
20
20
|
add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
|
21
|
+
elsif @img.url.protocol_relative?
|
22
|
+
add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
|
23
|
+
line: @img.line, content: @img.content)
|
21
24
|
elsif @img.url.remote?
|
22
25
|
add_to_external_urls(@img.url, @img.line)
|
23
26
|
elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
|
@@ -27,7 +30,10 @@ module HTMLProofer
|
|
27
30
|
@img.srcsets_wo_sizes.each do |srcset|
|
28
31
|
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
|
29
32
|
|
30
|
-
if srcset_url.
|
33
|
+
if srcset_url.protocol_relative?
|
34
|
+
add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
|
35
|
+
line: @img.line, content: @img.content)
|
36
|
+
elsif srcset_url.remote?
|
31
37
|
add_to_external_urls(srcset_url.url, @img.line)
|
32
38
|
elsif !srcset_url.exists?
|
33
39
|
add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
|
@@ -28,6 +28,12 @@ module HTMLProofer
|
|
28
28
|
next
|
29
29
|
end
|
30
30
|
|
31
|
+
if @link.url.protocol_relative?
|
32
|
+
add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
|
33
|
+
line: @link.line, content: @link.content)
|
34
|
+
next
|
35
|
+
end
|
36
|
+
|
31
37
|
check_schemes
|
32
38
|
|
33
39
|
# intentionally down here because we still want valid? & missing_href? to execute
|
@@ -16,6 +16,9 @@ module HTMLProofer
|
|
16
16
|
add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
|
17
17
|
elsif !@open_graph.url.valid?
|
18
18
|
add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
|
19
|
+
elsif @open_graph.url.protocol_relative?
|
20
|
+
add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
|
21
|
+
line: @open_graph.line, content: @open_graph.content)
|
19
22
|
elsif @open_graph.url.remote?
|
20
23
|
add_to_external_urls(@open_graph.url, @open_graph.line)
|
21
24
|
else
|
@@ -13,8 +13,11 @@ module HTMLProofer
|
|
13
13
|
# does the script exist?
|
14
14
|
if missing_src?
|
15
15
|
add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
|
16
|
+
elsif @script.url.protocol_relative?
|
17
|
+
add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
|
18
|
+
line: @script.line, content: @script.content)
|
16
19
|
elsif @script.url.remote?
|
17
|
-
add_to_external_urls(@script.
|
20
|
+
add_to_external_urls(@script.url, @script.line)
|
18
21
|
check_sri if @runner.check_sri?
|
19
22
|
elsif !@script.url.exists?
|
20
23
|
add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
|
data/lib/html_proofer/check.rb
CHANGED
@@ -29,28 +29,10 @@ module HTMLProofer
|
|
29
29
|
content: content)
|
30
30
|
end
|
31
31
|
|
32
|
-
def self.subchecks(runner_options)
|
33
|
-
# grab all known checks
|
34
|
-
checks = ObjectSpace.each_object(Class).select do |klass|
|
35
|
-
klass < self
|
36
|
-
end
|
37
|
-
|
38
|
-
# remove any checks not explicitly included
|
39
|
-
checks.each_with_object([]) do |check, arr|
|
40
|
-
next unless runner_options[:checks].include?(check.short_name)
|
41
|
-
|
42
|
-
arr << check
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
32
|
def short_name
|
47
33
|
self.class.name.split("::").last
|
48
34
|
end
|
49
35
|
|
50
|
-
def self.short_name
|
51
|
-
name.split("::").last
|
52
|
-
end
|
53
|
-
|
54
36
|
def add_to_internal_urls(url, line)
|
55
37
|
url_string = url.raw_attribute
|
56
38
|
|
@@ -74,6 +56,26 @@ module HTMLProofer
|
|
74
56
|
@external_urls[url_string] << { filename: @runner.current_filename, line: line }
|
75
57
|
end
|
76
58
|
|
59
|
+
class << self
|
60
|
+
def subchecks(runner_options)
|
61
|
+
# grab all known checks
|
62
|
+
checks = ObjectSpace.each_object(Class).select do |klass|
|
63
|
+
klass < self
|
64
|
+
end
|
65
|
+
|
66
|
+
# remove any checks not explicitly included
|
67
|
+
checks.each_with_object([]) do |check, arr|
|
68
|
+
next unless runner_options[:checks].include?(check.short_name)
|
69
|
+
|
70
|
+
arr << check
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def short_name
|
75
|
+
name.split("::").last
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
77
79
|
private def base_url
|
78
80
|
return @base_url if defined?(@base_url)
|
79
81
|
|
@@ -47,42 +47,44 @@ module HTMLProofer
|
|
47
47
|
|
48
48
|
CACHE_DEFAULTS = {}.freeze
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
class << self
|
51
|
+
def generate_defaults(opts)
|
52
|
+
options = PROOFER_DEFAULTS.merge(opts)
|
52
53
|
|
53
|
-
|
54
|
-
|
54
|
+
options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
55
|
+
options[:hydra] = HTMLProofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
|
55
56
|
|
56
|
-
|
57
|
-
|
57
|
+
options[:parallel] = HTMLProofer::Configuration::PARALLEL_DEFAULTS.merge(opts[:parallel] || {})
|
58
|
+
options[:cache] = HTMLProofer::Configuration::CACHE_DEFAULTS.merge(opts[:cache] || {})
|
58
59
|
|
59
|
-
|
60
|
+
options.delete(:src)
|
60
61
|
|
61
|
-
|
62
|
-
|
62
|
+
options
|
63
|
+
end
|
63
64
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
65
|
+
def to_regex?(item)
|
66
|
+
if item.start_with?("/") && item.end_with?("/")
|
67
|
+
Regexp.new(item[1...-1])
|
68
|
+
else
|
69
|
+
item
|
70
|
+
end
|
69
71
|
end
|
70
|
-
end
|
71
72
|
|
72
|
-
|
73
|
-
|
74
|
-
|
73
|
+
def parse_json_option(option_name, config, symbolize_names: true)
|
74
|
+
raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
|
75
|
+
raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
|
75
76
|
|
76
|
-
|
77
|
+
return {} if config.nil?
|
77
78
|
|
78
|
-
|
79
|
+
raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
|
79
80
|
|
80
|
-
|
81
|
+
return {} if config.strip.empty?
|
81
82
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
begin
|
84
|
+
JSON.parse(config, { symbolize_names: symbolize_names })
|
85
|
+
rescue StandardError
|
86
|
+
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
|
87
|
+
end
|
86
88
|
end
|
87
89
|
end
|
88
90
|
end
|
@@ -22,22 +22,39 @@ module HTMLProofer
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def run_internal_link_checker(links)
|
25
|
+
# collect urls and metadata for hashes to be checked in the same target file
|
26
|
+
file_paths_hashes_to_check = {}
|
25
27
|
to_add = []
|
26
|
-
links.
|
28
|
+
links.each_with_index do |(link, matched_files), i|
|
29
|
+
matched_count_to_log = pluralize(matched_files.count, "reference", "references")
|
30
|
+
@logger.log(:debug, "(#{i + 1} / #{links.count}) Internal link #{link}: Checking #{matched_count_to_log}")
|
27
31
|
matched_files.each do |metadata|
|
28
32
|
url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])
|
29
33
|
|
30
34
|
@runner.current_source = metadata[:source]
|
31
35
|
@runner.current_filename = metadata[:filename]
|
32
36
|
|
33
|
-
|
37
|
+
target_file_path = url.absolute_path
|
38
|
+
unless file_exists?(target_file_path)
|
34
39
|
@failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
|
35
40
|
"internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
|
36
41
|
to_add << [url, metadata, false]
|
37
42
|
next
|
38
43
|
end
|
39
44
|
|
40
|
-
|
45
|
+
hash_exists = hash_exists_for_url?(url)
|
46
|
+
if hash_exists.nil?
|
47
|
+
# the hash needs to be checked in the target file, we collect the url and metadata
|
48
|
+
unless file_paths_hashes_to_check.key?(target_file_path)
|
49
|
+
file_paths_hashes_to_check[target_file_path] = {}
|
50
|
+
end
|
51
|
+
unless file_paths_hashes_to_check[target_file_path].key?(url.hash)
|
52
|
+
file_paths_hashes_to_check[target_file_path][url.hash] = []
|
53
|
+
end
|
54
|
+
file_paths_hashes_to_check[target_file_path][url.hash] << [url, metadata]
|
55
|
+
next
|
56
|
+
end
|
57
|
+
unless hash_exists
|
41
58
|
@failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
|
42
59
|
"internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
|
43
60
|
to_add << [url, metadata, false]
|
@@ -48,6 +65,24 @@ module HTMLProofer
|
|
48
65
|
end
|
49
66
|
end
|
50
67
|
|
68
|
+
# check hashes by target file
|
69
|
+
@logger.log(:info, "Checking internal link hashes in #{pluralize(file_paths_hashes_to_check.count, "file", "files")}")
|
70
|
+
file_paths_hashes_to_check.each_with_index do |(file_path, hashes_to_check), i|
|
71
|
+
hash_count_to_log = pluralize(hashes_to_check.count, "hash", "hashes")
|
72
|
+
@logger.log(:debug, "(#{i + 1} / #{file_paths_hashes_to_check.count}) Checking #{hash_count_to_log} in #{file_path}")
|
73
|
+
html = create_nokogiri(file_path)
|
74
|
+
hashes_to_check.each_pair do |href_hash, url_metadata|
|
75
|
+
exists = hash_exists_in_html?(href_hash, html)
|
76
|
+
url_metadata.each do |(url, metadata)|
|
77
|
+
unless exists
|
78
|
+
@failed_checks << Failure.new(metadata[:filename], "Links > Internal",
|
79
|
+
"internally linking to #{url}; the file exists, but the hash '#{href_hash}' does not", line: metadata[:line], status: nil, content: nil)
|
80
|
+
end
|
81
|
+
to_add << [url, metadata, exists]
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
51
86
|
# adding directly to the cache above results in an endless loop
|
52
87
|
to_add.each do |(url, metadata, exists)|
|
53
88
|
@cache.add_internal(url.to_s, metadata, exists)
|
@@ -56,15 +91,15 @@ module HTMLProofer
|
|
56
91
|
@failed_checks
|
57
92
|
end
|
58
93
|
|
59
|
-
private def file_exists?(
|
60
|
-
absolute_path
|
61
|
-
return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)
|
94
|
+
private def file_exists?(absolute_path)
|
95
|
+
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
62
96
|
|
63
|
-
@runner.checked_paths[
|
97
|
+
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
64
98
|
end
|
65
99
|
|
66
|
-
# verify the target
|
67
|
-
|
100
|
+
# verify the hash based only on the URL, w/o looking at the target file
|
101
|
+
# => returns nil if the hash could not be verified
|
102
|
+
private def hash_exists_for_url?(url)
|
68
103
|
href_hash = url.hash
|
69
104
|
return true if blank?(href_hash)
|
70
105
|
return true unless @runner.options[:check_internal_hash]
|
@@ -76,10 +111,18 @@ module HTMLProofer
|
|
76
111
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
77
112
|
fragment_ids = [href_hash, decoded_href_hash]
|
78
113
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
79
|
-
fragment_ids.include?("top")
|
114
|
+
return true if fragment_ids.include?("top")
|
115
|
+
|
116
|
+
nil
|
117
|
+
end
|
118
|
+
|
119
|
+
private def hash_exists_in_html?(href_hash, html)
|
120
|
+
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
121
|
+
fragment_ids = [href_hash, decoded_href_hash]
|
122
|
+
!find_fragments(fragment_ids, html).empty?
|
80
123
|
end
|
81
124
|
|
82
|
-
private def find_fragments(fragment_ids,
|
125
|
+
private def find_fragments(fragment_ids, html)
|
83
126
|
xpaths = fragment_ids.uniq.flat_map do |frag_id|
|
84
127
|
escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
|
85
128
|
[
|
@@ -89,7 +132,6 @@ module HTMLProofer
|
|
89
132
|
end
|
90
133
|
xpaths << XpathFunctions.new
|
91
134
|
|
92
|
-
html = create_nokogiri(url.absolute_path)
|
93
135
|
html.xpath(*xpaths)
|
94
136
|
end
|
95
137
|
end
|
data/lib/html_proofer/version.rb
CHANGED
data/lib/html_proofer.rb
CHANGED
@@ -4,7 +4,7 @@ require "zeitwerk"
|
|
4
4
|
lib_dir = File.join(File.dirname(__dir__), "lib")
|
5
5
|
gem_loader = Zeitwerk::Loader.for_gem
|
6
6
|
gem_loader.inflector.inflect(
|
7
|
-
"html_proofer" => "HTMLProofer"
|
7
|
+
"html_proofer" => "HTMLProofer",
|
8
8
|
)
|
9
9
|
gem_loader.ignore(File.join(lib_dir, "html-proofer.rb"))
|
10
10
|
gem_loader.setup
|
@@ -20,37 +20,39 @@ if ENV.fetch("DEBUG", false)
|
|
20
20
|
end
|
21
21
|
|
22
22
|
module HTMLProofer
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
class << self
|
24
|
+
def check_file(file, options = {})
|
25
|
+
raise ArgumentError unless file.is_a?(String)
|
26
|
+
raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
options[:type] = :file
|
29
|
+
HTMLProofer::Runner.new(file, options)
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
def check_directory(directory, options = {})
|
33
|
+
raise ArgumentError unless directory.is_a?(String)
|
34
|
+
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
34
35
|
|
35
|
-
|
36
|
-
|
37
|
-
|
36
|
+
options[:type] = :directory
|
37
|
+
HTMLProofer::Runner.new([directory], options)
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
40
|
+
def check_directories(directories, options = {})
|
41
|
+
raise ArgumentError unless directories.is_a?(Array)
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
options[:type] = :directory
|
44
|
+
directories.each do |directory|
|
45
|
+
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
46
|
+
end
|
47
|
+
HTMLProofer::Runner.new(directories, options)
|
45
48
|
end
|
46
|
-
HTMLProofer::Runner.new(directories, options)
|
47
|
-
end
|
48
49
|
|
49
|
-
|
50
|
-
|
50
|
+
def check_links(links, options = {})
|
51
|
+
raise ArgumentError unless links.is_a?(Array)
|
51
52
|
|
52
|
-
|
53
|
-
|
53
|
+
options[:type] = :links
|
54
|
+
HTMLProofer::Runner.new(links, options)
|
55
|
+
end
|
54
56
|
end
|
55
57
|
end
|
56
58
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|