html-proofer 4.3.2 → 4.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/html_proofer/attribute/url.rb +4 -3
- data/lib/html_proofer/check/favicon.rb +4 -1
- data/lib/html_proofer/check/images.rb +7 -1
- data/lib/html_proofer/check/links.rb +6 -0
- data/lib/html_proofer/check/open_graph.rb +3 -0
- data/lib/html_proofer/check/scripts.rb +4 -1
- data/lib/html_proofer/check.rb +20 -18
- data/lib/html_proofer/configuration.rb +27 -25
- data/lib/html_proofer/url_validator/internal.rb +54 -12
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +26 -24
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4043366fa1c0d7aaa58fc0b577d25d2d0fba7e72afca2b43ae688e90c55dddef
|
4
|
+
data.tar.gz: a93621d2169757d3665e5950e8c99d23a1b64cc9014fbc806a986fcdcb906e26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1636df520fa7d4035ca482eeafd0b5e059b0fc77a0219f340e7c47b8f2105c8dd107bf03e0d78ca582f5d7abef9d5853edae498a2133b75eab6f92b7e77b636f
|
7
|
+
data.tar.gz: 5310df2250a5db3c8864b12ae01e49a0a2d043fc15f5bece45615cbad1d2b9100c899fa35902db5536b2be85908f9208ebfe65e31318cf6a6db47e945a745f17
|
@@ -20,12 +20,13 @@ module HTMLProofer
|
|
20
20
|
|
21
21
|
swap_urls!
|
22
22
|
clean_url!
|
23
|
-
|
24
|
-
# convert "//" links to "https://"
|
25
|
-
@url.start_with?("//") ? @url = "https:#{@url}" : @url
|
26
23
|
end
|
27
24
|
end
|
28
25
|
|
26
|
+
def protocol_relative?
|
27
|
+
url.start_with?("//")
|
28
|
+
end
|
29
|
+
|
29
30
|
def to_s
|
30
31
|
@url
|
31
32
|
end
|
@@ -16,7 +16,10 @@ module HTMLProofer
|
|
16
16
|
return if immediate_redirect?
|
17
17
|
|
18
18
|
if found
|
19
|
-
if @favicon.url.
|
19
|
+
if @favicon.url.protocol_relative?
|
20
|
+
add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead",
|
21
|
+
line: @favicon.line, content: @favicon.content)
|
22
|
+
elsif @favicon.url.remote?
|
20
23
|
add_to_external_urls(@favicon.url, @favicon.line)
|
21
24
|
elsif !@favicon.url.exists?
|
22
25
|
add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line,
|
@@ -18,6 +18,9 @@ module HTMLProofer
|
|
18
18
|
# does the image exist?
|
19
19
|
if missing_src?
|
20
20
|
add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content)
|
21
|
+
elsif @img.url.protocol_relative?
|
22
|
+
add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead",
|
23
|
+
line: @img.line, content: @img.content)
|
21
24
|
elsif @img.url.remote?
|
22
25
|
add_to_external_urls(@img.url, @img.line)
|
23
26
|
elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes?
|
@@ -27,7 +30,10 @@ module HTMLProofer
|
|
27
30
|
@img.srcsets_wo_sizes.each do |srcset|
|
28
31
|
srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true)
|
29
32
|
|
30
|
-
if srcset_url.
|
33
|
+
if srcset_url.protocol_relative?
|
34
|
+
add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead",
|
35
|
+
line: @img.line, content: @img.content)
|
36
|
+
elsif srcset_url.remote?
|
31
37
|
add_to_external_urls(srcset_url.url, @img.line)
|
32
38
|
elsif !srcset_url.exists?
|
33
39
|
add_failure("internal image #{srcset} does not exist", line: @img.line, content: @img.content)
|
@@ -28,6 +28,12 @@ module HTMLProofer
|
|
28
28
|
next
|
29
29
|
end
|
30
30
|
|
31
|
+
if @link.url.protocol_relative?
|
32
|
+
add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead",
|
33
|
+
line: @link.line, content: @link.content)
|
34
|
+
next
|
35
|
+
end
|
36
|
+
|
31
37
|
check_schemes
|
32
38
|
|
33
39
|
# intentionally down here because we still want valid? & missing_href? to execute
|
@@ -16,6 +16,9 @@ module HTMLProofer
|
|
16
16
|
add_failure("open graph content attribute is empty", line: @open_graph.line, content: @open_graph.content)
|
17
17
|
elsif !@open_graph.url.valid?
|
18
18
|
add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line)
|
19
|
+
elsif @open_graph.url.protocol_relative?
|
20
|
+
add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead",
|
21
|
+
line: @open_graph.line, content: @open_graph.content)
|
19
22
|
elsif @open_graph.url.remote?
|
20
23
|
add_to_external_urls(@open_graph.url, @open_graph.line)
|
21
24
|
else
|
@@ -13,8 +13,11 @@ module HTMLProofer
|
|
13
13
|
# does the script exist?
|
14
14
|
if missing_src?
|
15
15
|
add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content)
|
16
|
+
elsif @script.url.protocol_relative?
|
17
|
+
add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead",
|
18
|
+
line: @script.line, content: @script.content)
|
16
19
|
elsif @script.url.remote?
|
17
|
-
add_to_external_urls(@script.
|
20
|
+
add_to_external_urls(@script.url, @script.line)
|
18
21
|
check_sri if @runner.check_sri?
|
19
22
|
elsif !@script.url.exists?
|
20
23
|
add_failure("internal script reference #{@script.src} does not exist", line: @script.line,
|
data/lib/html_proofer/check.rb
CHANGED
@@ -29,28 +29,10 @@ module HTMLProofer
|
|
29
29
|
content: content)
|
30
30
|
end
|
31
31
|
|
32
|
-
def self.subchecks(runner_options)
|
33
|
-
# grab all known checks
|
34
|
-
checks = ObjectSpace.each_object(Class).select do |klass|
|
35
|
-
klass < self
|
36
|
-
end
|
37
|
-
|
38
|
-
# remove any checks not explicitly included
|
39
|
-
checks.each_with_object([]) do |check, arr|
|
40
|
-
next unless runner_options[:checks].include?(check.short_name)
|
41
|
-
|
42
|
-
arr << check
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
32
|
def short_name
|
47
33
|
self.class.name.split("::").last
|
48
34
|
end
|
49
35
|
|
50
|
-
def self.short_name
|
51
|
-
name.split("::").last
|
52
|
-
end
|
53
|
-
|
54
36
|
def add_to_internal_urls(url, line)
|
55
37
|
url_string = url.raw_attribute
|
56
38
|
|
@@ -74,6 +56,26 @@ module HTMLProofer
|
|
74
56
|
@external_urls[url_string] << { filename: @runner.current_filename, line: line }
|
75
57
|
end
|
76
58
|
|
59
|
+
class << self
|
60
|
+
def subchecks(runner_options)
|
61
|
+
# grab all known checks
|
62
|
+
checks = ObjectSpace.each_object(Class).select do |klass|
|
63
|
+
klass < self
|
64
|
+
end
|
65
|
+
|
66
|
+
# remove any checks not explicitly included
|
67
|
+
checks.each_with_object([]) do |check, arr|
|
68
|
+
next unless runner_options[:checks].include?(check.short_name)
|
69
|
+
|
70
|
+
arr << check
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def short_name
|
75
|
+
name.split("::").last
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
77
79
|
private def base_url
|
78
80
|
return @base_url if defined?(@base_url)
|
79
81
|
|
@@ -47,42 +47,44 @@ module HTMLProofer
|
|
47
47
|
|
48
48
|
CACHE_DEFAULTS = {}.freeze
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
class << self
|
51
|
+
def generate_defaults(opts)
|
52
|
+
options = PROOFER_DEFAULTS.merge(opts)
|
52
53
|
|
53
|
-
|
54
|
-
|
54
|
+
options[:typhoeus] = HTMLProofer::Configuration::TYPHOEUS_DEFAULTS.merge(opts[:typhoeus] || {})
|
55
|
+
options[:hydra] = HTMLProofer::Configuration::HYDRA_DEFAULTS.merge(opts[:hydra] || {})
|
55
56
|
|
56
|
-
|
57
|
-
|
57
|
+
options[:parallel] = HTMLProofer::Configuration::PARALLEL_DEFAULTS.merge(opts[:parallel] || {})
|
58
|
+
options[:cache] = HTMLProofer::Configuration::CACHE_DEFAULTS.merge(opts[:cache] || {})
|
58
59
|
|
59
|
-
|
60
|
+
options.delete(:src)
|
60
61
|
|
61
|
-
|
62
|
-
|
62
|
+
options
|
63
|
+
end
|
63
64
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
65
|
+
def to_regex?(item)
|
66
|
+
if item.start_with?("/") && item.end_with?("/")
|
67
|
+
Regexp.new(item[1...-1])
|
68
|
+
else
|
69
|
+
item
|
70
|
+
end
|
69
71
|
end
|
70
|
-
end
|
71
72
|
|
72
|
-
|
73
|
-
|
74
|
-
|
73
|
+
def parse_json_option(option_name, config, symbolize_names: true)
|
74
|
+
raise ArgumentError, "Must provide an option name in string format." unless option_name.is_a?(String)
|
75
|
+
raise ArgumentError, "Must provide an option name in string format." if option_name.strip.empty?
|
75
76
|
|
76
|
-
|
77
|
+
return {} if config.nil?
|
77
78
|
|
78
|
-
|
79
|
+
raise ArgumentError, "Must provide a JSON configuration in string format." unless config.is_a?(String)
|
79
80
|
|
80
|
-
|
81
|
+
return {} if config.strip.empty?
|
81
82
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
begin
|
84
|
+
JSON.parse(config, { symbolize_names: symbolize_names })
|
85
|
+
rescue StandardError
|
86
|
+
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
|
87
|
+
end
|
86
88
|
end
|
87
89
|
end
|
88
90
|
end
|
@@ -22,22 +22,39 @@ module HTMLProofer
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def run_internal_link_checker(links)
|
25
|
+
# collect urls and metadata for hashes to be checked in the same target file
|
26
|
+
file_paths_hashes_to_check = {}
|
25
27
|
to_add = []
|
26
|
-
links.
|
28
|
+
links.each_with_index do |(link, matched_files), i|
|
29
|
+
matched_count_to_log = pluralize(matched_files.count, "reference", "references")
|
30
|
+
@logger.log(:debug, "(#{i + 1} / #{links.count}) Internal link #{link}: Checking #{matched_count_to_log}")
|
27
31
|
matched_files.each do |metadata|
|
28
32
|
url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])
|
29
33
|
|
30
34
|
@runner.current_source = metadata[:source]
|
31
35
|
@runner.current_filename = metadata[:filename]
|
32
36
|
|
33
|
-
|
37
|
+
target_file_path = url.absolute_path
|
38
|
+
unless file_exists?(target_file_path)
|
34
39
|
@failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
|
35
40
|
"internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
|
36
41
|
to_add << [url, metadata, false]
|
37
42
|
next
|
38
43
|
end
|
39
44
|
|
40
|
-
|
45
|
+
hash_exists = hash_exists_for_url?(url)
|
46
|
+
if hash_exists.nil?
|
47
|
+
# the hash needs to be checked in the target file, we collect the url and metadata
|
48
|
+
unless file_paths_hashes_to_check.key?(target_file_path)
|
49
|
+
file_paths_hashes_to_check[target_file_path] = {}
|
50
|
+
end
|
51
|
+
unless file_paths_hashes_to_check[target_file_path].key?(url.hash)
|
52
|
+
file_paths_hashes_to_check[target_file_path][url.hash] = []
|
53
|
+
end
|
54
|
+
file_paths_hashes_to_check[target_file_path][url.hash] << [url, metadata]
|
55
|
+
next
|
56
|
+
end
|
57
|
+
unless hash_exists
|
41
58
|
@failed_checks << Failure.new(@runner.current_filename, "Links > Internal",
|
42
59
|
"internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
|
43
60
|
to_add << [url, metadata, false]
|
@@ -48,6 +65,24 @@ module HTMLProofer
|
|
48
65
|
end
|
49
66
|
end
|
50
67
|
|
68
|
+
# check hashes by target file
|
69
|
+
@logger.log(:info, "Checking internal link hashes in #{pluralize(file_paths_hashes_to_check.count, "file", "files")}")
|
70
|
+
file_paths_hashes_to_check.each_with_index do |(file_path, hashes_to_check), i|
|
71
|
+
hash_count_to_log = pluralize(hashes_to_check.count, "hash", "hashes")
|
72
|
+
@logger.log(:debug, "(#{i + 1} / #{file_paths_hashes_to_check.count}) Checking #{hash_count_to_log} in #{file_path}")
|
73
|
+
html = create_nokogiri(file_path)
|
74
|
+
hashes_to_check.each_pair do |href_hash, url_metadata|
|
75
|
+
exists = hash_exists_in_html?(href_hash, html)
|
76
|
+
url_metadata.each do |(url, metadata)|
|
77
|
+
unless exists
|
78
|
+
@failed_checks << Failure.new(metadata[:filename], "Links > Internal",
|
79
|
+
"internally linking to #{url}; the file exists, but the hash '#{href_hash}' does not", line: metadata[:line], status: nil, content: nil)
|
80
|
+
end
|
81
|
+
to_add << [url, metadata, exists]
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
51
86
|
# adding directly to the cache above results in an endless loop
|
52
87
|
to_add.each do |(url, metadata, exists)|
|
53
88
|
@cache.add_internal(url.to_s, metadata, exists)
|
@@ -56,15 +91,15 @@ module HTMLProofer
|
|
56
91
|
@failed_checks
|
57
92
|
end
|
58
93
|
|
59
|
-
private def file_exists?(
|
60
|
-
absolute_path
|
61
|
-
return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)
|
94
|
+
private def file_exists?(absolute_path)
|
95
|
+
return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
|
62
96
|
|
63
|
-
@runner.checked_paths[
|
97
|
+
@runner.checked_paths[absolute_path] = File.exist?(absolute_path)
|
64
98
|
end
|
65
99
|
|
66
|
-
# verify the target
|
67
|
-
|
100
|
+
# verify the hash based only on the URL, w/o looking at the target file
|
101
|
+
# => returns nil if the hash could not be verified
|
102
|
+
private def hash_exists_for_url?(url)
|
68
103
|
href_hash = url.hash
|
69
104
|
return true if blank?(href_hash)
|
70
105
|
return true unless @runner.options[:check_internal_hash]
|
@@ -76,10 +111,18 @@ module HTMLProofer
|
|
76
111
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
77
112
|
fragment_ids = [href_hash, decoded_href_hash]
|
78
113
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
79
|
-
fragment_ids.include?("top")
|
114
|
+
return true if fragment_ids.include?("top")
|
115
|
+
|
116
|
+
nil
|
117
|
+
end
|
118
|
+
|
119
|
+
private def hash_exists_in_html?(href_hash, html)
|
120
|
+
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
121
|
+
fragment_ids = [href_hash, decoded_href_hash]
|
122
|
+
!find_fragments(fragment_ids, html).empty?
|
80
123
|
end
|
81
124
|
|
82
|
-
private def find_fragments(fragment_ids,
|
125
|
+
private def find_fragments(fragment_ids, html)
|
83
126
|
xpaths = fragment_ids.uniq.flat_map do |frag_id|
|
84
127
|
escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
|
85
128
|
[
|
@@ -89,7 +132,6 @@ module HTMLProofer
|
|
89
132
|
end
|
90
133
|
xpaths << XpathFunctions.new
|
91
134
|
|
92
|
-
html = create_nokogiri(url.absolute_path)
|
93
135
|
html.xpath(*xpaths)
|
94
136
|
end
|
95
137
|
end
|
data/lib/html_proofer/version.rb
CHANGED
data/lib/html_proofer.rb
CHANGED
@@ -4,7 +4,7 @@ require "zeitwerk"
|
|
4
4
|
lib_dir = File.join(File.dirname(__dir__), "lib")
|
5
5
|
gem_loader = Zeitwerk::Loader.for_gem
|
6
6
|
gem_loader.inflector.inflect(
|
7
|
-
"html_proofer" => "HTMLProofer"
|
7
|
+
"html_proofer" => "HTMLProofer",
|
8
8
|
)
|
9
9
|
gem_loader.ignore(File.join(lib_dir, "html-proofer.rb"))
|
10
10
|
gem_loader.setup
|
@@ -20,37 +20,39 @@ if ENV.fetch("DEBUG", false)
|
|
20
20
|
end
|
21
21
|
|
22
22
|
module HTMLProofer
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
class << self
|
24
|
+
def check_file(file, options = {})
|
25
|
+
raise ArgumentError unless file.is_a?(String)
|
26
|
+
raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
options[:type] = :file
|
29
|
+
HTMLProofer::Runner.new(file, options)
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
def check_directory(directory, options = {})
|
33
|
+
raise ArgumentError unless directory.is_a?(String)
|
34
|
+
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
34
35
|
|
35
|
-
|
36
|
-
|
37
|
-
|
36
|
+
options[:type] = :directory
|
37
|
+
HTMLProofer::Runner.new([directory], options)
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
40
|
+
def check_directories(directories, options = {})
|
41
|
+
raise ArgumentError unless directories.is_a?(Array)
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
options[:type] = :directory
|
44
|
+
directories.each do |directory|
|
45
|
+
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
46
|
+
end
|
47
|
+
HTMLProofer::Runner.new(directories, options)
|
45
48
|
end
|
46
|
-
HTMLProofer::Runner.new(directories, options)
|
47
|
-
end
|
48
49
|
|
49
|
-
|
50
|
-
|
50
|
+
def check_links(links, options = {})
|
51
|
+
raise ArgumentError unless links.is_a?(Array)
|
51
52
|
|
52
|
-
|
53
|
-
|
53
|
+
options[:type] = :links
|
54
|
+
HTMLProofer::Runner.new(links, options)
|
55
|
+
end
|
54
56
|
end
|
55
57
|
end
|
56
58
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|