html-proofer 3.15.3 → 3.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +2 -2
- data/lib/html-proofer/cache.rb +9 -8
- data/lib/html-proofer/check.rb +15 -8
- data/lib/html-proofer/check/favicon.rb +1 -1
- data/lib/html-proofer/check/links.rb +48 -22
- data/lib/html-proofer/check/opengraph.rb +1 -1
- data/lib/html-proofer/configuration.rb +1 -1
- data/lib/html-proofer/element.rb +30 -29
- data/lib/html-proofer/middleware.rb +3 -2
- data/lib/html-proofer/runner.rb +74 -10
- data/lib/html-proofer/url_validator.rb +21 -25
- data/lib/html-proofer/utils.rb +1 -1
- data/lib/html-proofer/version.rb +1 -1
- metadata +7 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78c779b0dfc11fd1d80baaf4133ee26b695e13fcb20ed3852cb99a65caef7e24
|
4
|
+
data.tar.gz: 71e4f5e229d8754bc1ad01d4dc28d76ae43f0c5c2d0e3ff2871c32ae4ce89c42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa82fcbba561ef4107dce9188d2c451ee8c40afbf5ff67dbd4f98729f91fec40ce3b58f2e13fd04a9e2f91258757f283240680cbf7b609e2c3b7000168b2fff6
|
7
|
+
data.tar.gz: 848b2d731f440c3128c77e4cdddcb54ffc18cf1fdd423afc63b9300ccf074ad457b0064b70aab59d950ea242ba3e01b2ff33f8e02491859c24f23d5895f6f705
|
data/bin/htmlproofer
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
4
|
+
$stdout.sync = true
|
5
5
|
|
6
6
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
7
7
|
|
@@ -50,7 +50,7 @@ Mercenary.program(:htmlproofer) do |p|
|
|
50
50
|
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
51
51
|
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
52
52
|
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
53
|
-
p.option 'root_dir', '--root-
|
53
|
+
p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
|
54
54
|
|
55
55
|
p.action do |args, opts|
|
56
56
|
args = ['.'] if args.empty?
|
data/lib/html-proofer/cache.rb
CHANGED
@@ -59,6 +59,8 @@ module HTMLProofer
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def add(url, filenames, status, msg = '')
|
62
|
+
return unless use_cache?
|
63
|
+
|
62
64
|
data = {
|
63
65
|
time: @cache_time,
|
64
66
|
filenames: filenames,
|
@@ -92,12 +94,12 @@ module HTMLProofer
|
|
92
94
|
del = 0
|
93
95
|
@cache_log.delete_if do |url, _|
|
94
96
|
url = clean_url(url)
|
95
|
-
if
|
97
|
+
if found_urls.include?(url)
|
98
|
+
false
|
99
|
+
else
|
96
100
|
@logger.log :debug, "Removing #{url} from cache check"
|
97
101
|
del += 1
|
98
102
|
true
|
99
|
-
else
|
100
|
-
false
|
101
103
|
end
|
102
104
|
end
|
103
105
|
|
@@ -115,12 +117,11 @@ module HTMLProofer
|
|
115
117
|
@load.nil?
|
116
118
|
end
|
117
119
|
|
118
|
-
def retrieve_urls(
|
119
|
-
urls_to_check = detect_url_changes(
|
120
|
+
def retrieve_urls(urls)
|
121
|
+
urls_to_check = detect_url_changes(urls)
|
120
122
|
@cache_log.each_pair do |url, cache|
|
121
|
-
if within_timeframe?(cache['time'])
|
122
|
-
|
123
|
-
end
|
123
|
+
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
124
|
+
|
124
125
|
urls_to_check[url] = cache['filenames'] # recheck expired links
|
125
126
|
end
|
126
127
|
urls_to_check
|
data/lib/html-proofer/check.rb
CHANGED
@@ -3,15 +3,17 @@
|
|
3
3
|
module HTMLProofer
|
4
4
|
# Mostly handles issue management and collecting of external URLs.
|
5
5
|
class Check
|
6
|
-
attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
|
6
|
+
attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
|
7
7
|
|
8
|
-
def initialize(src, path, html, logger, options)
|
8
|
+
def initialize(src, path, html, logger, cache, options)
|
9
9
|
@src = src
|
10
10
|
@path = path
|
11
11
|
@html = remove_ignored(html)
|
12
12
|
@logger = logger
|
13
|
+
@cache = cache
|
13
14
|
@options = options
|
14
15
|
@issues = []
|
16
|
+
@internal_urls = {}
|
15
17
|
@external_urls = {}
|
16
18
|
end
|
17
19
|
|
@@ -24,17 +26,22 @@ module HTMLProofer
|
|
24
26
|
raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
|
25
27
|
end
|
26
28
|
|
27
|
-
def add_issue(desc, line: nil, status: -1, content: nil)
|
28
|
-
@issues << Issue.new(@path, desc, line: line, status: status, content: content)
|
29
|
+
def add_issue(desc, line: nil, path: nil, status: -1, content: nil)
|
30
|
+
@issues << Issue.new(path || @path, desc, line: line, status: status, content: content)
|
31
|
+
false
|
32
|
+
end
|
33
|
+
|
34
|
+
def add_to_internal_urls(url, internal_url)
|
35
|
+
if @internal_urls[url]
|
36
|
+
@internal_urls[url] << internal_url
|
37
|
+
else
|
38
|
+
@internal_urls[url] = [internal_url]
|
39
|
+
end
|
29
40
|
end
|
30
41
|
|
31
42
|
def add_to_external_urls(url)
|
32
43
|
return if @external_urls[url]
|
33
44
|
|
34
|
-
add_path_for_url(url)
|
35
|
-
end
|
36
|
-
|
37
|
-
def add_path_for_url(url)
|
38
45
|
if @external_urls[url]
|
39
46
|
@external_urls[url] << @path
|
40
47
|
else
|
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
|
|
4
4
|
include HTMLProofer::Utils
|
5
5
|
|
6
6
|
def missing_href?
|
7
|
+
return blank?(@link.src) if @node.name == 'source'
|
8
|
+
|
7
9
|
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
8
10
|
end
|
9
11
|
|
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
|
|
12
14
|
end
|
13
15
|
|
14
16
|
def run
|
15
|
-
@html.css('a, link').each do |node|
|
17
|
+
@html.css('a, link, source').each do |node|
|
16
18
|
@link = create_element(node)
|
17
19
|
line = node.line
|
18
20
|
content = node.to_s
|
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
|
|
49
51
|
# curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
|
50
52
|
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
|
51
53
|
|
52
|
-
add_to_external_urls(@link.href)
|
54
|
+
add_to_external_urls(@link.href || @link.src)
|
53
55
|
next
|
54
|
-
elsif @link.internal?
|
55
|
-
|
56
|
+
elsif @link.internal?
|
57
|
+
if @link.exists?
|
58
|
+
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
59
|
+
else
|
60
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
|
61
|
+
end
|
56
62
|
end
|
63
|
+
end
|
57
64
|
|
58
|
-
|
59
|
-
|
60
|
-
add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
|
61
|
-
next
|
62
|
-
end
|
65
|
+
external_urls
|
66
|
+
end
|
63
67
|
|
64
|
-
|
65
|
-
|
68
|
+
def check_internal_link(link, path, line, content)
|
69
|
+
# does the local directory have a trailing slash?
|
70
|
+
if link.unslashed_directory?(link.absolute_path)
|
71
|
+
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", path: path, line: line, content: content)
|
72
|
+
return false
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
# verify the target hash
|
76
|
+
return handle_hash(link, path, line, content) if link.hash
|
77
|
+
|
78
|
+
true
|
69
79
|
end
|
70
80
|
|
71
81
|
def check_schemes(link, line, content)
|
@@ -93,24 +103,28 @@ class LinkCheck < ::HTMLProofer::Check
|
|
93
103
|
add_issue("#{link.href} contains no phone number", line: line, content: content) if link.path.empty?
|
94
104
|
end
|
95
105
|
|
96
|
-
def handle_hash(link, line, content)
|
97
|
-
if link.internal? && !
|
98
|
-
add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
|
106
|
+
def handle_hash(link, path, line, content)
|
107
|
+
if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
|
108
|
+
return add_issue("linking to internal hash ##{link.hash} that does not exist", path: path, line: line, content: content)
|
99
109
|
elsif link.external?
|
100
|
-
external_link_check(link, line, content)
|
110
|
+
return external_link_check(link, line, content)
|
101
111
|
end
|
112
|
+
|
113
|
+
true
|
102
114
|
end
|
103
115
|
|
104
116
|
def external_link_check(link, line, content)
|
105
|
-
if
|
106
|
-
|
117
|
+
if link.exists? # rubocop:disable Style/GuardClause
|
118
|
+
target_html = create_nokogiri(link.absolute_path)
|
119
|
+
return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
|
107
120
|
else
|
108
|
-
|
109
|
-
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
|
121
|
+
return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
|
110
122
|
end
|
123
|
+
|
124
|
+
true
|
111
125
|
end
|
112
126
|
|
113
|
-
def
|
127
|
+
def hash_exists?(html, href_hash)
|
114
128
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
115
129
|
fragment_ids = [href_hash, decoded_href_hash]
|
116
130
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
|
|
148
162
|
|
149
163
|
class XpathFunctions
|
150
164
|
def case_sensitive_equals(node_set, str_to_match)
|
151
|
-
node_set.find_all { |node| node.to_s
|
165
|
+
node_set.find_all { |node| node.to_s.== str_to_match.to_s }
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
class InternalLink
|
170
|
+
attr_reader :link, :href, :path, :line, :content
|
171
|
+
|
172
|
+
def initialize(link, path, line, content)
|
173
|
+
@link = link
|
174
|
+
@href = @link.href
|
175
|
+
@path = path
|
176
|
+
@line = line
|
177
|
+
@content = content
|
152
178
|
end
|
153
179
|
end
|
154
180
|
end
|
data/lib/html-proofer/element.rb
CHANGED
@@ -15,7 +15,7 @@ module HTMLProofer
|
|
15
15
|
# Construct readable ivars for every element
|
16
16
|
begin
|
17
17
|
obj.attributes.each_pair do |attribute, value|
|
18
|
-
name = attribute.tr('
|
18
|
+
name = attribute.tr('-:.;@', '_').to_s.to_sym
|
19
19
|
(class << self; self; end).send(:attr_reader, name)
|
20
20
|
instance_variable_set("@#{name}", value.value)
|
21
21
|
end
|
@@ -42,19 +42,19 @@ module HTMLProofer
|
|
42
42
|
|
43
43
|
# fix up missing protocols
|
44
44
|
if defined?(@href)
|
45
|
-
@href.insert(0, 'http:') if
|
45
|
+
@href.insert(0, 'http:') if %r{^//}.match?(@href)
|
46
46
|
else
|
47
47
|
@href = nil
|
48
48
|
end
|
49
49
|
|
50
50
|
if defined?(@src)
|
51
|
-
@src.insert(0, 'http:') if
|
51
|
+
@src.insert(0, 'http:') if %r{^//}.match?(@src)
|
52
52
|
else
|
53
53
|
@src = nil
|
54
54
|
end
|
55
55
|
|
56
56
|
if defined?(@srcset)
|
57
|
-
@srcset.insert(0, 'http:') if
|
57
|
+
@srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
|
58
58
|
else
|
59
59
|
@srcset = nil
|
60
60
|
end
|
@@ -105,12 +105,10 @@ module HTMLProofer
|
|
105
105
|
return true if @data_proofer_ignore
|
106
106
|
return true if @parent_ignorable
|
107
107
|
|
108
|
-
return true if
|
108
|
+
return true if /^javascript:/.match?(url)
|
109
109
|
|
110
110
|
# ignore base64 encoded images
|
111
|
-
if %w[ImageCheck FaviconCheck].include?
|
112
|
-
return true if url =~ /^data:image/
|
113
|
-
end
|
111
|
+
return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
|
114
112
|
|
115
113
|
# ignore user defined URLs
|
116
114
|
return true if ignores_pattern_check(@check.options[:url_ignore])
|
@@ -171,6 +169,10 @@ module HTMLProofer
|
|
171
169
|
url.start_with?('?')
|
172
170
|
end
|
173
171
|
|
172
|
+
def absolute_path?(path)
|
173
|
+
path.start_with?('/')
|
174
|
+
end
|
175
|
+
|
174
176
|
def file_path
|
175
177
|
return if path.nil? || path.empty?
|
176
178
|
|
@@ -178,22 +180,19 @@ module HTMLProofer
|
|
178
180
|
|
179
181
|
path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
|
180
182
|
|
181
|
-
if path
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
base = @check.path
|
194
|
-
end
|
183
|
+
base = if absolute_path?(path) # path relative to root
|
184
|
+
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
|
185
|
+
@check.options[:root_dir] || (File.directory?(@check.src) ? @check.src : File.dirname(@check.src))
|
186
|
+
elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
|
187
|
+
File.dirname(@check.path)
|
188
|
+
elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
|
189
|
+
File.dirname(@check.path)
|
190
|
+
else # relative link, path is a directory
|
191
|
+
@check.path
|
192
|
+
end
|
193
|
+
|
194
|
+
file = File.join(base, path)
|
195
195
|
|
196
|
-
file = File.join base, path
|
197
196
|
if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
|
198
197
|
file = "#{file}#{@check.options[:extension]}"
|
199
198
|
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
@@ -205,22 +204,24 @@ module HTMLProofer
|
|
205
204
|
|
206
205
|
# checks if a file exists relative to the current pwd
|
207
206
|
def exists?
|
208
|
-
return @checked_paths[absolute_path] if @checked_paths.key?
|
207
|
+
return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
|
209
208
|
|
210
|
-
@checked_paths[absolute_path] = File.exist?
|
209
|
+
@checked_paths[absolute_path] = File.exist?(absolute_path)
|
211
210
|
end
|
212
211
|
|
213
212
|
def absolute_path
|
214
213
|
path = file_path || @check.path
|
215
|
-
|
214
|
+
|
215
|
+
File.expand_path(path, Dir.pwd)
|
216
216
|
end
|
217
217
|
|
218
218
|
def ignores_pattern_check(links)
|
219
219
|
links.each do |ignore|
|
220
|
-
|
220
|
+
case ignore
|
221
|
+
when String
|
221
222
|
return true if ignore == url
|
222
|
-
|
223
|
-
return true if ignore
|
223
|
+
when Regexp
|
224
|
+
return true if ignore&.match?(url)
|
224
225
|
end
|
225
226
|
end
|
226
227
|
|
@@ -6,6 +6,7 @@ module HTMLProofer
|
|
6
6
|
|
7
7
|
class InvalidHtmlError < StandardError
|
8
8
|
def initialize(failures)
|
9
|
+
super
|
9
10
|
@failures = failures
|
10
11
|
end
|
11
12
|
|
@@ -53,13 +54,13 @@ module HTMLProofer
|
|
53
54
|
def call(env)
|
54
55
|
result = @app.call(env)
|
55
56
|
return result if env['REQUEST_METHOD'] != 'GET'
|
56
|
-
return result if env['QUERY_STRING']
|
57
|
+
return result if /proofer-ignore/.match?(env['QUERY_STRING'])
|
57
58
|
return result if result.first != 200
|
58
59
|
|
59
60
|
body = []
|
60
61
|
result.last.each { |e| body << e }
|
61
62
|
|
62
|
-
body = body.join
|
63
|
+
body = body.join
|
63
64
|
begin
|
64
65
|
html = body.lstrip
|
65
66
|
rescue StandardError
|
data/lib/html-proofer/runner.rb
CHANGED
@@ -4,7 +4,7 @@ module HTMLProofer
|
|
4
4
|
class Runner
|
5
5
|
include HTMLProofer::Utils
|
6
6
|
|
7
|
-
attr_reader :options, :external_urls, :failures
|
7
|
+
attr_reader :options, :internal_urls, :external_urls, :failures
|
8
8
|
|
9
9
|
def initialize(src, opts = {})
|
10
10
|
@src = src
|
@@ -20,6 +20,8 @@ module HTMLProofer
|
|
20
20
|
|
21
21
|
@type = @options.delete(:type)
|
22
22
|
@logger = HTMLProofer::Log.new(@options[:log_level])
|
23
|
+
@cache = Cache.new(@logger, @options[:cache])
|
24
|
+
@internal_link_checks = nil
|
23
25
|
|
24
26
|
# Add swap patterns for internal domains
|
25
27
|
unless @options[:internal_domains].empty?
|
@@ -30,7 +32,11 @@ module HTMLProofer
|
|
30
32
|
end
|
31
33
|
end
|
32
34
|
|
35
|
+
@internal_urls = {}
|
36
|
+
@internal_urls_to_paths = {}
|
37
|
+
@external_urls = {}
|
33
38
|
@failures = []
|
39
|
+
@before_request = []
|
34
40
|
end
|
35
41
|
|
36
42
|
def run
|
@@ -58,15 +64,13 @@ module HTMLProofer
|
|
58
64
|
end
|
59
65
|
end
|
60
66
|
@external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
|
61
|
-
|
67
|
+
validate_external_urls
|
62
68
|
end
|
63
69
|
|
64
70
|
# Collects any external URLs found in a directory of files. Also collectes
|
65
71
|
# every failed test from process_files.
|
66
72
|
# Sends the external URLs to Typhoeus for batch processing.
|
67
73
|
def check_files
|
68
|
-
@external_urls = {}
|
69
|
-
|
70
74
|
process_files.each do |item|
|
71
75
|
@external_urls.merge!(item[:external_urls])
|
72
76
|
@failures.concat(item[:failures])
|
@@ -77,9 +81,12 @@ module HTMLProofer
|
|
77
81
|
# just not run those other checks at all.
|
78
82
|
if @options[:external_only]
|
79
83
|
@failures = []
|
80
|
-
|
84
|
+
validate_external_urls
|
81
85
|
elsif !@options[:disable_external]
|
82
|
-
|
86
|
+
validate_external_urls
|
87
|
+
validate_internal_urls
|
88
|
+
else
|
89
|
+
validate_internal_urls
|
83
90
|
end
|
84
91
|
end
|
85
92
|
|
@@ -100,8 +107,21 @@ module HTMLProofer
|
|
100
107
|
@src.each do |src|
|
101
108
|
checks.each do |klass|
|
102
109
|
@logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
|
103
|
-
check = Object.const_get(klass).new(src, path, html, @logger, @options)
|
110
|
+
check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
|
104
111
|
check.run
|
112
|
+
|
113
|
+
if klass == 'LinkCheck'
|
114
|
+
@internal_link_checks = check
|
115
|
+
check.internal_urls.each_pair do |url, internal_urls|
|
116
|
+
if @internal_urls_to_paths[url]
|
117
|
+
@internal_urls_to_paths[url].concat(internal_urls.map(&:path))
|
118
|
+
else
|
119
|
+
@internal_urls_to_paths[url] = internal_urls.map(&:path)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
@internal_urls.merge!(check.internal_urls)
|
123
|
+
end
|
124
|
+
|
105
125
|
external_urls = check.external_urls
|
106
126
|
external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
|
107
127
|
result[:external_urls].merge!(external_urls)
|
@@ -112,15 +132,35 @@ module HTMLProofer
|
|
112
132
|
end
|
113
133
|
|
114
134
|
def check_path(path)
|
115
|
-
check_parsed
|
135
|
+
check_parsed(create_nokogiri(path), path)
|
116
136
|
end
|
117
137
|
|
118
|
-
def
|
119
|
-
url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
|
138
|
+
def validate_external_urls
|
139
|
+
url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
|
140
|
+
url_validator.before_request = @before_request
|
120
141
|
@failures.concat(url_validator.run)
|
121
142
|
@external_urls = url_validator.external_urls
|
122
143
|
end
|
123
144
|
|
145
|
+
def validate_internal_urls
|
146
|
+
if @cache.use_cache?
|
147
|
+
urls_to_check = load_internal_cache
|
148
|
+
|
149
|
+
urls_to_check.each_pair do |url, internal_urls|
|
150
|
+
result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.path, internal_urls.first.line, internal_urls.first.content)
|
151
|
+
code = result ? 200 : 404
|
152
|
+
@cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
|
153
|
+
end
|
154
|
+
@cache.write
|
155
|
+
else
|
156
|
+
@internal_urls.values.flatten.each do |internal_url|
|
157
|
+
@internal_link_checks.check_internal_link(internal_url.link, internal_url.path, internal_url.line, internal_url.content)
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
@failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
|
162
|
+
end
|
163
|
+
|
124
164
|
def files
|
125
165
|
@files ||= if @type == :directory
|
126
166
|
@src.map do |src|
|
@@ -173,5 +213,29 @@ module HTMLProofer
|
|
173
213
|
failure_text = pluralize(count, 'failure', 'failures')
|
174
214
|
raise @logger.colorize :fatal, "HTML-Proofer found #{failure_text}!"
|
175
215
|
end
|
216
|
+
|
217
|
+
# Set before_request callback.
|
218
|
+
#
|
219
|
+
# @example Set before_request.
|
220
|
+
# request.before_request { |request| p "yay" }
|
221
|
+
#
|
222
|
+
# @param [ Block ] block The block to execute.
|
223
|
+
#
|
224
|
+
# @yield [ Typhoeus::Request ]
|
225
|
+
#
|
226
|
+
# @return [ Array<Block> ] All before_request blocks.
|
227
|
+
def before_request(&block)
|
228
|
+
@before_request ||= []
|
229
|
+
@before_request << block if block
|
230
|
+
@before_request
|
231
|
+
end
|
232
|
+
|
233
|
+
def load_internal_cache
|
234
|
+
urls_to_check = @cache.retrieve_urls(@internal_urls)
|
235
|
+
cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
|
236
|
+
@logger.log :info, "Found #{cache_text} in the cache..."
|
237
|
+
|
238
|
+
urls_to_check
|
239
|
+
end
|
176
240
|
end
|
177
241
|
end
|
@@ -10,21 +10,23 @@ module HTMLProofer
|
|
10
10
|
include HTMLProofer::Utils
|
11
11
|
|
12
12
|
attr_reader :external_urls
|
13
|
+
attr_writer :before_request
|
13
14
|
|
14
|
-
def initialize(logger, external_urls, options)
|
15
|
+
def initialize(logger, cache, external_urls, options)
|
15
16
|
@logger = logger
|
16
17
|
@external_urls = external_urls
|
17
18
|
@failed_tests = []
|
18
19
|
@options = options
|
19
20
|
@hydra = Typhoeus::Hydra.new(@options[:hydra])
|
20
|
-
@cache =
|
21
|
+
@cache = cache
|
22
|
+
@before_request = []
|
21
23
|
end
|
22
24
|
|
23
25
|
def run
|
24
26
|
@external_urls = remove_query_values
|
25
27
|
|
26
28
|
if @cache.use_cache?
|
27
|
-
urls_to_check =
|
29
|
+
urls_to_check = @cache.retrieve_urls(@external_urls)
|
28
30
|
external_link_checker(urls_to_check)
|
29
31
|
@cache.write
|
30
32
|
else
|
@@ -41,11 +43,11 @@ module HTMLProofer
|
|
41
43
|
iterable_external_urls = @external_urls.dup
|
42
44
|
@external_urls.each_key do |url|
|
43
45
|
uri = begin
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
46
|
+
Addressable::URI.parse(url)
|
47
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
48
|
+
@logger.log :error, "#{url} is an invalid URL"
|
49
|
+
nil
|
50
|
+
end
|
49
51
|
next if uri.nil? || uri.query.nil?
|
50
52
|
|
51
53
|
iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
|
@@ -72,15 +74,6 @@ module HTMLProofer
|
|
72
74
|
uri.host + uri.path
|
73
75
|
end
|
74
76
|
|
75
|
-
def load_cache
|
76
|
-
cache_count = @cache.size
|
77
|
-
cache_text = pluralize(cache_count, 'link', 'links')
|
78
|
-
|
79
|
-
@logger.log :info, "Found #{cache_text} in the cache..."
|
80
|
-
|
81
|
-
@cache.retrieve_urls(@external_urls)
|
82
|
-
end
|
83
|
-
|
84
77
|
# Proofer runs faster if we pull out all the external URLs and run the checks
|
85
78
|
# at the end. Otherwise, we're halting the consuming process for every file during
|
86
79
|
# `process_files`.
|
@@ -109,11 +102,11 @@ module HTMLProofer
|
|
109
102
|
def establish_queue(external_urls)
|
110
103
|
external_urls.each_pair do |url, filenames|
|
111
104
|
url = begin
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
105
|
+
clean_url(url)
|
106
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
107
|
+
add_external_issue(filenames, "#{url} is an invalid URL")
|
108
|
+
next
|
109
|
+
end
|
117
110
|
|
118
111
|
method = if hash?(url) && @options[:check_external_hash]
|
119
112
|
:get
|
@@ -127,16 +120,19 @@ module HTMLProofer
|
|
127
120
|
def clean_url(href)
|
128
121
|
# catch any obvious issues, like strings in port numbers
|
129
122
|
parsed = Addressable::URI.parse(href)
|
130
|
-
if href
|
131
|
-
parsed.normalize
|
132
|
-
else
|
123
|
+
if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
133
124
|
href
|
125
|
+
else
|
126
|
+
parsed.normalize
|
134
127
|
end
|
135
128
|
end
|
136
129
|
|
137
130
|
def queue_request(method, href, filenames)
|
138
131
|
opts = @options[:typhoeus].merge(method: method)
|
139
132
|
request = Typhoeus::Request.new(href, opts)
|
133
|
+
@before_request.each do |callback|
|
134
|
+
callback.call(request)
|
135
|
+
end
|
140
136
|
request.on_complete { |response| response_handler(response, filenames) }
|
141
137
|
@hydra.queue request
|
142
138
|
end
|
data/lib/html-proofer/utils.rb
CHANGED
data/lib/html-proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.17.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -122,20 +122,6 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: codecov
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - ">="
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0'
|
132
|
-
type: :development
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - ">="
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '0'
|
139
125
|
- !ruby/object:Gem::Dependency
|
140
126
|
name: pry-byebug
|
141
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -293,15 +279,15 @@ homepage: https://github.com/gjtorikian/html-proofer
|
|
293
279
|
licenses:
|
294
280
|
- MIT
|
295
281
|
metadata: {}
|
296
|
-
post_install_message:
|
282
|
+
post_install_message:
|
297
283
|
rdoc_options: []
|
298
284
|
require_paths:
|
299
285
|
- lib
|
300
286
|
required_ruby_version: !ruby/object:Gem::Requirement
|
301
287
|
requirements:
|
302
|
-
- - "
|
288
|
+
- - "~>"
|
303
289
|
- !ruby/object:Gem::Version
|
304
|
-
version: '
|
290
|
+
version: '2.4'
|
305
291
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
306
292
|
requirements:
|
307
293
|
- - ">="
|
@@ -309,7 +295,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
309
295
|
version: '0'
|
310
296
|
requirements: []
|
311
297
|
rubygems_version: 3.1.2
|
312
|
-
signing_key:
|
298
|
+
signing_key:
|
313
299
|
specification_version: 4
|
314
300
|
summary: A set of tests to validate your HTML output. These tests check if your image
|
315
301
|
references are legitimate, if they have alt tags, if your internal links are working,
|