html-proofer 3.16.0 → 3.17.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +2 -2
- data/lib/html-proofer/cache.rb +9 -8
- data/lib/html-proofer/check.rb +15 -8
- data/lib/html-proofer/check/favicon.rb +1 -1
- data/lib/html-proofer/check/links.rb +48 -22
- data/lib/html-proofer/configuration.rb +1 -1
- data/lib/html-proofer/element.rb +21 -21
- data/lib/html-proofer/middleware.rb +2 -1
- data/lib/html-proofer/runner.rb +60 -11
- data/lib/html-proofer/url_validator.rb +16 -25
- data/lib/html-proofer/utils.rb +1 -1
- data/lib/html-proofer/version.rb +1 -1
- metadata +7 -21
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fdcf7fa13f788c5839a1878a8a261d812fc399b4552362db458fc2d7e72c795c
|
|
4
|
+
data.tar.gz: 9996e3534390ab7796a59cfe8afcf06eb10025f2ac9c050bf80394c6f2d76494
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5f78ec4bd00a705dc332a14837f5e3f4b4b16bddafe48d20a24db3281f5d142f456690c9486eedcac4828fc3639fa46130079c490209942172c24b2012739c42
|
|
7
|
+
data.tar.gz: 602aff4a287ef6d40d72da5497d9ba5d7c01c13d45157d5d29215e35d0fcde2db244c9e4da9206452425a6b03fa437dc6c483651f12be80209cda5ec0e54050a
|
data/bin/htmlproofer
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
$stdout.sync = true
|
|
5
5
|
|
|
6
6
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
|
7
7
|
|
|
@@ -50,7 +50,7 @@ Mercenary.program(:htmlproofer) do |p|
|
|
|
50
50
|
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
|
51
51
|
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
|
52
52
|
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
|
53
|
-
p.option 'root_dir', '--root-
|
|
53
|
+
p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
|
|
54
54
|
|
|
55
55
|
p.action do |args, opts|
|
|
56
56
|
args = ['.'] if args.empty?
|
data/lib/html-proofer/cache.rb
CHANGED
|
@@ -59,6 +59,8 @@ module HTMLProofer
|
|
|
59
59
|
end
|
|
60
60
|
|
|
61
61
|
def add(url, filenames, status, msg = '')
|
|
62
|
+
return unless use_cache?
|
|
63
|
+
|
|
62
64
|
data = {
|
|
63
65
|
time: @cache_time,
|
|
64
66
|
filenames: filenames,
|
|
@@ -92,12 +94,12 @@ module HTMLProofer
|
|
|
92
94
|
del = 0
|
|
93
95
|
@cache_log.delete_if do |url, _|
|
|
94
96
|
url = clean_url(url)
|
|
95
|
-
if
|
|
97
|
+
if found_urls.include?(url)
|
|
98
|
+
false
|
|
99
|
+
else
|
|
96
100
|
@logger.log :debug, "Removing #{url} from cache check"
|
|
97
101
|
del += 1
|
|
98
102
|
true
|
|
99
|
-
else
|
|
100
|
-
false
|
|
101
103
|
end
|
|
102
104
|
end
|
|
103
105
|
|
|
@@ -115,12 +117,11 @@ module HTMLProofer
|
|
|
115
117
|
@load.nil?
|
|
116
118
|
end
|
|
117
119
|
|
|
118
|
-
def retrieve_urls(
|
|
119
|
-
urls_to_check = detect_url_changes(
|
|
120
|
+
def retrieve_urls(urls)
|
|
121
|
+
urls_to_check = detect_url_changes(urls)
|
|
120
122
|
@cache_log.each_pair do |url, cache|
|
|
121
|
-
if within_timeframe?(cache['time'])
|
|
122
|
-
|
|
123
|
-
end
|
|
123
|
+
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
|
124
|
+
|
|
124
125
|
urls_to_check[url] = cache['filenames'] # recheck expired links
|
|
125
126
|
end
|
|
126
127
|
urls_to_check
|
data/lib/html-proofer/check.rb
CHANGED
|
@@ -3,15 +3,17 @@
|
|
|
3
3
|
module HTMLProofer
|
|
4
4
|
# Mostly handles issue management and collecting of external URLs.
|
|
5
5
|
class Check
|
|
6
|
-
attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
|
|
6
|
+
attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
|
|
7
7
|
|
|
8
|
-
def initialize(src, path, html, logger, options)
|
|
8
|
+
def initialize(src, path, html, logger, cache, options)
|
|
9
9
|
@src = src
|
|
10
10
|
@path = path
|
|
11
11
|
@html = remove_ignored(html)
|
|
12
12
|
@logger = logger
|
|
13
|
+
@cache = cache
|
|
13
14
|
@options = options
|
|
14
15
|
@issues = []
|
|
16
|
+
@internal_urls = {}
|
|
15
17
|
@external_urls = {}
|
|
16
18
|
end
|
|
17
19
|
|
|
@@ -24,17 +26,22 @@ module HTMLProofer
|
|
|
24
26
|
raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
|
|
25
27
|
end
|
|
26
28
|
|
|
27
|
-
def add_issue(desc, line: nil, status: -1, content: nil)
|
|
28
|
-
@issues << Issue.new(@path, desc, line: line, status: status, content: content)
|
|
29
|
+
def add_issue(desc, line: nil, path: nil, status: -1, content: nil)
|
|
30
|
+
@issues << Issue.new(path || @path, desc, line: line, status: status, content: content)
|
|
31
|
+
false
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def add_to_internal_urls(url, internal_url)
|
|
35
|
+
if @internal_urls[url]
|
|
36
|
+
@internal_urls[url] << internal_url
|
|
37
|
+
else
|
|
38
|
+
@internal_urls[url] = [internal_url]
|
|
39
|
+
end
|
|
29
40
|
end
|
|
30
41
|
|
|
31
42
|
def add_to_external_urls(url)
|
|
32
43
|
return if @external_urls[url]
|
|
33
44
|
|
|
34
|
-
add_path_for_url(url)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def add_path_for_url(url)
|
|
38
45
|
if @external_urls[url]
|
|
39
46
|
@external_urls[url] << @path
|
|
40
47
|
else
|
|
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
|
|
|
4
4
|
include HTMLProofer::Utils
|
|
5
5
|
|
|
6
6
|
def missing_href?
|
|
7
|
+
return blank?(@link.src) if @node.name == 'source'
|
|
8
|
+
|
|
7
9
|
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
|
8
10
|
end
|
|
9
11
|
|
|
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
|
|
|
12
14
|
end
|
|
13
15
|
|
|
14
16
|
def run
|
|
15
|
-
@html.css('a, link').each do |node|
|
|
17
|
+
@html.css('a, link, source').each do |node|
|
|
16
18
|
@link = create_element(node)
|
|
17
19
|
line = node.line
|
|
18
20
|
content = node.to_s
|
|
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
|
|
|
49
51
|
# curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
|
|
50
52
|
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
|
|
51
53
|
|
|
52
|
-
add_to_external_urls(@link.href)
|
|
54
|
+
add_to_external_urls(@link.href || @link.src)
|
|
53
55
|
next
|
|
54
|
-
elsif @link.internal?
|
|
55
|
-
|
|
56
|
+
elsif @link.internal?
|
|
57
|
+
if @link.exists?
|
|
58
|
+
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
|
59
|
+
else
|
|
60
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
|
|
61
|
+
end
|
|
56
62
|
end
|
|
63
|
+
end
|
|
57
64
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
|
|
61
|
-
next
|
|
62
|
-
end
|
|
65
|
+
external_urls
|
|
66
|
+
end
|
|
63
67
|
|
|
64
|
-
|
|
65
|
-
|
|
68
|
+
def check_internal_link(link, path, line, content)
|
|
69
|
+
# does the local directory have a trailing slash?
|
|
70
|
+
if link.unslashed_directory?(link.absolute_path)
|
|
71
|
+
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", path: path, line: line, content: content)
|
|
72
|
+
return false
|
|
66
73
|
end
|
|
67
74
|
|
|
68
|
-
|
|
75
|
+
# verify the target hash
|
|
76
|
+
return handle_hash(link, path, line, content) if link.hash
|
|
77
|
+
|
|
78
|
+
true
|
|
69
79
|
end
|
|
70
80
|
|
|
71
81
|
def check_schemes(link, line, content)
|
|
@@ -93,24 +103,28 @@ class LinkCheck < ::HTMLProofer::Check
|
|
|
93
103
|
add_issue("#{link.href} contains no phone number", line: line, content: content) if link.path.empty?
|
|
94
104
|
end
|
|
95
105
|
|
|
96
|
-
def handle_hash(link, line, content)
|
|
97
|
-
if link.internal? && !
|
|
98
|
-
add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
|
|
106
|
+
def handle_hash(link, path, line, content)
|
|
107
|
+
if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
|
|
108
|
+
return add_issue("linking to internal hash ##{link.hash} that does not exist", path: path, line: line, content: content)
|
|
99
109
|
elsif link.external?
|
|
100
|
-
external_link_check(link, line, content)
|
|
110
|
+
return external_link_check(link, line, content)
|
|
101
111
|
end
|
|
112
|
+
|
|
113
|
+
true
|
|
102
114
|
end
|
|
103
115
|
|
|
104
116
|
def external_link_check(link, line, content)
|
|
105
|
-
if
|
|
106
|
-
|
|
117
|
+
if link.exists? # rubocop:disable Style/GuardClause
|
|
118
|
+
target_html = create_nokogiri(link.absolute_path)
|
|
119
|
+
return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
|
|
107
120
|
else
|
|
108
|
-
|
|
109
|
-
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
|
|
121
|
+
return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
|
|
110
122
|
end
|
|
123
|
+
|
|
124
|
+
true
|
|
111
125
|
end
|
|
112
126
|
|
|
113
|
-
def
|
|
127
|
+
def hash_exists?(html, href_hash)
|
|
114
128
|
decoded_href_hash = Addressable::URI.unescape(href_hash)
|
|
115
129
|
fragment_ids = [href_hash, decoded_href_hash]
|
|
116
130
|
# https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
|
|
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
|
|
|
148
162
|
|
|
149
163
|
class XpathFunctions
|
|
150
164
|
def case_sensitive_equals(node_set, str_to_match)
|
|
151
|
-
node_set.find_all { |node| node.to_s
|
|
165
|
+
node_set.find_all { |node| node.to_s.== str_to_match.to_s }
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
class InternalLink
|
|
170
|
+
attr_reader :link, :href, :path, :line, :content
|
|
171
|
+
|
|
172
|
+
def initialize(link, path, line, content)
|
|
173
|
+
@link = link
|
|
174
|
+
@href = @link.href
|
|
175
|
+
@path = path
|
|
176
|
+
@line = line
|
|
177
|
+
@content = content
|
|
152
178
|
end
|
|
153
179
|
end
|
|
154
180
|
end
|
data/lib/html-proofer/element.rb
CHANGED
|
@@ -108,9 +108,7 @@ module HTMLProofer
|
|
|
108
108
|
return true if /^javascript:/.match?(url)
|
|
109
109
|
|
|
110
110
|
# ignore base64 encoded images
|
|
111
|
-
if %w[ImageCheck FaviconCheck].include?
|
|
112
|
-
return true if /^data:image/.match?(url)
|
|
113
|
-
end
|
|
111
|
+
return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
|
|
114
112
|
|
|
115
113
|
# ignore user defined URLs
|
|
116
114
|
return true if ignores_pattern_check(@check.options[:url_ignore])
|
|
@@ -171,6 +169,10 @@ module HTMLProofer
|
|
|
171
169
|
url.start_with?('?')
|
|
172
170
|
end
|
|
173
171
|
|
|
172
|
+
def absolute_path?(path)
|
|
173
|
+
path.start_with?('/')
|
|
174
|
+
end
|
|
175
|
+
|
|
174
176
|
def file_path
|
|
175
177
|
return if path.nil? || path.empty?
|
|
176
178
|
|
|
@@ -178,22 +180,19 @@ module HTMLProofer
|
|
|
178
180
|
|
|
179
181
|
path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
|
|
180
182
|
|
|
181
|
-
if
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
base = @check.path
|
|
194
|
-
end
|
|
183
|
+
base = if absolute_path?(path) # path relative to root
|
|
184
|
+
# either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
|
|
185
|
+
@check.options[:root_dir] || (File.directory?(@check.src) ? @check.src : File.dirname(@check.src))
|
|
186
|
+
elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
|
|
187
|
+
File.dirname(@check.path)
|
|
188
|
+
elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
|
|
189
|
+
File.dirname(@check.path)
|
|
190
|
+
else # relative link, path is a directory
|
|
191
|
+
@check.path
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
file = File.join(base, path)
|
|
195
195
|
|
|
196
|
-
file = File.join base, path
|
|
197
196
|
if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
|
|
198
197
|
file = "#{file}#{@check.options[:extension]}"
|
|
199
198
|
elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
|
|
@@ -205,14 +204,15 @@ module HTMLProofer
|
|
|
205
204
|
|
|
206
205
|
# checks if a file exists relative to the current pwd
|
|
207
206
|
def exists?
|
|
208
|
-
return @checked_paths[absolute_path] if @checked_paths.key?
|
|
207
|
+
return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
|
|
209
208
|
|
|
210
|
-
@checked_paths[absolute_path] = File.exist?
|
|
209
|
+
@checked_paths[absolute_path] = File.exist?(absolute_path)
|
|
211
210
|
end
|
|
212
211
|
|
|
213
212
|
def absolute_path
|
|
214
213
|
path = file_path || @check.path
|
|
215
|
-
|
|
214
|
+
|
|
215
|
+
File.expand_path(path, Dir.pwd)
|
|
216
216
|
end
|
|
217
217
|
|
|
218
218
|
def ignores_pattern_check(links)
|
|
@@ -6,6 +6,7 @@ module HTMLProofer
|
|
|
6
6
|
|
|
7
7
|
class InvalidHtmlError < StandardError
|
|
8
8
|
def initialize(failures)
|
|
9
|
+
super
|
|
9
10
|
@failures = failures
|
|
10
11
|
end
|
|
11
12
|
|
|
@@ -59,7 +60,7 @@ module HTMLProofer
|
|
|
59
60
|
body = []
|
|
60
61
|
result.last.each { |e| body << e }
|
|
61
62
|
|
|
62
|
-
body = body.join
|
|
63
|
+
body = body.join
|
|
63
64
|
begin
|
|
64
65
|
html = body.lstrip
|
|
65
66
|
rescue StandardError
|
data/lib/html-proofer/runner.rb
CHANGED
|
@@ -4,7 +4,7 @@ module HTMLProofer
|
|
|
4
4
|
class Runner
|
|
5
5
|
include HTMLProofer::Utils
|
|
6
6
|
|
|
7
|
-
attr_reader :options, :external_urls, :failures
|
|
7
|
+
attr_reader :options, :internal_urls, :external_urls, :failures
|
|
8
8
|
|
|
9
9
|
def initialize(src, opts = {})
|
|
10
10
|
@src = src
|
|
@@ -20,6 +20,8 @@ module HTMLProofer
|
|
|
20
20
|
|
|
21
21
|
@type = @options.delete(:type)
|
|
22
22
|
@logger = HTMLProofer::Log.new(@options[:log_level])
|
|
23
|
+
@cache = Cache.new(@logger, @options[:cache])
|
|
24
|
+
@internal_link_checks = nil
|
|
23
25
|
|
|
24
26
|
# Add swap patterns for internal domains
|
|
25
27
|
unless @options[:internal_domains].empty?
|
|
@@ -30,6 +32,9 @@ module HTMLProofer
|
|
|
30
32
|
end
|
|
31
33
|
end
|
|
32
34
|
|
|
35
|
+
@internal_urls = {}
|
|
36
|
+
@internal_urls_to_paths = {}
|
|
37
|
+
@external_urls = {}
|
|
33
38
|
@failures = []
|
|
34
39
|
@before_request = []
|
|
35
40
|
end
|
|
@@ -59,15 +64,13 @@ module HTMLProofer
|
|
|
59
64
|
end
|
|
60
65
|
end
|
|
61
66
|
@external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
|
|
62
|
-
|
|
67
|
+
validate_external_urls
|
|
63
68
|
end
|
|
64
69
|
|
|
65
70
|
# Collects any external URLs found in a directory of files. Also collectes
|
|
66
71
|
# every failed test from process_files.
|
|
67
72
|
# Sends the external URLs to Typhoeus for batch processing.
|
|
68
73
|
def check_files
|
|
69
|
-
@external_urls = {}
|
|
70
|
-
|
|
71
74
|
process_files.each do |item|
|
|
72
75
|
@external_urls.merge!(item[:external_urls])
|
|
73
76
|
@failures.concat(item[:failures])
|
|
@@ -78,9 +81,12 @@ module HTMLProofer
|
|
|
78
81
|
# just not run those other checks at all.
|
|
79
82
|
if @options[:external_only]
|
|
80
83
|
@failures = []
|
|
81
|
-
|
|
84
|
+
validate_external_urls
|
|
82
85
|
elsif !@options[:disable_external]
|
|
83
|
-
|
|
86
|
+
validate_external_urls
|
|
87
|
+
validate_internal_urls
|
|
88
|
+
else
|
|
89
|
+
validate_internal_urls
|
|
84
90
|
end
|
|
85
91
|
end
|
|
86
92
|
|
|
@@ -101,8 +107,21 @@ module HTMLProofer
|
|
|
101
107
|
@src.each do |src|
|
|
102
108
|
checks.each do |klass|
|
|
103
109
|
@logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
|
|
104
|
-
check = Object.const_get(klass).new(src, path, html, @logger, @options)
|
|
110
|
+
check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
|
|
105
111
|
check.run
|
|
112
|
+
|
|
113
|
+
if klass == 'LinkCheck'
|
|
114
|
+
@internal_link_checks = check
|
|
115
|
+
check.internal_urls.each_pair do |url, internal_urls|
|
|
116
|
+
if @internal_urls_to_paths[url]
|
|
117
|
+
@internal_urls_to_paths[url].concat(internal_urls.map(&:path))
|
|
118
|
+
else
|
|
119
|
+
@internal_urls_to_paths[url] = internal_urls.map(&:path)
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
@internal_urls.merge!(check.internal_urls)
|
|
123
|
+
end
|
|
124
|
+
|
|
106
125
|
external_urls = check.external_urls
|
|
107
126
|
external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
|
|
108
127
|
result[:external_urls].merge!(external_urls)
|
|
@@ -113,16 +132,38 @@ module HTMLProofer
|
|
|
113
132
|
end
|
|
114
133
|
|
|
115
134
|
def check_path(path)
|
|
116
|
-
check_parsed
|
|
135
|
+
check_parsed(create_nokogiri(path), path)
|
|
117
136
|
end
|
|
118
137
|
|
|
119
|
-
def
|
|
120
|
-
url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
|
|
138
|
+
def validate_external_urls
|
|
139
|
+
url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
|
|
121
140
|
url_validator.before_request = @before_request
|
|
122
141
|
@failures.concat(url_validator.run)
|
|
123
142
|
@external_urls = url_validator.external_urls
|
|
124
143
|
end
|
|
125
144
|
|
|
145
|
+
def validate_internal_urls
|
|
146
|
+
if @cache.use_cache?
|
|
147
|
+
urls_to_check = load_internal_cache
|
|
148
|
+
|
|
149
|
+
urls_to_check.each_pair do |url, internal_urls|
|
|
150
|
+
# pulled from cache
|
|
151
|
+
internal_urls = @internal_urls[url] unless internal_urls.first.is_a?(LinkCheck::InternalLink)
|
|
152
|
+
|
|
153
|
+
result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.path, internal_urls.first.line, internal_urls.first.content)
|
|
154
|
+
code = result ? 200 : 404
|
|
155
|
+
@cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
|
|
156
|
+
end
|
|
157
|
+
@cache.write
|
|
158
|
+
else
|
|
159
|
+
@internal_urls.values.flatten.each do |internal_url|
|
|
160
|
+
@internal_link_checks.check_internal_link(internal_url.link, internal_url.path, internal_url.line, internal_url.content)
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
@failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
|
|
165
|
+
end
|
|
166
|
+
|
|
126
167
|
def files
|
|
127
168
|
@files ||= if @type == :directory
|
|
128
169
|
@src.map do |src|
|
|
@@ -188,8 +229,16 @@ module HTMLProofer
|
|
|
188
229
|
# @return [ Array<Block> ] All before_request blocks.
|
|
189
230
|
def before_request(&block)
|
|
190
231
|
@before_request ||= []
|
|
191
|
-
@before_request << block if
|
|
232
|
+
@before_request << block if block
|
|
192
233
|
@before_request
|
|
193
234
|
end
|
|
235
|
+
|
|
236
|
+
def load_internal_cache
|
|
237
|
+
urls_to_check = @cache.retrieve_urls(@internal_urls)
|
|
238
|
+
cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
|
|
239
|
+
@logger.log :info, "Found #{cache_text} in the cache..."
|
|
240
|
+
|
|
241
|
+
urls_to_check
|
|
242
|
+
end
|
|
194
243
|
end
|
|
195
244
|
end
|
|
@@ -12,13 +12,13 @@ module HTMLProofer
|
|
|
12
12
|
attr_reader :external_urls
|
|
13
13
|
attr_writer :before_request
|
|
14
14
|
|
|
15
|
-
def initialize(logger, external_urls, options)
|
|
15
|
+
def initialize(logger, cache, external_urls, options)
|
|
16
16
|
@logger = logger
|
|
17
17
|
@external_urls = external_urls
|
|
18
18
|
@failed_tests = []
|
|
19
19
|
@options = options
|
|
20
20
|
@hydra = Typhoeus::Hydra.new(@options[:hydra])
|
|
21
|
-
@cache =
|
|
21
|
+
@cache = cache
|
|
22
22
|
@before_request = []
|
|
23
23
|
end
|
|
24
24
|
|
|
@@ -26,7 +26,7 @@ module HTMLProofer
|
|
|
26
26
|
@external_urls = remove_query_values
|
|
27
27
|
|
|
28
28
|
if @cache.use_cache?
|
|
29
|
-
urls_to_check =
|
|
29
|
+
urls_to_check = @cache.retrieve_urls(@external_urls)
|
|
30
30
|
external_link_checker(urls_to_check)
|
|
31
31
|
@cache.write
|
|
32
32
|
else
|
|
@@ -43,11 +43,11 @@ module HTMLProofer
|
|
|
43
43
|
iterable_external_urls = @external_urls.dup
|
|
44
44
|
@external_urls.each_key do |url|
|
|
45
45
|
uri = begin
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
46
|
+
Addressable::URI.parse(url)
|
|
47
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
|
48
|
+
@logger.log :error, "#{url} is an invalid URL"
|
|
49
|
+
nil
|
|
50
|
+
end
|
|
51
51
|
next if uri.nil? || uri.query.nil?
|
|
52
52
|
|
|
53
53
|
iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
|
|
@@ -74,15 +74,6 @@ module HTMLProofer
|
|
|
74
74
|
uri.host + uri.path
|
|
75
75
|
end
|
|
76
76
|
|
|
77
|
-
def load_cache
|
|
78
|
-
cache_count = @cache.size
|
|
79
|
-
cache_text = pluralize(cache_count, 'link', 'links')
|
|
80
|
-
|
|
81
|
-
@logger.log :info, "Found #{cache_text} in the cache..."
|
|
82
|
-
|
|
83
|
-
@cache.retrieve_urls(@external_urls)
|
|
84
|
-
end
|
|
85
|
-
|
|
86
77
|
# Proofer runs faster if we pull out all the external URLs and run the checks
|
|
87
78
|
# at the end. Otherwise, we're halting the consuming process for every file during
|
|
88
79
|
# `process_files`.
|
|
@@ -111,11 +102,11 @@ module HTMLProofer
|
|
|
111
102
|
def establish_queue(external_urls)
|
|
112
103
|
external_urls.each_pair do |url, filenames|
|
|
113
104
|
url = begin
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
105
|
+
clean_url(url)
|
|
106
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
|
107
|
+
add_external_issue(filenames, "#{url} is an invalid URL")
|
|
108
|
+
next
|
|
109
|
+
end
|
|
119
110
|
|
|
120
111
|
method = if hash?(url) && @options[:check_external_hash]
|
|
121
112
|
:get
|
|
@@ -129,10 +120,10 @@ module HTMLProofer
|
|
|
129
120
|
def clean_url(href)
|
|
130
121
|
# catch any obvious issues, like strings in port numbers
|
|
131
122
|
parsed = Addressable::URI.parse(href)
|
|
132
|
-
if href
|
|
133
|
-
parsed.normalize
|
|
134
|
-
else
|
|
123
|
+
if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
|
|
135
124
|
href
|
|
125
|
+
else
|
|
126
|
+
parsed.normalize
|
|
136
127
|
end
|
|
137
128
|
end
|
|
138
129
|
|
data/lib/html-proofer/utils.rb
CHANGED
data/lib/html-proofer/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-proofer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.
|
|
4
|
+
version: 3.17.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Garen Torikian
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-12-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: addressable
|
|
@@ -122,20 +122,6 @@ dependencies:
|
|
|
122
122
|
- - ">="
|
|
123
123
|
- !ruby/object:Gem::Version
|
|
124
124
|
version: '0'
|
|
125
|
-
- !ruby/object:Gem::Dependency
|
|
126
|
-
name: codecov
|
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
|
128
|
-
requirements:
|
|
129
|
-
- - ">="
|
|
130
|
-
- !ruby/object:Gem::Version
|
|
131
|
-
version: '0'
|
|
132
|
-
type: :development
|
|
133
|
-
prerelease: false
|
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
-
requirements:
|
|
136
|
-
- - ">="
|
|
137
|
-
- !ruby/object:Gem::Version
|
|
138
|
-
version: '0'
|
|
139
125
|
- !ruby/object:Gem::Dependency
|
|
140
126
|
name: pry-byebug
|
|
141
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -293,15 +279,15 @@ homepage: https://github.com/gjtorikian/html-proofer
|
|
|
293
279
|
licenses:
|
|
294
280
|
- MIT
|
|
295
281
|
metadata: {}
|
|
296
|
-
post_install_message:
|
|
282
|
+
post_install_message:
|
|
297
283
|
rdoc_options: []
|
|
298
284
|
require_paths:
|
|
299
285
|
- lib
|
|
300
286
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
301
287
|
requirements:
|
|
302
|
-
- - "
|
|
288
|
+
- - "~>"
|
|
303
289
|
- !ruby/object:Gem::Version
|
|
304
|
-
version: '
|
|
290
|
+
version: '2.4'
|
|
305
291
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
306
292
|
requirements:
|
|
307
293
|
- - ">="
|
|
@@ -309,7 +295,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
309
295
|
version: '0'
|
|
310
296
|
requirements: []
|
|
311
297
|
rubygems_version: 3.1.2
|
|
312
|
-
signing_key:
|
|
298
|
+
signing_key:
|
|
313
299
|
specification_version: 4
|
|
314
300
|
summary: A set of tests to validate your HTML output. These tests check if your image
|
|
315
301
|
references are legitimate, if they have alt tags, if your internal links are working,
|