html-proofer 3.18.3 → 3.18.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html-proofer/cache.rb +32 -11
- data/lib/html-proofer/check/links.rb +7 -5
- data/lib/html-proofer/element.rb +7 -1
- data/lib/html-proofer/middleware.rb +1 -1
- data/lib/html-proofer/runner.rb +5 -3
- data/lib/html-proofer/url_validator.rb +2 -2
- data/lib/html-proofer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d705eda2b6ad497d076f1dfa565e5c98e0769b9cd83ddee56ab04b8d4ad6722f
|
4
|
+
data.tar.gz: 74d2ca4e6b65e8f1d1467aef9cae90a4804cb285eecf8a5014e8b77c579ef25b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b51003f4125230780bc0335f72148e7b4c964aba772b16cdcfb5bcad941eea7eb0ce428cd8b0d78ebcf08fe2e049b4850fbd4e85f90e57b56b1b854bfb516408
|
7
|
+
data.tar.gz: 2bc6deafa255f91f210bdfc0df14c8aac36f16bff90e2402b4a0657a1ed68ad7c60b4717bdceaff4a819764104a05da6c95e049c83cb83d3f518e83a6638d14b
|
data/lib/html-proofer/cache.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require_relative 'utils'
|
4
4
|
require 'date'
|
5
5
|
require 'json'
|
6
|
+
require 'uri'
|
6
7
|
|
7
8
|
module HTMLProofer
|
8
9
|
class Cache
|
@@ -11,6 +12,8 @@ module HTMLProofer
|
|
11
12
|
DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
|
12
13
|
DEFAULT_CACHE_FILE_NAME = 'cache.log'
|
13
14
|
|
15
|
+
URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
16
|
+
|
14
17
|
attr_reader :exists, :cache_log, :storage_dir, :cache_file
|
15
18
|
|
16
19
|
def initialize(logger, options)
|
@@ -30,6 +33,8 @@ module HTMLProofer
|
|
30
33
|
end
|
31
34
|
|
32
35
|
def within_timeframe?(time)
|
36
|
+
return false if time.nil?
|
37
|
+
|
33
38
|
(@parsed_timeframe..@cache_time).cover?(Time.parse(time))
|
34
39
|
end
|
35
40
|
|
@@ -71,10 +76,14 @@ module HTMLProofer
|
|
71
76
|
@cache_log[clean_url(url)] = data
|
72
77
|
end
|
73
78
|
|
74
|
-
def detect_url_changes(found)
|
75
|
-
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
79
|
+
def detect_url_changes(found, type)
|
76
80
|
found_urls = found.keys.map { |url| clean_url(url) }
|
77
81
|
|
82
|
+
# if there were no urls, bail
|
83
|
+
return {} if found_urls.empty?
|
84
|
+
|
85
|
+
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
86
|
+
|
78
87
|
# prepare to add new URLs detected
|
79
88
|
additions = found.reject do |url, _|
|
80
89
|
url = clean_url(url)
|
@@ -91,24 +100,28 @@ module HTMLProofer
|
|
91
100
|
@logger.log :info, "Adding #{new_link_text} to the cache..."
|
92
101
|
|
93
102
|
# remove from cache URLs that no longer exist
|
94
|
-
|
103
|
+
deletions = 0
|
95
104
|
@cache_log.delete_if do |url, _|
|
96
105
|
url = clean_url(url)
|
106
|
+
|
97
107
|
if found_urls.include?(url)
|
98
108
|
false
|
99
|
-
|
109
|
+
elsif url_matches_type?(url, type)
|
100
110
|
@logger.log :debug, "Removing #{url} from cache check"
|
101
|
-
|
111
|
+
deletions += 1
|
102
112
|
true
|
103
113
|
end
|
104
114
|
end
|
105
115
|
|
106
|
-
del_link_text = pluralize(
|
116
|
+
del_link_text = pluralize(deletions, 'link', 'links')
|
107
117
|
@logger.log :info, "Removing #{del_link_text} from the cache..."
|
108
118
|
|
109
119
|
additions
|
110
120
|
end
|
111
121
|
|
122
|
+
# TODO: Garbage performance--both the external and internal
|
123
|
+
# caches need access to this file. Write a proper versioned
|
124
|
+
# schema in the future
|
112
125
|
def write
|
113
126
|
File.write(cache_file, @cache_log.to_json)
|
114
127
|
end
|
@@ -117,12 +130,15 @@ module HTMLProofer
|
|
117
130
|
@load.nil?
|
118
131
|
end
|
119
132
|
|
120
|
-
def retrieve_urls(urls)
|
121
|
-
urls_to_check = detect_url_changes(urls)
|
133
|
+
def retrieve_urls(urls, type)
|
134
|
+
urls_to_check = detect_url_changes(urls, type)
|
135
|
+
|
122
136
|
@cache_log.each_pair do |url, cache|
|
123
137
|
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
124
138
|
|
125
|
-
|
139
|
+
if url_matches_type?(url, type)
|
140
|
+
urls_to_check[url] = cache['filenames'] # recheck expired links
|
141
|
+
end
|
126
142
|
end
|
127
143
|
urls_to_check
|
128
144
|
end
|
@@ -149,9 +165,9 @@ module HTMLProofer
|
|
149
165
|
|
150
166
|
@cache_file = File.join(storage_dir, cache_file_name)
|
151
167
|
|
152
|
-
return unless File.exist?(cache_file)
|
168
|
+
return unless File.exist?(@cache_file)
|
153
169
|
|
154
|
-
contents = File.read(cache_file)
|
170
|
+
contents = File.read(@cache_file)
|
155
171
|
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
156
172
|
end
|
157
173
|
|
@@ -169,5 +185,10 @@ module HTMLProofer
|
|
169
185
|
@cache_datetime - Rational(measurement / 24.0)
|
170
186
|
end.to_time
|
171
187
|
end
|
188
|
+
|
189
|
+
def url_matches_type?(url, type)
|
190
|
+
return true if type == :internal && url !~ URI_REGEXP
|
191
|
+
return true if type == :external && url =~ URI_REGEXP
|
192
|
+
end
|
172
193
|
end
|
173
194
|
end
|
data/lib/html-proofer/check/links.rb
CHANGED
@@ -51,14 +51,16 @@ class LinkCheck < ::HTMLProofer::Check
|
|
51
51
|
# curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
|
52
52
|
next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
|
53
53
|
|
54
|
+
unless @link.path?
|
55
|
+
add_issue("#{@link.href} is an invalid URL", line: line, content: content)
|
56
|
+
next
|
57
|
+
end
|
58
|
+
|
54
59
|
add_to_external_urls(@link.href || @link.src)
|
55
60
|
next
|
56
61
|
elsif @link.internal?
|
57
|
-
|
58
|
-
|
59
|
-
else
|
60
|
-
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
|
61
|
-
end
|
62
|
+
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
63
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content) if !@link.exists? && !@link.hash
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
data/lib/html-proofer/element.rb
CHANGED
@@ -24,7 +24,7 @@ module HTMLProofer
|
|
24
24
|
raise e
|
25
25
|
end
|
26
26
|
|
27
|
-
@aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
|
27
|
+
@aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
|
28
28
|
|
29
29
|
@data_proofer_ignore = defined?(@data_proofer_ignore)
|
30
30
|
|
@@ -74,6 +74,10 @@ module HTMLProofer
|
|
74
74
|
!parts.nil?
|
75
75
|
end
|
76
76
|
|
77
|
+
def path?
|
78
|
+
!parts.host.nil? && !parts.path.nil?
|
79
|
+
end
|
80
|
+
|
77
81
|
def parts
|
78
82
|
@parts ||= Addressable::URI.parse url
|
79
83
|
rescue URI::Error, Addressable::URI::InvalidURIError
|
@@ -216,6 +220,8 @@ module HTMLProofer
|
|
216
220
|
end
|
217
221
|
|
218
222
|
def ignores_pattern_check(links)
|
223
|
+
return false unless links.is_a?(Array)
|
224
|
+
|
219
225
|
links.each do |ignore|
|
220
226
|
case ignore
|
221
227
|
when String
|
data/lib/html-proofer/middleware.rb
CHANGED
@@ -22,7 +22,7 @@ module HTMLProofer
|
|
22
22
|
allow_hash_href: true,
|
23
23
|
check_external_hash: true,
|
24
24
|
check_html: true,
|
25
|
-
url_ignore: [
|
25
|
+
url_ignore: [%r{^/}], # Don't try to check if local files exist
|
26
26
|
validation: { report_eof_tags: true }
|
27
27
|
}
|
28
28
|
end
|
data/lib/html-proofer/runner.rb
CHANGED
@@ -63,7 +63,9 @@ module HTMLProofer
|
|
63
63
|
swap(url, @options[:url_swap])
|
64
64
|
end
|
65
65
|
end
|
66
|
-
@external_urls =
|
66
|
+
@external_urls = @src.each_with_object({}) do |url, hash|
|
67
|
+
hash[url] = nil
|
68
|
+
end
|
67
69
|
validate_external_urls
|
68
70
|
end
|
69
71
|
|
@@ -123,7 +125,7 @@ module HTMLProofer
|
|
123
125
|
end
|
124
126
|
|
125
127
|
external_urls = check.external_urls
|
126
|
-
external_urls =
|
128
|
+
external_urls = check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }.to_h if @options[:url_swap]
|
127
129
|
result[:external_urls].merge!(external_urls)
|
128
130
|
result[:failures].concat(check.issues)
|
129
131
|
end
|
@@ -236,7 +238,7 @@ module HTMLProofer
|
|
236
238
|
end
|
237
239
|
|
238
240
|
def load_internal_cache
|
239
|
-
urls_to_check = @cache.retrieve_urls(@internal_urls)
|
241
|
+
urls_to_check = @cache.retrieve_urls(@internal_urls, :internal)
|
240
242
|
cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
|
241
243
|
@logger.log :info, "Found #{cache_text} in the cache..."
|
242
244
|
|
data/lib/html-proofer/url_validator.rb
CHANGED
@@ -26,7 +26,7 @@ module HTMLProofer
|
|
26
26
|
@external_urls = remove_query_values
|
27
27
|
|
28
28
|
if @cache.use_cache?
|
29
|
-
urls_to_check = @cache.retrieve_urls(@external_urls)
|
29
|
+
urls_to_check = @cache.retrieve_urls(@external_urls, :external)
|
30
30
|
external_link_checker(urls_to_check)
|
31
31
|
@cache.write
|
32
32
|
else
|
@@ -85,7 +85,7 @@ module HTMLProofer
|
|
85
85
|
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
86
86
|
# not available as an option.
|
87
87
|
def external_link_checker(external_urls)
|
88
|
-
external_urls =
|
88
|
+
external_urls = external_urls.sort.to_h
|
89
89
|
|
90
90
|
count = external_urls.length
|
91
91
|
check_text = pluralize(count, 'external link', 'external links')
|
data/lib/html-proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.18.3
|
4
|
+
version: 3.18.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|