html-proofer 3.18.5 → 3.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +3 -1
- data/lib/html-proofer/cache.rb +32 -11
- data/lib/html-proofer/check/links.rb +2 -5
- data/lib/html-proofer/configuration.rb +2 -2
- data/lib/html-proofer/element.rb +3 -1
- data/lib/html-proofer/middleware.rb +1 -1
- data/lib/html-proofer/runner.rb +5 -3
- data/lib/html-proofer/url_validator.rb +2 -2
- data/lib/html-proofer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e18991c07a3b31ae78293fc8dc2a9392211f8609ed494ed5cad87158747cde2
|
4
|
+
data.tar.gz: d9551668e860b1055a640be9ba597cb9fca8822849a7b000d251086e8eadb023
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e9187bcd157c8c69cb8dc7f47e9ef6908e7ccb06b56fd2fd241e8c3cbb245874a9776e5405c9aa5328a01e7038880f173d205efb2c925c7c5787e3c46d3389d
|
7
|
+
data.tar.gz: 76afe0c48983344c1ebfb86075458d71b05163fb051ff0fbcbfa2104b324d520bbf7213ff262ec437d3ddeb6265d4454056d66a50d4c68ab02a3c6626a9633e0
|
data/bin/htmlproofer
CHANGED
@@ -48,6 +48,7 @@ Mercenary.program(:htmlproofer) do |p|
|
|
48
48
|
p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
|
49
49
|
p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
|
50
50
|
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
51
|
+
p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
|
51
52
|
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
52
53
|
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
53
54
|
p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
|
@@ -87,7 +88,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
87
88
|
options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
|
88
89
|
options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
|
89
90
|
|
90
|
-
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
|
91
|
+
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'], symbolize_names: false) unless opts['typhoeus_config'].nil?
|
92
|
+
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config'], symbolize_names: false) unless opts['hydra_config'].nil?
|
91
93
|
|
92
94
|
unless opts['timeframe'].nil?
|
93
95
|
options[:cache] ||= {}
|
data/lib/html-proofer/cache.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require_relative 'utils'
|
4
4
|
require 'date'
|
5
5
|
require 'json'
|
6
|
+
require 'uri'
|
6
7
|
|
7
8
|
module HTMLProofer
|
8
9
|
class Cache
|
@@ -11,6 +12,8 @@ module HTMLProofer
|
|
11
12
|
DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
|
12
13
|
DEFAULT_CACHE_FILE_NAME = 'cache.log'
|
13
14
|
|
15
|
+
URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
16
|
+
|
14
17
|
attr_reader :exists, :cache_log, :storage_dir, :cache_file
|
15
18
|
|
16
19
|
def initialize(logger, options)
|
@@ -30,6 +33,8 @@ module HTMLProofer
|
|
30
33
|
end
|
31
34
|
|
32
35
|
def within_timeframe?(time)
|
36
|
+
return false if time.nil?
|
37
|
+
|
33
38
|
(@parsed_timeframe..@cache_time).cover?(Time.parse(time))
|
34
39
|
end
|
35
40
|
|
@@ -71,10 +76,14 @@ module HTMLProofer
|
|
71
76
|
@cache_log[clean_url(url)] = data
|
72
77
|
end
|
73
78
|
|
74
|
-
def detect_url_changes(found)
|
75
|
-
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
79
|
+
def detect_url_changes(found, type)
|
76
80
|
found_urls = found.keys.map { |url| clean_url(url) }
|
77
81
|
|
82
|
+
# if there were no urls, bail
|
83
|
+
return {} if found_urls.empty?
|
84
|
+
|
85
|
+
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
86
|
+
|
78
87
|
# prepare to add new URLs detected
|
79
88
|
additions = found.reject do |url, _|
|
80
89
|
url = clean_url(url)
|
@@ -91,24 +100,28 @@ module HTMLProofer
|
|
91
100
|
@logger.log :info, "Adding #{new_link_text} to the cache..."
|
92
101
|
|
93
102
|
# remove from cache URLs that no longer exist
|
94
|
-
|
103
|
+
deletions = 0
|
95
104
|
@cache_log.delete_if do |url, _|
|
96
105
|
url = clean_url(url)
|
106
|
+
|
97
107
|
if found_urls.include?(url)
|
98
108
|
false
|
99
|
-
|
109
|
+
elsif url_matches_type?(url, type)
|
100
110
|
@logger.log :debug, "Removing #{url} from cache check"
|
101
|
-
|
111
|
+
deletions += 1
|
102
112
|
true
|
103
113
|
end
|
104
114
|
end
|
105
115
|
|
106
|
-
del_link_text = pluralize(
|
116
|
+
del_link_text = pluralize(deletions, 'link', 'links')
|
107
117
|
@logger.log :info, "Removing #{del_link_text} from the cache..."
|
108
118
|
|
109
119
|
additions
|
110
120
|
end
|
111
121
|
|
122
|
+
# TODO: Garbage performance--both the external and internal
|
123
|
+
# caches need access to this file. Write a proper versioned
|
124
|
+
# schema in the future
|
112
125
|
def write
|
113
126
|
File.write(cache_file, @cache_log.to_json)
|
114
127
|
end
|
@@ -117,12 +130,15 @@ module HTMLProofer
|
|
117
130
|
@load.nil?
|
118
131
|
end
|
119
132
|
|
120
|
-
def retrieve_urls(urls)
|
121
|
-
urls_to_check = detect_url_changes(urls)
|
133
|
+
def retrieve_urls(urls, type)
|
134
|
+
urls_to_check = detect_url_changes(urls, type)
|
135
|
+
|
122
136
|
@cache_log.each_pair do |url, cache|
|
123
137
|
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
124
138
|
|
125
|
-
|
139
|
+
if url_matches_type?(url, type)
|
140
|
+
urls_to_check[url] = cache['filenames'] # recheck expired links
|
141
|
+
end
|
126
142
|
end
|
127
143
|
urls_to_check
|
128
144
|
end
|
@@ -149,9 +165,9 @@ module HTMLProofer
|
|
149
165
|
|
150
166
|
@cache_file = File.join(storage_dir, cache_file_name)
|
151
167
|
|
152
|
-
return unless File.exist?(cache_file)
|
168
|
+
return unless File.exist?(@cache_file)
|
153
169
|
|
154
|
-
contents = File.read(cache_file)
|
170
|
+
contents = File.read(@cache_file)
|
155
171
|
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
156
172
|
end
|
157
173
|
|
@@ -169,5 +185,10 @@ module HTMLProofer
|
|
169
185
|
@cache_datetime - Rational(measurement / 24.0)
|
170
186
|
end.to_time
|
171
187
|
end
|
188
|
+
|
189
|
+
def url_matches_type?(url, type)
|
190
|
+
return true if type == :internal && url !~ URI_REGEXP
|
191
|
+
return true if type == :external && url =~ URI_REGEXP
|
192
|
+
end
|
172
193
|
end
|
173
194
|
end
|
data/lib/html-proofer/check/links.rb
CHANGED
@@ -59,11 +59,8 @@ class LinkCheck < ::HTMLProofer::Check
|
|
59
59
|
add_to_external_urls(@link.href || @link.src)
|
60
60
|
next
|
61
61
|
elsif @link.internal?
|
62
|
-
|
63
|
-
|
64
|
-
else
|
65
|
-
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
|
66
|
-
end
|
62
|
+
add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
|
63
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content) if !@link.exists? && !@link.hash
|
67
64
|
end
|
68
65
|
end
|
69
66
|
|
data/lib/html-proofer/configuration.rb
CHANGED
@@ -67,7 +67,7 @@ module HTMLProofer
|
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
70
|
-
def self.parse_json_option(option_name, config)
|
70
|
+
def self.parse_json_option(option_name, config, symbolize_names: true)
|
71
71
|
raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
|
72
72
|
raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
|
73
73
|
|
@@ -78,7 +78,7 @@ module HTMLProofer
|
|
78
78
|
return {} if config.strip.empty?
|
79
79
|
|
80
80
|
begin
|
81
|
-
JSON.parse(config)
|
81
|
+
JSON.parse(config, { symbolize_names: symbolize_names })
|
82
82
|
rescue StandardError
|
83
83
|
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
|
84
84
|
end
|
data/lib/html-proofer/element.rb
CHANGED
@@ -24,7 +24,7 @@ module HTMLProofer
|
|
24
24
|
raise e
|
25
25
|
end
|
26
26
|
|
27
|
-
@aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
|
27
|
+
@aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
|
28
28
|
|
29
29
|
@data_proofer_ignore = defined?(@data_proofer_ignore)
|
30
30
|
|
@@ -220,6 +220,8 @@ module HTMLProofer
|
|
220
220
|
end
|
221
221
|
|
222
222
|
def ignores_pattern_check(links)
|
223
|
+
return false unless links.is_a?(Array)
|
224
|
+
|
223
225
|
links.each do |ignore|
|
224
226
|
case ignore
|
225
227
|
when String
|
data/lib/html-proofer/middleware.rb
CHANGED
@@ -22,7 +22,7 @@ module HTMLProofer
|
|
22
22
|
allow_hash_href: true,
|
23
23
|
check_external_hash: true,
|
24
24
|
check_html: true,
|
25
|
-
url_ignore: [
|
25
|
+
url_ignore: [%r{^/}], # Don't try to check if local files exist
|
26
26
|
validation: { report_eof_tags: true }
|
27
27
|
}
|
28
28
|
end
|
data/lib/html-proofer/runner.rb
CHANGED
@@ -63,7 +63,9 @@ module HTMLProofer
|
|
63
63
|
swap(url, @options[:url_swap])
|
64
64
|
end
|
65
65
|
end
|
66
|
-
@external_urls =
|
66
|
+
@external_urls = @src.each_with_object({}) do |url, hash|
|
67
|
+
hash[url] = nil
|
68
|
+
end
|
67
69
|
validate_external_urls
|
68
70
|
end
|
69
71
|
|
@@ -123,7 +125,7 @@ module HTMLProofer
|
|
123
125
|
end
|
124
126
|
|
125
127
|
external_urls = check.external_urls
|
126
|
-
external_urls =
|
128
|
+
external_urls = check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }.to_h if @options[:url_swap]
|
127
129
|
result[:external_urls].merge!(external_urls)
|
128
130
|
result[:failures].concat(check.issues)
|
129
131
|
end
|
@@ -236,7 +238,7 @@ module HTMLProofer
|
|
236
238
|
end
|
237
239
|
|
238
240
|
def load_internal_cache
|
239
|
-
urls_to_check = @cache.retrieve_urls(@internal_urls)
|
241
|
+
urls_to_check = @cache.retrieve_urls(@internal_urls, :internal)
|
240
242
|
cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
|
241
243
|
@logger.log :info, "Found #{cache_text} in the cache..."
|
242
244
|
|
data/lib/html-proofer/url_validator.rb
CHANGED
@@ -26,7 +26,7 @@ module HTMLProofer
|
|
26
26
|
@external_urls = remove_query_values
|
27
27
|
|
28
28
|
if @cache.use_cache?
|
29
|
-
urls_to_check = @cache.retrieve_urls(@external_urls)
|
29
|
+
urls_to_check = @cache.retrieve_urls(@external_urls, :external)
|
30
30
|
external_link_checker(urls_to_check)
|
31
31
|
@cache.write
|
32
32
|
else
|
@@ -85,7 +85,7 @@ module HTMLProofer
|
|
85
85
|
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
86
86
|
# not available as an option.
|
87
87
|
def external_link_checker(external_urls)
|
88
|
-
external_urls =
|
88
|
+
external_urls = external_urls.sort.to_h
|
89
89
|
|
90
90
|
count = external_urls.length
|
91
91
|
check_text = pluralize(count, 'external link', 'external links')
|
data/lib/html-proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.18.5
|
4
|
+
version: 3.19.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-04-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|