html-proofer 3.18.6 → 3.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +3 -1
- data/lib/html-proofer/cache.rb +30 -14
- data/lib/html-proofer/configuration.rb +2 -2
- data/lib/html-proofer/runner.rb +2 -2
- data/lib/html-proofer/url_validator.rb +1 -1
- data/lib/html-proofer/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d06dbda8bf9baad3be96b5565fcb86de0892d4241c6b7ede08c3c2d7203f6752
|
4
|
+
data.tar.gz: da895d696b7b9d1f3ca9c2e504e0c375ce9942f53dd5c2174f80570d32e3ad5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 693f677cb91b9b0e79135ef27ed2771d34d7e5fee5bd368b20b5791a4e25a468ba769d299c2cd1417eccd989d9ac1290cfaf9dfd90b9534fa0af0440382e4e0f
|
7
|
+
data.tar.gz: c6c33f309e3f8b00dc1721653f2e2099c647c9bd8d3b4ecaab4ec7b97161925911d1e6e3c8cabb30b9b9eb2274fed7238ed338da7d85d25b7c6981d537f6435d
|
data/bin/htmlproofer
CHANGED
@@ -48,6 +48,7 @@ Mercenary.program(:htmlproofer) do |p|
|
|
48
48
|
p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
|
49
49
|
p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
|
50
50
|
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
51
|
+
p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
|
51
52
|
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
52
53
|
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
53
54
|
p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
|
@@ -87,7 +88,8 @@ Mercenary.program(:htmlproofer) do |p|
|
|
87
88
|
options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
|
88
89
|
options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
|
89
90
|
|
90
|
-
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
|
91
|
+
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'], symbolize_names: false) unless opts['typhoeus_config'].nil?
|
92
|
+
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config']) unless opts['hydra_config'].nil?
|
91
93
|
|
92
94
|
unless opts['timeframe'].nil?
|
93
95
|
options[:cache] ||= {}
|
data/lib/html-proofer/cache.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require_relative 'utils'
|
4
4
|
require 'date'
|
5
5
|
require 'json'
|
6
|
+
require 'uri'
|
6
7
|
|
7
8
|
module HTMLProofer
|
8
9
|
class Cache
|
@@ -11,6 +12,8 @@ module HTMLProofer
|
|
11
12
|
DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
|
12
13
|
DEFAULT_CACHE_FILE_NAME = 'cache.log'
|
13
14
|
|
15
|
+
URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
|
16
|
+
|
14
17
|
attr_reader :exists, :cache_log, :storage_dir, :cache_file
|
15
18
|
|
16
19
|
def initialize(logger, options)
|
@@ -30,6 +33,8 @@ module HTMLProofer
|
|
30
33
|
end
|
31
34
|
|
32
35
|
def within_timeframe?(time)
|
36
|
+
return false if time.nil?
|
37
|
+
|
33
38
|
(@parsed_timeframe..@cache_time).cover?(Time.parse(time))
|
34
39
|
end
|
35
40
|
|
@@ -71,10 +76,14 @@ module HTMLProofer
|
|
71
76
|
@cache_log[clean_url(url)] = data
|
72
77
|
end
|
73
78
|
|
74
|
-
def detect_url_changes(found)
|
75
|
-
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
79
|
+
def detect_url_changes(found, type)
|
76
80
|
found_urls = found.keys.map { |url| clean_url(url) }
|
77
81
|
|
82
|
+
# if there were no urls, bail
|
83
|
+
return {} if found_urls.empty?
|
84
|
+
|
85
|
+
existing_urls = @cache_log.keys.map { |url| clean_url(url) }
|
86
|
+
|
78
87
|
# prepare to add new URLs detected
|
79
88
|
additions = found.reject do |url, _|
|
80
89
|
url = clean_url(url)
|
@@ -91,19 +100,20 @@ module HTMLProofer
|
|
91
100
|
@logger.log :info, "Adding #{new_link_text} to the cache..."
|
92
101
|
|
93
102
|
# remove from cache URLs that no longer exist
|
94
|
-
|
103
|
+
deletions = 0
|
95
104
|
@cache_log.delete_if do |url, _|
|
96
105
|
url = clean_url(url)
|
106
|
+
|
97
107
|
if found_urls.include?(url)
|
98
108
|
false
|
99
|
-
|
109
|
+
elsif url_matches_type?(url, type)
|
100
110
|
@logger.log :debug, "Removing #{url} from cache check"
|
101
|
-
|
111
|
+
deletions += 1
|
102
112
|
true
|
103
113
|
end
|
104
114
|
end
|
105
115
|
|
106
|
-
del_link_text = pluralize(
|
116
|
+
del_link_text = pluralize(deletions, 'link', 'links')
|
107
117
|
@logger.log :info, "Removing #{del_link_text} from the cache..."
|
108
118
|
|
109
119
|
additions
|
@@ -113,21 +123,22 @@ module HTMLProofer
|
|
113
123
|
# caches need access to this file. Write a proper versioned
|
114
124
|
# schema in the future
|
115
125
|
def write
|
116
|
-
|
117
|
-
file = JSON.parse(File.read(cache_file)) if File.exist?(cache_file)
|
118
|
-
File.write(cache_file, file.merge(@cache_log).to_json)
|
126
|
+
File.write(cache_file, @cache_log.to_json)
|
119
127
|
end
|
120
128
|
|
121
129
|
def load?
|
122
130
|
@load.nil?
|
123
131
|
end
|
124
132
|
|
125
|
-
def retrieve_urls(urls)
|
126
|
-
urls_to_check = detect_url_changes(urls)
|
133
|
+
def retrieve_urls(urls, type)
|
134
|
+
urls_to_check = detect_url_changes(urls, type)
|
135
|
+
|
127
136
|
@cache_log.each_pair do |url, cache|
|
128
137
|
next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
|
129
138
|
|
130
|
-
|
139
|
+
if url_matches_type?(url, type)
|
140
|
+
urls_to_check[url] = cache['filenames'] # recheck expired links
|
141
|
+
end
|
131
142
|
end
|
132
143
|
urls_to_check
|
133
144
|
end
|
@@ -154,9 +165,9 @@ module HTMLProofer
|
|
154
165
|
|
155
166
|
@cache_file = File.join(storage_dir, cache_file_name)
|
156
167
|
|
157
|
-
return unless File.exist?(cache_file)
|
168
|
+
return unless File.exist?(@cache_file)
|
158
169
|
|
159
|
-
contents = File.read(cache_file)
|
170
|
+
contents = File.read(@cache_file)
|
160
171
|
@cache_log = contents.empty? ? {} : JSON.parse(contents)
|
161
172
|
end
|
162
173
|
|
@@ -174,5 +185,10 @@ module HTMLProofer
|
|
174
185
|
@cache_datetime - Rational(measurement / 24.0)
|
175
186
|
end.to_time
|
176
187
|
end
|
188
|
+
|
189
|
+
def url_matches_type?(url, type)
|
190
|
+
return true if type == :internal && url !~ URI_REGEXP
|
191
|
+
return true if type == :external && url =~ URI_REGEXP
|
192
|
+
end
|
177
193
|
end
|
178
194
|
end
|
data/lib/html-proofer/configuration.rb
CHANGED
@@ -67,7 +67,7 @@ module HTMLProofer
|
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
70
|
-
def self.parse_json_option(option_name, config)
|
70
|
+
def self.parse_json_option(option_name, config, symbolize_names: true)
|
71
71
|
raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
|
72
72
|
raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
|
73
73
|
|
@@ -78,7 +78,7 @@ module HTMLProofer
|
|
78
78
|
return {} if config.strip.empty?
|
79
79
|
|
80
80
|
begin
|
81
|
-
JSON.parse(config)
|
81
|
+
JSON.parse(config, { symbolize_names: symbolize_names })
|
82
82
|
rescue StandardError
|
83
83
|
raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
|
84
84
|
end
|
data/lib/html-proofer/runner.rb
CHANGED
@@ -125,7 +125,7 @@ module HTMLProofer
|
|
125
125
|
end
|
126
126
|
|
127
127
|
external_urls = check.external_urls
|
128
|
-
external_urls = check.external_urls.
|
128
|
+
external_urls = check.external_urls.transform_keys { |url| swap(url, @options[:url_swap]) } if @options[:url_swap]
|
129
129
|
result[:external_urls].merge!(external_urls)
|
130
130
|
result[:failures].concat(check.issues)
|
131
131
|
end
|
@@ -238,7 +238,7 @@ module HTMLProofer
|
|
238
238
|
end
|
239
239
|
|
240
240
|
def load_internal_cache
|
241
|
-
urls_to_check = @cache.retrieve_urls(@internal_urls)
|
241
|
+
urls_to_check = @cache.retrieve_urls(@internal_urls, :internal)
|
242
242
|
cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
|
243
243
|
@logger.log :info, "Found #{cache_text} in the cache..."
|
244
244
|
|
data/lib/html-proofer/url_validator.rb
CHANGED
@@ -26,7 +26,7 @@ module HTMLProofer
|
|
26
26
|
@external_urls = remove_query_values
|
27
27
|
|
28
28
|
if @cache.use_cache?
|
29
|
-
urls_to_check = @cache.retrieve_urls(@external_urls)
|
29
|
+
urls_to_check = @cache.retrieve_urls(@external_urls, :external)
|
30
30
|
external_link_checker(urls_to_check)
|
31
31
|
@cache.write
|
32
32
|
else
|
data/lib/html-proofer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.18.6
|
4
|
+
version: 3.19.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -287,7 +287,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
287
287
|
requirements:
|
288
288
|
- - ">="
|
289
289
|
- !ruby/object:Gem::Version
|
290
|
-
version: 2.
|
290
|
+
version: 2.6.0
|
291
291
|
- - "<"
|
292
292
|
- !ruby/object:Gem::Version
|
293
293
|
version: '4.0'
|