html-proofer 3.19.4 → 4.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +30 -57
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +231 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +234 -0
- data/lib/html_proofer/check/favicon.rb +35 -0
- data/lib/html_proofer/check/images.rb +62 -0
- data/lib/html_proofer/check/links.rb +118 -0
- data/lib/html_proofer/check/open_graph.rb +34 -0
- data/lib/html_proofer/check/scripts.rb +38 -0
- data/lib/html_proofer/check.rb +91 -0
- data/lib/{html-proofer → html_proofer}/configuration.rb +30 -31
- data/lib/html_proofer/element.rb +122 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +0 -0
- data/lib/html_proofer/reporter/cli.rb +29 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +245 -0
- data/lib/html_proofer/url_validator/external.rb +189 -0
- data/lib/html_proofer/url_validator/internal.rb +86 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +5 -8
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +56 -0
- metadata +46 -27
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/element.rb +0 -265
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -249
- data/lib/html-proofer/url_validator.rb +0 -237
@@ -0,0 +1,245 @@
|
|
1
|
+
# frozen_string_literal: true

module HTMLProofer
  # Orchestrates a proofing run: discovers files, executes every enabled
  # check against them, and then validates the collected internal and
  # external URLs in batch.
  class Runner
    include HTMLProofer::Utils

    attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :checked_paths, :current_check
    attr_accessor :current_path, :current_source, :reporter

    # The two URL categories tracked during a run.
    URL_TYPES = %i[external internal].freeze

    # @param src  [String, Array] a file path, directory list, or link list,
    #   depending on the :type option
    # @param opts [Hash] user options, merged with the configuration defaults
    def initialize(src, opts = {})
      @options = HTMLProofer::Configuration.generate_defaults(opts)

      @type = @options.delete(:type)
      @source = src

      @logger = HTMLProofer::Log.new(@options[:log_level])
      @cache = Cache.new(self, @options[:cache])

      @external_urls = {}
      @internal_urls = {}
      @failures = []

      @before_request = []

      @checked_paths = {}

      @current_check = nil
      @current_source = nil
      @current_path = nil

      @reporter = Reporter::Cli.new(logger: @logger)
    end

    # Entry point: runs either the link-list checker or the file checker,
    # persists the cache, and reports failures (exiting non-zero if any).
    def run
      check_text = pluralize(checks.length, 'check', 'checks')

      if @type == :links
        @logger.log :info, "Running #{check_text} (#{format_checks_list(checks)}) on #{@source} ... \n\n"
        check_list_of_links unless @options[:disable_external]
      else
        @logger.log :info, "Running #{check_text} (#{format_checks_list(checks)}) in #{@source} on *#{@options[:extensions].join(', ')} files...\n\n"

        check_files
        @logger.log :info, "Ran on #{pluralize(files.length, 'file', 'files')}!\n\n"
      end

      @cache.write

      @reporter.failures = @failures

      if @failures.empty?
        @logger.log :info, 'HTML-Proofer finished successfully.'
      else
        @failures.uniq!
        report_failed_checks
      end
    end

    # Treats @source as a plain list of links and validates them externally.
    def check_list_of_links
      @external_urls = @source.uniq.each_with_object({}) do |link, hash|
        url = Attribute::Url.new(self, link, base_url: nil).to_s

        hash[url] = []
      end

      validate_external_urls
    end

    # Walks over each implemented check and runs them on the files, in parallel.
    # Sends the collected external URLs to Typhoeus for batch processing.
    def check_files
      process_files.each do |result|
        URL_TYPES.each do |url_type|
          type = :"#{url_type}_urls"
          ivar_name = "@#{type}"
          ivar = instance_variable_get(ivar_name)

          if ivar.empty?
            # first result: adopt its URL map wholesale
            instance_variable_set(ivar_name, result[type])
          else
            # subsequent results: merge metadata lists per URL
            result[type].each do |url, metadata|
              ivar[url] = [] if ivar[url].nil?
              ivar[url].concat(metadata)
            end
          end
        end
        @failures.concat(result[:failures])
      end

      validate_external_urls unless @options[:disable_external]

      validate_internal_urls
    end

    # Loads every file, optionally in parallel, returning one result hash
    # per file (see #check_parsed).
    def process_files
      if @options[:parallel][:enable]
        Parallel.map(files, @options[:parallel]) { |path| load_file(path) }
      else
        files.map { |path| load_file(path) }
      end
    end

    # Parses a single file into a Nokogiri document and runs the checks on it.
    def load_file(path)
      @html = create_nokogiri(path)
      check_parsed(path)
    end

    # Collects any internal/external URLs found in a file. Also collects
    # every failure raised while running the checks.
    def check_parsed(path)
      result = { internal_urls: {}, external_urls: {}, failures: [] }

      @source = [@source] if @type == :file

      @source.each do |current_source|
        checks.each do |klass|
          @current_source = current_source
          @current_path = path

          check = Object.const_get(klass).new(self, @html)
          @logger.log :debug, "Running #{check.short_name} in #{path}"

          @current_check = check

          check.run

          result[:external_urls].merge!(check.external_urls)
          result[:internal_urls].merge!(check.internal_urls)
          result[:failures].concat(check.failures)
        end
      end
      result
    end

    # Hands the collected external URLs to the external validator.
    def validate_external_urls
      external_url_validator = HTMLProofer::UrlValidator::External.new(self, @external_urls)
      external_url_validator.before_request = @before_request
      @failures.concat(external_url_validator.validate)
    end

    # Hands the collected internal URLs to the internal validator.
    def validate_internal_urls
      internal_link_validator = HTMLProofer::UrlValidator::Internal.new(self, @internal_urls)
      @failures.concat(internal_link_validator.validate)
    end

    # Resolves @source into the list of files to proof, honoring the
    # configured extensions and ignore patterns. Memoized.
    def files
      @files ||= if @type == :directory
        @source.flat_map do |src|
          pattern = File.join(src, '**', "*{#{@options[:extensions].join(',')}}")
          Dir.glob(pattern).select { |f| File.file?(f) && !ignore_file?(f) }
        end
      elsif @type == :file && @options[:extensions].include?(File.extname(@source))
        [@source].reject { |f| ignore_file?(f) }
      else
        []
      end
    end

    # True when the file matches any :ignore_files entry (exact string or regexp).
    def ignore_file?(file)
      @options[:ignore_files].any? do |pattern|
        (pattern.is_a?(String) && pattern == file) ||
          (pattern.is_a?(Regexp) && pattern =~ file)
      end
    end

    def check_sri?
      @options[:check_sri]
    end

    def enforce_https?
      @options[:enforce_https]
    end

    # The check class names to run. Link-list runs use only LinkCheck;
    # otherwise the enabled subchecks are derived from the options. Memoized.
    def checks
      return @checks if defined?(@checks) && !@checks.nil?

      return (@checks = ['LinkCheck']) if @type == :links

      @checks = HTMLProofer::Check.subchecks(@options).map(&:name)

      @checks
    end

    # Only the Failure objects among the reporter's collected failures.
    def failed_checks
      @reporter.failures.flatten.select { |f| f.is_a?(Failure) }
    end

    # Prints the failure report and terminates the process with status 1.
    def report_failed_checks
      @reporter.report

      failure_text = pluralize(@failures.length, 'failure', 'failures')
      @logger.log :fatal, "\nHTML-Proofer found #{failure_text}!"
      exit 1
    end

    # Set before_request callback.
    #
    # @example Set before_request.
    #   request.before_request { |request| p "yay" }
    #
    # @param [ Block ] block The block to execute.
    #
    # @yield [ Typhoeus::Request ]
    #
    # @return [ Array<Block> ] All before_request blocks.
    def before_request(&block)
      @before_request ||= []
      @before_request << block if block
      @before_request
    end

    def load_internal_cache
      load_cache(:internal)
    end

    def load_external_cache
      load_cache(:external)
    end

    # Reconciles the collected URLs of the given type against the cache and
    # returns only the URLs that still need checking.
    private def load_cache(type)
      ivar = instance_variable_get("@#{type}_urls")

      existing_urls_count = @cache.size(type)
      cache_text = pluralize(existing_urls_count, "#{type} link", "#{type} links")
      @logger.log :debug, "Found #{cache_text} in the cache"

      urls_to_check = @cache.retrieve_urls(ivar, type)
      urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
      @logger.log :info, "Checking #{urls_detected}"

      urls_to_check
    end

    # Human-readable list of check names, stripped of their namespace.
    private def format_checks_list(checks)
      checks.map { |check| check.sub(/HTMLProofer::Check::/, '') }.join(', ')
    end
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# frozen_string_literal: true

require 'typhoeus'
require 'uri'

module HTMLProofer
  # Validates external URLs by issuing batched HTTP requests through
  # Typhoeus::Hydra, caching results and recording failures.
  class UrlValidator::External < UrlValidator
    include HTMLProofer::Utils

    attr_reader :external_urls
    attr_writer :before_request

    # @param runner        [HTMLProofer::Runner] the owning runner
    # @param external_urls [Hash] URL string => array of metadata hashes
    def initialize(runner, external_urls)
      super(runner)

      @external_urls = external_urls
      @hydra = Typhoeus::Hydra.new(@runner.options[:hydra])
      @before_request = []

      @paths_with_queries = {}
    end

    # Runs the checker over the cached subset (when caching is enabled) or
    # over every collected URL, returning the accumulated failures.
    def validate
      if @cache.enabled?
        urls_to_check = @runner.load_external_cache
        run_external_link_checker(urls_to_check)
      else
        run_external_link_checker(@external_urls)
      end

      @failed_checks
    end

    # Proofer runs faster if we pull out all the external URLs and run the checks
    # at the end. Otherwise, we're halting the consuming process for every file during
    # `process_files`.
    #
    # In addition, sorting the list lets libcurl keep connections to the same hosts alive.
    #
    # Finally, we'll first make a HEAD request, rather than GETing all the contents.
    # If the HEAD fails, we'll fall back to GET, as some servers are not configured
    # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
    # not available as an option.
    def run_external_link_checker(external_urls)
      # Route log from Typhoeus/Ethon to our own logger
      Ethon.logger = @logger

      external_urls.each_pair do |external_url, metadata|
        url = Attribute::Url.new(@runner, external_url, base_url: nil)

        unless url.valid?
          add_failure(metadata, "#{url} is an invalid URL", 0)
          next
        end

        # skip query permutations we have already seen for this path
        next unless new_url_query_values?(url)

        # hash checking requires a body, so force GET in that case
        method = @runner.options[:check_external_hash] && url.hash? ? :get : :head

        queue_request(method, url, metadata)
      end

      @hydra.run
    end

    # Builds a Typhoeus request for the URL, wires up the completion
    # handler and any before_request callbacks, and enqueues it.
    def queue_request(method, url, filenames)
      opts = @runner.options[:typhoeus].merge(method: method)
      request = Typhoeus::Request.new(url.url, opts)
      @before_request.each { |callback| callback.call(request) }
      request.on_complete { |response| response_handler(response, url, filenames) }
      @hydra.queue request
    end

    # Classifies an HTTP response: success (with optional hash check),
    # timeout, connection failure, HEAD fallback, or plain failure.
    def response_handler(response, url, filenames)
      method = response.request.options[:method]
      href = response.request.base_url.to_s
      response_code = response.code
      # strip NUL bytes so the body is safe to parse/log
      response.body.delete!("\x00")

      @logger.log :debug, "Received a #{response_code} for #{href}"

      return if @runner.options[:ignore_status_codes].include?(response_code)

      if response_code.between?(200, 299)
        @cache.add_external(href, filenames, response_code, 'OK') unless check_hash_in_2xx_response(href, url, response, filenames)
      elsif response.timed_out?
        handle_timeout(href, filenames, response_code)
      elsif response_code.zero?
        handle_connection_failure(href, filenames, response_code, response.status_message)
      elsif method == :head # some servers don't support HEAD
        queue_request(:get, url, filenames)
      else
        return if @runner.options[:only_4xx] && !response_code.between?(400, 499)

        # Received a non-successful http response.
        status_message = blank?(response.status_message) ? '' : ": #{response.status_message}"
        msg = "External link #{href} failed#{status_message}"
        add_failure(filenames, msg, response_code)
        @cache.add_external(href, filenames, response_code, msg)
      end
    end

    # Even though the response was a success, we may have been asked to check
    # if the hash on the URL exists on the page
    def check_hash_in_2xx_response(href, url, response, filenames)
      return false if @runner.options[:only_4xx]
      return false unless @runner.options[:check_external_hash]
      return false unless url.hash?

      hash = url.hash

      body_doc = create_nokogiri(response.body)

      unencoded_hash = Addressable::URI.unescape(hash)
      xpath = [%(//*[@name="#{hash}"]|/*[@name="#{unencoded_hash}"]|//*[@id="#{hash}"]|//*[@id="#{unencoded_hash}"])]
      # user-content is a special addition by GitHub.
      if url.host =~ /github\.com/i
        xpath << [%(//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])]
        # when linking to a file on GitHub, like #L12-L34, only the first "L" portion
        # will be identified as a linkable portion
        xpath << [%(//td[@id="#{Regexp.last_match[1]}"])] if hash =~ /\A(L\d)+/
      end

      return unless body_doc.xpath(xpath.join('|')).empty?

      msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
      add_failure(filenames, msg, response.code)
      @cache.add_external(href, filenames, response.code, msg)
      true
    end

    # Records a timeout; only counts as a failure unless :only_4xx is set.
    def handle_timeout(href, filenames, response_code)
      msg = "External link #{href} failed: got a time out (response code #{response_code})"
      @cache.add_external(href, filenames, 0, msg)
      return if @runner.options[:only_4xx]

      add_failure(filenames, msg, response_code)
    end

    # Records a libcurl-level connection failure (response code 0).
    def handle_connection_failure(href, metadata, response_code, status_message)
      msgs = [<<~MSG
        External link #{href} failed with something very wrong.
        It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
        Sometimes, making too many requests at once also breaks things.
      MSG
      ]

      msgs << "Either way, the return message from the server is: #{status_message}" unless blank?(status_message)

      msg = msgs.join("\n").chomp

      @cache.add_external(href, metadata, 0, msg)
      return if @runner.options[:only_4xx]

      add_failure(metadata, msg, response_code)
    end

    # Appends one Failure per metadata entry (or a single anonymous Failure
    # when no metadata is available).
    def add_failure(metadata, description, status = nil)
      if blank?(metadata) # possible if we're checking an array of links
        @failed_checks << Failure.new('', 'Links > External', description, status: status)
      else
        metadata.each { |m| @failed_checks << Failure.new(m[:filename], 'Links > External', description, line: m[:line], status: status) }
      end
    end

    # remember queries we've seen, ignore future ones
    private def new_url_query_values?(url)
      query_values = url.query_values
      return true if query_values.nil?

      queries = query_values.keys.join('-')
      domain_path = url.domain_path
      seen_queries = @paths_with_queries[domain_path]

      if seen_queries.nil?
        @paths_with_queries[domain_path] = [queries]
        true
      elsif seen_queries.include?(queries)
        false
      else
        seen_queries << queries
        true
      end
    end
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# frozen_string_literal: true

module HTMLProofer
  # Validates internal (same-site) URLs: checks that the target file exists
  # on disk and, when a fragment is present, that the fragment resolves.
  class UrlValidator::Internal < UrlValidator
    attr_reader :internal_urls

    # @param runner        [HTMLProofer::Runner] the owning runner
    # @param internal_urls [Hash] link string => array of metadata hashes
    def initialize(runner, internal_urls)
      super(runner)

      @internal_urls = internal_urls
    end

    # Runs the checker over the cached subset (when caching is enabled) or
    # over every collected URL, returning the accumulated failures.
    def validate
      if @cache.enabled?
        urls_to_check = @runner.load_internal_cache
        run_internal_link_checker(urls_to_check)
      else
        run_internal_link_checker(@internal_urls)
      end

      @failed_checks
    end

    # Checks each link once per file it appeared in; records a Failure and a
    # negative cache entry for missing files or missing fragments.
    def run_internal_link_checker(links)
      links.each_pair do |link, matched_files|
        matched_files.each do |metadata|
          url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])

          @runner.current_source = metadata[:source]
          @runner.current_path = metadata[:current_path]

          unless file_exists?(url)
            @failed_checks << Failure.new(@runner.current_path, 'Links > Internal', "internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
            @cache.add_internal(url.to_s, metadata, false)
            next
          end

          unless hash_exists?(url)
            @failed_checks << Failure.new(@runner.current_path, 'Links > Internal', "internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
            @cache.add_internal(url.to_s, metadata, false)
            next
          end

          @cache.add_internal(url.to_s, metadata, true)
        end
      end

      @failed_checks
    end

    # File-existence check, memoized in the runner's checked_paths map so a
    # path hit by many links is only stat'ed once.
    private def file_exists?(url)
      absolute_path = url.absolute_path
      return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)

      @runner.checked_paths[url.absolute_path] = File.exist?(absolute_path)
    end

    # verify the target hash
    private def hash_exists?(url)
      href_hash = url.hash
      return true if blank?(href_hash)

      # prevents searching files we didn't ask about
      return false unless url.known_extension?

      decoded_href_hash = Addressable::URI.unescape(href_hash)
      fragment_ids = [href_hash, decoded_href_hash]
      # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
      fragment_ids.include?('top') || !find_fragments(fragment_ids, url).empty?
    end

    # Parses the target document and returns the nodes whose id or name
    # matches any of the candidate fragment ids (case-sensitively).
    private def find_fragments(fragment_ids, url)
      xpaths = fragment_ids.uniq.flat_map do |frag_id|
        escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
        [
          "//*[case_sensitive_equals(@id, concat(#{escaped_frag_id}))]",
          "//*[case_sensitive_equals(@name, concat(#{escaped_frag_id}))]"
        ]
      end
      xpaths << XpathFunctions.new

      html = create_nokogiri(url.absolute_path)
      html.xpath(*xpaths)
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true

module HTMLProofer
  # Base class for the Internal and External URL validators. Holds the
  # shared collaborators pulled from the runner and the failure accumulator.
  class UrlValidator
    include HTMLProofer::Utils

    # @param runner [HTMLProofer::Runner] the owning runner, providing the
    #   cache and logger used by all subclasses
    def initialize(runner)
      @runner = runner

      @cache = @runner.cache
      @logger = @runner.logger

      # Failure objects collected by subclasses during #validate
      @failed_checks = []
    end
  end
end
|
@@ -8,21 +8,18 @@ module HTMLProofer
|
|
8
8
|
"#{count} #{count == 1 ? single : plural}"
|
9
9
|
end
|
10
10
|
|
11
|
+
# True when the object is nil or responds to #empty? with true
# (empty string, array, hash, etc.). Mirrors the common Rails helper.
def blank?(obj)
  return true if obj.nil?

  obj.empty?
end
|
14
|
+
|
11
15
|
# Builds a Nokogiri HTML5 document from either a file path or raw markup.
# If `path` names an existing file, its contents are read; otherwise the
# string itself is treated as the markup. max_errors: -1 keeps every
# parse error available on the document.
def create_nokogiri(path)
  is_file = File.exist?(path) && !File.directory?(path)
  content = is_file ? File.read(path) : path

  Nokogiri::HTML5(content, max_errors: -1)
end
|
20
|
-
|
21
|
-
def swap(href, replacement)
|
22
|
-
replacement.each do |link, replace|
|
23
|
-
href = href.gsub(link, replace)
|
24
|
-
end
|
25
|
-
href
|
26
|
-
end
|
27
24
|
end
|
28
25
|
end
|
data/lib/html_proofer.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'zeitwerk'
|
4
|
+
lib_dir = File.join(File.dirname(__dir__), 'lib')
|
5
|
+
gem_loader = Zeitwerk::Loader.for_gem
|
6
|
+
gem_loader.inflector.inflect(
|
7
|
+
'html_proofer' => 'HTMLProofer'
|
8
|
+
)
|
9
|
+
gem_loader.ignore(File.join(lib_dir, 'html-proofer.rb'))
|
10
|
+
gem_loader.setup
|
11
|
+
|
12
|
+
require 'html_proofer/version'
|
13
|
+
|
14
|
+
require 'parallel'
|
15
|
+
require 'fileutils'
|
16
|
+
|
17
|
+
%w[awesome_print debug].each do |gem|
|
18
|
+
require gem
|
19
|
+
rescue LoadError; # rubocop:disable Lint/SuppressedException
|
20
|
+
end
|
21
|
+
module HTMLProofer
  # Public entry points. Each validates its input, then builds (but does not
  # run) a Runner configured for the appropriate :type.
  #
  # Fix: the previous implementations wrote `options[:type] = ...` directly
  # into the caller's hash, mutating a shared argument (and raising
  # FrozenError on frozen hashes). Each method now merges into a new hash,
  # leaving the caller's options untouched.

  # Builds a Runner for a single file.
  #
  # @param file [String] path to an existing file
  # @param options [Hash] proofer options (not mutated)
  # @return [HTMLProofer::Runner]
  # @raise [ArgumentError] if file is not a String or does not exist
  def self.check_file(file, options = {})
    raise ArgumentError unless file.is_a?(String)
    raise ArgumentError, "#{file} does not exist" unless File.exist?(file)

    HTMLProofer::Runner.new(file, options.merge(type: :file))
  end

  # Builds a Runner for one directory.
  #
  # @param directory [String] path to an existing directory
  # @param options [Hash] proofer options (not mutated)
  # @return [HTMLProofer::Runner]
  # @raise [ArgumentError] if directory is not a String or does not exist
  def self.check_directory(directory, options = {})
    raise ArgumentError unless directory.is_a?(String)
    raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)

    HTMLProofer::Runner.new([directory], options.merge(type: :directory))
  end

  # Builds a Runner for several directories.
  #
  # @param directories [Array<String>] paths to existing directories
  # @param options [Hash] proofer options (not mutated)
  # @return [HTMLProofer::Runner]
  # @raise [ArgumentError] if directories is not an Array or any entry is missing
  def self.check_directories(directories, options = {})
    raise ArgumentError unless directories.is_a?(Array)

    directories.each do |directory|
      raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
    end
    HTMLProofer::Runner.new(directories, options.merge(type: :directory))
  end

  # Builds a Runner for a plain list of links.
  #
  # @param links [Array<String>] the links to check
  # @param options [Hash] proofer options (not mutated)
  # @return [HTMLProofer::Runner]
  # @raise [ArgumentError] if links is not an Array
  def self.check_links(links, options = {})
    raise ArgumentError unless links.is_a?(Array)

    HTMLProofer::Runner.new(links, options.merge(type: :links))
  end
end
|
55
|
+
|
56
|
+
gem_loader.eager_load
|