html-proofer 3.19.1 → 4.0.0.rc2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproofer +31 -57
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +231 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +236 -0
- data/lib/html_proofer/check/favicon.rb +35 -0
- data/lib/html_proofer/check/images.rb +62 -0
- data/lib/html_proofer/check/links.rb +118 -0
- data/lib/html_proofer/check/open_graph.rb +34 -0
- data/lib/html_proofer/check/scripts.rb +38 -0
- data/lib/html_proofer/check.rb +91 -0
- data/lib/{html-proofer → html_proofer}/configuration.rb +30 -30
- data/lib/html_proofer/element.rb +122 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +0 -0
- data/lib/html_proofer/reporter/cli.rb +29 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +245 -0
- data/lib/html_proofer/url_validator/external.rb +189 -0
- data/lib/html_proofer/url_validator/internal.rb +86 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +6 -9
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +56 -0
- metadata +44 -37
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/element.rb +0 -261
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -248
- data/lib/html-proofer/url_validator.rb +0 -237
# frozen_string_literal: true

module HTMLProofer
  # Orchestrates an entire proofing session: gathers the files (or links)
  # to examine, runs every enabled check over them, pools the internal and
  # external URLs the checks discover, validates those URLs, and hands the
  # collected failures to the reporter.
  class Runner
    include HTMLProofer::Utils

    attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :checked_paths, :current_check
    attr_accessor :current_path, :current_source, :reporter

    # The two kinds of URL pools a check can contribute to.
    URL_TYPES = %i[external internal].freeze

    # @param src  [String, Array] a file path, an array of directories, or an
    #   array of links, depending on the :type option
    # @param opts [Hash] user-supplied options, merged over the defaults
    def initialize(src, opts = {})
      @options = HTMLProofer::Configuration.generate_defaults(opts)

      # :type is one of :file, :directory, or :links; it is consumed here and
      # deliberately removed from @options.
      @type = @options.delete(:type)
      @source = src

      @logger = HTMLProofer::Log.new(@options[:log_level])
      @cache = Cache.new(self, @options[:cache])

      # URL string => array of metadata hashes describing where it was seen.
      @external_urls = {}
      @internal_urls = {}
      @failures = []

      @before_request = []

      @checked_paths = {}

      @current_check = nil
      @current_source = nil
      @current_path = nil

      @reporter = Reporter::Cli.new(logger: @logger)
    end

    # Entry point: runs the whole session, persists the cache, and reports.
    # Exits the process with status 1 (via report_failed_checks) when any
    # failure was recorded.
    def run
      check_text = pluralize(checks.length, 'check', 'checks')

      if @type == :links
        @logger.log :info, "Running #{check_text} (#{format_checks_list(checks)}) on #{@source} ... \n\n"
        check_list_of_links unless @options[:disable_external]
      else
        @logger.log :info, "Running #{check_text} (#{format_checks_list(checks)}) in #{@source} on *#{@options[:extensions].join(', ')} files...\n\n"

        check_files
        @logger.log :info, "Ran on #{pluralize(files.length, 'file', 'files')}!\n\n"
      end

      @cache.write

      @reporter.failures = @failures

      if @failures.empty?
        @logger.log :info, 'HTML-Proofer finished successfully.'
      else
        @failures.uniq!
        report_failed_checks
      end
    end

    # Treats @source as a flat list of links and validates them as external
    # URLs (with no per-file metadata attached).
    def check_list_of_links
      @external_urls = @source.uniq.each_with_object({}) do |link, hash|
        url = Attribute::Url.new(self, link, base_url: nil).to_s

        hash[url] = []
      end

      validate_external_urls
    end

    # Walks over each implemented check and runs them on the files, in parallel.
    # Sends the collected external URLs to Typhoeus for batch processing.
    def check_files
      process_files.each do |result|
        URL_TYPES.each do |url_type|
          type = :"#{url_type}_urls"
          ivar_name = "@#{type}"
          ivar = instance_variable_get(ivar_name)

          if ivar.empty?
            instance_variable_set(ivar_name, result[type])
          else
            # Merge metadata lists for URLs seen in more than one file.
            result[type].each do |url, metadata|
              ivar[url] = [] if ivar[url].nil?
              ivar[url].concat(metadata)
            end
          end
        end
        @failures.concat(result[:failures])
      end

      validate_external_urls unless @options[:disable_external]

      validate_internal_urls
    end

    # Runs every file through load_file, in parallel when enabled.
    def process_files
      if @options[:parallel][:enable]
        Parallel.map(files, @options[:parallel]) { |path| load_file(path) }
      else
        files.map { |path| load_file(path) }
      end
    end

    # Parses one file into a Nokogiri document and runs all checks on it.
    def load_file(path)
      @html = create_nokogiri(path)
      check_parsed(path)
    end

    # Collects any external URLs found in a directory of files. Also collects
    # every failed test from process_files.
    def check_parsed(path)
      result = { internal_urls: {}, external_urls: {}, failures: [] }

      @source = [@source] if @type == :file

      @source.each do |current_source|
        checks.each do |klass|
          @current_source = current_source
          @current_path = path

          check = Object.const_get(klass).new(self, @html)
          @logger.log :debug, "Running #{check.short_name} in #{path}"

          @current_check = check

          check.run

          result[:external_urls].merge!(check.external_urls)
          result[:internal_urls].merge!(check.internal_urls)
          result[:failures].concat(check.failures)
        end
      end
      result
    end

    def validate_external_urls
      external_url_validator = HTMLProofer::UrlValidator::External.new(self, @external_urls)
      external_url_validator.before_request = @before_request
      @failures.concat(external_url_validator.validate)
    end

    def validate_internal_urls
      internal_link_validator = HTMLProofer::UrlValidator::Internal.new(self, @internal_urls)
      @failures.concat(internal_link_validator.validate)
    end

    # The list of files to proof, filtered by extension and the ignore list.
    # Memoized; empty when @type/extension do not match.
    def files
      @files ||= if @type == :directory
        @source.map do |src|
          pattern = File.join(src, '**', "*{#{@options[:extensions].join(',')}}")
          Dir.glob(pattern).select { |f| File.file?(f) && !ignore_file?(f) }
        end.flatten
      elsif @type == :file && @options[:extensions].include?(File.extname(@source))
        [@source].reject { |f| ignore_file?(f) }
      else
        []
      end
    end

    # True when the path matches an :ignore_files entry (String compares by
    # equality, Regexp by match); entries of any other class are ignored.
    def ignore_file?(file)
      @options[:ignore_files].each do |pattern|
        return true if pattern.is_a?(String) && pattern == file
        return true if pattern.is_a?(Regexp) && pattern =~ file
      end

      false
    end

    def check_sri?
      @options[:check_sri]
    end

    def enforce_https?
      @options[:enforce_https]
    end

    # Names of the check classes to run; memoized after the first call.
    # A links-only run uses just 'LinkCheck'.
    def checks
      return @checks if defined?(@checks) && !@checks.nil?

      return (@checks = ['LinkCheck']) if @type == :links

      @checks = HTMLProofer::Check.subchecks(@options).map(&:name)

      @checks
    end

    def failed_checks
      @reporter.failures.flatten.select { |f| f.is_a?(Failure) }
    end

    # Prints the report and aborts the process with a non-zero status.
    def report_failed_checks
      @reporter.report

      failure_text = pluralize(@failures.length, 'failure', 'failures')
      @logger.log :fatal, "\nHTML-Proofer found #{failure_text}!"
      exit 1
    end

    # Set before_request callback.
    #
    # @example Set before_request.
    #   request.before_request { |request| p "yay" }
    #
    # @param [ Block ] block The block to execute.
    #
    # @yield [ Typhoeus::Request ]
    #
    # @return [ Array<Block> ] All before_request blocks.
    def before_request(&block)
      @before_request ||= []
      @before_request << block if block
      @before_request
    end

    def load_internal_cache
      load_cache(:internal)
    end

    def load_external_cache
      load_cache(:external)
    end

    # Reconciles the cached URLs of the given type (:internal or :external)
    # against the freshly-collected ones and returns only the URLs that
    # still need to be checked.
    private def load_cache(type)
      ivar = instance_variable_get("@#{type}_urls")

      existing_urls_count = @cache.size(type)
      cache_text = pluralize(existing_urls_count, "#{type} link", "#{type} links")
      @logger.log :debug, "Found #{cache_text} in the cache"

      urls_to_check = @cache.retrieve_urls(ivar, type)
      urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
      @logger.log :info, "Checking #{urls_detected}"

      urls_to_check
    end

    # Strips the namespace prefix from check class names for log display.
    private def format_checks_list(checks)
      checks.map do |check|
        check.sub(/HTMLProofer::Check::/, '')
      end.join(', ')
    end
  end
end
# frozen_string_literal: true

require 'typhoeus'
require 'uri'

module HTMLProofer
  # Validates external URLs in bulk through Typhoeus' Hydra queue: HEAD
  # requests first (falling back to GET when a server rejects HEAD or when
  # hash checking requires a body), with results recorded in the cache and
  # failures accumulated as Failure objects.
  class UrlValidator::External < UrlValidator
    include HTMLProofer::Utils

    attr_reader :external_urls
    attr_writer :before_request

    # @param runner        [Runner] the owning runner (provides options/cache/logger)
    # @param external_urls [Hash]   URL string => array of metadata hashes
    def initialize(runner, external_urls)
      super(runner)

      @external_urls = external_urls
      @hydra = Typhoeus::Hydra.new(@runner.options[:hydra])
      @before_request = []

      # domain+path => list of query-key signatures already checked,
      # used to skip URLs that differ only by repeated query values.
      @paths_with_queries = {}
    end

    # Checks every collected URL (or only the uncached subset when the
    # cache is enabled) and returns the accumulated failures.
    def validate
      if @cache.enabled?
        urls_to_check = @runner.load_external_cache
        run_external_link_checker(urls_to_check)
      else
        run_external_link_checker(@external_urls)
      end

      @failed_checks
    end

    # Proofer runs faster if we pull out all the external URLs and run the checks
    # at the end. Otherwise, we're halting the consuming process for every file during
    # `process_files`.
    #
    # In addition, sorting the list lets libcurl keep connections to the same hosts alive.
    #
    # Finally, we'll first make a HEAD request, rather than GETing all the contents.
    # If the HEAD fails, we'll fall back to GET, as some servers are not configured
    # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
    # not available as an option.
    def run_external_link_checker(external_urls)
      # Route log from Typhoeus/Ethon to our own logger
      Ethon.logger = @logger

      external_urls.each_pair do |external_url, metadata|
        url = Attribute::Url.new(@runner, external_url, base_url: nil)

        unless url.valid?
          add_failure(metadata, "#{url} is an invalid URL", 0)
          next
        end

        next unless new_url_query_values?(url)

        # Hash checking needs the response body, so force a GET.
        method = if @runner.options[:check_external_hash] && url.hash?
                   :get
                 else
                   :head
                 end

        queue_request(method, url, metadata)
      end

      @hydra.run
    end

    # Builds a Typhoeus request for the URL, applies any before_request
    # callbacks, and enqueues it on the Hydra.
    def queue_request(method, url, filenames)
      opts = @runner.options[:typhoeus].merge(method: method)
      request = Typhoeus::Request.new(url.url, opts)
      @before_request.each do |callback|
        callback.call(request)
      end
      request.on_complete { |response| response_handler(response, url, filenames) }
      @hydra.queue request
    end

    # Dispatches on the response: 2xx (optionally hash-checked), timeout,
    # connection failure (code 0), HEAD-unsupported retry, or plain failure.
    def response_handler(response, url, filenames)
      method = response.request.options[:method]
      href = response.request.base_url.to_s
      response_code = response.code
      # NUL bytes would break downstream parsing of the body.
      response.body.delete!("\x00")

      @logger.log :debug, "Received a #{response_code} for #{href}"

      return if @runner.options[:ignore_status_codes].include?(response_code)

      if response_code.between?(200, 299)
        @cache.add_external(href, filenames, response_code, 'OK') unless check_hash_in_2xx_response(href, url, response, filenames)
      elsif response.timed_out?
        handle_timeout(href, filenames, response_code)
      elsif response_code.zero?
        handle_connection_failure(href, filenames, response_code, response.status_message)
      elsif method == :head # some servers don't support HEAD
        queue_request(:get, url, filenames)
      else
        return if @runner.options[:only_4xx] && !response_code.between?(400, 499)

        # Received a non-successful http response.
        status_message = blank?(response.status_message) ? '' : ": #{response.status_message}"
        msg = "External link #{href} failed#{status_message}"
        add_failure(filenames, msg, response_code)
        @cache.add_external(href, filenames, response_code, msg)
      end
    end

    # Even though the response was a success, we may have been asked to check
    # if the hash on the URL exists on the page
    def check_hash_in_2xx_response(href, url, response, filenames)
      return false if @runner.options[:only_4xx]
      return false unless @runner.options[:check_external_hash]
      return false unless url.hash?

      hash = url.hash

      body_doc = create_nokogiri(response.body)

      unencoded_hash = Addressable::URI.unescape(hash)
      xpath = [%(//*[@name="#{hash}"]|/*[@name="#{unencoded_hash}"]|//*[@id="#{hash}"]|//*[@id="#{unencoded_hash}"])]
      # user-content is a special addition by GitHub.
      if url.host =~ /github\.com/i
        xpath << [%(//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])]
        # when linking to a file on GitHub, like #L12-L34, only the first "L" portion
        # will be identified as a linkable portion
        xpath << [%(//td[@id="#{Regexp.last_match[1]}"])] if hash =~ /\A(L\d)+/
      end

      return unless body_doc.xpath(xpath.join('|')).empty?

      msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
      add_failure(filenames, msg, response.code)
      @cache.add_external(href, filenames, response.code, msg)
      true
    end

    # Records a timeout in the cache; failure only when not in only_4xx mode.
    def handle_timeout(href, filenames, response_code)
      msg = "External link #{href} failed: got a time out (response code #{response_code})"
      @cache.add_external(href, filenames, 0, msg)
      return if @runner.options[:only_4xx]

      add_failure(filenames, msg, response_code)
    end

    # Records a hard connection failure (libcurl returned code 0).
    def handle_connection_failure(href, metadata, response_code, status_message)
      msgs = [<<~MSG
        External link #{href} failed with something very wrong.
        It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
        Sometimes, making too many requests at once also breaks things.
      MSG
      ]

      msgs << "Either way, the return message from the server is: #{status_message}" unless blank?(status_message)

      msg = msgs.join("\n").chomp

      @cache.add_external(href, metadata, 0, msg)
      return if @runner.options[:only_4xx]

      add_failure(metadata, msg, response_code)
    end

    # Appends a Failure per file the URL appeared in (or one anonymous
    # Failure when no metadata is available).
    def add_failure(metadata, description, status = nil)
      if blank?(metadata) # possible if we're checking an array of links
        @failed_checks << Failure.new('', 'Links > External', description, status: status)
      else
        metadata.each { |m| @failed_checks << Failure.new(m[:filename], 'Links > External', description, line: m[:line], status: status) }
      end
    end

    # remember queries we've seen, ignore future ones
    private def new_url_query_values?(url)
      return true if (query_values = url.query_values).nil?

      queries = query_values.keys.join('-')
      domain_path = url.domain_path
      if @paths_with_queries[domain_path].nil?
        @paths_with_queries[domain_path] = [queries]
        true
      elsif !@paths_with_queries[domain_path].include?(queries)
        @paths_with_queries[domain_path] << queries
        true
      else
        false
      end
    end
  end
end
# frozen_string_literal: true

module HTMLProofer
  # Validates internal links: checks that the target file exists on disk
  # and, when the link carries a fragment (#hash), that a matching id/name
  # exists inside the target document. Results are recorded in the cache.
  class UrlValidator::Internal < UrlValidator
    attr_reader :internal_urls

    # @param runner        [Runner] the owning runner
    # @param internal_urls [Hash]   URL string => array of metadata hashes
    def initialize(runner, internal_urls)
      super(runner)

      @internal_urls = internal_urls
    end

    # Checks every collected internal URL (or only the uncached subset when
    # the cache is enabled) and returns the accumulated failures.
    def validate
      if @cache.enabled?
        urls_to_check = @runner.load_internal_cache
        run_internal_link_checker(urls_to_check)
      else
        run_internal_link_checker(@internal_urls)
      end

      @failed_checks
    end

    # Verifies each link once per file it was found in, so failures report
    # the correct source path and line.
    def run_internal_link_checker(links)
      links.each_pair do |link, matched_files|
        matched_files.each do |metadata|
          url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])

          @runner.current_source = metadata[:source]
          @runner.current_path = metadata[:current_path]

          unless file_exists?(url)
            @failed_checks << Failure.new(@runner.current_path, 'Links > Internal', "internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
            @cache.add_internal(url.to_s, metadata, false)
            next
          end

          unless hash_exists?(url)
            @failed_checks << Failure.new(@runner.current_path, 'Links > Internal', "internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
            @cache.add_internal(url.to_s, metadata, false)
            next
          end

          @cache.add_internal(url.to_s, metadata, true)
        end
      end

      @failed_checks
    end

    # Memoizes existence checks per absolute path on the runner, since many
    # links commonly resolve to the same file.
    private def file_exists?(url)
      absolute_path = url.absolute_path
      return @runner.checked_paths[url.absolute_path] if @runner.checked_paths.key?(absolute_path)

      @runner.checked_paths[url.absolute_path] = File.exist?(absolute_path)
    end

    # verify the target hash
    private def hash_exists?(url)
      href_hash = url.hash
      return true if blank?(href_hash)

      # prevents searching files we didn't ask about
      return false unless url.known_extension?

      decoded_href_hash = Addressable::URI.unescape(href_hash)
      fragment_ids = [href_hash, decoded_href_hash]
      # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
      fragment_ids.include?('top') || !find_fragments(fragment_ids, url).empty?
    end

    # Searches the target document for elements whose id or name equals one
    # of the fragment candidates (case-sensitively, via XpathFunctions).
    private def find_fragments(fragment_ids, url)
      xpaths = fragment_ids.uniq.flat_map do |frag_id|
        # Split on single quotes so the fragment can be embedded safely in
        # an XPath concat() expression.
        escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
        [
          "//*[case_sensitive_equals(@id, concat(#{escaped_frag_id}))]",
          "//*[case_sensitive_equals(@name, concat(#{escaped_frag_id}))]"
        ]
      end
      xpaths << XpathFunctions.new

      html = create_nokogiri(url.absolute_path)
      html.xpath(*xpaths)
    end
  end
end
# frozen_string_literal: true

module HTMLProofer
  # Base class for the External and Internal URL validators. Borrows the
  # runner's cache and logger and accumulates Failure objects in
  # @failed_checks for subclasses to return from #validate.
  class UrlValidator
    include HTMLProofer::Utils

    # @param runner [Runner] the owning runner
    def initialize(runner)
      @runner = runner

      @cache = @runner.cache
      @logger = @runner.logger

      @failed_checks = []
    end
  end
end
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'nokogiri'
|
4
4
|
|
5
5
|
module HTMLProofer
|
6
6
|
module Utils
|
@@ -8,21 +8,18 @@ module HTMLProofer
|
|
8
8
|
"#{count} #{count == 1 ? single : plural}"
|
9
9
|
end
|
10
10
|
|
11
|
+
def blank?(obj)
|
12
|
+
obj.nil? || obj.empty?
|
13
|
+
end
|
14
|
+
|
11
15
|
def create_nokogiri(path)
|
12
16
|
content = if File.exist?(path) && !File.directory?(path)
|
13
|
-
File.
|
17
|
+
File.read(path)
|
14
18
|
else
|
15
19
|
path
|
16
20
|
end
|
17
21
|
|
18
22
|
Nokogiri::HTML5(content, max_errors: -1)
|
19
23
|
end
|
20
|
-
|
21
|
-
def swap(href, replacement)
|
22
|
-
replacement.each do |link, replace|
|
23
|
-
href = href.gsub(link, replace)
|
24
|
-
end
|
25
|
-
href
|
26
|
-
end
|
27
24
|
end
|
28
25
|
end
|
data/lib/html_proofer.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'zeitwerk'
|
4
|
+
lib_dir = File.join(File.dirname(__dir__), 'lib')
|
5
|
+
gem_loader = Zeitwerk::Loader.for_gem
|
6
|
+
gem_loader.inflector.inflect(
|
7
|
+
'html_proofer' => 'HTMLProofer'
|
8
|
+
)
|
9
|
+
gem_loader.ignore(File.join(lib_dir, 'html-proofer.rb'))
|
10
|
+
gem_loader.setup
|
11
|
+
|
12
|
+
require 'html_proofer/version'
|
13
|
+
|
14
|
+
require 'parallel'
|
15
|
+
require 'fileutils'
|
16
|
+
|
17
|
+
%w[awesome_print debug].each do |gem|
|
18
|
+
require gem
|
19
|
+
rescue LoadError; # rubocop:disable Lint/SuppressedException
|
20
|
+
end
|
21
|
+
module HTMLProofer
  # Public entry points. Each validates its argument, then builds a Runner
  # configured with the appropriate :type.
  #
  # Fix over the previous version: the :type option is no longer written
  # into the caller's options hash (argument mutation); a merged copy is
  # passed to the Runner instead. check_directories also now validates its
  # argument before deriving options.

  # Proofs a single file.
  #
  # @param file    [String] path to an existing file
  # @param options [Hash]   proofer options (not mutated)
  # @return [HTMLProofer::Runner]
  # @raise [ArgumentError] when file is not a String or does not exist
  def self.check_file(file, options = {})
    raise ArgumentError unless file.is_a?(String)
    raise ArgumentError, "#{file} does not exist" unless File.exist?(file)

    HTMLProofer::Runner.new(file, options.merge(type: :file))
  end

  # Proofs every matching file under a single directory.
  #
  # @param directory [String] path to an existing directory
  # @raise [ArgumentError] when directory is not a String or does not exist
  def self.check_directory(directory, options = {})
    raise ArgumentError unless directory.is_a?(String)
    raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)

    HTMLProofer::Runner.new([directory], options.merge(type: :directory))
  end

  # Proofs every matching file under several directories.
  #
  # @param directories [Array<String>] existing directories
  # @raise [ArgumentError] when directories is not an Array or any entry is missing
  def self.check_directories(directories, options = {})
    raise ArgumentError unless directories.is_a?(Array)

    directories.each do |directory|
      raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
    end
    HTMLProofer::Runner.new(directories, options.merge(type: :directory))
  end

  # Proofs a bare list of links (no files involved).
  #
  # @param links [Array<String>] the links to check
  # @raise [ArgumentError] when links is not an Array
  def self.check_links(links, options = {})
    raise ArgumentError unless links.is_a?(Array)

    HTMLProofer::Runner.new(links, options.merge(type: :links))
  end
end
|
55
|
+
|
56
|
+
gem_loader.eager_load
|