html-proofer 4.0.0.rc2 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproofer +3 -2
- data/lib/html-proofer.rb +1 -1
- data/lib/html_proofer/attribute/url.rb +186 -174
- data/lib/html_proofer/cache.rb +128 -85
- data/lib/html_proofer/check/favicon.rb +29 -24
- data/lib/html_proofer/check/images.rb +78 -47
- data/lib/html_proofer/check/links.rb +109 -98
- data/lib/html_proofer/check/open_graph.rb +30 -25
- data/lib/html_proofer/check/scripts.rb +36 -28
- data/lib/html_proofer/check.rb +11 -10
- data/lib/html_proofer/configuration.rb +16 -15
- data/lib/html_proofer/element.rb +19 -19
- data/lib/html_proofer/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +22 -18
- data/lib/html_proofer/reporter.rb +3 -3
- data/lib/html_proofer/runner.rb +45 -44
- data/lib/html_proofer/url_validator/external.rb +157 -152
- data/lib/html_proofer/url_validator/internal.rb +72 -62
- data/lib/html_proofer/utils.rb +5 -5
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +11 -10
- metadata +22 -7
data/lib/html_proofer/log.rb
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require "yell"
|
4
|
+
require "rainbow"
|
5
5
|
|
6
6
|
module HTMLProofer
|
7
7
|
class Log
|
8
8
|
include Yell::Loggable
|
9
9
|
|
10
|
-
STDOUT_LEVELS =
|
11
|
-
STDERR_LEVELS =
|
10
|
+
STDOUT_LEVELS = [:debug, :info, :warn].freeze
|
11
|
+
STDERR_LEVELS = [:error, :fatal].freeze
|
12
12
|
|
13
13
|
def initialize(log_level)
|
14
14
|
@logger = Yell.new(format: false, \
|
15
|
-
|
16
|
-
|
17
|
-
l.adapter
|
18
|
-
l.adapter
|
15
|
+
name: "HTMLProofer", \
|
16
|
+
level: "gte.#{log_level}") do |l|
|
17
|
+
l.adapter(:stdout, level: "lte.warn")
|
18
|
+
l.adapter(:stderr, level: "gte.error")
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
@@ -24,23 +24,23 @@ module HTMLProofer
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def log_with_color(level, message)
|
27
|
-
@logger.send
|
27
|
+
@logger.send(level, colorize(level, message))
|
28
28
|
end
|
29
29
|
|
30
30
|
def colorize(level, message)
|
31
31
|
color = case level
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
when :debug
|
33
|
+
:cyan
|
34
|
+
when :info
|
35
|
+
:blue
|
36
|
+
when :warn
|
37
|
+
:yellow
|
38
|
+
when :error, :fatal
|
39
|
+
:red
|
40
|
+
end
|
41
41
|
|
42
42
|
if (STDOUT_LEVELS.include?(level) && $stdout.isatty) || \
|
43
|
-
|
43
|
+
(STDERR_LEVELS.include?(level) && $stderr.isatty)
|
44
44
|
Rainbow(message).send(color)
|
45
45
|
else
|
46
46
|
message
|
@@ -1,29 +1,33 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Reporter
|
5
|
+
class Cli < HTMLProofer::Reporter
|
6
|
+
def report
|
7
|
+
msg = failures.each_with_object([]) do |(check_name, failures), arr|
|
8
|
+
str = ["For the #{check_name} check, the following failures were found:\n"]
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
failures.each do |failure|
|
11
|
+
path_str = blank?(failure.path) ? "" : "At #{failure.path}"
|
10
12
|
|
11
|
-
|
13
|
+
line_str = failure.line.nil? ? "" : ":#{failure.line}"
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
+
path_and_line = "#{path_str}#{line_str}"
|
16
|
+
path_and_line = blank?(path_and_line) ? "" : "* #{path_and_line}:\n\n"
|
15
17
|
|
16
|
-
|
18
|
+
status_str = failure.status.nil? ? "" : " (status code #{failure.status})"
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
indent = blank?(path_and_line) ? "* " : " "
|
21
|
+
str << <<~MSG
|
22
|
+
#{path_and_line}#{indent}#{failure.description}#{status_str}
|
23
|
+
MSG
|
24
|
+
end
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
+
arr << str.join("\n")
|
27
|
+
end
|
26
28
|
|
27
|
-
|
29
|
+
@logger.log(:error, msg.join("\n"))
|
30
|
+
end
|
31
|
+
end
|
28
32
|
end
|
29
33
|
end
|
@@ -12,12 +12,12 @@ module HTMLProofer
|
|
12
12
|
|
13
13
|
def failures=(failures)
|
14
14
|
@failures = failures.group_by(&:check_name) \
|
15
|
-
|
16
|
-
|
15
|
+
.transform_values { |issues| issues.sort_by { |issue| [issue.path, issue.line] } } \
|
16
|
+
.sort
|
17
17
|
end
|
18
18
|
|
19
19
|
def report
|
20
|
-
raise NotImplementedError,
|
20
|
+
raise NotImplementedError, "HTMLProofer::Reporter subclasses must implement #report"
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
data/lib/html_proofer/runner.rb
CHANGED
@@ -5,9 +5,9 @@ module HTMLProofer
|
|
5
5
|
include HTMLProofer::Utils
|
6
6
|
|
7
7
|
attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :checked_paths, :current_check
|
8
|
-
attr_accessor :
|
8
|
+
attr_accessor :current_filename, :current_source, :reporter
|
9
9
|
|
10
|
-
URL_TYPES =
|
10
|
+
URL_TYPES = [:external, :internal].freeze
|
11
11
|
|
12
12
|
def initialize(src, opts = {})
|
13
13
|
@options = HTMLProofer::Configuration.generate_defaults(opts)
|
@@ -28,22 +28,23 @@ module HTMLProofer
|
|
28
28
|
|
29
29
|
@current_check = nil
|
30
30
|
@current_source = nil
|
31
|
-
@
|
31
|
+
@current_filename = nil
|
32
32
|
|
33
33
|
@reporter = Reporter::Cli.new(logger: @logger)
|
34
34
|
end
|
35
35
|
|
36
36
|
def run
|
37
|
-
check_text = pluralize(checks.length,
|
37
|
+
check_text = pluralize(checks.length, "check", "checks")
|
38
38
|
|
39
39
|
if @type == :links
|
40
|
-
@logger.log
|
40
|
+
@logger.log(:info, "Running #{check_text} (#{format_checks_list(checks)}) on #{@source} ... \n\n")
|
41
41
|
check_list_of_links unless @options[:disable_external]
|
42
42
|
else
|
43
|
-
@logger.log
|
43
|
+
@logger.log(:info,
|
44
|
+
"Running #{check_text} (#{format_checks_list(checks)}) in #{@source} on *#{@options[:extensions].join(", ")} files...\n\n")
|
44
45
|
|
45
46
|
check_files
|
46
|
-
@logger.log
|
47
|
+
@logger.log(:info, "Ran on #{pluralize(files.length, "file", "files")}!\n\n")
|
47
48
|
end
|
48
49
|
|
49
50
|
@cache.write
|
@@ -51,7 +52,7 @@ module HTMLProofer
|
|
51
52
|
@reporter.failures = @failures
|
52
53
|
|
53
54
|
if @failures.empty?
|
54
|
-
@logger.log
|
55
|
+
@logger.log(:info, "HTML-Proofer finished successfully.")
|
55
56
|
else
|
56
57
|
@failures.uniq!
|
57
58
|
report_failed_checks
|
@@ -97,40 +98,38 @@ module HTMLProofer
|
|
97
98
|
# Walks over each implemented check and runs them on the files (in parallel when enabled).
|
98
99
|
def process_files
|
99
100
|
if @options[:parallel][:enable]
|
100
|
-
Parallel.map(files, @options[:parallel]) { |
|
101
|
+
Parallel.map(files, @options[:parallel]) { |file| load_file(file[:path], file[:source]) }
|
101
102
|
else
|
102
|
-
files.map
|
103
|
+
files.map do |file|
|
104
|
+
load_file(file[:path], file[:source])
|
105
|
+
end
|
103
106
|
end
|
104
107
|
end
|
105
108
|
|
106
|
-
def load_file(path)
|
109
|
+
def load_file(path, source)
|
107
110
|
@html = create_nokogiri(path)
|
108
|
-
check_parsed(path)
|
111
|
+
check_parsed(path, source)
|
109
112
|
end
|
110
113
|
|
111
114
|
# Collects any external URLs found in a directory of files. Also collects
|
112
115
|
# every failed test from process_files.
|
113
|
-
def check_parsed(path)
|
116
|
+
def check_parsed(path, source)
|
114
117
|
result = { internal_urls: {}, external_urls: {}, failures: [] }
|
115
118
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
checks.each do |klass|
|
120
|
-
@current_source = current_source
|
121
|
-
@current_path = path
|
119
|
+
checks.each do |klass|
|
120
|
+
@current_source = source
|
121
|
+
@current_filename = path
|
122
122
|
|
123
|
-
|
124
|
-
|
123
|
+
check = Object.const_get(klass).new(self, @html)
|
124
|
+
@logger.log(:debug, "Running #{check.short_name} in #{path}")
|
125
125
|
|
126
|
-
|
126
|
+
@current_check = check
|
127
127
|
|
128
|
-
|
128
|
+
check.run
|
129
129
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
end
|
130
|
+
result[:external_urls].merge!(check.external_urls) { |_key, old, current| old.concat(current) }
|
131
|
+
result[:internal_urls].merge!(check.internal_urls) { |_key, old, current| old.concat(current) }
|
132
|
+
result[:failures].concat(check.failures)
|
134
133
|
end
|
135
134
|
result
|
136
135
|
end
|
@@ -148,15 +147,17 @@ module HTMLProofer
|
|
148
147
|
|
149
148
|
def files
|
150
149
|
@files ||= if @type == :directory
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
150
|
+
@source.map do |src|
|
151
|
+
pattern = File.join(src, "**", "*{#{@options[:extensions].join(",")}}")
|
152
|
+
Dir.glob(pattern).select do |f|
|
153
|
+
File.file?(f) && !ignore_file?(f)
|
154
|
+
end.map { |f| { source: src, path: f } }
|
155
|
+
end.flatten
|
156
|
+
elsif @type == :file && @options[:extensions].include?(File.extname(@source))
|
157
|
+
[@source].reject { |f| ignore_file?(f) }.map { |f| { source: f, path: f } }
|
158
|
+
else
|
159
|
+
[]
|
160
|
+
end
|
160
161
|
end
|
161
162
|
|
162
163
|
def ignore_file?(file)
|
@@ -179,7 +180,7 @@ module HTMLProofer
|
|
179
180
|
def checks
|
180
181
|
return @checks if defined?(@checks) && !@checks.nil?
|
181
182
|
|
182
|
-
return (@checks = [
|
183
|
+
return (@checks = ["LinkCheck"]) if @type == :links
|
183
184
|
|
184
185
|
@checks = HTMLProofer::Check.subchecks(@options).map(&:name)
|
185
186
|
|
@@ -193,9 +194,9 @@ module HTMLProofer
|
|
193
194
|
def report_failed_checks
|
194
195
|
@reporter.report
|
195
196
|
|
196
|
-
failure_text = pluralize(@failures.length,
|
197
|
-
@logger.log
|
198
|
-
exit
|
197
|
+
failure_text = pluralize(@failures.length, "failure", "failures")
|
198
|
+
@logger.log(:fatal, "\nHTML-Proofer found #{failure_text}!")
|
199
|
+
exit(1)
|
199
200
|
end
|
200
201
|
|
201
202
|
# Set before_request callback.
|
@@ -227,19 +228,19 @@ module HTMLProofer
|
|
227
228
|
|
228
229
|
existing_urls_count = @cache.size(type)
|
229
230
|
cache_text = pluralize(existing_urls_count, "#{type} link", "#{type} links")
|
230
|
-
@logger.log
|
231
|
+
@logger.log(:debug, "Found #{cache_text} in the cache")
|
231
232
|
|
232
233
|
urls_to_check = @cache.retrieve_urls(ivar, type)
|
233
234
|
urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
|
234
|
-
@logger.log
|
235
|
+
@logger.log(:info, "Checking #{urls_detected}")
|
235
236
|
|
236
237
|
urls_to_check
|
237
238
|
end
|
238
239
|
|
239
240
|
private def format_checks_list(checks)
|
240
241
|
checks.map do |check|
|
241
|
-
check.sub(/HTMLProofer::Check::/,
|
242
|
-
end.join(
|
242
|
+
check.sub(/HTMLProofer::Check::/, "")
|
243
|
+
end.join(", ")
|
243
244
|
end
|
244
245
|
end
|
245
246
|
end
|
@@ -1,188 +1,193 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require "typhoeus"
|
4
|
+
require "uri"
|
5
5
|
|
6
6
|
module HTMLProofer
|
7
|
-
class UrlValidator
|
8
|
-
|
7
|
+
class UrlValidator
|
8
|
+
class External < UrlValidator
|
9
|
+
include HTMLProofer::Utils
|
9
10
|
|
10
|
-
|
11
|
-
|
11
|
+
attr_reader :external_urls
|
12
|
+
attr_writer :before_request
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
def initialize(runner, external_urls)
|
15
|
+
super(runner)
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
17
|
+
@external_urls = external_urls
|
18
|
+
@hydra = Typhoeus::Hydra.new(@runner.options[:hydra])
|
19
|
+
@before_request = []
|
19
20
|
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
def validate
|
24
|
-
if @cache.enabled?
|
25
|
-
urls_to_check = @runner.load_external_cache
|
26
|
-
run_external_link_checker(urls_to_check)
|
27
|
-
else
|
28
|
-
run_external_link_checker(@external_urls)
|
21
|
+
@paths_with_queries = {}
|
29
22
|
end
|
30
23
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
#
|
38
|
-
# In addition, sorting the list lets libcurl keep connections to the same hosts alive.
|
39
|
-
#
|
40
|
-
# Finally, we'll first make a HEAD request, rather than GETing all the contents.
|
41
|
-
# If the HEAD fails, we'll fall back to GET, as some servers are not configured
|
42
|
-
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
43
|
-
# not available as an option.
|
44
|
-
def run_external_link_checker(external_urls)
|
45
|
-
# Route log from Typhoeus/Ethon to our own logger
|
46
|
-
Ethon.logger = @logger
|
47
|
-
|
48
|
-
external_urls.each_pair do |external_url, metadata|
|
49
|
-
url = Attribute::Url.new(@runner, external_url, base_url: nil)
|
50
|
-
|
51
|
-
unless url.valid?
|
52
|
-
add_failure(metadata, "#{url} is an invalid URL", 0)
|
53
|
-
next
|
24
|
+
def validate
|
25
|
+
if @cache.enabled?
|
26
|
+
urls_to_check = @runner.load_external_cache
|
27
|
+
run_external_link_checker(urls_to_check)
|
28
|
+
else
|
29
|
+
run_external_link_checker(@external_urls)
|
54
30
|
end
|
55
31
|
|
56
|
-
|
57
|
-
|
58
|
-
method = if @runner.options[:check_external_hash] && url.hash?
|
59
|
-
:get
|
60
|
-
else
|
61
|
-
:head
|
62
|
-
end
|
63
|
-
|
64
|
-
queue_request(method, url, metadata)
|
32
|
+
@failed_checks
|
65
33
|
end
|
66
34
|
|
67
|
-
|
68
|
-
|
35
|
+
# Proofer runs faster if we pull out all the external URLs and run the checks
|
36
|
+
# at the end. Otherwise, we're halting the consuming process for every file during
|
37
|
+
# `process_files`.
|
38
|
+
#
|
39
|
+
# In addition, sorting the list lets libcurl keep connections to the same hosts alive.
|
40
|
+
#
|
41
|
+
# Finally, we'll first make a HEAD request, rather than GETing all the contents.
|
42
|
+
# If the HEAD fails, we'll fall back to GET, as some servers are not configured
|
43
|
+
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
44
|
+
# not available as an option.
|
45
|
+
def run_external_link_checker(external_urls)
|
46
|
+
# Route log from Typhoeus/Ethon to our own logger
|
47
|
+
Ethon.logger = @logger
|
48
|
+
|
49
|
+
external_urls.each_pair do |external_url, metadata|
|
50
|
+
url = Attribute::Url.new(@runner, external_url, base_url: nil)
|
51
|
+
|
52
|
+
unless url.valid?
|
53
|
+
add_failure(metadata, "#{url} is an invalid URL", 0)
|
54
|
+
next
|
55
|
+
end
|
56
|
+
|
57
|
+
next unless new_url_query_values?(url)
|
58
|
+
|
59
|
+
method = if @runner.options[:check_external_hash] && url.hash?
|
60
|
+
:get
|
61
|
+
else
|
62
|
+
:head
|
63
|
+
end
|
64
|
+
|
65
|
+
queue_request(method, url, metadata)
|
66
|
+
end
|
69
67
|
|
70
|
-
|
71
|
-
opts = @runner.options[:typhoeus].merge(method: method)
|
72
|
-
request = Typhoeus::Request.new(url.url, opts)
|
73
|
-
@before_request.each do |callback|
|
74
|
-
callback.call(request)
|
68
|
+
@hydra.run
|
75
69
|
end
|
76
|
-
request.on_complete { |response| response_handler(response, url, filenames) }
|
77
|
-
@hydra.queue request
|
78
|
-
end
|
79
70
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
return if @runner.options[:ignore_status_codes].include?(response_code)
|
89
|
-
|
90
|
-
if response_code.between?(200, 299)
|
91
|
-
@cache.add_external(href, filenames, response_code, 'OK') unless check_hash_in_2xx_response(href, url, response, filenames)
|
92
|
-
elsif response.timed_out?
|
93
|
-
handle_timeout(href, filenames, response_code)
|
94
|
-
elsif response_code.zero?
|
95
|
-
handle_connection_failure(href, filenames, response_code, response.status_message)
|
96
|
-
elsif method == :head # some servers don't support HEAD
|
97
|
-
queue_request(:get, url, filenames)
|
98
|
-
else
|
99
|
-
return if @runner.options[:only_4xx] && !response_code.between?(400, 499)
|
100
|
-
|
101
|
-
# Received a non-successful http response.
|
102
|
-
status_message = blank?(response.status_message) ? '' : ": #{response.status_message}"
|
103
|
-
msg = "External link #{href} failed#{status_message}"
|
104
|
-
add_failure(filenames, msg, response_code)
|
105
|
-
@cache.add_external(href, filenames, response_code, msg)
|
71
|
+
def queue_request(method, url, filenames)
|
72
|
+
opts = @runner.options[:typhoeus].merge(method: method)
|
73
|
+
request = Typhoeus::Request.new(url.url, opts)
|
74
|
+
@before_request.each do |callback|
|
75
|
+
callback.call(request)
|
76
|
+
end
|
77
|
+
request.on_complete { |response| response_handler(response, url, filenames) }
|
78
|
+
@hydra.queue(request)
|
106
79
|
end
|
107
|
-
end
|
108
80
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
81
|
+
def response_handler(response, url, filenames)
|
82
|
+
method = response.request.options[:method]
|
83
|
+
href = response.request.base_url.to_s
|
84
|
+
response_code = response.code
|
85
|
+
response.body.delete!("\x00")
|
86
|
+
|
87
|
+
@logger.log(:debug, "Received a #{response_code} for #{href}")
|
88
|
+
|
89
|
+
return if @runner.options[:ignore_status_codes].include?(response_code)
|
90
|
+
|
91
|
+
if response_code.between?(200, 299)
|
92
|
+
@cache.add_external(href, filenames, response_code, "OK") unless check_hash_in_2xx_response(href, url,
|
93
|
+
response, filenames)
|
94
|
+
elsif response.timed_out?
|
95
|
+
handle_timeout(href, filenames, response_code)
|
96
|
+
elsif response_code.zero?
|
97
|
+
handle_connection_failure(href, filenames, response_code, response.status_message)
|
98
|
+
elsif method == :head # some servers don't support HEAD
|
99
|
+
queue_request(:get, url, filenames)
|
100
|
+
else
|
101
|
+
return if @runner.options[:only_4xx] && !response_code.between?(400, 499)
|
102
|
+
|
103
|
+
# Received a non-successful http response.
|
104
|
+
status_message = blank?(response.status_message) ? "" : ": #{response.status_message}"
|
105
|
+
msg = "External link #{href} failed#{status_message}"
|
106
|
+
add_failure(filenames, msg, response_code)
|
107
|
+
@cache.add_external(href, filenames, response_code, msg)
|
108
|
+
end
|
128
109
|
end
|
129
110
|
|
130
|
-
|
111
|
+
# Even though the response was a success, we may have been asked to check
|
112
|
+
# if the hash on the URL exists on the page
|
113
|
+
def check_hash_in_2xx_response(href, url, response, filenames)
|
114
|
+
return false if @runner.options[:only_4xx]
|
115
|
+
return false unless @runner.options[:check_external_hash]
|
116
|
+
return false unless url.hash?
|
117
|
+
|
118
|
+
hash = url.hash
|
119
|
+
|
120
|
+
body_doc = create_nokogiri(response.body)
|
121
|
+
|
122
|
+
unencoded_hash = Addressable::URI.unescape(hash)
|
123
|
+
xpath = [%(//*[@name="#{hash}"]|/*[@name="#{unencoded_hash}"]|//*[@id="#{hash}"]|//*[@id="#{unencoded_hash}"])]
|
124
|
+
# user-content is a special addition by GitHub.
|
125
|
+
if url.host =~ /github\.com/i
|
126
|
+
xpath << [%(//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])]
|
127
|
+
# when linking to a file on GitHub, like #L12-L34, only the first "L" portion
|
128
|
+
# will be identified as a linkable portion
|
129
|
+
xpath << [%(//td[@id="#{Regexp.last_match[1]}"])] if hash =~ /\A(L\d)+/
|
130
|
+
end
|
131
131
|
|
132
|
-
|
133
|
-
add_failure(filenames, msg, response.code)
|
134
|
-
@cache.add_external(href, filenames, response.code, msg)
|
135
|
-
true
|
136
|
-
end
|
132
|
+
return unless body_doc.xpath(xpath.join("|")).empty?
|
137
133
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
134
|
+
msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
|
135
|
+
add_failure(filenames, msg, response.code)
|
136
|
+
@cache.add_external(href, filenames, response.code, msg)
|
137
|
+
true
|
138
|
+
end
|
142
139
|
|
143
|
-
|
144
|
-
|
140
|
+
def handle_timeout(href, filenames, response_code)
|
141
|
+
msg = "External link #{href} failed: got a time out (response code #{response_code})"
|
142
|
+
@cache.add_external(href, filenames, 0, msg)
|
143
|
+
return if @runner.options[:only_4xx]
|
145
144
|
|
146
|
-
|
147
|
-
|
148
|
-
External link #{href} failed with something very wrong.
|
149
|
-
It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
|
150
|
-
Sometimes, making too many requests at once also breaks things.
|
151
|
-
MSG
|
152
|
-
]
|
145
|
+
add_failure(filenames, msg, response_code)
|
146
|
+
end
|
153
147
|
|
154
|
-
|
148
|
+
def handle_connection_failure(href, metadata, response_code, status_message)
|
149
|
+
msgs = [<<~MSG,
|
150
|
+
External link #{href} failed with something very wrong.
|
151
|
+
It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
|
152
|
+
Sometimes, making too many requests at once also breaks things.
|
153
|
+
MSG
|
154
|
+
]
|
155
155
|
|
156
|
-
|
156
|
+
msgs << "Either way, the return message from the server is: #{status_message}" unless blank?(status_message)
|
157
157
|
|
158
|
-
|
159
|
-
return if @runner.options[:only_4xx]
|
158
|
+
msg = msgs.join("\n").chomp
|
160
159
|
|
161
|
-
|
162
|
-
|
160
|
+
@cache.add_external(href, metadata, 0, msg)
|
161
|
+
return if @runner.options[:only_4xx]
|
163
162
|
|
164
|
-
|
165
|
-
if blank?(metadata) # possible if we're checking an array of links
|
166
|
-
@failed_checks << Failure.new('', 'Links > External', description, status: status)
|
167
|
-
else
|
168
|
-
metadata.each { |m| @failed_checks << Failure.new(m[:filename], 'Links > External', description, line: m[:line], status: status) }
|
163
|
+
add_failure(metadata, msg, response_code)
|
169
164
|
end
|
170
|
-
end
|
171
165
|
|
172
|
-
|
173
|
-
|
174
|
-
|
166
|
+
def add_failure(metadata, description, status = nil)
|
167
|
+
if blank?(metadata) # possible if we're checking an array of links
|
168
|
+
@failed_checks << Failure.new("", "Links > External", description, status: status)
|
169
|
+
else
|
170
|
+
metadata.each do |m|
|
171
|
+
@failed_checks << Failure.new(m[:filename], "Links > External", description, line: m[:line], status: status)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
175
|
|
176
|
-
queries
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
@paths_with_queries[domain_path]
|
183
|
-
|
184
|
-
|
185
|
-
|
176
|
+
# remember queries we've seen, ignore future ones
|
177
|
+
private def new_url_query_values?(url)
|
178
|
+
return true if (query_values = url.query_values).nil?
|
179
|
+
|
180
|
+
queries = query_values.keys.join("-")
|
181
|
+
domain_path = url.domain_path
|
182
|
+
if @paths_with_queries[domain_path].nil?
|
183
|
+
@paths_with_queries[domain_path] = [queries]
|
184
|
+
true
|
185
|
+
elsif !@paths_with_queries[domain_path].include?(queries)
|
186
|
+
@paths_with_queries[domain_path] << queries
|
187
|
+
true
|
188
|
+
else
|
189
|
+
false
|
190
|
+
end
|
186
191
|
end
|
187
192
|
end
|
188
193
|
end
|