html-proofer 4.0.0.rc3 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +3 -2
- data/lib/html-proofer.rb +1 -1
- data/lib/html_proofer/attribute/url.rb +180 -174
- data/lib/html_proofer/cache.rb +128 -85
- data/lib/html_proofer/check/favicon.rb +29 -24
- data/lib/html_proofer/check/images.rb +78 -47
- data/lib/html_proofer/check/links.rb +109 -98
- data/lib/html_proofer/check/open_graph.rb +30 -25
- data/lib/html_proofer/check/scripts.rb +36 -28
- data/lib/html_proofer/check.rb +11 -10
- data/lib/html_proofer/configuration.rb +16 -15
- data/lib/html_proofer/element.rb +19 -19
- data/lib/html_proofer/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +22 -18
- data/lib/html_proofer/reporter.rb +3 -3
- data/lib/html_proofer/runner.rb +45 -44
- data/lib/html_proofer/url_validator/external.rb +157 -152
- data/lib/html_proofer/url_validator/internal.rb +72 -62
- data/lib/html_proofer/utils.rb +5 -5
- data/lib/html_proofer/version.rb +1 -1
- data/lib/html_proofer.rb +11 -9
- metadata +8 -7
data/lib/html_proofer/log.rb
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require "yell"
|
4
|
+
require "rainbow"
|
5
5
|
|
6
6
|
module HTMLProofer
|
7
7
|
class Log
|
8
8
|
include Yell::Loggable
|
9
9
|
|
10
|
-
STDOUT_LEVELS =
|
11
|
-
STDERR_LEVELS =
|
10
|
+
STDOUT_LEVELS = [:debug, :info, :warn].freeze
|
11
|
+
STDERR_LEVELS = [:error, :fatal].freeze
|
12
12
|
|
13
13
|
def initialize(log_level)
|
14
14
|
@logger = Yell.new(format: false, \
|
15
|
-
|
16
|
-
|
17
|
-
l.adapter
|
18
|
-
l.adapter
|
15
|
+
name: "HTMLProofer", \
|
16
|
+
level: "gte.#{log_level}") do |l|
|
17
|
+
l.adapter(:stdout, level: "lte.warn")
|
18
|
+
l.adapter(:stderr, level: "gte.error")
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
@@ -24,23 +24,23 @@ module HTMLProofer
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def log_with_color(level, message)
|
27
|
-
@logger.send
|
27
|
+
@logger.send(level, colorize(level, message))
|
28
28
|
end
|
29
29
|
|
30
30
|
def colorize(level, message)
|
31
31
|
color = case level
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
when :debug
|
33
|
+
:cyan
|
34
|
+
when :info
|
35
|
+
:blue
|
36
|
+
when :warn
|
37
|
+
:yellow
|
38
|
+
when :error, :fatal
|
39
|
+
:red
|
40
|
+
end
|
41
41
|
|
42
42
|
if (STDOUT_LEVELS.include?(level) && $stdout.isatty) || \
|
43
|
-
|
43
|
+
(STDERR_LEVELS.include?(level) && $stderr.isatty)
|
44
44
|
Rainbow(message).send(color)
|
45
45
|
else
|
46
46
|
message
|
@@ -1,29 +1,33 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
module HTMLProofer
|
4
|
+
class Reporter
|
5
|
+
class Cli < HTMLProofer::Reporter
|
6
|
+
def report
|
7
|
+
msg = failures.each_with_object([]) do |(check_name, failures), arr|
|
8
|
+
str = ["For the #{check_name} check, the following failures were found:\n"]
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
failures.each do |failure|
|
11
|
+
path_str = blank?(failure.path) ? "" : "At #{failure.path}"
|
10
12
|
|
11
|
-
|
13
|
+
line_str = failure.line.nil? ? "" : ":#{failure.line}"
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
+
path_and_line = "#{path_str}#{line_str}"
|
16
|
+
path_and_line = blank?(path_and_line) ? "" : "* #{path_and_line}:\n\n"
|
15
17
|
|
16
|
-
|
18
|
+
status_str = failure.status.nil? ? "" : " (status code #{failure.status})"
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
indent = blank?(path_and_line) ? "* " : " "
|
21
|
+
str << <<~MSG
|
22
|
+
#{path_and_line}#{indent}#{failure.description}#{status_str}
|
23
|
+
MSG
|
24
|
+
end
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
+
arr << str.join("\n")
|
27
|
+
end
|
26
28
|
|
27
|
-
|
29
|
+
@logger.log(:error, msg.join("\n"))
|
30
|
+
end
|
31
|
+
end
|
28
32
|
end
|
29
33
|
end
|
@@ -12,12 +12,12 @@ module HTMLProofer
|
|
12
12
|
|
13
13
|
def failures=(failures)
|
14
14
|
@failures = failures.group_by(&:check_name) \
|
15
|
-
|
16
|
-
|
15
|
+
.transform_values { |issues| issues.sort_by { |issue| [issue.path, issue.line] } } \
|
16
|
+
.sort
|
17
17
|
end
|
18
18
|
|
19
19
|
def report
|
20
|
-
raise NotImplementedError,
|
20
|
+
raise NotImplementedError, "HTMLProofer::Reporter subclasses must implement #report"
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
data/lib/html_proofer/runner.rb
CHANGED
@@ -5,9 +5,9 @@ module HTMLProofer
|
|
5
5
|
include HTMLProofer::Utils
|
6
6
|
|
7
7
|
attr_reader :options, :cache, :logger, :internal_urls, :external_urls, :checked_paths, :current_check
|
8
|
-
attr_accessor :
|
8
|
+
attr_accessor :current_filename, :current_source, :reporter
|
9
9
|
|
10
|
-
URL_TYPES =
|
10
|
+
URL_TYPES = [:external, :internal].freeze
|
11
11
|
|
12
12
|
def initialize(src, opts = {})
|
13
13
|
@options = HTMLProofer::Configuration.generate_defaults(opts)
|
@@ -28,22 +28,23 @@ module HTMLProofer
|
|
28
28
|
|
29
29
|
@current_check = nil
|
30
30
|
@current_source = nil
|
31
|
-
@
|
31
|
+
@current_filename = nil
|
32
32
|
|
33
33
|
@reporter = Reporter::Cli.new(logger: @logger)
|
34
34
|
end
|
35
35
|
|
36
36
|
def run
|
37
|
-
check_text = pluralize(checks.length,
|
37
|
+
check_text = pluralize(checks.length, "check", "checks")
|
38
38
|
|
39
39
|
if @type == :links
|
40
|
-
@logger.log
|
40
|
+
@logger.log(:info, "Running #{check_text} (#{format_checks_list(checks)}) on #{@source} ... \n\n")
|
41
41
|
check_list_of_links unless @options[:disable_external]
|
42
42
|
else
|
43
|
-
@logger.log
|
43
|
+
@logger.log(:info,
|
44
|
+
"Running #{check_text} (#{format_checks_list(checks)}) in #{@source} on *#{@options[:extensions].join(", ")} files...\n\n")
|
44
45
|
|
45
46
|
check_files
|
46
|
-
@logger.log
|
47
|
+
@logger.log(:info, "Ran on #{pluralize(files.length, "file", "files")}!\n\n")
|
47
48
|
end
|
48
49
|
|
49
50
|
@cache.write
|
@@ -51,7 +52,7 @@ module HTMLProofer
|
|
51
52
|
@reporter.failures = @failures
|
52
53
|
|
53
54
|
if @failures.empty?
|
54
|
-
@logger.log
|
55
|
+
@logger.log(:info, "HTML-Proofer finished successfully.")
|
55
56
|
else
|
56
57
|
@failures.uniq!
|
57
58
|
report_failed_checks
|
@@ -97,40 +98,38 @@ module HTMLProofer
|
|
97
98
|
# Walks over each implemented check and runs them on the files (in parallel when the :parallel option is enabled).
|
98
99
|
def process_files
|
99
100
|
if @options[:parallel][:enable]
|
100
|
-
Parallel.map(files, @options[:parallel]) { |
|
101
|
+
Parallel.map(files, @options[:parallel]) { |file| load_file(file[:path], file[:source]) }
|
101
102
|
else
|
102
|
-
files.map
|
103
|
+
files.map do |file|
|
104
|
+
load_file(file[:path], file[:source])
|
105
|
+
end
|
103
106
|
end
|
104
107
|
end
|
105
108
|
|
106
|
-
def load_file(path)
|
109
|
+
def load_file(path, source)
|
107
110
|
@html = create_nokogiri(path)
|
108
|
-
check_parsed(path)
|
111
|
+
check_parsed(path, source)
|
109
112
|
end
|
110
113
|
|
111
114
|
# Collects any external URLs found in a directory of files. Also collects
|
112
115
|
# every failed test from process_files.
|
113
|
-
def check_parsed(path)
|
116
|
+
def check_parsed(path, source)
|
114
117
|
result = { internal_urls: {}, external_urls: {}, failures: [] }
|
115
118
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
checks.each do |klass|
|
120
|
-
@current_source = current_source
|
121
|
-
@current_path = path
|
119
|
+
checks.each do |klass|
|
120
|
+
@current_source = source
|
121
|
+
@current_filename = path
|
122
122
|
|
123
|
-
|
124
|
-
|
123
|
+
check = Object.const_get(klass).new(self, @html)
|
124
|
+
@logger.log(:debug, "Running #{check.short_name} in #{path}")
|
125
125
|
|
126
|
-
|
126
|
+
@current_check = check
|
127
127
|
|
128
|
-
|
128
|
+
check.run
|
129
129
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
end
|
130
|
+
result[:external_urls].merge!(check.external_urls) { |_key, old, current| old.concat(current) }
|
131
|
+
result[:internal_urls].merge!(check.internal_urls) { |_key, old, current| old.concat(current) }
|
132
|
+
result[:failures].concat(check.failures)
|
134
133
|
end
|
135
134
|
result
|
136
135
|
end
|
@@ -148,15 +147,17 @@ module HTMLProofer
|
|
148
147
|
|
149
148
|
def files
|
150
149
|
@files ||= if @type == :directory
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
150
|
+
@source.map do |src|
|
151
|
+
pattern = File.join(src, "**", "*{#{@options[:extensions].join(",")}}")
|
152
|
+
Dir.glob(pattern).select do |f|
|
153
|
+
File.file?(f) && !ignore_file?(f)
|
154
|
+
end.map { |f| { source: src, path: f } }
|
155
|
+
end.flatten
|
156
|
+
elsif @type == :file && @options[:extensions].include?(File.extname(@source))
|
157
|
+
[@source].reject { |f| ignore_file?(f) }.map { |f| { source: f, path: f } }
|
158
|
+
else
|
159
|
+
[]
|
160
|
+
end
|
160
161
|
end
|
161
162
|
|
162
163
|
def ignore_file?(file)
|
@@ -179,7 +180,7 @@ module HTMLProofer
|
|
179
180
|
def checks
|
180
181
|
return @checks if defined?(@checks) && !@checks.nil?
|
181
182
|
|
182
|
-
return (@checks = [
|
183
|
+
return (@checks = ["LinkCheck"]) if @type == :links
|
183
184
|
|
184
185
|
@checks = HTMLProofer::Check.subchecks(@options).map(&:name)
|
185
186
|
|
@@ -193,9 +194,9 @@ module HTMLProofer
|
|
193
194
|
def report_failed_checks
|
194
195
|
@reporter.report
|
195
196
|
|
196
|
-
failure_text = pluralize(@failures.length,
|
197
|
-
@logger.log
|
198
|
-
exit
|
197
|
+
failure_text = pluralize(@failures.length, "failure", "failures")
|
198
|
+
@logger.log(:fatal, "\nHTML-Proofer found #{failure_text}!")
|
199
|
+
exit(1)
|
199
200
|
end
|
200
201
|
|
201
202
|
# Set before_request callback.
|
@@ -227,19 +228,19 @@ module HTMLProofer
|
|
227
228
|
|
228
229
|
existing_urls_count = @cache.size(type)
|
229
230
|
cache_text = pluralize(existing_urls_count, "#{type} link", "#{type} links")
|
230
|
-
@logger.log
|
231
|
+
@logger.log(:debug, "Found #{cache_text} in the cache")
|
231
232
|
|
232
233
|
urls_to_check = @cache.retrieve_urls(ivar, type)
|
233
234
|
urls_detected = pluralize(urls_to_check.count, "#{type} link", "#{type} links")
|
234
|
-
@logger.log
|
235
|
+
@logger.log(:info, "Checking #{urls_detected}")
|
235
236
|
|
236
237
|
urls_to_check
|
237
238
|
end
|
238
239
|
|
239
240
|
private def format_checks_list(checks)
|
240
241
|
checks.map do |check|
|
241
|
-
check.sub(/HTMLProofer::Check::/,
|
242
|
-
end.join(
|
242
|
+
check.sub(/HTMLProofer::Check::/, "")
|
243
|
+
end.join(", ")
|
243
244
|
end
|
244
245
|
end
|
245
246
|
end
|
@@ -1,188 +1,193 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require "typhoeus"
|
4
|
+
require "uri"
|
5
5
|
|
6
6
|
module HTMLProofer
|
7
|
-
class UrlValidator
|
8
|
-
|
7
|
+
class UrlValidator
|
8
|
+
class External < UrlValidator
|
9
|
+
include HTMLProofer::Utils
|
9
10
|
|
10
|
-
|
11
|
-
|
11
|
+
attr_reader :external_urls
|
12
|
+
attr_writer :before_request
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
def initialize(runner, external_urls)
|
15
|
+
super(runner)
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
17
|
+
@external_urls = external_urls
|
18
|
+
@hydra = Typhoeus::Hydra.new(@runner.options[:hydra])
|
19
|
+
@before_request = []
|
19
20
|
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
def validate
|
24
|
-
if @cache.enabled?
|
25
|
-
urls_to_check = @runner.load_external_cache
|
26
|
-
run_external_link_checker(urls_to_check)
|
27
|
-
else
|
28
|
-
run_external_link_checker(@external_urls)
|
21
|
+
@paths_with_queries = {}
|
29
22
|
end
|
30
23
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
#
|
38
|
-
# In addition, sorting the list lets libcurl keep connections to the same hosts alive.
|
39
|
-
#
|
40
|
-
# Finally, we'll first make a HEAD request, rather than GETing all the contents.
|
41
|
-
# If the HEAD fails, we'll fall back to GET, as some servers are not configured
|
42
|
-
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
43
|
-
# not available as an option.
|
44
|
-
def run_external_link_checker(external_urls)
|
45
|
-
# Route log from Typhoeus/Ethon to our own logger
|
46
|
-
Ethon.logger = @logger
|
47
|
-
|
48
|
-
external_urls.each_pair do |external_url, metadata|
|
49
|
-
url = Attribute::Url.new(@runner, external_url, base_url: nil)
|
50
|
-
|
51
|
-
unless url.valid?
|
52
|
-
add_failure(metadata, "#{url} is an invalid URL", 0)
|
53
|
-
next
|
24
|
+
def validate
|
25
|
+
if @cache.enabled?
|
26
|
+
urls_to_check = @runner.load_external_cache
|
27
|
+
run_external_link_checker(urls_to_check)
|
28
|
+
else
|
29
|
+
run_external_link_checker(@external_urls)
|
54
30
|
end
|
55
31
|
|
56
|
-
|
57
|
-
|
58
|
-
method = if @runner.options[:check_external_hash] && url.hash?
|
59
|
-
:get
|
60
|
-
else
|
61
|
-
:head
|
62
|
-
end
|
63
|
-
|
64
|
-
queue_request(method, url, metadata)
|
32
|
+
@failed_checks
|
65
33
|
end
|
66
34
|
|
67
|
-
|
68
|
-
|
35
|
+
# Proofer runs faster if we pull out all the external URLs and run the checks
|
36
|
+
# at the end. Otherwise, we're halting the consuming process for every file during
|
37
|
+
# `process_files`.
|
38
|
+
#
|
39
|
+
# In addition, sorting the list lets libcurl keep connections to the same hosts alive.
|
40
|
+
#
|
41
|
+
# Finally, we'll first make a HEAD request, rather than GETing all the contents.
|
42
|
+
# If the HEAD fails, we'll fall back to GET, as some servers are not configured
|
43
|
+
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
44
|
+
# not available as an option.
|
45
|
+
def run_external_link_checker(external_urls)
|
46
|
+
# Route log from Typhoeus/Ethon to our own logger
|
47
|
+
Ethon.logger = @logger
|
48
|
+
|
49
|
+
external_urls.each_pair do |external_url, metadata|
|
50
|
+
url = Attribute::Url.new(@runner, external_url, base_url: nil)
|
51
|
+
|
52
|
+
unless url.valid?
|
53
|
+
add_failure(metadata, "#{url} is an invalid URL", 0)
|
54
|
+
next
|
55
|
+
end
|
56
|
+
|
57
|
+
next unless new_url_query_values?(url)
|
58
|
+
|
59
|
+
method = if @runner.options[:check_external_hash] && url.hash?
|
60
|
+
:get
|
61
|
+
else
|
62
|
+
:head
|
63
|
+
end
|
64
|
+
|
65
|
+
queue_request(method, url, metadata)
|
66
|
+
end
|
69
67
|
|
70
|
-
|
71
|
-
opts = @runner.options[:typhoeus].merge(method: method)
|
72
|
-
request = Typhoeus::Request.new(url.url, opts)
|
73
|
-
@before_request.each do |callback|
|
74
|
-
callback.call(request)
|
68
|
+
@hydra.run
|
75
69
|
end
|
76
|
-
request.on_complete { |response| response_handler(response, url, filenames) }
|
77
|
-
@hydra.queue request
|
78
|
-
end
|
79
70
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
return if @runner.options[:ignore_status_codes].include?(response_code)
|
89
|
-
|
90
|
-
if response_code.between?(200, 299)
|
91
|
-
@cache.add_external(href, filenames, response_code, 'OK') unless check_hash_in_2xx_response(href, url, response, filenames)
|
92
|
-
elsif response.timed_out?
|
93
|
-
handle_timeout(href, filenames, response_code)
|
94
|
-
elsif response_code.zero?
|
95
|
-
handle_connection_failure(href, filenames, response_code, response.status_message)
|
96
|
-
elsif method == :head # some servers don't support HEAD
|
97
|
-
queue_request(:get, url, filenames)
|
98
|
-
else
|
99
|
-
return if @runner.options[:only_4xx] && !response_code.between?(400, 499)
|
100
|
-
|
101
|
-
# Received a non-successful http response.
|
102
|
-
status_message = blank?(response.status_message) ? '' : ": #{response.status_message}"
|
103
|
-
msg = "External link #{href} failed#{status_message}"
|
104
|
-
add_failure(filenames, msg, response_code)
|
105
|
-
@cache.add_external(href, filenames, response_code, msg)
|
71
|
+
def queue_request(method, url, filenames)
|
72
|
+
opts = @runner.options[:typhoeus].merge(method: method)
|
73
|
+
request = Typhoeus::Request.new(url.url, opts)
|
74
|
+
@before_request.each do |callback|
|
75
|
+
callback.call(request)
|
76
|
+
end
|
77
|
+
request.on_complete { |response| response_handler(response, url, filenames) }
|
78
|
+
@hydra.queue(request)
|
106
79
|
end
|
107
|
-
end
|
108
80
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
81
|
+
def response_handler(response, url, filenames)
|
82
|
+
method = response.request.options[:method]
|
83
|
+
href = response.request.base_url.to_s
|
84
|
+
response_code = response.code
|
85
|
+
response.body.delete!("\x00")
|
86
|
+
|
87
|
+
@logger.log(:debug, "Received a #{response_code} for #{href}")
|
88
|
+
|
89
|
+
return if @runner.options[:ignore_status_codes].include?(response_code)
|
90
|
+
|
91
|
+
if response_code.between?(200, 299)
|
92
|
+
@cache.add_external(href, filenames, response_code, "OK") unless check_hash_in_2xx_response(href, url,
|
93
|
+
response, filenames)
|
94
|
+
elsif response.timed_out?
|
95
|
+
handle_timeout(href, filenames, response_code)
|
96
|
+
elsif response_code.zero?
|
97
|
+
handle_connection_failure(href, filenames, response_code, response.status_message)
|
98
|
+
elsif method == :head # some servers don't support HEAD
|
99
|
+
queue_request(:get, url, filenames)
|
100
|
+
else
|
101
|
+
return if @runner.options[:only_4xx] && !response_code.between?(400, 499)
|
102
|
+
|
103
|
+
# Received a non-successful http response.
|
104
|
+
status_message = blank?(response.status_message) ? "" : ": #{response.status_message}"
|
105
|
+
msg = "External link #{href} failed#{status_message}"
|
106
|
+
add_failure(filenames, msg, response_code)
|
107
|
+
@cache.add_external(href, filenames, response_code, msg)
|
108
|
+
end
|
128
109
|
end
|
129
110
|
|
130
|
-
|
111
|
+
# Even though the response was a success, we may have been asked to check
|
112
|
+
# if the hash on the URL exists on the page
|
113
|
+
def check_hash_in_2xx_response(href, url, response, filenames)
|
114
|
+
return false if @runner.options[:only_4xx]
|
115
|
+
return false unless @runner.options[:check_external_hash]
|
116
|
+
return false unless url.hash?
|
117
|
+
|
118
|
+
hash = url.hash
|
119
|
+
|
120
|
+
body_doc = create_nokogiri(response.body)
|
121
|
+
|
122
|
+
unencoded_hash = Addressable::URI.unescape(hash)
|
123
|
+
xpath = [%(//*[@name="#{hash}"]|/*[@name="#{unencoded_hash}"]|//*[@id="#{hash}"]|//*[@id="#{unencoded_hash}"])]
|
124
|
+
# user-content is a special addition by GitHub.
|
125
|
+
if url.host =~ /github\.com/i
|
126
|
+
xpath << [%(//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])]
|
127
|
+
# when linking to a file on GitHub, like #L12-L34, only the first "L" portion
|
128
|
+
# will be identified as a linkable portion
|
129
|
+
xpath << [%(//td[@id="#{Regexp.last_match[1]}"])] if hash =~ /\A(L\d)+/
|
130
|
+
end
|
131
131
|
|
132
|
-
|
133
|
-
add_failure(filenames, msg, response.code)
|
134
|
-
@cache.add_external(href, filenames, response.code, msg)
|
135
|
-
true
|
136
|
-
end
|
132
|
+
return unless body_doc.xpath(xpath.join("|")).empty?
|
137
133
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
134
|
+
msg = "External link #{href} failed: #{url.sans_hash} exists, but the hash '#{hash}' does not"
|
135
|
+
add_failure(filenames, msg, response.code)
|
136
|
+
@cache.add_external(href, filenames, response.code, msg)
|
137
|
+
true
|
138
|
+
end
|
142
139
|
|
143
|
-
|
144
|
-
|
140
|
+
def handle_timeout(href, filenames, response_code)
|
141
|
+
msg = "External link #{href} failed: got a time out (response code #{response_code})"
|
142
|
+
@cache.add_external(href, filenames, 0, msg)
|
143
|
+
return if @runner.options[:only_4xx]
|
145
144
|
|
146
|
-
|
147
|
-
|
148
|
-
External link #{href} failed with something very wrong.
|
149
|
-
It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
|
150
|
-
Sometimes, making too many requests at once also breaks things.
|
151
|
-
MSG
|
152
|
-
]
|
145
|
+
add_failure(filenames, msg, response_code)
|
146
|
+
end
|
153
147
|
|
154
|
-
|
148
|
+
def handle_connection_failure(href, metadata, response_code, status_message)
|
149
|
+
msgs = [<<~MSG,
|
150
|
+
External link #{href} failed with something very wrong.
|
151
|
+
It's possible libcurl couldn't connect to the server, or perhaps the request timed out.
|
152
|
+
Sometimes, making too many requests at once also breaks things.
|
153
|
+
MSG
|
154
|
+
]
|
155
155
|
|
156
|
-
|
156
|
+
msgs << "Either way, the return message from the server is: #{status_message}" unless blank?(status_message)
|
157
157
|
|
158
|
-
|
159
|
-
return if @runner.options[:only_4xx]
|
158
|
+
msg = msgs.join("\n").chomp
|
160
159
|
|
161
|
-
|
162
|
-
|
160
|
+
@cache.add_external(href, metadata, 0, msg)
|
161
|
+
return if @runner.options[:only_4xx]
|
163
162
|
|
164
|
-
|
165
|
-
if blank?(metadata) # possible if we're checking an array of links
|
166
|
-
@failed_checks << Failure.new('', 'Links > External', description, status: status)
|
167
|
-
else
|
168
|
-
metadata.each { |m| @failed_checks << Failure.new(m[:filename], 'Links > External', description, line: m[:line], status: status) }
|
163
|
+
add_failure(metadata, msg, response_code)
|
169
164
|
end
|
170
|
-
end
|
171
165
|
|
172
|
-
|
173
|
-
|
174
|
-
|
166
|
+
def add_failure(metadata, description, status = nil)
|
167
|
+
if blank?(metadata) # possible if we're checking an array of links
|
168
|
+
@failed_checks << Failure.new("", "Links > External", description, status: status)
|
169
|
+
else
|
170
|
+
metadata.each do |m|
|
171
|
+
@failed_checks << Failure.new(m[:filename], "Links > External", description, line: m[:line], status: status)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
175
|
|
176
|
-
queries
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
@paths_with_queries[domain_path]
|
183
|
-
|
184
|
-
|
185
|
-
|
176
|
+
# remember queries we've seen, ignore future ones
|
177
|
+
private def new_url_query_values?(url)
|
178
|
+
return true if (query_values = url.query_values).nil?
|
179
|
+
|
180
|
+
queries = query_values.keys.join("-")
|
181
|
+
domain_path = url.domain_path
|
182
|
+
if @paths_with_queries[domain_path].nil?
|
183
|
+
@paths_with_queries[domain_path] = [queries]
|
184
|
+
true
|
185
|
+
elsif !@paths_with_queries[domain_path].include?(queries)
|
186
|
+
@paths_with_queries[domain_path] << queries
|
187
|
+
true
|
188
|
+
else
|
189
|
+
false
|
190
|
+
end
|
186
191
|
end
|
187
192
|
end
|
188
193
|
end
|