html-proofer 2.6.4 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/{htmlproof → htmlproofer} +31 -24
- data/lib/html-proofer.rb +47 -0
- data/lib/html-proofer/cache.rb +153 -0
- data/lib/html-proofer/check.rb +63 -0
- data/lib/{html/proofer/checks → html-proofer/check}/favicon.rb +2 -8
- data/lib/html-proofer/check/html.rb +21 -0
- data/lib/html-proofer/check/images.rb +47 -0
- data/lib/{html/proofer/checks → html-proofer/check}/links.rb +40 -48
- data/lib/html-proofer/check/scripts.rb +28 -0
- data/lib/html-proofer/configuration.rb +56 -0
- data/lib/html-proofer/element.rb +165 -0
- data/lib/{html/proofer/check_runner → html-proofer}/issue.rb +8 -10
- data/lib/html-proofer/log.rb +49 -0
- data/lib/html-proofer/runner.rb +160 -0
- data/lib/html-proofer/url_validator.rb +218 -0
- data/lib/html-proofer/utils.rb +40 -0
- data/lib/html-proofer/version.rb +3 -0
- metadata +20 -20
- data/lib/html/proofer.rb +0 -191
- data/lib/html/proofer/cache.rb +0 -141
- data/lib/html/proofer/check_runner.rb +0 -70
- data/lib/html/proofer/checkable.rb +0 -168
- data/lib/html/proofer/checks/html.rb +0 -46
- data/lib/html/proofer/checks/images.rb +0 -54
- data/lib/html/proofer/checks/scripts.rb +0 -40
- data/lib/html/proofer/configuration.rb +0 -48
- data/lib/html/proofer/log.rb +0 -42
- data/lib/html/proofer/url_validator.rb +0 -222
- data/lib/html/proofer/utils.rb +0 -42
- data/lib/html/proofer/version.rb +0 -5
- data/lib/html/proofer/xpathfunctions.rb +0 -9
@@ -1,168 +0,0 @@
|
|
1
|
-
require 'addressable/uri'
|
2
|
-
require_relative './utils'
|
3
|
-
|
4
|
-
module HTML
|
5
|
-
class Proofer
|
6
|
-
# Represents the superclass from which all checks derive.
|
7
|
-
class Checkable
|
8
|
-
include HTML::Proofer::Utils
|
9
|
-
|
10
|
-
attr_reader :line
|
11
|
-
|
12
|
-
def initialize(obj, check)
|
13
|
-
obj.attributes.each_pair do |attribute, value|
|
14
|
-
instance_variable_set("@#{attribute.tr('-:.', '_')}".to_sym, value.value)
|
15
|
-
end
|
16
|
-
|
17
|
-
@text = obj.content
|
18
|
-
@check = check
|
19
|
-
@checked_paths = {}
|
20
|
-
@type = self.class.name
|
21
|
-
@line = obj.line
|
22
|
-
|
23
|
-
if @href && @check.options[:href_swap]
|
24
|
-
@href = swap(@href, @check.options[:href_swap])
|
25
|
-
end
|
26
|
-
|
27
|
-
# fix up missing protocols
|
28
|
-
@href.insert 0, 'http:' if @href =~ %r{^//}
|
29
|
-
@src.insert 0, 'http:' if @src =~ %r{^//}
|
30
|
-
end
|
31
|
-
|
32
|
-
def url
|
33
|
-
@src || @srcset || @href || ''
|
34
|
-
end
|
35
|
-
|
36
|
-
def valid?
|
37
|
-
!parts.nil?
|
38
|
-
end
|
39
|
-
|
40
|
-
def parts
|
41
|
-
@parts ||= Addressable::URI.parse url
|
42
|
-
rescue URI::Error, Addressable::URI::InvalidURIError
|
43
|
-
@parts = nil
|
44
|
-
end
|
45
|
-
|
46
|
-
def path
|
47
|
-
Addressable::URI.unencode parts.path unless parts.nil?
|
48
|
-
end
|
49
|
-
|
50
|
-
def hash
|
51
|
-
parts.fragment unless parts.nil?
|
52
|
-
end
|
53
|
-
|
54
|
-
def scheme
|
55
|
-
parts.scheme unless parts.nil?
|
56
|
-
end
|
57
|
-
|
58
|
-
# path is to an external server
|
59
|
-
def remote?
|
60
|
-
%w( http https ).include? scheme
|
61
|
-
end
|
62
|
-
|
63
|
-
def non_http_remote?
|
64
|
-
!scheme.nil? && !remote?
|
65
|
-
end
|
66
|
-
|
67
|
-
def ignore?
|
68
|
-
return true if @data_proofer_ignore
|
69
|
-
|
70
|
-
# ignore base64 encoded images
|
71
|
-
if %w(ImageCheckable FaviconCheckable).include? @type
|
72
|
-
return true if url.match(/^data:image/)
|
73
|
-
end
|
74
|
-
|
75
|
-
# ignore user defined URLs
|
76
|
-
return true if ignores_pattern_check(@check.url_ignores)
|
77
|
-
|
78
|
-
# ignore user defined hrefs
|
79
|
-
if 'LinkCheckable' == @type
|
80
|
-
return true if ignores_pattern_check(@check.href_ignores)
|
81
|
-
end
|
82
|
-
|
83
|
-
# ignore user defined alts
|
84
|
-
if 'ImageCheckable' == @type
|
85
|
-
return true if ignores_pattern_check(@check.alt_ignores)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def ignore_empty_alt?
|
90
|
-
@check.empty_alt_ignore
|
91
|
-
end
|
92
|
-
|
93
|
-
def allow_hash_href?
|
94
|
-
@check.allow_hash_href
|
95
|
-
end
|
96
|
-
|
97
|
-
# path is external to the file
|
98
|
-
def external?
|
99
|
-
!internal?
|
100
|
-
end
|
101
|
-
|
102
|
-
# path is an anchor or a query
|
103
|
-
def internal?
|
104
|
-
url.start_with? '#', '?'
|
105
|
-
end
|
106
|
-
|
107
|
-
def file_path
|
108
|
-
return if path.nil?
|
109
|
-
|
110
|
-
if path =~ %r{^/} # path relative to root
|
111
|
-
base = File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
|
112
|
-
elsif File.exist?(File.expand_path path, @check.src) # relative links, path is a file
|
113
|
-
base = File.dirname @check.path
|
114
|
-
elsif File.exist?(File.join(File.dirname(@check.path), path)) # relative links in nested dir, path is a file
|
115
|
-
base = File.dirname @check.path
|
116
|
-
else # relative link, path is a directory
|
117
|
-
base = @check.path
|
118
|
-
end
|
119
|
-
|
120
|
-
file = File.join base, path
|
121
|
-
|
122
|
-
# implicit index support
|
123
|
-
if File.directory?(file) && !unslashed_directory?(file)
|
124
|
-
file = File.join file, @check.options[:directory_index_file]
|
125
|
-
end
|
126
|
-
|
127
|
-
file
|
128
|
-
end
|
129
|
-
|
130
|
-
# checks if a file exists relative to the current pwd
|
131
|
-
def exists?
|
132
|
-
return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
|
133
|
-
@checked_paths[absolute_path] = File.exist? absolute_path
|
134
|
-
end
|
135
|
-
|
136
|
-
def absolute_path
|
137
|
-
path = file_path || @check.path
|
138
|
-
File.expand_path path, Dir.pwd
|
139
|
-
end
|
140
|
-
|
141
|
-
def ignores_pattern_check(links)
|
142
|
-
links.each do |ignore|
|
143
|
-
if ignore.is_a? String
|
144
|
-
return true if ignore == url
|
145
|
-
elsif ignore.is_a? Regexp
|
146
|
-
return true if ignore =~ url
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
false
|
151
|
-
end
|
152
|
-
|
153
|
-
def unslashed_directory?(file)
|
154
|
-
File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
|
155
|
-
end
|
156
|
-
|
157
|
-
def follow_location?
|
158
|
-
@check.typhoeus_opts && @check.typhoeus_opts[:followlocation]
|
159
|
-
end
|
160
|
-
|
161
|
-
private
|
162
|
-
|
163
|
-
def real_attr(attr)
|
164
|
-
attr.to_s unless attr.nil? || attr.empty?
|
165
|
-
end
|
166
|
-
end
|
167
|
-
end
|
168
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
class HtmlCheck < ::HTML::Proofer::CheckRunner
|
4
|
-
# new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
|
5
|
-
# and svg child tags (source: https://developer.mozilla.org/en-US/docs/Web/SVG/Element)
|
6
|
-
HTML5_TAGS = %w(article aside bdi details dialog figcaption
|
7
|
-
figure footer header main mark menuitem meter
|
8
|
-
nav progress rp rt ruby section summary
|
9
|
-
time wbr datalist keygen output color date
|
10
|
-
datetime datetime-local email month number
|
11
|
-
range search tel time url week canvas
|
12
|
-
svg audio embed source track video
|
13
|
-
altGlyph altGlyphDef altGlyphItem animate
|
14
|
-
animateColor animateMotion animateTransform
|
15
|
-
circle clipPath color-profile cursor defs
|
16
|
-
desc ellipse feBlend feColorMatrix
|
17
|
-
feComponentTransfer feComposite feConvolveMatrix
|
18
|
-
feDiffuseLighting feDisplacementMap feDistantLight
|
19
|
-
feFlood feFuncA feFuncB feFuncG feFuncR feGaussianBlur
|
20
|
-
feImage feMerge feMergeNode feMorphology feOffset
|
21
|
-
fePointLight feSpecularLighting feSpotLight feTile
|
22
|
-
feTurbulence filter font font-face font-face-format
|
23
|
-
font-face-name font-face-src font-face-uri
|
24
|
-
foreignObject g glyph glyphRef hkern image line
|
25
|
-
linearGradient marker mask metadata missing-glyph
|
26
|
-
mpath path pattern polygon polyline radialGradient
|
27
|
-
rect set stop switch symbol text textPath tref tspan use
|
28
|
-
view vkern)
|
29
|
-
|
30
|
-
SCRIPT_EMBEDS_MSG = /Element script embeds close tag/
|
31
|
-
|
32
|
-
def run
|
33
|
-
@html.errors.each do |error|
|
34
|
-
message = error.message
|
35
|
-
line = error.line
|
36
|
-
# Nokogiri (or rather libxml2 underhood) only recognizes html4 tags,
|
37
|
-
# so we need to skip errors caused by the new tags in html5
|
38
|
-
next if HTML5_TAGS.include? message[/Tag ([\w-]+) invalid/o, 1]
|
39
|
-
|
40
|
-
# tags embedded in scripts are used in templating languages: http://git.io/vOovv
|
41
|
-
next if @validation_opts[:ignore_script_embeds] && message =~ SCRIPT_EMBEDS_MSG
|
42
|
-
|
43
|
-
add_issue(message, line)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
@@ -1,54 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
class ImageCheckable < ::HTML::Proofer::Checkable
|
4
|
-
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
|
5
|
-
|
6
|
-
attr_reader :alt
|
7
|
-
|
8
|
-
def empty_alt_tag?
|
9
|
-
alt.strip.empty?
|
10
|
-
end
|
11
|
-
|
12
|
-
def terrible_filename?
|
13
|
-
src =~ SCREEN_SHOT_REGEX
|
14
|
-
end
|
15
|
-
|
16
|
-
def src
|
17
|
-
real_attr(@src) || real_attr(@srcset)
|
18
|
-
end
|
19
|
-
|
20
|
-
def missing_src?
|
21
|
-
!src
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
class ImageCheck < ::HTML::Proofer::CheckRunner
|
26
|
-
def run
|
27
|
-
@html.css('img').each do |node|
|
28
|
-
img = ImageCheckable.new(node, self)
|
29
|
-
line = node.line
|
30
|
-
|
31
|
-
next if img.ignore?
|
32
|
-
|
33
|
-
# screenshot filenames should return because of terrible names
|
34
|
-
next add_issue("image has a terrible filename (#{img.src})", line) if img.terrible_filename?
|
35
|
-
|
36
|
-
# does the image exist?
|
37
|
-
if img.missing_src?
|
38
|
-
add_issue('image has no src or srcset attribute', line)
|
39
|
-
else
|
40
|
-
if img.remote?
|
41
|
-
add_to_external_urls(img.src, line)
|
42
|
-
else
|
43
|
-
add_issue("internal image #{img.src} does not exist", line) unless img.exists?
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
if img.alt.nil? || (img.empty_alt_tag? && !img.ignore_empty_alt?)
|
48
|
-
add_issue("image #{img.src} does not have an alt attribute", line)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
external_urls
|
53
|
-
end
|
54
|
-
end
|
@@ -1,40 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
class ScriptCheckable < ::HTML::Proofer::Checkable
|
4
|
-
|
5
|
-
def src
|
6
|
-
real_attr @src
|
7
|
-
end
|
8
|
-
|
9
|
-
def missing_src?
|
10
|
-
!src
|
11
|
-
end
|
12
|
-
|
13
|
-
def blank?
|
14
|
-
@text.strip.empty?
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
class ScriptCheck < ::HTML::Proofer::CheckRunner
|
20
|
-
def run
|
21
|
-
@html.css('script').each do |node|
|
22
|
-
script = ScriptCheckable.new(node, self)
|
23
|
-
line = node.line
|
24
|
-
|
25
|
-
next if script.ignore?
|
26
|
-
next unless script.blank?
|
27
|
-
|
28
|
-
# does the script exist?
|
29
|
-
if script.missing_src?
|
30
|
-
add_issue('script is empty and has no src attribute', line)
|
31
|
-
elsif script.remote?
|
32
|
-
add_to_external_urls(script.src, line)
|
33
|
-
else
|
34
|
-
add_issue("internal script #{script.src} does not exist", line) unless script.exists?
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
external_urls
|
39
|
-
end
|
40
|
-
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
module HTML
|
2
|
-
class Proofer
|
3
|
-
module Configuration
|
4
|
-
require_relative 'version'
|
5
|
-
|
6
|
-
PROOFER_DEFAULTS = {
|
7
|
-
:allow_hash_href => false,
|
8
|
-
:alt_ignore => [],
|
9
|
-
:check_external_hash => false,
|
10
|
-
:check_favicon => false,
|
11
|
-
:check_html => false,
|
12
|
-
:checks_to_ignore => [],
|
13
|
-
:directory_index_file => 'index.html',
|
14
|
-
:disable_external => false,
|
15
|
-
:empty_alt_ignore => false,
|
16
|
-
:enforce_https => false,
|
17
|
-
:error_sort => :path,
|
18
|
-
:ext => '.html',
|
19
|
-
:external_only => false,
|
20
|
-
:file_ignore => [],
|
21
|
-
:href_ignore => [],
|
22
|
-
:href_swap => [],
|
23
|
-
:only_4xx => false,
|
24
|
-
:url_ignore => [],
|
25
|
-
:verbose => false
|
26
|
-
}
|
27
|
-
|
28
|
-
TYPHOEUS_DEFAULTS = {
|
29
|
-
:followlocation => true,
|
30
|
-
:headers => {
|
31
|
-
'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTML::Proofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
|
32
|
-
}
|
33
|
-
}
|
34
|
-
|
35
|
-
HYDRA_DEFAULTS = {
|
36
|
-
:max_concurrency => 50
|
37
|
-
}
|
38
|
-
|
39
|
-
def self.to_regex?(item)
|
40
|
-
if item.start_with?('/') && item.end_with?('/')
|
41
|
-
Regexp.new item[1...-1]
|
42
|
-
else
|
43
|
-
item
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
data/lib/html/proofer/log.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
require 'yell'
|
2
|
-
require 'colored'
|
3
|
-
|
4
|
-
module HTML
|
5
|
-
class Proofer
|
6
|
-
class Log
|
7
|
-
include Yell::Loggable
|
8
|
-
|
9
|
-
def initialize(verbose, verbosity = nil)
|
10
|
-
log_level = if verbosity.nil?
|
11
|
-
verbose ? :debug : :info
|
12
|
-
else
|
13
|
-
verbosity
|
14
|
-
end
|
15
|
-
|
16
|
-
@logger = Yell.new(:format => false, \
|
17
|
-
:name => 'HTML::Proofer', \
|
18
|
-
:level => "gte.#{log_level}") do |l|
|
19
|
-
l.adapter :stdout, :level => [:debug, :info, :warn]
|
20
|
-
l.adapter :stderr, :level => [:error, :fatal]
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def log(level, color, message)
|
25
|
-
@logger.send level, colorize(color, message)
|
26
|
-
end
|
27
|
-
|
28
|
-
def colorize(color, message)
|
29
|
-
if $stdout.isatty && $stderr.isatty
|
30
|
-
Colored.colorize(message, foreground: color)
|
31
|
-
else
|
32
|
-
message
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
# dumb override to play nice with Typhoeus/Ethon
|
37
|
-
def debug(message = nil)
|
38
|
-
log(:debug, :yellow, message) unless message.nil?
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,222 +0,0 @@
|
|
1
|
-
require 'typhoeus'
|
2
|
-
require 'uri'
|
3
|
-
require_relative './utils'
|
4
|
-
require_relative './cache'
|
5
|
-
|
6
|
-
module HTML
|
7
|
-
class Proofer
|
8
|
-
class UrlValidator
|
9
|
-
include HTML::Proofer::Utils
|
10
|
-
|
11
|
-
attr_accessor :logger, :external_urls, :iterable_external_urls, :hydra
|
12
|
-
|
13
|
-
def initialize(logger, external_urls, options, typhoeus_opts, hydra_opts)
|
14
|
-
@logger = logger
|
15
|
-
@external_urls = external_urls
|
16
|
-
@iterable_external_urls = {}
|
17
|
-
@failed_tests = []
|
18
|
-
@options = options
|
19
|
-
@hydra = Typhoeus::Hydra.new(hydra_opts)
|
20
|
-
@typhoeus_opts = typhoeus_opts
|
21
|
-
@external_domain_paths_with_queries = {}
|
22
|
-
@cache = Cache.new(@logger, @options[:cache])
|
23
|
-
end
|
24
|
-
|
25
|
-
def run
|
26
|
-
@iterable_external_urls = remove_query_values
|
27
|
-
|
28
|
-
if @cache.exists && @cache.load
|
29
|
-
cache_count = @cache.cache_log.length
|
30
|
-
cache_text = pluralize(cache_count, 'link', 'links')
|
31
|
-
|
32
|
-
logger.log :info, :blue, "Found #{cache_text} in the cache..."
|
33
|
-
|
34
|
-
urls_to_check = @cache.detect_url_changes(@iterable_external_urls)
|
35
|
-
|
36
|
-
@cache.cache_log.each_pair do |url, cache|
|
37
|
-
if @cache.within_timeframe?(cache['time'])
|
38
|
-
next if cache['message'].empty? # these were successes to skip
|
39
|
-
urls_to_check[url] = cache['filenames'] # these are failures to retry
|
40
|
-
else
|
41
|
-
urls_to_check[url] = cache['filenames'] # pass or fail, recheck expired links
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
external_link_checker(urls_to_check)
|
46
|
-
else
|
47
|
-
external_link_checker(@iterable_external_urls)
|
48
|
-
end
|
49
|
-
|
50
|
-
@cache.write
|
51
|
-
@failed_tests
|
52
|
-
end
|
53
|
-
|
54
|
-
def remove_query_values
|
55
|
-
return nil if @external_urls.nil?
|
56
|
-
iterable_external_urls = @external_urls.dup
|
57
|
-
@external_urls.keys.each do |url|
|
58
|
-
uri = begin
|
59
|
-
Addressable::URI.parse(url)
|
60
|
-
rescue URI::Error, Addressable::URI::InvalidURIError
|
61
|
-
@logger.log :error, :red, "#{url} is an invalid URL"
|
62
|
-
nil
|
63
|
-
end
|
64
|
-
next if uri.nil? || uri.query.nil?
|
65
|
-
iterable_external_urls.delete(url) unless new_url_query_values?(uri)
|
66
|
-
end
|
67
|
-
iterable_external_urls
|
68
|
-
end
|
69
|
-
|
70
|
-
# remember queries we've seen, ignore future ones
|
71
|
-
def new_url_query_values?(uri)
|
72
|
-
queries = uri.query_values.keys.join('-')
|
73
|
-
domain_path = extract_domain_path(uri)
|
74
|
-
if @external_domain_paths_with_queries[domain_path].nil?
|
75
|
-
@external_domain_paths_with_queries[domain_path] = [queries]
|
76
|
-
true
|
77
|
-
elsif !@external_domain_paths_with_queries[domain_path].include?(queries)
|
78
|
-
@external_domain_paths_with_queries[domain_path] << queries
|
79
|
-
true
|
80
|
-
else
|
81
|
-
false
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
def extract_domain_path(uri)
|
86
|
-
uri.host + uri.path
|
87
|
-
end
|
88
|
-
|
89
|
-
# Proofer runs faster if we pull out all the external URLs and run the checks
|
90
|
-
# at the end. Otherwise, we're halting the consuming process for every file during
|
91
|
-
# the check_directory_of_files process.
|
92
|
-
#
|
93
|
-
# In addition, sorting the list lets libcurl keep connections to the same hosts alive.
|
94
|
-
#
|
95
|
-
# Finally, we'll first make a HEAD request, rather than GETing all the contents.
|
96
|
-
# If the HEAD fails, we'll fall back to GET, as some servers are not configured
|
97
|
-
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
98
|
-
# not an option.
|
99
|
-
def external_link_checker(external_urls)
|
100
|
-
external_urls = Hash[external_urls.sort]
|
101
|
-
|
102
|
-
count = external_urls.length
|
103
|
-
check_text = pluralize(count, 'external link', 'external links')
|
104
|
-
logger.log :info, :blue, "Checking #{check_text}..."
|
105
|
-
|
106
|
-
Ethon.logger = logger # log from Typhoeus/Ethon
|
107
|
-
|
108
|
-
url_processor(external_urls)
|
109
|
-
|
110
|
-
logger.log :debug, :yellow, "Running requests for:"
|
111
|
-
logger.log :debug, :yellow, "###\n" + external_urls.keys.join("\n") + "\n###"
|
112
|
-
|
113
|
-
hydra.run
|
114
|
-
end
|
115
|
-
|
116
|
-
def url_processor(external_urls)
|
117
|
-
external_urls.each_pair do |href, filenames|
|
118
|
-
href = begin
|
119
|
-
clean_url(href)
|
120
|
-
rescue URI::Error, Addressable::URI::InvalidURIError
|
121
|
-
add_external_issue(filenames, "#{href} is an invalid URL")
|
122
|
-
next
|
123
|
-
end
|
124
|
-
|
125
|
-
if hash?(href) && @options[:check_external_hash]
|
126
|
-
queue_request(:get, href, filenames)
|
127
|
-
else
|
128
|
-
queue_request(:head, href, filenames)
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
def clean_url(href)
|
134
|
-
Addressable::URI.parse(href).normalize
|
135
|
-
end
|
136
|
-
|
137
|
-
def queue_request(method, href, filenames)
|
138
|
-
request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method }))
|
139
|
-
request.on_complete { |response| response_handler(response, filenames) }
|
140
|
-
hydra.queue request
|
141
|
-
end
|
142
|
-
|
143
|
-
def response_handler(response, filenames)
|
144
|
-
effective_url = response.options[:effective_url]
|
145
|
-
href = response.request.base_url.to_s
|
146
|
-
method = response.request.options[:method]
|
147
|
-
response_code = response.code
|
148
|
-
|
149
|
-
debug_msg = "Received a #{response_code} for #{href}"
|
150
|
-
debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
|
151
|
-
logger.log :debug, :yellow, debug_msg
|
152
|
-
|
153
|
-
if response_code.between?(200, 299)
|
154
|
-
check_hash_in_2xx_response(href, effective_url, response, filenames)
|
155
|
-
@cache.add(href, filenames, response_code)
|
156
|
-
elsif response.timed_out?
|
157
|
-
handle_timeout(href, filenames, response_code)
|
158
|
-
elsif response_code == 0
|
159
|
-
handle_failure(href, filenames, response_code)
|
160
|
-
elsif method == :head
|
161
|
-
queue_request(:get, href, filenames)
|
162
|
-
else
|
163
|
-
return if @options[:only_4xx] && !response_code.between?(400, 499)
|
164
|
-
# Received a non-successful http response.
|
165
|
-
msg = "External link #{href} failed: #{response_code} #{response.return_message}"
|
166
|
-
add_external_issue(filenames, msg, response_code)
|
167
|
-
@cache.add(href, filenames, response_code, msg)
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
# Even though the response was a success, we may have been asked to check
|
172
|
-
# if the hash on the URL exists on the page
|
173
|
-
def check_hash_in_2xx_response(href, effective_url, response, filenames)
|
174
|
-
return if @options[:only_4xx]
|
175
|
-
return unless @options[:check_external_hash]
|
176
|
-
return unless (hash = hash?(href))
|
177
|
-
|
178
|
-
body_doc = create_nokogiri(response.body)
|
179
|
-
|
180
|
-
# user-content is a special addition by GitHub.
|
181
|
-
xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"])
|
182
|
-
if URI.parse(href).host.match(/github\.com/i)
|
183
|
-
xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])
|
184
|
-
end
|
185
|
-
|
186
|
-
return unless body_doc.xpath(xpath).empty?
|
187
|
-
|
188
|
-
msg = "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not"
|
189
|
-
add_external_issue(filenames, msg, response.code)
|
190
|
-
@cache.add(href, filenames, response.code, msg)
|
191
|
-
end
|
192
|
-
|
193
|
-
def handle_timeout(href, filenames, response_code)
|
194
|
-
msg = "External link #{href} failed: got a time out (response code #{response_code})"
|
195
|
-
@cache.add(href, filenames, 0, msg)
|
196
|
-
return if @options[:only_4xx]
|
197
|
-
add_external_issue(filenames, msg, response_code)
|
198
|
-
end
|
199
|
-
|
200
|
-
def handle_failure(href, filenames, response_code)
|
201
|
-
msg = "External link #{href} failed: response code #{response_code} means something's wrong"
|
202
|
-
@cache.add(href, filenames, 0, msg)
|
203
|
-
return if @options[:only_4xx]
|
204
|
-
add_external_issue(filenames, msg, response_code)
|
205
|
-
end
|
206
|
-
|
207
|
-
def add_external_issue(filenames, desc, status = nil)
|
208
|
-
if filenames.nil?
|
209
|
-
@failed_tests << CheckRunner::Issue.new('', desc, nil, status)
|
210
|
-
else
|
211
|
-
filenames.each { |f| @failed_tests << CheckRunner::Issue.new(f, desc, nil, status) }
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
def hash?(url)
|
216
|
-
URI.parse(url).fragment
|
217
|
-
rescue URI::InvalidURIError
|
218
|
-
nil
|
219
|
-
end
|
220
|
-
end
|
221
|
-
end
|
222
|
-
end
|