html-proofer 2.6.4 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/{htmlproof → htmlproofer} +31 -24
- data/lib/html-proofer.rb +47 -0
- data/lib/html-proofer/cache.rb +153 -0
- data/lib/html-proofer/check.rb +63 -0
- data/lib/{html/proofer/checks → html-proofer/check}/favicon.rb +2 -8
- data/lib/html-proofer/check/html.rb +21 -0
- data/lib/html-proofer/check/images.rb +47 -0
- data/lib/{html/proofer/checks → html-proofer/check}/links.rb +40 -48
- data/lib/html-proofer/check/scripts.rb +28 -0
- data/lib/html-proofer/configuration.rb +56 -0
- data/lib/html-proofer/element.rb +165 -0
- data/lib/{html/proofer/check_runner → html-proofer}/issue.rb +8 -10
- data/lib/html-proofer/log.rb +49 -0
- data/lib/html-proofer/runner.rb +160 -0
- data/lib/html-proofer/url_validator.rb +218 -0
- data/lib/html-proofer/utils.rb +40 -0
- data/lib/html-proofer/version.rb +3 -0
- metadata +20 -20
- data/lib/html/proofer.rb +0 -191
- data/lib/html/proofer/cache.rb +0 -141
- data/lib/html/proofer/check_runner.rb +0 -70
- data/lib/html/proofer/checkable.rb +0 -168
- data/lib/html/proofer/checks/html.rb +0 -46
- data/lib/html/proofer/checks/images.rb +0 -54
- data/lib/html/proofer/checks/scripts.rb +0 -40
- data/lib/html/proofer/configuration.rb +0 -48
- data/lib/html/proofer/log.rb +0 -42
- data/lib/html/proofer/url_validator.rb +0 -222
- data/lib/html/proofer/utils.rb +0 -42
- data/lib/html/proofer/version.rb +0 -5
- data/lib/html/proofer/xpathfunctions.rb +0 -9
@@ -1,168 +0,0 @@
|
|
1
|
-
require 'addressable/uri'
|
2
|
-
require_relative './utils'
|
3
|
-
|
4
|
-
module HTML
  class Proofer
    # Represents the superclass from which all checks derive.
    #
    # Wraps one parsed element. Every attribute on the element is copied into
    # an instance variable (with `-`, `:` and `.` mapped to `_`), and the
    # helpers below answer questions about the URL the element points at.
    class Checkable
      include HTML::Proofer::Utils

      attr_reader :line

      # @param obj   the parsed element; must respond to #attributes, #content and #line
      # @param check the owning check; queried for options, paths and ignore lists
      def initialize(obj, check)
        obj.attributes.each_pair do |attribute, value|
          attr_value = value.value
          begin
            instance_variable_set("@#{attribute.tr('-:.', '_')}", attr_value)
          rescue NameError
            # Names such as "(click)" or "@submit" are usable in markup but are
            # not legal instance variable names; skip them rather than abort.
            next
          end
        end

        @text = obj.content
        @check = check
        @checked_paths = {}
        @type = self.class.name
        @line = obj.line
        # Reset the memo used by #parts so that an element attribute literally
        # named "parts" cannot pre-seed it with a String.
        @parts = nil

        if @href && @check.options[:href_swap]
          @href = swap(@href, @check.options[:href_swap])
        end

        # fix up missing protocols (\A, not ^: only the very start of the value counts)
        @href.insert 0, 'http:' if @href =~ %r{\A//}
        @src.insert 0, 'http:' if @src =~ %r{\A//}
      end

      # @return [String] the first of src, srcset, href that is set, or '' when none is
      def url
        @src || @srcset || @href || ''
      end

      # @return [Boolean] true when #url could be parsed
      def valid?
        !parts.nil?
      end

      # Parsed form of #url, memoized on success.
      #
      # @return [Addressable::URI, nil] nil when the URL cannot be parsed
      def parts
        @parts ||= Addressable::URI.parse url
      rescue URI::Error, Addressable::URI::InvalidURIError
        @parts = nil
      end

      # @return [String, nil] the unencoded path component, nil for an unparsable URL
      def path
        Addressable::URI.unencode parts.path unless parts.nil?
      end

      # NOTE(review): this shadows Object#hash and returns the URL fragment (or
      # nil), so instances should not be relied on as Hash keys.
      def hash
        parts.fragment unless parts.nil?
      end

      # @return [String, nil] the URL scheme, nil for an unparsable URL
      def scheme
        parts.scheme unless parts.nil?
      end

      # path is to an external server
      def remote?
        %w( http https ).include? scheme
      end

      # has a scheme, but not http(s)
      def non_http_remote?
        !scheme.nil? && !remote?
      end

      # @return [Boolean] true when this element should be skipped entirely
      def ignore?
        return true if @data_proofer_ignore

        # ignore base64 encoded images
        return true if %w(ImageCheckable FaviconCheckable).include?(@type) && url.start_with?('data:image')

        # ignore user defined URLs
        return true if ignores_pattern_check(@check.url_ignores)

        # ignore user defined hrefs
        return true if 'LinkCheckable' == @type && ignores_pattern_check(@check.href_ignores)

        # ignore user defined alts
        return true if 'ImageCheckable' == @type && ignores_pattern_check(@check.alt_ignores)

        false
      end

      def ignore_empty_alt?
        @check.empty_alt_ignore
      end

      def allow_hash_href?
        @check.allow_hash_href
      end

      # path is external to the file
      def external?
        !internal?
      end

      # path is an anchor or a query
      def internal?
        url.start_with? '#', '?'
      end

      # Resolves #path against the checked source to a filesystem location.
      #
      # @return [String, nil] nil when the URL has no parsable path
      def file_path
        relative = path
        return if relative.nil?

        base =
          if relative.start_with?('/') # path relative to root
            File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
          elsif File.exist?(File.expand_path(relative, @check.src)) # relative links, path is a file
            File.dirname @check.path
          elsif File.exist?(File.join(File.dirname(@check.path), relative)) # relative links in nested dir, path is a file
            File.dirname @check.path
          else # relative link, path is a directory
            @check.path
          end

        file = File.join base, relative

        # implicit index support
        if File.directory?(file) && !unslashed_directory?(file)
          file = File.join file, @check.options[:directory_index_file]
        end

        file
      end

      # checks if a file exists relative to the current pwd
      # (the answer is remembered per absolute path)
      def exists?
        target = absolute_path
        return @checked_paths[target] if @checked_paths.key? target
        @checked_paths[target] = File.exist? target
      end

      # @return [String] #file_path (or the checked file itself) expanded against Dir.pwd
      def absolute_path
        target = file_path || @check.path
        File.expand_path target, Dir.pwd
      end

      # @param links [Array<String, Regexp>] ignore entries; Strings must equal
      #   #url exactly, Regexps must match it; other entry types never match
      # @return [Boolean]
      def ignores_pattern_check(links)
        links.any? do |ignore|
          case ignore
          when String then ignore == url
          when Regexp then ignore =~ url
          end
        end
      end

      # a directory referenced without a trailing separator, when redirects are not followed
      def unslashed_directory?(file)
        File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
      end

      def follow_location?
        @check.typhoeus_opts && @check.typhoeus_opts[:followlocation]
      end

      private

      # @return [String, nil] the attribute as a String, or nil when it is nil or empty
      def real_attr(attr)
        attr.to_s unless attr.nil? || attr.empty?
      end
    end
  end
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
# Reports the parser errors recorded on the document, except for the ones
# that only exist because the parser does not know newer tags.
class HtmlCheck < ::HTML::Proofer::CheckRunner
  # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
  # and svg child tags (source: https://developer.mozilla.org/en-US/docs/Web/SVG/Element)
  HTML5_TAGS = %w(
    article aside bdi details dialog figcaption
    figure footer header main mark menuitem meter
    nav progress rp rt ruby section summary
    time wbr datalist keygen output color date
    datetime datetime-local email month number
    range search tel time url week canvas
    svg audio embed source track video
    altGlyph altGlyphDef altGlyphItem animate
    animateColor animateMotion animateTransform
    circle clipPath color-profile cursor defs
    desc ellipse feBlend feColorMatrix
    feComponentTransfer feComposite feConvolveMatrix
    feDiffuseLighting feDisplacementMap feDistantLight
    feFlood feFuncA feFuncB feFuncG feFuncR feGaussianBlur
    feImage feMerge feMergeNode feMorphology feOffset
    fePointLight feSpecularLighting feSpotLight feTile
    feTurbulence filter font font-face font-face-format
    font-face-name font-face-src font-face-uri
    foreignObject g glyph glyphRef hkern image line
    linearGradient marker mask metadata missing-glyph
    mpath path pattern polygon polyline radialGradient
    rect set stop switch symbol text textPath tref tspan use
    view vkern
  )

  SCRIPT_EMBEDS_MSG = /Element script embeds close tag/

  # Walks every parse error and files an issue for each one that is not
  # explicitly tolerated.
  def run
    @html.errors.each do |error|
      message = error.message

      # Nokogiri (or rather libxml2 underneath) only recognizes html4 tags,
      # so errors caused by the newer tags listed above are skipped
      offending_tag = message[/Tag ([\w-]+) invalid/o, 1]
      next if HTML5_TAGS.include?(offending_tag)

      # tags embedded in scripts are used in templating languages: http://git.io/vOovv
      tolerate_embeds = @validation_opts[:ignore_script_embeds]
      next if tolerate_embeds && message =~ SCRIPT_EMBEDS_MSG

      add_issue(message, error.line)
    end
  end
end
|
@@ -1,54 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
# An <img> element together with the questions the image check asks of it.
class ImageCheckable < ::HTML::Proofer::Checkable
  SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/

  attr_reader :alt

  # true when the alt text is nothing but whitespace (alt must not be nil)
  def empty_alt_tag?
    trimmed = alt.strip
    trimmed.empty?
  end

  # truthy (the match offset) when the source looks like a default screenshot name
  def terrible_filename?
    SCREEN_SHOT_REGEX =~ src
  end

  # the src attribute when it is non-empty, otherwise the srcset attribute
  # when that is non-empty, otherwise nil
  def src
    primary = real_attr(@src)
    primary.nil? ? real_attr(@srcset) : primary
  end

  # true when neither src nor srcset carries a value
  def missing_src?
    src.nil?
  end
end
|
24
|
-
|
25
|
-
# Checks every <img> in the document: filename quality, presence of the
# image (internal) or queueing of its URL (external), and alt text.
class ImageCheck < ::HTML::Proofer::CheckRunner
  # @return whatever #external_urls holds once all images were inspected
  def run
    @html.css('img').each do |node|
      img = ImageCheckable.new(node, self)
      line = node.line

      next if img.ignore?

      # screenshot filenames are reported and nothing else is checked for them
      if img.terrible_filename?
        add_issue("image has a terrible filename (#{img.src})", line)
        next
      end

      # does the image exist?
      if img.missing_src?
        add_issue('image has no src or srcset attribute', line)
      elsif img.remote?
        add_to_external_urls(img.src, line)
      elsif !img.exists?
        add_issue("internal image #{img.src} does not exist", line)
      end

      # a missing alt always counts; a blank alt counts unless blank alts are ignored
      alt_problem = img.alt.nil? || (img.empty_alt_tag? && !img.ignore_empty_alt?)
      add_issue("image #{img.src} does not have an alt attribute", line) if alt_problem
    end

    external_urls
  end
end
|
@@ -1,40 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
# A <script> element together with the questions the script check asks of it.
class ScriptCheckable < ::HTML::Proofer::Checkable
  # the src attribute, or nil when it is absent or empty
  def src
    real_attr(@src)
  end

  # true when there is no usable src attribute
  def missing_src?
    src.nil?
  end

  # true when the element body holds nothing but whitespace
  def blank?
    body = @text.strip
    body.empty?
  end
end
|
18
|
-
|
19
|
-
# Checks every <script> without inline content: it must have a src, and
# that src must exist (internal) or is queued for checking (external).
class ScriptCheck < ::HTML::Proofer::CheckRunner
  # @return whatever #external_urls holds once all scripts were inspected
  def run
    @html.css('script').each do |node|
      script = ScriptCheckable.new(node, self)
      line = node.line

      # ignored scripts and scripts with inline content need no further checks
      next if script.ignore? || !script.blank?

      # does the script exist?
      if script.missing_src?
        add_issue('script is empty and has no src attribute', line)
      elsif script.remote?
        add_to_external_urls(script.src, line)
      elsif !script.exists?
        add_issue("internal script #{script.src} does not exist", line)
      end
    end

    external_urls
  end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
module HTML
  class Proofer
    # Default option sets for the proofer, for Typhoeus requests and for the
    # Hydra queue, plus a helper that turns "/pattern/" strings into Regexps.
    module Configuration
      require_relative 'version'

      PROOFER_DEFAULTS = {
        :allow_hash_href => false,
        :alt_ignore => [],
        :check_external_hash => false,
        :check_favicon => false,
        :check_html => false,
        :checks_to_ignore => [],
        :directory_index_file => 'index.html',
        :disable_external => false,
        :empty_alt_ignore => false,
        :enforce_https => false,
        :error_sort => :path,
        :ext => '.html',
        :external_only => false,
        :file_ignore => [],
        :href_ignore => [],
        :href_swap => [],
        :only_4xx => false,
        :url_ignore => [],
        :verbose => false
      }

      TYPHOEUS_DEFAULTS = {
        :followlocation => true,
        :headers => {
          'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTML::Proofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
        }
      }

      HYDRA_DEFAULTS = {
        :max_concurrency => 50
      }

      # Converts a String written as "/pattern/" into a Regexp built from the
      # text between the slashes; anything else is returned unchanged.
      #
      # "/" and "//" are returned as plain Strings: they have no pattern
      # between the delimiters, and the empty Regexp they used to produce
      # matches every input.
      #
      # @param item [String, Object] the raw value; non-Strings pass through
      # @return [Regexp, Object]
      def self.to_regex?(item)
        return item unless item.is_a?(String)

        if item.length > 2 && item.start_with?('/') && item.end_with?('/')
          Regexp.new item[1...-1]
        else
          item
        end
      end
    end
  end
end
|
data/lib/html/proofer/log.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
require 'yell'
|
2
|
-
require 'colored'
|
3
|
-
|
4
|
-
module HTML
  class Proofer
    # Thin wrapper around a Yell logger: debug/info/warn go to stdout,
    # error/fatal go to stderr, and messages are colored on a terminal.
    class Log
      include Yell::Loggable

      # @param verbose   truthy selects :debug, falsy selects :info (only
      #   consulted when verbosity is nil)
      # @param verbosity explicit level; used as given whenever it is not nil
      def initialize(verbose, verbosity = nil)
        log_level = verbosity
        log_level = verbose ? :debug : :info if log_level.nil?

        @logger = Yell.new(
          :format => false,
          :name => 'HTML::Proofer',
          :level => "gte.#{log_level}"
        ) do |l|
          l.adapter :stdout, :level => [:debug, :info, :warn]
          l.adapter :stderr, :level => [:error, :fatal]
        end
      end

      # Sends message, colorized, to the logger method named by level.
      def log(level, color, message)
        @logger.send level, colorize(color, message)
      end

      # Colors the message only when both stdout and stderr are terminals.
      def colorize(color, message)
        return message unless $stdout.isatty && $stderr.isatty

        Colored.colorize(message, foreground: color)
      end

      # dumb override to play nice with Typhoeus/Ethon
      def debug(message = nil)
        return if message.nil?

        log(:debug, :yellow, message)
      end
    end
  end
end
|
@@ -1,222 +0,0 @@
|
|
1
|
-
require 'typhoeus'
|
2
|
-
require 'uri'
|
3
|
-
require_relative './utils'
|
4
|
-
require_relative './cache'
|
5
|
-
|
6
|
-
module HTML
  class Proofer
    # Requests a collection of external URLs and collects an issue for every
    # URL that fails. Results are also handed to a Cache so that later runs
    # can skip URLs that recently succeeded.
    class UrlValidator
      include HTML::Proofer::Utils

      attr_accessor :logger, :external_urls, :iterable_external_urls, :hydra

      # @param logger        object responding to #log(level, color, message)
      # @param external_urls hash of URL => filenames referencing it (filenames may be nil)
      # @param options       proofer options; :cache, :check_external_hash and :only_4xx are read here
      # @param typhoeus_opts options merged into every Typhoeus::Request
      # @param hydra_opts    options handed to Typhoeus::Hydra.new
      def initialize(logger, external_urls, options, typhoeus_opts, hydra_opts)
        @logger = logger
        @external_urls = external_urls
        @iterable_external_urls = {}
        @failed_tests = []
        @options = options
        @hydra = Typhoeus::Hydra.new(hydra_opts)
        @typhoeus_opts = typhoeus_opts
        @external_domain_paths_with_queries = {}
        @cache = Cache.new(@logger, @options[:cache])
      end

      # Checks the URLs (taking the cache into account when one exists and
      # loads), writes the cache back and returns the collected issues.
      #
      # @return [Array] the issues recorded during this run
      def run
        @iterable_external_urls = remove_query_values

        if @cache.exists && @cache.load
          cache_count = @cache.cache_log.length
          cache_text = pluralize(cache_count, 'link', 'links')

          logger.log :info, :blue, "Found #{cache_text} in the cache..."

          urls_to_check = @cache.detect_url_changes(@iterable_external_urls)

          @cache.cache_log.each_pair do |url, cache|
            # fresh entries with an empty message were successes: skip them
            next if @cache.within_timeframe?(cache['time']) && cache['message'].empty?

            # fresh failures are retried; expired entries are rechecked, pass or fail
            urls_to_check[url] = cache['filenames']
          end

          external_link_checker(urls_to_check)
        else
          external_link_checker(@iterable_external_urls)
        end

        @cache.write
        @failed_tests
      end

      # Copy of the external URLs in which, per host + path, only the first
      # URL for each distinct set of query keys is kept.
      #
      # @return [Hash, nil] nil when no external URLs were given
      def remove_query_values
        return nil if @external_urls.nil?

        iterable_external_urls = @external_urls.dup
        @external_urls.each_key do |url|
          uri = begin
                  Addressable::URI.parse(url)
                rescue URI::Error, Addressable::URI::InvalidURIError
                  @logger.log :error, :red, "#{url} is an invalid URL"
                  nil
                end
          next if uri.nil? || uri.query.nil?
          iterable_external_urls.delete(url) unless new_url_query_values?(uri)
        end
        iterable_external_urls
      end

      # remember queries we've seen, ignore future ones
      #
      # @return [Boolean] true the first time this set of query keys is seen
      #   for the URI's host + path
      def new_url_query_values?(uri)
        queries = uri.query_values.keys.join('-')
        domain_path = extract_domain_path(uri)
        seen = (@external_domain_paths_with_queries[domain_path] ||= [])
        return false if seen.include?(queries)

        seen << queries
        true
      end

      # @return [String] host followed by path; a missing host contributes
      #   nothing instead of raising
      def extract_domain_path(uri)
        "#{uri.host}#{uri.path}"
      end

      # Proofer runs faster if we pull out all the external URLs and run the checks
      # at the end. Otherwise, we're halting the consuming process for every file during
      # the check_directory_of_files process.
      #
      # In addition, sorting the list lets libcurl keep connections to the same hosts alive.
      #
      # Finally, we'll first make a HEAD request, rather than GETing all the contents.
      # If the HEAD fails, we'll fall back to GET, as some servers are not configured
      # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
      # not an option.
      def external_link_checker(external_urls)
        external_urls = Hash[external_urls.sort]

        count = external_urls.length
        check_text = pluralize(count, 'external link', 'external links')
        logger.log :info, :blue, "Checking #{check_text}..."

        Ethon.logger = logger # log from Typhoeus/Ethon

        url_processor(external_urls)

        logger.log :debug, :yellow, "Running requests for:"
        logger.log :debug, :yellow, "###\n" + external_urls.keys.join("\n") + "\n###"

        hydra.run
      end

      # Queues one request per URL: GET when the URL has a fragment that must
      # be checked, HEAD otherwise. Unparsable URLs are reported as issues.
      def url_processor(external_urls)
        external_urls.each_pair do |href, filenames|
          href = begin
                   clean_url(href)
                 rescue URI::Error, Addressable::URI::InvalidURIError
                   add_external_issue(filenames, "#{href} is an invalid URL")
                   next
                 end

          if hash?(href) && @options[:check_external_hash]
            queue_request(:get, href, filenames)
          else
            queue_request(:head, href, filenames)
          end
        end
      end

      # @return the parsed and normalized form of href
      def clean_url(href)
        Addressable::URI.parse(href).normalize
      end

      # Queues a request whose completion is routed to #response_handler.
      def queue_request(method, href, filenames)
        request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method }))
        request.on_complete { |response| response_handler(response, filenames) }
        hydra.queue request
      end

      # Classifies a finished response: success, timeout, connection failure,
      # failed HEAD (retried as GET) or failed GET (reported).
      def response_handler(response, filenames)
        effective_url = response.options[:effective_url]
        href = response.request.base_url.to_s
        method = response.request.options[:method]
        response_code = response.code

        debug_msg = "Received a #{response_code} for #{href}"
        debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
        logger.log :debug, :yellow, debug_msg

        if response_code.between?(200, 299)
          hash_missing = check_hash_in_2xx_response(href, effective_url, response, filenames)
          # Only record the plain success when the hash check did not already
          # record a failure for this URL; a second add without a message
          # would presumably replace that failure entry (Cache is not shown here).
          @cache.add(href, filenames, response_code) unless hash_missing
        elsif response.timed_out?
          handle_timeout(href, filenames, response_code)
        elsif response_code == 0
          handle_failure(href, filenames, response_code)
        elsif method == :head
          queue_request(:get, href, filenames)
        else
          return if @options[:only_4xx] && !response_code.between?(400, 499)
          # Received a non-successful http response.
          msg = "External link #{href} failed: #{response_code} #{response.return_message}"
          add_external_issue(filenames, msg, response_code)
          @cache.add(href, filenames, response_code, msg)
        end
      end

      # Even though the response was a success, we may have been asked to check
      # if the hash on the URL exists on the page
      #
      # @return [Boolean] true when a missing-hash failure was recorded (issue
      #   added and written to the cache), false otherwise
      def check_hash_in_2xx_response(href, effective_url, response, filenames)
        return false if @options[:only_4xx]
        return false unless @options[:check_external_hash]
        return false unless (hash = hash?(href))

        body_doc = create_nokogiri(response.body)

        xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"])
        # user-content is a special addition by GitHub.
        if URI.parse(href).host.match(/github\.com/i)
          xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])
        end

        return false unless body_doc.xpath(xpath).empty?

        msg = "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not"
        add_external_issue(filenames, msg, response.code)
        @cache.add(href, filenames, response.code, msg)
        true
      end

      # Caches the timeout and, unless only 4xx errors are wanted, reports it.
      def handle_timeout(href, filenames, response_code)
        msg = "External link #{href} failed: got a time out (response code #{response_code})"
        @cache.add(href, filenames, 0, msg)
        return if @options[:only_4xx]
        add_external_issue(filenames, msg, response_code)
      end

      # Caches a response code of 0 and, unless only 4xx errors are wanted, reports it.
      def handle_failure(href, filenames, response_code)
        msg = "External link #{href} failed: response code #{response_code} means something's wrong"
        @cache.add(href, filenames, 0, msg)
        return if @options[:only_4xx]
        add_external_issue(filenames, msg, response_code)
      end

      # Records one issue per referencing file, or a single issue with an
      # empty path when the referencing files are unknown.
      def add_external_issue(filenames, desc, status = nil)
        if filenames.nil?
          @failed_tests << CheckRunner::Issue.new('', desc, nil, status)
        else
          filenames.each { |f| @failed_tests << CheckRunner::Issue.new(f, desc, nil, status) }
        end
      end

      # @return [String, nil] the fragment of url; nil when there is none or
      #   the URL is invalid
      def hash?(url)
        URI.parse(url).fragment
      rescue URI::InvalidURIError
        nil
      end
    end
  end
end
|