html-proofer 1.6.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +74 -56
- data/Rakefile +4 -6
- data/bin/htmlproof +46 -36
- data/html-proofer.gemspec +22 -22
- data/lib/html/proofer/check_runner/issue.rb +62 -0
- data/lib/html/proofer/{check.rb → check_runner.rb} +11 -19
- data/lib/html/proofer/checkable.rb +42 -28
- data/lib/html/proofer/checks/favicon.rb +6 -6
- data/lib/html/proofer/checks/html.rb +11 -12
- data/lib/html/proofer/checks/images.rb +11 -11
- data/lib/html/proofer/checks/links.rb +30 -28
- data/lib/html/proofer/checks/scripts.rb +7 -8
- data/lib/html/proofer/log.rb +38 -0
- data/lib/html/proofer/url_validator.rb +135 -0
- data/lib/html/proofer/utils.rb +24 -0
- data/lib/html/proofer/version.rb +1 -1
- data/lib/html/proofer.rb +95 -199
- data/spec/html/proofer/command_spec.rb +82 -0
- data/spec/html/proofer/favicon_spec.rb +20 -20
- data/spec/html/proofer/fixtures/images/srcSetCheck.html +7 -0
- data/spec/html/proofer/fixtures/images/srcSetIgnorable.html +13 -0
- data/spec/html/proofer/fixtures/images/srcSetMissingAlt.html +7 -0
- data/spec/html/proofer/fixtures/images/srcSetMissingImage.html +7 -0
- data/spec/html/proofer/fixtures/links/erstiebegru/314/210/303/237ung.html +1 -0
- data/spec/html/proofer/fixtures/links/erstiebegr/303/274/303/237ung.html +1 -0
- data/spec/html/proofer/fixtures/links/file.foo +11 -0
- data/spec/html/proofer/fixtures/links/folder/multiples/catalog/file.html +8 -0
- data/spec/html/proofer/fixtures/links/folder/multiples/javadoc/file.html +8 -0
- data/spec/html/proofer/fixtures/links/nodupe.html +1 -1
- data/spec/html/proofer/fixtures/links/redirected_error.html +1 -0
- data/spec/html/proofer/fixtures/links/rootLink/rootLink.html +0 -1
- data/spec/html/proofer/fixtures/links/urlencoded-href.html +2 -0
- data/spec/html/proofer/fixtures/links/utf8Link.html +2 -0
- data/spec/html/proofer/fixtures/utils/lang-jp.html +1 -0
- data/spec/html/proofer/html_spec.rb +25 -25
- data/spec/html/proofer/images_spec.rb +59 -35
- data/spec/html/proofer/links_spec.rb +152 -109
- data/spec/html/proofer/scripts_spec.rb +17 -17
- data/spec/html/proofer/utils_spec.rb +14 -0
- data/spec/html/proofer_spec.rb +58 -38
- data/spec/spec_helper.rb +13 -6
- metadata +39 -7
- data/lib/html/proofer/checks.rb +0 -15
- data/lib/html/proofer/issue.rb +0 -21
@@ -1,34 +1,37 @@
|
|
1
|
+
require 'addressable/uri'
|
2
|
+
require_relative './utils'
|
3
|
+
|
1
4
|
module HTML
|
2
5
|
class Proofer
|
6
|
+
# Represents the superclass from which all checks derive.
|
3
7
|
class Checkable
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
include HTML::Utils
|
9
|
+
attr_reader :line
|
10
|
+
|
11
|
+
def initialize(obj, check)
|
12
|
+
obj.attributes.each_pair do |attribute, value|
|
13
|
+
next if attribute == 'data-proofer-ignore' # TODO: not quite sure why this doesn't work
|
14
|
+
instance_variable_set("@#{attribute}".to_sym, value.value)
|
15
|
+
end
|
11
16
|
|
12
17
|
@data_ignore_proofer = obj['data-proofer-ignore']
|
13
18
|
@content = obj.content
|
14
19
|
@check = check
|
15
20
|
@checked_paths = {}
|
16
|
-
@type =
|
21
|
+
@type = self.class.name
|
22
|
+
@line = obj.line
|
17
23
|
|
18
24
|
if @href && @check.options[:href_swap]
|
19
|
-
@check.options[:href_swap]
|
20
|
-
@href = @href.gsub(link, replace)
|
21
|
-
end
|
25
|
+
@href = swap(@href, @check.options[:href_swap])
|
22
26
|
end
|
23
27
|
|
24
28
|
# fix up missing protocols
|
25
|
-
@href.insert 0,
|
26
|
-
@src.insert 0,
|
27
|
-
|
29
|
+
@href.insert 0, 'http:' if @href =~ %r{^//}
|
30
|
+
@src.insert 0, 'http:' if @src =~ %r{^//}
|
28
31
|
end
|
29
32
|
|
30
33
|
def url
|
31
|
-
@src || @href ||
|
34
|
+
@src || @srcset || @href || ''
|
32
35
|
end
|
33
36
|
|
34
37
|
def valid?
|
@@ -42,15 +45,15 @@ module HTML
|
|
42
45
|
end
|
43
46
|
|
44
47
|
def path
|
45
|
-
parts.path
|
48
|
+
CGI.unescape parts.path unless parts.nil?
|
46
49
|
end
|
47
50
|
|
48
51
|
def hash
|
49
|
-
parts.fragment
|
52
|
+
parts.fragment unless parts.nil?
|
50
53
|
end
|
51
54
|
|
52
55
|
def scheme
|
53
|
-
parts.scheme
|
56
|
+
parts.scheme unless parts.nil?
|
54
57
|
end
|
55
58
|
|
56
59
|
# path is to an external server
|
@@ -66,13 +69,13 @@ module HTML
|
|
66
69
|
return true if @data_ignore_proofer
|
67
70
|
|
68
71
|
case @type
|
69
|
-
when
|
72
|
+
when 'FaviconCheckable'
|
70
73
|
return true if url.match(/^data:image/)
|
71
|
-
when
|
72
|
-
return true if ignores_pattern_check(@check.
|
73
|
-
when
|
74
|
+
when 'LinkCheckable'
|
75
|
+
return true if ignores_pattern_check(@check.href_ignores)
|
76
|
+
when 'ImageCheckable'
|
74
77
|
return true if url.match(/^data:image/)
|
75
|
-
return true if ignores_pattern_check(@check.
|
78
|
+
return true if ignores_pattern_check(@check.alt_ignores)
|
76
79
|
end
|
77
80
|
end
|
78
81
|
|
@@ -83,7 +86,7 @@ module HTML
|
|
83
86
|
|
84
87
|
# path is an anchor or a query
|
85
88
|
def internal?
|
86
|
-
url.start_with?
|
89
|
+
url.start_with? '#', '?'
|
87
90
|
end
|
88
91
|
|
89
92
|
def file_path
|
@@ -102,7 +105,7 @@ module HTML
|
|
102
105
|
file = File.join base, path
|
103
106
|
|
104
107
|
# implicit index support
|
105
|
-
if File.directory?
|
108
|
+
if File.directory?(file) && !unslashed_directory?(file)
|
106
109
|
file = File.join file, @check.options[:directory_index_file]
|
107
110
|
end
|
108
111
|
|
@@ -111,7 +114,7 @@ module HTML
|
|
111
114
|
|
112
115
|
# checks if a file exists relative to the current pwd
|
113
116
|
def exists?
|
114
|
-
return @checked_paths[absolute_path] if @checked_paths.
|
117
|
+
return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
|
115
118
|
@checked_paths[absolute_path] = File.exist? absolute_path
|
116
119
|
end
|
117
120
|
|
@@ -132,9 +135,20 @@ module HTML
|
|
132
135
|
false
|
133
136
|
end
|
134
137
|
|
135
|
-
def unslashed_directory?
|
136
|
-
File.directory?
|
138
|
+
def unslashed_directory?(file)
|
139
|
+
File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
|
137
140
|
end
|
141
|
+
|
142
|
+
def follow_location?
|
143
|
+
@check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
|
144
|
+
end
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
def real_attr(attr)
|
149
|
+
attr.to_s unless attr.nil? || attr.empty?
|
150
|
+
end
|
151
|
+
|
138
152
|
end
|
139
153
|
end
|
140
154
|
end
|
@@ -1,21 +1,21 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class FaviconCheckable < ::HTML::Proofer::Checkable
|
4
4
|
def rel
|
5
5
|
@rel
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
9
|
-
class
|
9
|
+
class FaviconCheck < ::HTML::Proofer::CheckRunner
|
10
10
|
|
11
11
|
def run
|
12
|
-
@html.xpath(
|
13
|
-
favicon =
|
12
|
+
@html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |favicon|
|
13
|
+
favicon = FaviconCheckable.new favicon, self
|
14
14
|
next if favicon.ignore?
|
15
|
-
return if favicon.rel.split(
|
15
|
+
return if favicon.rel.split(' ').last.eql? 'icon'
|
16
16
|
end
|
17
17
|
|
18
|
-
|
18
|
+
add_issue 'no favicon specified'
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
@@ -1,24 +1,23 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class HtmlCheck < ::HTML::Proofer::CheckRunner
|
4
4
|
|
5
5
|
# new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
|
6
|
-
HTML5_TAGS = %w(article aside bdi details dialog figcaption
|
7
|
-
figure footer header main mark menuitem meter
|
8
|
-
nav progress rp rt ruby section summary
|
9
|
-
time wbr datalist keygen output color date
|
10
|
-
datetime datetime-local email month number
|
11
|
-
range search tel time url week canvas
|
6
|
+
HTML5_TAGS = %w(article aside bdi details dialog figcaption
|
7
|
+
figure footer header main mark menuitem meter
|
8
|
+
nav progress rp rt ruby section summary
|
9
|
+
time wbr datalist keygen output color date
|
10
|
+
datetime datetime-local email month number
|
11
|
+
range search tel time url week canvas
|
12
12
|
svg audio embed source track video)
|
13
13
|
|
14
14
|
def run
|
15
15
|
@html.errors.each do |e|
|
16
|
+
# Nokogiri (or rather libxml2 underhood) only recognizes html4 tags,
|
17
|
+
# so we need to skip errors caused by the new tags in html5
|
18
|
+
next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
|
16
19
|
|
17
|
-
|
18
|
-
# so we need to skip errors caused by the new tags in html5
|
19
|
-
next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
|
20
|
-
|
21
|
-
self.add_issue(e.to_s)
|
20
|
+
add_issue(e.to_s)
|
22
21
|
end
|
23
22
|
end
|
24
23
|
end
|
@@ -1,19 +1,19 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class ImageCheckable < ::HTML::Proofer::Checkable
|
4
4
|
|
5
5
|
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
|
6
6
|
|
7
7
|
def valid_alt_tag?
|
8
|
-
@alt
|
8
|
+
@alt && !@alt.empty?
|
9
9
|
end
|
10
10
|
|
11
11
|
def terrible_filename?
|
12
|
-
|
12
|
+
src =~ SCREEN_SHOT_REGEX
|
13
13
|
end
|
14
14
|
|
15
15
|
def src
|
16
|
-
@src
|
16
|
+
real_attr(@src) || real_attr(@srcset)
|
17
17
|
end
|
18
18
|
|
19
19
|
def missing_src?
|
@@ -22,29 +22,29 @@ class Image < ::HTML::Proofer::Checkable
|
|
22
22
|
|
23
23
|
end
|
24
24
|
|
25
|
-
class
|
25
|
+
class ImageCheck < ::HTML::Proofer::CheckRunner
|
26
26
|
def run
|
27
|
-
@html.css(
|
28
|
-
img =
|
27
|
+
@html.css('img').each do |i|
|
28
|
+
img = ImageCheckable.new i, self
|
29
29
|
|
30
30
|
next if img.ignore?
|
31
31
|
|
32
32
|
# screenshot filenames should return because of terrible names
|
33
|
-
next
|
33
|
+
next add_issue("image has a terrible filename (#{img.src})", i.line) if img.terrible_filename?
|
34
34
|
|
35
35
|
# does the image exist?
|
36
36
|
if img.missing_src?
|
37
|
-
|
37
|
+
add_issue('image has no src or srcset attribute', i.line)
|
38
38
|
else
|
39
39
|
if img.remote?
|
40
40
|
add_to_external_urls img.src
|
41
41
|
else
|
42
|
-
|
42
|
+
add_issue("internal image #{img.src} does not exist", i.line) unless img.exists?
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
46
|
# check alt tag
|
47
|
-
|
47
|
+
add_issue("image #{img.src} does not have an alt attribute", i.line) unless img.valid_alt_tag?
|
48
48
|
end
|
49
49
|
|
50
50
|
external_urls
|
@@ -1,6 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
3
|
-
class Link < ::HTML::Proofer::Checkable
|
2
|
+
class LinkCheckable < ::HTML::Proofer::Checkable
|
4
3
|
|
5
4
|
def href
|
6
5
|
real_attr @href
|
@@ -15,26 +14,21 @@ class Link < ::HTML::Proofer::Checkable
|
|
15
14
|
end
|
16
15
|
|
17
16
|
def missing_href?
|
18
|
-
href.nil?
|
17
|
+
href.nil? && name.nil? && id.nil?
|
19
18
|
end
|
20
19
|
|
21
20
|
def placeholder?
|
22
21
|
(id || name) && href.nil?
|
23
22
|
end
|
24
23
|
|
25
|
-
private
|
26
|
-
|
27
|
-
def real_attr(attr)
|
28
|
-
attr unless attr.nil? || attr.empty?
|
29
|
-
end
|
30
|
-
|
31
24
|
end
|
32
25
|
|
33
|
-
class
|
26
|
+
class LinkCheck < ::HTML::Proofer::CheckRunner
|
27
|
+
include HTML::Utils
|
34
28
|
|
35
29
|
def run
|
36
|
-
@html.css(
|
37
|
-
link =
|
30
|
+
@html.css('a, link').each do |l|
|
31
|
+
link = LinkCheckable.new l, self
|
38
32
|
|
39
33
|
next if link.ignore?
|
40
34
|
next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
|
@@ -42,22 +36,22 @@ class Links < ::HTML::Proofer::Checks::Check
|
|
42
36
|
|
43
37
|
# is it even a valid URL?
|
44
38
|
unless link.valid?
|
45
|
-
|
39
|
+
add_issue("#{link.href} is an invalid URL", l.line)
|
46
40
|
next
|
47
41
|
end
|
48
42
|
|
49
|
-
if link.scheme ==
|
50
|
-
|
51
|
-
|
43
|
+
if link.scheme == 'mailto'
|
44
|
+
add_issue("#{link.href} contains no email address", l.line) if link.path.empty?
|
45
|
+
add_issue("#{link.href} contain an invalid email address", l.line) unless link.path.include?('@')
|
52
46
|
end
|
53
47
|
|
54
|
-
if link.scheme ==
|
55
|
-
|
48
|
+
if link.scheme == 'tel'
|
49
|
+
add_issue("#{link.href} contains no phone number", l.line) if link.path.empty?
|
56
50
|
end
|
57
51
|
|
58
52
|
# is there even a href?
|
59
53
|
if link.missing_href?
|
60
|
-
|
54
|
+
add_issue('anchor has no href attribute', l.line)
|
61
55
|
next
|
62
56
|
end
|
63
57
|
|
@@ -69,26 +63,23 @@ class Links < ::HTML::Proofer::Checks::Check
|
|
69
63
|
add_to_external_urls link.href
|
70
64
|
next
|
71
65
|
elsif !link.internal?
|
72
|
-
|
66
|
+
add_issue("internally linking to #{link.href}, which does not exist", l.line) unless link.exists?
|
73
67
|
end
|
74
68
|
|
75
69
|
# does the local directory have a trailing slash?
|
76
70
|
if link.unslashed_directory? link.absolute_path
|
77
|
-
|
71
|
+
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", l.line)
|
78
72
|
next
|
79
73
|
end
|
80
74
|
|
81
75
|
# verify the target hash
|
82
76
|
if link.hash
|
83
77
|
if link.internal?
|
84
|
-
|
85
|
-
|
86
|
-
unless link.exists?
|
87
|
-
self.add_issue "trying to find hash of #{link.href}, but #{link.absolute_path} does not exist"
|
88
|
-
else
|
89
|
-
target_html = HTML::Proofer.create_nokogiri link.absolute_path
|
90
|
-
self.add_issue "linking to #{link.href}, but #{link.hash} does not exist" unless hash_check target_html, link.hash
|
78
|
+
unless hash_check @html, link.hash
|
79
|
+
add_issue("linking to internal hash ##{link.hash} that does not exist", l.line)
|
91
80
|
end
|
81
|
+
elsif link.external?
|
82
|
+
external_link_check(link)
|
92
83
|
end
|
93
84
|
end
|
94
85
|
end
|
@@ -96,6 +87,17 @@ class Links < ::HTML::Proofer::Checks::Check
|
|
96
87
|
external_urls
|
97
88
|
end
|
98
89
|
|
90
|
+
def external_link_check(link)
|
91
|
+
if !link.exists?
|
92
|
+
add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", l.line)
|
93
|
+
else
|
94
|
+
target_html = create_nokogiri link.absolute_path
|
95
|
+
unless hash_check target_html, link.hash
|
96
|
+
add_issue("linking to #{link.href}, but #{link.hash} does not exist", link.line)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
99
101
|
def hash_check(html, href_hash)
|
100
102
|
html.xpath("//*[@id='#{href_hash}']", "//*[@name='#{href_hash}']").length > 0
|
101
103
|
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class ScriptCheckable < ::HTML::Proofer::Checkable
|
4
4
|
|
5
5
|
def src
|
6
|
-
|
6
|
+
real_attr @src
|
7
7
|
end
|
8
8
|
|
9
9
|
def missing_src?
|
@@ -16,23 +16,22 @@ class Script < ::HTML::Proofer::Checkable
|
|
16
16
|
|
17
17
|
end
|
18
18
|
|
19
|
-
class
|
19
|
+
class ScriptCheck < ::HTML::Proofer::CheckRunner
|
20
20
|
def run
|
21
|
-
@html.css(
|
22
|
-
script =
|
21
|
+
@html.css('script').each do |s|
|
22
|
+
script = ScriptCheckable.new s, self
|
23
23
|
|
24
24
|
next if script.ignore?
|
25
25
|
next unless script.blank?
|
26
26
|
|
27
27
|
# does the script exist?
|
28
28
|
if script.missing_src?
|
29
|
-
|
29
|
+
add_issue('script is empty and has no src attribute', s.line)
|
30
30
|
elsif script.remote?
|
31
31
|
add_to_external_urls script.src
|
32
32
|
else
|
33
|
-
|
33
|
+
add_issue("internal script #{script.src} does not exist", s.line) unless script.exists?
|
34
34
|
end
|
35
|
-
|
36
35
|
end
|
37
36
|
|
38
37
|
external_urls
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'yell'
|
2
|
+
require 'colored'
|
3
|
+
|
4
|
+
module HTML
|
5
|
+
class Proofer
|
6
|
+
class Log
|
7
|
+
include Yell::Loggable
|
8
|
+
|
9
|
+
def initialize(verbose)
|
10
|
+
log_level = verbose ? :debug : :info
|
11
|
+
|
12
|
+
@logger = Yell.new(:format => false, \
|
13
|
+
:name => 'HTML::Proofer', \
|
14
|
+
:level => "gte.#{log_level}") do |l|
|
15
|
+
l.adapter :stdout, :level => [:debug, :info, :warn]
|
16
|
+
l.adapter :stderr, :level => [:error, :fatal]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def log(level, color, message)
|
21
|
+
@logger.send level, colorize(color, message)
|
22
|
+
end
|
23
|
+
|
24
|
+
def colorize(color, message)
|
25
|
+
if $stdout.isatty && $stderr.isatty
|
26
|
+
Colored.colorize(message, foreground: color)
|
27
|
+
else
|
28
|
+
message
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# dumb override to play nice with Typhoeus/Ethon
|
33
|
+
def debug(message = nil)
|
34
|
+
log(:debug, :yellow, message) unless message.nil?
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
require 'uri'
|
3
|
+
require_relative './utils'
|
4
|
+
|
5
|
+
module HTML
|
6
|
+
class Proofer
|
7
|
+
class UrlValidator
|
8
|
+
include Utils
|
9
|
+
|
10
|
+
attr_accessor :logger, :external_urls, :hydra
|
11
|
+
|
12
|
+
def initialize(logger, external_urls, options, typhoeus_opts, hydra_opts)
|
13
|
+
@logger = logger
|
14
|
+
@external_urls = external_urls
|
15
|
+
@failed_tests = []
|
16
|
+
@options = options
|
17
|
+
@hydra = Typhoeus::Hydra.new(hydra_opts)
|
18
|
+
@typhoeus_opts = typhoeus_opts
|
19
|
+
end
|
20
|
+
|
21
|
+
def run
|
22
|
+
external_link_checker(external_urls)
|
23
|
+
@failed_tests
|
24
|
+
end
|
25
|
+
|
26
|
+
# Proofer runs faster if we pull out all the external URLs and run the checks
|
27
|
+
# at the end. Otherwise, we're halting the consuming process for every file during
|
28
|
+
# the check_directory_of_files process.
|
29
|
+
#
|
30
|
+
# In addition, sorting the list lets libcurl keep connections to the same hosts alive.
|
31
|
+
#
|
32
|
+
# Finally, we'll first make a HEAD request, rather than GETing all the contents.
|
33
|
+
# If the HEAD fails, we'll fall back to GET, as some servers are not configured
|
34
|
+
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
35
|
+
# not an option.
|
36
|
+
def external_link_checker(external_urls)
|
37
|
+
external_urls = Hash[external_urls.sort]
|
38
|
+
|
39
|
+
count = external_urls.length
|
40
|
+
check_text = "#{count} " << (count == 1 ? 'external link' : 'external links')
|
41
|
+
logger.log :info, :blue, "Checking #{check_text}..."
|
42
|
+
|
43
|
+
Ethon.logger = logger # log from Typhoeus/Ethon
|
44
|
+
|
45
|
+
url_processor(external_urls)
|
46
|
+
|
47
|
+
logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
|
48
|
+
hydra.run
|
49
|
+
end
|
50
|
+
|
51
|
+
def url_processor(external_urls)
|
52
|
+
external_urls.each_pair do |href, filenames|
|
53
|
+
href = clean_url(href)
|
54
|
+
if hash?(href) && @options[:check_external_hash]
|
55
|
+
queue_request(:get, href, filenames)
|
56
|
+
else
|
57
|
+
queue_request(:head, href, filenames)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def clean_url(href)
|
63
|
+
Addressable::URI.parse(href).normalize
|
64
|
+
end
|
65
|
+
|
66
|
+
def queue_request(method, href, filenames)
|
67
|
+
request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method }))
|
68
|
+
request.on_complete { |response| response_handler(response, filenames) }
|
69
|
+
hydra.queue request
|
70
|
+
end
|
71
|
+
|
72
|
+
def response_handler(response, filenames)
|
73
|
+
effective_url = response.options[:effective_url]
|
74
|
+
href = response.request.base_url.to_s
|
75
|
+
method = response.request.options[:method]
|
76
|
+
response_code = response.code
|
77
|
+
debug_msg = "Received a #{response_code} for #{href}"
|
78
|
+
debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
|
79
|
+
logger.log :debug, :yellow, debug_msg
|
80
|
+
|
81
|
+
if response_code.between?(200, 299)
|
82
|
+
check_hash_in_2xx_response(href, effective_url, response, filenames)
|
83
|
+
elsif response.timed_out?
|
84
|
+
handle_timeout(filenames, response_code)
|
85
|
+
elsif method == :head
|
86
|
+
queue_request(:get, href, filenames)
|
87
|
+
else
|
88
|
+
return if @options[:only_4xx] && !response_code.between?(400, 499)
|
89
|
+
# Received a non-successful http response.
|
90
|
+
add_failed_tests filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
# Even though the response was a success, we may have been asked to check
|
95
|
+
# if the hash on the URL exists on the page
|
96
|
+
def check_hash_in_2xx_response(href, effective_url, response, filenames)
|
97
|
+
return if @options[:only_4xx]
|
98
|
+
return unless @options[:check_external_hash]
|
99
|
+
return unless (hash = hash?(href))
|
100
|
+
|
101
|
+
body_doc = create_nokogiri(response.body)
|
102
|
+
|
103
|
+
# user-content is a special addition by GitHub.
|
104
|
+
xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"])
|
105
|
+
if URI.parse(href).host.match(/github\.com/i)
|
106
|
+
xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])
|
107
|
+
end
|
108
|
+
|
109
|
+
return unless body_doc.xpath(xpath).empty?
|
110
|
+
|
111
|
+
add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code
|
112
|
+
end
|
113
|
+
|
114
|
+
def handle_timeout
|
115
|
+
return if @options[:only_4xx]
|
116
|
+
add_failed_tests filenames, "External link #{href} failed: got a time out", response_code
|
117
|
+
end
|
118
|
+
|
119
|
+
def add_failed_tests(filenames, desc, status = nil)
|
120
|
+
if filenames.nil?
|
121
|
+
@failed_tests << CheckRunner::Issue.new('', desc, nil, status)
|
122
|
+
else
|
123
|
+
filenames.each { |f| @failed_tests << CheckRunner::Issue.new(f, desc, nil, status) }
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def hash?(url)
|
128
|
+
URI.parse(url).fragment
|
129
|
+
rescue URI::InvalidURIError
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module HTML
|
4
|
+
module Utils
|
5
|
+
def create_nokogiri(path)
|
6
|
+
if File.exist? path
|
7
|
+
content = File.open(path).read
|
8
|
+
else
|
9
|
+
content = path
|
10
|
+
end
|
11
|
+
|
12
|
+
Nokogiri::HTML(content)
|
13
|
+
end
|
14
|
+
module_function :create_nokogiri
|
15
|
+
|
16
|
+
def swap(href, replacement)
|
17
|
+
replacement.each do |link, replace|
|
18
|
+
href = href.gsub(link, replace)
|
19
|
+
end
|
20
|
+
href
|
21
|
+
end
|
22
|
+
module_function :swap
|
23
|
+
end
|
24
|
+
end
|
data/lib/html/proofer/version.rb
CHANGED