html-proofer 1.6.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +74 -56
- data/Rakefile +4 -6
- data/bin/htmlproof +46 -36
- data/html-proofer.gemspec +22 -22
- data/lib/html/proofer/check_runner/issue.rb +62 -0
- data/lib/html/proofer/{check.rb → check_runner.rb} +11 -19
- data/lib/html/proofer/checkable.rb +42 -28
- data/lib/html/proofer/checks/favicon.rb +6 -6
- data/lib/html/proofer/checks/html.rb +11 -12
- data/lib/html/proofer/checks/images.rb +11 -11
- data/lib/html/proofer/checks/links.rb +30 -28
- data/lib/html/proofer/checks/scripts.rb +7 -8
- data/lib/html/proofer/log.rb +38 -0
- data/lib/html/proofer/url_validator.rb +135 -0
- data/lib/html/proofer/utils.rb +24 -0
- data/lib/html/proofer/version.rb +1 -1
- data/lib/html/proofer.rb +95 -199
- data/spec/html/proofer/command_spec.rb +82 -0
- data/spec/html/proofer/favicon_spec.rb +20 -20
- data/spec/html/proofer/fixtures/images/srcSetCheck.html +7 -0
- data/spec/html/proofer/fixtures/images/srcSetIgnorable.html +13 -0
- data/spec/html/proofer/fixtures/images/srcSetMissingAlt.html +7 -0
- data/spec/html/proofer/fixtures/images/srcSetMissingImage.html +7 -0
- data/spec/html/proofer/fixtures/links/erstiebegru/314/210/303/237ung.html +1 -0
- data/spec/html/proofer/fixtures/links/erstiebegr/303/274/303/237ung.html +1 -0
- data/spec/html/proofer/fixtures/links/file.foo +11 -0
- data/spec/html/proofer/fixtures/links/folder/multiples/catalog/file.html +8 -0
- data/spec/html/proofer/fixtures/links/folder/multiples/javadoc/file.html +8 -0
- data/spec/html/proofer/fixtures/links/nodupe.html +1 -1
- data/spec/html/proofer/fixtures/links/redirected_error.html +1 -0
- data/spec/html/proofer/fixtures/links/rootLink/rootLink.html +0 -1
- data/spec/html/proofer/fixtures/links/urlencoded-href.html +2 -0
- data/spec/html/proofer/fixtures/links/utf8Link.html +2 -0
- data/spec/html/proofer/fixtures/utils/lang-jp.html +1 -0
- data/spec/html/proofer/html_spec.rb +25 -25
- data/spec/html/proofer/images_spec.rb +59 -35
- data/spec/html/proofer/links_spec.rb +152 -109
- data/spec/html/proofer/scripts_spec.rb +17 -17
- data/spec/html/proofer/utils_spec.rb +14 -0
- data/spec/html/proofer_spec.rb +58 -38
- data/spec/spec_helper.rb +13 -6
- metadata +39 -7
- data/lib/html/proofer/checks.rb +0 -15
- data/lib/html/proofer/issue.rb +0 -21
@@ -1,34 +1,37 @@
|
|
1
|
+
require 'addressable/uri'
|
2
|
+
require_relative './utils'
|
3
|
+
|
1
4
|
module HTML
|
2
5
|
class Proofer
|
6
|
+
# Represents the superclass from which all checks derive.
|
3
7
|
class Checkable
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
include HTML::Utils
|
9
|
+
attr_reader :line
|
10
|
+
|
11
|
+
def initialize(obj, check)
|
12
|
+
obj.attributes.each_pair do |attribute, value|
|
13
|
+
next if attribute == 'data-proofer-ignore' # TODO: not quite sure why this doesn't work
|
14
|
+
instance_variable_set("@#{attribute}".to_sym, value.value)
|
15
|
+
end
|
11
16
|
|
12
17
|
@data_ignore_proofer = obj['data-proofer-ignore']
|
13
18
|
@content = obj.content
|
14
19
|
@check = check
|
15
20
|
@checked_paths = {}
|
16
|
-
@type =
|
21
|
+
@type = self.class.name
|
22
|
+
@line = obj.line
|
17
23
|
|
18
24
|
if @href && @check.options[:href_swap]
|
19
|
-
@check.options[:href_swap]
|
20
|
-
@href = @href.gsub(link, replace)
|
21
|
-
end
|
25
|
+
@href = swap(@href, @check.options[:href_swap])
|
22
26
|
end
|
23
27
|
|
24
28
|
# fix up missing protocols
|
25
|
-
@href.insert 0,
|
26
|
-
@src.insert 0,
|
27
|
-
|
29
|
+
@href.insert 0, 'http:' if @href =~ %r{^//}
|
30
|
+
@src.insert 0, 'http:' if @src =~ %r{^//}
|
28
31
|
end
|
29
32
|
|
30
33
|
def url
|
31
|
-
@src || @href ||
|
34
|
+
@src || @srcset || @href || ''
|
32
35
|
end
|
33
36
|
|
34
37
|
def valid?
|
@@ -42,15 +45,15 @@ module HTML
|
|
42
45
|
end
|
43
46
|
|
44
47
|
def path
|
45
|
-
parts.path
|
48
|
+
CGI.unescape parts.path unless parts.nil?
|
46
49
|
end
|
47
50
|
|
48
51
|
def hash
|
49
|
-
parts.fragment
|
52
|
+
parts.fragment unless parts.nil?
|
50
53
|
end
|
51
54
|
|
52
55
|
def scheme
|
53
|
-
parts.scheme
|
56
|
+
parts.scheme unless parts.nil?
|
54
57
|
end
|
55
58
|
|
56
59
|
# path is to an external server
|
@@ -66,13 +69,13 @@ module HTML
|
|
66
69
|
return true if @data_ignore_proofer
|
67
70
|
|
68
71
|
case @type
|
69
|
-
when
|
72
|
+
when 'FaviconCheckable'
|
70
73
|
return true if url.match(/^data:image/)
|
71
|
-
when
|
72
|
-
return true if ignores_pattern_check(@check.
|
73
|
-
when
|
74
|
+
when 'LinkCheckable'
|
75
|
+
return true if ignores_pattern_check(@check.href_ignores)
|
76
|
+
when 'ImageCheckable'
|
74
77
|
return true if url.match(/^data:image/)
|
75
|
-
return true if ignores_pattern_check(@check.
|
78
|
+
return true if ignores_pattern_check(@check.alt_ignores)
|
76
79
|
end
|
77
80
|
end
|
78
81
|
|
@@ -83,7 +86,7 @@ module HTML
|
|
83
86
|
|
84
87
|
# path is an anchor or a query
|
85
88
|
def internal?
|
86
|
-
url.start_with?
|
89
|
+
url.start_with? '#', '?'
|
87
90
|
end
|
88
91
|
|
89
92
|
def file_path
|
@@ -102,7 +105,7 @@ module HTML
|
|
102
105
|
file = File.join base, path
|
103
106
|
|
104
107
|
# implicit index support
|
105
|
-
if File.directory?
|
108
|
+
if File.directory?(file) && !unslashed_directory?(file)
|
106
109
|
file = File.join file, @check.options[:directory_index_file]
|
107
110
|
end
|
108
111
|
|
@@ -111,7 +114,7 @@ module HTML
|
|
111
114
|
|
112
115
|
# checks if a file exists relative to the current pwd
|
113
116
|
def exists?
|
114
|
-
return @checked_paths[absolute_path] if @checked_paths.
|
117
|
+
return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
|
115
118
|
@checked_paths[absolute_path] = File.exist? absolute_path
|
116
119
|
end
|
117
120
|
|
@@ -132,9 +135,20 @@ module HTML
|
|
132
135
|
false
|
133
136
|
end
|
134
137
|
|
135
|
-
def unslashed_directory?
|
136
|
-
File.directory?
|
138
|
+
def unslashed_directory?(file)
|
139
|
+
File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
|
137
140
|
end
|
141
|
+
|
142
|
+
def follow_location?
|
143
|
+
@check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
|
144
|
+
end
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
def real_attr(attr)
|
149
|
+
attr.to_s unless attr.nil? || attr.empty?
|
150
|
+
end
|
151
|
+
|
138
152
|
end
|
139
153
|
end
|
140
154
|
end
|
@@ -1,21 +1,21 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class FaviconCheckable < ::HTML::Proofer::Checkable
|
4
4
|
def rel
|
5
5
|
@rel
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
9
|
-
class
|
9
|
+
class FaviconCheck < ::HTML::Proofer::CheckRunner
|
10
10
|
|
11
11
|
def run
|
12
|
-
@html.xpath(
|
13
|
-
favicon =
|
12
|
+
@html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |favicon|
|
13
|
+
favicon = FaviconCheckable.new favicon, self
|
14
14
|
next if favicon.ignore?
|
15
|
-
return if favicon.rel.split(
|
15
|
+
return if favicon.rel.split(' ').last.eql? 'icon'
|
16
16
|
end
|
17
17
|
|
18
|
-
|
18
|
+
add_issue 'no favicon specified'
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
@@ -1,24 +1,23 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class HtmlCheck < ::HTML::Proofer::CheckRunner
|
4
4
|
|
5
5
|
# new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
|
6
|
-
HTML5_TAGS = %w(article aside bdi details dialog figcaption
|
7
|
-
figure footer header main mark menuitem meter
|
8
|
-
nav progress rp rt ruby section summary
|
9
|
-
time wbr datalist keygen output color date
|
10
|
-
datetime datetime-local email month number
|
11
|
-
range search tel time url week canvas
|
6
|
+
HTML5_TAGS = %w(article aside bdi details dialog figcaption
|
7
|
+
figure footer header main mark menuitem meter
|
8
|
+
nav progress rp rt ruby section summary
|
9
|
+
time wbr datalist keygen output color date
|
10
|
+
datetime datetime-local email month number
|
11
|
+
range search tel time url week canvas
|
12
12
|
svg audio embed source track video)
|
13
13
|
|
14
14
|
def run
|
15
15
|
@html.errors.each do |e|
|
16
|
+
# Nokogiri (or rather libxml2 underhood) only recognizes html4 tags,
|
17
|
+
# so we need to skip errors caused by the new tags in html5
|
18
|
+
next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
|
16
19
|
|
17
|
-
|
18
|
-
# so we need to skip errors caused by the new tags in html5
|
19
|
-
next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
|
20
|
-
|
21
|
-
self.add_issue(e.to_s)
|
20
|
+
add_issue(e.to_s)
|
22
21
|
end
|
23
22
|
end
|
24
23
|
end
|
@@ -1,19 +1,19 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class ImageCheckable < ::HTML::Proofer::Checkable
|
4
4
|
|
5
5
|
SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/
|
6
6
|
|
7
7
|
def valid_alt_tag?
|
8
|
-
@alt
|
8
|
+
@alt && !@alt.empty?
|
9
9
|
end
|
10
10
|
|
11
11
|
def terrible_filename?
|
12
|
-
|
12
|
+
src =~ SCREEN_SHOT_REGEX
|
13
13
|
end
|
14
14
|
|
15
15
|
def src
|
16
|
-
@src
|
16
|
+
real_attr(@src) || real_attr(@srcset)
|
17
17
|
end
|
18
18
|
|
19
19
|
def missing_src?
|
@@ -22,29 +22,29 @@ class Image < ::HTML::Proofer::Checkable
|
|
22
22
|
|
23
23
|
end
|
24
24
|
|
25
|
-
class
|
25
|
+
class ImageCheck < ::HTML::Proofer::CheckRunner
|
26
26
|
def run
|
27
|
-
@html.css(
|
28
|
-
img =
|
27
|
+
@html.css('img').each do |i|
|
28
|
+
img = ImageCheckable.new i, self
|
29
29
|
|
30
30
|
next if img.ignore?
|
31
31
|
|
32
32
|
# screenshot filenames should return because of terrible names
|
33
|
-
next
|
33
|
+
next add_issue("image has a terrible filename (#{img.src})", i.line) if img.terrible_filename?
|
34
34
|
|
35
35
|
# does the image exist?
|
36
36
|
if img.missing_src?
|
37
|
-
|
37
|
+
add_issue('image has no src or srcset attribute', i.line)
|
38
38
|
else
|
39
39
|
if img.remote?
|
40
40
|
add_to_external_urls img.src
|
41
41
|
else
|
42
|
-
|
42
|
+
add_issue("internal image #{img.src} does not exist", i.line) unless img.exists?
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
46
|
# check alt tag
|
47
|
-
|
47
|
+
add_issue("image #{img.src} does not have an alt attribute", i.line) unless img.valid_alt_tag?
|
48
48
|
end
|
49
49
|
|
50
50
|
external_urls
|
@@ -1,6 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
3
|
-
class Link < ::HTML::Proofer::Checkable
|
2
|
+
class LinkCheckable < ::HTML::Proofer::Checkable
|
4
3
|
|
5
4
|
def href
|
6
5
|
real_attr @href
|
@@ -15,26 +14,21 @@ class Link < ::HTML::Proofer::Checkable
|
|
15
14
|
end
|
16
15
|
|
17
16
|
def missing_href?
|
18
|
-
href.nil?
|
17
|
+
href.nil? && name.nil? && id.nil?
|
19
18
|
end
|
20
19
|
|
21
20
|
def placeholder?
|
22
21
|
(id || name) && href.nil?
|
23
22
|
end
|
24
23
|
|
25
|
-
private
|
26
|
-
|
27
|
-
def real_attr(attr)
|
28
|
-
attr unless attr.nil? || attr.empty?
|
29
|
-
end
|
30
|
-
|
31
24
|
end
|
32
25
|
|
33
|
-
class
|
26
|
+
class LinkCheck < ::HTML::Proofer::CheckRunner
|
27
|
+
include HTML::Utils
|
34
28
|
|
35
29
|
def run
|
36
|
-
@html.css(
|
37
|
-
link =
|
30
|
+
@html.css('a, link').each do |l|
|
31
|
+
link = LinkCheckable.new l, self
|
38
32
|
|
39
33
|
next if link.ignore?
|
40
34
|
next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse
|
@@ -42,22 +36,22 @@ class Links < ::HTML::Proofer::Checks::Check
|
|
42
36
|
|
43
37
|
# is it even a valid URL?
|
44
38
|
unless link.valid?
|
45
|
-
|
39
|
+
add_issue("#{link.href} is an invalid URL", l.line)
|
46
40
|
next
|
47
41
|
end
|
48
42
|
|
49
|
-
if link.scheme ==
|
50
|
-
|
51
|
-
|
43
|
+
if link.scheme == 'mailto'
|
44
|
+
add_issue("#{link.href} contains no email address", l.line) if link.path.empty?
|
45
|
+
add_issue("#{link.href} contain an invalid email address", l.line) unless link.path.include?('@')
|
52
46
|
end
|
53
47
|
|
54
|
-
if link.scheme ==
|
55
|
-
|
48
|
+
if link.scheme == 'tel'
|
49
|
+
add_issue("#{link.href} contains no phone number", l.line) if link.path.empty?
|
56
50
|
end
|
57
51
|
|
58
52
|
# is there even a href?
|
59
53
|
if link.missing_href?
|
60
|
-
|
54
|
+
add_issue('anchor has no href attribute', l.line)
|
61
55
|
next
|
62
56
|
end
|
63
57
|
|
@@ -69,26 +63,23 @@ class Links < ::HTML::Proofer::Checks::Check
|
|
69
63
|
add_to_external_urls link.href
|
70
64
|
next
|
71
65
|
elsif !link.internal?
|
72
|
-
|
66
|
+
add_issue("internally linking to #{link.href}, which does not exist", l.line) unless link.exists?
|
73
67
|
end
|
74
68
|
|
75
69
|
# does the local directory have a trailing slash?
|
76
70
|
if link.unslashed_directory? link.absolute_path
|
77
|
-
|
71
|
+
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", l.line)
|
78
72
|
next
|
79
73
|
end
|
80
74
|
|
81
75
|
# verify the target hash
|
82
76
|
if link.hash
|
83
77
|
if link.internal?
|
84
|
-
|
85
|
-
|
86
|
-
unless link.exists?
|
87
|
-
self.add_issue "trying to find hash of #{link.href}, but #{link.absolute_path} does not exist"
|
88
|
-
else
|
89
|
-
target_html = HTML::Proofer.create_nokogiri link.absolute_path
|
90
|
-
self.add_issue "linking to #{link.href}, but #{link.hash} does not exist" unless hash_check target_html, link.hash
|
78
|
+
unless hash_check @html, link.hash
|
79
|
+
add_issue("linking to internal hash ##{link.hash} that does not exist", l.line)
|
91
80
|
end
|
81
|
+
elsif link.external?
|
82
|
+
external_link_check(link)
|
92
83
|
end
|
93
84
|
end
|
94
85
|
end
|
@@ -96,6 +87,17 @@ class Links < ::HTML::Proofer::Checks::Check
|
|
96
87
|
external_urls
|
97
88
|
end
|
98
89
|
|
90
|
+
def external_link_check(link)
|
91
|
+
if !link.exists?
|
92
|
+
add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", l.line)
|
93
|
+
else
|
94
|
+
target_html = create_nokogiri link.absolute_path
|
95
|
+
unless hash_check target_html, link.hash
|
96
|
+
add_issue("linking to #{link.href}, but #{link.hash} does not exist", link.line)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
99
101
|
def hash_check(html, href_hash)
|
100
102
|
html.xpath("//*[@id='#{href_hash}']", "//*[@name='#{href_hash}']").length > 0
|
101
103
|
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
class
|
3
|
+
class ScriptCheckable < ::HTML::Proofer::Checkable
|
4
4
|
|
5
5
|
def src
|
6
|
-
|
6
|
+
real_attr @src
|
7
7
|
end
|
8
8
|
|
9
9
|
def missing_src?
|
@@ -16,23 +16,22 @@ class Script < ::HTML::Proofer::Checkable
|
|
16
16
|
|
17
17
|
end
|
18
18
|
|
19
|
-
class
|
19
|
+
class ScriptCheck < ::HTML::Proofer::CheckRunner
|
20
20
|
def run
|
21
|
-
@html.css(
|
22
|
-
script =
|
21
|
+
@html.css('script').each do |s|
|
22
|
+
script = ScriptCheckable.new s, self
|
23
23
|
|
24
24
|
next if script.ignore?
|
25
25
|
next unless script.blank?
|
26
26
|
|
27
27
|
# does the script exist?
|
28
28
|
if script.missing_src?
|
29
|
-
|
29
|
+
add_issue('script is empty and has no src attribute', s.line)
|
30
30
|
elsif script.remote?
|
31
31
|
add_to_external_urls script.src
|
32
32
|
else
|
33
|
-
|
33
|
+
add_issue("internal script #{script.src} does not exist", s.line) unless script.exists?
|
34
34
|
end
|
35
|
-
|
36
35
|
end
|
37
36
|
|
38
37
|
external_urls
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'yell'
|
2
|
+
require 'colored'
|
3
|
+
|
4
|
+
module HTML
|
5
|
+
class Proofer
|
6
|
+
class Log
|
7
|
+
include Yell::Loggable
|
8
|
+
|
9
|
+
def initialize(verbose)
|
10
|
+
log_level = verbose ? :debug : :info
|
11
|
+
|
12
|
+
@logger = Yell.new(:format => false, \
|
13
|
+
:name => 'HTML::Proofer', \
|
14
|
+
:level => "gte.#{log_level}") do |l|
|
15
|
+
l.adapter :stdout, :level => [:debug, :info, :warn]
|
16
|
+
l.adapter :stderr, :level => [:error, :fatal]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def log(level, color, message)
|
21
|
+
@logger.send level, colorize(color, message)
|
22
|
+
end
|
23
|
+
|
24
|
+
def colorize(color, message)
|
25
|
+
if $stdout.isatty && $stderr.isatty
|
26
|
+
Colored.colorize(message, foreground: color)
|
27
|
+
else
|
28
|
+
message
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# dumb override to play nice with Typhoeus/Ethon
|
33
|
+
def debug(message = nil)
|
34
|
+
log(:debug, :yellow, message) unless message.nil?
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
require 'uri'
|
3
|
+
require_relative './utils'
|
4
|
+
|
5
|
+
module HTML
|
6
|
+
class Proofer
|
7
|
+
class UrlValidator
|
8
|
+
include Utils
|
9
|
+
|
10
|
+
attr_accessor :logger, :external_urls, :hydra
|
11
|
+
|
12
|
+
def initialize(logger, external_urls, options, typhoeus_opts, hydra_opts)
|
13
|
+
@logger = logger
|
14
|
+
@external_urls = external_urls
|
15
|
+
@failed_tests = []
|
16
|
+
@options = options
|
17
|
+
@hydra = Typhoeus::Hydra.new(hydra_opts)
|
18
|
+
@typhoeus_opts = typhoeus_opts
|
19
|
+
end
|
20
|
+
|
21
|
+
def run
|
22
|
+
external_link_checker(external_urls)
|
23
|
+
@failed_tests
|
24
|
+
end
|
25
|
+
|
26
|
+
# Proofer runs faster if we pull out all the external URLs and run the checks
|
27
|
+
# at the end. Otherwise, we're halting the consuming process for every file during
|
28
|
+
# the check_directory_of_files process.
|
29
|
+
#
|
30
|
+
# In addition, sorting the list lets libcurl keep connections to the same hosts alive.
|
31
|
+
#
|
32
|
+
# Finally, we'll first make a HEAD request, rather than GETing all the contents.
|
33
|
+
# If the HEAD fails, we'll fall back to GET, as some servers are not configured
|
34
|
+
# for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
|
35
|
+
# not an option.
|
36
|
+
def external_link_checker(external_urls)
|
37
|
+
external_urls = Hash[external_urls.sort]
|
38
|
+
|
39
|
+
count = external_urls.length
|
40
|
+
check_text = "#{count} " << (count == 1 ? 'external link' : 'external links')
|
41
|
+
logger.log :info, :blue, "Checking #{check_text}..."
|
42
|
+
|
43
|
+
Ethon.logger = logger # log from Typhoeus/Ethon
|
44
|
+
|
45
|
+
url_processor(external_urls)
|
46
|
+
|
47
|
+
logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
|
48
|
+
hydra.run
|
49
|
+
end
|
50
|
+
|
51
|
+
def url_processor(external_urls)
|
52
|
+
external_urls.each_pair do |href, filenames|
|
53
|
+
href = clean_url(href)
|
54
|
+
if hash?(href) && @options[:check_external_hash]
|
55
|
+
queue_request(:get, href, filenames)
|
56
|
+
else
|
57
|
+
queue_request(:head, href, filenames)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def clean_url(href)
|
63
|
+
Addressable::URI.parse(href).normalize
|
64
|
+
end
|
65
|
+
|
66
|
+
def queue_request(method, href, filenames)
|
67
|
+
request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method }))
|
68
|
+
request.on_complete { |response| response_handler(response, filenames) }
|
69
|
+
hydra.queue request
|
70
|
+
end
|
71
|
+
|
72
|
+
def response_handler(response, filenames)
|
73
|
+
effective_url = response.options[:effective_url]
|
74
|
+
href = response.request.base_url.to_s
|
75
|
+
method = response.request.options[:method]
|
76
|
+
response_code = response.code
|
77
|
+
debug_msg = "Received a #{response_code} for #{href}"
|
78
|
+
debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
|
79
|
+
logger.log :debug, :yellow, debug_msg
|
80
|
+
|
81
|
+
if response_code.between?(200, 299)
|
82
|
+
check_hash_in_2xx_response(href, effective_url, response, filenames)
|
83
|
+
elsif response.timed_out?
|
84
|
+
handle_timeout(filenames, response_code)
|
85
|
+
elsif method == :head
|
86
|
+
queue_request(:get, href, filenames)
|
87
|
+
else
|
88
|
+
return if @options[:only_4xx] && !response_code.between?(400, 499)
|
89
|
+
# Received a non-successful http response.
|
90
|
+
add_failed_tests filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
# Even though the response was a success, we may have been asked to check
|
95
|
+
# if the hash on the URL exists on the page
|
96
|
+
def check_hash_in_2xx_response(href, effective_url, response, filenames)
|
97
|
+
return if @options[:only_4xx]
|
98
|
+
return unless @options[:check_external_hash]
|
99
|
+
return unless (hash = hash?(href))
|
100
|
+
|
101
|
+
body_doc = create_nokogiri(response.body)
|
102
|
+
|
103
|
+
# user-content is a special addition by GitHub.
|
104
|
+
xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"])
|
105
|
+
if URI.parse(href).host.match(/github\.com/i)
|
106
|
+
xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"])
|
107
|
+
end
|
108
|
+
|
109
|
+
return unless body_doc.xpath(xpath).empty?
|
110
|
+
|
111
|
+
add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code
|
112
|
+
end
|
113
|
+
|
114
|
+
def handle_timeout
|
115
|
+
return if @options[:only_4xx]
|
116
|
+
add_failed_tests filenames, "External link #{href} failed: got a time out", response_code
|
117
|
+
end
|
118
|
+
|
119
|
+
def add_failed_tests(filenames, desc, status = nil)
|
120
|
+
if filenames.nil?
|
121
|
+
@failed_tests << CheckRunner::Issue.new('', desc, nil, status)
|
122
|
+
else
|
123
|
+
filenames.each { |f| @failed_tests << CheckRunner::Issue.new(f, desc, nil, status) }
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def hash?(url)
|
128
|
+
URI.parse(url).fragment
|
129
|
+
rescue URI::InvalidURIError
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module HTML
|
4
|
+
module Utils
|
5
|
+
def create_nokogiri(path)
|
6
|
+
if File.exist? path
|
7
|
+
content = File.open(path).read
|
8
|
+
else
|
9
|
+
content = path
|
10
|
+
end
|
11
|
+
|
12
|
+
Nokogiri::HTML(content)
|
13
|
+
end
|
14
|
+
module_function :create_nokogiri
|
15
|
+
|
16
|
+
def swap(href, replacement)
|
17
|
+
replacement.each do |link, replace|
|
18
|
+
href = href.gsub(link, replace)
|
19
|
+
end
|
20
|
+
href
|
21
|
+
end
|
22
|
+
module_function :swap
|
23
|
+
end
|
24
|
+
end
|
data/lib/html/proofer/version.rb
CHANGED