html-proofer 2.6.4 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/{htmlproof → htmlproofer} +31 -24
- data/lib/html-proofer.rb +47 -0
- data/lib/html-proofer/cache.rb +153 -0
- data/lib/html-proofer/check.rb +63 -0
- data/lib/{html/proofer/checks → html-proofer/check}/favicon.rb +2 -8
- data/lib/html-proofer/check/html.rb +21 -0
- data/lib/html-proofer/check/images.rb +47 -0
- data/lib/{html/proofer/checks → html-proofer/check}/links.rb +40 -48
- data/lib/html-proofer/check/scripts.rb +28 -0
- data/lib/html-proofer/configuration.rb +56 -0
- data/lib/html-proofer/element.rb +165 -0
- data/lib/{html/proofer/check_runner → html-proofer}/issue.rb +8 -10
- data/lib/html-proofer/log.rb +49 -0
- data/lib/html-proofer/runner.rb +160 -0
- data/lib/html-proofer/url_validator.rb +218 -0
- data/lib/html-proofer/utils.rb +40 -0
- data/lib/html-proofer/version.rb +3 -0
- metadata +20 -20
- data/lib/html/proofer.rb +0 -191
- data/lib/html/proofer/cache.rb +0 -141
- data/lib/html/proofer/check_runner.rb +0 -70
- data/lib/html/proofer/checkable.rb +0 -168
- data/lib/html/proofer/checks/html.rb +0 -46
- data/lib/html/proofer/checks/images.rb +0 -54
- data/lib/html/proofer/checks/scripts.rb +0 -40
- data/lib/html/proofer/configuration.rb +0 -48
- data/lib/html/proofer/log.rb +0 -42
- data/lib/html/proofer/url_validator.rb +0 -222
- data/lib/html/proofer/utils.rb +0 -42
- data/lib/html/proofer/version.rb +0 -5
- data/lib/html/proofer/xpathfunctions.rb +0 -9
@@ -1,72 +1,58 @@
|
|
1
|
-
class
|
2
|
-
|
3
|
-
def href
|
4
|
-
real_attr @href
|
5
|
-
end
|
6
|
-
|
7
|
-
def id
|
8
|
-
real_attr @id
|
9
|
-
end
|
10
|
-
|
11
|
-
def name
|
12
|
-
real_attr @name
|
13
|
-
end
|
1
|
+
class LinkCheck < ::HTMLProofer::Check
|
2
|
+
include HTMLProofer::Utils
|
14
3
|
|
15
4
|
def missing_href?
|
16
|
-
href
|
5
|
+
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
17
6
|
end
|
18
7
|
|
19
8
|
def placeholder?
|
20
|
-
(id || name) && href.nil?
|
9
|
+
(!blank?(@link.id) || !blank?(@link.name)) && @link.href.nil?
|
21
10
|
end
|
22
|
-
end
|
23
|
-
|
24
|
-
class LinkCheck < ::HTML::Proofer::CheckRunner
|
25
|
-
include HTML::Proofer::Utils
|
26
11
|
|
27
12
|
def run
|
28
13
|
@html.css('a, link').each do |node|
|
29
|
-
link =
|
30
|
-
line = node.line
|
14
|
+
@link = create_element(node)
|
15
|
+
line = @node.line
|
16
|
+
|
17
|
+
next if @link.ignore?
|
31
18
|
|
32
|
-
next if
|
33
|
-
next if link.
|
34
|
-
next if link.placeholder?
|
35
|
-
next if link.allow_hash_href? && link.href == '#'
|
19
|
+
next if placeholder?
|
20
|
+
next if @link.allow_hash_href? && @link.href == '#'
|
36
21
|
|
37
22
|
# is it even a valid URL?
|
38
|
-
unless link.valid?
|
39
|
-
add_issue("#{link.href} is an invalid URL", line)
|
23
|
+
unless @link.valid?
|
24
|
+
add_issue("#{@link.href} is an invalid URL", line: line)
|
40
25
|
next
|
41
26
|
end
|
42
27
|
|
43
|
-
check_schemes(link, line)
|
28
|
+
check_schemes(@link, line)
|
44
29
|
|
45
|
-
# is there even
|
46
|
-
if
|
47
|
-
|
30
|
+
# is there even an href?
|
31
|
+
if missing_href?
|
32
|
+
# HTML5 allows dropping the href: http://git.io/vBX0z
|
33
|
+
next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
|
34
|
+
add_issue('anchor has no href attribute', line: line)
|
48
35
|
next
|
49
36
|
end
|
50
37
|
|
51
38
|
# intentionally here because we still want valid? & missing_href? to execute
|
52
|
-
next if link.non_http_remote?
|
53
|
-
|
39
|
+
next if @link.non_http_remote?
|
54
40
|
# does the file even exist?
|
55
|
-
if link.remote?
|
56
|
-
add_to_external_urls(link.href, line)
|
41
|
+
if @link.remote?
|
42
|
+
add_to_external_urls(@link.href, line)
|
57
43
|
next
|
58
|
-
elsif
|
59
|
-
add_issue("internally linking to #{link.href}, which does not exist", line)
|
44
|
+
elsif !@link.internal? && !@link.exists?
|
45
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line)
|
60
46
|
end
|
61
47
|
|
62
48
|
# does the local directory have a trailing slash?
|
63
|
-
if link.unslashed_directory? link.absolute_path
|
64
|
-
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line)
|
49
|
+
if @link.unslashed_directory? @link.absolute_path
|
50
|
+
add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line)
|
65
51
|
next
|
66
52
|
end
|
67
53
|
|
68
54
|
# verify the target hash
|
69
|
-
handle_hash(link, line) if link.hash
|
55
|
+
handle_hash(@link, line) if @link.hash
|
70
56
|
end
|
71
57
|
|
72
58
|
external_urls
|
@@ -79,26 +65,27 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
79
65
|
when 'tel'
|
80
66
|
handle_tel(link, line)
|
81
67
|
when 'http'
|
82
|
-
|
68
|
+
return unless @options[:enforce_https]
|
69
|
+
add_issue("#{link.href} is not an HTTPS link", line: line)
|
83
70
|
end
|
84
71
|
end
|
85
72
|
|
86
73
|
def handle_mailto(link, line)
|
87
74
|
if link.path.empty?
|
88
|
-
add_issue("#{link.href} contains no email address", line)
|
75
|
+
add_issue("#{link.href} contains no email address", line: line)
|
89
76
|
elsif !link.path.include?('@')
|
90
|
-
add_issue("#{link.href} contains an invalid email address", line)
|
77
|
+
add_issue("#{link.href} contains an invalid email address", line: line)
|
91
78
|
end
|
92
79
|
end
|
93
80
|
|
94
81
|
def handle_tel(link, line)
|
95
|
-
add_issue("#{link.href} contains no phone number", line) if link.path.empty?
|
82
|
+
add_issue("#{link.href} contains no phone number", line: line) if link.path.empty?
|
96
83
|
end
|
97
84
|
|
98
85
|
def handle_hash(link, line)
|
99
86
|
if link.internal?
|
100
87
|
unless hash_check @html, link.hash
|
101
|
-
add_issue("linking to internal hash ##{link.hash} that does not exist", line)
|
88
|
+
add_issue("linking to internal hash ##{link.hash} that does not exist", line: line)
|
102
89
|
end
|
103
90
|
elsif link.external?
|
104
91
|
external_link_check(link, line)
|
@@ -107,11 +94,11 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
107
94
|
|
108
95
|
def external_link_check(link, line)
|
109
96
|
if !link.exists?
|
110
|
-
add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line)
|
97
|
+
add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line)
|
111
98
|
else
|
112
99
|
target_html = create_nokogiri link.absolute_path
|
113
100
|
unless hash_check target_html, link.hash
|
114
|
-
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line)
|
101
|
+
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line)
|
115
102
|
end
|
116
103
|
end
|
117
104
|
end
|
@@ -122,7 +109,12 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
122
109
|
"//*[case_insensitive_equals(@name, '#{href_hash}')]", \
|
123
110
|
"//*[case_insensitive_equals(@id, '#{decoded_href_hash}')]", \
|
124
111
|
"//*[case_insensitive_equals(@name, '#{decoded_href_hash}')]", \
|
125
|
-
|
112
|
+
XpathFunctions.new).length > 0
|
126
113
|
end
|
127
114
|
|
115
|
+
# Handler object for custom XPath functions; an instance is passed as the
# final argument of the hash lookup query so the expressions there can call
# case_insensitive_equals(...).
class XpathFunctions
  # Returns the members of node_set whose string form equals str_to_match
  # when both are compared in lower case.
  #
  # node_set     - enumerable of nodes (anything responding to find_all).
  # str_to_match - value to compare against; converted with to_s.
  def case_insensitive_equals(node_set, str_to_match)
    # Lower-case the target once rather than once per node.
    wanted = str_to_match.to_s.downcase
    node_set.find_all { |node| node.to_s.downcase == wanted }
  end
end
|
128
120
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Check that inspects every <script> element of the parsed document.
class ScriptCheck < ::HTMLProofer::Check
  attr_reader :src

  # True when the script element currently held in @script has no src value.
  def missing_src?
    !@script.src
  end

  # Visits each <script> node, records an issue for empty scripts without a
  # src and for local scripts whose file is missing, and queues remote
  # sources for the external URL pass. Returns external_urls.
  def run
    @html.css('script').each do |script_node|
      @script = create_element(script_node)
      line_number = script_node.line

      # Ignored elements and scripts that carry inline content are skipped.
      next if @script.ignore?
      next unless script_node.text.strip.empty?

      # does the script exist?
      if missing_src?
        add_issue('script is empty and has no src attribute', line: line_number)
        next
      end

      if @script.remote?
        add_to_external_urls(@script.src, line_number)
        next
      end

      add_issue("internal script #{@script.src} does not exist", line: line_number) unless @script.exists?
    end

    external_urls
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module HTMLProofer
  # Default option hashes for the proofer and for the libraries named in the
  # constant names below, plus a helper that converts "/.../"-style strings
  # into regular expressions.
  module Configuration
    require_relative 'version'

    # Defaults for the proofer's own options.
    PROOFER_DEFAULTS = {
      :allow_hash_href => false,
      :alt_ignore => [],
      :assume_extension => false,
      :check_external_hash => false,
      :check_favicon => false,
      :check_html => false,
      :checks_to_ignore => [],
      :directory_index_file => 'index.html',
      :disable_external => false,
      :empty_alt_ignore => false,
      :enforce_https => false,
      :error_sort => :path,
      :extension => '.html',
      :external_only => false,
      :file_ignore => [],
      :http_status_ignore => [],
      :log_level => :info,
      :only_4xx => false,
      :url_ignore => [],
      :url_swap => []
    }

    # Request defaults; the User-Agent header embeds the gem version.
    TYPHOEUS_DEFAULTS = {
      :followlocation => true,
      :headers => {
        'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
      }
    }

    HYDRA_DEFAULTS = {
      :max_concurrency => 50
    }

    PARALLEL_DEFAULTS = {}

    VALIDATION_DEFAULTS = {
      :report_script_embeds => false,
      :report_invalid_tags => false
    }

    CACHE_DEFAULTS = {}

    # Converts a slash-delimited string into a Regexp.
    #
    # item - a String. When it starts and ends with '/' and has at least one
    #        character in between, the text between the slashes is compiled
    #        with Regexp.new.
    #
    # Returns the Regexp, or item itself when it is not slash-delimited.
    # A bare "/" or "//" has nothing between the slashes; compiling it would
    # give an empty pattern that matches every string, so those are returned
    # unchanged as plain strings.
    def self.to_regex?(item)
      if item.length > 2 && item.start_with?('/') && item.end_with?('/')
        Regexp.new item[1...-1]
      else
        item
      end
    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'addressable/uri'
|
2
|
+
require_relative './utils'
|
3
|
+
|
4
|
+
module HTMLProofer
  # Represents the element currently being processed
  class Element
    include HTMLProofer::Utils

    attr_reader :id, :name, :alt, :href, :link, :src, :line

    # obj   - the parsed node; its attributes, content and line are read.
    # check - the check that created this element; its options, src and path
    #         are consulted by the methods below.
    def initialize(obj, check)
      # Construct readable ivars for every element
      obj.attributes.each_pair do |attribute, value|
        name = attribute_ivar_name(attribute)
        singleton_class.send(:attr_reader, name)
        instance_variable_set("@#{name}", value.value)
      end

      # Assigned after the attribute loop, so an attribute that happens to be
      # called "text", "check", "type" or "line" cannot overwrite these.
      @text = obj.content
      @check = check
      @checked_paths = {}
      @type = check.class.name
      @line = obj.line

      # fix up missing protocols
      @href.insert 0, 'http:' if @href =~ %r{^//}
      @src.insert 0, 'http:' if @src =~ %r{^//}
    end

    # The first of src, srcset and href that is set (or '' when none is),
    # passed through swap when the :url_swap option is not empty.
    def url
      url = @src || @srcset || @href || ''
      return url if @check.options[:url_swap].empty?
      swap(url, @check.options[:url_swap])
    end

    # True when url could be parsed.
    def valid?
      !parts.nil?
    end

    # Parsed form of url, memoized; nil when parsing raises one of the
    # rescued URI errors.
    def parts
      @parts ||= Addressable::URI.parse url
    rescue URI::Error, Addressable::URI::InvalidURIError
      @parts = nil
    end

    # Unencoded path component of url, or nil when url is unparseable.
    def path
      Addressable::URI.unencode parts.path unless parts.nil?
    end

    # Fragment component of url, or nil when url is unparseable.
    # NOTE(review): this replaces Object#hash for Element instances.
    def hash
      parts.fragment unless parts.nil?
    end

    # Scheme component of url, or nil when url is unparseable.
    def scheme
      parts.scheme unless parts.nil?
    end

    # path is to an external server
    def remote?
      %w( http https ).include? scheme
    end

    # True when url has a scheme other than http or https.
    def non_http_remote?
      !scheme.nil? && !remote?
    end

    # Whether the current check should skip this element. Always returns
    # true or false.
    def ignore?
      # set by the attribute loop when the element has data-proofer-ignore
      return true if @data_proofer_ignore

      return true if url.match(/^javascript:/)

      # ignore base64 encoded images
      if %w(ImageCheck FaviconCheck).include? @type
        return true if url.match(/^data:image/)
      end

      # ignore user defined URLs
      return true if ignores_pattern_check(@check.options[:url_ignore])

      # ignore user defined alts
      return false unless 'ImageCheck' == @type
      ignores_pattern_check(@check.options[:alt_ignore])
    end

    def ignore_empty_alt?
      @check.options[:empty_alt_ignore]
    end

    def allow_hash_href?
      @check.options[:allow_hash_href]
    end

    # path is external to the file
    def external?
      !internal?
    end

    # path is an anchor or a query
    def internal?
      url.start_with? '#', '?'
    end

    # Location on disk that the URL's path refers to, or nil when there is
    # no path. The base directory is chosen from the check's src and path
    # depending on whether the link is root-relative and on which candidate
    # files exist.
    def file_path
      return if path.nil?

      path_dot_ext = ''

      if @check.options[:assume_extension]
        path_dot_ext = path + @check.options[:extension]
      end

      if path =~ %r{^/} # path relative to root
        base = File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
      elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
        base = File.dirname @check.path
      elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
        base = File.dirname @check.path
      else # relative link, path is a directory
        base = @check.path
      end

      file = File.join base, path

      # implicit index support
      if File.directory?(file) && !unslashed_directory?(file)
        file = File.join file, @check.options[:directory_index_file]
      elsif @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
        file = "#{file}#{@check.options[:extension]}"
      end

      file
    end

    # checks if a file exists relative to the current pwd
    def exists?
      # Resolve the path once; the result is cached per resolved path.
      target = absolute_path
      return @checked_paths[target] if @checked_paths.key? target
      @checked_paths[target] = File.exist? target
    end

    # file_path (or the check's own path when there is none) expanded
    # against the current working directory.
    def absolute_path
      location = file_path || @check.path
      File.expand_path location, Dir.pwd
    end

    # True when url equals one of the String entries or matches one of the
    # Regexp entries in links; entries of any other type are disregarded.
    def ignores_pattern_check(links)
      links.any? do |ignore|
        case ignore
        when String then ignore == url
        when Regexp then ignore =~ url
        end
      end
    end

    # True when file is a directory named without a trailing separator and
    # the followlocation option is not enabled.
    def unslashed_directory?(file)
      File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
    end

    def follow_location?
      @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
    end

    private

    # Turns an attribute name into a Symbol that is valid both as a reader
    # method name and as an instance variable name. Every character that is
    # not a word character becomes '_', and a name that is empty or starts
    # with a digit is prefixed with '_'. Without this, an attribute such as
    # "@click" makes attr_reader / instance_variable_set raise NameError.
    def attribute_ivar_name(attribute)
      name = attribute.gsub(/[^[:word:]]/, '_')
      name = "_#{name}" if name.empty? || name =~ /\A\d/
      name.to_sym
    end
  end
end
|
@@ -1,18 +1,16 @@
|
|
1
|
-
|
2
|
-
class HTML::Proofer::CheckRunner
|
3
|
-
|
1
|
+
module HTMLProofer
|
4
2
|
class Issue
|
5
|
-
attr_reader :path, :desc, :status, :
|
3
|
+
attr_reader :path, :desc, :status, :line
|
6
4
|
|
7
|
-
def initialize(path, desc,
|
8
|
-
@
|
5
|
+
def initialize(path, desc, line: nil, status: -1)
|
6
|
+
@line = line.nil? ? '' : " (line #{line})"
|
9
7
|
@path = path
|
10
8
|
@desc = desc
|
11
9
|
@status = status
|
12
10
|
end
|
13
11
|
|
14
12
|
def to_s
|
15
|
-
"#{@path}: #{@desc}#{@
|
13
|
+
"#{@path}: #{@desc}#{@line}"
|
16
14
|
end
|
17
15
|
end
|
18
16
|
|
@@ -48,13 +46,13 @@ class HTML::Proofer::CheckRunner
|
|
48
46
|
|
49
47
|
sorted_issues.each do |issue|
|
50
48
|
if matcher != issue.send(first_report)
|
51
|
-
@logger.log :error,
|
49
|
+
@logger.log :error, "- #{issue.send(first_report)}"
|
52
50
|
matcher = issue.send(first_report)
|
53
51
|
end
|
54
52
|
if first_report == :status
|
55
|
-
@logger.log :error,
|
53
|
+
@logger.log :error, " * #{issue}"
|
56
54
|
else
|
57
|
-
@logger.log :error,
|
55
|
+
@logger.log :error, " * #{issue.send(second_report)}#{issue.line}"
|
58
56
|
end
|
59
57
|
end
|
60
58
|
end
|