html-proofer 2.6.4 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/{htmlproof → htmlproofer} +31 -24
- data/lib/html-proofer.rb +47 -0
- data/lib/html-proofer/cache.rb +153 -0
- data/lib/html-proofer/check.rb +63 -0
- data/lib/{html/proofer/checks → html-proofer/check}/favicon.rb +2 -8
- data/lib/html-proofer/check/html.rb +21 -0
- data/lib/html-proofer/check/images.rb +47 -0
- data/lib/{html/proofer/checks → html-proofer/check}/links.rb +40 -48
- data/lib/html-proofer/check/scripts.rb +28 -0
- data/lib/html-proofer/configuration.rb +56 -0
- data/lib/html-proofer/element.rb +165 -0
- data/lib/{html/proofer/check_runner → html-proofer}/issue.rb +8 -10
- data/lib/html-proofer/log.rb +49 -0
- data/lib/html-proofer/runner.rb +160 -0
- data/lib/html-proofer/url_validator.rb +218 -0
- data/lib/html-proofer/utils.rb +40 -0
- data/lib/html-proofer/version.rb +3 -0
- metadata +20 -20
- data/lib/html/proofer.rb +0 -191
- data/lib/html/proofer/cache.rb +0 -141
- data/lib/html/proofer/check_runner.rb +0 -70
- data/lib/html/proofer/checkable.rb +0 -168
- data/lib/html/proofer/checks/html.rb +0 -46
- data/lib/html/proofer/checks/images.rb +0 -54
- data/lib/html/proofer/checks/scripts.rb +0 -40
- data/lib/html/proofer/configuration.rb +0 -48
- data/lib/html/proofer/log.rb +0 -42
- data/lib/html/proofer/url_validator.rb +0 -222
- data/lib/html/proofer/utils.rb +0 -42
- data/lib/html/proofer/version.rb +0 -5
- data/lib/html/proofer/xpathfunctions.rb +0 -9
@@ -1,72 +1,58 @@
|
|
1
|
-
class
|
2
|
-
|
3
|
-
def href
|
4
|
-
real_attr @href
|
5
|
-
end
|
6
|
-
|
7
|
-
def id
|
8
|
-
real_attr @id
|
9
|
-
end
|
10
|
-
|
11
|
-
def name
|
12
|
-
real_attr @name
|
13
|
-
end
|
1
|
+
class LinkCheck < ::HTMLProofer::Check
|
2
|
+
include HTMLProofer::Utils
|
14
3
|
|
15
4
|
def missing_href?
|
16
|
-
href
|
5
|
+
blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
|
17
6
|
end
|
18
7
|
|
19
8
|
def placeholder?
|
20
|
-
(id || name) && href.nil?
|
9
|
+
(!blank?(@link.id) || !blank?(@link.name)) && @link.href.nil?
|
21
10
|
end
|
22
|
-
end
|
23
|
-
|
24
|
-
class LinkCheck < ::HTML::Proofer::CheckRunner
|
25
|
-
include HTML::Proofer::Utils
|
26
11
|
|
27
12
|
def run
|
28
13
|
@html.css('a, link').each do |node|
|
29
|
-
link =
|
30
|
-
line = node.line
|
14
|
+
@link = create_element(node)
|
15
|
+
line = @node.line
|
16
|
+
|
17
|
+
next if @link.ignore?
|
31
18
|
|
32
|
-
next if
|
33
|
-
next if link.
|
34
|
-
next if link.placeholder?
|
35
|
-
next if link.allow_hash_href? && link.href == '#'
|
19
|
+
next if placeholder?
|
20
|
+
next if @link.allow_hash_href? && @link.href == '#'
|
36
21
|
|
37
22
|
# is it even a valid URL?
|
38
|
-
unless link.valid?
|
39
|
-
add_issue("#{link.href} is an invalid URL", line)
|
23
|
+
unless @link.valid?
|
24
|
+
add_issue("#{@link.href} is an invalid URL", line: line)
|
40
25
|
next
|
41
26
|
end
|
42
27
|
|
43
|
-
check_schemes(link, line)
|
28
|
+
check_schemes(@link, line)
|
44
29
|
|
45
|
-
# is there even
|
46
|
-
if
|
47
|
-
|
30
|
+
# is there even an href?
|
31
|
+
if missing_href?
|
32
|
+
# HTML5 allows dropping the href: http://git.io/vBX0z
|
33
|
+
next if @html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?
|
34
|
+
add_issue('anchor has no href attribute', line: line)
|
48
35
|
next
|
49
36
|
end
|
50
37
|
|
51
38
|
# intentionally here because we still want valid? & missing_href? to execute
|
52
|
-
next if link.non_http_remote?
|
53
|
-
|
39
|
+
next if @link.non_http_remote?
|
54
40
|
# does the file even exist?
|
55
|
-
if link.remote?
|
56
|
-
add_to_external_urls(link.href, line)
|
41
|
+
if @link.remote?
|
42
|
+
add_to_external_urls(@link.href, line)
|
57
43
|
next
|
58
|
-
elsif
|
59
|
-
add_issue("internally linking to #{link.href}, which does not exist", line)
|
44
|
+
elsif !@link.internal? && !@link.exists?
|
45
|
+
add_issue("internally linking to #{@link.href}, which does not exist", line: line)
|
60
46
|
end
|
61
47
|
|
62
48
|
# does the local directory have a trailing slash?
|
63
|
-
if link.unslashed_directory? link.absolute_path
|
64
|
-
add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", line)
|
49
|
+
if @link.unslashed_directory? @link.absolute_path
|
50
|
+
add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line)
|
65
51
|
next
|
66
52
|
end
|
67
53
|
|
68
54
|
# verify the target hash
|
69
|
-
handle_hash(link, line) if link.hash
|
55
|
+
handle_hash(@link, line) if @link.hash
|
70
56
|
end
|
71
57
|
|
72
58
|
external_urls
|
@@ -79,26 +65,27 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
79
65
|
when 'tel'
|
80
66
|
handle_tel(link, line)
|
81
67
|
when 'http'
|
82
|
-
|
68
|
+
return unless @options[:enforce_https]
|
69
|
+
add_issue("#{link.href} is not an HTTPS link", line: line)
|
83
70
|
end
|
84
71
|
end
|
85
72
|
|
86
73
|
def handle_mailto(link, line)
|
87
74
|
if link.path.empty?
|
88
|
-
add_issue("#{link.href} contains no email address", line)
|
75
|
+
add_issue("#{link.href} contains no email address", line: line)
|
89
76
|
elsif !link.path.include?('@')
|
90
|
-
add_issue("#{link.href} contains an invalid email address", line)
|
77
|
+
add_issue("#{link.href} contains an invalid email address", line: line)
|
91
78
|
end
|
92
79
|
end
|
93
80
|
|
94
81
|
def handle_tel(link, line)
|
95
|
-
add_issue("#{link.href} contains no phone number", line) if link.path.empty?
|
82
|
+
add_issue("#{link.href} contains no phone number", line: line) if link.path.empty?
|
96
83
|
end
|
97
84
|
|
98
85
|
def handle_hash(link, line)
|
99
86
|
if link.internal?
|
100
87
|
unless hash_check @html, link.hash
|
101
|
-
add_issue("linking to internal hash ##{link.hash} that does not exist", line)
|
88
|
+
add_issue("linking to internal hash ##{link.hash} that does not exist", line: line)
|
102
89
|
end
|
103
90
|
elsif link.external?
|
104
91
|
external_link_check(link, line)
|
@@ -107,11 +94,11 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
107
94
|
|
108
95
|
def external_link_check(link, line)
|
109
96
|
if !link.exists?
|
110
|
-
add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line)
|
97
|
+
add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line)
|
111
98
|
else
|
112
99
|
target_html = create_nokogiri link.absolute_path
|
113
100
|
unless hash_check target_html, link.hash
|
114
|
-
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line)
|
101
|
+
add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line)
|
115
102
|
end
|
116
103
|
end
|
117
104
|
end
|
@@ -122,7 +109,12 @@ class LinkCheck < ::HTML::Proofer::CheckRunner
|
|
122
109
|
"//*[case_insensitive_equals(@name, '#{href_hash}')]", \
|
123
110
|
"//*[case_insensitive_equals(@id, '#{decoded_href_hash}')]", \
|
124
111
|
"//*[case_insensitive_equals(@name, '#{decoded_href_hash}')]", \
|
125
|
-
|
112
|
+
XpathFunctions.new).length > 0
|
126
113
|
end
|
127
114
|
|
115
|
+
class XpathFunctions
|
116
|
+
def case_insensitive_equals(node_set, str_to_match)
|
117
|
+
node_set.find_all {|node| node.to_s.downcase == str_to_match.to_s.downcase }
|
118
|
+
end
|
119
|
+
end
|
128
120
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
class ScriptCheck < ::HTMLProofer::Check
|
2
|
+
attr_reader :src
|
3
|
+
|
4
|
+
def missing_src?
|
5
|
+
!@script.src
|
6
|
+
end
|
7
|
+
|
8
|
+
def run
|
9
|
+
@html.css('script').each do |node|
|
10
|
+
@script = create_element(node)
|
11
|
+
line = node.line
|
12
|
+
|
13
|
+
next if @script.ignore?
|
14
|
+
next unless node.text.strip.empty?
|
15
|
+
|
16
|
+
# does the script exist?
|
17
|
+
if missing_src?
|
18
|
+
add_issue('script is empty and has no src attribute', line: line)
|
19
|
+
elsif @script.remote?
|
20
|
+
add_to_external_urls(@script.src, line)
|
21
|
+
elsif !@script.exists?
|
22
|
+
add_issue("internal script #{@script.src} does not exist", line: line)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
external_urls
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module HTMLProofer
|
2
|
+
module Configuration
|
3
|
+
require_relative 'version'
|
4
|
+
|
5
|
+
PROOFER_DEFAULTS = {
|
6
|
+
:allow_hash_href => false,
|
7
|
+
:alt_ignore => [],
|
8
|
+
:assume_extension => false,
|
9
|
+
:check_external_hash => false,
|
10
|
+
:check_favicon => false,
|
11
|
+
:check_html => false,
|
12
|
+
:checks_to_ignore => [],
|
13
|
+
:directory_index_file => 'index.html',
|
14
|
+
:disable_external => false,
|
15
|
+
:empty_alt_ignore => false,
|
16
|
+
:enforce_https => false,
|
17
|
+
:error_sort => :path,
|
18
|
+
:extension => '.html',
|
19
|
+
:external_only => false,
|
20
|
+
:file_ignore => [],
|
21
|
+
:http_status_ignore => [],
|
22
|
+
:log_level => :info,
|
23
|
+
:only_4xx => false,
|
24
|
+
:url_ignore => [],
|
25
|
+
:url_swap => []
|
26
|
+
}
|
27
|
+
|
28
|
+
TYPHOEUS_DEFAULTS = {
|
29
|
+
:followlocation => true,
|
30
|
+
:headers => {
|
31
|
+
'User-Agent' => "Mozilla/5.0 (compatible; HTML Proofer/#{HTMLProofer::VERSION}; +https://github.com/gjtorikian/html-proofer)"
|
32
|
+
}
|
33
|
+
}
|
34
|
+
|
35
|
+
HYDRA_DEFAULTS = {
|
36
|
+
:max_concurrency => 50
|
37
|
+
}
|
38
|
+
|
39
|
+
PARALLEL_DEFAULTS = {}
|
40
|
+
|
41
|
+
VALIDATION_DEFAULTS = {
|
42
|
+
:report_script_embeds => false,
|
43
|
+
:report_invalid_tags => false
|
44
|
+
}
|
45
|
+
|
46
|
+
CACHE_DEFAULTS = {}
|
47
|
+
|
48
|
+
def self.to_regex?(item)
|
49
|
+
if item.start_with?('/') && item.end_with?('/')
|
50
|
+
Regexp.new item[1...-1]
|
51
|
+
else
|
52
|
+
item
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'addressable/uri'
|
2
|
+
require_relative './utils'
|
3
|
+
|
4
|
+
module HTMLProofer
|
5
|
+
# Represents the element currently being processed
|
6
|
+
class Element
|
7
|
+
include HTMLProofer::Utils
|
8
|
+
|
9
|
+
attr_reader :id, :name, :alt, :href, :link, :src, :line
|
10
|
+
|
11
|
+
def initialize(obj, check)
|
12
|
+
# Contruct readable ivars for every element
|
13
|
+
obj.attributes.each_pair do |attribute, value|
|
14
|
+
name = "#{attribute.tr('-:.', '_')}".to_sym
|
15
|
+
(class << self; self; end).send(:attr_reader, name)
|
16
|
+
instance_variable_set("@#{name}", value.value)
|
17
|
+
end
|
18
|
+
|
19
|
+
@text = obj.content
|
20
|
+
@check = check
|
21
|
+
@checked_paths = {}
|
22
|
+
@type = check.class.name
|
23
|
+
@line = obj.line
|
24
|
+
|
25
|
+
# fix up missing protocols
|
26
|
+
@href.insert 0, 'http:' if @href =~ %r{^//}
|
27
|
+
@src.insert 0, 'http:' if @src =~ %r{^//}
|
28
|
+
end
|
29
|
+
|
30
|
+
def url
|
31
|
+
url = @src || @srcset || @href || ''
|
32
|
+
return url if @check.options[:url_swap].empty?
|
33
|
+
swap(url, @check.options[:url_swap])
|
34
|
+
end
|
35
|
+
|
36
|
+
def valid?
|
37
|
+
!parts.nil?
|
38
|
+
end
|
39
|
+
|
40
|
+
def parts
|
41
|
+
@parts ||= Addressable::URI.parse url
|
42
|
+
rescue URI::Error, Addressable::URI::InvalidURIError
|
43
|
+
@parts = nil
|
44
|
+
end
|
45
|
+
|
46
|
+
def path
|
47
|
+
Addressable::URI.unencode parts.path unless parts.nil?
|
48
|
+
end
|
49
|
+
|
50
|
+
def hash
|
51
|
+
parts.fragment unless parts.nil?
|
52
|
+
end
|
53
|
+
|
54
|
+
def scheme
|
55
|
+
parts.scheme unless parts.nil?
|
56
|
+
end
|
57
|
+
|
58
|
+
# path is to an external server
|
59
|
+
def remote?
|
60
|
+
%w( http https ).include? scheme
|
61
|
+
end
|
62
|
+
|
63
|
+
def non_http_remote?
|
64
|
+
!scheme.nil? && !remote?
|
65
|
+
end
|
66
|
+
|
67
|
+
def ignore?
|
68
|
+
return true if @data_proofer_ignore
|
69
|
+
|
70
|
+
return true if url.match(/^javascript:/)
|
71
|
+
|
72
|
+
# ignore base64 encoded images
|
73
|
+
if %w(ImageCheck FaviconCheck).include? @type
|
74
|
+
return true if url.match(/^data:image/)
|
75
|
+
end
|
76
|
+
|
77
|
+
# ignore user defined URLs
|
78
|
+
return true if ignores_pattern_check(@check.options[:url_ignore])
|
79
|
+
|
80
|
+
# ignore user defined alts
|
81
|
+
return false unless 'ImageCheck' == @type
|
82
|
+
return true if ignores_pattern_check(@check.options[:alt_ignore])
|
83
|
+
end
|
84
|
+
|
85
|
+
def ignore_empty_alt?
|
86
|
+
@check.options[:empty_alt_ignore]
|
87
|
+
end
|
88
|
+
|
89
|
+
def allow_hash_href?
|
90
|
+
@check.options[:allow_hash_href]
|
91
|
+
end
|
92
|
+
|
93
|
+
# path is external to the file
|
94
|
+
def external?
|
95
|
+
!internal?
|
96
|
+
end
|
97
|
+
|
98
|
+
# path is an anchor or a query
|
99
|
+
def internal?
|
100
|
+
url.start_with? '#', '?'
|
101
|
+
end
|
102
|
+
|
103
|
+
def file_path
|
104
|
+
return if path.nil?
|
105
|
+
|
106
|
+
path_dot_ext = ''
|
107
|
+
|
108
|
+
if @check.options[:assume_extension]
|
109
|
+
path_dot_ext = path + @check.options[:extension]
|
110
|
+
end
|
111
|
+
|
112
|
+
if path =~ %r{^/} # path relative to root
|
113
|
+
base = File.directory?(@check.src) ? @check.src : File.dirname(@check.src)
|
114
|
+
elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
|
115
|
+
base = File.dirname @check.path
|
116
|
+
elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
|
117
|
+
base = File.dirname @check.path
|
118
|
+
else # relative link, path is a directory
|
119
|
+
base = @check.path
|
120
|
+
end
|
121
|
+
|
122
|
+
file = File.join base, path
|
123
|
+
|
124
|
+
# implicit index support
|
125
|
+
if File.directory?(file) && !unslashed_directory?(file)
|
126
|
+
file = File.join file, @check.options[:directory_index_file]
|
127
|
+
elsif @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
|
128
|
+
file = "#{file}#{@check.options[:extension]}"
|
129
|
+
end
|
130
|
+
|
131
|
+
file
|
132
|
+
end
|
133
|
+
|
134
|
+
# checks if a file exists relative to the current pwd
|
135
|
+
def exists?
|
136
|
+
return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
|
137
|
+
@checked_paths[absolute_path] = File.exist? absolute_path
|
138
|
+
end
|
139
|
+
|
140
|
+
def absolute_path
|
141
|
+
path = file_path || @check.path
|
142
|
+
File.expand_path path, Dir.pwd
|
143
|
+
end
|
144
|
+
|
145
|
+
def ignores_pattern_check(links)
|
146
|
+
links.each do |ignore|
|
147
|
+
if ignore.is_a? String
|
148
|
+
return true if ignore == url
|
149
|
+
elsif ignore.is_a? Regexp
|
150
|
+
return true if ignore =~ url
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
false
|
155
|
+
end
|
156
|
+
|
157
|
+
def unslashed_directory?(file)
|
158
|
+
File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location?
|
159
|
+
end
|
160
|
+
|
161
|
+
def follow_location?
|
162
|
+
@check.options[:typhoeus] && @check.options[:typhoeus][:followlocation]
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
@@ -1,18 +1,16 @@
|
|
1
|
-
|
2
|
-
class HTML::Proofer::CheckRunner
|
3
|
-
|
1
|
+
module HTMLProofer
|
4
2
|
class Issue
|
5
|
-
attr_reader :path, :desc, :status, :
|
3
|
+
attr_reader :path, :desc, :status, :line
|
6
4
|
|
7
|
-
def initialize(path, desc,
|
8
|
-
@
|
5
|
+
def initialize(path, desc, line: nil, status: -1)
|
6
|
+
@line = line.nil? ? '' : " (line #{line})"
|
9
7
|
@path = path
|
10
8
|
@desc = desc
|
11
9
|
@status = status
|
12
10
|
end
|
13
11
|
|
14
12
|
def to_s
|
15
|
-
"#{@path}: #{@desc}#{@
|
13
|
+
"#{@path}: #{@desc}#{@line}"
|
16
14
|
end
|
17
15
|
end
|
18
16
|
|
@@ -48,13 +46,13 @@ class HTML::Proofer::CheckRunner
|
|
48
46
|
|
49
47
|
sorted_issues.each do |issue|
|
50
48
|
if matcher != issue.send(first_report)
|
51
|
-
@logger.log :error,
|
49
|
+
@logger.log :error, "- #{issue.send(first_report)}"
|
52
50
|
matcher = issue.send(first_report)
|
53
51
|
end
|
54
52
|
if first_report == :status
|
55
|
-
@logger.log :error,
|
53
|
+
@logger.log :error, " * #{issue}"
|
56
54
|
else
|
57
|
-
@logger.log :error,
|
55
|
+
@logger.log :error, " * #{issue.send(second_report)}#{issue.line}"
|
58
56
|
end
|
59
57
|
end
|
60
58
|
end
|