html-proofer 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -2
- data/bin/htmlproof +3 -1
- data/html-proofer.gemspec +4 -2
- data/lib/html/proofer.rb +66 -10
- data/lib/html/proofer/check.rb +1 -1
- data/lib/html/proofer/checkable.rb +6 -6
- data/lib/html/proofer/checks.rb +2 -1
- data/lib/html/proofer/checks/favicon.rb +0 -2
- data/lib/html/proofer/checks/html.rb +24 -0
- data/lib/html/proofer/checks/links.rb +12 -0
- data/lib/html/proofer/issue.rb +1 -1
- data/lib/html/proofer/version.rb +5 -0
- data/spec/html/proofer/favicon_spec.rb +15 -14
- data/spec/html/proofer/fixtures/html/div_inside_head.html +6 -0
- data/spec/html/proofer/fixtures/html/html5_tags.html +9 -0
- data/spec/html/proofer/fixtures/html/invalid_tag.html +3 -0
- data/spec/html/proofer/fixtures/html/missing_closing_quotes.html +5 -0
- data/spec/html/proofer/fixtures/html/opening_and_ending_tag_mismatch.html +7 -0
- data/spec/html/proofer/fixtures/html/unescaped_ampersand_in_attribute.html +4 -0
- data/spec/html/proofer/fixtures/html/unmatched_end_tag.html +5 -0
- data/spec/html/proofer/fixtures/links/brokenHashOnTheWeb.html +2 -0
- data/spec/html/proofer/fixtures/links/githubHash.html +1 -0
- data/spec/html/proofer/fixtures/links/non_standard_characters.html +10 -0
- data/spec/html/proofer/fixtures/links/other_protocols.html +4 -0
- data/spec/html/proofer/fixtures/sorting/issue/broken_image_one.html +1 -0
- data/spec/html/proofer/fixtures/sorting/issue/broken_image_two.html +4 -0
- data/spec/html/proofer/fixtures/sorting/path/multiple_issues.html +11 -0
- data/spec/html/proofer/fixtures/sorting/path/single_issue.html +1 -0
- data/spec/html/proofer/fixtures/sorting/status/a_404.html +1 -0
- data/spec/html/proofer/fixtures/sorting/status/broken_link.html +3 -0
- data/spec/html/proofer/fixtures/sorting/status/missing_redirect.html +1 -0
- data/spec/html/proofer/html_spec.rb +51 -0
- data/spec/html/proofer/images_spec.rb +33 -33
- data/spec/html/proofer/links_spec.rb +115 -86
- data/spec/html/proofer/scripts_spec.rb +12 -12
- data/spec/html/proofer_spec.rb +50 -9
- data/spec/spec_helper.rb +13 -1
- metadata +58 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 892dbf1a283cd5c3bf33d9a76430371d035f3abe
|
4
|
+
data.tar.gz: e4b6498ca79d5ad258f5763c005f039599304434
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc79438fbab4a5e7d7e9c9f318d3cfb71e315e6028a1b9b0e1934babb6a4aa046930ff830cf193baffec171785421d28fdbfb5470b6e8781a6ecaaed4eecfece
|
7
|
+
data.tar.gz: 45ff2420a45ebda37369dfcf713f4b9aa89ba3e8f4278c705345fa83915cf6fe5d76cf04b1a80b102fb4123f9c3ba365eceb4f0a808f66011f4f6665fc15807c
|
data/README.md
CHANGED
@@ -122,6 +122,11 @@ Project | Repository
|
|
122
122
|
* Whether your internal script references are not broken
|
123
123
|
* Whether external scripts are loading
|
124
124
|
|
125
|
+
### HTML
|
126
|
+
|
127
|
+
Nokogiri looks at the markup and [provides errors](http://www.nokogiri.org/tutorials/ensuring_well_formed_markup.html) when parsing your document.
|
128
|
+
This is an optional feature; set the `validate_html` option to `true` to enable validation errors from Nokogiri.
|
129
|
+
|
125
130
|
## Configuration
|
126
131
|
|
127
132
|
The `HTML::Proofer` constructor takes an optional hash of additional options:
|
@@ -133,11 +138,12 @@ The `HTML::Proofer` constructor takes an optional hash of additional options:
|
|
133
138
|
| `favicon` | Enables the favicon checker. | `false` |
|
134
139
|
| `followlocation` | Follows external redirections. Amends missing trailing slashes to internal directories. | `true` |
|
135
140
|
| `directory_index_file` | Sets the file to look for when a link refers to a directory. | `index.html` |
|
136
|
-
| `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore.
|
141
|
+
| `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. | `[]` |
|
137
142
|
| `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` |
|
138
143
|
| `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` |
|
139
144
|
| `verbose` | If `true`, outputs extra information as the checking happens. Useful for debugging. | `false` |
|
140
145
|
| `only_4xx` | Only reports errors for links that fall within the 4xx status code range. | `false` |
|
146
|
+
| `validate_html` | Enables HTML validation errors from Nokogiri. | `false` |
|
141
147
|
|
142
148
|
### Configuring Typhoeus
|
143
149
|
|
@@ -181,7 +187,7 @@ Add the `data-proofer-ignore` attribute to any tag to ignore it from the checks.
|
|
181
187
|
|
182
188
|
Want to write your own test? Sure! Just create two classes--one that inherits from `HTML::Proofer::Checkable`, and another that inherits from `HTML::Proofer::Checks::Check`. `Checkable` defines various helper methods for your test, while `Checks::Check` actually runs across your content. `Checks::Check` should call `self.add_issue` on failures, to add them to the list.
|
183
189
|
|
184
|
-
Here's an example custom test that protects against `mailto` links
|
190
|
+
Here's an example custom test that protects against `mailto` links that point to `octocat@github.com`:
|
185
191
|
|
186
192
|
``` ruby
|
187
193
|
class OctocatLink < ::HTML::Proofer::Checkable
|
data/bin/htmlproof
CHANGED
@@ -18,12 +18,13 @@ Mercenary.program(:htmlproof) do |p|
|
|
18
18
|
p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).'
|
19
19
|
p.option 'as-links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
|
20
20
|
p.option 'swap', '--swap regex:string,[regex:string,...]', Array, 'Array containing key-value pairs of `RegExp:String`. It transforms links that match `RegExp` into `String`'
|
21
|
-
p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore.
|
21
|
+
p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored.'
|
22
22
|
p.option 'alt_ignore', '--alt_ignore image1,[image2,...]', Array, 'Array of Strings containing `img`s whose missing `alt` tags are safe to ignore'
|
23
23
|
p.option 'disable_external', '--disable_external', 'Disables the external link checker (default: `false`)'
|
24
24
|
p.option 'only-4xx', '--only-4xx', 'Only reports errors for links that fall within the 4x status code range.'
|
25
25
|
p.option 'verbose', '--verbose', 'Enables more verbose logging.'
|
26
26
|
p.option 'directory_index_file', '--directory_index_file', 'Sets the file to look for when a link refers to a directory.'
|
27
|
+
p.option 'validate_html', '--validate_html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
|
27
28
|
|
28
29
|
p.action do |args, opts|
|
29
30
|
args = ["."] if args.empty?
|
@@ -45,6 +46,7 @@ Mercenary.program(:htmlproof) do |p|
|
|
45
46
|
options[:favicon] = opts["favicon"] unless opts["favicon"].nil?
|
46
47
|
options[:verbose] = opts["verbose"] unless opts["verbose"].nil?
|
47
48
|
options[:directory_index_file] = opts["directory_index_file"] unless opts["directory_index_file"].nil?
|
49
|
+
options[:validate_html] = opts["validate_html"] unless opts["validate_html"].nil?
|
48
50
|
|
49
51
|
path = path.delete(' ').split(",") if opts["as-links"]
|
50
52
|
|
data/html-proofer.gemspec
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require 'html/proofer/version'
|
3
4
|
|
4
5
|
Gem::Specification.new do |gem|
|
5
6
|
gem.name = "html-proofer"
|
6
|
-
gem.version =
|
7
|
+
gem.version = HTML::Proofer::VERSION
|
7
8
|
gem.authors = ["Garen Torikian"]
|
8
9
|
gem.email = ["gjtorikian@gmail.com"]
|
9
10
|
gem.description = %q{Test your rendered HTML files to make sure they're accurate.}
|
@@ -21,9 +22,10 @@ Gem::Specification.new do |gem|
|
|
21
22
|
gem.add_dependency "typhoeus", "~> 0.6.7"
|
22
23
|
gem.add_dependency "yell", "~> 2.0"
|
23
24
|
gem.add_dependency "parallel", "~> 1.3"
|
25
|
+
gem.add_dependency "addressable", "~> 2.3"
|
24
26
|
|
25
27
|
gem.add_development_dependency "redcarpet"
|
26
|
-
gem.add_development_dependency "rspec", "~>
|
28
|
+
gem.add_development_dependency "rspec", "~> 3.1.0"
|
27
29
|
gem.add_development_dependency "rake"
|
28
30
|
gem.add_development_dependency "awesome_print"
|
29
31
|
end
|
data/lib/html/proofer.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'yell'
|
3
3
|
require 'parallel'
|
4
|
+
require "addressable/uri"
|
4
5
|
|
5
6
|
begin
|
6
7
|
require "awesome_print"
|
7
8
|
rescue LoadError; end
|
8
9
|
|
9
|
-
[
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
%w[
|
11
|
+
checkable
|
12
|
+
checks
|
13
|
+
issue
|
14
|
+
version
|
13
15
|
].each { |r| require File.join(File.dirname(__FILE__), "proofer", r) }
|
14
16
|
|
15
17
|
module HTML
|
@@ -39,7 +41,9 @@ module HTML
|
|
39
41
|
:disable_external => false,
|
40
42
|
:verbose => false,
|
41
43
|
:only_4xx => false,
|
42
|
-
:directory_index_file => "index.html"
|
44
|
+
:directory_index_file => "index.html",
|
45
|
+
:validate_html => false,
|
46
|
+
:error_sort => :path
|
43
47
|
}
|
44
48
|
|
45
49
|
@typhoeus_opts = {
|
@@ -104,8 +108,35 @@ module HTML
|
|
104
108
|
if @failed_tests.empty?
|
105
109
|
logger.info HTML::colorize :green, "HTML-Proofer finished successfully."
|
106
110
|
else
|
107
|
-
|
108
|
-
|
111
|
+
matcher = nil
|
112
|
+
|
113
|
+
# always sort by the actual option, then path, to ensure consistent alphabetical (by filename) results
|
114
|
+
@failed_tests = @failed_tests.sort do |a,b|
|
115
|
+
comp = (a.send(@options[:error_sort]) <=> b.send(@options[:error_sort]))
|
116
|
+
comp.zero? ? (a.path <=> b.path) : comp
|
117
|
+
end
|
118
|
+
|
119
|
+
@failed_tests.each do |issue|
|
120
|
+
case @options[:error_sort]
|
121
|
+
when :path
|
122
|
+
if matcher != issue.path
|
123
|
+
logger.error HTML::colorize :blue, "- #{issue.path}"
|
124
|
+
matcher = issue.path
|
125
|
+
end
|
126
|
+
logger.error HTML::colorize :red, " * #{issue.desc}"
|
127
|
+
when :desc
|
128
|
+
if matcher != issue.desc
|
129
|
+
logger.error HTML::colorize :blue, "- #{issue.desc}"
|
130
|
+
matcher = issue.desc
|
131
|
+
end
|
132
|
+
logger.error HTML::colorize :red, " * #{issue.path}"
|
133
|
+
when :status
|
134
|
+
if matcher != issue.status
|
135
|
+
logger.error HTML::colorize :blue, "- #{issue.status}"
|
136
|
+
matcher = issue.status
|
137
|
+
end
|
138
|
+
logger.error HTML::colorize :red, " * #{issue.to_s}"
|
139
|
+
end
|
109
140
|
end
|
110
141
|
|
111
142
|
raise HTML::colorize :red, "HTML-Proofer found #{@failed_tests.length} failures!"
|
@@ -125,7 +156,11 @@ module HTML
|
|
125
156
|
Ethon.logger = logger # log from Typhoeus/Ethon
|
126
157
|
|
127
158
|
external_urls.each_pair do |href, filenames|
|
128
|
-
|
159
|
+
if has_hash? href
|
160
|
+
queue_request(:get, href, filenames)
|
161
|
+
else
|
162
|
+
queue_request(:head, href, filenames)
|
163
|
+
end
|
129
164
|
end
|
130
165
|
logger.debug HTML::colorize :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
|
131
166
|
hydra.run
|
@@ -138,7 +173,8 @@ module HTML
|
|
138
173
|
end
|
139
174
|
|
140
175
|
def response_handler(response, filenames)
|
141
|
-
|
176
|
+
effective_url = response.options[:effective_url]
|
177
|
+
href = response.request.base_url
|
142
178
|
method = response.request.options[:method]
|
143
179
|
response_code = response.code
|
144
180
|
|
@@ -147,7 +183,18 @@ module HTML
|
|
147
183
|
logger.debug debug_msg
|
148
184
|
|
149
185
|
if response_code.between?(200, 299)
|
150
|
-
|
186
|
+
return if @options[:only_4xx]
|
187
|
+
if hash = has_hash?(href)
|
188
|
+
body_doc = Nokogiri::HTML(response.body)
|
189
|
+
# user-content is a special addition by GitHub.
|
190
|
+
if URI.parse(href).host.match(/github\.com/i)
|
191
|
+
if body_doc.xpath(%$//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]$).empty?
|
192
|
+
add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response_code
|
193
|
+
end
|
194
|
+
elsif body_doc.xpath(%$//*[@name="#{hash}"]|//*[@id="#{hash}"]$).empty?
|
195
|
+
add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response_code
|
196
|
+
end
|
197
|
+
end
|
151
198
|
elsif response.timed_out?
|
152
199
|
return if @options[:only_4xx]
|
153
200
|
add_failed_tests filenames, "External link #{href} failed: got a time out", response_code
|
@@ -189,9 +236,18 @@ module HTML
|
|
189
236
|
def get_checks
|
190
237
|
checks = HTML::Proofer::Checks::Check.subclasses.map { |c| c.name }
|
191
238
|
checks.delete("Favicons") unless @options[:favicon]
|
239
|
+
checks.delete("Html") unless @options[:validate_html]
|
192
240
|
checks
|
193
241
|
end
|
194
242
|
|
243
|
+
def has_hash?(url)
|
244
|
+
begin
|
245
|
+
URI.parse(url).fragment
|
246
|
+
rescue URI::InvalidURIError
|
247
|
+
nil
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
195
251
|
def log_level
|
196
252
|
@options[:verbose] ? :debug : :info
|
197
253
|
end
|
data/lib/html/proofer/check.rb
CHANGED
@@ -36,11 +36,9 @@ module HTML
|
|
36
36
|
end
|
37
37
|
|
38
38
|
def parts
|
39
|
-
|
40
|
-
|
41
|
-
@parts_cached = URI::Parser.new(:ESCAPED => '\%|\|').parse url
|
39
|
+
@parts ||= Addressable::URI.parse url
|
42
40
|
rescue URI::Error
|
43
|
-
@
|
41
|
+
@parts = nil
|
44
42
|
end
|
45
43
|
|
46
44
|
def path
|
@@ -60,6 +58,10 @@ module HTML
|
|
60
58
|
%w( http https ).include? scheme
|
61
59
|
end
|
62
60
|
|
61
|
+
def non_http_remote?
|
62
|
+
!scheme.nil? && !remote?
|
63
|
+
end
|
64
|
+
|
63
65
|
def ignore?
|
64
66
|
return true if @data_ignore_proofer
|
65
67
|
|
@@ -72,8 +74,6 @@ module HTML
|
|
72
74
|
return true if url.match(/^data:image/)
|
73
75
|
return true if ignores_pattern_check(@check.additional_alt_ignores)
|
74
76
|
end
|
75
|
-
|
76
|
-
%w( mailto tel ).include? scheme
|
77
77
|
end
|
78
78
|
|
79
79
|
# path is external to the file
|
data/lib/html/proofer/checks.rb
CHANGED
@@ -9,8 +9,6 @@ end
|
|
9
9
|
class Favicons < ::HTML::Proofer::Checks::Check
|
10
10
|
|
11
11
|
def run
|
12
|
-
return unless @options[:favicon]
|
13
|
-
|
14
12
|
@html.xpath("//link[not(ancestor::pre or ancestor::code)]").each do |favicon|
|
15
13
|
favicon = Favicon.new favicon, "favicon", self
|
16
14
|
next if favicon.ignore?
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class Html < ::HTML::Proofer::Checks::Check
|
4
|
+
|
5
|
+
# new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
|
6
|
+
HTML5_TAGS = %w(article aside bdi details dialog figcaption
|
7
|
+
figure footer header main mark menuitem meter
|
8
|
+
nav progress rp rt ruby section summary
|
9
|
+
time wbr datalist keygen output color date
|
10
|
+
datetime datetime-local email month number
|
11
|
+
range search tel time url week canvas
|
12
|
+
svg audio embed source track video)
|
13
|
+
|
14
|
+
def run
|
15
|
+
@html.errors.each do |e|
|
16
|
+
|
17
|
+
# Nokogiri (or rather libxml2 under the hood) only recognizes html4 tags,
|
18
|
+
# so we need to skip errors caused by the new tags in html5
|
19
|
+
next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
|
20
|
+
|
21
|
+
self.add_issue(e.to_s)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -46,12 +46,24 @@ class Links < ::HTML::Proofer::Checks::Check
|
|
46
46
|
next
|
47
47
|
end
|
48
48
|
|
49
|
+
if link.scheme == "mailto"
|
50
|
+
self.add_issue "#{link.href} contains no email address" if link.path.empty?
|
51
|
+
self.add_issue "#{link.href} contain an invalid email address" unless link.path.include?("@")
|
52
|
+
end
|
53
|
+
|
54
|
+
if link.scheme == "tel"
|
55
|
+
self.add_issue "#{link.href} contains no phone number" if link.path.empty?
|
56
|
+
end
|
57
|
+
|
49
58
|
# is there even a href?
|
50
59
|
if link.missing_href?
|
51
60
|
self.add_issue("anchor has no href attribute")
|
52
61
|
next
|
53
62
|
end
|
54
63
|
|
64
|
+
# intentionally here because we still want valid? & missing_href? to execute
|
65
|
+
next if link.non_http_remote?
|
66
|
+
|
55
67
|
# does the file even exist?
|
56
68
|
if link.remote?
|
57
69
|
add_to_external_urls link.href
|
data/lib/html/proofer/issue.rb
CHANGED
@@ -3,44 +3,45 @@ require "spec_helper"
|
|
3
3
|
describe "Favicons test" do
|
4
4
|
it "ignores for absent favicon by default" do
|
5
5
|
absent = "#{FIXTURES_DIR}/favicon/favicon_absent.html"
|
6
|
-
|
7
|
-
output.should == ""
|
6
|
+
expect(make_proofer(absent).failed_tests).to eq []
|
8
7
|
end
|
9
8
|
|
10
9
|
it "fails for absent favicon" do
|
11
10
|
absent = "#{FIXTURES_DIR}/favicon/favicon_absent.html"
|
12
|
-
|
13
|
-
|
11
|
+
proofer = make_proofer(absent, {:favicon => true})
|
12
|
+
expect(proofer.failed_tests.first).to match /no favicon specified/
|
14
13
|
end
|
15
14
|
|
16
15
|
it "fails for absent favicon but present apple touch icon" do
|
17
16
|
absent = "#{FIXTURES_DIR}/favicon/favicon_absent_apple.html"
|
18
|
-
|
19
|
-
|
17
|
+
proofer = make_proofer(absent, {:favicon => true})
|
18
|
+
# Travis gives a different error message here for some reason
|
19
|
+
expect(proofer.failed_tests.last).to match /(internally linking to gpl.png, which does not exist|no favicon specified)/
|
20
20
|
end
|
21
21
|
|
22
22
|
it "fails for broken favicon" do
|
23
23
|
broken = "#{FIXTURES_DIR}/favicon/favicon_broken.html"
|
24
|
-
|
25
|
-
|
24
|
+
proofer = make_proofer(broken, {:favicon => true})
|
25
|
+
|
26
|
+
expect(proofer.failed_tests.first).to match /internally linking to asdadaskdalsdk.png/
|
26
27
|
end
|
27
28
|
|
28
29
|
it "passes for present favicon" do
|
29
30
|
present = "#{FIXTURES_DIR}/favicon/favicon_present.html"
|
30
|
-
|
31
|
-
|
31
|
+
proofer = make_proofer(present, {:favicon => true})
|
32
|
+
expect(proofer.failed_tests).to eq []
|
32
33
|
end
|
33
34
|
|
34
35
|
it "passes for present favicon with shortcut notation" do
|
35
36
|
present = "#{FIXTURES_DIR}/favicon/favicon_present_shortcut.html"
|
36
|
-
|
37
|
-
|
37
|
+
proofer = make_proofer(present, {:favicon => true})
|
38
|
+
expect(proofer.failed_tests).to eq []
|
38
39
|
end
|
39
40
|
|
40
41
|
it "fails for broken favicon with data-proofer-ignore" do
|
41
42
|
broken_but_ignored = "#{FIXTURES_DIR}/favicon/favicon_broken_but_ignored.html"
|
42
|
-
|
43
|
-
|
43
|
+
proofer = make_proofer(broken_but_ignored, {:favicon => true})
|
44
|
+
expect(proofer.failed_tests.first).to match /no favicon specified/
|
44
45
|
end
|
45
46
|
|
46
47
|
end
|