html-proofer 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +8 -2
  3. data/bin/htmlproof +3 -1
  4. data/html-proofer.gemspec +4 -2
  5. data/lib/html/proofer.rb +66 -10
  6. data/lib/html/proofer/check.rb +1 -1
  7. data/lib/html/proofer/checkable.rb +6 -6
  8. data/lib/html/proofer/checks.rb +2 -1
  9. data/lib/html/proofer/checks/favicon.rb +0 -2
  10. data/lib/html/proofer/checks/html.rb +24 -0
  11. data/lib/html/proofer/checks/links.rb +12 -0
  12. data/lib/html/proofer/issue.rb +1 -1
  13. data/lib/html/proofer/version.rb +5 -0
  14. data/spec/html/proofer/favicon_spec.rb +15 -14
  15. data/spec/html/proofer/fixtures/html/div_inside_head.html +6 -0
  16. data/spec/html/proofer/fixtures/html/html5_tags.html +9 -0
  17. data/spec/html/proofer/fixtures/html/invalid_tag.html +3 -0
  18. data/spec/html/proofer/fixtures/html/missing_closing_quotes.html +5 -0
  19. data/spec/html/proofer/fixtures/html/opening_and_ending_tag_mismatch.html +7 -0
  20. data/spec/html/proofer/fixtures/html/unescaped_ampersand_in_attribute.html +4 -0
  21. data/spec/html/proofer/fixtures/html/unmatched_end_tag.html +5 -0
  22. data/spec/html/proofer/fixtures/links/brokenHashOnTheWeb.html +2 -0
  23. data/spec/html/proofer/fixtures/links/githubHash.html +1 -0
  24. data/spec/html/proofer/fixtures/links/non_standard_characters.html +10 -0
  25. data/spec/html/proofer/fixtures/links/other_protocols.html +4 -0
  26. data/spec/html/proofer/fixtures/sorting/issue/broken_image_one.html +1 -0
  27. data/spec/html/proofer/fixtures/sorting/issue/broken_image_two.html +4 -0
  28. data/spec/html/proofer/fixtures/sorting/path/multiple_issues.html +11 -0
  29. data/spec/html/proofer/fixtures/sorting/path/single_issue.html +1 -0
  30. data/spec/html/proofer/fixtures/sorting/status/a_404.html +1 -0
  31. data/spec/html/proofer/fixtures/sorting/status/broken_link.html +3 -0
  32. data/spec/html/proofer/fixtures/sorting/status/missing_redirect.html +1 -0
  33. data/spec/html/proofer/html_spec.rb +51 -0
  34. data/spec/html/proofer/images_spec.rb +33 -33
  35. data/spec/html/proofer/links_spec.rb +115 -86
  36. data/spec/html/proofer/scripts_spec.rb +12 -12
  37. data/spec/html/proofer_spec.rb +50 -9
  38. data/spec/spec_helper.rb +13 -1
  39. metadata +58 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f2eb21756ab1eb24e38b51ada6d481b1dbc42eb8
4
- data.tar.gz: bb779ed45d42bb8c9e7824cb66bcdd33f027c2ee
3
+ metadata.gz: 892dbf1a283cd5c3bf33d9a76430371d035f3abe
4
+ data.tar.gz: e4b6498ca79d5ad258f5763c005f039599304434
5
5
  SHA512:
6
- metadata.gz: 3dd53810b0e3ae1b9760ced7d748100ab5ff56ed9b2b5f9770ea67960653b3a00d199a82f3a92c86c43054c3ec21191f82d9a860be389abce615aaf71b465425
7
- data.tar.gz: 1225ae4c9a19292b05017409233dd8e8d1c0bcc5761038a8b40f62bebcc5926073e185ff7a4d4de205a4ffd53886d91481adaba54a57c13eba0086eeac404aba
6
+ metadata.gz: fc79438fbab4a5e7d7e9c9f318d3cfb71e315e6028a1b9b0e1934babb6a4aa046930ff830cf193baffec171785421d28fdbfb5470b6e8781a6ecaaed4eecfece
7
+ data.tar.gz: 45ff2420a45ebda37369dfcf713f4b9aa89ba3e8f4278c705345fa83915cf6fe5d76cf04b1a80b102fb4123f9c3ba365eceb4f0a808f66011f4f6665fc15807c
data/README.md CHANGED
@@ -122,6 +122,11 @@ Project | Repository
122
122
  * Whether your internal script references are not broken
123
123
  * Whether external scripts are loading
124
124
 
125
+ ### HTML
126
+
127
+ Nokogiri looks at the markup and [provides errors](http://www.nokogiri.org/tutorials/ensuring_well_formed_markup.html) when parsing your document.
128
+ This is an optional feature; set the `validate_html` option to enable validation errors from Nokogiri.
129
+
125
130
  ## Configuration
126
131
 
127
132
  The `HTML::Proofer` constructor takes an optional hash of additional options:
@@ -133,11 +138,12 @@ The `HTML::Proofer` constructor takes an optional hash of additional options:
133
138
  | `favicon` | Enables the favicon checker. | `false` |
134
139
  | `followlocation` | Follows external redirections. Amends missing trailing slashes to internal directories. | `true` |
135
140
  | `directory_index_file` | Sets the file to look for when a link refers to a directory. | `index.html` |
136
- | `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored. | `[]` |
141
+ | `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. | `[]` |
137
142
  | `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` |
138
143
  | `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` |
139
144
  | `verbose` | If `true`, outputs extra information as the checking happens. Useful for debugging. | `false` |
140
145
  | `only_4xx` | Only reports errors for links that fall within the 4xx status code range. | `false` |
146
+ | `validate_html` | Enables HTML validation errors from Nokogiri. | `false` |
141
147
 
142
148
  ### Configuring Typhoeus
143
149
 
@@ -181,7 +187,7 @@ Add the `data-proofer-ignore` attribute to any tag to ignore it from the checks.
181
187
 
182
188
  Want to write your own test? Sure! Just create two classes--one that inherits from `HTML::Proofer::Checkable`, and another that inherits from `HTML::Proofer::Checks::Check`. `Checkable` defines various helper methods for your test, while `Checks::Check` actually runs across your content. `Checks::Check` should call `self.add_issue` on failures, to add them to the list.
183
189
 
184
- Here's an example custom test that protects against `mailto` links:
190
+ Here's an example custom test that protects against `mailto` links that point to `octocat@github.com`:
185
191
 
186
192
  ``` ruby
187
193
  class OctocatLink < ::HTML::Proofer::Checkable
data/bin/htmlproof CHANGED
@@ -18,12 +18,13 @@ Mercenary.program(:htmlproof) do |p|
18
18
  p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).'
19
19
  p.option 'as-links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
20
20
  p.option 'swap', '--swap regex:string,[regex:string,...]', Array, 'Array containing key-value pairs of `RegExp:String`. It transforms links that match `RegExp` into `String`'
21
- p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored.'
21
+ p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored.'
22
22
  p.option 'alt_ignore', '--alt_ignore image1,[image2,...]', Array, 'Array of Strings containing `img`s whose missing `alt` tags are safe to ignore'
23
23
  p.option 'disable_external', '--disable_external', 'Disables the external link checker (default: `false`)'
24
24
  p.option 'only-4xx', '--only-4xx', 'Only reports errors for links that fall within the 4x status code range.'
25
25
  p.option 'verbose', '--verbose', 'Enables more verbose logging.'
26
26
  p.option 'directory_index_file', '--directory_index_file', 'Sets the file to look for when a link refers to a directory.'
27
+ p.option 'validate_html', '--validate_html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
27
28
 
28
29
  p.action do |args, opts|
29
30
  args = ["."] if args.empty?
@@ -45,6 +46,7 @@ Mercenary.program(:htmlproof) do |p|
45
46
  options[:favicon] = opts["favicon"] unless opts["favicon"].nil?
46
47
  options[:verbose] = opts["verbose"] unless opts["verbose"].nil?
47
48
  options[:directory_index_file] = opts["directory_index_file"] unless opts["directory_index_file"].nil?
49
+ options[:validate_html] = opts["validate_html"] unless opts["validate_html"].nil?
48
50
 
49
51
  path = path.delete(' ').split(",") if opts["as-links"]
50
52
 
data/html-proofer.gemspec CHANGED
@@ -1,9 +1,10 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  $:.push File.expand_path("../lib", __FILE__)
3
+ require 'html/proofer/version'
3
4
 
4
5
  Gem::Specification.new do |gem|
5
6
  gem.name = "html-proofer"
6
- gem.version = "1.4.0"
7
+ gem.version = HTML::Proofer::VERSION
7
8
  gem.authors = ["Garen Torikian"]
8
9
  gem.email = ["gjtorikian@gmail.com"]
9
10
  gem.description = %q{Test your rendered HTML files to make sure they're accurate.}
@@ -21,9 +22,10 @@ Gem::Specification.new do |gem|
21
22
  gem.add_dependency "typhoeus", "~> 0.6.7"
22
23
  gem.add_dependency "yell", "~> 2.0"
23
24
  gem.add_dependency "parallel", "~> 1.3"
25
+ gem.add_dependency "addressable", "~> 2.3"
24
26
 
25
27
  gem.add_development_dependency "redcarpet"
26
- gem.add_development_dependency "rspec", "~> 2.13.0"
28
+ gem.add_development_dependency "rspec", "~> 3.1.0"
27
29
  gem.add_development_dependency "rake"
28
30
  gem.add_development_dependency "awesome_print"
29
31
  end
data/lib/html/proofer.rb CHANGED
@@ -1,15 +1,17 @@
1
1
  require 'nokogiri'
2
2
  require 'yell'
3
3
  require 'parallel'
4
+ require "addressable/uri"
4
5
 
5
6
  begin
6
7
  require "awesome_print"
7
8
  rescue LoadError; end
8
9
 
9
- [
10
- 'checkable',
11
- 'checks',
12
- 'issue'
10
+ %w[
11
+ checkable
12
+ checks
13
+ issue
14
+ version
13
15
  ].each { |r| require File.join(File.dirname(__FILE__), "proofer", r) }
14
16
 
15
17
  module HTML
@@ -39,7 +41,9 @@ module HTML
39
41
  :disable_external => false,
40
42
  :verbose => false,
41
43
  :only_4xx => false,
42
- :directory_index_file => "index.html"
44
+ :directory_index_file => "index.html",
45
+ :validate_html => false,
46
+ :error_sort => :path
43
47
  }
44
48
 
45
49
  @typhoeus_opts = {
@@ -104,8 +108,35 @@ module HTML
104
108
  if @failed_tests.empty?
105
109
  logger.info HTML::colorize :green, "HTML-Proofer finished successfully."
106
110
  else
107
- @failed_tests.sort_by(&:path).each do |issue|
108
- logger.error HTML::colorize :red, issue.to_s
111
+ matcher = nil
112
+
113
+ # always sort by the actual option, then path, to ensure consistent alphabetical (by filename) results
114
+ @failed_tests = @failed_tests.sort do |a,b|
115
+ comp = (a.send(@options[:error_sort]) <=> b.send(@options[:error_sort]))
116
+ comp.zero? ? (a.path <=> b.path) : comp
117
+ end
118
+
119
+ @failed_tests.each do |issue|
120
+ case @options[:error_sort]
121
+ when :path
122
+ if matcher != issue.path
123
+ logger.error HTML::colorize :blue, "- #{issue.path}"
124
+ matcher = issue.path
125
+ end
126
+ logger.error HTML::colorize :red, " * #{issue.desc}"
127
+ when :desc
128
+ if matcher != issue.desc
129
+ logger.error HTML::colorize :blue, "- #{issue.desc}"
130
+ matcher = issue.desc
131
+ end
132
+ logger.error HTML::colorize :red, " * #{issue.path}"
133
+ when :status
134
+ if matcher != issue.status
135
+ logger.error HTML::colorize :blue, "- #{issue.status}"
136
+ matcher = issue.status
137
+ end
138
+ logger.error HTML::colorize :red, " * #{issue.to_s}"
139
+ end
109
140
  end
110
141
 
111
142
  raise HTML::colorize :red, "HTML-Proofer found #{@failed_tests.length} failures!"
@@ -125,7 +156,11 @@ module HTML
125
156
  Ethon.logger = logger # log from Typhoeus/Ethon
126
157
 
127
158
  external_urls.each_pair do |href, filenames|
128
- queue_request(:head, href, filenames)
159
+ if has_hash? href
160
+ queue_request(:get, href, filenames)
161
+ else
162
+ queue_request(:head, href, filenames)
163
+ end
129
164
  end
130
165
  logger.debug HTML::colorize :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..."
131
166
  hydra.run
@@ -138,7 +173,8 @@ module HTML
138
173
  end
139
174
 
140
175
  def response_handler(response, filenames)
141
- href = response.options[:effective_url]
176
+ effective_url = response.options[:effective_url]
177
+ href = response.request.base_url
142
178
  method = response.request.options[:method]
143
179
  response_code = response.code
144
180
 
@@ -147,7 +183,18 @@ module HTML
147
183
  logger.debug debug_msg
148
184
 
149
185
  if response_code.between?(200, 299)
150
- # continue with no op
186
+ return if @options[:only_4xx]
187
+ if hash = has_hash?(href)
188
+ body_doc = Nokogiri::HTML(response.body)
189
+ # user-content is a special addition by GitHub.
190
+ if URI.parse(href).host.match(/github\.com/i)
191
+ if body_doc.xpath(%$//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]$).empty?
192
+ add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response_code
193
+ end
194
+ elsif body_doc.xpath(%$//*[@name="#{hash}"]|//*[@id="#{hash}"]$).empty?
195
+ add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response_code
196
+ end
197
+ end
151
198
  elsif response.timed_out?
152
199
  return if @options[:only_4xx]
153
200
  add_failed_tests filenames, "External link #{href} failed: got a time out", response_code
@@ -189,9 +236,18 @@ module HTML
189
236
  def get_checks
190
237
  checks = HTML::Proofer::Checks::Check.subclasses.map { |c| c.name }
191
238
  checks.delete("Favicons") unless @options[:favicon]
239
+ checks.delete("Html") unless @options[:validate_html]
192
240
  checks
193
241
  end
194
242
 
243
+ def has_hash?(url)
244
+ begin
245
+ URI.parse(url).fragment
246
+ rescue URI::InvalidURIError
247
+ nil
248
+ end
249
+ end
250
+
195
251
  def log_level
196
252
  @options[:verbose] ? :debug : :info
197
253
  end
@@ -26,7 +26,7 @@ class HTML::Proofer::Checks
26
26
  raise NotImplementedError.new("HTML::Proofer::Check subclasses must implement #run")
27
27
  end
28
28
 
29
- def add_issue(desc, status = nil)
29
+ def add_issue(desc, status = -1)
30
30
  @issues << Issue.new(@path, desc, status)
31
31
  end
32
32
 
@@ -36,11 +36,9 @@ module HTML
36
36
  end
37
37
 
38
38
  def parts
39
- return @parts_cached if defined?(@parts_cached)
40
-
41
- @parts_cached = URI::Parser.new(:ESCAPED => '\%|\|').parse url
39
+ @parts ||= Addressable::URI.parse url
42
40
  rescue URI::Error
43
- @parts_cached = nil
41
+ @parts = nil
44
42
  end
45
43
 
46
44
  def path
@@ -60,6 +58,10 @@ module HTML
60
58
  %w( http https ).include? scheme
61
59
  end
62
60
 
61
+ def non_http_remote?
62
+ !scheme.nil? && !remote?
63
+ end
64
+
63
65
  def ignore?
64
66
  return true if @data_ignore_proofer
65
67
 
@@ -72,8 +74,6 @@ module HTML
72
74
  return true if url.match(/^data:image/)
73
75
  return true if ignores_pattern_check(@check.additional_alt_ignores)
74
76
  end
75
-
76
- %w( mailto tel ).include? scheme
77
77
  end
78
78
 
79
79
  # path is external to the file
@@ -7,7 +7,8 @@ module HTML
7
7
  "checks/images",
8
8
  "checks/links",
9
9
  "checks/scripts",
10
- "checks/favicon"
10
+ "checks/favicon",
11
+ "checks/html"
11
12
  ].each { |r| require File.join(File.dirname(__FILE__), r) }
12
13
  end
13
14
  end
@@ -9,8 +9,6 @@ end
9
9
  class Favicons < ::HTML::Proofer::Checks::Check
10
10
 
11
11
  def run
12
- return unless @options[:favicon]
13
-
14
12
  @html.xpath("//link[not(ancestor::pre or ancestor::code)]").each do |favicon|
15
13
  favicon = Favicon.new favicon, "favicon", self
16
14
  next if favicon.ignore?
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ class Html < ::HTML::Proofer::Checks::Check
4
+
5
+ # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp)
6
+ HTML5_TAGS = %w(article aside bdi details dialog figcaption
7
+ figure footer header main mark menuitem meter
8
+ nav progress rp rt ruby section summary
9
+ time wbr datalist keygen output color date
10
+ datetime datetime-local email month number
11
+ range search tel time url week canvas
12
+ svg audio embed source track video)
13
+
14
+ def run
15
+ @html.errors.each do |e|
16
+
17
+ # Nokogiri (or rather libxml2 under the hood) only recognizes html4 tags,
18
+ # so we need to skip errors caused by the new tags in html5
19
+ next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1]
20
+
21
+ self.add_issue(e.to_s)
22
+ end
23
+ end
24
+ end
@@ -46,12 +46,24 @@ class Links < ::HTML::Proofer::Checks::Check
46
46
  next
47
47
  end
48
48
 
49
+ if link.scheme == "mailto"
50
+ self.add_issue "#{link.href} contains no email address" if link.path.empty?
51
+ self.add_issue "#{link.href} contain an invalid email address" unless link.path.include?("@")
52
+ end
53
+
54
+ if link.scheme == "tel"
55
+ self.add_issue "#{link.href} contains no phone number" if link.path.empty?
56
+ end
57
+
49
58
  # is there even a href?
50
59
  if link.missing_href?
51
60
  self.add_issue("anchor has no href attribute")
52
61
  next
53
62
  end
54
63
 
64
+ # intentionally here because we still want valid? & missing_href? to execute
65
+ next if link.non_http_remote?
66
+
55
67
  # does the file even exist?
56
68
  if link.remote?
57
69
  add_to_external_urls link.href
@@ -7,7 +7,7 @@ class HTML::Proofer::Checks
7
7
 
8
8
  attr_reader :path, :desc, :status
9
9
 
10
- def initialize(path, desc, status = nil)
10
+ def initialize(path, desc, status = -1)
11
11
  @path = path
12
12
  @desc = desc
13
13
  @status = status
@@ -0,0 +1,5 @@
1
+ module HTML
2
+ class Proofer
3
+ VERSION = "1.5.0"
4
+ end
5
+ end
@@ -3,44 +3,45 @@ require "spec_helper"
3
3
  describe "Favicons test" do
4
4
  it "ignores for absent favicon by default" do
5
5
  absent = "#{FIXTURES_DIR}/favicon/favicon_absent.html"
6
- output = capture_stderr { HTML::Proofer.new(absent).run }
7
- output.should == ""
6
+ expect(make_proofer(absent).failed_tests).to eq []
8
7
  end
9
8
 
10
9
  it "fails for absent favicon" do
11
10
  absent = "#{FIXTURES_DIR}/favicon/favicon_absent.html"
12
- output = capture_stderr { HTML::Proofer.new(absent, {:favicon => true}).run }
13
- output.should match /no favicon specified/
11
+ proofer = make_proofer(absent, {:favicon => true})
12
+ expect(proofer.failed_tests.first).to match /no favicon specified/
14
13
  end
15
14
 
16
15
  it "fails for absent favicon but present apple touch icon" do
17
16
  absent = "#{FIXTURES_DIR}/favicon/favicon_absent_apple.html"
18
- output = capture_stderr { HTML::Proofer.new(absent, {:favicon => true}).run }
19
- output.should match /no favicon specified/
17
+ proofer = make_proofer(absent, {:favicon => true})
18
+ # Travis gives a different error message here for some reason
19
+ expect(proofer.failed_tests.last).to match /(internally linking to gpl.png, which does not exist|no favicon specified)/
20
20
  end
21
21
 
22
22
  it "fails for broken favicon" do
23
23
  broken = "#{FIXTURES_DIR}/favicon/favicon_broken.html"
24
- output = capture_stderr { HTML::Proofer.new(broken, {:favicon => true}).run }
25
- output.should match /internally linking to asdadaskdalsdk.png/
24
+ proofer = make_proofer(broken, {:favicon => true})
25
+
26
+ expect(proofer.failed_tests.first).to match /internally linking to asdadaskdalsdk.png/
26
27
  end
27
28
 
28
29
  it "passes for present favicon" do
29
30
  present = "#{FIXTURES_DIR}/favicon/favicon_present.html"
30
- output = capture_stderr { HTML::Proofer.new(present, {:favicon => true}).run }
31
- output.should == ""
31
+ proofer = make_proofer(present, {:favicon => true})
32
+ expect(proofer.failed_tests).to eq []
32
33
  end
33
34
 
34
35
  it "passes for present favicon with shortcut notation" do
35
36
  present = "#{FIXTURES_DIR}/favicon/favicon_present_shortcut.html"
36
- output = capture_stderr { HTML::Proofer.new(present, {:favicon => true}).run }
37
- output.should == ""
37
+ proofer = make_proofer(present, {:favicon => true})
38
+ expect(proofer.failed_tests).to eq []
38
39
  end
39
40
 
40
41
  it "fails for broken favicon with data-proofer-ignore" do
41
42
  broken_but_ignored = "#{FIXTURES_DIR}/favicon/favicon_broken_but_ignored.html"
42
- output = capture_stderr { HTML::Proofer.new(broken_but_ignored, {:favicon => true}).run }
43
- output.should match /no favicon specified/
43
+ proofer = make_proofer(broken_but_ignored, {:favicon => true})
44
+ expect(proofer.failed_tests.first).to match /no favicon specified/
44
45
  end
45
46
 
46
47
  end
@@ -0,0 +1,6 @@
1
+ <html>
2
+ <head>
3
+ <div>
4
+ </div>
5
+ </head>
6
+ </html>
@@ -0,0 +1,9 @@
1
+ <html>
2
+ <body>
3
+ <nav>
4
+ </nav>
5
+ <article>
6
+ <p>Some text</p>
7
+ </article>
8
+ </body>
9
+ </html>
@@ -0,0 +1,3 @@
1
+ <html>
2
+ <myfancytag></myfancytag>
3
+ </html>
@@ -0,0 +1,5 @@
1
+ <html>
2
+ <body>
3
+ <a href="/test>A test link</a>
4
+ </body>
5
+ </html>