html-proofer 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/bin/htmlproof +6 -2
- data/html-proofer.gemspec +1 -1
- data/lib/html/proofer.rb +63 -49
- data/spec/html/proofer/links_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6fdca322142a45bad728b408d6e00e155fe01ce
|
4
|
+
data.tar.gz: 288123bd8384f03768178f352a90729e85101b89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 997be4ed93329ceb98a5fd2aa6caf7b1e664d78fc9c65057c4c76e766ada8851f4c4edb1756a80c5f54313f97048ce867bf5337d1266b62a3831d514ede33856
|
7
|
+
data.tar.gz: 88defb2fc1bda8ffa7a15f8fda61efcdb943074513eb73738613b556c84c75b33f4d35fa8debbd79d8fb0b33fbbe1c7a858dca396770cf4c97ef63b0655c0ee5
|
data/README.md
CHANGED
@@ -121,8 +121,9 @@ The `HTML::Proofer` constructor takes an optional hash of additional options:
|
|
121
121
|
| Option | Description | Default |
|
122
122
|
| :----- | :---------- | :------ |
|
123
123
|
| `disable_external` | If `true`, does not run the external link checker, which can take a lot of time. | `false` |
|
124
|
-
| `ext` | The extension of your HTML files including the dot. | `.html`
|
124
|
+
| `ext` | The extension of your HTML files including the dot. | `.html` |
|
125
125
|
| `favicon` | Enables the favicon checker. | `false` |
|
126
|
+
| `as_link_array` | Assumes that you've passed in just an array of links to check. | `false` |
|
126
127
|
| `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored. | `[]` |
|
127
128
|
| `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` |
|
128
129
|
| `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` |
|
@@ -134,11 +135,11 @@ You can also pass in any of Typhoeus' options for the external link check. For e
|
|
134
135
|
HTML::Proofer.new("out/", {:ext => ".htm", :verbose => true, :ssl_verifyhost => 2 })
|
135
136
|
```
|
136
137
|
|
137
|
-
This sets `HTML::Proofer`'s
|
138
|
+
This sets `HTML::Proofer`'s extensions to use _.htm_, and gives Typhoeus a configuration for it to be verbose, and use specific SSL settings. Check [the Typhoeus documentation](https://github.com/typhoeus/typhoeus#other-curl-options) for more information on what options it can receive.
|
138
139
|
|
139
140
|
## Ignoring content
|
140
141
|
|
141
|
-
Add the `data-proofer-ignore` attribute to any
|
142
|
+
Add the `data-proofer-ignore` attribute to any tag to ignore it from the checks.
|
142
143
|
|
143
144
|
## Custom tests
|
144
145
|
|
data/bin/htmlproof
CHANGED
@@ -16,6 +16,7 @@ Mercenary.program(:htmlproof) do |p|
|
|
16
16
|
|
17
17
|
p.option 'ext', '--ext EXT', 'The extension of your HTML files (default: `.html`)'
|
18
18
|
p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).'
|
19
|
+
p.option 'as-links', '--as-links', 'Assumes that `PATH` is an array of links to check.'
|
19
20
|
p.option 'swap', '--swap regex:string,[regex:string,...]', Array, 'Array containing key-value pairs of `RegExp:String`. It transforms links that match `RegExp` into `String`'
|
20
21
|
p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored.'
|
21
22
|
p.option 'alt_ignore', '--alt_ignore image1,[image2,...]', Array, 'Array of Strings containing `img`s whose missing `alt` tags are safe to ignore'
|
@@ -35,12 +36,15 @@ Mercenary.program(:htmlproof) do |p|
|
|
35
36
|
options[:href_swap][%r{#{pair[0]}}] = pair[1]
|
36
37
|
end
|
37
38
|
end
|
38
|
-
options[:
|
39
|
-
options[:
|
39
|
+
options[:as_link_array] = opts["as-links"] unless opts["as-links"].nil?
|
40
|
+
options[:href_ignore] = opts["href_ignore"] unless opts["href_ignore"].nil?
|
41
|
+
options[:alt_ignore] = opts["alt_ignore"] unless opts["alt_ignore"].nil?
|
40
42
|
options[:disable_external] = opts["disable_external"] unless opts["disable_external"].nil?
|
41
43
|
options[:favicon] = opts["favicon"] unless opts["favicon"].nil?
|
42
44
|
options[:verbose] = opts["verbose"] unless opts["verbose"].nil?
|
43
45
|
|
46
|
+
path = path.delete(' ').split(",") if options[:as_link_array]
|
47
|
+
|
44
48
|
HTML::Proofer.new(path, options).run
|
45
49
|
end
|
46
50
|
end
|
data/html-proofer.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.name = "html-proofer"
|
6
|
-
gem.version = "1.
|
6
|
+
gem.version = "1.1.0"
|
7
7
|
gem.authors = ["Garen Torikian"]
|
8
8
|
gem.email = ["gjtorikian@gmail.com"]
|
9
9
|
gem.description = %q{Test your rendered HTML files to make sure they're accurate.}
|
data/lib/html/proofer.rb
CHANGED
@@ -11,7 +11,7 @@ module HTML
|
|
11
11
|
attr_accessor :failed_tests
|
12
12
|
|
13
13
|
def initialize(src, opts={})
|
14
|
-
@
|
14
|
+
@src = src
|
15
15
|
|
16
16
|
@proofer_opts = {
|
17
17
|
:ext => ".html",
|
@@ -20,7 +20,8 @@ module HTML
|
|
20
20
|
:href_ignore => [],
|
21
21
|
:alt_ignore => [],
|
22
22
|
:disable_external => false,
|
23
|
-
:verbose => false
|
23
|
+
:verbose => false,
|
24
|
+
:as_link_array => false
|
24
25
|
}
|
25
26
|
@options = @proofer_opts.merge({:followlocation => true}).merge(opts)
|
26
27
|
|
@@ -33,52 +34,33 @@ module HTML
|
|
33
34
|
end
|
34
35
|
|
35
36
|
def run
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
37
|
+
unless @options[:as_link_array]
|
38
|
+
total_files = 0
|
39
|
+
external_urls = {}
|
40
|
+
|
41
|
+
logger.info "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n".white
|
42
|
+
|
43
|
+
files.each do |path|
|
44
|
+
total_files += 1
|
45
|
+
html = HTML::Proofer.create_nokogiri(path)
|
46
|
+
|
47
|
+
get_checks.each do |klass|
|
48
|
+
logger.debug "Checking #{klass.to_s.downcase} on #{path} ...".blue
|
49
|
+
check = klass.new(@src, path, html, @options)
|
50
|
+
check.run
|
51
|
+
external_urls.merge!(check.external_urls)
|
52
|
+
@failed_tests.concat(check.issues) if check.issues.length > 0
|
53
|
+
end
|
51
54
|
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# the hypothesis is that Proofer runs way faster if we pull out
|
55
|
-
# all the external URLs and run the checks at the end. Otherwise, we're halting
|
56
|
-
# the consuming process for every file. In addition, sorting the list lets
|
57
|
-
# libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
|
58
|
-
# rather than GETing all the contents
|
59
|
-
external_urls = Hash[external_urls.sort]
|
60
55
|
|
61
|
-
|
62
|
-
logger.info "Checking #{external_urls.length} external links...".yellow
|
56
|
+
external_link_checker(external_urls) unless @options[:disable_external]
|
63
57
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
Ethon.logger = logger # log from Typhoeus/Ethon
|
70
|
-
|
71
|
-
external_urls.each_pair do |href, filenames|
|
72
|
-
request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
|
73
|
-
request.on_complete { |response| response_handler(response, filenames) }
|
74
|
-
hydra.queue request
|
75
|
-
end
|
76
|
-
logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
|
77
|
-
hydra.run
|
58
|
+
logger.info "Ran on #{total_files} files!\n\n".green
|
59
|
+
else
|
60
|
+
external_urls = Hash[*@src.map{ |s| [s, nil] }.flatten]
|
61
|
+
external_link_checker(external_urls) unless @options[:disable_external]
|
78
62
|
end
|
79
63
|
|
80
|
-
logger.info "Ran on #{total_files} files!\n\n".green
|
81
|
-
|
82
64
|
if @failed_tests.empty?
|
83
65
|
logger.info "HTML-Proofer finished successfully.".green
|
84
66
|
else
|
@@ -90,17 +72,47 @@ module HTML
|
|
90
72
|
end
|
91
73
|
end
|
92
74
|
|
75
|
+
# the hypothesis is that Proofer runs way faster if we pull out
|
76
|
+
# all the external URLs and run the checks at the end. Otherwise, we're halting
|
77
|
+
# the consuming process for every file. In addition, sorting the list lets
|
78
|
+
# libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
|
79
|
+
# rather than GETing all the contents
|
80
|
+
def external_link_checker(external_urls)
|
81
|
+
external_urls = Hash[external_urls.sort]
|
82
|
+
|
83
|
+
logger.info "Checking #{external_urls.length} external links...".yellow
|
84
|
+
|
85
|
+
# Typhoeus won't let you pass any non-Typhoeus option
|
86
|
+
@proofer_opts.each_key do |opt|
|
87
|
+
@options.delete opt
|
88
|
+
end
|
89
|
+
|
90
|
+
Ethon.logger = logger # log from Typhoeus/Ethon
|
91
|
+
|
92
|
+
external_urls.each_pair do |href, filenames|
|
93
|
+
request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
|
94
|
+
request.on_complete { |response| response_handler(response, filenames) }
|
95
|
+
hydra.queue request
|
96
|
+
end
|
97
|
+
logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
|
98
|
+
hydra.run
|
99
|
+
end
|
100
|
+
|
93
101
|
def response_handler(response, filenames)
|
94
102
|
href = response.options[:effective_url]
|
95
103
|
method = response.request.options[:method]
|
96
104
|
response_code = response.code
|
97
105
|
|
98
|
-
|
106
|
+
debug_msg = "Received a #{response_code} for #{href}"
|
107
|
+
debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
|
108
|
+
logger.debug debug_msg
|
99
109
|
|
100
110
|
if response_code.between?(200, 299)
|
101
111
|
# continue with no op
|
102
112
|
elsif response.timed_out?
|
103
|
-
|
113
|
+
failed_test_msg = "External link #{href} failed: got a time out"
|
114
|
+
failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
|
115
|
+
@failed_tests << failed_test_msg
|
104
116
|
elsif (response_code == 405 || response_code == 420 || response_code == 503) && method == :head
|
105
117
|
# 420s usually come from rate limiting; let's ignore the query and try just the path with a GET
|
106
118
|
uri = URI(href)
|
@@ -113,7 +125,9 @@ module HTML
|
|
113
125
|
response_handler(next_response, filenames)
|
114
126
|
else
|
115
127
|
# Received a non-successful http response.
|
116
|
-
|
128
|
+
failed_test_msg = "External link #{href} failed: #{response_code} #{response.return_message}"
|
129
|
+
failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
|
130
|
+
@failed_tests << failed_test_msg
|
117
131
|
end
|
118
132
|
end
|
119
133
|
|
@@ -122,10 +136,10 @@ module HTML
|
|
122
136
|
end
|
123
137
|
|
124
138
|
def files
|
125
|
-
if File.directory? @
|
126
|
-
Dir.glob("#{@
|
139
|
+
if File.directory? @src
|
140
|
+
Dir.glob("#{@src}/**/*#{@options[:ext]}")
|
127
141
|
else
|
128
|
-
File.extname(@
|
142
|
+
File.extname(@src) == @options[:ext] ? [@src] : []
|
129
143
|
end
|
130
144
|
end
|
131
145
|
|
@@ -165,4 +165,10 @@ describe "Links test" do
|
|
165
165
|
output = capture_stderr { HTML::Proofer.new(internal, options).run }
|
166
166
|
output.should match /without trailing slash/
|
167
167
|
end
|
168
|
+
|
169
|
+
it "works for array of links" do
|
170
|
+
options = { :as_link_array => true}
|
171
|
+
output = capture_stderr { HTML::Proofer.new(["www.github.com", "foofoofoo.biz"], options).run }
|
172
|
+
output.should match /foofoo.biz\/? failed: 0 Couldn't resolve host name/
|
173
|
+
end
|
168
174
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mercenary
|