html-proofer 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/bin/htmlproof +6 -2
- data/html-proofer.gemspec +1 -1
- data/lib/html/proofer.rb +63 -49
- data/spec/html/proofer/links_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6fdca322142a45bad728b408d6e00e155fe01ce
|
4
|
+
data.tar.gz: 288123bd8384f03768178f352a90729e85101b89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 997be4ed93329ceb98a5fd2aa6caf7b1e664d78fc9c65057c4c76e766ada8851f4c4edb1756a80c5f54313f97048ce867bf5337d1266b62a3831d514ede33856
|
7
|
+
data.tar.gz: 88defb2fc1bda8ffa7a15f8fda61efcdb943074513eb73738613b556c84c75b33f4d35fa8debbd79d8fb0b33fbbe1c7a858dca396770cf4c97ef63b0655c0ee5
|
data/README.md
CHANGED
@@ -121,8 +121,9 @@ The `HTML::Proofer` constructor takes an optional hash of additional options:
|
|
121
121
|
| Option | Description | Default |
|
122
122
|
| :----- | :---------- | :------ |
|
123
123
|
| `disable_external` | If `true`, does not run the external link checker, which can take a lot of time. | `false` |
|
124
|
-
| `ext` | The extension of your HTML files including the dot. | `.html`
|
124
|
+
| `ext` | The extension of your HTML files including the dot. | `.html` |
|
125
125
|
| `favicon` | Enables the favicon checker. | `false` |
|
126
|
+
| `as_link_array` | Assumes that you've passed in just an array of links to check. | `false` |
|
126
127
|
| `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored. | `[]` |
|
127
128
|
| `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` |
|
128
129
|
| `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` |
|
@@ -134,11 +135,11 @@ You can also pass in any of Typhoeus' options for the external link check. For e
|
|
134
135
|
HTML::Proofer.new("out/", {:ext => ".htm", :verbose => true, :ssl_verifyhost => 2 })
|
135
136
|
```
|
136
137
|
|
137
|
-
This sets `HTML::Proofer`'s
|
138
|
+
This sets `HTML::Proofer`'s file extension to _.htm_, and passes Typhoeus options that make it verbose and apply specific SSL settings. Check [the Typhoeus documentation](https://github.com/typhoeus/typhoeus#other-curl-options) for more information on the options it accepts.
|
138
139
|
|
139
140
|
## Ignoring content
|
140
141
|
|
141
|
-
Add the `data-proofer-ignore` attribute to any
|
142
|
+
Add the `data-proofer-ignore` attribute to any tag to exclude it from the checks.
|
142
143
|
|
143
144
|
## Custom tests
|
144
145
|
|
data/bin/htmlproof
CHANGED
@@ -16,6 +16,7 @@ Mercenary.program(:htmlproof) do |p|
|
|
16
16
|
|
17
17
|
p.option 'ext', '--ext EXT', 'The extension of your HTML files (default: `.html`)'
|
18
18
|
p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).'
|
19
|
+
p.option 'as-links', '--as-links', 'Assumes that `PATH` is an array of links to check.'
|
19
20
|
p.option 'swap', '--swap regex:string,[regex:string,...]', Array, 'Array containing key-value pairs of `RegExp:String`. It transforms links that match `RegExp` into `String`'
|
20
21
|
p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored.'
|
21
22
|
p.option 'alt_ignore', '--alt_ignore image1,[image2,...]', Array, 'Array of Strings containing `img`s whose missing `alt` tags are safe to ignore'
|
@@ -35,12 +36,15 @@ Mercenary.program(:htmlproof) do |p|
|
|
35
36
|
options[:href_swap][%r{#{pair[0]}}] = pair[1]
|
36
37
|
end
|
37
38
|
end
|
38
|
-
options[:
|
39
|
-
options[:
|
39
|
+
options[:as_link_array] = opts["as-links"] unless opts["as-links"].nil?
|
40
|
+
options[:href_ignore] = opts["href_ignore"] unless opts["href_ignore"].nil?
|
41
|
+
options[:alt_ignore] = opts["alt_ignore"] unless opts["alt_ignore"].nil?
|
40
42
|
options[:disable_external] = opts["disable_external"] unless opts["disable_external"].nil?
|
41
43
|
options[:favicon] = opts["favicon"] unless opts["favicon"].nil?
|
42
44
|
options[:verbose] = opts["verbose"] unless opts["verbose"].nil?
|
43
45
|
|
46
|
+
path = path.delete(' ').split(",") if options[:as_link_array]
|
47
|
+
|
44
48
|
HTML::Proofer.new(path, options).run
|
45
49
|
end
|
46
50
|
end
|
data/html-proofer.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.name = "html-proofer"
|
6
|
-
gem.version = "1.
|
6
|
+
gem.version = "1.1.0"
|
7
7
|
gem.authors = ["Garen Torikian"]
|
8
8
|
gem.email = ["gjtorikian@gmail.com"]
|
9
9
|
gem.description = %q{Test your rendered HTML files to make sure they're accurate.}
|
data/lib/html/proofer.rb
CHANGED
@@ -11,7 +11,7 @@ module HTML
|
|
11
11
|
attr_accessor :failed_tests
|
12
12
|
|
13
13
|
def initialize(src, opts={})
|
14
|
-
@
|
14
|
+
@src = src
|
15
15
|
|
16
16
|
@proofer_opts = {
|
17
17
|
:ext => ".html",
|
@@ -20,7 +20,8 @@ module HTML
|
|
20
20
|
:href_ignore => [],
|
21
21
|
:alt_ignore => [],
|
22
22
|
:disable_external => false,
|
23
|
-
:verbose => false
|
23
|
+
:verbose => false,
|
24
|
+
:as_link_array => false
|
24
25
|
}
|
25
26
|
@options = @proofer_opts.merge({:followlocation => true}).merge(opts)
|
26
27
|
|
@@ -33,52 +34,33 @@ module HTML
|
|
33
34
|
end
|
34
35
|
|
35
36
|
def run
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
37
|
+
unless @options[:as_link_array]
|
38
|
+
total_files = 0
|
39
|
+
external_urls = {}
|
40
|
+
|
41
|
+
logger.info "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n".white
|
42
|
+
|
43
|
+
files.each do |path|
|
44
|
+
total_files += 1
|
45
|
+
html = HTML::Proofer.create_nokogiri(path)
|
46
|
+
|
47
|
+
get_checks.each do |klass|
|
48
|
+
logger.debug "Checking #{klass.to_s.downcase} on #{path} ...".blue
|
49
|
+
check = klass.new(@src, path, html, @options)
|
50
|
+
check.run
|
51
|
+
external_urls.merge!(check.external_urls)
|
52
|
+
@failed_tests.concat(check.issues) if check.issues.length > 0
|
53
|
+
end
|
51
54
|
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# the hypothesis is that Proofer runs way faster if we pull out
|
55
|
-
# all the external URLs and run the checks at the end. Otherwise, we're halting
|
56
|
-
# the consuming process for every file. In addition, sorting the list lets
|
57
|
-
# libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
|
58
|
-
# rather than GETing all the contents
|
59
|
-
external_urls = Hash[external_urls.sort]
|
60
55
|
|
61
|
-
|
62
|
-
logger.info "Checking #{external_urls.length} external links...".yellow
|
56
|
+
external_link_checker(external_urls) unless @options[:disable_external]
|
63
57
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
Ethon.logger = logger # log from Typhoeus/Ethon
|
70
|
-
|
71
|
-
external_urls.each_pair do |href, filenames|
|
72
|
-
request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
|
73
|
-
request.on_complete { |response| response_handler(response, filenames) }
|
74
|
-
hydra.queue request
|
75
|
-
end
|
76
|
-
logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
|
77
|
-
hydra.run
|
58
|
+
logger.info "Ran on #{total_files} files!\n\n".green
|
59
|
+
else
|
60
|
+
external_urls = Hash[*@src.map{ |s| [s, nil] }.flatten]
|
61
|
+
external_link_checker(external_urls) unless @options[:disable_external]
|
78
62
|
end
|
79
63
|
|
80
|
-
logger.info "Ran on #{total_files} files!\n\n".green
|
81
|
-
|
82
64
|
if @failed_tests.empty?
|
83
65
|
logger.info "HTML-Proofer finished successfully.".green
|
84
66
|
else
|
@@ -90,17 +72,47 @@ module HTML
|
|
90
72
|
end
|
91
73
|
end
|
92
74
|
|
75
|
+
# the hypothesis is that Proofer runs way faster if we pull out
|
76
|
+
# all the external URLs and run the checks at the end. Otherwise, we're halting
|
77
|
+
# the consuming process for every file. In addition, sorting the list lets
|
78
|
+
# libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
|
79
|
+
# rather than GETing all the contents
|
80
|
+
def external_link_checker(external_urls)
|
81
|
+
external_urls = Hash[external_urls.sort]
|
82
|
+
|
83
|
+
logger.info "Checking #{external_urls.length} external links...".yellow
|
84
|
+
|
85
|
+
# Typhoeus won't let you pass any non-Typhoeus option
|
86
|
+
@proofer_opts.each_key do |opt|
|
87
|
+
@options.delete opt
|
88
|
+
end
|
89
|
+
|
90
|
+
Ethon.logger = logger # log from Typhoeus/Ethon
|
91
|
+
|
92
|
+
external_urls.each_pair do |href, filenames|
|
93
|
+
request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
|
94
|
+
request.on_complete { |response| response_handler(response, filenames) }
|
95
|
+
hydra.queue request
|
96
|
+
end
|
97
|
+
logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
|
98
|
+
hydra.run
|
99
|
+
end
|
100
|
+
|
93
101
|
def response_handler(response, filenames)
|
94
102
|
href = response.options[:effective_url]
|
95
103
|
method = response.request.options[:method]
|
96
104
|
response_code = response.code
|
97
105
|
|
98
|
-
|
106
|
+
debug_msg = "Received a #{response_code} for #{href}"
|
107
|
+
debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
|
108
|
+
logger.debug debug_msg
|
99
109
|
|
100
110
|
if response_code.between?(200, 299)
|
101
111
|
# continue with no op
|
102
112
|
elsif response.timed_out?
|
103
|
-
|
113
|
+
failed_test_msg = "External link #{href} failed: got a time out"
|
114
|
+
failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
|
115
|
+
@failed_tests << failed_test_msg
|
104
116
|
elsif (response_code == 405 || response_code == 420 || response_code == 503) && method == :head
|
105
117
|
# 420s usually come from rate limiting; let's ignore the query and try just the path with a GET
|
106
118
|
uri = URI(href)
|
@@ -113,7 +125,9 @@ module HTML
|
|
113
125
|
response_handler(next_response, filenames)
|
114
126
|
else
|
115
127
|
# Received a non-successful http response.
|
116
|
-
|
128
|
+
failed_test_msg = "External link #{href} failed: #{response_code} #{response.return_message}"
|
129
|
+
failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
|
130
|
+
@failed_tests << failed_test_msg
|
117
131
|
end
|
118
132
|
end
|
119
133
|
|
@@ -122,10 +136,10 @@ module HTML
|
|
122
136
|
end
|
123
137
|
|
124
138
|
def files
|
125
|
-
if File.directory? @
|
126
|
-
Dir.glob("#{@
|
139
|
+
if File.directory? @src
|
140
|
+
Dir.glob("#{@src}/**/*#{@options[:ext]}")
|
127
141
|
else
|
128
|
-
File.extname(@
|
142
|
+
File.extname(@src) == @options[:ext] ? [@src] : []
|
129
143
|
end
|
130
144
|
end
|
131
145
|
|
@@ -165,4 +165,10 @@ describe "Links test" do
|
|
165
165
|
output = capture_stderr { HTML::Proofer.new(internal, options).run }
|
166
166
|
output.should match /without trailing slash/
|
167
167
|
end
|
168
|
+
|
169
|
+
it "works for array of links" do
|
170
|
+
options = { :as_link_array => true}
|
171
|
+
output = capture_stderr { HTML::Proofer.new(["www.github.com", "foofoofoo.biz"], options).run }
|
172
|
+
output.should match /foofoo.biz\/? failed: 0 Couldn't resolve host name/
|
173
|
+
end
|
168
174
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-proofer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mercenary
|