html-proofer 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0e7b88c5fd839aa05b97b85399326a9db187a340
4
- data.tar.gz: e0a0c34a2602aca002797185b7ee488b1a94314d
3
+ metadata.gz: e6fdca322142a45bad728b408d6e00e155fe01ce
4
+ data.tar.gz: 288123bd8384f03768178f352a90729e85101b89
5
5
  SHA512:
6
- metadata.gz: 904bc7984ee9ddba79e06833a9e235e26c5d388c7581493f79f210053248d4d7916dfe849c8eac7e5fc75815f5be2dbf9d88b3f043eb1181e95f11241cae698b
7
- data.tar.gz: 509fe2ee57606ea93e1c175bafc8bf8ff39891a93c0bfa8c7ea3242f731f184e4a884bb1deea9d2433074f3b816d25542873deed24e7df4a3f421e64d2f106ba
6
+ metadata.gz: 997be4ed93329ceb98a5fd2aa6caf7b1e664d78fc9c65057c4c76e766ada8851f4c4edb1756a80c5f54313f97048ce867bf5337d1266b62a3831d514ede33856
7
+ data.tar.gz: 88defb2fc1bda8ffa7a15f8fda61efcdb943074513eb73738613b556c84c75b33f4d35fa8debbd79d8fb0b33fbbe1c7a858dca396770cf4c97ef63b0655c0ee5
data/README.md CHANGED
@@ -121,8 +121,9 @@ The `HTML::Proofer` constructor takes an optional hash of additional options:
121
121
  | Option | Description | Default |
122
122
  | :----- | :---------- | :------ |
123
123
  | `disable_external` | If `true`, does not run the external link checker, which can take a lot of time. | `false` |
124
- | `ext` | The extension of your HTML files including the dot. | `.html` |
124
+ | `ext` | The extension of your HTML files including the dot. | `.html` |
125
125
  | `favicon` | Enables the favicon checker. | `false` |
126
+ | `as_link_array` | Assumes that you've passed in just an array of links to check. | `false` |
126
127
  | `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored. | `[]` |
127
128
  | `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` |
128
129
  | `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` |
@@ -134,11 +135,11 @@ You can also pass in any of Typhoeus' options for the external link check. For e
134
135
  HTML::Proofer.new("out/", {:ext => ".htm", :verbose => true, :ssl_verifyhost => 2 })
135
136
  ```
136
137
 
137
- This sets `HTML::Proofer`'s' extensions to use _.htm_, and gives Typhoeus a configurtion for it to be verbose, and use specific SSL settings. Check [the Typhoeus documentation](https://github.com/typhoeus/typhoeus#other-curl-options) for more information on what options it can receive.
138
+ This sets `HTML::Proofer`'s extensions to use _.htm_, and gives Typhoeus a configuration for it to be verbose, and use specific SSL settings. Check [the Typhoeus documentation](https://github.com/typhoeus/typhoeus#other-curl-options) for more information on what options it can receive.
138
139
 
139
140
  ## Ignoring content
140
141
 
141
- Add the `data-proofer-ignore` attribute to any `<a>` or `<img>` tag to ignore it from the checks.
142
+ Add the `data-proofer-ignore` attribute to any tag to ignore it from the checks.
142
143
 
143
144
  ## Custom tests
144
145
 
data/bin/htmlproof CHANGED
@@ -16,6 +16,7 @@ Mercenary.program(:htmlproof) do |p|
16
16
 
17
17
  p.option 'ext', '--ext EXT', 'The extension of your HTML files (default: `.html`)'
18
18
  p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).'
19
+ p.option 'as-links', '--as-links', 'Assumes that `PATH` is an array of links to check.'
19
20
  p.option 'swap', '--swap regex:string,[regex:string,...]', Array, 'Array containing key-value pairs of `RegExp:String`. It transforms links that match `RegExp` into `String`'
20
21
  p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored.'
21
22
  p.option 'alt_ignore', '--alt_ignore image1,[image2,...]', Array, 'Array of Strings containing `img`s whose missing `alt` tags are safe to ignore'
@@ -35,12 +36,15 @@ Mercenary.program(:htmlproof) do |p|
35
36
  options[:href_swap][%r{#{pair[0]}}] = pair[1]
36
37
  end
37
38
  end
38
- options[:href_ignore] = opts["ignore"] unless opts["href_ignore"].nil?
39
- options[:alt_ignore] = opts["ignore"] unless opts["alt_ignore"].nil?
39
+ options[:as_link_array] = opts["as-links"] unless opts["as-links"].nil?
40
+ options[:href_ignore] = opts["href_ignore"] unless opts["href_ignore"].nil?
41
+ options[:alt_ignore] = opts["alt_ignore"] unless opts["alt_ignore"].nil?
40
42
  options[:disable_external] = opts["disable_external"] unless opts["disable_external"].nil?
41
43
  options[:favicon] = opts["favicon"] unless opts["favicon"].nil?
42
44
  options[:verbose] = opts["verbose"] unless opts["verbose"].nil?
43
45
 
46
+ path = path.delete(' ').split(",") if options[:as_link_array]
47
+
44
48
  HTML::Proofer.new(path, options).run
45
49
  end
46
50
  end
data/html-proofer.gemspec CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = "html-proofer"
6
- gem.version = "1.0.0"
6
+ gem.version = "1.1.0"
7
7
  gem.authors = ["Garen Torikian"]
8
8
  gem.email = ["gjtorikian@gmail.com"]
9
9
  gem.description = %q{Test your rendered HTML files to make sure they're accurate.}
data/lib/html/proofer.rb CHANGED
@@ -11,7 +11,7 @@ module HTML
11
11
  attr_accessor :failed_tests
12
12
 
13
13
  def initialize(src, opts={})
14
- @srcDir = src
14
+ @src = src
15
15
 
16
16
  @proofer_opts = {
17
17
  :ext => ".html",
@@ -20,7 +20,8 @@ module HTML
20
20
  :href_ignore => [],
21
21
  :alt_ignore => [],
22
22
  :disable_external => false,
23
- :verbose => false
23
+ :verbose => false,
24
+ :as_link_array => false
24
25
  }
25
26
  @options = @proofer_opts.merge({:followlocation => true}).merge(opts)
26
27
 
@@ -33,52 +34,33 @@ module HTML
33
34
  end
34
35
 
35
36
  def run
36
- total_files = 0
37
- external_urls = {}
38
-
39
- logger.info "Running #{get_checks} checks on #{@srcDir} on *#{@options[:ext]}... \n\n".white
40
-
41
- files.each do |path|
42
- total_files += 1
43
- html = HTML::Proofer.create_nokogiri(path)
44
-
45
- get_checks.each do |klass|
46
- logger.debug "Checking #{klass.to_s.downcase} on #{path} ...".blue
47
- check = klass.new(@srcDir, path, html, @options)
48
- check.run
49
- external_urls.merge!(check.external_urls)
50
- @failed_tests.concat(check.issues) if check.issues.length > 0
37
+ unless @options[:as_link_array]
38
+ total_files = 0
39
+ external_urls = {}
40
+
41
+ logger.info "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n".white
42
+
43
+ files.each do |path|
44
+ total_files += 1
45
+ html = HTML::Proofer.create_nokogiri(path)
46
+
47
+ get_checks.each do |klass|
48
+ logger.debug "Checking #{klass.to_s.downcase} on #{path} ...".blue
49
+ check = klass.new(@src, path, html, @options)
50
+ check.run
51
+ external_urls.merge!(check.external_urls)
52
+ @failed_tests.concat(check.issues) if check.issues.length > 0
53
+ end
51
54
  end
52
- end
53
-
54
- # the hypothesis is that Proofer runs way faster if we pull out
55
- # all the external URLs and run the checks at the end. Otherwise, we're halting
56
- # the consuming process for every file. In addition, sorting the list lets
57
- # libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
58
- # rather than GETing all the contents
59
- external_urls = Hash[external_urls.sort]
60
55
 
61
- unless @options[:disable_external]
62
- logger.info "Checking #{external_urls.length} external links...".yellow
56
+ external_link_checker(external_urls) unless @options[:disable_external]
63
57
 
64
- # Typhoeus won't let you pass any non-Typhoeus option
65
- @proofer_opts.each_key do |opt|
66
- @options.delete opt
67
- end
68
-
69
- Ethon.logger = logger # log from Typhoeus/Ethon
70
-
71
- external_urls.each_pair do |href, filenames|
72
- request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
73
- request.on_complete { |response| response_handler(response, filenames) }
74
- hydra.queue request
75
- end
76
- logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
77
- hydra.run
58
+ logger.info "Ran on #{total_files} files!\n\n".green
59
+ else
60
+ external_urls = Hash[*@src.map{ |s| [s, nil] }.flatten]
61
+ external_link_checker(external_urls) unless @options[:disable_external]
78
62
  end
79
63
 
80
- logger.info "Ran on #{total_files} files!\n\n".green
81
-
82
64
  if @failed_tests.empty?
83
65
  logger.info "HTML-Proofer finished successfully.".green
84
66
  else
@@ -90,17 +72,47 @@ module HTML
90
72
  end
91
73
  end
92
74
 
75
+ # the hypothesis is that Proofer runs way faster if we pull out
76
+ # all the external URLs and run the checks at the end. Otherwise, we're halting
77
+ # the consuming process for every file. In addition, sorting the list lets
78
+ # libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
79
+ # rather than GETing all the contents
80
+ def external_link_checker(external_urls)
81
+ external_urls = Hash[external_urls.sort]
82
+
83
+ logger.info "Checking #{external_urls.length} external links...".yellow
84
+
85
+ # Typhoeus won't let you pass any non-Typhoeus option
86
+ @proofer_opts.each_key do |opt|
87
+ @options.delete opt
88
+ end
89
+
90
+ Ethon.logger = logger # log from Typhoeus/Ethon
91
+
92
+ external_urls.each_pair do |href, filenames|
93
+ request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
94
+ request.on_complete { |response| response_handler(response, filenames) }
95
+ hydra.queue request
96
+ end
97
+ logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
98
+ hydra.run
99
+ end
100
+
93
101
  def response_handler(response, filenames)
94
102
  href = response.options[:effective_url]
95
103
  method = response.request.options[:method]
96
104
  response_code = response.code
97
105
 
98
- logger.debug "Received a #{response_code} for #{href} in #{filenames.join(' ')}"
106
+ debug_msg = "Received a #{response_code} for #{href}"
107
+ debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
108
+ logger.debug debug_msg
99
109
 
100
110
  if response_code.between?(200, 299)
101
111
  # continue with no op
102
112
  elsif response.timed_out?
103
- @failed_tests << "#{filenames.join(' ').blue}: External link #{href} failed: got a time out"
113
+ failed_test_msg = "External link #{href} failed: got a time out"
114
+ failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
115
+ @failed_tests << failed_test_msg
104
116
  elsif (response_code == 405 || response_code == 420 || response_code == 503) && method == :head
105
117
  # 420s usually come from rate limiting; let's ignore the query and try just the path with a GET
106
118
  uri = URI(href)
@@ -113,7 +125,9 @@ module HTML
113
125
  response_handler(next_response, filenames)
114
126
  else
115
127
  # Received a non-successful http response.
116
- @failed_tests << "#{filenames.join(' ').blue}: External link #{href} failed: #{response_code} #{response.return_message}"
128
+ failed_test_msg = "External link #{href} failed: #{response_code} #{response.return_message}"
129
+ failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
130
+ @failed_tests << failed_test_msg
117
131
  end
118
132
  end
119
133
 
@@ -122,10 +136,10 @@ module HTML
122
136
  end
123
137
 
124
138
  def files
125
- if File.directory? @srcDir
126
- Dir.glob("#{@srcDir}/**/*#{@options[:ext]}")
139
+ if File.directory? @src
140
+ Dir.glob("#{@src}/**/*#{@options[:ext]}")
127
141
  else
128
- File.extname(@srcDir) == @options[:ext] ? [@srcDir] : []
142
+ File.extname(@src) == @options[:ext] ? [@src] : []
129
143
  end
130
144
  end
131
145
 
@@ -165,4 +165,10 @@ describe "Links test" do
165
165
  output = capture_stderr { HTML::Proofer.new(internal, options).run }
166
166
  output.should match /without trailing slash/
167
167
  end
168
+
169
+ it "works for array of links" do
170
+ options = { :as_link_array => true}
171
+ output = capture_stderr { HTML::Proofer.new(["www.github.com", "foofoofoo.biz"], options).run }
172
+ output.should match /foofoo.biz\/? failed: 0 Couldn't resolve host name/
173
+ end
168
174
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-20 00:00:00.000000000 Z
11
+ date: 2014-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mercenary