html-proofer 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0e7b88c5fd839aa05b97b85399326a9db187a340
4
- data.tar.gz: e0a0c34a2602aca002797185b7ee488b1a94314d
3
+ metadata.gz: e6fdca322142a45bad728b408d6e00e155fe01ce
4
+ data.tar.gz: 288123bd8384f03768178f352a90729e85101b89
5
5
  SHA512:
6
- metadata.gz: 904bc7984ee9ddba79e06833a9e235e26c5d388c7581493f79f210053248d4d7916dfe849c8eac7e5fc75815f5be2dbf9d88b3f043eb1181e95f11241cae698b
7
- data.tar.gz: 509fe2ee57606ea93e1c175bafc8bf8ff39891a93c0bfa8c7ea3242f731f184e4a884bb1deea9d2433074f3b816d25542873deed24e7df4a3f421e64d2f106ba
6
+ metadata.gz: 997be4ed93329ceb98a5fd2aa6caf7b1e664d78fc9c65057c4c76e766ada8851f4c4edb1756a80c5f54313f97048ce867bf5337d1266b62a3831d514ede33856
7
+ data.tar.gz: 88defb2fc1bda8ffa7a15f8fda61efcdb943074513eb73738613b556c84c75b33f4d35fa8debbd79d8fb0b33fbbe1c7a858dca396770cf4c97ef63b0655c0ee5
data/README.md CHANGED
@@ -121,8 +121,9 @@ The `HTML::Proofer` constructor takes an optional hash of additional options:
121
121
  | Option | Description | Default |
122
122
  | :----- | :---------- | :------ |
123
123
  | `disable_external` | If `true`, does not run the external link checker, which can take a lot of time. | `false` |
124
- | `ext` | The extension of your HTML files including the dot. | `.html` |
124
+ | `ext` | The extension of your HTML files including the dot. | `.html` |
125
125
  | `favicon` | Enables the favicon checker. | `false` |
126
+ | `as_link_array` | Assumes that you've passed in just an array of links to check. | `false` |
126
127
  | `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored. | `[]` |
127
128
  | `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` |
128
129
  | `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` |
@@ -134,11 +135,11 @@ You can also pass in any of Typhoeus' options for the external link check. For e
134
135
  HTML::Proofer.new("out/", {:ext => ".htm", :verbose => true, :ssl_verifyhost => 2 })
135
136
  ```
136
137
 
137
- This sets `HTML::Proofer`'s' extensions to use _.htm_, and gives Typhoeus a configurtion for it to be verbose, and use specific SSL settings. Check [the Typhoeus documentation](https://github.com/typhoeus/typhoeus#other-curl-options) for more information on what options it can receive.
138
+ This sets `HTML::Proofer`'s extensions to use _.htm_, and gives Typhoeus a configuration for it to be verbose, and use specific SSL settings. Check [the Typhoeus documentation](https://github.com/typhoeus/typhoeus#other-curl-options) for more information on what options it can receive.
138
139
 
139
140
  ## Ignoring content
140
141
 
141
- Add the `data-proofer-ignore` attribute to any `<a>` or `<img>` tag to ignore it from the checks.
142
+ Add the `data-proofer-ignore` attribute to any tag to ignore it from the checks.
142
143
 
143
144
  ## Custom tests
144
145
 
data/bin/htmlproof CHANGED
@@ -16,6 +16,7 @@ Mercenary.program(:htmlproof) do |p|
16
16
 
17
17
  p.option 'ext', '--ext EXT', 'The extension of your HTML files (default: `.html`)'
18
18
  p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).'
19
+ p.option 'as-links', '--as-links', 'Assumes that `PATH` is an array of links to check.'
19
20
  p.option 'swap', '--swap regex:string,[regex:string,...]', Array, 'Array containing key-value pairs of `RegExp:String`. It transforms links that match `RegExp` into `String`'
20
21
  p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings containing `href`s that are safe to ignore. Certain URIs, like `mailto` and `tel`, are always ignored.'
21
22
  p.option 'alt_ignore', '--alt_ignore image1,[image2,...]', Array, 'Array of Strings containing `img`s whose missing `alt` tags are safe to ignore'
@@ -35,12 +36,15 @@ Mercenary.program(:htmlproof) do |p|
35
36
  options[:href_swap][%r{#{pair[0]}}] = pair[1]
36
37
  end
37
38
  end
38
- options[:href_ignore] = opts["ignore"] unless opts["href_ignore"].nil?
39
- options[:alt_ignore] = opts["ignore"] unless opts["alt_ignore"].nil?
39
+ options[:as_link_array] = opts["as-links"] unless opts["as-links"].nil?
40
+ options[:href_ignore] = opts["href_ignore"] unless opts["href_ignore"].nil?
41
+ options[:alt_ignore] = opts["alt_ignore"] unless opts["alt_ignore"].nil?
40
42
  options[:disable_external] = opts["disable_external"] unless opts["disable_external"].nil?
41
43
  options[:favicon] = opts["favicon"] unless opts["favicon"].nil?
42
44
  options[:verbose] = opts["verbose"] unless opts["verbose"].nil?
43
45
 
46
+ path = path.delete(' ').split(",") if options[:as_link_array]
47
+
44
48
  HTML::Proofer.new(path, options).run
45
49
  end
46
50
  end
data/html-proofer.gemspec CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = "html-proofer"
6
- gem.version = "1.0.0"
6
+ gem.version = "1.1.0"
7
7
  gem.authors = ["Garen Torikian"]
8
8
  gem.email = ["gjtorikian@gmail.com"]
9
9
  gem.description = %q{Test your rendered HTML files to make sure they're accurate.}
data/lib/html/proofer.rb CHANGED
@@ -11,7 +11,7 @@ module HTML
11
11
  attr_accessor :failed_tests
12
12
 
13
13
  def initialize(src, opts={})
14
- @srcDir = src
14
+ @src = src
15
15
 
16
16
  @proofer_opts = {
17
17
  :ext => ".html",
@@ -20,7 +20,8 @@ module HTML
20
20
  :href_ignore => [],
21
21
  :alt_ignore => [],
22
22
  :disable_external => false,
23
- :verbose => false
23
+ :verbose => false,
24
+ :as_link_array => false
24
25
  }
25
26
  @options = @proofer_opts.merge({:followlocation => true}).merge(opts)
26
27
 
@@ -33,52 +34,33 @@ module HTML
33
34
  end
34
35
 
35
36
  def run
36
- total_files = 0
37
- external_urls = {}
38
-
39
- logger.info "Running #{get_checks} checks on #{@srcDir} on *#{@options[:ext]}... \n\n".white
40
-
41
- files.each do |path|
42
- total_files += 1
43
- html = HTML::Proofer.create_nokogiri(path)
44
-
45
- get_checks.each do |klass|
46
- logger.debug "Checking #{klass.to_s.downcase} on #{path} ...".blue
47
- check = klass.new(@srcDir, path, html, @options)
48
- check.run
49
- external_urls.merge!(check.external_urls)
50
- @failed_tests.concat(check.issues) if check.issues.length > 0
37
+ unless @options[:as_link_array]
38
+ total_files = 0
39
+ external_urls = {}
40
+
41
+ logger.info "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n".white
42
+
43
+ files.each do |path|
44
+ total_files += 1
45
+ html = HTML::Proofer.create_nokogiri(path)
46
+
47
+ get_checks.each do |klass|
48
+ logger.debug "Checking #{klass.to_s.downcase} on #{path} ...".blue
49
+ check = klass.new(@src, path, html, @options)
50
+ check.run
51
+ external_urls.merge!(check.external_urls)
52
+ @failed_tests.concat(check.issues) if check.issues.length > 0
53
+ end
51
54
  end
52
- end
53
-
54
- # the hypothesis is that Proofer runs way faster if we pull out
55
- # all the external URLs and run the checks at the end. Otherwise, we're halting
56
- # the consuming process for every file. In addition, sorting the list lets
57
- # libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
58
- # rather than GETing all the contents
59
- external_urls = Hash[external_urls.sort]
60
55
 
61
- unless @options[:disable_external]
62
- logger.info "Checking #{external_urls.length} external links...".yellow
56
+ external_link_checker(external_urls) unless @options[:disable_external]
63
57
 
64
- # Typhoeus won't let you pass any non-Typhoeus option
65
- @proofer_opts.each_key do |opt|
66
- @options.delete opt
67
- end
68
-
69
- Ethon.logger = logger # log from Typhoeus/Ethon
70
-
71
- external_urls.each_pair do |href, filenames|
72
- request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
73
- request.on_complete { |response| response_handler(response, filenames) }
74
- hydra.queue request
75
- end
76
- logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
77
- hydra.run
58
+ logger.info "Ran on #{total_files} files!\n\n".green
59
+ else
60
+ external_urls = Hash[*@src.map{ |s| [s, nil] }.flatten]
61
+ external_link_checker(external_urls) unless @options[:disable_external]
78
62
  end
79
63
 
80
- logger.info "Ran on #{total_files} files!\n\n".green
81
-
82
64
  if @failed_tests.empty?
83
65
  logger.info "HTML-Proofer finished successfully.".green
84
66
  else
@@ -90,17 +72,47 @@ module HTML
90
72
  end
91
73
  end
92
74
 
75
+ # the hypothesis is that Proofer runs way faster if we pull out
76
+ # all the external URLs and run the checks at the end. Otherwise, we're halting
77
+ # the consuming process for every file. In addition, sorting the list lets
78
+ # libcurl keep connections to hosts alive. Finally, we'll make a HEAD request,
79
+ # rather than GETing all the contents
80
+ def external_link_checker(external_urls)
81
+ external_urls = Hash[external_urls.sort]
82
+
83
+ logger.info "Checking #{external_urls.length} external links...".yellow
84
+
85
+ # Typhoeus won't let you pass any non-Typhoeus option
86
+ @proofer_opts.each_key do |opt|
87
+ @options.delete opt
88
+ end
89
+
90
+ Ethon.logger = logger # log from Typhoeus/Ethon
91
+
92
+ external_urls.each_pair do |href, filenames|
93
+ request = Typhoeus::Request.new(href, @options.merge({:method => :head}))
94
+ request.on_complete { |response| response_handler(response, filenames) }
95
+ hydra.queue request
96
+ end
97
+ logger.debug "Running requests for all #{hydra.queued_requests.size} external URLs...".yellow
98
+ hydra.run
99
+ end
100
+
93
101
  def response_handler(response, filenames)
94
102
  href = response.options[:effective_url]
95
103
  method = response.request.options[:method]
96
104
  response_code = response.code
97
105
 
98
- logger.debug "Received a #{response_code} for #{href} in #{filenames.join(' ')}"
106
+ debug_msg = "Received a #{response_code} for #{href}"
107
+ debug_msg << " in #{filenames.join(' ')}" unless filenames.nil?
108
+ logger.debug debug_msg
99
109
 
100
110
  if response_code.between?(200, 299)
101
111
  # continue with no op
102
112
  elsif response.timed_out?
103
- @failed_tests << "#{filenames.join(' ').blue}: External link #{href} failed: got a time out"
113
+ failed_test_msg = "External link #{href} failed: got a time out"
114
+ failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
115
+ @failed_tests << failed_test_msg
104
116
  elsif (response_code == 405 || response_code == 420 || response_code == 503) && method == :head
105
117
  # 420s usually come from rate limiting; let's ignore the query and try just the path with a GET
106
118
  uri = URI(href)
@@ -113,7 +125,9 @@ module HTML
113
125
  response_handler(next_response, filenames)
114
126
  else
115
127
  # Received a non-successful http response.
116
- @failed_tests << "#{filenames.join(' ').blue}: External link #{href} failed: #{response_code} #{response.return_message}"
128
+ failed_test_msg = "External link #{href} failed: #{response_code} #{response.return_message}"
129
+ failed_test_msg.insert(0, "#{filenames.join(' ').blue}: ") unless filenames.nil?
130
+ @failed_tests << failed_test_msg
117
131
  end
118
132
  end
119
133
 
@@ -122,10 +136,10 @@ module HTML
122
136
  end
123
137
 
124
138
  def files
125
- if File.directory? @srcDir
126
- Dir.glob("#{@srcDir}/**/*#{@options[:ext]}")
139
+ if File.directory? @src
140
+ Dir.glob("#{@src}/**/*#{@options[:ext]}")
127
141
  else
128
- File.extname(@srcDir) == @options[:ext] ? [@srcDir] : []
142
+ File.extname(@src) == @options[:ext] ? [@src] : []
129
143
  end
130
144
  end
131
145
 
@@ -165,4 +165,10 @@ describe "Links test" do
165
165
  output = capture_stderr { HTML::Proofer.new(internal, options).run }
166
166
  output.should match /without trailing slash/
167
167
  end
168
+
169
+ it "works for array of links" do
170
+ options = { :as_link_array => true}
171
+ output = capture_stderr { HTML::Proofer.new(["www.github.com", "foofoofoo.biz"], options).run }
172
+ output.should match /foofoo.biz\/? failed: 0 Couldn't resolve host name/
173
+ end
168
174
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-20 00:00:00.000000000 Z
11
+ date: 2014-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mercenary