html-proofer 3.15.3 → 3.17.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40942bc9c00696cb5c3c02e6ca427c0adfd1f80dae7028a6a2d70992c29065a2
4
- data.tar.gz: 1ef56b761178d31791dc2e457c8d4da0704c6845725e87eada63e8c6d6d01d84
3
+ metadata.gz: 78c779b0dfc11fd1d80baaf4133ee26b695e13fcb20ed3852cb99a65caef7e24
4
+ data.tar.gz: 71e4f5e229d8754bc1ad01d4dc28d76ae43f0c5c2d0e3ff2871c32ae4ce89c42
5
5
  SHA512:
6
- metadata.gz: 10f6a27dc6c59b01dd3ff4aadf8d3c1fef3cd82e35ee604f180a9738094aae1be4748d78279e5abe5fdac2ed07f43a8efdac8cec4d1a3b24ba91a3125301bbe3
7
- data.tar.gz: 33f427ea5661e13e301b06033f65d16623005cf4834b412729e8e04c13bbaecbc0d88cce2f7a20d06cedbb3b6bc6015bef0bd1ed370ba0edc6b18ba4d6117e03
6
+ metadata.gz: aa82fcbba561ef4107dce9188d2c451ee8c40afbf5ff67dbd4f98729f91fec40ce3b58f2e13fd04a9e2f91258757f283240680cbf7b609e2c3b7000168b2fff6
7
+ data.tar.gz: 848b2d731f440c3128c77e4cdddcb54ffc18cf1fdd423afc63b9300ccf074ad457b0064b70aab59d950ea242ba3e01b2ff33f8e02491859c24f23d5895f6f705
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- STDOUT.sync = true
4
+ $stdout.sync = true
5
5
 
6
6
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
7
7
 
@@ -50,7 +50,7 @@ Mercenary.program(:htmlproofer) do |p|
50
50
  p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
51
51
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
52
52
  p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
53
- p.option 'root_dir', '--root-folder PATH', String, 'The absolute path to the directory serving your html-files. Used when running html-proofer on a file, rather than a directory.'
53
+ p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
54
54
 
55
55
  p.action do |args, opts|
56
56
  args = ['.'] if args.empty?
@@ -59,6 +59,8 @@ module HTMLProofer
59
59
  end
60
60
 
61
61
  def add(url, filenames, status, msg = '')
62
+ return unless use_cache?
63
+
62
64
  data = {
63
65
  time: @cache_time,
64
66
  filenames: filenames,
@@ -92,12 +94,12 @@ module HTMLProofer
92
94
  del = 0
93
95
  @cache_log.delete_if do |url, _|
94
96
  url = clean_url(url)
95
- if !found_urls.include?(url)
97
+ if found_urls.include?(url)
98
+ false
99
+ else
96
100
  @logger.log :debug, "Removing #{url} from cache check"
97
101
  del += 1
98
102
  true
99
- else
100
- false
101
103
  end
102
104
  end
103
105
 
@@ -115,12 +117,11 @@ module HTMLProofer
115
117
  @load.nil?
116
118
  end
117
119
 
118
- def retrieve_urls(external_urls)
119
- urls_to_check = detect_url_changes(external_urls)
120
+ def retrieve_urls(urls)
121
+ urls_to_check = detect_url_changes(urls)
120
122
  @cache_log.each_pair do |url, cache|
121
- if within_timeframe?(cache['time'])
122
- next if cache['message'].empty? # these were successes to skip
123
- end
123
+ next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
124
+
124
125
  urls_to_check[url] = cache['filenames'] # recheck expired links
125
126
  end
126
127
  urls_to_check
@@ -3,15 +3,17 @@
3
3
  module HTMLProofer
4
4
  # Mostly handles issue management and collecting of external URLs.
5
5
  class Check
6
- attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls
6
+ attr_reader :node, :html, :element, :src, :path, :options, :issues, :internal_urls, :external_urls
7
7
 
8
- def initialize(src, path, html, logger, options)
8
+ def initialize(src, path, html, logger, cache, options)
9
9
  @src = src
10
10
  @path = path
11
11
  @html = remove_ignored(html)
12
12
  @logger = logger
13
+ @cache = cache
13
14
  @options = options
14
15
  @issues = []
16
+ @internal_urls = {}
15
17
  @external_urls = {}
16
18
  end
17
19
 
@@ -24,17 +26,22 @@ module HTMLProofer
24
26
  raise NotImplementedError, 'HTMLProofer::Check subclasses must implement #run'
25
27
  end
26
28
 
27
- def add_issue(desc, line: nil, status: -1, content: nil)
28
- @issues << Issue.new(@path, desc, line: line, status: status, content: content)
29
+ def add_issue(desc, line: nil, path: nil, status: -1, content: nil)
30
+ @issues << Issue.new(path || @path, desc, line: line, status: status, content: content)
31
+ false
32
+ end
33
+
34
+ def add_to_internal_urls(url, internal_url)
35
+ if @internal_urls[url]
36
+ @internal_urls[url] << internal_url
37
+ else
38
+ @internal_urls[url] = [internal_url]
39
+ end
29
40
  end
30
41
 
31
42
  def add_to_external_urls(url)
32
43
  return if @external_urls[url]
33
44
 
34
- add_path_for_url(url)
35
- end
36
-
37
- def add_path_for_url(url)
38
45
  if @external_urls[url]
39
46
  @external_urls[url] << @path
40
47
  else
@@ -7,7 +7,7 @@ class FaviconCheck < ::HTMLProofer::Check
7
7
  favicon = create_element(node)
8
8
  next if favicon.ignore?
9
9
 
10
- found = true if favicon.rel.split(' ').last.eql? 'icon'
10
+ found = true if favicon.rel.split.last.eql? 'icon'
11
11
  break if found
12
12
  end
13
13
 
@@ -4,6 +4,8 @@ class LinkCheck < ::HTMLProofer::Check
4
4
  include HTMLProofer::Utils
5
5
 
6
6
  def missing_href?
7
+ return blank?(@link.src) if @node.name == 'source'
8
+
7
9
  blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
8
10
  end
9
11
 
@@ -12,7 +14,7 @@ class LinkCheck < ::HTMLProofer::Check
12
14
  end
13
15
 
14
16
  def run
15
- @html.css('a, link').each do |node|
17
+ @html.css('a, link, source').each do |node|
16
18
  @link = create_element(node)
17
19
  line = node.line
18
20
  content = node.to_s
@@ -49,23 +51,31 @@ class LinkCheck < ::HTMLProofer::Check
49
51
  # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
50
52
  next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
51
53
 
52
- add_to_external_urls(@link.href)
54
+ add_to_external_urls(@link.href || @link.src)
53
55
  next
54
- elsif @link.internal? && !@link.exists?
55
- add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
56
+ elsif @link.internal?
57
+ if @link.exists?
58
+ add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
59
+ else
60
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
61
+ end
56
62
  end
63
+ end
57
64
 
58
- # does the local directory have a trailing slash?
59
- if @link.unslashed_directory? @link.absolute_path
60
- add_issue("internally linking to a directory #{@link.absolute_path} without trailing slash", line: line, content: content)
61
- next
62
- end
65
+ external_urls
66
+ end
63
67
 
64
- # verify the target hash
65
- handle_hash(@link, line, content) if @link.hash
68
+ def check_internal_link(link, path, line, content)
69
+ # does the local directory have a trailing slash?
70
+ if link.unslashed_directory?(link.absolute_path)
71
+ add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", path: path, line: line, content: content)
72
+ return false
66
73
  end
67
74
 
68
- external_urls
75
+ # verify the target hash
76
+ return handle_hash(link, path, line, content) if link.hash
77
+
78
+ true
69
79
  end
70
80
 
71
81
  def check_schemes(link, line, content)
@@ -93,24 +103,28 @@ class LinkCheck < ::HTMLProofer::Check
93
103
  add_issue("#{link.href} contains no phone number", line: line, content: content) if link.path.empty?
94
104
  end
95
105
 
96
- def handle_hash(link, line, content)
97
- if link.internal? && !hash_check(link.html, link.hash)
98
- add_issue("linking to internal hash ##{link.hash} that does not exist", line: line, content: content)
106
+ def handle_hash(link, path, line, content)
107
+ if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
108
+ return add_issue("linking to internal hash ##{link.hash} that does not exist", path: path, line: line, content: content)
99
109
  elsif link.external?
100
- external_link_check(link, line, content)
110
+ return external_link_check(link, line, content)
101
111
  end
112
+
113
+ true
102
114
  end
103
115
 
104
116
  def external_link_check(link, line, content)
105
- if !link.exists?
106
- add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
117
+ if link.exists? # rubocop:disable Style/GuardClause
118
+ target_html = create_nokogiri(link.absolute_path)
119
+ return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
107
120
  else
108
- target_html = create_nokogiri link.absolute_path
109
- add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_check target_html, link.hash
121
+ return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
110
122
  end
123
+
124
+ true
111
125
  end
112
126
 
113
- def hash_check(html, href_hash)
127
+ def hash_exists?(html, href_hash)
114
128
  decoded_href_hash = Addressable::URI.unescape(href_hash)
115
129
  fragment_ids = [href_hash, decoded_href_hash]
116
130
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
@@ -148,7 +162,19 @@ class LinkCheck < ::HTMLProofer::Check
148
162
 
149
163
  class XpathFunctions
150
164
  def case_sensitive_equals(node_set, str_to_match)
151
- node_set.find_all { |node| node.to_s. == str_to_match.to_s }
165
+ node_set.find_all { |node| node.to_s.== str_to_match.to_s }
166
+ end
167
+ end
168
+
169
+ class InternalLink
170
+ attr_reader :link, :href, :path, :line, :content
171
+
172
+ def initialize(link, path, line, content)
173
+ @link = link
174
+ @href = @link.href
175
+ @path = path
176
+ @line = line
177
+ @content = content
152
178
  end
153
179
  end
154
180
  end
@@ -8,7 +8,7 @@ class OpenGraphElement < ::HTMLProofer::Element
8
8
  # Fake up src from the content attribute
9
9
  instance_variable_set('@src', @content)
10
10
 
11
- @src.insert 0, 'http:' if @src =~ %r{^//}
11
+ @src.insert 0, 'http:' if %r{^//}.match?(@src)
12
12
  end
13
13
  end
14
14
 
@@ -80,7 +80,7 @@ module HTMLProofer
80
80
  begin
81
81
  JSON.parse(config)
82
82
  rescue StandardError
83
- raise ArgumentError, "Option '" + option_name + "' did not contain valid JSON."
83
+ raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
84
84
  end
85
85
  end
86
86
  end
@@ -15,7 +15,7 @@ module HTMLProofer
15
15
  # Construct readable ivars for every element
16
16
  begin
17
17
  obj.attributes.each_pair do |attribute, value|
18
- name = attribute.tr('-:.;', '_').to_s.to_sym
18
+ name = attribute.tr('-:.;@', '_').to_s.to_sym
19
19
  (class << self; self; end).send(:attr_reader, name)
20
20
  instance_variable_set("@#{name}", value.value)
21
21
  end
@@ -42,19 +42,19 @@ module HTMLProofer
42
42
 
43
43
  # fix up missing protocols
44
44
  if defined?(@href)
45
- @href.insert(0, 'http:') if @href =~ %r{^//}
45
+ @href.insert(0, 'http:') if %r{^//}.match?(@href)
46
46
  else
47
47
  @href = nil
48
48
  end
49
49
 
50
50
  if defined?(@src)
51
- @src.insert(0, 'http:') if @src =~ %r{^//}
51
+ @src.insert(0, 'http:') if %r{^//}.match?(@src)
52
52
  else
53
53
  @src = nil
54
54
  end
55
55
 
56
56
  if defined?(@srcset)
57
- @srcset.insert(0, 'http:') if @srcset =~ %r{^//}
57
+ @srcset.insert(0, 'http:') if %r{^//}.match?(@srcset)
58
58
  else
59
59
  @srcset = nil
60
60
  end
@@ -105,12 +105,10 @@ module HTMLProofer
105
105
  return true if @data_proofer_ignore
106
106
  return true if @parent_ignorable
107
107
 
108
- return true if url =~ /^javascript:/
108
+ return true if /^javascript:/.match?(url)
109
109
 
110
110
  # ignore base64 encoded images
111
- if %w[ImageCheck FaviconCheck].include? @type
112
- return true if url =~ /^data:image/
113
- end
111
+ return true if %w[ImageCheck FaviconCheck].include?(@type) && /^data:image/.match?(url)
114
112
 
115
113
  # ignore user defined URLs
116
114
  return true if ignores_pattern_check(@check.options[:url_ignore])
@@ -171,6 +169,10 @@ module HTMLProofer
171
169
  url.start_with?('?')
172
170
  end
173
171
 
172
+ def absolute_path?(path)
173
+ path.start_with?('/')
174
+ end
175
+
174
176
  def file_path
175
177
  return if path.nil? || path.empty?
176
178
 
@@ -178,22 +180,19 @@ module HTMLProofer
178
180
 
179
181
  path_dot_ext = path + @check.options[:extension] if @check.options[:assume_extension]
180
182
 
181
- if path =~ %r{^/} # path relative to root
182
- if File.directory?(@check.src)
183
- base = @check.src
184
- else
185
- root_dir = @check.options[:root_dir]
186
- base = root_dir || File.dirname(@check.src)
187
- end
188
- elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
189
- base = File.dirname @check.path
190
- elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # relative links in nested dir, path is a file
191
- base = File.dirname @check.path
192
- else # relative link, path is a directory
193
- base = @check.path
194
- end
183
+ base = if absolute_path?(path) # path relative to root
184
+ # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
185
+ @check.options[:root_dir] || (File.directory?(@check.src) ? @check.src : File.dirname(@check.src))
186
+ elsif File.exist?(File.expand_path(path, @check.src)) || File.exist?(File.expand_path(path_dot_ext, @check.src)) # relative links, path is a file
187
+ File.dirname(@check.path)
188
+ elsif File.exist?(File.join(File.dirname(@check.path), path)) || File.exist?(File.join(File.dirname(@check.path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch; relative links in nested dir, path is a file
189
+ File.dirname(@check.path)
190
+ else # relative link, path is a directory
191
+ @check.path
192
+ end
193
+
194
+ file = File.join(base, path)
195
195
 
196
- file = File.join base, path
197
196
  if @check.options[:assume_extension] && File.file?("#{file}#{@check.options[:extension]}")
198
197
  file = "#{file}#{@check.options[:extension]}"
199
198
  elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
@@ -205,22 +204,24 @@ module HTMLProofer
205
204
 
206
205
  # checks if a file exists relative to the current pwd
207
206
  def exists?
208
- return @checked_paths[absolute_path] if @checked_paths.key? absolute_path
207
+ return @checked_paths[absolute_path] if @checked_paths.key?(absolute_path)
209
208
 
210
- @checked_paths[absolute_path] = File.exist? absolute_path
209
+ @checked_paths[absolute_path] = File.exist?(absolute_path)
211
210
  end
212
211
 
213
212
  def absolute_path
214
213
  path = file_path || @check.path
215
- File.expand_path path, Dir.pwd
214
+
215
+ File.expand_path(path, Dir.pwd)
216
216
  end
217
217
 
218
218
  def ignores_pattern_check(links)
219
219
  links.each do |ignore|
220
- if ignore.is_a? String
220
+ case ignore
221
+ when String
221
222
  return true if ignore == url
222
- elsif ignore.is_a? Regexp
223
- return true if ignore =~ url
223
+ when Regexp
224
+ return true if ignore&.match?(url)
224
225
  end
225
226
  end
226
227
 
@@ -6,6 +6,7 @@ module HTMLProofer
6
6
 
7
7
  class InvalidHtmlError < StandardError
8
8
  def initialize(failures)
9
+ super
9
10
  @failures = failures
10
11
  end
11
12
 
@@ -53,13 +54,13 @@ module HTMLProofer
53
54
  def call(env)
54
55
  result = @app.call(env)
55
56
  return result if env['REQUEST_METHOD'] != 'GET'
56
- return result if env['QUERY_STRING'] =~ /proofer-ignore/
57
+ return result if /proofer-ignore/.match?(env['QUERY_STRING'])
57
58
  return result if result.first != 200
58
59
 
59
60
  body = []
60
61
  result.last.each { |e| body << e }
61
62
 
62
- body = body.join('')
63
+ body = body.join
63
64
  begin
64
65
  html = body.lstrip
65
66
  rescue StandardError
@@ -4,7 +4,7 @@ module HTMLProofer
4
4
  class Runner
5
5
  include HTMLProofer::Utils
6
6
 
7
- attr_reader :options, :external_urls, :failures
7
+ attr_reader :options, :internal_urls, :external_urls, :failures
8
8
 
9
9
  def initialize(src, opts = {})
10
10
  @src = src
@@ -20,6 +20,8 @@ module HTMLProofer
20
20
 
21
21
  @type = @options.delete(:type)
22
22
  @logger = HTMLProofer::Log.new(@options[:log_level])
23
+ @cache = Cache.new(@logger, @options[:cache])
24
+ @internal_link_checks = nil
23
25
 
24
26
  # Add swap patterns for internal domains
25
27
  unless @options[:internal_domains].empty?
@@ -30,7 +32,11 @@ module HTMLProofer
30
32
  end
31
33
  end
32
34
 
35
+ @internal_urls = {}
36
+ @internal_urls_to_paths = {}
37
+ @external_urls = {}
33
38
  @failures = []
39
+ @before_request = []
34
40
  end
35
41
 
36
42
  def run
@@ -58,15 +64,13 @@ module HTMLProofer
58
64
  end
59
65
  end
60
66
  @external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
61
- validate_urls
67
+ validate_external_urls
62
68
  end
63
69
 
64
70
  # Collects any external URLs found in a directory of files. Also collects
65
71
  # every failed test from process_files.
66
72
  # Sends the external URLs to Typhoeus for batch processing.
67
73
  def check_files
68
- @external_urls = {}
69
-
70
74
  process_files.each do |item|
71
75
  @external_urls.merge!(item[:external_urls])
72
76
  @failures.concat(item[:failures])
@@ -77,9 +81,12 @@ module HTMLProofer
77
81
  # just not run those other checks at all.
78
82
  if @options[:external_only]
79
83
  @failures = []
80
- validate_urls
84
+ validate_external_urls
81
85
  elsif !@options[:disable_external]
82
- validate_urls
86
+ validate_external_urls
87
+ validate_internal_urls
88
+ else
89
+ validate_internal_urls
83
90
  end
84
91
  end
85
92
 
@@ -100,8 +107,21 @@ module HTMLProofer
100
107
  @src.each do |src|
101
108
  checks.each do |klass|
102
109
  @logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..."
103
- check = Object.const_get(klass).new(src, path, html, @logger, @options)
110
+ check = Object.const_get(klass).new(src, path, html, @logger, @cache, @options)
104
111
  check.run
112
+
113
+ if klass == 'LinkCheck'
114
+ @internal_link_checks = check
115
+ check.internal_urls.each_pair do |url, internal_urls|
116
+ if @internal_urls_to_paths[url]
117
+ @internal_urls_to_paths[url].concat(internal_urls.map(&:path))
118
+ else
119
+ @internal_urls_to_paths[url] = internal_urls.map(&:path)
120
+ end
121
+ end
122
+ @internal_urls.merge!(check.internal_urls)
123
+ end
124
+
105
125
  external_urls = check.external_urls
106
126
  external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
107
127
  result[:external_urls].merge!(external_urls)
@@ -112,15 +132,35 @@ module HTMLProofer
112
132
  end
113
133
 
114
134
  def check_path(path)
115
- check_parsed create_nokogiri(path), path
135
+ check_parsed(create_nokogiri(path), path)
116
136
  end
117
137
 
118
- def validate_urls
119
- url_validator = HTMLProofer::UrlValidator.new(@logger, @external_urls, @options)
138
+ def validate_external_urls
139
+ url_validator = HTMLProofer::UrlValidator.new(@logger, @cache, @external_urls, @options)
140
+ url_validator.before_request = @before_request
120
141
  @failures.concat(url_validator.run)
121
142
  @external_urls = url_validator.external_urls
122
143
  end
123
144
 
145
+ def validate_internal_urls
146
+ if @cache.use_cache?
147
+ urls_to_check = load_internal_cache
148
+
149
+ urls_to_check.each_pair do |url, internal_urls|
150
+ result = @internal_link_checks.check_internal_link(internal_urls.first.link, internal_urls.first.path, internal_urls.first.line, internal_urls.first.content)
151
+ code = result ? 200 : 404
152
+ @cache.add(url, @internal_urls_to_paths[url].sort, code, '') # TODO: blank msg for now
153
+ end
154
+ @cache.write
155
+ else
156
+ @internal_urls.values.flatten.each do |internal_url|
157
+ @internal_link_checks.check_internal_link(internal_url.link, internal_url.path, internal_url.line, internal_url.content)
158
+ end
159
+ end
160
+
161
+ @failures.concat(@internal_link_checks.issues) unless @internal_urls.length.zero?
162
+ end
163
+
124
164
  def files
125
165
  @files ||= if @type == :directory
126
166
  @src.map do |src|
@@ -173,5 +213,29 @@ module HTMLProofer
173
213
  failure_text = pluralize(count, 'failure', 'failures')
174
214
  raise @logger.colorize :fatal, "HTML-Proofer found #{failure_text}!"
175
215
  end
216
+
217
+ # Set before_request callback.
218
+ #
219
+ # @example Set before_request.
220
+ # request.before_request { |request| p "yay" }
221
+ #
222
+ # @param [ Block ] block The block to execute.
223
+ #
224
+ # @yield [ Typhoeus::Request ]
225
+ #
226
+ # @return [ Array<Block> ] All before_request blocks.
227
+ def before_request(&block)
228
+ @before_request ||= []
229
+ @before_request << block if block
230
+ @before_request
231
+ end
232
+
233
+ def load_internal_cache
234
+ urls_to_check = @cache.retrieve_urls(@internal_urls)
235
+ cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
236
+ @logger.log :info, "Found #{cache_text} in the cache..."
237
+
238
+ urls_to_check
239
+ end
176
240
  end
177
241
  end
@@ -10,21 +10,23 @@ module HTMLProofer
10
10
  include HTMLProofer::Utils
11
11
 
12
12
  attr_reader :external_urls
13
+ attr_writer :before_request
13
14
 
14
- def initialize(logger, external_urls, options)
15
+ def initialize(logger, cache, external_urls, options)
15
16
  @logger = logger
16
17
  @external_urls = external_urls
17
18
  @failed_tests = []
18
19
  @options = options
19
20
  @hydra = Typhoeus::Hydra.new(@options[:hydra])
20
- @cache = Cache.new(@logger, @options[:cache])
21
+ @cache = cache
22
+ @before_request = []
21
23
  end
22
24
 
23
25
  def run
24
26
  @external_urls = remove_query_values
25
27
 
26
28
  if @cache.use_cache?
27
- urls_to_check = load_cache
29
+ urls_to_check = @cache.retrieve_urls(@external_urls)
28
30
  external_link_checker(urls_to_check)
29
31
  @cache.write
30
32
  else
@@ -41,11 +43,11 @@ module HTMLProofer
41
43
  iterable_external_urls = @external_urls.dup
42
44
  @external_urls.each_key do |url|
43
45
  uri = begin
44
- Addressable::URI.parse(url)
45
- rescue URI::Error, Addressable::URI::InvalidURIError
46
- @logger.log :error, "#{url} is an invalid URL"
47
- nil
48
- end
46
+ Addressable::URI.parse(url)
47
+ rescue URI::Error, Addressable::URI::InvalidURIError
48
+ @logger.log :error, "#{url} is an invalid URL"
49
+ nil
50
+ end
49
51
  next if uri.nil? || uri.query.nil?
50
52
 
51
53
  iterable_external_urls.delete(url) unless new_url_query_values?(uri, paths_with_queries)
@@ -72,15 +74,6 @@ module HTMLProofer
72
74
  uri.host + uri.path
73
75
  end
74
76
 
75
- def load_cache
76
- cache_count = @cache.size
77
- cache_text = pluralize(cache_count, 'link', 'links')
78
-
79
- @logger.log :info, "Found #{cache_text} in the cache..."
80
-
81
- @cache.retrieve_urls(@external_urls)
82
- end
83
-
84
77
  # Proofer runs faster if we pull out all the external URLs and run the checks
85
78
  # at the end. Otherwise, we're halting the consuming process for every file during
86
79
  # `process_files`.
@@ -109,11 +102,11 @@ module HTMLProofer
109
102
  def establish_queue(external_urls)
110
103
  external_urls.each_pair do |url, filenames|
111
104
  url = begin
112
- clean_url(url)
113
- rescue URI::Error, Addressable::URI::InvalidURIError
114
- add_external_issue(filenames, "#{url} is an invalid URL")
115
- next
116
- end
105
+ clean_url(url)
106
+ rescue URI::Error, Addressable::URI::InvalidURIError
107
+ add_external_issue(filenames, "#{url} is an invalid URL")
108
+ next
109
+ end
117
110
 
118
111
  method = if hash?(url) && @options[:check_external_hash]
119
112
  :get
@@ -127,16 +120,19 @@ module HTMLProofer
127
120
  def clean_url(href)
128
121
  # catch any obvious issues, like strings in port numbers
129
122
  parsed = Addressable::URI.parse(href)
130
- if href !~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
131
- parsed.normalize
132
- else
123
+ if href =~ /^([!#{$&}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
133
124
  href
125
+ else
126
+ parsed.normalize
134
127
  end
135
128
  end
136
129
 
137
130
  def queue_request(method, href, filenames)
138
131
  opts = @options[:typhoeus].merge(method: method)
139
132
  request = Typhoeus::Request.new(href, opts)
133
+ @before_request.each do |callback|
134
+ callback.call(request)
135
+ end
140
136
  request.on_complete { |response| response_handler(response, filenames) }
141
137
  @hydra.queue request
142
138
  end
@@ -5,7 +5,7 @@ require 'nokogumbo'
5
5
  module HTMLProofer
6
6
  module Utils
7
7
  def pluralize(count, single, plural)
8
- "#{count} #{(count == 1 ? single : plural)}"
8
+ "#{count} #{count == 1 ? single : plural}"
9
9
  end
10
10
 
11
11
  def create_nokogiri(path)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.15.3'
4
+ VERSION = '3.17.3'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.15.3
4
+ version: 3.17.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-20 00:00:00.000000000 Z
11
+ date: 2020-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -122,20 +122,6 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
- - !ruby/object:Gem::Dependency
126
- name: codecov
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - ">="
130
- - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - ">="
137
- - !ruby/object:Gem::Version
138
- version: '0'
139
125
  - !ruby/object:Gem::Dependency
140
126
  name: pry-byebug
141
127
  requirement: !ruby/object:Gem::Requirement
@@ -293,15 +279,15 @@ homepage: https://github.com/gjtorikian/html-proofer
293
279
  licenses:
294
280
  - MIT
295
281
  metadata: {}
296
- post_install_message:
282
+ post_install_message:
297
283
  rdoc_options: []
298
284
  require_paths:
299
285
  - lib
300
286
  required_ruby_version: !ruby/object:Gem::Requirement
301
287
  requirements:
302
- - - ">="
288
+ - - "~>"
303
289
  - !ruby/object:Gem::Version
304
- version: '0'
290
+ version: '2.4'
305
291
  required_rubygems_version: !ruby/object:Gem::Requirement
306
292
  requirements:
307
293
  - - ">="
@@ -309,7 +295,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
309
295
  version: '0'
310
296
  requirements: []
311
297
  rubygems_version: 3.1.2
312
- signing_key:
298
+ signing_key:
313
299
  specification_version: 4
314
300
  summary: A set of tests to validate your HTML output. These tests check if your image
315
301
  references are legitimate, if they have alt tags, if your internal links are working,