html-proofer 3.18.5 → 3.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e90edda0dcb30f00baa440ec7b231bf73e086e0414fe9a1a56870940e2c15aeb
4
- data.tar.gz: 2505df1fb597aaadfa68bf0f7c8b3d9380e27a236c31582d4c91840bf5e65623
3
+ metadata.gz: 2e18991c07a3b31ae78293fc8dc2a9392211f8609ed494ed5cad87158747cde2
4
+ data.tar.gz: d9551668e860b1055a640be9ba597cb9fca8822849a7b000d251086e8eadb023
5
5
  SHA512:
6
- metadata.gz: c5b864da4544723801ade7b906c387f893aed338ca36f9f3ae4a4660fa69f2f60883071cb2030cf526911abe954cf81932691a96c7c8f93e5e6970a18120449a
7
- data.tar.gz: b141806119a645c1b974028f4e4ec3ecf60d74d51d1f5575d8b2fab4a3c7e90ddd8a6f8511474bd64825d49f591630ed8e9be83c1d5676b22b0f73b42ce93e3e
6
+ metadata.gz: 2e9187bcd157c8c69cb8dc7f47e9ef6908e7ccb06b56fd2fd241e8c3cbb245874a9776e5405c9aa5328a01e7038880f173d205efb2c925c7c5787e3c46d3389d
7
+ data.tar.gz: 76afe0c48983344c1ebfb86075458d71b05163fb051ff0fbcbfa2104b324d520bbf7213ff262ec437d3ddeb6265d4454056d66a50d4c68ab02a3c6626a9633e0
data/bin/htmlproofer CHANGED
@@ -48,6 +48,7 @@ Mercenary.program(:htmlproofer) do |p|
48
48
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
49
49
  p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
50
50
  p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
51
+ p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
51
52
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
52
53
  p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
53
54
  p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
@@ -87,7 +88,8 @@ Mercenary.program(:htmlproofer) do |p|
87
88
  options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
88
89
  options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
89
90
 
90
- options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
91
+ options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'], symbolize_names: false) unless opts['typhoeus_config'].nil?
92
+ options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config'], symbolize_names: false) unless opts['hydra_config'].nil?
91
93
 
92
94
  unless opts['timeframe'].nil?
93
95
  options[:cache] ||= {}
@@ -3,6 +3,7 @@
3
3
  require_relative 'utils'
4
4
  require 'date'
5
5
  require 'json'
6
+ require 'uri'
6
7
 
7
8
  module HTMLProofer
8
9
  class Cache
@@ -11,6 +12,8 @@ module HTMLProofer
11
12
  DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
12
13
  DEFAULT_CACHE_FILE_NAME = 'cache.log'
13
14
 
15
+ URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
16
+
14
17
  attr_reader :exists, :cache_log, :storage_dir, :cache_file
15
18
 
16
19
  def initialize(logger, options)
@@ -30,6 +33,8 @@ module HTMLProofer
30
33
  end
31
34
 
32
35
  def within_timeframe?(time)
36
+ return false if time.nil?
37
+
33
38
  (@parsed_timeframe..@cache_time).cover?(Time.parse(time))
34
39
  end
35
40
 
@@ -71,10 +76,14 @@ module HTMLProofer
71
76
  @cache_log[clean_url(url)] = data
72
77
  end
73
78
 
74
- def detect_url_changes(found)
75
- existing_urls = @cache_log.keys.map { |url| clean_url(url) }
79
+ def detect_url_changes(found, type)
76
80
  found_urls = found.keys.map { |url| clean_url(url) }
77
81
 
82
+ # if there were no urls, bail
83
+ return {} if found_urls.empty?
84
+
85
+ existing_urls = @cache_log.keys.map { |url| clean_url(url) }
86
+
78
87
  # prepare to add new URLs detected
79
88
  additions = found.reject do |url, _|
80
89
  url = clean_url(url)
@@ -91,24 +100,28 @@ module HTMLProofer
91
100
  @logger.log :info, "Adding #{new_link_text} to the cache..."
92
101
 
93
102
  # remove from cache URLs that no longer exist
94
- del = 0
103
+ deletions = 0
95
104
  @cache_log.delete_if do |url, _|
96
105
  url = clean_url(url)
106
+
97
107
  if found_urls.include?(url)
98
108
  false
99
- else
109
+ elsif url_matches_type?(url, type)
100
110
  @logger.log :debug, "Removing #{url} from cache check"
101
- del += 1
111
+ deletions += 1
102
112
  true
103
113
  end
104
114
  end
105
115
 
106
- del_link_text = pluralize(del, 'link', 'links')
116
+ del_link_text = pluralize(deletions, 'link', 'links')
107
117
  @logger.log :info, "Removing #{del_link_text} from the cache..."
108
118
 
109
119
  additions
110
120
  end
111
121
 
122
+ # TODO: Garbage performance--both the external and internal
123
+ # caches need access to this file. Write a proper versioned
124
+ # schema in the future
112
125
  def write
113
126
  File.write(cache_file, @cache_log.to_json)
114
127
  end
@@ -117,12 +130,15 @@ module HTMLProofer
117
130
  @load.nil?
118
131
  end
119
132
 
120
- def retrieve_urls(urls)
121
- urls_to_check = detect_url_changes(urls)
133
+ def retrieve_urls(urls, type)
134
+ urls_to_check = detect_url_changes(urls, type)
135
+
122
136
  @cache_log.each_pair do |url, cache|
123
137
  next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
124
138
 
125
- urls_to_check[url] = cache['filenames'] # recheck expired links
139
+ if url_matches_type?(url, type)
140
+ urls_to_check[url] = cache['filenames'] # recheck expired links
141
+ end
126
142
  end
127
143
  urls_to_check
128
144
  end
@@ -149,9 +165,9 @@ module HTMLProofer
149
165
 
150
166
  @cache_file = File.join(storage_dir, cache_file_name)
151
167
 
152
- return unless File.exist?(cache_file)
168
+ return unless File.exist?(@cache_file)
153
169
 
154
- contents = File.read(cache_file)
170
+ contents = File.read(@cache_file)
155
171
  @cache_log = contents.empty? ? {} : JSON.parse(contents)
156
172
  end
157
173
 
@@ -169,5 +185,10 @@ module HTMLProofer
169
185
  @cache_datetime - Rational(measurement / 24.0)
170
186
  end.to_time
171
187
  end
188
+
189
+ def url_matches_type?(url, type)
190
+ return true if type == :internal && url !~ URI_REGEXP
191
+ return true if type == :external && url =~ URI_REGEXP
192
+ end
172
193
  end
173
194
  end
@@ -59,11 +59,8 @@ class LinkCheck < ::HTMLProofer::Check
59
59
  add_to_external_urls(@link.href || @link.src)
60
60
  next
61
61
  elsif @link.internal?
62
- if @link.exists? || @link.hash
63
- add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
64
- else
65
- add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content)
66
- end
62
+ add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
63
+ add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content) if !@link.exists? && !@link.hash
67
64
  end
68
65
  end
69
66
 
@@ -67,7 +67,7 @@ module HTMLProofer
67
67
  end
68
68
  end
69
69
 
70
- def self.parse_json_option(option_name, config)
70
+ def self.parse_json_option(option_name, config, symbolize_names: true)
71
71
  raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
72
72
  raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
73
73
 
@@ -78,7 +78,7 @@ module HTMLProofer
78
78
  return {} if config.strip.empty?
79
79
 
80
80
  begin
81
- JSON.parse(config)
81
+ JSON.parse(config, { symbolize_names: symbolize_names })
82
82
  rescue StandardError
83
83
  raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
84
84
  end
@@ -24,7 +24,7 @@ module HTMLProofer
24
24
  raise e
25
25
  end
26
26
 
27
- @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false
27
+ @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true'
28
28
 
29
29
  @data_proofer_ignore = defined?(@data_proofer_ignore)
30
30
 
@@ -220,6 +220,8 @@ module HTMLProofer
220
220
  end
221
221
 
222
222
  def ignores_pattern_check(links)
223
+ return false unless links.is_a?(Array)
224
+
223
225
  links.each do |ignore|
224
226
  case ignore
225
227
  when String
@@ -22,7 +22,7 @@ module HTMLProofer
22
22
  allow_hash_href: true,
23
23
  check_external_hash: true,
24
24
  check_html: true,
25
- url_ignore: [/.*/], # Don't try to check if local files exist
25
+ url_ignore: [%r{^/}], # Don't try to check if local files exist
26
26
  validation: { report_eof_tags: true }
27
27
  }
28
28
  end
@@ -63,7 +63,9 @@ module HTMLProofer
63
63
  swap(url, @options[:url_swap])
64
64
  end
65
65
  end
66
- @external_urls = Hash[*@src.map { |s| [s, nil] }.flatten]
66
+ @external_urls = @src.each_with_object({}) do |url, hash|
67
+ hash[url] = nil
68
+ end
67
69
  validate_external_urls
68
70
  end
69
71
 
@@ -123,7 +125,7 @@ module HTMLProofer
123
125
  end
124
126
 
125
127
  external_urls = check.external_urls
126
- external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap]
128
+ external_urls = check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }.to_h if @options[:url_swap]
127
129
  result[:external_urls].merge!(external_urls)
128
130
  result[:failures].concat(check.issues)
129
131
  end
@@ -236,7 +238,7 @@ module HTMLProofer
236
238
  end
237
239
 
238
240
  def load_internal_cache
239
- urls_to_check = @cache.retrieve_urls(@internal_urls)
241
+ urls_to_check = @cache.retrieve_urls(@internal_urls, :internal)
240
242
  cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
241
243
  @logger.log :info, "Found #{cache_text} in the cache..."
242
244
 
@@ -26,7 +26,7 @@ module HTMLProofer
26
26
  @external_urls = remove_query_values
27
27
 
28
28
  if @cache.use_cache?
29
- urls_to_check = @cache.retrieve_urls(@external_urls)
29
+ urls_to_check = @cache.retrieve_urls(@external_urls, :external)
30
30
  external_link_checker(urls_to_check)
31
31
  @cache.write
32
32
  else
@@ -85,7 +85,7 @@ module HTMLProofer
85
85
  # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is
86
86
  # not available as an option.
87
87
  def external_link_checker(external_urls)
88
- external_urls = Hash[external_urls.sort]
88
+ external_urls = external_urls.sort.to_h
89
89
 
90
90
  count = external_urls.length
91
91
  check_text = pluralize(count, 'external link', 'external links')
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.18.5'
4
+ VERSION = '3.19.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.18.5
4
+ version: 3.19.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-02 00:00:00.000000000 Z
11
+ date: 2021-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable