html-proofer 3.18.6 → 3.19.2

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e2d6deb85601ff3f5e5df02999f1b55f29231e5658f1d4fa9986c002b3cc0263
4
- data.tar.gz: e20a007144a866b41b97eb36a964ca1372d693ade77acc69a9c9e706b353c8ef
3
+ metadata.gz: d06dbda8bf9baad3be96b5565fcb86de0892d4241c6b7ede08c3c2d7203f6752
4
+ data.tar.gz: da895d696b7b9d1f3ca9c2e504e0c375ce9942f53dd5c2174f80570d32e3ad5b
5
5
  SHA512:
6
- metadata.gz: c51154450eb20c04c0b7038effb12ab8d258457ea5560421b5a2a0739342504afebb8fd092fe4378838a076636ff2665eb691f513b8b34b49af2e89ffba988c2
7
- data.tar.gz: fcb7bf492588fabc991caf1f779fca14a7b553d3ce00dbf55ba24aebf3c39f7613af6a408d94c3a3faaeb21dceb8265ece91ca9bf4320aee7d4f3dca133ba4de
6
+ metadata.gz: 693f677cb91b9b0e79135ef27ed2771d34d7e5fee5bd368b20b5791a4e25a468ba769d299c2cd1417eccd989d9ac1290cfaf9dfd90b9534fa0af0440382e4e0f
7
+ data.tar.gz: c6c33f309e3f8b00dc1721653f2e2099c647c9bd8d3b4ecaab4ec7b97161925911d1e6e3c8cabb30b9b9eb2274fed7238ed338da7d85d25b7c6981d537f6435d
data/bin/htmlproofer CHANGED
@@ -48,6 +48,7 @@ Mercenary.program(:htmlproofer) do |p|
48
48
  p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
49
49
  p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
50
50
  p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
51
+ p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
51
52
  p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
52
53
  p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
53
54
  p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
@@ -87,7 +88,8 @@ Mercenary.program(:htmlproofer) do |p|
87
88
  options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
88
89
  options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
89
90
 
90
- options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil?
91
+ options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'], symbolize_names: false) unless opts['typhoeus_config'].nil?
92
+ options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config']) unless opts['hydra_config'].nil?
91
93
 
92
94
  unless opts['timeframe'].nil?
93
95
  options[:cache] ||= {}
@@ -3,6 +3,7 @@
3
3
  require_relative 'utils'
4
4
  require 'date'
5
5
  require 'json'
6
+ require 'uri'
6
7
 
7
8
  module HTMLProofer
8
9
  class Cache
@@ -11,6 +12,8 @@ module HTMLProofer
11
12
  DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
12
13
  DEFAULT_CACHE_FILE_NAME = 'cache.log'
13
14
 
15
+ URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
16
+
14
17
  attr_reader :exists, :cache_log, :storage_dir, :cache_file
15
18
 
16
19
  def initialize(logger, options)
@@ -30,6 +33,8 @@ module HTMLProofer
30
33
  end
31
34
 
32
35
  def within_timeframe?(time)
36
+ return false if time.nil?
37
+
33
38
  (@parsed_timeframe..@cache_time).cover?(Time.parse(time))
34
39
  end
35
40
 
@@ -71,10 +76,14 @@ module HTMLProofer
71
76
  @cache_log[clean_url(url)] = data
72
77
  end
73
78
 
74
- def detect_url_changes(found)
75
- existing_urls = @cache_log.keys.map { |url| clean_url(url) }
79
+ def detect_url_changes(found, type)
76
80
  found_urls = found.keys.map { |url| clean_url(url) }
77
81
 
82
+ # if there were no urls, bail
83
+ return {} if found_urls.empty?
84
+
85
+ existing_urls = @cache_log.keys.map { |url| clean_url(url) }
86
+
78
87
  # prepare to add new URLs detected
79
88
  additions = found.reject do |url, _|
80
89
  url = clean_url(url)
@@ -91,19 +100,20 @@ module HTMLProofer
91
100
  @logger.log :info, "Adding #{new_link_text} to the cache..."
92
101
 
93
102
  # remove from cache URLs that no longer exist
94
- del = 0
103
+ deletions = 0
95
104
  @cache_log.delete_if do |url, _|
96
105
  url = clean_url(url)
106
+
97
107
  if found_urls.include?(url)
98
108
  false
99
- else
109
+ elsif url_matches_type?(url, type)
100
110
  @logger.log :debug, "Removing #{url} from cache check"
101
- del += 1
111
+ deletions += 1
102
112
  true
103
113
  end
104
114
  end
105
115
 
106
- del_link_text = pluralize(del, 'link', 'links')
116
+ del_link_text = pluralize(deletions, 'link', 'links')
107
117
  @logger.log :info, "Removing #{del_link_text} from the cache..."
108
118
 
109
119
  additions
@@ -113,21 +123,22 @@ module HTMLProofer
113
123
  # caches need access to this file. Write a proper versioned
114
124
  # schema in the future
115
125
  def write
116
- file = {}
117
- file = JSON.parse(File.read(cache_file)) if File.exist?(cache_file)
118
- File.write(cache_file, file.merge(@cache_log).to_json)
126
+ File.write(cache_file, @cache_log.to_json)
119
127
  end
120
128
 
121
129
  def load?
122
130
  @load.nil?
123
131
  end
124
132
 
125
- def retrieve_urls(urls)
126
- urls_to_check = detect_url_changes(urls)
133
+ def retrieve_urls(urls, type)
134
+ urls_to_check = detect_url_changes(urls, type)
135
+
127
136
  @cache_log.each_pair do |url, cache|
128
137
  next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
129
138
 
130
- urls_to_check[url] = cache['filenames'] # recheck expired links
139
+ if url_matches_type?(url, type)
140
+ urls_to_check[url] = cache['filenames'] # recheck expired links
141
+ end
131
142
  end
132
143
  urls_to_check
133
144
  end
@@ -154,9 +165,9 @@ module HTMLProofer
154
165
 
155
166
  @cache_file = File.join(storage_dir, cache_file_name)
156
167
 
157
- return unless File.exist?(cache_file)
168
+ return unless File.exist?(@cache_file)
158
169
 
159
- contents = File.read(cache_file)
170
+ contents = File.read(@cache_file)
160
171
  @cache_log = contents.empty? ? {} : JSON.parse(contents)
161
172
  end
162
173
 
@@ -174,5 +185,10 @@ module HTMLProofer
174
185
  @cache_datetime - Rational(measurement / 24.0)
175
186
  end.to_time
176
187
  end
188
+
189
+ def url_matches_type?(url, type)
190
+ return true if type == :internal && url !~ URI_REGEXP
191
+ return true if type == :external && url =~ URI_REGEXP
192
+ end
177
193
  end
178
194
  end
@@ -67,7 +67,7 @@ module HTMLProofer
67
67
  end
68
68
  end
69
69
 
70
- def self.parse_json_option(option_name, config)
70
+ def self.parse_json_option(option_name, config, symbolize_names: true)
71
71
  raise ArgumentError, 'Must provide an option name in string format.' unless option_name.is_a?(String)
72
72
  raise ArgumentError, 'Must provide an option name in string format.' if option_name.strip.empty?
73
73
 
@@ -78,7 +78,7 @@ module HTMLProofer
78
78
  return {} if config.strip.empty?
79
79
 
80
80
  begin
81
- JSON.parse(config)
81
+ JSON.parse(config, { symbolize_names: symbolize_names })
82
82
  rescue StandardError
83
83
  raise ArgumentError, "Option '#{option_name} did not contain valid JSON."
84
84
  end
@@ -125,7 +125,7 @@ module HTMLProofer
125
125
  end
126
126
 
127
127
  external_urls = check.external_urls
128
- external_urls = check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }.to_h if @options[:url_swap]
128
+ external_urls = check.external_urls.transform_keys { |url| swap(url, @options[:url_swap]) } if @options[:url_swap]
129
129
  result[:external_urls].merge!(external_urls)
130
130
  result[:failures].concat(check.issues)
131
131
  end
@@ -238,7 +238,7 @@ module HTMLProofer
238
238
  end
239
239
 
240
240
  def load_internal_cache
241
- urls_to_check = @cache.retrieve_urls(@internal_urls)
241
+ urls_to_check = @cache.retrieve_urls(@internal_urls, :internal)
242
242
  cache_text = pluralize(urls_to_check.count, 'internal link', 'internal links')
243
243
  @logger.log :info, "Found #{cache_text} in the cache..."
244
244
 
@@ -26,7 +26,7 @@ module HTMLProofer
26
26
  @external_urls = remove_query_values
27
27
 
28
28
  if @cache.use_cache?
29
- urls_to_check = @cache.retrieve_urls(@external_urls)
29
+ urls_to_check = @cache.retrieve_urls(@external_urls, :external)
30
30
  external_link_checker(urls_to_check)
31
31
  @cache.write
32
32
  else
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTMLProofer
4
- VERSION = '3.18.6'
4
+ VERSION = '3.19.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-proofer
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.18.6
4
+ version: 3.19.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-21 00:00:00.000000000 Z
11
+ date: 2021-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -287,7 +287,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
287
287
  requirements:
288
288
  - - ">="
289
289
  - !ruby/object:Gem::Version
290
- version: 2.4.10
290
+ version: 2.6.0
291
291
  - - "<"
292
292
  - !ruby/object:Gem::Version
293
293
  version: '4.0'