html-proofer 4.0.0.rc3 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c88c30b96819085add5b171ae6c2b61ed24637d4acf25a0258b19fed2e6aa6b
4
- data.tar.gz: 14d8383987c15edcb2868f35e7d6a56369cadd296bf0e31d5e0ed5d3ccd0f6d3
3
+ metadata.gz: b6e8293984dde05cd406326b21fa93a638b0a972efdab95f59adfe606e8be538
4
+ data.tar.gz: b9323e2c6d7cf91a08f45a7b9ecb77b826c02b2ad2b6bf74e6951701b41c3bdb
5
5
  SHA512:
6
- metadata.gz: 3b2f276abff0c540bb08fc26d6267bacefc8e1a9bf2f5e2aa1dfb3294398b9a63fbcfe906baa24b013dd9bf19052f826ce5d47bfce53572381aba2e948b8f918
7
- data.tar.gz: fff8dcd929eed104ab69543f49e7cba4005165f909505d82bd857d1791545593d61aa988a2ef9d91c9887b2a2c57aab8bffeb3e474f8b01521827a6abbf8d475
6
+ metadata.gz: 6d962362e0abc03b2deb15da0b06f2e42857756b045db568c959263346957b628dee73b719de2ed9f24a03762e2ae97e2ff208ea8f216c6d0323bca77864d43d
7
+ data.tar.gz: 2bb9ca1d81e1a088344c8df0c513d2c0e96187cf23b7895def81fba4ac1820f56a0099a4dd2caa861df6ecd75afd4388e4a5b37e18308c4641300ab07f704d15
data/bin/htmlproofer CHANGED
@@ -26,10 +26,11 @@ Mercenary.program(:htmlproofer) do |p|
26
26
  p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker (default: `false`)'
27
27
  p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `true`).'
28
28
  p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
29
+ p.option 'ignore_empty_alt', '--ignore-empty-alt', ' If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those)'
29
30
  p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
30
31
  p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
31
- p.option 'ignore_missing_alt', '--empty-alt-ignore', 'If `true`, ignores images with empty/missing alt tags'
32
- p.option 'ignore_status_codes', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
32
+ p.option 'ignore_missing_alt', '--ignore-missing-alt', 'If `true`, ignores images with missing alt tags'
33
+ p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
33
34
  p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
34
35
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
35
36
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
data/lib/html-proofer.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'html_proofer'
3
+ require_relative "html_proofer"
@@ -1,231 +1,237 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class HTMLProofer::Attribute::Url < HTMLProofer::Attribute
4
- attr_reader :url
3
+ module HTMLProofer
4
+ class Attribute
5
+ class Url < HTMLProofer::Attribute
6
+ attr_reader :url
5
7
 
6
- REMOTE_SCHEMES = %w[http https].freeze
8
+ REMOTE_SCHEMES = ["http", "https"].freeze
7
9
 
8
- def initialize(runner, link_attribute, base_url: nil)
9
- super
10
+ def initialize(runner, link_attribute, base_url: nil)
11
+ super
10
12
 
11
- if @raw_attribute.nil?
12
- @url = nil
13
- else
14
- @url = @raw_attribute.delete("\u200b").strip
15
- @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
13
+ if @raw_attribute.nil?
14
+ @url = nil
15
+ else
16
+ @url = @raw_attribute.delete("\u200b").strip
17
+ @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
16
18
 
17
- swap_urls!
18
- clean_url!
19
+ swap_urls!
20
+ clean_url!
19
21
 
20
- # convert "//" links to "https://"
21
- @url.start_with?('//') ? @url = "https:#{@url}" : @url
22
- end
23
- end
22
+ # convert "//" links to "https://"
23
+ @url.start_with?("//") ? @url = "https:#{@url}" : @url
24
+ end
25
+ end
24
26
 
25
- def to_s
26
- @url
27
- end
27
+ def to_s
28
+ @url
29
+ end
28
30
 
29
- def known_extension?
30
- return true if hash_link?
31
+ def known_extension?
32
+ return true if hash_link?
31
33
 
32
- ext = File.extname(path)
34
+ ext = File.extname(path)
33
35
 
34
- # no extension means we use the assumed one
35
- return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)
36
+ # no extension means we use the assumed one
37
+ return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)
36
38
 
37
- @runner.options[:extensions].include?(ext)
38
- end
39
+ @runner.options[:extensions].include?(ext)
40
+ end
39
41
 
40
- def unknown_extension?
41
- !known_extension?
42
- end
42
+ def unknown_extension?
43
+ !known_extension?
44
+ end
43
45
 
44
- def ignore?
45
- return true if (/^javascript:/).match?(@url)
46
- return true if ignores_pattern?(@runner.options[:ignore_urls])
47
- end
46
+ def ignore?
47
+ return true if /^javascript:/.match?(@url)
48
+ return true if ignores_pattern?(@runner.options[:ignore_urls])
49
+ end
48
50
 
49
- def valid?
50
- !parts.nil?
51
- end
51
+ def valid?
52
+ !parts.nil?
53
+ end
52
54
 
53
- def path?
54
- !parts.host.nil? && !parts.path.nil?
55
- end
55
+ def path?
56
+ !parts.host.nil? && !parts.path.nil?
57
+ end
56
58
 
57
- def parts
58
- @parts ||= Addressable::URI.parse @url
59
- rescue URI::Error, Addressable::URI::InvalidURIError
60
- @parts = nil
61
- end
59
+ def parts
60
+ @parts ||= Addressable::URI.parse(@url)
61
+ rescue URI::Error, Addressable::URI::InvalidURIError
62
+ @parts = nil
63
+ end
62
64
 
63
- def path
64
- Addressable::URI.unencode parts.path unless parts.nil?
65
- end
65
+ def path
66
+ Addressable::URI.unencode(parts.path) unless parts.nil?
67
+ end
66
68
 
67
- def hash
68
- parts&.fragment
69
- end
69
+ def hash
70
+ parts&.fragment
71
+ end
70
72
 
71
- # Does the URL have a hash?
72
- def hash?
73
- !blank?(hash)
74
- end
73
+ # Does the URL have a hash?
74
+ def hash?
75
+ !blank?(hash)
76
+ end
75
77
 
76
- def scheme
77
- parts&.scheme
78
- end
78
+ def scheme
79
+ parts&.scheme
80
+ end
79
81
 
80
- def remote?
81
- REMOTE_SCHEMES.include?(scheme)
82
- end
82
+ def remote?
83
+ REMOTE_SCHEMES.include?(scheme)
84
+ end
83
85
 
84
- def http?
85
- scheme == 'http'
86
- end
86
+ def http?
87
+ scheme == "http"
88
+ end
87
89
 
88
- def https?
89
- scheme == 'https'
90
- end
90
+ def https?
91
+ scheme == "https"
92
+ end
91
93
 
92
- def non_http_remote?
93
- !scheme.nil? && !remote?
94
- end
94
+ def non_http_remote?
95
+ !scheme.nil? && !remote?
96
+ end
95
97
 
96
- def host
97
- parts&.host
98
- end
98
+ def host
99
+ parts&.host
100
+ end
99
101
 
100
- def domain_path
101
- (host || '') + path
102
- end
102
+ def domain_path
103
+ (host || "") + path
104
+ end
103
105
 
104
- def query_values
105
- parts&.query_values
106
- end
106
+ def query_values
107
+ parts&.query_values
108
+ end
107
109
 
108
- # checks if a file exists relative to the current pwd
109
- def exists?
110
- return true if base64?
110
+ # checks if a file exists relative to the current pwd
111
+ def exists?
112
+ return true if base64?
111
113
 
112
- return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
114
+ return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)
113
115
 
114
- @runner.checked_paths[absolute_path] = File.exist?(absolute_path)
115
- end
116
+ @runner.checked_paths[absolute_path] = File.exist?(absolute_path)
117
+ end
116
118
 
117
- def base64?
118
- /^data:image/.match?(@raw_attribute)
119
- end
119
+ def base64?
120
+ /^data:image/.match?(@raw_attribute)
121
+ end
120
122
 
121
- def absolute_path
122
- path = file_path || @runner.current_path
123
+ def absolute_path
124
+ path = file_path || @runner.current_filename
123
125
 
124
- File.expand_path(path, Dir.pwd)
125
- end
126
+ File.expand_path(path, Dir.pwd)
127
+ end
126
128
 
127
- def file_path
128
- return if path.nil? || path.empty?
129
-
130
- path_dot_ext = ''
131
-
132
- path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])
133
-
134
- base = if absolute_path?(path) # path relative to root
135
- # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
136
- @runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
137
- # relative links, path is a file
138
- elsif File.exist?(File.expand_path(path, @runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
139
- File.dirname(@runner.current_path)
140
- # relative links in nested dir, path is a file
141
- elsif File.exist?(File.join(File.dirname(@runner.current_path), path)) || File.exist?(File.join(File.dirname(@runner.current_path), path_dot_ext)) # rubocop:disable Lint/DuplicateBranch
142
- File.dirname(@runner.current_path)
143
- # relative link, path is a directory
144
- else
145
- @runner.current_path
146
- end
147
-
148
- file = File.join(base, path)
149
-
150
- if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
151
- file = "#{file}#{@runner.options[:assume_extension]}"
152
- elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
153
- file = File.join file, @runner.options[:directory_index_file]
154
- end
129
+ def file_path
130
+ return if path.nil? || path.empty?
131
+
132
+ path_dot_ext = ""
133
+
134
+ path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])
135
+
136
+ base = if absolute_path?(path) # path relative to root
137
+ # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
138
+ @runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
139
+ # relative links, path is a file
140
+ elsif File.exist?(File.expand_path(path,
141
+ @runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
142
+ File.dirname(@runner.current_filename)
143
+ # relative links in nested dir, path is a file
144
+ elsif File.exist?(File.join(File.dirname(@runner.current_filename),
145
+ path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
146
+ File.dirname(@runner.current_filename)
147
+ # relative link, path is a directory
148
+ else
149
+ @runner.current_filename
150
+ end
151
+
152
+ file = File.join(base, path)
153
+
154
+ if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
155
+ file = "#{file}#{@runner.options[:assume_extension]}"
156
+ elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
157
+ file = File.join(file, @runner.options[:directory_index_file])
158
+ end
159
+
160
+ file
161
+ end
155
162
 
156
- file
157
- end
163
+ def unslashed_directory?(file)
164
+ File.directory?(file) && !file.end_with?(File::SEPARATOR)
165
+ end
158
166
 
159
- def unslashed_directory?(file)
160
- File.directory?(file) && !file.end_with?(File::SEPARATOR)
161
- end
167
+ def absolute_path?(path)
168
+ path.start_with?("/")
169
+ end
162
170
 
163
- def absolute_path?(path)
164
- path.start_with?('/')
165
- end
171
+ # path is external to the file
172
+ def external?
173
+ !internal?
174
+ end
166
175
 
167
- # path is external to the file
168
- def external?
169
- !internal?
170
- end
176
+ def internal?
177
+ relative_link? || internal_absolute_link? || hash_link?
178
+ end
171
179
 
172
- def internal?
173
- relative_link? || internal_absolute_link? || hash_link?
174
- end
180
+ def internal_absolute_link?
181
+ url.start_with?("/")
182
+ end
175
183
 
176
- def internal_absolute_link?
177
- url.start_with?('/')
178
- end
184
+ def relative_link?
185
+ return false if remote?
179
186
 
180
- def relative_link?
181
- return false if remote?
187
+ hash_link? || param_link? || url.start_with?(".") || url =~ /^\S/
188
+ end
182
189
 
183
- hash_link? || param_link? || url.start_with?('.') || url =~ /^\S/
184
- end
190
+ def link_points_to_same_page?
191
+ hash_link || param_link
192
+ end
185
193
 
186
- def link_points_to_same_page?
187
- hash_link || param_link
188
- end
194
+ def hash_link?
195
+ url.start_with?("#")
196
+ end
189
197
 
190
- def hash_link?
191
- url.start_with?('#')
192
- end
198
+ def param_link?
199
+ url.start_with?("?")
200
+ end
193
201
 
194
- def param_link?
195
- url.start_with?('?')
196
- end
202
+ def sans_hash
203
+ @url.to_s.sub(/##{hash}/, "")
204
+ end
197
205
 
198
- def sans_hash
199
- @url.to_s.sub(/##{hash}/, '')
200
- end
206
+ # catch any obvious issues, like strings in port numbers
207
+ private def clean_url!
208
+ return if @url =~ /^([!#{Regexp.last_match(0)}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
201
209
 
202
- # catch any obvious issues, like strings in port numbers
203
- private def clean_url!
204
- return if @url =~ /^([!#{Regexp.last_match(0)}-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
210
+ @url = Addressable::URI.parse(@url).normalize.to_s
211
+ end
205
212
 
206
- @url = Addressable::URI.parse(@url).normalize.to_s
207
- end
213
+ private def swap_urls!
214
+ return @url if blank?(replacements = @runner.options[:swap_urls])
208
215
 
209
- private def swap_urls!
210
- return @url if blank?(replacements = @runner.options[:swap_urls])
216
+ replacements.each do |link, replace|
217
+ @url = @url.gsub(link, replace)
218
+ end
219
+ end
211
220
 
212
- replacements.each do |link, replace|
213
- @url = @url.gsub(link, replace)
214
- end
215
- end
221
+ private def ignores_pattern?(links_to_ignore)
222
+ return false unless links_to_ignore.is_a?(Array)
216
223
 
217
- private def ignores_pattern?(links_to_ignore)
218
- return false unless links_to_ignore.is_a?(Array)
224
+ links_to_ignore.each do |link_to_ignore|
225
+ case link_to_ignore
226
+ when String
227
+ return true if link_to_ignore == @raw_attribute
228
+ when Regexp
229
+ return true if link_to_ignore&.match?(@raw_attribute)
230
+ end
231
+ end
219
232
 
220
- links_to_ignore.each do |link_to_ignore|
221
- case link_to_ignore
222
- when String
223
- return true if link_to_ignore == @raw_attribute
224
- when Regexp
225
- return true if link_to_ignore&.match?(@raw_attribute)
233
+ false
226
234
  end
227
235
  end
228
-
229
- false
230
236
  end
231
237
  end