html-proofer 3.19.4 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/htmlproofer +44 -59
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +251 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +292 -0
- data/lib/html_proofer/check/favicon.rb +43 -0
- data/lib/html_proofer/check/images.rb +99 -0
- data/lib/html_proofer/check/links.rb +135 -0
- data/lib/html_proofer/check/open_graph.rb +42 -0
- data/lib/html_proofer/check/scripts.rb +49 -0
- data/lib/html_proofer/check.rb +94 -0
- data/lib/html_proofer/configuration.rb +91 -0
- data/lib/html_proofer/element.rb +144 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +33 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +244 -0
- data/lib/html_proofer/url_validator/external.rb +193 -0
- data/lib/html_proofer/url_validator/internal.rb +97 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +9 -12
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +59 -0
- metadata +42 -22
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/configuration.rb +0 -88
- data/lib/html-proofer/element.rb +0 -265
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -249
- data/lib/html-proofer/url_validator.rb +0 -237
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 98bde6dd5e32f42e5983fd1aaa7bf02d3359b40f2b4a8b5360a5826abf2cc674
|
|
4
|
+
data.tar.gz: 9b7d57fd18e625ab7cc91c46faceac510b05ba5290f665fb5dd333b92446b00b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a666be806bbb70028488b3ef89f1325d7b5faaec9f48e105220db2d2f7dd022c12dabac828949ffd2100bdc2b4e3bd4c1f9ce9df48bab17b9106d9453eb49f01
|
|
7
|
+
data.tar.gz: 8a58bb5b120ab3cf3eb0e692f3f52f4385bd5923d3a7c91e95dddd17d8b0e25d20dd207872099665ac37737afb6b4ed86d04d39bc7793c6af2abcd35390d2051
|
data/bin/htmlproofer
CHANGED
|
@@ -15,44 +15,34 @@ Mercenary.program(:htmlproofer) do |p|
|
|
|
15
15
|
|
|
16
16
|
p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
|
|
17
17
|
|
|
18
|
-
p.option '
|
|
19
|
-
p.option '
|
|
18
|
+
p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
|
|
19
|
+
p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
|
|
20
20
|
p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
|
|
21
|
-
p.option '
|
|
22
|
-
p.option '
|
|
23
|
-
p.option '
|
|
24
|
-
p.option '
|
|
25
|
-
p.option '
|
|
26
|
-
p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
|
|
27
|
-
p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
|
|
28
|
-
p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
|
|
29
|
-
p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
|
|
21
|
+
p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
|
|
22
|
+
p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
|
|
23
|
+
p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
|
|
24
|
+
p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
|
|
25
|
+
p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
|
|
30
26
|
p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
|
|
31
|
-
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker
|
|
32
|
-
p.option '
|
|
33
|
-
p.option '
|
|
34
|
-
p.option '
|
|
35
|
-
p.option '
|
|
36
|
-
p.option '
|
|
37
|
-
p.option '
|
|
38
|
-
p.option '
|
|
39
|
-
p.option '
|
|
40
|
-
p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
|
|
41
|
-
p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
|
|
42
|
-
p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
|
|
43
|
-
p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
|
|
44
|
-
p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
|
|
45
|
-
p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
|
|
46
|
-
p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
|
|
27
|
+
p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
|
|
28
|
+
p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
|
29
|
+
p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
|
|
30
|
+
p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
|
|
31
|
+
p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
|
|
32
|
+
p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
|
33
|
+
p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
|
|
34
|
+
p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
|
|
35
|
+
p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
|
|
47
36
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
|
48
37
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
|
49
|
-
p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
|
|
50
|
-
p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
|
|
51
|
-
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
|
52
|
-
p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
|
|
53
|
-
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
|
54
|
-
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
|
55
38
|
p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
|
|
39
|
+
p.option 'swap_attributes', '--swap-attributes CONFIG', String, 'JSON-formatted config that maps element names to the preferred attribute to check (default: `{}`).'
|
|
40
|
+
p.option 'swap_urls', '--swap-urls re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
|
41
|
+
|
|
42
|
+
p.option 'typhoeus', '--typhoeus CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
|
43
|
+
p.option 'hydra', '--hydra CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
|
|
44
|
+
p.option 'parallel', '--parallel CONFIG', String, 'JSON-formatted string of Parallel config. Will override the html-proofer defaults.'
|
|
45
|
+
p.option 'cache', '--cache CONFIG', String, 'JSON-formatted string of cache config. Will override the html-proofer defaults.'
|
|
56
46
|
|
|
57
47
|
p.action do |args, opts|
|
|
58
48
|
args = ['.'] if args.empty?
|
|
@@ -67,46 +57,41 @@ Mercenary.program(:htmlproofer) do |p|
|
|
|
67
57
|
end
|
|
68
58
|
|
|
69
59
|
# some minor manipulation of a special option
|
|
70
|
-
unless opts['
|
|
71
|
-
options[:
|
|
72
|
-
opts['
|
|
60
|
+
unless opts['swap_urls'].nil?
|
|
61
|
+
options[:swap_urls] = {}
|
|
62
|
+
opts['swap_urls'].each do |s|
|
|
73
63
|
splt = s.split(/(?<!\\):/, 2)
|
|
74
64
|
|
|
75
65
|
re = splt[0].gsub(/\\:/, ':')
|
|
76
66
|
string = splt[1].gsub(/\\:/, ':')
|
|
77
|
-
options[:
|
|
67
|
+
options[:swap_urls][Regexp.new(re)] = string
|
|
78
68
|
end
|
|
79
69
|
end
|
|
80
70
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
|
|
71
|
+
# check booleans
|
|
72
|
+
[:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
|
|
73
|
+
next if (val = opts[option.to_s]).nil?
|
|
74
|
+
if val == "false"
|
|
75
|
+
options[option] = false
|
|
76
|
+
else
|
|
77
|
+
options[option] = true
|
|
78
|
+
end
|
|
79
|
+
end
|
|
91
80
|
|
|
92
|
-
options[:
|
|
93
|
-
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config']) unless opts['hydra_config'].nil?
|
|
81
|
+
options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
|
|
94
82
|
|
|
95
|
-
unless opts['
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
83
|
+
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus', opts['typhoeus'], symbolize_names: false) unless opts['typhoeus'].nil?
|
|
84
|
+
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra', opts['hydra']) unless opts['hydra'].nil?
|
|
85
|
+
options[:parallel] = HTMLProofer::Configuration.parse_json_option('parallel', opts['parallel']) unless opts['parallel'].nil?
|
|
86
|
+
options[:cache] = HTMLProofer::Configuration.parse_json_option('cache', opts['cache']) unless opts['cache'].nil?
|
|
99
87
|
|
|
100
|
-
unless opts['
|
|
101
|
-
options[:cache] ||= {}
|
|
102
|
-
options[:cache][:storage_dir] = opts['storage_dir'] unless opts['storage_dir'].nil?
|
|
103
|
-
end
|
|
88
|
+
options[:swap_attributes] = HTMLProofer::Configuration.parse_json_option('swap_attributes', opts['swap_attributes'], symbolize_names: false) unless opts['swap_attributes'].nil?
|
|
104
89
|
|
|
105
|
-
options[:
|
|
90
|
+
options[:ignore_status_codes] = Array(options[:ignore_status_codes]).map(&:to_i)
|
|
106
91
|
|
|
107
92
|
paths = path.split(',')
|
|
108
93
|
if opts['as_links']
|
|
109
|
-
links = path.
|
|
94
|
+
links = path.split(',').map(&:strip)
|
|
110
95
|
HTMLProofer.check_links(links, options).run
|
|
111
96
|
elsif File.directory?(paths.first)
|
|
112
97
|
HTMLProofer.check_directories(paths, options).run
|
data/lib/html-proofer.rb
CHANGED
|
@@ -1,56 +1,3 @@
|
|
|
1
|
-
# rubocop:disable Naming/FileName
|
|
2
1
|
# frozen_string_literal: true
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
dir = File.join(File.dirname(__FILE__), path)
|
|
6
|
-
Dir[File.join(dir, '*.rb')].sort.each do |f|
|
|
7
|
-
require f
|
|
8
|
-
end
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
require_relative 'html-proofer/utils'
|
|
12
|
-
require_all 'html-proofer'
|
|
13
|
-
require_all 'html-proofer/check'
|
|
14
|
-
|
|
15
|
-
require 'parallel'
|
|
16
|
-
require 'fileutils'
|
|
17
|
-
|
|
18
|
-
begin
|
|
19
|
-
require 'awesome_print'
|
|
20
|
-
require 'pry-byebug'
|
|
21
|
-
rescue LoadError; end # rubocop:disable Lint/SuppressedException
|
|
22
|
-
module HTMLProofer
|
|
23
|
-
def self.check_file(file, options = {})
|
|
24
|
-
raise ArgumentError unless file.is_a?(String)
|
|
25
|
-
raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
|
|
26
|
-
|
|
27
|
-
options[:type] = :file
|
|
28
|
-
HTMLProofer::Runner.new(file, options)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def self.check_directory(directory, options = {})
|
|
32
|
-
raise ArgumentError unless directory.is_a?(String)
|
|
33
|
-
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
|
34
|
-
|
|
35
|
-
options[:type] = :directory
|
|
36
|
-
HTMLProofer::Runner.new([directory], options)
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def self.check_directories(directories, options = {})
|
|
40
|
-
raise ArgumentError unless directories.is_a?(Array)
|
|
41
|
-
|
|
42
|
-
options[:type] = :directory
|
|
43
|
-
directories.each do |directory|
|
|
44
|
-
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
|
45
|
-
end
|
|
46
|
-
HTMLProofer::Runner.new(directories, options)
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def self.check_links(links, options = {})
|
|
50
|
-
raise ArgumentError unless links.is_a?(Array)
|
|
51
|
-
|
|
52
|
-
options[:type] = :links
|
|
53
|
-
HTMLProofer::Runner.new(links, options)
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
# rubocop:enable Naming/FileName
|
|
3
|
+
require_relative "html_proofer"
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTMLProofer
  class Attribute
    # A URL-valued attribute (for example an `href` or `src`).
    #
    # Cleans up the raw attribute value, applies the runner's `swap_urls`
    # replacements, and answers questions about the resulting URL: whether it
    # is remote or internal, which local file it maps to, whether that file
    # exists, and so on.
    class Url < HTMLProofer::Attribute
      attr_reader :url, :size

      REMOTE_SCHEMES = ["http", "https"].freeze

      # @param runner [Object] object exposing `options`, `current_source`,
      #   `current_filename` and `checked_paths`
      # @param link_attribute [String, nil] raw attribute value
      # @param base_url [String, nil] when not blank, the URL is joined onto it
      # @param extract_size [Boolean] when true, the value is split on
      #   whitespace into a URL and a trailing size token
      def initialize(runner, link_attribute, base_url: nil, extract_size: false)
        super

        if @raw_attribute.nil?
          @url = nil
          return
        end

        # strip zero-width spaces and surrounding whitespace
        @url = @raw_attribute.delete("\u200b").strip
        @url, @size = @url.split(/\s+/) if extract_size
        # splitting an empty string yields no URL at all; normalise to "" before
        # joining, because Addressable::URI.join rejects nil
        @url ||= ""
        @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)

        swap_urls!
        clean_url!
      end

      def protocol_relative?
        url.start_with?("//")
      end

      def to_s
        @url
      end

      # True when the path's extension is one of the configured `extensions`
      # (extensionless paths are judged by `assume_extension`).
      def known_extension?
        return true if hash_link?
        return true if path.end_with?("/")

        ext = File.extname(path)

        # no extension means we use the assumed one
        return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)

        @runner.options[:extensions].include?(ext)
      end

      def unknown_extension?
        !known_extension?
      end

      # True for `javascript:` URLs and for raw attributes matching `ignore_urls`.
      def ignore?
        /^javascript:/.match?(@url) || ignores_pattern?(@runner.options[:ignore_urls])
      end

      def valid?
        !parts.nil?
      end

      # True when the URL has both a host and a path; false when it cannot be parsed.
      def path?
        !host.nil? && !parts.path.nil?
      end

      # Parsed URL, memoised; nil when the URL cannot be parsed.
      def parts
        @parts ||= Addressable::URI.parse(@url)
      rescue URI::Error, Addressable::URI::InvalidURIError
        @parts = nil
      end

      # Percent-decoded path component, or nil when the URL cannot be parsed.
      def path
        Addressable::URI.unencode(parts.path) unless parts.nil?
      end

      # The URL fragment.
      # NOTE(review): this shadows Object#hash, so instances should not be used
      # as Hash keys or Set members; kept because callers rely on the name.
      def hash
        parts&.fragment
      end

      # Does the URL have a hash?
      def hash?
        !blank?(hash)
      end

      def scheme
        parts&.scheme
      end

      def remote?
        REMOTE_SCHEMES.include?(scheme)
      end

      def http?
        scheme == "http"
      end

      def https?
        scheme == "https"
      end

      def non_http_remote?
        !scheme.nil? && !remote?
      end

      def host
        parts&.host
      end

      # Host followed by path; a missing host or path contributes an empty string.
      def domain_path
        "#{host}#{path}"
      end

      def query_values
        parts&.query_values
      end

      # checks if a file exists relative to the current pwd
      def exists?
        return true if base64?

        # resolve once: absolute_path inspects the filesystem on every call
        resolved = absolute_path
        return @runner.checked_paths[resolved] if @runner.checked_paths.key?(resolved)

        @runner.checked_paths[resolved] = File.exist?(resolved)
      end

      def base64?
        /^data:image/.match?(@raw_attribute)
      end

      # Absolute location of the file this URL points at, falling back to the
      # file currently being checked when the URL has no path.
      def absolute_path
        target = file_path || @runner.current_filename

        File.expand_path(target, Dir.pwd)
      end

      # Maps the URL's path to a file on disk, honouring `root_dir`,
      # `assume_extension` and `directory_index_file`. Returns nil when the
      # URL has no path.
      def file_path
        return if path.nil? || path.empty?

        path_dot_ext = ""

        path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])

        base = if absolute_path?(path) # path relative to root
          # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
          @runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
        # relative links, path is a file
        elsif File.exist?(File.expand_path(path, @runner.current_source)) ||
            File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
          File.dirname(@runner.current_filename)
        # relative links in nested dir, path is a file
        elsif File.exist?(File.join(File.dirname(@runner.current_filename), path)) ||
            File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
          File.dirname(@runner.current_filename)
        # relative link, path is a directory
        else
          @runner.current_filename
        end

        file = File.join(base, path)

        if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
          file = "#{file}#{@runner.options[:assume_extension]}"
        elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
          file = File.join(file, @runner.options[:directory_index_file])
        end

        file
      end

      def unslashed_directory?(file)
        return false unless File.directory?(file)

        !file.end_with?(File::SEPARATOR) && !follow_location?
      end

      # Whether the Typhoeus configuration asks for redirects to be followed.
      # NOTE(review): the String key is checked as well because the CLI parses
      # `--typhoeus` without symbolizing names; confirm whether the keys are
      # normalised elsewhere before they reach this point.
      def follow_location?
        typhoeus = @runner.options[:typhoeus]

        typhoeus && (typhoeus[:followlocation] || typhoeus["followlocation"])
      end

      def absolute_path?(path)
        path.start_with?("/")
      end

      # path is external to the file
      def external?
        !internal?
      end

      def internal?
        relative_link? || internal_absolute_link? || hash_link?
      end

      def internal_absolute_link?
        url.start_with?("/")
      end

      def relative_link?
        return false if remote?

        hash_link? || param_link? || url.start_with?(".") || /^\S/.match?(url)
      end

      # True when the URL is only a fragment or only a query string.
      def link_points_to_same_page?
        hash_link? || param_link?
      end

      def hash_link?
        url.start_with?("#")
      end

      def has_hash?
        url.include?("#")
      end

      def param_link?
        url.start_with?("?")
      end

      # The URL with its fragment removed. The fragment is matched literally,
      # so regexp metacharacters inside it are harmless.
      def sans_hash
        @url.to_s.sub("##{hash}", "")
      end

      # catch any obvious issues, like strings in port numbers
      private def clean_url!
        # `\#` keeps `#$&` literal; unescaped, Ruby treats it as interpolation
        return if /^([!\#$&-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/.match?(@url)

        @url = Addressable::URI.parse(@url).normalize.to_s
      end

      # Applies every `swap_urls` pattern => replacement pair to the URL.
      private def swap_urls!
        return @url if blank?(replacements = @runner.options[:swap_urls])

        replacements.each do |link, replace|
          @url = @url.gsub(link, replace)
        end
      end

      # True when the raw attribute equals one of the given Strings or matches
      # one of the given Regexps; anything other than an Array never matches.
      private def ignores_pattern?(links_to_ignore)
        return false unless links_to_ignore.is_a?(Array)

        links_to_ignore.any? do |link_to_ignore|
          case link_to_ignore
          when String then link_to_ignore == @raw_attribute
          when Regexp then link_to_ignore.match?(@raw_attribute)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HTMLProofer
  # Pairs the raw value of an attribute with the runner that is processing it.
  # Subclasses build richer behaviour on top of these two pieces of state.
  class Attribute
    include HTMLProofer::Utils

    attr_reader :raw_attribute

    # @param runner [Object] the runner handling the current check
    # @param raw_attribute [String, nil] attribute value exactly as supplied
    # Any keyword arguments are accepted and discarded, so subclasses can
    # forward their own options through `super`.
    def initialize(runner, raw_attribute, **_ignored)
      @raw_attribute = raw_attribute
      @runner = runner
    end
  end
end
|