html-proofer 3.19.4 → 4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/htmlproofer +44 -59
- data/lib/html-proofer.rb +1 -54
- data/lib/html_proofer/attribute/url.rb +251 -0
- data/lib/html_proofer/attribute.rb +15 -0
- data/lib/html_proofer/cache.rb +292 -0
- data/lib/html_proofer/check/favicon.rb +43 -0
- data/lib/html_proofer/check/images.rb +99 -0
- data/lib/html_proofer/check/links.rb +135 -0
- data/lib/html_proofer/check/open_graph.rb +42 -0
- data/lib/html_proofer/check/scripts.rb +49 -0
- data/lib/html_proofer/check.rb +94 -0
- data/lib/html_proofer/configuration.rb +91 -0
- data/lib/html_proofer/element.rb +144 -0
- data/lib/html_proofer/failure.rb +17 -0
- data/lib/{html-proofer → html_proofer}/log.rb +19 -19
- data/lib/html_proofer/reporter/cli.rb +33 -0
- data/lib/html_proofer/reporter.rb +23 -0
- data/lib/html_proofer/runner.rb +244 -0
- data/lib/html_proofer/url_validator/external.rb +193 -0
- data/lib/html_proofer/url_validator/internal.rb +97 -0
- data/lib/html_proofer/url_validator.rb +16 -0
- data/lib/{html-proofer → html_proofer}/utils.rb +9 -12
- data/lib/{html-proofer → html_proofer}/version.rb +1 -1
- data/lib/html_proofer/xpath_functions.rb +10 -0
- data/lib/html_proofer.rb +59 -0
- metadata +42 -22
- data/lib/html-proofer/cache.rb +0 -194
- data/lib/html-proofer/check/favicon.rb +0 -29
- data/lib/html-proofer/check/html.rb +0 -37
- data/lib/html-proofer/check/images.rb +0 -48
- data/lib/html-proofer/check/links.rb +0 -182
- data/lib/html-proofer/check/opengraph.rb +0 -46
- data/lib/html-proofer/check/scripts.rb +0 -42
- data/lib/html-proofer/check.rb +0 -75
- data/lib/html-proofer/configuration.rb +0 -88
- data/lib/html-proofer/element.rb +0 -265
- data/lib/html-proofer/issue.rb +0 -65
- data/lib/html-proofer/middleware.rb +0 -82
- data/lib/html-proofer/runner.rb +0 -249
- data/lib/html-proofer/url_validator.rb +0 -237
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98bde6dd5e32f42e5983fd1aaa7bf02d3359b40f2b4a8b5360a5826abf2cc674
|
4
|
+
data.tar.gz: 9b7d57fd18e625ab7cc91c46faceac510b05ba5290f665fb5dd333b92446b00b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a666be806bbb70028488b3ef89f1325d7b5faaec9f48e105220db2d2f7dd022c12dabac828949ffd2100bdc2b4e3bd4c1f9ce9df48bab17b9106d9453eb49f01
|
7
|
+
data.tar.gz: 8a58bb5b120ab3cf3eb0e692f3f52f4385bd5923d3a7c91e95dddd17d8b0e25d20dd207872099665ac37737afb6b4ed86d04d39bc7793c6af2abcd35390d2051
|
data/bin/htmlproofer
CHANGED
@@ -15,44 +15,34 @@ Mercenary.program(:htmlproofer) do |p|
|
|
15
15
|
|
16
16
|
p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
|
17
17
|
|
18
|
-
p.option '
|
19
|
-
p.option '
|
18
|
+
p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
|
19
|
+
p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
|
20
20
|
p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
|
21
|
-
p.option '
|
22
|
-
p.option '
|
23
|
-
p.option '
|
24
|
-
p.option '
|
25
|
-
p.option '
|
26
|
-
p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
|
27
|
-
p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
|
28
|
-
p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
|
29
|
-
p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
|
21
|
+
p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
|
22
|
+
p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
|
23
|
+
p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
|
24
|
+
p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
|
25
|
+
p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
|
30
26
|
p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
|
31
|
-
p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker
|
32
|
-
p.option '
|
33
|
-
p.option '
|
34
|
-
p.option '
|
35
|
-
p.option '
|
36
|
-
p.option '
|
37
|
-
p.option '
|
38
|
-
p.option '
|
39
|
-
p.option '
|
40
|
-
p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
|
41
|
-
p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
|
42
|
-
p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
|
43
|
-
p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
|
44
|
-
p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
|
45
|
-
p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
|
46
|
-
p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
|
27
|
+
p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
|
28
|
+
p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
|
29
|
+
p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
|
30
|
+
p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
|
31
|
+
p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
|
32
|
+
p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
|
33
|
+
p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
|
34
|
+
p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
|
35
|
+
p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
|
47
36
|
p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
|
48
37
|
p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
|
49
|
-
p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
|
50
|
-
p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
|
51
|
-
p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
52
|
-
p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
|
53
|
-
p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
|
54
|
-
p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
55
38
|
p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
|
39
|
+
p.option 'swap_attributes', '--swap-attributes CONFIG', String, 'JSON-formatted config that maps element names to the preferred attribute to check (default: `{}`).'
|
40
|
+
p.option 'swap_urls', '--swap-urls re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
|
41
|
+
|
42
|
+
p.option 'typhoeus', '--typhoeus CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
|
43
|
+
p.option 'hydra', '--hydra CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
|
44
|
+
p.option 'parallel', '--parallel CONFIG', String, 'JSON-formatted string of Parallel config. Will override the html-proofer defaults.'
|
45
|
+
p.option 'cache', '--cache CONFIG', String, 'JSON-formatted string of cache config. Will override the html-proofer defaults.'
|
56
46
|
|
57
47
|
p.action do |args, opts|
|
58
48
|
args = ['.'] if args.empty?
|
@@ -67,46 +57,41 @@ Mercenary.program(:htmlproofer) do |p|
|
|
67
57
|
end
|
68
58
|
|
69
59
|
# some minor manipulation of a special option
|
70
|
-
unless opts['
|
71
|
-
options[:
|
72
|
-
opts['
|
60
|
+
unless opts['swap_urls'].nil?
|
61
|
+
options[:swap_urls] = {}
|
62
|
+
opts['swap_urls'].each do |s|
|
73
63
|
splt = s.split(/(?<!\\):/, 2)
|
74
64
|
|
75
65
|
re = splt[0].gsub(/\\:/, ':')
|
76
66
|
string = splt[1].gsub(/\\:/, ':')
|
77
|
-
options[:
|
67
|
+
options[:swap_urls][Regexp.new(re)] = string
|
78
68
|
end
|
79
69
|
end
|
80
70
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
|
71
|
+
# check booleans
|
72
|
+
[:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
|
73
|
+
next if (val = opts[option.to_s]).nil?
|
74
|
+
if val == "false"
|
75
|
+
options[option] = false
|
76
|
+
else
|
77
|
+
options[option] = true
|
78
|
+
end
|
79
|
+
end
|
91
80
|
|
92
|
-
options[:
|
93
|
-
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config']) unless opts['hydra_config'].nil?
|
81
|
+
options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
|
94
82
|
|
95
|
-
unless opts['
|
96
|
-
|
97
|
-
|
98
|
-
|
83
|
+
options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus', opts['typhoeus'], symbolize_names: false) unless opts['typhoeus'].nil?
|
84
|
+
options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra', opts['hydra']) unless opts['hydra'].nil?
|
85
|
+
options[:parallel] = HTMLProofer::Configuration.parse_json_option('parallel', opts['parallel']) unless opts['parallel'].nil?
|
86
|
+
options[:cache] = HTMLProofer::Configuration.parse_json_option('cache', opts['cache']) unless opts['cache'].nil?
|
99
87
|
|
100
|
-
unless opts['
|
101
|
-
options[:cache] ||= {}
|
102
|
-
options[:cache][:storage_dir] = opts['storage_dir'] unless opts['storage_dir'].nil?
|
103
|
-
end
|
88
|
+
options[:swap_attributes] = HTMLProofer::Configuration.parse_json_option('swap_attributes', opts['swap_attributes'], symbolize_names: false) unless opts['swap_attributes'].nil?
|
104
89
|
|
105
|
-
options[:
|
90
|
+
options[:ignore_status_codes] = Array(options[:ignore_status_codes]).map(&:to_i)
|
106
91
|
|
107
92
|
paths = path.split(',')
|
108
93
|
if opts['as_links']
|
109
|
-
links = path.
|
94
|
+
links = path.split(',').map(&:strip)
|
110
95
|
HTMLProofer.check_links(links, options).run
|
111
96
|
elsif File.directory?(paths.first)
|
112
97
|
HTMLProofer.check_directories(paths, options).run
|
data/lib/html-proofer.rb
CHANGED
@@ -1,56 +1,3 @@
|
|
1
|
-
# rubocop:disable Naming/FileName
|
2
1
|
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
5
|
-
dir = File.join(File.dirname(__FILE__), path)
|
6
|
-
Dir[File.join(dir, '*.rb')].sort.each do |f|
|
7
|
-
require f
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
require_relative 'html-proofer/utils'
|
12
|
-
require_all 'html-proofer'
|
13
|
-
require_all 'html-proofer/check'
|
14
|
-
|
15
|
-
require 'parallel'
|
16
|
-
require 'fileutils'
|
17
|
-
|
18
|
-
begin
|
19
|
-
require 'awesome_print'
|
20
|
-
require 'pry-byebug'
|
21
|
-
rescue LoadError; end # rubocop:disable Lint/SuppressedException
|
22
|
-
module HTMLProofer
|
23
|
-
def self.check_file(file, options = {})
|
24
|
-
raise ArgumentError unless file.is_a?(String)
|
25
|
-
raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
|
26
|
-
|
27
|
-
options[:type] = :file
|
28
|
-
HTMLProofer::Runner.new(file, options)
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.check_directory(directory, options = {})
|
32
|
-
raise ArgumentError unless directory.is_a?(String)
|
33
|
-
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
34
|
-
|
35
|
-
options[:type] = :directory
|
36
|
-
HTMLProofer::Runner.new([directory], options)
|
37
|
-
end
|
38
|
-
|
39
|
-
def self.check_directories(directories, options = {})
|
40
|
-
raise ArgumentError unless directories.is_a?(Array)
|
41
|
-
|
42
|
-
options[:type] = :directory
|
43
|
-
directories.each do |directory|
|
44
|
-
raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
|
45
|
-
end
|
46
|
-
HTMLProofer::Runner.new(directories, options)
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.check_links(links, options = {})
|
50
|
-
raise ArgumentError unless links.is_a?(Array)
|
51
|
-
|
52
|
-
options[:type] = :links
|
53
|
-
HTMLProofer::Runner.new(links, options)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
# rubocop:enable Naming/FileName
|
3
|
+
require_relative "html_proofer"
|
@@ -0,0 +1,251 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTMLProofer
  class Attribute
    # A URL-valued attribute (for example an `href` or `src`).
    #
    # Wraps the raw attribute string, applies the runner's `:swap_urls`
    # replacements, normalizes the result when it contains characters outside
    # the safe set, and exposes predicates and accessors over the parsed URL.
    class Url < HTMLProofer::Attribute
      attr_reader :url, :size

      REMOTE_SCHEMES = ["http", "https"].freeze

      # Matches a string made only of characters that may appear verbatim in a
      # URL, or of percent-encoded octets. `#` and `$` are escaped on purpose:
      # an unescaped `#$&` inside a regex literal is parsed by Ruby as
      # interpolation of the last-match global rather than as three literal
      # characters, which silently widens the character class.
      SAFE_URL_PATTERN = /^([!\#\$&-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/

      # @param runner the active runner; this class reads `options`,
      #   `checked_paths`, `current_filename` and `current_source` from it
      # @param link_attribute [String, nil] the raw attribute value
      # @param base_url [String, nil] when not blank, the URL is joined onto it
      # @param extract_size [Boolean] when true, the value is split on
      #   whitespace into a URL and a trailing size token
      #   (presumably for `srcset`-style values; confirm against callers)
      def initialize(runner, link_attribute, base_url: nil, extract_size: false)
        super

        if @raw_attribute.nil?
          @url = nil
        else
          # strip zero-width spaces and surrounding whitespace
          @url = @raw_attribute.delete("\u200b").strip
          @url, @size = @url.split(/\s+/) if extract_size
          @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
          @url = "" if @url.nil?

          swap_urls!
          clean_url!
        end
      end

      def protocol_relative?
        url.start_with?("//")
      end

      def to_s
        @url
      end

      # True for hash links, for paths ending in "/", and for paths whose
      # extension is listed in the runner's `:extensions` option. A path with
      # no extension is judged by the `:assume_extension` option instead.
      def known_extension?
        return true if hash_link?
        return true if path.end_with?("/")

        ext = File.extname(path)

        # no extension means we use the assumed one
        return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)

        @runner.options[:extensions].include?(ext)
      end

      def unknown_extension?
        !known_extension?
      end

      # True for `javascript:` URLs and for raw attribute values matched by
      # the runner's `:ignore_urls` option.
      #
      # @return [Boolean]
      def ignore?
        /^javascript:/.match?(@url) || ignores_pattern?(@runner.options[:ignore_urls])
      end

      # @return [Boolean] whether the URL could be parsed
      def valid?
        !parts.nil?
      end

      # @return [Boolean] whether the parsed URL has both a host and a path;
      #   false when the URL could not be parsed
      def path?
        !parts&.host.nil? && !parts&.path.nil?
      end

      # Parsed form of the URL, memoized.
      #
      # @return [Addressable::URI, nil] nil when parsing raises
      def parts
        @parts ||= Addressable::URI.parse(@url)
      rescue URI::Error, Addressable::URI::InvalidURIError
        @parts = nil
      end

      # @return [String, nil] the unencoded path, or nil for an unparsable URL
      def path
        Addressable::URI.unencode(parts.path) unless parts.nil?
      end

      # NOTE: this shadows Object#hash; it returns the URL fragment.
      def hash
        parts&.fragment
      end

      # Does the URL have a hash?
      def hash?
        !blank?(hash)
      end

      def scheme
        parts&.scheme
      end

      def remote?
        REMOTE_SCHEMES.include?(scheme)
      end

      def http?
        scheme == "http"
      end

      def https?
        scheme == "https"
      end

      # True when a scheme is present but is neither http nor https.
      def non_http_remote?
        !scheme.nil? && !remote?
      end

      def host
        parts&.host
      end

      # Host followed by path; a missing host or path contributes nothing.
      #
      # @return [String]
      def domain_path
        "#{host}#{path}"
      end

      def query_values
        parts&.query_values
      end

      # checks if a file exists relative to the current pwd
      #
      # The answer for each absolute path is stored in the runner's
      # `checked_paths` so the filesystem is consulted once per path.
      def exists?
        return true if base64?

        return @runner.checked_paths[absolute_path] if @runner.checked_paths.key?(absolute_path)

        @runner.checked_paths[absolute_path] = File.exist?(absolute_path)
      end

      # True when the raw attribute is an inline `data:image` URI.
      def base64?
        /^data:image/.match?(@raw_attribute)
      end

      # The resolved file path (or the current filename when the URL has no
      # path), expanded against the working directory.
      def absolute_path
        path = file_path || @runner.current_filename

        File.expand_path(path, Dir.pwd)
      end

      # Resolves the URL's path to a location on disk.
      #
      # @return [String, nil] nil when the URL has no path
      def file_path
        return if path.nil? || path.empty?

        path_dot_ext = ""

        path_dot_ext = path + @runner.options[:assume_extension] unless blank?(@runner.options[:assume_extension])

        base = if absolute_path?(path) # path relative to root
          # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
          @runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
        # relative links, path is a file
        elsif File.exist?(File.expand_path(path, @runner.current_source)) ||
            File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
          File.dirname(@runner.current_filename)
        # relative links in nested dir, path is a file
        elsif File.exist?(File.join(File.dirname(@runner.current_filename), path)) ||
            File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
          File.dirname(@runner.current_filename)
        # relative link, path is a directory
        else
          @runner.current_filename
        end

        file = File.join(base, path)

        if @runner.options[:assume_extension] && File.file?("#{file}#{@runner.options[:assume_extension]}")
          file = "#{file}#{@runner.options[:assume_extension]}"
        elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
          file = File.join(file, @runner.options[:directory_index_file])
        end

        file
      end

      # True when `file` is a directory referenced without a trailing
      # separator and the Typhoeus `:followlocation` option is not set.
      def unslashed_directory?(file)
        return false unless File.directory?(file)

        !file.end_with?(File::SEPARATOR) && !follow_location?
      end

      def follow_location?
        @runner.options[:typhoeus] && @runner.options[:typhoeus][:followlocation]
      end

      def absolute_path?(path)
        path.start_with?("/")
      end

      # path is external to the file
      def external?
        !internal?
      end

      def internal?
        relative_link? || internal_absolute_link? || hash_link?
      end

      def internal_absolute_link?
        url.start_with?("/")
      end

      def relative_link?
        return false if remote?

        hash_link? || param_link? || url.start_with?(".") || url =~ /^\S/
      end

      # True when the URL is only a fragment (`#...`) or only a query
      # (`?...`), i.e. it does not name a different document.
      def link_points_to_same_page?
        hash_link? || param_link?
      end

      def hash_link?
        url.start_with?("#")
      end

      def has_hash?
        url.include?("#")
      end

      def param_link?
        url.start_with?("?")
      end

      # The URL with its first `#fragment` occurrence removed. A String
      # pattern is used so that the fragment is matched literally, not
      # compiled as a regular expression.
      def sans_hash
        @url.to_s.sub("##{hash}", "")
      end

      # catch any obvious issues, like strings in port numbers
      #
      # URLs made only of safe characters are left untouched; anything else is
      # parsed and normalized.
      private def clean_url!
        return if SAFE_URL_PATTERN.match?(@url)

        @url = Addressable::URI.parse(@url).normalize.to_s
      end

      # Applies each `pattern => replacement` pair from the runner's
      # `:swap_urls` option to the URL via `gsub`.
      private def swap_urls!
        return @url if blank?(replacements = @runner.options[:swap_urls])

        replacements.each do |link, replace|
          @url = @url.gsub(link, replace)
        end
      end

      # Whether the raw attribute equals any String, or matches any Regexp,
      # in `links_to_ignore`. Anything other than an Array yields false.
      private def ignores_pattern?(links_to_ignore)
        return false unless links_to_ignore.is_a?(Array)

        links_to_ignore.any? do |link_to_ignore|
          case link_to_ignore
          when String then link_to_ignore == @raw_attribute
          when Regexp then link_to_ignore.match?(@raw_attribute)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module HTMLProofer
  # Holds one raw attribute value together with the runner it was created for.
  class Attribute
    include HTMLProofer::Utils

    attr_reader :raw_attribute

    # @param runner the runner this attribute belongs to
    # @param raw_attribute the attribute value as given by the caller
    # Any keyword arguments are accepted and ignored, so subclasses can call
    # bare `super` while declaring keywords of their own.
    def initialize(runner, raw_attribute, **_options)
      @raw_attribute = raw_attribute
      @runner = runner
    end
  end
end
|