html-proofer 3.19.4 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/bin/htmlproofer +44 -59
  3. data/lib/html-proofer.rb +1 -54
  4. data/lib/html_proofer/attribute/url.rb +251 -0
  5. data/lib/html_proofer/attribute.rb +15 -0
  6. data/lib/html_proofer/cache.rb +292 -0
  7. data/lib/html_proofer/check/favicon.rb +43 -0
  8. data/lib/html_proofer/check/images.rb +99 -0
  9. data/lib/html_proofer/check/links.rb +135 -0
  10. data/lib/html_proofer/check/open_graph.rb +42 -0
  11. data/lib/html_proofer/check/scripts.rb +49 -0
  12. data/lib/html_proofer/check.rb +94 -0
  13. data/lib/html_proofer/configuration.rb +91 -0
  14. data/lib/html_proofer/element.rb +144 -0
  15. data/lib/html_proofer/failure.rb +17 -0
  16. data/lib/{html-proofer → html_proofer}/log.rb +19 -19
  17. data/lib/html_proofer/reporter/cli.rb +33 -0
  18. data/lib/html_proofer/reporter.rb +23 -0
  19. data/lib/html_proofer/runner.rb +244 -0
  20. data/lib/html_proofer/url_validator/external.rb +193 -0
  21. data/lib/html_proofer/url_validator/internal.rb +97 -0
  22. data/lib/html_proofer/url_validator.rb +16 -0
  23. data/lib/{html-proofer → html_proofer}/utils.rb +9 -12
  24. data/lib/{html-proofer → html_proofer}/version.rb +1 -1
  25. data/lib/html_proofer/xpath_functions.rb +10 -0
  26. data/lib/html_proofer.rb +59 -0
  27. metadata +42 -22
  28. data/lib/html-proofer/cache.rb +0 -194
  29. data/lib/html-proofer/check/favicon.rb +0 -29
  30. data/lib/html-proofer/check/html.rb +0 -37
  31. data/lib/html-proofer/check/images.rb +0 -48
  32. data/lib/html-proofer/check/links.rb +0 -182
  33. data/lib/html-proofer/check/opengraph.rb +0 -46
  34. data/lib/html-proofer/check/scripts.rb +0 -42
  35. data/lib/html-proofer/check.rb +0 -75
  36. data/lib/html-proofer/configuration.rb +0 -88
  37. data/lib/html-proofer/element.rb +0 -265
  38. data/lib/html-proofer/issue.rb +0 -65
  39. data/lib/html-proofer/middleware.rb +0 -82
  40. data/lib/html-proofer/runner.rb +0 -249
  41. data/lib/html-proofer/url_validator.rb +0 -237
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f27b5c50ae5c1c77d5fbe36dbbdca327bcb96302912b726f7f955f643d1dfc48
4
- data.tar.gz: f09405cd0c70f1d2dc98f904c388bcab594f79107fdbe441c63f934821bef1b0
3
+ metadata.gz: 98bde6dd5e32f42e5983fd1aaa7bf02d3359b40f2b4a8b5360a5826abf2cc674
4
+ data.tar.gz: 9b7d57fd18e625ab7cc91c46faceac510b05ba5290f665fb5dd333b92446b00b
5
5
  SHA512:
6
- metadata.gz: 53a8c98438f2056e7e2d926e926e10a6d0aa840b1b6f790860631912a2146dc20c68ca2b303d799a8fbfa723476e0e95dd5bc89695ceddf09ecede6f9acafbd1
7
- data.tar.gz: f68269ba70facf5ede07452d1029f49d17baadff8c6b4fd1d9de520c0ede91ff360bacb0cf46b5c719c0ae35c50ad61c6ce5b36171867f6a1c9d8c675d805ebc
6
+ metadata.gz: a666be806bbb70028488b3ef89f1325d7b5faaec9f48e105220db2d2f7dd022c12dabac828949ffd2100bdc2b4e3bd4c1f9ce9df48bab17b9106d9453eb49f01
7
+ data.tar.gz: 8a58bb5b120ab3cf3eb0e692f3f52f4385bd5923d3a7c91e95dddd17d8b0e25d20dd207872099665ac37737afb6b4ed86d04d39bc7793c6af2abcd35390d2051
data/bin/htmlproofer CHANGED
@@ -15,44 +15,34 @@ Mercenary.program(:htmlproofer) do |p|
15
15
 
16
16
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
17
17
 
18
- p.option 'allow_missing_href', '--allow-missing-href', 'If `true`, does not flag `a` tags missing `href` (this is the default for HTML5).'
19
- p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, ignores the `href="#"`'
18
+ p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
19
+ p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
- p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
22
- p.option 'assume_extension', '--assume-extension', 'Automatically add extension (e.g. `.html`) to file paths, to allow extensionless URLs (as supported by Jekyll 3 and GitHub Pages) (default: `false`).'
23
- p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you do not want to run (default: `[]`)'
24
- p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists). This slows the checker down (default: `false`).'
25
- p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
26
- p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
27
- p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
28
- p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
29
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
21
+ p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
22
+ p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
23
+ p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
+ p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
25
+ p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
30
26
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
31
- p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
32
- p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
33
- p.option 'error_sort', '--error-sort <sort>', String, 'Defines the sort order for error output. Can be `:path`, `:desc`, or `:status` (default: `:path`).'
34
- p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `false`).'
35
- p.option 'extension', '--extension <ext>', String, 'The extension of your HTML files including the dot. (default: `.html`)'
36
- p.option 'external_only', '--external_only', 'Only checks problems with external references'
37
- p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
- p.option 'http_status_ignore', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
39
- p.option 'internal_domains', '--internal-domains domain1,[domain2,...]', Array, 'A comma-separated list of Strings containing domains that will be treated as internal urls.'
40
- p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
41
- p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
42
- p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
43
- p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
44
- p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
45
- p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
46
- p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
27
+ p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
28
+ p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
29
+ p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
30
+ p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
31
+ p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
32
+ p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
33
+ p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
34
+ p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
35
+ p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
47
36
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
48
37
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
49
- p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
50
- p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
51
- p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
52
- p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
53
- p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
54
- p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
55
38
  p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
39
+ p.option 'swap_attributes', '--swap-attributes CONFIG', String, 'JSON-formatted config that maps element names to the preferred attribute to check (default: `{}`).'
40
+ p.option 'swap_urls', '--swap-urls re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
41
+
42
+ p.option 'typhoeus', '--typhoeus CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
43
+ p.option 'hydra', '--hydra CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
44
+ p.option 'parallel', '--parallel CONFIG', String, 'JSON-formatted string of Parallel config. Will override the html-proofer defaults.'
45
+ p.option 'cache', '--cache CONFIG', String, 'JSON-formatted string of cache config. Will override the html-proofer defaults.'
56
46
 
57
47
  p.action do |args, opts|
58
48
  args = ['.'] if args.empty?
@@ -67,46 +57,41 @@ Mercenary.program(:htmlproofer) do |p|
67
57
  end
68
58
 
69
59
  # some minor manipulation of a special option
70
- unless opts['url_swap'].nil?
71
- options[:url_swap] = {}
72
- opts['url_swap'].each do |s|
60
+ unless opts['swap_urls'].nil?
61
+ options[:swap_urls] = {}
62
+ opts['swap_urls'].each do |s|
73
63
  splt = s.split(/(?<!\\):/, 2)
74
64
 
75
65
  re = splt[0].gsub(/\\:/, ':')
76
66
  string = splt[1].gsub(/\\:/, ':')
77
- options[:url_swap][Regexp.new(re)] = string
67
+ options[:swap_urls][Regexp.new(re)] = string
78
68
  end
79
69
  end
80
70
 
81
- options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil?
82
- options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
83
-
84
- options[:validation] = HTMLProofer::Configuration::VALIDATION_DEFAULTS.dup
85
- options[:validation][:report_script_embeds] = opts['report_script_embeds'] unless opts['report_script_embeds'].nil?
86
- options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
87
- options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
88
- options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
89
- options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
90
- options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
71
+ # check booleans
72
+ [:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
73
+ next if (val = opts[option.to_s]).nil?
74
+ if val == "false"
75
+ options[option] = false
76
+ else
77
+ options[option] = true
78
+ end
79
+ end
91
80
 
92
- options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'], symbolize_names: false) unless opts['typhoeus_config'].nil?
93
- options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config']) unless opts['hydra_config'].nil?
81
+ options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
94
82
 
95
- unless opts['timeframe'].nil?
96
- options[:cache] ||= {}
97
- options[:cache][:timeframe] = opts['timeframe'] unless opts['timeframe'].nil?
98
- end
83
+ options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus', opts['typhoeus'], symbolize_names: false) unless opts['typhoeus'].nil?
84
+ options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra', opts['hydra']) unless opts['hydra'].nil?
85
+ options[:parallel] = HTMLProofer::Configuration.parse_json_option('parallel', opts['parallel']) unless opts['parallel'].nil?
86
+ options[:cache] = HTMLProofer::Configuration.parse_json_option('cache', opts['cache']) unless opts['cache'].nil?
99
87
 
100
- unless opts['storage_dir'].nil?
101
- options[:cache] ||= {}
102
- options[:cache][:storage_dir] = opts['storage_dir'] unless opts['storage_dir'].nil?
103
- end
88
+ options[:swap_attributes] = HTMLProofer::Configuration.parse_json_option('swap_attributes', opts['swap_attributes'], symbolize_names: false) unless opts['swap_attributes'].nil?
104
89
 
105
- options[:http_status_ignore] = Array(options[:http_status_ignore]).map(&:to_i)
90
+ options[:ignore_status_codes] = Array(options[:ignore_status_codes]).map(&:to_i)
106
91
 
107
92
  paths = path.split(',')
108
93
  if opts['as_links']
109
- links = path.delete(' ').split(',')
94
+ links = path.split(',').map(&:strip)
110
95
  HTMLProofer.check_links(links, options).run
111
96
  elsif File.directory?(paths.first)
112
97
  HTMLProofer.check_directories(paths, options).run
data/lib/html-proofer.rb CHANGED
@@ -1,56 +1,3 @@
1
- # rubocop:disable Naming/FileName
2
1
  # frozen_string_literal: true
3
2
 
4
- def require_all(path)
5
- dir = File.join(File.dirname(__FILE__), path)
6
- Dir[File.join(dir, '*.rb')].sort.each do |f|
7
- require f
8
- end
9
- end
10
-
11
- require_relative 'html-proofer/utils'
12
- require_all 'html-proofer'
13
- require_all 'html-proofer/check'
14
-
15
- require 'parallel'
16
- require 'fileutils'
17
-
18
- begin
19
- require 'awesome_print'
20
- require 'pry-byebug'
21
- rescue LoadError; end # rubocop:disable Lint/SuppressedException
22
- module HTMLProofer
23
- def self.check_file(file, options = {})
24
- raise ArgumentError unless file.is_a?(String)
25
- raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
26
-
27
- options[:type] = :file
28
- HTMLProofer::Runner.new(file, options)
29
- end
30
-
31
- def self.check_directory(directory, options = {})
32
- raise ArgumentError unless directory.is_a?(String)
33
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
34
-
35
- options[:type] = :directory
36
- HTMLProofer::Runner.new([directory], options)
37
- end
38
-
39
- def self.check_directories(directories, options = {})
40
- raise ArgumentError unless directories.is_a?(Array)
41
-
42
- options[:type] = :directory
43
- directories.each do |directory|
44
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
45
- end
46
- HTMLProofer::Runner.new(directories, options)
47
- end
48
-
49
- def self.check_links(links, options = {})
50
- raise ArgumentError unless links.is_a?(Array)
51
-
52
- options[:type] = :links
53
- HTMLProofer::Runner.new(links, options)
54
- end
55
- end
56
- # rubocop:enable Naming/FileName
3
+ require_relative "html_proofer"
@@ -0,0 +1,251 @@
# frozen_string_literal: true

module HTMLProofer
  class Attribute
    # Wraps the value of a URL-bearing attribute and answers questions about
    # it: whether it is remote or internal, which file on disk it points to,
    # whether it carries a fragment, and whether it should be ignored.
    #
    # The cleaned-up URL is exposed through +url+; the untouched attribute
    # value stays available through +raw_attribute+.
    class Url < HTMLProofer::Attribute
      attr_reader :url, :size

      REMOTE_SCHEMES = ["http", "https"].freeze

      # A URL built only from these characters (or %XX escapes) is left alone
      # by #clean_url!. The class is: ! # $ &-; = ?-[ ] _ a-z ~ (the "?-["
      # range also covers "@" and "A-Z"). "#" and "$" are escaped so that Ruby
      # does not treat "#$&" as interpolation of the last regexp match.
      SAFE_URL_PATTERN = /^([!\#\$&-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/
      private_constant :SAFE_URL_PATTERN

      # runner         - object exposing +options+, +checked_paths+,
      #                  +current_source+ and +current_filename+.
      # link_attribute - raw attribute value (String) or nil.
      # base_url       - when not blank, the URL is resolved against it.
      # extract_size   - when true, the value is split on whitespace into
      #                  a URL and a trailing size descriptor (+size+).
      def initialize(runner, link_attribute, base_url: nil, extract_size: false)
        super

        if @raw_attribute.nil?
          @url = nil
          return
        end

        # zero-width spaces are invisible in markup but break lookups
        @url = @raw_attribute.delete("\u200b").strip
        @url, @size = @url.split(/\s+/) if extract_size
        # splitting an empty string yields no parts; normalise to "" *before*
        # joining so Addressable never receives nil
        @url = "" if @url.nil?
        @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)

        swap_urls!
        clean_url!
      end

      # True when the URL starts with "//" (scheme inherited from the page).
      def protocol_relative?
        url.start_with?("//")
      end

      # Returns the processed URL; nil when the attribute was absent.
      def to_s
        @url
      end

      # True when the link targets something html-proofer knows how to open:
      # a same-page hash link, a directory ("/" suffix), or a path whose
      # extension (or the assumed extension, if it has none) is listed in
      # the :extensions option.
      def known_extension?
        return true if hash_link?
        return true if path.end_with?("/")

        extension = File.extname(path)
        # no extension means we use the assumed one
        extension = @runner.options[:assume_extension] if blank?(extension)

        @runner.options[:extensions].include?(extension)
      end

      def unknown_extension?
        !known_extension?
      end

      # True for javascript: URLs and for raw attribute values matched by the
      # :ignore_urls option. Always returns a boolean.
      def ignore?
        /^javascript:/.match?(@url) || ignores_pattern?(@runner.options[:ignore_urls])
      end

      # True when the URL could be parsed.
      def valid?
        !parts.nil?
      end

      # True when the parsed URL has both a host and a path; false when the
      # URL could not be parsed.
      def path?
        return false if parts.nil?

        !parts.host.nil? && !parts.path.nil?
      end

      # Parsed Addressable::URI for the URL, memoised; nil when parsing fails.
      def parts
        @parts ||= Addressable::URI.parse(@url)
      rescue URI::Error, Addressable::URI::InvalidURIError
        @parts = nil
      end

      # Percent-decoded path component, or nil when the URL is unparseable.
      def path
        Addressable::URI.unencode(parts.path) unless parts.nil?
      end

      # Fragment of the URL (text after "#"), or nil.
      #
      # NOTE(review): this shadows Object#hash, so instances should not be
      # used as Hash keys or Set members -- kept for interface compatibility.
      def hash
        parts&.fragment
      end

      # Does the URL have a hash?
      def hash?
        !blank?(hash)
      end

      def scheme
        parts&.scheme
      end

      # True for http and https URLs.
      def remote?
        REMOTE_SCHEMES.include?(scheme)
      end

      def http?
        scheme == "http"
      end

      def https?
        scheme == "https"
      end

      # True when a scheme is present but it is neither http nor https.
      def non_http_remote?
        !scheme.nil? && !remote?
      end

      def host
        parts&.host
      end

      # Host immediately followed by path; missing pieces count as "".
      def domain_path
        "#{host}#{path}"
      end

      def query_values
        parts&.query_values
      end

      # checks if a file exists relative to the current pwd
      #
      # Results are cached per absolute path in @runner.checked_paths.
      def exists?
        return true if base64?

        # resolve once: absolute_path touches the filesystem on every call
        target = absolute_path
        return @runner.checked_paths[target] if @runner.checked_paths.key?(target)

        @runner.checked_paths[target] = File.exist?(target)
      end

      # True when the raw attribute starts with "data:image" (inline image).
      def base64?
        /^data:image/.match?(@raw_attribute)
      end

      # Absolute filesystem location of the link target, falling back to the
      # file currently being checked when the URL has no path.
      def absolute_path
        File.expand_path(file_path || @runner.current_filename, Dir.pwd)
      end

      # Maps the URL's path onto a file below the source being checked.
      # Returns nil when the URL has no path. Honours the :root_dir,
      # :assume_extension and :directory_index_file options.
      def file_path
        link_path = path
        return if link_path.nil? || link_path.empty?

        assumed_ext = @runner.options[:assume_extension]
        # NOTE(review): with a blank assume_extension this stays "", so the
        # expand_path check below resolves to current_source itself --
        # confirm that is intended.
        path_dot_ext = blank?(assumed_ext) ? "" : link_path + assumed_ext

        base =
          if absolute_path?(link_path) # path relative to root
            # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
            @runner.options[:root_dir] ||
              (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
          # relative links, path is a file
          elsif File.exist?(File.expand_path(link_path, @runner.current_source)) ||
              File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
            File.dirname(@runner.current_filename)
          # relative links in nested dir, path is a file
          elsif File.exist?(File.join(File.dirname(@runner.current_filename), link_path)) ||
              File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
            File.dirname(@runner.current_filename)
          # relative link, path is a directory
          else
            @runner.current_filename
          end

        file = File.join(base, link_path)

        if assumed_ext && File.file?("#{file}#{assumed_ext}")
          file = "#{file}#{assumed_ext}"
        elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
          file = File.join(file, @runner.options[:directory_index_file])
        end

        file
      end

      # True when +file+ is a directory referenced without a trailing
      # separator and redirects are not being followed.
      def unslashed_directory?(file)
        return false unless File.directory?(file)

        !file.end_with?(File::SEPARATOR) && !follow_location?
      end

      # Truthy when the :typhoeus option sets :followlocation.
      #
      # NOTE(review): only the Symbol key is consulted; a String-keyed
      # typhoeus config would not be seen here -- confirm against the caller.
      def follow_location?
        @runner.options[:typhoeus] && @runner.options[:typhoeus][:followlocation]
      end

      def absolute_path?(path)
        path.start_with?("/")
      end

      # path is external to the file
      def external?
        !internal?
      end

      def internal?
        relative_link? || internal_absolute_link? || hash_link?
      end

      def internal_absolute_link?
        url.start_with?("/")
      end

      # True for any non-http(s) URL that starts with "#", "?", "." or any
      # other non-whitespace character. Always returns a boolean.
      def relative_link?
        return false if remote?

        hash_link? || param_link? || url.start_with?(".") || url.match?(/^\S/)
      end

      # True when the link only changes the fragment or the query string.
      def link_points_to_same_page?
        hash_link? || param_link?
      end

      def hash_link?
        url.start_with?("#")
      end

      def has_hash?
        url.include?("#")
      end

      def param_link?
        url.start_with?("?")
      end

      # URL with its "#fragment" removed. The fragment is matched literally,
      # so regexp metacharacters inside it are harmless.
      def sans_hash
        @url.to_s.sub("##{hash}", "")
      end

      # catch any obvious issues, like strings in port numbers
      private def clean_url!
        return if @url.match?(SAFE_URL_PATTERN)

        @url = Addressable::URI.parse(@url).normalize.to_s
      end

      # Applies every pattern => replacement pair from the :swap_urls option.
      private def swap_urls!
        return @url if blank?(replacements = @runner.options[:swap_urls])

        replacements.each do |link, replace|
          @url = @url.gsub(link, replace)
        end
      end

      # True when the raw attribute equals one of the Strings, or matches one
      # of the Regexps, in +links_to_ignore+. Other entry types never match.
      private def ignores_pattern?(links_to_ignore)
        return false unless links_to_ignore.is_a?(Array)

        links_to_ignore.any? do |link_to_ignore|
          case link_to_ignore
          when String then link_to_ignore == @raw_attribute
          when Regexp then link_to_ignore.match?(@raw_attribute)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
# frozen_string_literal: true

module HTMLProofer
  # Base class for a single attribute value being checked. It remembers the
  # untouched attribute text and the runner it was created for.
  class Attribute
    include HTMLProofer::Utils

    attr_reader :raw_attribute

    # runner        - the runner this attribute belongs to.
    # raw_attribute - the attribute value exactly as given (may be nil).
    #
    # Any keyword arguments are accepted and discarded, so subclasses can
    # call a bare +super+ while declaring keywords of their own.
    def initialize(runner, raw_attribute, **_unused)
      @raw_attribute = raw_attribute
      @runner = runner
    end
  end
end