html-proofer 3.19.4 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/bin/htmlproofer +44 -59
  3. data/lib/html-proofer.rb +1 -54
  4. data/lib/html_proofer/attribute/url.rb +251 -0
  5. data/lib/html_proofer/attribute.rb +15 -0
  6. data/lib/html_proofer/cache.rb +292 -0
  7. data/lib/html_proofer/check/favicon.rb +43 -0
  8. data/lib/html_proofer/check/images.rb +99 -0
  9. data/lib/html_proofer/check/links.rb +135 -0
  10. data/lib/html_proofer/check/open_graph.rb +42 -0
  11. data/lib/html_proofer/check/scripts.rb +49 -0
  12. data/lib/html_proofer/check.rb +94 -0
  13. data/lib/html_proofer/configuration.rb +91 -0
  14. data/lib/html_proofer/element.rb +144 -0
  15. data/lib/html_proofer/failure.rb +17 -0
  16. data/lib/{html-proofer → html_proofer}/log.rb +19 -19
  17. data/lib/html_proofer/reporter/cli.rb +33 -0
  18. data/lib/html_proofer/reporter.rb +23 -0
  19. data/lib/html_proofer/runner.rb +244 -0
  20. data/lib/html_proofer/url_validator/external.rb +193 -0
  21. data/lib/html_proofer/url_validator/internal.rb +97 -0
  22. data/lib/html_proofer/url_validator.rb +16 -0
  23. data/lib/{html-proofer → html_proofer}/utils.rb +9 -12
  24. data/lib/{html-proofer → html_proofer}/version.rb +1 -1
  25. data/lib/html_proofer/xpath_functions.rb +10 -0
  26. data/lib/html_proofer.rb +59 -0
  27. metadata +42 -22
  28. data/lib/html-proofer/cache.rb +0 -194
  29. data/lib/html-proofer/check/favicon.rb +0 -29
  30. data/lib/html-proofer/check/html.rb +0 -37
  31. data/lib/html-proofer/check/images.rb +0 -48
  32. data/lib/html-proofer/check/links.rb +0 -182
  33. data/lib/html-proofer/check/opengraph.rb +0 -46
  34. data/lib/html-proofer/check/scripts.rb +0 -42
  35. data/lib/html-proofer/check.rb +0 -75
  36. data/lib/html-proofer/configuration.rb +0 -88
  37. data/lib/html-proofer/element.rb +0 -265
  38. data/lib/html-proofer/issue.rb +0 -65
  39. data/lib/html-proofer/middleware.rb +0 -82
  40. data/lib/html-proofer/runner.rb +0 -249
  41. data/lib/html-proofer/url_validator.rb +0 -237
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f27b5c50ae5c1c77d5fbe36dbbdca327bcb96302912b726f7f955f643d1dfc48
4
- data.tar.gz: f09405cd0c70f1d2dc98f904c388bcab594f79107fdbe441c63f934821bef1b0
3
+ metadata.gz: 98bde6dd5e32f42e5983fd1aaa7bf02d3359b40f2b4a8b5360a5826abf2cc674
4
+ data.tar.gz: 9b7d57fd18e625ab7cc91c46faceac510b05ba5290f665fb5dd333b92446b00b
5
5
  SHA512:
6
- metadata.gz: 53a8c98438f2056e7e2d926e926e10a6d0aa840b1b6f790860631912a2146dc20c68ca2b303d799a8fbfa723476e0e95dd5bc89695ceddf09ecede6f9acafbd1
7
- data.tar.gz: f68269ba70facf5ede07452d1029f49d17baadff8c6b4fd1d9de520c0ede91ff360bacb0cf46b5c719c0ae35c50ad61c6ce5b36171867f6a1c9d8c675d805ebc
6
+ metadata.gz: a666be806bbb70028488b3ef89f1325d7b5faaec9f48e105220db2d2f7dd022c12dabac828949ffd2100bdc2b4e3bd4c1f9ce9df48bab17b9106d9453eb49f01
7
+ data.tar.gz: 8a58bb5b120ab3cf3eb0e692f3f52f4385bd5923d3a7c91e95dddd17d8b0e25d20dd207872099665ac37737afb6b4ed86d04d39bc7793c6af2abcd35390d2051
data/bin/htmlproofer CHANGED
@@ -15,44 +15,34 @@ Mercenary.program(:htmlproofer) do |p|
15
15
 
16
16
  p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.'
17
17
 
18
- p.option 'allow_missing_href', '--allow-missing-href', 'If `true`, does not flag `a` tags missing `href` (this is the default for HTML5).'
19
- p.option 'allow_hash_href', '--allow-hash-href', 'If `true`, ignores the `href="#"`'
18
+ p.option 'allow_hash_href', '--allow-hash-href=<true|false>', 'String', 'If `true`, assumes `href="#"` anchors are valid (default: `true`)'
19
+ p.option 'allow_missing_href', '--allow-missing-href=<true|false>', 'String', 'If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically allowed, but could also be human error. (default: `false`)'
20
20
  p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.'
21
- p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'A comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore'
22
- p.option 'assume_extension', '--assume-extension', 'Automatically add extension (e.g. `.html`) to file paths, to allow extensionless URLs (as supported by Jekyll 3 and GitHub Pages) (default: `false`).'
23
- p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you do not want to run (default: `[]`)'
24
- p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists). This slows the checker down (default: `false`).'
25
- p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
26
- p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
27
- p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
28
- p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
29
- p.option 'check_sri', '--check-sri', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
21
+ p.option 'assume_extension', '--assume-extension <ext>', 'Automatically add specified extension to files for internal links, to allow extensionless URLs (as supported by most servers) (default: `.html`).'
22
+ p.option 'checks', '--checks check1,[check2,...]', Array, 'A comma-separated list of Strings indicating which checks you want to run (default: `["Links", "Images", "Scripts"]`)'
23
+ p.option 'check_external_hash', '--check-external-hash=<true|false>', 'String', 'Checks whether external hashes exist (even if the webpage exists) (default: `true`).'
24
+ p.option 'check_internal_hash', '--check-internal-hash=<true|false>', 'String', 'Checks whether internal hashes exist (even if the webpage exists) (default: `true`).'
25
+ p.option 'check_sri', '--check-sri=<true|false>', 'String', 'Check that `<link>` and `<script>` external resources use SRI (default: `false`).'
30
26
  p.option 'directory_index_file', '--directory-index-file <filename>', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)'
31
- p.option 'disable_external', '--disable-external', 'If `true`, does not run the external link checker, which can take a lot of time (default: `false`)'
32
- p.option 'empty_alt_ignore', '--empty-alt-ignore', 'If `true`, ignores images with empty alt tags'
33
- p.option 'error_sort', '--error-sort <sort>', String, 'Defines the sort order for error output. Can be `:path`, `:desc`, or `:status` (default: `:path`).'
34
- p.option 'enforce_https', '--enforce-https', 'Fails a link if it\'s not marked as `https` (default: `false`).'
35
- p.option 'extension', '--extension <ext>', String, 'The extension of your HTML files including the dot. (default: `.html`)'
36
- p.option 'external_only', '--external_only', 'Only checks problems with external references'
37
- p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
38
- p.option 'http_status_ignore', '--http-status-ignore 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
39
- p.option 'internal_domains', '--internal-domains domain1,[domain2,...]', Array, 'A comma-separated list of Strings containing domains that will be treated as internal urls.'
40
- p.option 'ignore_empty_mailto', '--ignore-empty-mailto', 'If `true`, allows `mailto:` `href`s which do not contain an email address'
41
- p.option 'report_invalid_tags', '--report-invalid-tags', 'When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors (default: `false`)'
42
- p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)'
43
- p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)'
44
- p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)'
45
- p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)'
46
- p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)'
27
+ p.option 'disable_external', '--disable-external=<true|false>', String, 'If `true`, does not run the external link checker (default: `false`)'
28
+ p.option 'enforce_https', '--enforce-https=<true|false>', String, 'Fails a link if it\'s not marked as `https` (default: `true`).'
29
+ p.option 'extensions', '--extensions ext1,[ext2,...[', Array, 'A comma-separated list of Strings indicating the file extensions you would like to check (including the dot) (default: `.html`)'
30
+ p.option 'ignore_empty_alt', '--ignore-empty-alt=<true|false>', 'String', 'If `true`, ignores images with empty/missing alt tags (in other words, `<img alt>` and `<img alt="">` are valid; set this to `false` to flag those) (default: `true`)'
31
+ p.option 'ignore_empty_mailto', '--ignore-empty-mailto=<true|false>', 'String', 'If `true`, allows `mailto:` `href`s which do not contain an email address (default: `false`)'
32
+ p.option 'ignore_files', '--ignore-files file1,[file2,...]', Array, 'A comma-separated list of Strings or RegExps containing file paths that are safe to ignore'
33
+ p.option 'ignore_missing_alt', '--ignore-missing-alt=<true|false>', 'String', 'If `true`, ignores images with missing alt tags (default: `false`)'
34
+ p.option 'ignore_status_codes', '--ignore-status-codes 123,[xxx, ...]', Array, 'A comma-separated list of numbers representing status codes to ignore.'
35
+ p.option 'ignore_urls', '--ignore-urls link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. This affects all HTML attributes, such as `alt` tags on images.'
47
36
  p.option 'log_level', '--log-level <level>', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)'
48
37
  p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range'
49
- p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")'
50
- p.option 'timeframe', '--timeframe <time>', String, 'A string representing the caching timeframe.'
51
- p.option 'typhoeus_config', '--typhoeus-config CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
52
- p.option 'hydra_config', '--hydra-config CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
53
- p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'A comma-separated list of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored'
54
- p.option 'url_swap', '--url-swap re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
55
38
  p.option 'root_dir', '--root-dir PATH', String, 'The absolute path to the directory serving your html-files.'
39
+ p.option 'swap_attributes', '--swap-attributes CONFIG', String, 'JSON-formatted config that maps element names to the preferred attribute to check (default: `{}`).'
40
+ p.option 'swap_urls', '--swap-urls re:string,[re:string,...]', Array, 'A comma-separated list containing key-value pairs of `RegExp => String`. It transforms URLs that match `RegExp` into `String` via `gsub`. The escape sequences `\\:` should be used to produce literal `:`s.'
41
+
42
+ p.option 'typhoeus', '--typhoeus CONFIG', String, 'JSON-formatted string of Typhoeus config. Will override the html-proofer defaults.'
43
+ p.option 'hydra', '--hydra CONFIG', String, 'JSON-formatted string of Hydra config. Will override the html-proofer defaults.'
44
+ p.option 'parallel', '--parallel CONFIG', String, 'JSON-formatted string of Parallel config. Will override the html-proofer defaults.'
45
+ p.option 'cache', '--cache CONFIG', String, 'JSON-formatted string of cache config. Will override the html-proofer defaults.'
56
46
 
57
47
  p.action do |args, opts|
58
48
  args = ['.'] if args.empty?
@@ -67,46 +57,41 @@ Mercenary.program(:htmlproofer) do |p|
67
57
  end
68
58
 
69
59
  # some minor manipulation of a special option
70
- unless opts['url_swap'].nil?
71
- options[:url_swap] = {}
72
- opts['url_swap'].each do |s|
60
+ unless opts['swap_urls'].nil?
61
+ options[:swap_urls] = {}
62
+ opts['swap_urls'].each do |s|
73
63
  splt = s.split(/(?<!\\):/, 2)
74
64
 
75
65
  re = splt[0].gsub(/\\:/, ':')
76
66
  string = splt[1].gsub(/\\:/, ':')
77
- options[:url_swap][Regexp.new(re)] = string
67
+ options[:swap_urls][Regexp.new(re)] = string
78
68
  end
79
69
  end
80
70
 
81
- options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil?
82
- options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
83
-
84
- options[:validation] = HTMLProofer::Configuration::VALIDATION_DEFAULTS.dup
85
- options[:validation][:report_script_embeds] = opts['report_script_embeds'] unless opts['report_script_embeds'].nil?
86
- options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil?
87
- options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil?
88
- options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil?
89
- options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil?
90
- options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil?
71
+ # check booleans
72
+ [:allow_hash_href, :allow_missing_href, :check_external_hash, :check_internal_hash, :check_sri, :disable_external, :enforce_https, :ignore_empty_alt, :ignore_empty_mailto, :ignore_missing_alt].each do |option|
73
+ next if (val = opts[option.to_s]).nil?
74
+ if val == "false"
75
+ options[option] = false
76
+ else
77
+ options[option] = true
78
+ end
79
+ end
91
80
 
92
- options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config'], symbolize_names: false) unless opts['typhoeus_config'].nil?
93
- options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra_config', opts['hydra_config']) unless opts['hydra_config'].nil?
81
+ options[:log_level] = opts['log_level'].to_sym unless opts['log_level'].nil?
94
82
 
95
- unless opts['timeframe'].nil?
96
- options[:cache] ||= {}
97
- options[:cache][:timeframe] = opts['timeframe'] unless opts['timeframe'].nil?
98
- end
83
+ options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus', opts['typhoeus'], symbolize_names: false) unless opts['typhoeus'].nil?
84
+ options[:hydra] = HTMLProofer::Configuration.parse_json_option('hydra', opts['hydra']) unless opts['hydra'].nil?
85
+ options[:parallel] = HTMLProofer::Configuration.parse_json_option('parallel', opts['parallel']) unless opts['parallel'].nil?
86
+ options[:cache] = HTMLProofer::Configuration.parse_json_option('cache', opts['cache']) unless opts['cache'].nil?
99
87
 
100
- unless opts['storage_dir'].nil?
101
- options[:cache] ||= {}
102
- options[:cache][:storage_dir] = opts['storage_dir'] unless opts['storage_dir'].nil?
103
- end
88
+ options[:swap_attributes] = HTMLProofer::Configuration.parse_json_option('swap_attributes', opts['swap_attributes'], symbolize_names: false) unless opts['swap_attributes'].nil?
104
89
 
105
- options[:http_status_ignore] = Array(options[:http_status_ignore]).map(&:to_i)
90
+ options[:ignore_status_codes] = Array(options[:ignore_status_codes]).map(&:to_i)
106
91
 
107
92
  paths = path.split(',')
108
93
  if opts['as_links']
109
- links = path.delete(' ').split(',')
94
+ links = path.split(',').map(&:strip)
110
95
  HTMLProofer.check_links(links, options).run
111
96
  elsif File.directory?(paths.first)
112
97
  HTMLProofer.check_directories(paths, options).run
data/lib/html-proofer.rb CHANGED
@@ -1,56 +1,3 @@
1
- # rubocop:disable Naming/FileName
2
1
  # frozen_string_literal: true
3
2
 
4
- def require_all(path)
5
- dir = File.join(File.dirname(__FILE__), path)
6
- Dir[File.join(dir, '*.rb')].sort.each do |f|
7
- require f
8
- end
9
- end
10
-
11
- require_relative 'html-proofer/utils'
12
- require_all 'html-proofer'
13
- require_all 'html-proofer/check'
14
-
15
- require 'parallel'
16
- require 'fileutils'
17
-
18
- begin
19
- require 'awesome_print'
20
- require 'pry-byebug'
21
- rescue LoadError; end # rubocop:disable Lint/SuppressedException
22
- module HTMLProofer
23
- def self.check_file(file, options = {})
24
- raise ArgumentError unless file.is_a?(String)
25
- raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
26
-
27
- options[:type] = :file
28
- HTMLProofer::Runner.new(file, options)
29
- end
30
-
31
- def self.check_directory(directory, options = {})
32
- raise ArgumentError unless directory.is_a?(String)
33
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
34
-
35
- options[:type] = :directory
36
- HTMLProofer::Runner.new([directory], options)
37
- end
38
-
39
- def self.check_directories(directories, options = {})
40
- raise ArgumentError unless directories.is_a?(Array)
41
-
42
- options[:type] = :directory
43
- directories.each do |directory|
44
- raise ArgumentError, "#{directory} does not exist" unless Dir.exist?(directory)
45
- end
46
- HTMLProofer::Runner.new(directories, options)
47
- end
48
-
49
- def self.check_links(links, options = {})
50
- raise ArgumentError unless links.is_a?(Array)
51
-
52
- options[:type] = :links
53
- HTMLProofer::Runner.new(links, options)
54
- end
55
- end
56
- # rubocop:enable Naming/FileName
3
+ require_relative "html_proofer"
data/lib/html_proofer/attribute/url.rb ADDED
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
module HTMLProofer
  class Attribute
    # A URL-valued attribute (e.g. an `href` or `src` value) together with
    # helpers to classify it (internal / external / remote / hash-only) and to
    # resolve it to a file on disk.
    #
    # `@raw_attribute` always keeps the untouched attribute text; `@url` is the
    # cleaned-up form (zero-width spaces removed, optionally joined to a base
    # URL, swapped via the `:swap_urls` option and normalized).
    class Url < HTMLProofer::Attribute
      attr_reader :url, :size

      REMOTE_SCHEMES = ["http", "https"].freeze

      # @param runner the active runner; its `options`, `checked_paths`,
      #   `current_source` and `current_filename` are read by this class
      # @param link_attribute [String, nil] raw attribute text
      # @param base_url [String, nil] when not blank, the URL is joined onto it
      # @param extract_size [Boolean] when true, the value is split on
      #   whitespace: the first token becomes the URL, the second `size`
      def initialize(runner, link_attribute, base_url: nil, extract_size: false)
        super

        if @raw_attribute.nil?
          @url = nil
        else
          # U+200B (zero-width space) is invisible but would corrupt the URL
          @url = @raw_attribute.delete("\u200b").strip
          @url, @size = @url.split(/\s+/) if extract_size
          @url = Addressable::URI.join(base_url, @url).to_s unless blank?(base_url)
          @url = "" if @url.nil?

          swap_urls!
          clean_url!
        end
      end

      # True for scheme-less URLs of the form `//host/path`.
      def protocol_relative?
        url.start_with?("//")
      end

      def to_s
        @url
      end

      # Whether the path's extension is one of the configured `:extensions`.
      # Hash-only links and paths ending in "/" always count as known.
      def known_extension?
        return true if hash_link?
        return true if path.end_with?("/")

        ext = File.extname(path)

        # no extension means we use the assumed one
        return @runner.options[:extensions].include?(@runner.options[:assume_extension]) if blank?(ext)

        @runner.options[:extensions].include?(ext)
      end

      def unknown_extension?
        !known_extension?
      end

      # Whether this URL should be skipped: `javascript:` pseudo-URLs and
      # anything matching the `:ignore_urls` option. Always returns a boolean.
      def ignore?
        /^javascript:/.match?(@url) || ignores_pattern?(@runner.options[:ignore_urls])
      end

      # A URL is valid when it could be parsed at all.
      def valid?
        !parts.nil?
      end

      # True when the parsed URL has both a host and a path; false when the
      # URL could not be parsed.
      def path?
        return false if parts.nil?

        !parts.host.nil? && !parts.path.nil?
      end

      # Parsed URI, memoized. Returns nil when the URL cannot be parsed.
      def parts
        @parts ||= Addressable::URI.parse(@url)
      rescue URI::Error, Addressable::URI::InvalidURIError
        @parts = nil
      end

      # Percent-decoded path component, or nil when the URL is unparsable.
      def path
        Addressable::URI.unencode(parts.path) unless parts.nil?
      end

      # The URL fragment (text after "#"), or nil.
      # NOTE(review): this overrides Object#hash for instances of this class;
      # confirm instances are never used as Hash keys / Set members.
      def hash
        parts&.fragment
      end

      # Does the URL have a hash?
      def hash?
        !blank?(hash)
      end

      def scheme
        parts&.scheme
      end

      # True for http / https URLs.
      def remote?
        REMOTE_SCHEMES.include?(scheme)
      end

      def http?
        scheme == "http"
      end

      def https?
        scheme == "https"
      end

      # True when a scheme is present but it is neither http nor https.
      def non_http_remote?
        !scheme.nil? && !remote?
      end

      def host
        parts&.host
      end

      # Host followed by path; either part is treated as empty when missing.
      def domain_path
        "#{host}#{path}"
      end

      def query_values
        parts&.query_values
      end

      # checks if a file exists relative to the current pwd
      # Results are cached per absolute path in `@runner.checked_paths`.
      def exists?
        return true if base64?

        target = absolute_path
        return @runner.checked_paths[target] if @runner.checked_paths.key?(target)

        @runner.checked_paths[target] = File.exist?(target)
      end

      # True when the raw attribute is an inline `data:image` URI.
      def base64?
        /^data:image/.match?(@raw_attribute)
      end

      # Resolved file path (or the current file when the URL has no path),
      # expanded against the process working directory.
      def absolute_path
        target = file_path || @runner.current_filename

        File.expand_path(target, Dir.pwd)
      end

      # Maps the URL path to a candidate file on disk, or returns nil when the
      # URL has no path. Honors `:root_dir`, `:assume_extension` and
      # `:directory_index_file`.
      def file_path
        return if path.nil? || path.empty?

        assumed_ext = @runner.options[:assume_extension]
        path_dot_ext = blank?(assumed_ext) ? "" : path + assumed_ext

        base = if absolute_path?(path) # path relative to root
          # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname
          @runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source))
        # relative links, path is a file
        elsif File.exist?(File.expand_path(path, @runner.current_source)) ||
            File.exist?(File.expand_path(path_dot_ext, @runner.current_source))
          File.dirname(@runner.current_filename)
        # relative links in nested dir, path is a file
        elsif File.exist?(File.join(File.dirname(@runner.current_filename), path)) ||
            File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext))
          File.dirname(@runner.current_filename)
        # relative link, path is a directory
        else
          @runner.current_filename
        end

        file = File.join(base, path)

        if assumed_ext && File.file?("#{file}#{assumed_ext}")
          file = "#{file}#{assumed_ext}"
        elsif File.directory?(file) && !unslashed_directory?(file) # implicit index support
          file = File.join(file, @runner.options[:directory_index_file])
        end

        file
      end

      # True when `file` is a directory referenced without a trailing
      # separator and redirects are not being followed.
      def unslashed_directory?(file)
        return false unless File.directory?(file)

        !file.end_with?(File::SEPARATOR) && !follow_location?
      end

      # Reads the `:followlocation` flag from the `:typhoeus` option, if set.
      def follow_location?
        @runner.options[:typhoeus] && @runner.options[:typhoeus][:followlocation]
      end

      def absolute_path?(path)
        path.start_with?("/")
      end

      # path is external to the file
      def external?
        !internal?
      end

      def internal?
        relative_link? || internal_absolute_link? || hash_link?
      end

      def internal_absolute_link?
        url.start_with?("/")
      end

      # Any non-remote URL that is hash-only, query-only, dot-relative, or
      # simply starts with a non-whitespace character.
      def relative_link?
        return false if remote?

        hash_link? || param_link? || url.start_with?(".") || /^\S/.match?(url)
      end

      # True when the URL is only a fragment or only a query string, i.e. it
      # targets the page it appears on.
      def link_points_to_same_page?
        hash_link? || param_link?
      end

      def hash_link?
        url.start_with?("#")
      end

      def has_hash?
        url.include?("#")
      end

      def param_link?
        url.start_with?("?")
      end

      # The URL with its "#fragment" removed. The fragment is matched as a
      # literal string so regexp metacharacters in it cannot misfire or raise.
      def sans_hash
        @url.to_s.sub("##{hash}", "")
      end

      # catch any obvious issues, like strings in port numbers
      # URLs made up solely of the listed characters and well-formed percent
      # escapes are left untouched; everything else is normalized.
      private def clean_url!
        # `#` and `$` are escaped so they are literal class members rather
        # than the start of a `#$&` / `#{...}` interpolation.
        return if /^([!\#\$&-;=?-\[\]_a-z~]|%[0-9a-fA-F]{2})+$/.match?(@url)

        @url = Addressable::URI.parse(@url).normalize.to_s
      end

      # Applies every `pattern => replacement` pair from the `:swap_urls`
      # option to the URL via `gsub`.
      private def swap_urls!
        return @url if blank?(replacements = @runner.options[:swap_urls])

        replacements.each do |link, replace|
          @url = @url.gsub(link, replace)
        end
      end

      # True when the raw attribute equals one of the given Strings or matches
      # one of the given Regexps. Non-Array input never matches.
      private def ignores_pattern?(links_to_ignore)
        return false unless links_to_ignore.is_a?(Array)

        links_to_ignore.any? do |link_to_ignore|
          case link_to_ignore
          when String then link_to_ignore == @raw_attribute
          when Regexp then link_to_ignore.match?(@raw_attribute)
          end
        end
      end
    end
  end
end
data/lib/html_proofer/attribute.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module HTMLProofer
  # Base class for attribute wrappers: pairs the raw attribute text with the
  # runner that is processing it.
  class Attribute
    include HTMLProofer::Utils

    attr_reader :raw_attribute

    # Extra keyword arguments are accepted and discarded, so a subclass with
    # its own keyword options can forward everything with a bare `super`.
    def initialize(runner, raw_attribute, **_)
      @runner, @raw_attribute = runner, raw_attribute
    end
  end
end