site-inspector 1.0.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +8 -0
- data/.rubocop.yml +42 -0
- data/.rubocop_todo.yml +139 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +7 -0
- data/Guardfile +10 -0
- data/README.md +189 -0
- data/Rakefile +10 -0
- data/bin/site-inspector +50 -22
- data/lib/cliver/dependency_ext.rb +24 -0
- data/lib/site-inspector.rb +62 -615
- data/lib/site-inspector/cache.rb +10 -51
- data/lib/site-inspector/checks/accessibility.rb +135 -0
- data/lib/site-inspector/checks/check.rb +54 -0
- data/lib/site-inspector/checks/content.rb +85 -0
- data/lib/site-inspector/checks/cookies.rb +45 -0
- data/lib/site-inspector/checks/dns.rb +138 -0
- data/lib/site-inspector/checks/headers.rb +68 -0
- data/lib/site-inspector/checks/hsts.rb +81 -0
- data/lib/site-inspector/checks/https.rb +40 -0
- data/lib/site-inspector/checks/sniffer.rb +67 -0
- data/lib/site-inspector/checks/wappalyzer.rb +62 -0
- data/lib/site-inspector/checks/whois.rb +36 -0
- data/lib/site-inspector/disk_cache.rb +42 -0
- data/lib/site-inspector/domain.rb +271 -0
- data/lib/site-inspector/endpoint.rb +217 -0
- data/lib/site-inspector/rails_cache.rb +13 -0
- data/lib/site-inspector/version.rb +5 -0
- data/package-lock.json +505 -0
- data/package.json +23 -0
- data/script/bootstrap +2 -0
- data/script/cibuild +11 -0
- data/script/console +3 -0
- data/script/pa11y-version +10 -0
- data/script/release +38 -0
- data/site-inspector.gemspec +42 -0
- data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
- data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
- data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
- data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
- data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
- data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
- data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
- data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
- data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
- data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
- data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
- data/spec/fixtures/wappalyzer.json +125 -0
- data/spec/site_inspector_cache_spec.rb +15 -0
- data/spec/site_inspector_disk_cache_spec.rb +39 -0
- data/spec/site_inspector_domain_spec.rb +271 -0
- data/spec/site_inspector_endpoint_spec.rb +252 -0
- data/spec/site_inspector_spec.rb +48 -0
- data/spec/spec_helper.rb +19 -0
- metadata +204 -63
- data/lib/site-inspector/compliance.rb +0 -19
- data/lib/site-inspector/dns.rb +0 -92
- data/lib/site-inspector/headers.rb +0 -59
- data/lib/site-inspector/sniffer.rb +0 -26
data/lib/site-inspector/cache.rb
CHANGED
@@ -1,58 +1,17 @@
|
|
1
|
-
|
2
|
-
def initialize
|
3
|
-
@memory = {}
|
4
|
-
end
|
5
|
-
|
6
|
-
def get(request)
|
7
|
-
@memory[request]
|
8
|
-
end
|
9
|
-
|
10
|
-
def set(request, response)
|
11
|
-
@memory[request] = response
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
class SiteInspectorDiskCache
|
16
|
-
def initialize(dir = nil, replace = false)
|
17
|
-
@dir = dir
|
18
|
-
@memory = {}
|
19
|
-
@replace = replace
|
20
|
-
end
|
1
|
+
# frozen_string_literal: true
|
21
2
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def fetch(request)
|
27
|
-
if File.exist?(path(request))
|
28
|
-
|
29
|
-
if @replace
|
30
|
-
FileUtils.rm(path(request))
|
31
|
-
nil
|
32
|
-
else
|
33
|
-
contents = File.read(path(request))
|
34
|
-
begin
|
35
|
-
Marshal.load(contents)
|
36
|
-
rescue ArgumentError
|
37
|
-
FileUtils.rm(path(request))
|
38
|
-
nil
|
39
|
-
end
|
40
|
-
end
|
3
|
+
class SiteInspector
|
4
|
+
class Cache
|
5
|
+
# In-memory store backing the cache; created on first access and reused
# afterwards.
def memory
  @memory = {} unless @memory
  @memory
end
|
42
|
-
end
|
43
8
|
|
44
|
-
|
45
|
-
|
46
|
-
f.write Marshal.dump(response)
|
9
|
+
# Look up the response stored for +request+; nil when nothing is stored.
def get(request)
  memory.fetch(request, nil)
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def get(request)
|
51
|
-
@memory[request] || fetch(request)
|
52
|
-
end
|
53
12
|
|
54
|
-
|
55
|
-
|
56
|
-
|
13
|
+
# Remember +response+ under +request+ and return the stored response.
def set(request, response)
  memory.store(request, response)
end
|
57
16
|
end
|
58
17
|
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'open3'
|
5
|
+
|
6
|
+
class SiteInspector
  class Endpoint
    # Accessibility check that shells out to the pa11y command-line tool and
    # exposes its JSON report for the endpoint's URI.
    class Accessibility < Check
      # Raised when pa11y exits with status 1 or its output is not valid JSON.
      class Pa11yError < RuntimeError; end

      # Maps the symbols accepted by this check to pa11y's --standard values.
      STANDARDS = {
        wcag2a: 'WCAG2A', # Default standard
        wcag2aa: 'WCAG2AA',
        wcag2aaa: 'WCAG2AAA',
        section508: 'Section508'
      }.freeze

      DEFAULT_LEVEL = :error

      REQUIRED_PA11Y_VERSION = '~> 5.0'

      class << self
        # Stripped output of `pa11y --version`, or nil when the command exits
        # non-zero. A successful (truthy) result is memoized.
        def pa11y_version
          return @pa11y_version if @pa11y_version

          output, status = run_command('--version')
          @pa11y_version = status.exitstatus.zero? ? output.strip : nil
        end

        # Whether a pa11y executable was detected; computed once per process.
        def pa11y?
          @pa11y_detected = (pa11y.detect ? true : false) unless defined?(@pa11y_detected)
          @pa11y_detected
        end

        # The check runs only when checks are enabled and pa11y is detected.
        def enabled?
          @@enabled && pa11y?
        end

        # Cliver dependency describing the pa11y executable. The search path
        # is '*' joined with the node_modules/pa11y/bin directory resolved
        # relative to this file.
        def pa11y
          return @pa11y if @pa11y

          bundled_bin = File.expand_path('../../../node_modules/pa11y/bin', File.dirname(__FILE__))
          search_path = ['*', bundled_bin].join(File::PATH_SEPARATOR)
          @pa11y = Cliver::Dependency.new('pa11y.js', REQUIRED_PA11Y_VERSION, path: search_path)
        end

        # Runs the detected pa11y executable with +args+ (a single string or
        # an array of strings). Returns [combined stdout/stderr, status].
        def run_command(args)
          Open3.capture2e(pa11y.detect, *args)
        end
      end

      # Reporting level passed to pa11y; DEFAULT_LEVEL until one is set.
      def level
        @level ||= DEFAULT_LEVEL
      end

      # Sets the reporting level.
      # Raises ArgumentError unless it is :error, :warning or :notice.
      def level=(level)
        unless %i[error warning notice].include?(level)
          raise ArgumentError, "Invalid level '#{level}'"
        end

        @level = level
      end

      # True when +standard+ is one of the STANDARDS keys.
      def standard?(standard)
        STANDARDS.include?(standard)
      end

      # Standard used by #check; the first STANDARDS key until one is set.
      def standard
        @standard ||= STANDARDS.each_key.first
      end

      # Sets the standard. Raises ArgumentError for an unknown standard.
      def standard=(standard)
        raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)

        @standard = standard
      end

      # The :valid flag of the report, or nil when no report is available.
      def valid?
        report = check
        report[:valid] if report
      end

      # Number of report entries whose 'type' is 'error', or nil when no
      # report is available.
      def errors
        report = check
        report[:results].count { |entry| entry['type'] == 'error' } if report
      end

      # Memoized pa11y report for the current standard; nil when pa11y fails
      # with a Pa11yError.
      def check
        @check ||= run_pa11y(standard)
      rescue Pa11yError
        nil
      end
      alias to_h check

      # Calling a standard's name as a method (e.g. #wcag2aa) runs pa11y
      # against that standard; anything else falls through to super.
      def method_missing(method_sym, *arguments, &block)
        return super unless standard?(method_sym)

        run_pa11y(method_sym)
      end

      def respond_to_missing?(method_sym, include_private = false)
        standard?(method_sym) || super
      end

      private

      # Runs pa11y against the endpoint URI for +standard+.
      # Returns { valid: Boolean, results: parsed JSON output }.
      # Raises ArgumentError for an unknown standard and Pa11yError when
      # pa11y reports a technical error or emits unparseable output.
      def run_pa11y(standard)
        self.class.pa11y.detect! unless ENV['SKIP_PA11Y_CHECK']
        raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)

        args = ['--standard', STANDARDS[standard]]
        args.push('--reporter', 'json')
        args.push('--level', level.to_s)
        args.push(endpoint.uri.to_s)
        output, status = self.class.run_command(args)

        # Pa11y exit codes: https://github.com/nature/pa11y#exit-codes
        # 0: No errors, 1: Technical error within pa11y, 2: accessibility error (configurable via --level)
        exit_code = status.exitstatus
        raise Pa11yError if exit_code == 1

        { valid: exit_code.zero?, results: JSON.parse(output) }
      rescue Pa11yError, JSON::ParserError
        # Re-raise with the command line and raw output attached.
        raise Pa11yError, "Command `pa11y #{args.join(' ')}` failed: #{output}"
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # A check is an abstract class that takes an Endpoint object
    # and is extended to perform the specific site inspector checks.
    #
    # It is automatically accessible within the endpoint object
    # by virtue of extending the Check class.
    class Check
      attr_reader :endpoint

      class << self
        # Single flag stored in a class variable, so it is shared by Check
        # and every subclass.
        @@enabled = true

        # Short symbolic name of the check: the last constant segment of the
        # class name, downcased (e.g. :check).
        def name
          to_s.split('::')[-1].downcase.to_sym
        end

        # Whether checks are enabled; always a strict boolean.
        def enabled?
          @@enabled ? true : false
        end

        # Enables or disables checks; +value+ is coerced to a boolean.
        def enabled=(value)
          @@enabled = value ? true : false
        end
      end

      def initialize(endpoint)
        @endpoint = endpoint
      end

      # The endpoint's response.
      def response
        endpoint.response
      end

      # The request that produced #response.
      def request
        response.request
      end

      # Host component of the request's base URL.
      def host
        request.base_url.host
      end

      def inspect
        %(#<#{self.class} endpoint="#{response.effective_url}">)
      end

      # Instance-level shortcut for the class-level short name.
      def name
        self.class.name
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # Content checks: inspects the HTML document returned by a GET request and
    # probes for well-known files (robots.txt, sitemap.xml, humans.txt).
    class Content < Check
      # Given a path (e.g, "/data"), check if the given path exists on the canonical endpoint
      def path_exists?(path)
        endpoint.up? && endpoint.request(path: path, followlocation: true).success?
      end

      # The default Check#response method is from a HEAD request
      # The content check has a special response which includes the body from a GET request
      def response
        @response ||= endpoint.request(method: :get)
      end

      # Parsed Nokogiri HTML document for the response body, or nil when
      # there is no response. Nokogiri is loaded lazily on first use.
      def document
        require 'nokogiri'
        @doc ||= Nokogiri::HTML response.body if response
      end
      alias doc document

      # Serialized document as a UTF-8 string with invalid bytes removed.
      def body
        @body ||= begin
          # Unary plus yields an unfrozen string: when there is no document,
          # nil.to_s is frozen on Ruby 2.7+ and force_encoding needs a
          # modifiable receiver.
          html = +document.to_s
          html.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
        end
      end

      # Whether robots.txt exists; nil when the site lacks proper 404s,
      # since a successful response is then meaningless.
      def robots_txt?
        # Was memoized under the misspelled @bodts_txt.
        @robots_txt ||= path_exists?('robots.txt') if proper_404s?
      end

      # Whether sitemap.xml exists; nil when the site lacks proper 404s.
      def sitemap_xml?
        @sitemap_xml ||= path_exists?('sitemap.xml') if proper_404s?
      end

      # Whether humans.txt exists; nil when the site lacks proper 404s.
      def humans_txt?
        @humans_txt ||= path_exists?('humans.txt') if proper_404s?
      end

      # External ID of the document's DTD, or nil when there is no document,
      # no DTD node, or the DTD has no external ID.
      def doctype
        document&.internal_subset&.external_id
      end

      # Content of the <meta name="generator"> tag, or nil when absent.
      def generator
        @generator ||= begin
          tag = document&.at('meta[name="generator"]')
          tag['content'] if tag
        end
      end

      # Queues requests for the well-known paths and the random 404 probe on
      # the shared hydra and runs them together. No-op when the endpoint is
      # down.
      def prefetch
        return unless endpoint.up?

        options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
        ['robots.txt', 'sitemap.xml', 'humans.txt', random_path].each do |path|
          request = Typhoeus::Request.new(URI.join(endpoint.uri, path), options)
          SiteInspector.hydra.queue(request)
        end
        SiteInspector.hydra.run
      end

      # True when a request for a random path does not succeed.
      def proper_404s?
        @proper_404s ||= !path_exists?(random_path)
      end

      # Summary hash of all content checks; prefetches the probe requests first.
      def to_h
        prefetch
        {
          doctype: doctype,
          generator: generator,
          sitemap_xml: sitemap_xml?,
          robots_txt: robots_txt?,
          humans_txt: humans_txt?,
          proper_404s: proper_404s?
        }
      end

      private

      # Random hex path, generated once per check, used to probe how the
      # site responds to a path that should not exist.
      def random_path
        require 'securerandom'
        @random_path ||= SecureRandom.hex
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # Cookie checks based on the endpoint's Set-Cookie response header(s).
    class Cookies < Check
      # Without a block: whether any cookie header is present.
      # With a block: whether any parsed cookie satisfies the block.
      def any?(&block)
        header = cookie_header
        return false if header.nil? || header.empty?
        return true unless block_given?

        all.any?(&block)
      end
      alias cookies? any?

      # Every Set-Cookie header parsed with CGI::Cookie.parse (memoized);
      # nil when there are no cookies.
      def all
        return unless cookies?

        @cookies ||= cookie_header.map { |raw| CGI::Cookie.parse(raw) }
      end

      # The parsed cookie whose first key equals +key+; nil when there is no
      # match or there are no cookies.
      def [](key)
        return unless cookies?

        all.find { |cookie| cookie.keys.first == key }
      end

      # True when both a "secure" and an "httponly" attribute appear
      # (case-insensitively) anywhere in the cookie headers.
      def secure?
        pairs = cookie_header.join('; ').split('; ') # CGI::Cookies#Parse doesn't seem to like secure headers
        %w[secure httponly].all? do |flag|
          pairs.any? { |pair| pair.casecmp(flag).zero? }
        end
      end

      def to_h
        { cookie?: any?, secure?: secure? }
      end

      private

      # Cookie header may be an array or string, always return an array
      def cookie_header
        raw = endpoint.headers.all['set-cookie']
        [raw].flatten.compact
      end
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # DNS checks for the endpoint's host: record lookups, DNSSEC and IPv6
    # detection, and CDN / cloud-provider detection from CNAMEs and the
    # reverse-resolved hostname.
    class Dns < Check
      class LocalhostError < StandardError; end

      # Shared Dnsruby resolver configured with the nameservers 8.8.8.8 and
      # 8.8.4.4. dnsruby is loaded lazily on first use.
      def self.resolver
        require 'dnsruby'
        @resolver ||= begin
          resolver = Dnsruby::Resolver.new
          resolver.config.nameserver = ['8.8.8.8', '8.8.4.4']
          resolver
        end
      end

      # Answer section for a query of +type+ against the host; an empty
      # array on timeout, SERVFAIL or NXDOMAIN.
      def query(type = 'ANY')
        SiteInspector::Endpoint::Dns.resolver.query(host.to_s, type).answer
      rescue Dnsruby::ResolvTimeout, Dnsruby::ServFail, Dnsruby::NXDomain
        []
      end

      # Memoized answer to the default ('ANY') query.
      def records
        @records ||= query
      end

      # True when a record of +type+ is in the cached records, or when a
      # dedicated query for that type returns any answers.
      def record?(type)
        records.any? { |record| record.type == type } || query(type).count != 0
      end
      alias has_record? record?

      def dnssec?
        @dnssec ||= has_record? 'DNSKEY'
      end

      def ipv6?
        @ipv6 ||= has_record? 'AAAA'
      end

      # Symbol naming the detected CDN, or nil.
      def cdn
        detect_by_hostname 'cdn'
      end

      def cdn?
        !!cdn
      end

      # Symbol naming the detected cloud provider, or nil.
      def cloud_provider
        detect_by_hostname 'cloud'
      end

      def cloud?
        !!cloud_provider
      end

      # True when any MX record's exchange ends in google.com or
      # googlemail.com (an optional trailing dot is allowed).
      def google_apps?
        @google_apps ||= records.any? do |record|
          record.type == 'MX' && record.exchange.to_s =~ /google(mail)?\.com\.?\z/i
        end
      end

      def localhost?
        ip == '127.0.0.1'
      end

      # Address the host resolves to, or nil when resolution fails.
      def ip
        # Load resolv here as #hostname does, so this method (and
        # #localhost?, which calls it) works regardless of call order.
        require 'resolv'
        @ip ||= Resolv.getaddress host
      rescue Resolv::ResolvError
        nil
      end

      # Reverse-resolved name for #ip parsed with PublicSuffix; nil when
      # the lookup fails or the name is not a valid domain.
      def hostname
        require 'resolv'
        @hostname ||= PublicSuffix.parse(Resolv.getname(ip))
      rescue Resolv::ResolvError, PublicSuffix::DomainInvalid
        nil
      end

      # CNAME targets from the cached records, parsed with PublicSuffix.
      def cnames
        @cnames ||= records.select { |record| record.type == 'CNAME' }.map do |record|
          PublicSuffix.parse(record.cname.to_s)
        end
      end

      # Uses the actual class name, as Check#inspect does; the previous
      # literal named SiteInspector::Domain::Dns, which is not this class.
      def inspect
        "#<#{self.class} host=\"#{host}\">"
      end

      # Summary hash of the DNS checks. For localhost only an :error entry
      # holding LocalhostError is returned.
      def to_h
        return { error: LocalhostError } if localhost?

        {
          dnssec: dnssec?,
          ipv6: ipv6?,
          cdn: cdn,
          cloud_provider: cloud_provider,
          google_apps: google_apps?,
          hostname: hostname,
          ip: ip
        }
      end

      private

      # Per-instance cache of loaded YAML data files, keyed by name.
      def data
        @data ||= {}
      end

      # Absolute path of the "<name>.yml" data file, resolved relative to
      # this file's directory.
      def data_path(name)
        File.expand_path "../../data/#{name}.yml", File.dirname(__FILE__)
      end

      # Loads the YAML data file for +name+ once and caches the result.
      def load_data(name)
        require 'yaml'
        path = data_path(name)
        data[name] ||= YAML.load_file(path)
      end

      # Matches the data file for +type+ against the CNAME targets first,
      # then against the reverse-resolved hostname. A match is on either the
      # TLD or "sld.tld". Returns the matching entry's name as a symbol, or
      # nil when nothing matches.
      def detect_by_hostname(type)
        haystack = load_data(type)
        needle = haystack.find do |_name, domain|
          cnames.any? do |cname|
            [cname.tld, "#{cname.sld}.#{cname.tld}"].include? domain
          end
        end

        return needle[0].to_sym if needle
        return nil unless hostname

        needle = haystack.find do |_name, domain|
          [hostname.tld, "#{hostname.sld}.#{hostname.tld}"].include? domain
        end

        needle ? needle[0].to_sym : nil
      end
    end
  end
end
|