site-inspector 1.0.2 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +8 -0
- data/.rubocop.yml +42 -0
- data/.rubocop_todo.yml +139 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +7 -0
- data/Guardfile +10 -0
- data/README.md +189 -0
- data/Rakefile +10 -0
- data/bin/site-inspector +50 -22
- data/lib/cliver/dependency_ext.rb +24 -0
- data/lib/site-inspector.rb +62 -615
- data/lib/site-inspector/cache.rb +10 -51
- data/lib/site-inspector/checks/accessibility.rb +135 -0
- data/lib/site-inspector/checks/check.rb +54 -0
- data/lib/site-inspector/checks/content.rb +85 -0
- data/lib/site-inspector/checks/cookies.rb +45 -0
- data/lib/site-inspector/checks/dns.rb +138 -0
- data/lib/site-inspector/checks/headers.rb +68 -0
- data/lib/site-inspector/checks/hsts.rb +81 -0
- data/lib/site-inspector/checks/https.rb +40 -0
- data/lib/site-inspector/checks/sniffer.rb +67 -0
- data/lib/site-inspector/checks/wappalyzer.rb +62 -0
- data/lib/site-inspector/checks/whois.rb +36 -0
- data/lib/site-inspector/disk_cache.rb +42 -0
- data/lib/site-inspector/domain.rb +271 -0
- data/lib/site-inspector/endpoint.rb +217 -0
- data/lib/site-inspector/rails_cache.rb +13 -0
- data/lib/site-inspector/version.rb +5 -0
- data/package-lock.json +505 -0
- data/package.json +23 -0
- data/script/bootstrap +2 -0
- data/script/cibuild +11 -0
- data/script/console +3 -0
- data/script/pa11y-version +10 -0
- data/script/release +38 -0
- data/site-inspector.gemspec +42 -0
- data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
- data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
- data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
- data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
- data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
- data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
- data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
- data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
- data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
- data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
- data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
- data/spec/fixtures/wappalyzer.json +125 -0
- data/spec/site_inspector_cache_spec.rb +15 -0
- data/spec/site_inspector_disk_cache_spec.rb +39 -0
- data/spec/site_inspector_domain_spec.rb +271 -0
- data/spec/site_inspector_endpoint_spec.rb +252 -0
- data/spec/site_inspector_spec.rb +48 -0
- data/spec/spec_helper.rb +19 -0
- metadata +204 -63
- data/lib/site-inspector/compliance.rb +0 -19
- data/lib/site-inspector/dns.rb +0 -92
- data/lib/site-inspector/headers.rb +0 -59
- data/lib/site-inspector/sniffer.rb +0 -26
data/lib/site-inspector/cache.rb
CHANGED
@@ -1,58 +1,17 @@
|
|
1
|
-
|
2
|
-
def initialize
|
3
|
-
@memory = {}
|
4
|
-
end
|
5
|
-
|
6
|
-
def get(request)
|
7
|
-
@memory[request]
|
8
|
-
end
|
9
|
-
|
10
|
-
def set(request, response)
|
11
|
-
@memory[request] = response
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
class SiteInspectorDiskCache
|
16
|
-
def initialize(dir = nil, replace = false)
|
17
|
-
@dir = dir
|
18
|
-
@memory = {}
|
19
|
-
@replace = replace
|
20
|
-
end
|
1
|
+
# frozen_string_literal: true
|
21
2
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
def fetch(request)
|
27
|
-
if File.exist?(path(request))
|
28
|
-
|
29
|
-
if @replace
|
30
|
-
FileUtils.rm(path(request))
|
31
|
-
nil
|
32
|
-
else
|
33
|
-
contents = File.read(path(request))
|
34
|
-
begin
|
35
|
-
Marshal.load(contents)
|
36
|
-
rescue ArgumentError
|
37
|
-
FileUtils.rm(path(request))
|
38
|
-
nil
|
39
|
-
end
|
40
|
-
end
|
3
|
+
class SiteInspector
|
4
|
+
class Cache
|
5
|
+
def memory
|
6
|
+
@memory ||= {}
|
41
7
|
end
|
42
|
-
end
|
43
8
|
|
44
|
-
|
45
|
-
|
46
|
-
f.write Marshal.dump(response)
|
9
|
+
def get(request)
|
10
|
+
memory[request]
|
47
11
|
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def get(request)
|
51
|
-
@memory[request] || fetch(request)
|
52
|
-
end
|
53
12
|
|
54
|
-
|
55
|
-
|
56
|
-
|
13
|
+
def set(request, response)
|
14
|
+
memory[request] = response
|
15
|
+
end
|
57
16
|
end
|
58
17
|
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'open3'
|
5
|
+
|
6
|
+
class SiteInspector
  class Endpoint
    # Accessibility check that shells out to the pa11y command-line tool and
    # reports the results for a configurable standard and reporting level.
    class Accessibility < Check
      # Raised when pa11y reports a technical failure (exit status 1) or
      # prints output that cannot be parsed as JSON.
      class Pa11yError < RuntimeError; end

      # Maps the symbols accepted by this class to the names passed to
      # pa11y's --standard flag. The first key is used as the default.
      STANDARDS = {
        wcag2a: 'WCAG2A', # Default standard
        wcag2aa: 'WCAG2AA',
        wcag2aaa: 'WCAG2AAA',
        section508: 'Section508'
      }.freeze

      DEFAULT_LEVEL = :error

      REQUIRED_PA11Y_VERSION = '~> 5.0'

      class << self
        # Output of `pa11y --version`, stripped, memoized once obtained.
        # Returns nil when pa11y is not detected or the command exits non-zero.
        def pa11y_version
          return @pa11y_version if @pa11y_version

          # Without this guard the result of `pa11y.detect` is handed to
          # Open3.capture2e as the command even when no executable was found.
          # NOTE(review): assumes Cliver::Dependency#detect returns nil when
          # nothing satisfies the requirement -- confirm against Cliver.
          return unless pa11y?

          output, status = run_command('--version')
          @pa11y_version = output.strip if status.exitstatus.zero?
        end

        # Whether a pa11y executable was detected. The answer is memoized with
        # `defined?` so that a negative result is cached as well.
        def pa11y?
          return @pa11y_detected if defined? @pa11y_detected

          @pa11y_detected = !!pa11y.detect
        end

        # The check is enabled only when the shared enable flag inherited from
        # Check is set and pa11y has been detected.
        def enabled?
          @@enabled && pa11y?
        end

        # Memoized Cliver dependency describing the pa11y executable. The
        # search path is '*' followed by the node_modules/pa11y/bin directory
        # resolved relative to this file.
        def pa11y
          @pa11y ||= begin
            node_bin = File.expand_path('../../../node_modules/pa11y/bin', File.dirname(__FILE__))
            path = ['*', node_bin].join(File::PATH_SEPARATOR)
            Cliver::Dependency.new('pa11y.js', REQUIRED_PA11Y_VERSION, path: path)
          end
        end

        # Runs the detected pa11y executable with the given argument (a single
        # String or an Array of Strings).
        # Returns [combined stdout and stderr, Process::Status].
        def run_command(args)
          Open3.capture2e(pa11y.detect, *args)
        end
      end

      # Reporting level passed to pa11y; defaults to DEFAULT_LEVEL.
      def level
        @level ||= DEFAULT_LEVEL
      end

      # Sets the reporting level.
      # Raises ArgumentError unless level is :error, :warning or :notice.
      def level=(level)
        raise ArgumentError, "Invalid level '#{level}'" unless %i[error warning notice].include?(level)

        @level = level
      end

      # True when the given symbol is a key of STANDARDS.
      def standard?(standard)
        STANDARDS.key?(standard)
      end

      # Standard used by #check; defaults to the first key of STANDARDS.
      def standard
        @standard ||= STANDARDS.keys.first
      end

      # Sets the standard used by #check.
      # Raises ArgumentError when the standard is not a key of STANDARDS.
      def standard=(standard)
        raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)

        @standard = standard
      end

      # Whether pa11y exited with status 0; nil when the check could not run.
      def valid?
        check[:valid] if check
      end

      # Number of results whose 'type' is 'error'; nil when the check could
      # not run.
      def errors
        check[:results].count { |r| r['type'] == 'error' } if check
      end

      # Memoized result of running pa11y with the configured standard.
      # Returns nil (without memoizing) when pa11y raised Pa11yError.
      def check
        @check ||= run_pa11y(standard)
      rescue Pa11yError
        nil
      end
      alias to_h check

      # Allows calling a standard by name (for example `wcag2aa`) to run pa11y
      # against that standard without changing the configured one.
      def method_missing(method_sym, *arguments, &block)
        if standard?(method_sym)
          run_pa11y(method_sym)
        else
          super
        end
      end

      def respond_to_missing?(method_sym, include_private = false)
        if standard?(method_sym)
          true
        else
          super
        end
      end

      private

      # Runs pa11y against the endpoint URI for the given standard.
      # Returns { valid: Boolean, results: parsed JSON output }.
      # Raises ArgumentError for an unknown standard and Pa11yError when pa11y
      # exits with status 1 or its output is not valid JSON.
      def run_pa11y(standard)
        self.class.pa11y.detect! unless ENV['SKIP_PA11Y_CHECK']
        raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)

        args = [
          '--standard', STANDARDS[standard],
          '--reporter', 'json',
          '--level', level.to_s,
          endpoint.uri.to_s
        ]
        output, status = self.class.run_command(args)

        # Pa11y exit codes: https://github.com/nature/pa11y#exit-codes
        # 0: No errors, 1: Technical error within pa11y, 2: accessibility error (configurable via --level)
        raise Pa11yError if status.exitstatus == 1

        {
          valid: status.exitstatus.zero?,
          results: JSON.parse(output)
        }
      rescue Pa11yError, JSON::ParserError
        # Re-raise with the command line and raw output attached for debugging.
        raise Pa11yError, "Command `pa11y #{args.join(' ')}` failed: #{output}"
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # A check is an abstract class that takes an Endpoint object
    # and is extended to perform the specific site inspector checks.
    #
    # It is automatically accessible within the endpoint object
    # by virtue of extending the Check class.
    class Check
      attr_reader :endpoint

      # Enable flag read and written by the class-level enabled?/enabled=
      # below. It is a class variable, so Check and its subclasses share it.
      @@enabled = true

      # endpoint - the Endpoint object this check inspects.
      def initialize(endpoint)
        @endpoint = endpoint
      end

      # The endpoint's response.
      def response
        endpoint.response
      end

      # The request that produced #response.
      def request
        response.request
      end

      # Host component of the request's base URL.
      def host
        request.base_url.host
      end

      def inspect
        "#<#{self.class} endpoint=\"#{response.effective_url}\">"
      end

      # Short name of the check; delegates to the class-level name.
      def name
        self.class.name
      end

      class << self
        # Last segment of the class name, downcased, as a Symbol
        # (SiteInspector::Endpoint::Check => :check).
        def name
          to_s.split('::').last.downcase.to_sym
        end

        def enabled?
          !!@@enabled
        end

        # Any truthy value enables, nil/false disables.
        def enabled=(value)
          @@enabled = !!value
        end
      end
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # Inspects the body of the endpoint and probes for a few well-known files
    # (robots.txt, sitemap.xml, humans.txt).
    class Content < Check
      # Given a path (e.g., "/data"), check if the given path exists on the canonical endpoint
      def path_exists?(path)
        endpoint.up? && endpoint.request(path: path, followlocation: true).success?
      end

      # The default Check#response method is from a HEAD request
      # The content check has a special response which includes the body from a GET request
      def response
        @response ||= endpoint.request(method: :get)
      end

      # The response body parsed with Nokogiri::HTML (memoized).
      # nokogiri is required on first use.
      def document
        require 'nokogiri'
        @doc ||= Nokogiri::HTML response.body if response
      end
      alias doc document

      # The document serialized to a String, forced to UTF-8 with invalid
      # byte sequences removed.
      def body
        @body ||= document.to_s.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
      end

      # Whether robots.txt exists. Returns nil when the endpoint does not
      # answer unknown paths with an error, since the probe would then be
      # meaningless. The result is cached even when it is false.
      def robots_txt?
        return unless proper_404s?
        return @robots_txt if defined?(@robots_txt)

        @robots_txt = path_exists?('robots.txt')
      end

      # Whether sitemap.xml exists; same nil and caching rules as #robots_txt?.
      def sitemap_xml?
        return unless proper_404s?
        return @sitemap_xml if defined?(@sitemap_xml)

        @sitemap_xml = path_exists?('sitemap.xml')
      end

      # Whether humans.txt exists; same nil and caching rules as #robots_txt?.
      def humans_txt?
        return unless proper_404s?
        return @humans_txt if defined?(@humans_txt)

        @humans_txt = path_exists?('humans.txt')
      end

      # External identifier of the document's DOCTYPE, or nil when there is no
      # document or the document declares no DOCTYPE.
      def doctype
        document&.internal_subset&.external_id
      end

      # Content of the <meta name="generator"> tag, or nil when absent.
      def generator
        @generator ||= begin
          tag = document.at('meta[name="generator"]')
          tag['content'] if tag
        end
      end

      # Queues requests for every path this check probes and runs them through
      # the shared hydra in one batch. Does nothing when the endpoint is down.
      # NOTE(review): presumably this warms a request cache used by
      # endpoint.request -- confirm against Endpoint#request.
      def prefetch
        return unless endpoint.up?

        options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
        ['robots.txt', 'sitemap.xml', 'humans.txt', random_path].each do |path|
          request = Typhoeus::Request.new(URI.join(endpoint.uri, path), options)
          SiteInspector.hydra.queue(request)
        end
        SiteInspector.hydra.run
      end

      # True when a request for a random path does not succeed. Memoized with
      # `defined?` so that a false answer is cached too.
      def proper_404s?
        return @proper_404s if defined?(@proper_404s)

        @proper_404s = !path_exists?(random_path)
      end

      def to_h
        prefetch
        {
          doctype: doctype,
          generator: generator,
          sitemap_xml: sitemap_xml?,
          robots_txt: robots_txt?,
          humans_txt: humans_txt?,
          proper_404s: proper_404s?
        }
      end

      private

      # A random hex string, generated once per check instance, used as a path
      # that should not exist on the endpoint.
      def random_path
        require 'securerandom'
        @random_path ||= SecureRandom.hex
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # Inspects the Set-Cookie header(s) returned by the endpoint.
    class Cookies < Check
      # Without a block: true when at least one Set-Cookie header is present.
      # With a block: true when any parsed cookie satisfies the block.
      def any?(&block)
        return false if cookie_header.empty?
        return all.any?(&block) if block_given?

        true
      end
      alias cookies? any?

      # Every Set-Cookie header parsed with CGI::Cookie.parse (memoized), or
      # nil when the endpoint sets no cookies.
      def all
        return unless cookies?

        # Loaded here because this is the only place CGI is referenced.
        require 'cgi'
        @cookies ||= cookie_header.map { |c| CGI::Cookie.parse(c) }
      end

      # The parsed cookie whose first key equals key, or nil.
      def [](key)
        all.find { |cookie| cookie.keys.first == key } if cookies?
      end

      # True when both a "secure" and an "httponly" attribute appear somewhere
      # in the Set-Cookie headers (case-insensitive).
      def secure?
        # CGI::Cookies#Parse doesn't seem to like secure headers, so the raw
        # headers are split by hand. Splitting on ';' and stripping also
        # accepts attributes written without a space after the semicolon.
        attributes = cookie_header.flat_map { |header| header.split(';') }.map(&:strip)
        %w[secure httponly].all? do |flag|
          attributes.any? { |attribute| attribute.casecmp(flag).zero? }
        end
      end

      def to_h
        {
          cookie?: any?,
          secure?: secure?
        }
      end

      private

      def cookie_header
        # Cookie header may be an array or string, always return an array
        [endpoint.headers.all['set-cookie']].flatten.compact
      end
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class SiteInspector
  class Endpoint
    # DNS-based checks for the endpoint's host: record lookups, DNSSEC/IPv6
    # presence, and CDN / cloud-provider detection by CNAME or reverse DNS.
    class Dns < Check
      # Placed in the :error key of #to_h when the host resolves to 127.0.0.1.
      class LocalhostError < StandardError; end

      # Shared, memoized Dnsruby resolver pinned to the 8.8.8.8 / 8.8.4.4
      # nameservers. dnsruby is required on first use.
      def self.resolver
        require 'dnsruby'
        @resolver ||= begin
          resolver = Dnsruby::Resolver.new
          resolver.config.nameserver = ['8.8.8.8', '8.8.4.4']
          resolver
        end
      end

      # Answer section for a query of the given record type against the host.
      # Returns [] on timeout, SERVFAIL or NXDOMAIN.
      def query(type = 'ANY')
        SiteInspector::Endpoint::Dns.resolver.query(host.to_s, type).answer
      rescue Dnsruby::ResolvTimeout, Dnsruby::ServFail, Dnsruby::NXDomain
        []
      end

      # Memoized answer to an ANY query.
      def records
        @records ||= query
      end

      # True when the ANY answer contains a record of the given type, or a
      # direct query for that type returns at least one answer.
      def record?(type)
        records.any? { |record| record.type == type } || query(type).count != 0
      end
      alias has_record? record?

      # Whether a DNSKEY record exists. Memoized with `defined?` so that a
      # false answer does not trigger another DNS query on the next call.
      def dnssec?
        return @dnssec if defined?(@dnssec)

        @dnssec = has_record? 'DNSKEY'
      end

      # Whether an AAAA record exists; memoized like #dnssec?.
      def ipv6?
        return @ipv6 if defined?(@ipv6)

        @ipv6 = has_record? 'AAAA'
      end

      # Name (Symbol) of the detected CDN from the 'cdn' data file, or nil.
      def cdn
        detect_by_hostname 'cdn'
      end

      def cdn?
        !!cdn
      end

      # Name (Symbol) of the detected provider from the 'cloud' data file, or nil.
      def cloud_provider
        detect_by_hostname 'cloud'
      end

      def cloud?
        !!cloud_provider
      end

      # True when any MX record's exchange ends in google.com or googlemail.com
      # (optionally followed by a trailing dot); memoized like #dnssec?.
      def google_apps?
        return @google_apps if defined?(@google_apps)

        @google_apps = records.any? do |record|
          record.type == 'MX' && record.exchange.to_s =~ /google(mail)?\.com\.?\z/i
        end
      end

      def localhost?
        ip == '127.0.0.1'
      end

      # Address the host resolves to, or nil when resolution fails.
      def ip
        # Required here as well as in #hostname so that calling #ip first
        # does not depend on Resolv having been loaded elsewhere.
        require 'resolv'
        @ip ||= Resolv.getaddress host
      rescue Resolv::ResolvError
        nil
      end

      # Reverse-DNS name of #ip parsed with PublicSuffix, or nil when the
      # lookup fails or the name is not a valid domain.
      def hostname
        require 'resolv'
        @hostname ||= PublicSuffix.parse(Resolv.getname(ip))
      rescue Resolv::ResolvError, PublicSuffix::DomainInvalid
        nil
      end

      # Targets of the CNAME records in #records, each parsed with PublicSuffix.
      def cnames
        @cnames ||= records.select { |record| record.type == 'CNAME' }.map do |record|
          PublicSuffix.parse(record.cname.to_s)
        end
      end

      def inspect
        # Interpolates the real class name, as Check#inspect does; the literal
        # used previously named a SiteInspector::Domain::Dns class.
        "#<#{self.class} host=\"#{host}\">"
      end

      def to_h
        return { error: LocalhostError } if localhost?

        {
          dnssec: dnssec?,
          ipv6: ipv6?,
          cdn: cdn,
          cloud_provider: cloud_provider,
          google_apps: google_apps?,
          hostname: hostname,
          ip: ip
        }
      end

      private

      # Per-instance cache of loaded data files, keyed by name.
      def data
        @data ||= {}
      end

      # Absolute path to data/<name>.yml, resolved relative to this file.
      def data_path(name)
        File.expand_path "../../data/#{name}.yml", File.dirname(__FILE__)
      end

      # Loads (once per instance) the YAML data file with the given name.
      def load_data(name)
        require 'yaml'
        data[name] ||= YAML.load_file(data_path(name))
      end

      # True when the parsed domain's TLD, or its "sld.tld" pair, equals target.
      def domain_match?(parsed, target)
        [parsed.tld, "#{parsed.sld}.#{parsed.tld}"].include? target
      end

      # Looks up the provider whose domain (from the data file `type`) matches
      # one of the host's CNAME targets; failing that, the reverse-DNS
      # hostname. Returns the provider name as a Symbol, or nil.
      def detect_by_hostname(type)
        haystack = load_data(type)

        by_cname = haystack.find do |_name, domain|
          cnames.any? { |cname| domain_match?(cname, domain) }
        end
        return by_cname[0].to_sym if by_cname

        # Resolve once; #hostname is not memoized when the lookup fails.
        resolved = hostname
        return nil unless resolved

        by_hostname = haystack.find { |_name, domain| domain_match?(resolved, domain) }
        by_hostname ? by_hostname[0].to_sym : nil
      end
    end
  end
end
|