site-inspector 1.0.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -1,58 +1,17 @@
1
- class SiteInspectorCache
2
- def initialize
3
- @memory = {}
4
- end
5
-
6
- def get(request)
7
- @memory[request]
8
- end
9
-
10
- def set(request, response)
11
- @memory[request] = response
12
- end
13
- end
14
-
15
- class SiteInspectorDiskCache
16
- def initialize(dir = nil, replace = false)
17
- @dir = dir
18
- @memory = {}
19
- @replace = replace
20
- end
1
+ # frozen_string_literal: true
21
2
 
22
- def path(request)
23
- File.join(@dir, request.cache_key)
24
- end
25
-
26
- def fetch(request)
27
- if File.exist?(path(request))
28
-
29
- if @replace
30
- FileUtils.rm(path(request))
31
- nil
32
- else
33
- contents = File.read(path(request))
34
- begin
35
- Marshal.load(contents)
36
- rescue ArgumentError
37
- FileUtils.rm(path(request))
38
- nil
39
- end
40
- end
3
+ class SiteInspector
4
+ class Cache
5
+ def memory
6
+ @memory ||= {}
41
7
  end
42
- end
43
8
 
44
- def store(request, response)
45
- File.open(File.join(@dir, request.cache_key), "w") do |f|
46
- f.write Marshal.dump(response)
9
+ def get(request)
10
+ memory[request]
47
11
  end
48
- end
49
-
50
- def get(request)
51
- @memory[request] || fetch(request)
52
- end
53
12
 
54
- def set(request, response)
55
- store(request, response)
56
- @memory[request] = response
13
+ def set(request, response)
14
+ memory[request] = response
15
+ end
57
16
  end
58
17
  end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'open3'
5
+
6
+ class SiteInspector
7
+ class Endpoint
8
+ class Accessibility < Check
9
+ class Pa11yError < RuntimeError; end
10
+
11
+ STANDARDS = {
12
+ wcag2a: 'WCAG2A', # Default standard
13
+ wcag2aa: 'WCAG2AA',
14
+ wcag2aaa: 'WCAG2AAA',
15
+ section508: 'Section508'
16
+ }.freeze
17
+
18
+ DEFAULT_LEVEL = :error
19
+
20
+ REQUIRED_PA11Y_VERSION = '~> 5.0'
21
+
22
+ class << self
23
+ def pa11y_version
24
+ @pa11y_version ||= begin
25
+ output, status = run_command('--version')
26
+ output.strip if status.exitstatus.zero?
27
+ end
28
+ end
29
+
30
+ def pa11y?
31
+ return @pa11y_detected if defined? @pa11y_detected
32
+
33
+ @pa11y_detected = !!pa11y.detect
34
+ end
35
+
36
+ def enabled?
37
+ @@enabled && pa11y?
38
+ end
39
+
40
+ def pa11y
41
+ @pa11y ||= begin
42
+ node_bin = File.expand_path('../../../node_modules/pa11y/bin', File.dirname(__FILE__))
43
+ path = ['*', node_bin].join(File::PATH_SEPARATOR)
44
+ Cliver::Dependency.new('pa11y.js', REQUIRED_PA11Y_VERSION, path: path)
45
+ end
46
+ end
47
+
48
+ def run_command(args)
49
+ Open3.capture2e(pa11y.detect, *args)
50
+ end
51
+ end
52
+
53
+ def level
54
+ @level ||= DEFAULT_LEVEL
55
+ end
56
+
57
+ def level=(level)
58
+ raise ArgumentError, "Invalid level '#{level}'" unless %i[error warning notice].include?(level)
59
+
60
+ @level = level
61
+ end
62
+
63
+ def standard?(standard)
64
+ STANDARDS.key?(standard)
65
+ end
66
+
67
+ def standard
68
+ @standard ||= STANDARDS.keys.first
69
+ end
70
+
71
+ def standard=(standard)
72
+ raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)
73
+
74
+ @standard = standard
75
+ end
76
+
77
+ def valid?
78
+ check[:valid] if check
79
+ end
80
+
81
+ def errors
82
+ check[:results].count { |r| r['type'] == 'error' } if check
83
+ end
84
+
85
+ def check
86
+ @check ||= run_pa11y(standard)
87
+ rescue Pa11yError
88
+ nil
89
+ end
90
+ alias to_h check
91
+
92
+ def method_missing(method_sym, *arguments, &block)
93
+ if standard?(method_sym)
94
+ run_pa11y(method_sym)
95
+ else
96
+ super
97
+ end
98
+ end
99
+
100
+ def respond_to_missing?(method_sym, include_private = false)
101
+ if standard?(method_sym)
102
+ true
103
+ else
104
+ super
105
+ end
106
+ end
107
+
108
+ private
109
+
110
+ def run_pa11y(standard)
111
+ self.class.pa11y.detect! unless ENV['SKIP_PA11Y_CHECK']
112
+ raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)
113
+
114
+ args = [
115
+ '--standard', STANDARDS[standard],
116
+ '--reporter', 'json',
117
+ '--level', level.to_s,
118
+ endpoint.uri.to_s
119
+ ]
120
+ output, status = self.class.run_command(args)
121
+
122
+ # Pa11y exit codes: https://github.com/nature/pa11y#exit-codes
123
+ # 0: No errors, 1: Technical error within pa11y, 2: accessibility error (configurable via --level)
124
+ raise Pa11yError if status.exitstatus == 1
125
+
126
+ {
127
+ valid: status.exitstatus.zero?,
128
+ results: JSON.parse(output)
129
+ }
130
+ rescue Pa11yError, JSON::ParserError
131
+ raise Pa11yError, "Command `pa11y #{args.join(' ')}` failed: #{output}"
132
+ end
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Check
6
+ attr_reader :endpoint
7
+
8
+ # A check is an abstract class that takes an Endpoint object
9
+ # and is extended to preform the specific site inspector checks
10
+ #
11
+ # It is automatically accessable within the endpoint object
12
+ # by virtue of extending the Check class
13
+ def initialize(endpoint)
14
+ @endpoint = endpoint
15
+ end
16
+
17
+ def response
18
+ endpoint.response
19
+ end
20
+
21
+ def request
22
+ response.request
23
+ end
24
+
25
+ def host
26
+ request.base_url.host
27
+ end
28
+
29
+ def inspect
30
+ "#<#{self.class} endpoint=\"#{response.effective_url}\">"
31
+ end
32
+
33
+ def name
34
+ self.class.name
35
+ end
36
+
37
+ class << self
38
+ @@enabled = true
39
+
40
+ def name
41
+ to_s.split('::').last.downcase.to_sym
42
+ end
43
+
44
+ def enabled?
45
+ !!@@enabled
46
+ end
47
+
48
+ def enabled=(value)
49
+ @@enabled = !!value
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Content < Check
6
+ # Given a path (e.g, "/data"), check if the given path exists on the canonical endpoint
7
+ def path_exists?(path)
8
+ endpoint.up? && endpoint.request(path: path, followlocation: true).success?
9
+ end
10
+
11
+ # The default Check#response method is from a HEAD request
12
+ # The content check has a special response which includes the body from a GET request
13
+ def response
14
+ @response ||= endpoint.request(method: :get)
15
+ end
16
+
17
+ def document
18
+ require 'nokogiri'
19
+ @doc ||= Nokogiri::HTML response.body if response
20
+ end
21
+ alias doc document
22
+
23
+ def body
24
+ @body ||= document.to_s.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
25
+ end
26
+
27
+ def robots_txt?
28
+ @robots_txt ||= path_exists?('robots.txt') if proper_404s? # nil unless the host serves proper 404s
29
+ end
30
+
31
+ def sitemap_xml?
32
+ @sitemap_xml ||= path_exists?('sitemap.xml') if proper_404s?
33
+ end
34
+
35
+ def humans_txt?
36
+ @humans_txt ||= path_exists?('humans.txt') if proper_404s?
37
+ end
38
+
39
+ def doctype
40
+ document.internal_subset.external_id
41
+ end
42
+
43
+ def generator
44
+ @generator ||= begin
45
+ tag = document.at('meta[name="generator"]')
46
+ tag['content'] if tag
47
+ end
48
+ end
49
+
50
+ def prefetch
51
+ return unless endpoint.up?
52
+
53
+ options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
54
+ ['robots.txt', 'sitemap.xml', 'humans.txt', random_path].each do |path|
55
+ request = Typhoeus::Request.new(URI.join(endpoint.uri, path), options)
56
+ SiteInspector.hydra.queue(request)
57
+ end
58
+ SiteInspector.hydra.run
59
+ end
60
+
61
+ def proper_404s?
62
+ @proper_404s ||= !path_exists?(random_path)
63
+ end
64
+
65
+ def to_h
66
+ prefetch
67
+ {
68
+ doctype: doctype,
69
+ generator: generator,
70
+ sitemap_xml: sitemap_xml?,
71
+ robots_txt: robots_txt?,
72
+ humans_txt: humans_txt?,
73
+ proper_404s: proper_404s?
74
+ }
75
+ end
76
+
77
+ private
78
+
79
+ def random_path
80
+ require 'securerandom'
81
+ @random_path ||= SecureRandom.hex
82
+ end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Cookies < Check
6
+ def any?(&block)
7
+ if cookie_header.nil? || cookie_header.empty?
8
+ false
9
+ elsif block_given?
10
+ all.any?(&block)
11
+ else
12
+ true
13
+ end
14
+ end
15
+ alias cookies? any?
16
+
17
+ def all
18
+ @cookies ||= cookie_header.map { |c| CGI::Cookie.parse(c) } if cookies?
19
+ end
20
+
21
+ def [](key)
22
+ all.find { |cookie| cookie.keys.first == key } if cookies?
23
+ end
24
+
25
+ def secure?
26
+ pairs = cookie_header.join('; ').split('; ') # CGI::Cookies#Parse doesn't seem to like secure headers
27
+ pairs.any? { |c| c.casecmp('secure').zero? } && pairs.any? { |c| c.casecmp('httponly').zero? }
28
+ end
29
+
30
+ def to_h
31
+ {
32
+ cookie?: any?,
33
+ secure?: secure?
34
+ }
35
+ end
36
+
37
+ private
38
+
39
+ def cookie_header
40
+ # Cookie header may be an array or string, always return an array
41
+ [endpoint.headers.all['set-cookie']].flatten.compact
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Dns < Check
6
+ class LocalhostError < StandardError; end
7
+
8
+ def self.resolver
9
+ require 'dnsruby'
10
+ @resolver ||= begin
11
+ resolver = Dnsruby::Resolver.new
12
+ resolver.config.nameserver = ['8.8.8.8', '8.8.4.4']
13
+ resolver
14
+ end
15
+ end
16
+
17
+ def query(type = 'ANY')
18
+ SiteInspector::Endpoint::Dns.resolver.query(host.to_s, type).answer
19
+ rescue Dnsruby::ResolvTimeout, Dnsruby::ServFail, Dnsruby::NXDomain
20
+ []
21
+ end
22
+
23
+ def records
24
+ @records ||= query
25
+ end
26
+
27
+ def record?(type)
28
+ records.any? { |record| record.type == type } || query(type).count != 0
29
+ end
30
+ alias has_record? record?
31
+
32
+ def dnssec?
33
+ @dnssec ||= has_record? 'DNSKEY'
34
+ end
35
+
36
+ def ipv6?
37
+ @ipv6 ||= has_record? 'AAAA'
38
+ end
39
+
40
+ def cdn
41
+ detect_by_hostname 'cdn'
42
+ end
43
+
44
+ def cdn?
45
+ !!cdn
46
+ end
47
+
48
+ def cloud_provider
49
+ detect_by_hostname 'cloud'
50
+ end
51
+
52
+ def cloud?
53
+ !!cloud_provider
54
+ end
55
+
56
+ def google_apps?
57
+ @google_apps ||= records.any? do |record|
58
+ record.type == 'MX' && record.exchange.to_s =~ /google(mail)?\.com\.?\z/i
59
+ end
60
+ end
61
+
62
+ def localhost?
63
+ ip == '127.0.0.1'
64
+ end
65
+
66
+ def ip
67
+ @ip ||= Resolv.getaddress host
68
+ rescue Resolv::ResolvError
69
+ nil
70
+ end
71
+
72
+ def hostname
73
+ require 'resolv'
74
+ @hostname ||= PublicSuffix.parse(Resolv.getname(ip))
75
+ rescue Resolv::ResolvError, PublicSuffix::DomainInvalid
76
+ nil
77
+ end
78
+
79
+ def cnames
80
+ @cnames ||= records.select { |record| record.type == 'CNAME' }.map do |record|
81
+ PublicSuffix.parse(record.cname.to_s)
82
+ end
83
+ end
84
+
85
+ def inspect
86
+ "#<SiteInspector::Endpoint::Dns host=\"#{host}\">" # report the class's actual namespace
87
+ end
88
+
89
+ def to_h
90
+ return { error: LocalhostError } if localhost?
91
+
92
+ {
93
+ dnssec: dnssec?,
94
+ ipv6: ipv6?,
95
+ cdn: cdn,
96
+ cloud_provider: cloud_provider,
97
+ google_apps: google_apps?,
98
+ hostname: hostname,
99
+ ip: ip
100
+ }
101
+ end
102
+
103
+ private
104
+
105
+ def data
106
+ @data ||= {}
107
+ end
108
+
109
+ def data_path(name)
110
+ File.expand_path "../../data/#{name}.yml", File.dirname(__FILE__)
111
+ end
112
+
113
+ def load_data(name)
114
+ require 'yaml'
115
+ path = data_path(name)
116
+ data[name] ||= YAML.load_file(path)
117
+ end
118
+
119
+ def detect_by_hostname(type)
120
+ haystack = load_data(type)
121
+ needle = haystack.find do |_name, domain|
122
+ cnames.any? do |cname|
123
+ [cname.tld, "#{cname.sld}.#{cname.tld}"].include? domain
124
+ end
125
+ end
126
+
127
+ return needle[0].to_sym if needle
128
+ return nil unless hostname
129
+
130
+ needle = haystack.find do |_name, domain|
131
+ [hostname.tld, "#{hostname.sld}.#{hostname.tld}"].include? domain
132
+ end
133
+
134
+ needle ? needle[0].to_sym : nil
135
+ end
136
+ end
137
+ end
138
+ end