site-inspector 1.0.2 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -1,58 +1,17 @@
1
- class SiteInspectorCache
2
- def initialize
3
- @memory = {}
4
- end
5
-
6
- def get(request)
7
- @memory[request]
8
- end
9
-
10
- def set(request, response)
11
- @memory[request] = response
12
- end
13
- end
14
-
15
- class SiteInspectorDiskCache
16
- def initialize(dir = nil, replace = false)
17
- @dir = dir
18
- @memory = {}
19
- @replace = replace
20
- end
1
+ # frozen_string_literal: true
21
2
 
22
- def path(request)
23
- File.join(@dir, request.cache_key)
24
- end
25
-
26
- def fetch(request)
27
- if File.exist?(path(request))
28
-
29
- if @replace
30
- FileUtils.rm(path(request))
31
- nil
32
- else
33
- contents = File.read(path(request))
34
- begin
35
- Marshal.load(contents)
36
- rescue ArgumentError
37
- FileUtils.rm(path(request))
38
- nil
39
- end
40
- end
3
+ class SiteInspector
4
+ class Cache
5
+ def memory
6
+ @memory ||= {}
41
7
  end
42
- end
43
8
 
44
- def store(request, response)
45
- File.open(File.join(@dir, request.cache_key), "w") do |f|
46
- f.write Marshal.dump(response)
9
+ def get(request)
10
+ memory[request]
47
11
  end
48
- end
49
-
50
- def get(request)
51
- @memory[request] || fetch(request)
52
- end
53
12
 
54
- def set(request, response)
55
- store(request, response)
56
- @memory[request] = response
13
+ def set(request, response)
14
+ memory[request] = response
15
+ end
57
16
  end
58
17
  end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'open3'
5
+
6
+ class SiteInspector
7
+ class Endpoint
8
+ class Accessibility < Check
9
+ class Pa11yError < RuntimeError; end
10
+
11
+ STANDARDS = {
12
+ wcag2a: 'WCAG2A', # Default standard
13
+ wcag2aa: 'WCAG2AA',
14
+ wcag2aaa: 'WCAG2AAA',
15
+ section508: 'Section508'
16
+ }.freeze
17
+
18
+ DEFAULT_LEVEL = :error
19
+
20
+ REQUIRED_PA11Y_VERSION = '~> 5.0'
21
+
22
+ class << self
23
+ def pa11y_version
24
+ @pa11y_version ||= begin
25
+ output, status = run_command('--version')
26
+ output.strip if status.exitstatus.zero?
27
+ end
28
+ end
29
+
30
+ def pa11y?
31
+ return @pa11y_detected if defined? @pa11y_detected
32
+
33
+ @pa11y_detected = !!pa11y.detect
34
+ end
35
+
36
+ def enabled?
37
+ @@enabled && pa11y?
38
+ end
39
+
40
+ def pa11y
41
+ @pa11y ||= begin
42
+ node_bin = File.expand_path('../../../node_modules/pa11y/bin', File.dirname(__FILE__))
43
+ path = ['*', node_bin].join(File::PATH_SEPARATOR)
44
+ Cliver::Dependency.new('pa11y.js', REQUIRED_PA11Y_VERSION, path: path)
45
+ end
46
+ end
47
+
48
+ def run_command(args)
49
+ Open3.capture2e(pa11y.detect, *args)
50
+ end
51
+ end
52
+
53
+ def level
54
+ @level ||= DEFAULT_LEVEL
55
+ end
56
+
57
+ def level=(level)
58
+ raise ArgumentError, "Invalid level '#{level}'" unless %i[error warning notice].include?(level)
59
+
60
+ @level = level
61
+ end
62
+
63
+ def standard?(standard)
64
+ STANDARDS.key?(standard)
65
+ end
66
+
67
+ def standard
68
+ @standard ||= STANDARDS.keys.first
69
+ end
70
+
71
+ def standard=(standard)
72
+ raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)
73
+
74
+ @standard = standard
75
+ end
76
+
77
+ def valid?
78
+ check[:valid] if check
79
+ end
80
+
81
+ def errors
82
+ check[:results].count { |r| r['type'] == 'error' } if check
83
+ end
84
+
85
+ def check
86
+ @check ||= run_pa11y(standard)
87
+ rescue Pa11yError
88
+ nil
89
+ end
90
+ alias to_h check
91
+
92
+ def method_missing(method_sym, *arguments, &block)
93
+ if standard?(method_sym)
94
+ run_pa11y(method_sym)
95
+ else
96
+ super
97
+ end
98
+ end
99
+
100
+ def respond_to_missing?(method_sym, include_private = false)
101
+ if standard?(method_sym)
102
+ true
103
+ else
104
+ super
105
+ end
106
+ end
107
+
108
+ private
109
+
110
+ def run_pa11y(standard)
111
+ self.class.pa11y.detect! unless ENV['SKIP_PA11Y_CHECK']
112
+ raise ArgumentError, "Unknown standard '#{standard}'" unless standard?(standard)
113
+
114
+ args = [
115
+ '--standard', STANDARDS[standard],
116
+ '--reporter', 'json',
117
+ '--level', level.to_s,
118
+ endpoint.uri.to_s
119
+ ]
120
+ output, status = self.class.run_command(args)
121
+
122
+ # Pa11y exit codes: https://github.com/nature/pa11y#exit-codes
123
+ # 0: No errors, 1: Technical error within pa11y, 2: accessibility error (configurable via --level)
124
+ raise Pa11yError if status.exitstatus == 1
125
+
126
+ {
127
+ valid: status.exitstatus.zero?,
128
+ results: JSON.parse(output)
129
+ }
130
+ rescue Pa11yError, JSON::ParserError
131
+ raise Pa11yError, "Command `pa11y #{args.join(' ')}` failed: #{output}"
132
+ end
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Check
6
+ attr_reader :endpoint
7
+
8
+ # A check is an abstract class that takes an Endpoint object
9
+ # and is extended to perform the specific site inspector checks
10
+ #
11
+ # It is automatically accessible within the endpoint object
12
+ # by virtue of extending the Check class
13
+ def initialize(endpoint)
14
+ @endpoint = endpoint
15
+ end
16
+
17
+ def response
18
+ endpoint.response
19
+ end
20
+
21
+ def request
22
+ response.request
23
+ end
24
+
25
+ def host
26
+ request.base_url.host
27
+ end
28
+
29
+ def inspect
30
+ "#<#{self.class} endpoint=\"#{response.effective_url}\">"
31
+ end
32
+
33
+ def name
34
+ self.class.name
35
+ end
36
+
37
+ class << self
38
+ @@enabled = true
39
+
40
+ def name
41
+ to_s.split('::').last.downcase.to_sym
42
+ end
43
+
44
+ def enabled?
45
+ !!@@enabled
46
+ end
47
+
48
+ def enabled=(value)
49
+ @@enabled = !!value
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Content < Check
6
+ # Given a path (e.g., "/data"), check if the given path exists on the canonical endpoint
7
+ def path_exists?(path)
8
+ endpoint.up? && endpoint.request(path: path, followlocation: true).success?
9
+ end
10
+
11
+ # The default Check#response method is from a HEAD request
12
+ # The content check has a special response which includes the body from a GET request
13
+ def response
14
+ @response ||= endpoint.request(method: :get)
15
+ end
16
+
17
+ def document
18
+ require 'nokogiri'
19
+ @doc ||= Nokogiri::HTML response.body if response
20
+ end
21
+ alias doc document
22
+
23
+ def body
24
+ @body ||= document.to_s.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
25
+ end
26
+
27
+ def robots_txt?
28
+ @bodts_txt ||= path_exists?('robots.txt') if proper_404s?
29
+ end
30
+
31
+ def sitemap_xml?
32
+ @sitemap_xml ||= path_exists?('sitemap.xml') if proper_404s?
33
+ end
34
+
35
+ def humans_txt?
36
+ @humans_txt ||= path_exists?('humans.txt') if proper_404s?
37
+ end
38
+
39
+ def doctype
40
+ document.internal_subset.external_id
41
+ end
42
+
43
+ def generator
44
+ @generator ||= begin
45
+ tag = document.at('meta[name="generator"]')
46
+ tag['content'] if tag
47
+ end
48
+ end
49
+
50
+ def prefetch
51
+ return unless endpoint.up?
52
+
53
+ options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
54
+ ['robots.txt', 'sitemap.xml', 'humans.txt', random_path].each do |path|
55
+ request = Typhoeus::Request.new(URI.join(endpoint.uri, path), options)
56
+ SiteInspector.hydra.queue(request)
57
+ end
58
+ SiteInspector.hydra.run
59
+ end
60
+
61
+ def proper_404s?
62
+ @proper_404s ||= !path_exists?(random_path)
63
+ end
64
+
65
+ def to_h
66
+ prefetch
67
+ {
68
+ doctype: doctype,
69
+ generator: generator,
70
+ sitemap_xml: sitemap_xml?,
71
+ robots_txt: robots_txt?,
72
+ humans_txt: humans_txt?,
73
+ proper_404s: proper_404s?
74
+ }
75
+ end
76
+
77
+ private
78
+
79
+ def random_path
80
+ require 'securerandom'
81
+ @random_path ||= SecureRandom.hex
82
+ end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Cookies < Check
6
+ def any?(&block)
7
+ if cookie_header.nil? || cookie_header.empty?
8
+ false
9
+ elsif block_given?
10
+ all.any?(&block)
11
+ else
12
+ true
13
+ end
14
+ end
15
+ alias cookies? any?
16
+
17
+ def all
18
+ @cookies ||= cookie_header.map { |c| CGI::Cookie.parse(c) } if cookies?
19
+ end
20
+
21
+ def [](key)
22
+ all.find { |cookie| cookie.keys.first == key } if cookies?
23
+ end
24
+
25
+ def secure?
26
+ pairs = cookie_header.join('; ').split('; ') # CGI::Cookie.parse doesn't seem to like secure headers
27
+ pairs.any? { |c| c.casecmp('secure').zero? } && pairs.any? { |c| c.casecmp('httponly').zero? }
28
+ end
29
+
30
+ def to_h
31
+ {
32
+ cookie?: any?,
33
+ secure?: secure?
34
+ }
35
+ end
36
+
37
+ private
38
+
39
+ def cookie_header
40
+ # Cookie header may be an array or string, always return an array
41
+ [endpoint.headers.all['set-cookie']].flatten.compact
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Dns < Check
6
+ class LocalhostError < StandardError; end
7
+
8
+ def self.resolver
9
+ require 'dnsruby'
10
+ @resolver ||= begin
11
+ resolver = Dnsruby::Resolver.new
12
+ resolver.config.nameserver = ['8.8.8.8', '8.8.4.4']
13
+ resolver
14
+ end
15
+ end
16
+
17
+ def query(type = 'ANY')
18
+ SiteInspector::Endpoint::Dns.resolver.query(host.to_s, type).answer
19
+ rescue Dnsruby::ResolvTimeout, Dnsruby::ServFail, Dnsruby::NXDomain
20
+ []
21
+ end
22
+
23
+ def records
24
+ @records ||= query
25
+ end
26
+
27
+ def record?(type)
28
+ records.any? { |record| record.type == type } || query(type).count != 0
29
+ end
30
+ alias has_record? record?
31
+
32
+ def dnssec?
33
+ @dnssec ||= has_record? 'DNSKEY'
34
+ end
35
+
36
+ def ipv6?
37
+ @ipv6 ||= has_record? 'AAAA'
38
+ end
39
+
40
+ def cdn
41
+ detect_by_hostname 'cdn'
42
+ end
43
+
44
+ def cdn?
45
+ !!cdn
46
+ end
47
+
48
+ def cloud_provider
49
+ detect_by_hostname 'cloud'
50
+ end
51
+
52
+ def cloud?
53
+ !!cloud_provider
54
+ end
55
+
56
+ def google_apps?
57
+ @google_apps ||= records.any? do |record|
58
+ record.type == 'MX' && record.exchange.to_s =~ /google(mail)?\.com\.?\z/i
59
+ end
60
+ end
61
+
62
+ def localhost?
63
+ ip == '127.0.0.1'
64
+ end
65
+
66
+ def ip
67
+ @ip ||= Resolv.getaddress host
68
+ rescue Resolv::ResolvError
69
+ nil
70
+ end
71
+
72
+ def hostname
73
+ require 'resolv'
74
+ @hostname ||= PublicSuffix.parse(Resolv.getname(ip))
75
+ rescue Resolv::ResolvError, PublicSuffix::DomainInvalid
76
+ nil
77
+ end
78
+
79
+ def cnames
80
+ @cnames ||= records.select { |record| record.type == 'CNAME' }.map do |record|
81
+ PublicSuffix.parse(record.cname.to_s)
82
+ end
83
+ end
84
+
85
+ def inspect
86
+ "#<SiteInspector::Domain::Dns host=\"#{host}\">"
87
+ end
88
+
89
+ def to_h
90
+ return { error: LocalhostError } if localhost?
91
+
92
+ {
93
+ dnssec: dnssec?,
94
+ ipv6: ipv6?,
95
+ cdn: cdn,
96
+ cloud_provider: cloud_provider,
97
+ google_apps: google_apps?,
98
+ hostname: hostname,
99
+ ip: ip
100
+ }
101
+ end
102
+
103
+ private
104
+
105
+ def data
106
+ @data ||= {}
107
+ end
108
+
109
+ def data_path(name)
110
+ File.expand_path "../../data/#{name}.yml", File.dirname(__FILE__)
111
+ end
112
+
113
+ def load_data(name)
114
+ require 'yaml'
115
+ path = data_path(name)
116
+ data[name] ||= YAML.load_file(path)
117
+ end
118
+
119
+ def detect_by_hostname(type)
120
+ haystack = load_data(type)
121
+ needle = haystack.find do |_name, domain|
122
+ cnames.any? do |cname|
123
+ [cname.tld, "#{cname.sld}.#{cname.tld}"].include? domain
124
+ end
125
+ end
126
+
127
+ return needle[0].to_sym if needle
128
+ return nil unless hostname
129
+
130
+ needle = haystack.find do |_name, domain|
131
+ [hostname.tld, "#{hostname.sld}.#{hostname.tld}"].include? domain
132
+ end
133
+
134
+ needle ? needle[0].to_sym : nil
135
+ end
136
+ end
137
+ end
138
+ end