site-inspector 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile +3 -0
  6. data/Guardfile +8 -0
  7. data/README.md +175 -0
  8. data/Rakefile +8 -0
  9. data/bin/site-inspector +48 -21
  10. data/lib/site-inspector.rb +38 -613
  11. data/lib/site-inspector/cache.rb +9 -52
  12. data/lib/site-inspector/checks/check.rb +41 -0
  13. data/lib/site-inspector/checks/content.rb +67 -0
  14. data/lib/site-inspector/checks/dns.rb +129 -0
  15. data/lib/site-inspector/checks/headers.rb +83 -0
  16. data/lib/site-inspector/checks/hsts.rb +78 -0
  17. data/lib/site-inspector/checks/https.rb +40 -0
  18. data/lib/site-inspector/checks/sniffer.rb +42 -0
  19. data/lib/site-inspector/disk_cache.rb +38 -0
  20. data/lib/site-inspector/domain.rb +248 -0
  21. data/lib/site-inspector/endpoint.rb +200 -0
  22. data/lib/site-inspector/rails_cache.rb +11 -0
  23. data/lib/site-inspector/version.rb +3 -0
  24. data/script/bootstrap +1 -0
  25. data/script/cibuild +7 -0
  26. data/script/console +1 -0
  27. data/script/release +38 -0
  28. data/site-inspector.gemspec +33 -0
  29. data/spec/checks/site_inspector_endpoint_check_spec.rb +34 -0
  30. data/spec/checks/site_inspector_endpoint_content_spec.rb +89 -0
  31. data/spec/checks/site_inspector_endpoint_dns_spec.rb +167 -0
  32. data/spec/checks/site_inspector_endpoint_headers_spec.rb +74 -0
  33. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +91 -0
  34. data/spec/checks/site_inspector_endpoint_https_spec.rb +48 -0
  35. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +52 -0
  36. data/spec/site_inspector_cache_spec.rb +13 -0
  37. data/spec/site_inspector_disc_cache_spec.rb +31 -0
  38. data/spec/site_inspector_domain_spec.rb +252 -0
  39. data/spec/site_inspector_endpoint_spec.rb +224 -0
  40. data/spec/site_inspector_spec.rb +46 -0
  41. data/spec/spec_helper.rb +17 -0
  42. metadata +75 -57
  43. data/lib/site-inspector/compliance.rb +0 -19
  44. data/lib/site-inspector/dns.rb +0 -92
  45. data/lib/site-inspector/headers.rb +0 -59
  46. data/lib/site-inspector/sniffer.rb +0 -26
@@ -1,58 +1,15 @@
1
- class SiteInspectorCache
2
- def initialize
3
- @memory = {}
4
- end
5
-
6
- def get(request)
7
- @memory[request]
8
- end
9
-
10
- def set(request, response)
11
- @memory[request] = response
12
- end
13
- end
14
-
15
- class SiteInspectorDiskCache
16
- def initialize(dir = nil, replace = false)
17
- @dir = dir
18
- @memory = {}
19
- @replace = replace
20
- end
21
-
22
- def path(request)
23
- File.join(@dir, request.cache_key)
24
- end
25
-
26
- def fetch(request)
27
- if File.exist?(path(request))
28
-
29
- if @replace
30
- FileUtils.rm(path(request))
31
- nil
32
- else
33
- contents = File.read(path(request))
34
- begin
35
- Marshal.load(contents)
36
- rescue ArgumentError
37
- FileUtils.rm(path(request))
38
- nil
39
- end
40
- end
1
+ class SiteInspector
2
+ class Cache
3
+ def memory
4
+ @memory ||= {}
41
5
  end
42
- end
43
6
 
44
- def store(request, response)
45
- File.open(File.join(@dir, request.cache_key), "w") do |f|
46
- f.write Marshal.dump(response)
7
+ def get(request)
8
+ memory[request]
47
9
  end
48
- end
49
-
50
- def get(request)
51
- @memory[request] || fetch(request)
52
- end
53
10
 
54
- def set(request, response)
55
- store(request, response)
56
- @memory[request] = response
11
+ def set(request, response)
12
+ memory[request] = response
13
+ end
57
14
  end
58
15
  end
@@ -0,0 +1,41 @@
1
class SiteInspector
  class Endpoint
    # Abstract base class for the individual site-inspector checks.
    #
    # A check wraps a single Endpoint object and is subclassed to perform
    # one specific inspection. Per the original author's note, subclasses
    # become accessible from the endpoint object simply by extending Check
    # (that wiring lives outside this class).
    class Check
      # The endpoint object this check was built for.
      attr_reader :endpoint

      # endpoint - the object under inspection; it must respond to #response.
      def initialize(endpoint)
        @endpoint = endpoint
      end

      # The response object supplied by the endpoint.
      def response
        endpoint.response
      end

      # The request that produced #response.
      def request
        response.request
      end

      # Host component of the request's base URL.
      def host
        request.base_url.host
      end

      # Short debugging representation: the check class plus the response's
      # effective URL.
      def inspect
        %(#<#{self.class} endpoint="#{response.effective_url}">)
      end

      # Instance-level shortcut for the class-level short name.
      def name
        self.class.name
      end

      class << self
        # Short symbolic name of the check: the last namespace segment of
        # the class, downcased (e.g. SiteInspector::Endpoint::Check => :check).
        def name
          to_s.split('::')[-1].downcase.to_sym
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
class SiteInspector
  class Endpoint
    # Content-related checks for an endpoint: presence of well-known files,
    # the document's doctype, and whether unknown paths are reported as
    # missing.
    class Content < Check
      # Well-known files probed by #to_h and warmed up by #prefetch.
      WELL_KNOWN_PATHS = %w[robots.txt sitemap.xml humans.txt].freeze

      # Given a path (e.g., "/data"), check whether a request for that path
      # on the endpoint succeeds (redirects are followed).
      def path_exists?(path)
        endpoint.request(path: path, followlocation: true).success?
      end

      # The response body parsed with Nokogiri, or nil when there is no
      # response. Nokogiri is required lazily so it is only loaded when the
      # document is actually needed.
      def document
        require 'nokogiri'
        @doc ||= Nokogiri::HTML response.body if response
      end
      alias_method :doc, :document

      # The parsed document serialized back to a UTF-8 string, with invalid
      # byte sequences dropped.
      def body
        @body ||= document.to_s.force_encoding("UTF-8").encode("UTF-8", :invalid => :replace, :replace => "")
      end

      # The boolean predicates below memoize with `defined?` rather than
      # `||=`, because `||=` would repeat the request every time the cached
      # answer is `false`.

      # True when a request for robots.txt succeeds.
      def robots_txt?
        return @robots_txt if defined?(@robots_txt)
        @robots_txt = path_exists?("robots.txt")
      end

      # True when a request for sitemap.xml succeeds.
      def sitemap_xml?
        return @sitemap_xml if defined?(@sitemap_xml)
        @sitemap_xml = path_exists?("sitemap.xml")
      end

      # True when a request for humans.txt succeeds.
      def humans_txt?
        return @humans_txt if defined?(@humans_txt)
        @humans_txt = path_exists?("humans.txt")
      end

      # Name of the document's internal DTD subset (e.g. "html"), or nil
      # when there is no document or the document carries no doctype.
      def doctype
        subset = document && document.internal_subset
        subset && subset.name
      end

      # Queue requests for every path probed by #to_h on the shared hydra
      # and run them together, ahead of the individual checks.
      def prefetch
        options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
        (WELL_KNOWN_PATHS + [random_path]).each do |path|
          request = Typhoeus::Request.new(URI.join(endpoint.uri, path), options)
          SiteInspector.hydra.queue(request)
        end
        SiteInspector.hydra.run
      end

      # True when a request for a random, presumably nonexistent, path does
      # not succeed.
      def proper_404s?
        return @proper_404s if defined?(@proper_404s)
        @proper_404s = !path_exists?(random_path)
      end

      # Summary hash of every content check. Calls #prefetch first.
      def to_h
        prefetch
        {
          doctype: doctype,
          sitemap_xml: sitemap_xml?,
          robots_txt: robots_txt?,
          humans_txt: humans_txt?,
          proper_404s: proper_404s?
        }
      end

      private

      # Random hex string used as a path that should not exist; memoized so
      # #prefetch and #proper_404s? probe the same path.
      def random_path
        require 'securerandom'
        @random_path ||= SecureRandom.hex
      end
    end
  end
end
@@ -0,0 +1,129 @@
1
class SiteInspector
  class Endpoint
    # DNS-related checks for an endpoint's host: record lookups, DNSSEC and
    # IPv6 presence, and CDN / cloud-provider detection from CNAMEs and the
    # reverse-DNS hostname.
    class Dns < Check

      # Shared, lazily built Dnsruby resolver pointed at 8.8.8.8 / 8.8.4.4.
      def self.resolver
        require "dnsruby"
        @resolver ||= begin
          dns = Dnsruby::Resolver.new
          dns.config.nameserver = ["8.8.8.8", "8.8.4.4"]
          dns
        end
      end

      # Query the host for records of the given type (default "ANY").
      # Returns the answer section, or an empty array on timeout, SERVFAIL
      # or NXDOMAIN.
      def query(type="ANY")
        SiteInspector::Endpoint::Dns.resolver.query(host.to_s, type).answer
      rescue Dnsruby::ResolvTimeout, Dnsruby::ServFail, Dnsruby::NXDomain
        []
      end

      # Memoized answer to the default ("ANY") query.
      def records
        @records ||= query
      end

      # True when a record of the given type is among #records or, failing
      # that, when a type-specific query returns any answer.
      def has_record?(type)
        records.any? { |record| record.type == type } || query(type).count != 0
      end

      # The boolean predicates below memoize with `defined?` rather than
      # `||=`, because `||=` would re-run the lookup (possibly a fresh DNS
      # query) every time the cached answer is `false`.

      # True when the host has a DNSKEY record.
      def dnssec?
        return @dnssec if defined?(@dnssec)
        @dnssec = has_record?("DNSKEY")
      end

      # True when the host has an AAAA record.
      def ipv6?
        return @ipv6 if defined?(@ipv6)
        @ipv6 = has_record?("AAAA")
      end

      # Symbol naming the detected CDN (from the "cdn" data file), or nil.
      def cdn
        detect_by_hostname "cdn"
      end

      def cdn?
        !!cdn
      end

      # Symbol naming the detected cloud provider (from the "cloud" data
      # file), or nil.
      def cloud_provider
        detect_by_hostname "cloud"
      end

      def cloud?
        !!cloud_provider
      end

      # True when any MX record's exchange ends in google.com or
      # googlemail.com (with an optional trailing dot).
      def google_apps?
        return @google if defined?(@google)
        @google = records.any? do |record|
          record.type == "MX" && record.exchange.to_s =~ /google(mail)?\.com\.?$/
        end
      end

      # Address the host resolves to, or nil when resolution fails.
      def ip
        require 'resolv'
        @ip ||= Resolv.getaddress host
      rescue Resolv::ResolvError
        nil
      end

      # Reverse-DNS name of #ip parsed by PublicSuffix, or nil when the
      # address is unknown, has no reverse name, or the name is not a valid
      # domain.
      def hostname
        require 'resolv'
        return @hostname if @hostname
        address = ip
        # Without an address there is nothing to reverse-resolve.
        return nil unless address
        @hostname = PublicSuffix.parse(Resolv.getname(address))
      rescue Resolv::ResolvError, PublicSuffix::DomainInvalid
        nil
      end

      # CNAME targets among #records, each parsed by PublicSuffix. Targets
      # PublicSuffix rejects are skipped, mirroring how #hostname treats an
      # invalid name, so one odd CNAME cannot abort provider detection.
      def cnames
        @cnames ||= records.select { |record| record.type == "CNAME" }.map do |record|
          parse_domain(record.cname.to_s)
        end.compact
      end

      # Debugging representation; uses the real class name, in line with
      # Check#inspect.
      def inspect
        "#<#{self.class} host=\"#{host}\">"
      end

      # Summary hash of every DNS check.
      def to_h
        {
          :dnssec         => dnssec?,
          :ipv6           => ipv6?,
          :cdn            => cdn,
          :cloud_provider => cloud_provider,
          :google_apps    => google_apps?,
          :hostname       => hostname,
          :ip             => ip
        }
      end

      private

      # Per-instance cache of loaded data files, keyed by name.
      def data
        @data ||= {}
      end

      # Absolute path of the bundled "<name>.yml" data file.
      def data_path(name)
        File.expand_path "../../data/#{name}.yml", File.dirname(__FILE__)
      end

      # Load, and cache, the bundled YAML data file with the given name.
      def load_data(name)
        require 'yaml'
        data[name] ||= YAML.load_file(data_path(name))
      end

      # Parse a domain name with PublicSuffix; nil when it is not valid.
      def parse_domain(name)
        PublicSuffix.parse(name)
      rescue PublicSuffix::DomainInvalid
        nil
      end

      # True when `domain` equals the parsed name's TLD or its "sld.tld".
      def domain_matches?(domain, parsed)
        domain == parsed.tld || domain == "#{parsed.sld}.#{parsed.tld}"
      end

      # Find the first entry of the `type` data file whose domain matches a
      # CNAME target or, failing that, the reverse-DNS hostname. CNAMEs are
      # tried first so the reverse lookup only happens when it is needed.
      # Returns the entry's name as a symbol, or nil.
      def detect_by_hostname(type)
        providers = load_data(type)

        match = providers.find do |_name, domain|
          cnames.any? { |cname| domain_matches?(domain, cname) }
        end
        return match.first.to_sym if match

        return nil unless hostname

        match = providers.find { |_name, domain| domain_matches?(domain, hostname) }
        match ? match.first.to_sym : nil
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
class SiteInspector
  class Endpoint
    # Response-header checks for an endpoint: cookies and the common
    # security headers.
    class Headers < Check

      # cookies can have multiple set-cookie headers, so this detects
      # whether cookies are set, but not all their values.
      def cookies?
        !!headers["set-cookie"]
      end

      # TODO: kill this
      def strict_transport_security?
        !!strict_transport_security
      end

      def content_security_policy?
        !!content_security_policy
      end

      def click_jacking_protection?
        !!click_jacking_protection
      end

      # The accessors below return the raw header value, or nil when the
      # header is absent.

      # TODO: kill this
      def strict_transport_security
        headers["strict-transport-security"]
      end

      def content_security_policy
        headers["content-security-policy"]
      end

      def click_jacking_protection
        headers["x-frame-options"]
      end

      def server
        headers["server"]
      end

      def xss_protection
        headers["x-xss-protection"]
      end

      # More specific than presence: true only when the header is exactly
      # "1; mode=block".
      def xss_protection?
        xss_protection == "1; mode=block"
      end

      # True when the set-cookie value carries both the "secure" and the
      # "httponly" attribute. When several set-cookie headers arrive as an
      # Array, only the first one is examined.
      def secure_cookies?
        return false unless cookies?
        cookie = headers["set-cookie"]
        cookie = cookie.first if cookie.is_a?(Array)
        !!(cookie =~ /(; secure.*; httponly|; httponly.*; secure)/i)
      end

      # Returns a hash of header name => value pairs with the names
      # downcased, or an empty hash when there is no response or it has no
      # headers.
      def all
        @all ||= (response && response.headers) ? Hash[response.headers.map { |k, v| [k.downcase, v] }] : {}
      end
      alias_method :headers, :all

      # Look up a single header by its downcased name.
      def [](header)
        headers[header]
      end

      # Summary hash of the header checks. Header-valued entries other than
      # :server fall back to false when the header is absent.
      def to_h
        {
          :cookies => cookies?,
          :strict_transport_security => strict_transport_security || false,
          :content_security_policy => content_security_policy || false,
          :click_jacking_protection => click_jacking_protection || false,
          :server => server,
          :xss_protection => xss_protection || false,
          :secure_cookies => secure_cookies?
        }
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
class SiteInspector
  class Endpoint
    # Utility parser for HSTS (Strict-Transport-Security) headers.
    # RFC: http://tools.ietf.org/html/rfc6797
    class Hsts < Check

      # Minimum max-age (in seconds; 18 weeks) used by #preload_ready?.
      PRELOAD_MIN_MAX_AGE = 10_886_400

      # True when the header is present and no directive name or value
      # contains whitespace or a quote character after parsing.
      def valid?
        return false unless header
        pairs.none? { |key, value| "#{key}#{value}" =~ /[\s\'\"]/ }
      end

      # The max-age directive as an Integer; 0 when it is missing or not
      # numeric.
      def max_age
        pairs[:"max-age"].to_i
      end

      def include_subdomains?
        pairs.key?(:includesubdomains)
      end

      def preload?
        pairs.key?(:preload)
      end

      # True when a positive max-age is set. (#max_age is always an
      # Integer, so no nil guard is needed.)
      def enabled?
        max_age > 0
      end

      # Google's minimum max-age for automatic preloading
      def preload_ready?
        include_subdomains? && preload? && max_age >= PRELOAD_MIN_MAX_AGE
      end

      # Summary hash of every HSTS check.
      def to_h
        {
          valid: valid?,
          max_age: max_age,
          include_subdomains: include_subdomains?,
          preload: preload?,
          enabled: enabled?,
          preload_ready: preload_ready?
        }
      end

      private

      def headers
        endpoint.headers
      end

      # Raw Strict-Transport-Security header value, or nil when absent.
      def header
        @header ||= headers["strict-transport-security"]
      end

      # The header split on ";" (surrounding whitespace dropped) into
      # individual directive strings.
      def directives
        @directives ||= header ? header.split(/\s*;\s*/) : []
      end

      # Directives parsed into a { name_symbol => value_or_nil } hash.
      # Names and values are downcased; one pair of surrounding double
      # quotes is removed from a value.
      def pairs
        @pairs ||= directives.each_with_object({}) do |directive, parsed|
          # The RFC 6797 grammar allows empty directives ("a;;b", leading
          # ";"); skip them rather than calling to_sym on a nil name.
          next if directive.empty?

          # Limit the split so a value containing "=" is kept whole.
          key, value = directive.downcase.split("=", 2)
          # A directive such as "=foo" has an empty name; ignore it.
          next if key.nil? || key.empty?

          value = $1 if value =~ /\A"(.*)"\z/

          parsed[key.to_sym] = value
        end
      end
    end
  end
end