site-inspector 1.0.2 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,68 @@
# frozen_string_literal: true

class SiteInspector
  class Endpoint
    # Check that exposes selected HTTP response headers of an endpoint.
    # Header names are normalized to lowercase before lookup.
    class Headers < Check
      # All response headers, keyed by downcased header name.
      #
      # Returns a Hash; it is empty when there is no response or the response
      # carries no headers. The result is memoized.
      def all
        @all ||= begin
          raw = response&.headers
          raw ? raw.transform_keys(&:downcase) : {}
        end
      end
      alias_method :headers, :all

      # Looks up a single header by its (downcased) name.
      def [](header)
        headers[header]
      end

      # Raw header values (nil when the header is absent).

      # TODO: kill this
      def strict_transport_security
        headers['strict-transport-security']
      end

      def content_security_policy
        headers['content-security-policy']
      end

      def click_jacking_protection
        headers['x-frame-options']
      end

      def server
        headers['server']
      end

      def xss_protection
        headers['x-xss-protection']
      end

      # Presence predicates: true when the corresponding header was sent.

      # TODO: kill this
      def strict_transport_security?
        strict_transport_security ? true : false
      end

      def content_security_policy?
        content_security_policy ? true : false
      end

      def click_jacking_protection?
        click_jacking_protection ? true : false
      end

      # Stricter than a presence check: the header must equal exactly
      # "1; mode=block".
      def xss_protection?
        xss_protection == '1; mode=block'
      end

      # Summary of the inspected headers. Every entry except :server falls
      # back to false when the header is missing; :server is nil in that case.
      def to_h
        summary = {}
        summary[:strict_transport_security] = strict_transport_security || false
        summary[:content_security_policy]   = content_security_policy || false
        summary[:click_jacking_protection]  = click_jacking_protection || false
        summary[:server]                    = server
        summary[:xss_protection]            = xss_protection || false
        summary
      end
    end
  end
end
@@ -0,0 +1,81 @@
# frozen_string_literal: true

class SiteInspector
  class Endpoint
    # Utility parser for the Strict-Transport-Security (HSTS) response header.
    # RFC: http://tools.ietf.org/html/rfc6797
    class Hsts < Check
      # Google's minimum max-age for automatic preloading
      PRELOAD_MIN_MAX_AGE = 10_886_400

      # True when the header is present and no directive name or value
      # contains whitespace or a quote character after parsing.
      def valid?
        return false unless header

        pairs.none? { |key, value| "#{key}#{value}" =~ /[\s'"]/ }
      end

      # The max-age directive as an Integer. A missing or non-numeric value
      # yields 0, because nil.to_i and "abc".to_i are both 0.
      def max_age
        pairs[:"max-age"].to_i
      end

      def include_subdomains?
        pairs.key?(:includesubdomains)
      end

      def preload?
        pairs.key?(:preload)
      end

      # HSTS is in effect only with a positive max-age.
      def enabled?
        max_age.positive?
      end

      def preload_ready?
        include_subdomains? && preload? && max_age >= PRELOAD_MIN_MAX_AGE
      end

      def to_h
        {
          valid: valid?,
          max_age: max_age,
          include_subdomains: include_subdomains?,
          preload: preload?,
          enabled: enabled?,
          preload_ready: preload_ready?
        }
      end

      private

      def headers
        endpoint.headers
      end

      # Raw header value, or nil when the endpoint did not send one.
      def header
        @header ||= headers['strict-transport-security']
      end

      # Individual directives, split on ";" with surrounding whitespace removed.
      def directives
        @directives ||= header ? header.split(/\s*;\s*/) : []
      end

      # Parsed directives as a Hash of downcased Symbol name => String value.
      # Valueless directives such as "preload" map to nil.
      def pairs
        @pairs ||= directives.each_with_object({}) do |directive, parsed|
          key, value = directive.downcase.split('=')

          # RFC 6797 section 6.1 permits empty directives ("max-age=1;;preload").
          # They split to an empty array, leaving no name to record.
          next unless key

          # Drop the surrounding quotes of a quoted-string value.
          value = value.sub(/^"/, '').sub(/"$/, '') if /".*"/.match?(value)

          parsed[key.to_sym] = value
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
# frozen_string_literal: true

class SiteInspector
  class Endpoint
    # Check describing the outcome of the TLS connection to an endpoint,
    # derived from the request scheme and the response's return code.
    class Https < Check
      # Was the endpoint requested over the https scheme?
      def scheme?
        scheme == 'https'
      end

      # HTTPS endpoint whose response reports the :ok return code.
      def valid?
        scheme? && response && response.return_code == :ok
      end

      # HTTPS endpoint whose response reports :ssl_cacert
      # (presumably an untrusted certificate chain — confirm against libcurl).
      def bad_chain?
        scheme? && response && response.return_code == :ssl_cacert
      end

      # HTTPS endpoint whose response reports :peer_failed_verification
      # (presumably a hostname mismatch — confirm against libcurl).
      def bad_name?
        scheme? && response && response.return_code == :peer_failed_verification
      end

      def inspect
        "#<SiteInspector::Endpoint::Https valid=#{valid?}>"
      end

      # Summary hash. :return_code is nil when there is no response, matching
      # the nil-guard every predicate above already applies.
      def to_h
        {
          valid: valid?,
          return_code: response&.return_code
        }
      end

      private

      # Scheme of the request's base URL, memoized.
      def scheme
        @scheme ||= request.base_url.scheme
      end
    end
  end
end
@@ -0,0 +1,67 @@
# frozen_string_literal: true

class SiteInspector
  class Endpoint
    # Check that guesses which technologies power an endpoint, using the
    # Sniffles gem on the response body plus a few cookie/header heuristics.
    class Sniffer < Check
      # Frameworks that #open_source? treats as open source.
      OPEN_SOURCE_FRAMEWORKS = [
        # Sniffles
        :drupal,
        :joomla,
        :movabletype,
        :phpbb,
        :wordpress,

        # Internal
        :php,
        :expression_engine,
        :cowboy
      ].freeze

      # Symbol naming the detected framework, or nil when nothing matches.
      # A CMS found by Sniffles wins; otherwise cookies and the Server header
      # are consulted in the order listed below.
      def framework
        sniffed = sniff(:cms)

        if !sniffed.nil?
          sniffed
        elsif endpoint.cookies.any? { |cookie| cookie.keys.first =~ /^exp_/ }
          :expression_engine
        elsif endpoint.cookies['PHPSESSID']
          :php
        elsif endpoint.cookies['CFID'] && endpoint.cookies['CFTOKEN']
          :coldfusion
        elsif endpoint.headers.server.to_s.casecmp('cowboy').zero?
          :cowboy
        end
      end

      def open_source?
        OPEN_SOURCE_FRAMEWORKS.include?(framework)
      end

      def analytics
        sniff(:analytics)
      end

      def javascript
        sniff(:javascript)
      end

      def advertising
        sniff(:advertising)
      end

      # One entry per sniffed category, in a fixed order.
      def to_h
        %i[framework analytics javascript advertising].each_with_object({}) do |category, summary|
          summary[category] = public_send(category)
        end
      end

      private

      # Runs Sniffles for the given category against the response body and
      # returns the name of the first technology flagged as found.
      # Any StandardError raised along the way results in nil.
      def sniff(type)
        require 'sniffles' # loaded lazily, only when sniffing is requested
        report = Sniffles.sniff(endpoint.content.body, type)
        hits = report.select { |_name, details| details[:found] }
        hits&.keys&.first
      rescue StandardError
        nil
      end
    end
  end
end
@@ -0,0 +1,62 @@
# frozen_string_literal: true

class SiteInspector
  class Endpoint
    # Check that asks the Wappalyzer lookup API which technologies an
    # endpoint uses. Requires the WAPPALYZER_API_KEY environment variable;
    # without it no request is made and the result is empty.
    class Wappalyzer < Check
      ENDPOINT = 'https://api.wappalyzer.com/lookup/v2/'

      # Technology names grouped by the name of their first category.
      # Technologies without a category are filed under "Other".
      #
      # Returns a Hash of category name => Array of technology names, or an
      # empty Hash when no technology data is available.
      def to_h
        technologies = data['technologies']
        return {} unless technologies

        @to_h ||= technologies.each_with_object({}) do |technology, grouped|
          # Array() tolerates a missing "categories" entry.
          category = Array(technology['categories']).first
          label = category ? category['name'] : 'Other'
          (grouped[label] ||= []) << technology['name']
        end
      end

      private

      # GET request to the lookup API, built from the library-wide Typhoeus
      # defaults plus the API key header.
      def request
        @request ||= begin
          options = SiteInspector.typhoeus_defaults
          headers = (options[:headers] || {}).merge({ "x-api-key": api_key })
          options = options.merge(method: :get, headers: headers)
          Typhoeus::Request.new(url, options)
        end
      end

      # Parsed API record for this endpoint. Always a Hash: empty when no API
      # key is configured, the request fails, or the body is unusable.
      def data
        return {} if api_key.to_s.empty?

        @data ||= begin
          SiteInspector.hydra.queue(request)
          SiteInspector.hydra.run
          parse(request.response)
        end
      end

      # Extracts the first record from the API response. The code expects the
      # body to be a JSON array of records; any other shape yields {}.
      def parse(response)
        return {} unless response&.success?

        payload = JSON.parse(response.body)
        record = payload.is_a?(Array) ? payload.first : nil
        record.is_a?(Hash) ? record : {}
      rescue JSON::ParserError
        # Best-effort check: a malformed body is treated like a failed lookup.
        {}
      end

      # Lookup URL with the endpoint's URI passed as the "urls" query value.
      def url
        lookup = Addressable::URI.parse(ENDPOINT)
        lookup.query_values = { urls: endpoint.uri }
        lookup
      end

      def api_key
        @api_key ||= ENV['WAPPALYZER_API_KEY']
      end
    end
  end
end
@@ -0,0 +1,36 @@
# frozen_string_literal: true

class SiteInspector
  class Endpoint
    # Check that performs WHOIS lookups for an endpoint's host name and for
    # the IP address that host resolves to.
    class Whois < Check
      # WHOIS record for the host (memoized).
      def domain
        @domain ||= whois.lookup(host)
      end

      # WHOIS record for the host's resolved IP address (memoized).
      def ip
        @ip ||= whois.lookup(ip_address)
      end

      # Both records reduced to key/value Hashes.
      def to_h
        domain_fields = record_to_h(domain)
        ip_fields = record_to_h(ip)

        { domain: domain_fields, ip: ip_fields }
      end

      private

      # Turns every newline-terminated "key: value" line of the record's
      # content into a Hash entry; a repeated key keeps its last value.
      def record_to_h(record)
        lines = record.content.scan(/^\s*(.*?):\s*(.*?)\r?\n/)
        lines.each_with_object({}) do |(field, value), fields|
          fields[field] = value
        end
      end

      def ip_address
        @ip_address ||= Resolv.getaddress(host)
      end

      def whois
        @whois ||= ::Whois::Client.new
      end
    end
  end
end
@@ -0,0 +1,42 @@
# frozen_string_literal: true

class SiteInspector
  # File-backed response cache with an in-memory layer on top.
  # Each response is marshalled into its own file, named after the request's
  # cache key, inside the cache directory.
  class DiskCache
    # dir     - cache directory; defaults to ENV['CACHE'].
    # replace - when truthy, existing cache files are deleted instead of
    #           being read; defaults to ENV['CACHE_REPLACE'].
    def initialize(dir = nil, replace = nil)
      @dir = dir || ENV['CACHE']
      @replace = replace || ENV['CACHE_REPLACE']
      @memory = {}
    end

    # Returns the cached response for the request, or nil on a miss.
    #
    # A cache file that cannot be unmarshalled is deleted and treated as a
    # miss. In replace mode the file is deleted and nil is returned, unless
    # the response was already stored in memory by this instance.
    def get(request)
      file = path(request)
      return unless File.exist?(file)
      return @memory[request] if @memory[request]

      if @replace
        File.delete(file)
        return nil
      end

      begin
        # NOTE(review): Marshal.load can instantiate arbitrary objects; the
        # cache directory must only contain files written by this class.
        # Binary read keeps the marshalled bytes intact on every platform.
        Marshal.load(File.binread(file))
      rescue ArgumentError, TypeError
        # Truncated data raises ArgumentError; data in a foreign or
        # incompatible format raises TypeError. Either way the entry is junk.
        File.delete(file)
        nil
      end
    end

    # Stores the response on disk and in memory. Returns the response.
    def set(request, response)
      File.binwrite(path(request), Marshal.dump(response))
      @memory[request] = response
    end

    private

    # The `request` is a Typhoeus::Request, which provides a
    # unique `cache_key` string for exactly this sort of thing.
    def path(request)
      File.join(@dir, request.cache_key)
    end
  end
end
@@ -0,0 +1,271 @@
# frozen_string_literal: true

class SiteInspector
  # A domain, inspected through its four endpoints: the https and http
  # schemes, each with and without the "www." prefix.
  class Domain
    attr_reader :host

    # host - a host name or URL-ish string. It is downcased and a leading
    #        "http:"/"https:", leading slashes and a leading "www." are
    #        removed before the host part is extracted.
    def initialize(host)
      host = host.downcase
      host = host.sub(/^https?:/, '')
      host = host.sub(%r{^/+}, '')
      host = host.sub(/^www\./, '')
      uri = Addressable::URI.parse "//#{host}"
      @host = uri.host
    end

    # The four endpoints, always in this order:
    # https root, https www, http root, http www.
    def endpoints
      @endpoints ||= [
        Endpoint.new("https://#{host}", domain: self),
        Endpoint.new("https://www.#{host}", domain: self),
        Endpoint.new("http://#{host}", domain: self),
        Endpoint.new("http://www.#{host}", domain: self)
      ]
    end

    # The endpoint whose scheme and www-ness match the domain's canonical
    # scheme and canonical www-ness. Memoized.
    def canonical_endpoint
      @canonical_endpoint ||= begin
        prefetch
        endpoints.find do |e|
          e.https? == canonically_https? && e.www? == canonically_www?
        end
      end
    end

    def government?
      require 'gman' # loaded lazily, only when this check is requested
      Gman.valid? host
    end

    # Does *any* endpoint return a 200 or 300 response code?
    def up?
      endpoints.any?(&:up?)
    end

    # Does *any* endpoint respond to HTTP?
    # TODO: needs to allow an invalid chain.
    def responds?
      endpoints.any?(&:responds?)
    end

    # TODO: These weren't present before, and may not be useful.
    # Can you connect to www?
    def www?
      endpoints.any? { |e| e.www? && e.up? }
    end

    # Can you connect without www?
    def root?
      endpoints.any? { |e| e.root? && e.up? }
    end

    # HTTPS is "supported" (different than "canonical" or "enforced") if:
    #
    # * Either of the HTTPS endpoints is listening, and doesn't have
    #   an invalid hostname.
    #
    # TODO: needs to allow an invalid chain.
    def https?
      endpoints.any? { |e| e.https? && e.up? && e.https.valid? }
    end

    # HTTPS is enforced if one of the HTTPS endpoints is "up",
    # and if both *HTTP* endpoints are either:
    #
    #  * down, or
    #  * redirect immediately to HTTPS.
    #
    # This is different than whether a domain is "canonically" HTTPS.
    #
    # * an HTTP redirect can go to HTTPS on another domain, as long
    #   as it's immediate.
    # * a domain with an invalid cert can still be enforcing HTTPS.
    #
    # TODO: need to ensure the redirect *immediately* goes to HTTPS.
    # TODO: don't need to require that the HTTPS cert is valid for this purpose.
    def enforces_https?
      return false unless https?

      http_endpoints.all? { |e| !e.up? || e.redirect&.https? }
    end

    # we can say that a canonical HTTPS site "defaults" to HTTPS,
    # even if it doesn't *strictly* enforce it (e.g. having a www
    # subdomain first to go HTTP root before HTTPS root).
    #
    # TODO: not implemented. Always raises a RuntimeError.
    def defaults_https?
      raise 'Not implemented. Halp?'
    end

    # HTTPS is "downgraded" if both:
    #
    # * HTTPS is supported, and
    # * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
    #
    # TODO: the redirect must be internal.
    def downgrades_https?
      return false unless https?

      canonical_endpoint.redirect? && canonical_endpoint.redirect.http?
    end

    # A domain is "canonically" at www if:
    #  * at least one of its www endpoints responds
    #  * both root endpoints are either down ~~or redirect *somewhere*~~, or
    #  * at least one root endpoint redirect should immediately go to
    #    an *internal* www endpoint
    # This is meant to affirm situations like:
    #   http:// -> https:// -> https://www
    #   https:// -> http:// -> https://www
    # and meant to avoid affirming situations like:
    #   http:// -> http://non-www,
    #   http://www -> http://non-www
    # or like:
    #   https:// -> 200, http:// -> http://www
    def canonically_www?
      # Does any endpoint respond?
      return false unless up?

      # Does at least one www endpoint respond?
      return false unless www?

      # Are both root endpoints down?
      return true if root_endpoints.none?(&:up?)

      # Does either root endpoint redirect to a www endpoint?
      root_endpoints.any? { |e| e.redirect&.www? }
    end

    # A domain is "canonically" at https if:
    #  * at least one of its https endpoints is live and
    #    doesn't have an invalid hostname
    #  * both http endpoints are either down or redirect *somewhere*
    #  * at least one http endpoint redirects immediately to
    #    an *internal* https endpoint
    # This is meant to affirm situations like:
    #   http:// -> http://www -> https://
    #   https:// -> http:// -> https://www
    # and meant to avoid affirming situations like:
    #   http:// -> http://non-www
    #   http://www -> http://non-www
    # or:
    #   http:// -> 200, http://www -> https://www
    #
    # It allows a site to be canonically HTTPS if the cert has
    # a valid hostname but invalid chain issues.
    def canonically_https?
      # Does any endpoint respond?
      return false unless up?

      # At least one of its https endpoints is live and doesn't have an invalid hostname
      return false unless https?

      # Both http endpoints are down
      return true if http_endpoints.none?(&:up?)

      # at least one http endpoint redirects immediately to https
      http_endpoints.any? { |e| e.redirect&.https? }
    end

    # A domain redirects if
    # 1. At least one endpoint is an external redirect, and
    # 2. All endpoints are either down or an external redirect
    def redirect?
      return false unless redirect

      endpoints.all? { |e| !e.up? || e.external_redirect? }
    end

    # The first endpoint (in #endpoints order) that is an external redirect,
    # or nil when there is none.
    def redirect
      endpoints.find(&:external_redirect?)
    end

    # HSTS on the canonical domain?
    def hsts?
      canonical_endpoint.hsts&.enabled?
    end

    # Does the https root endpoint's HSTS policy cover subdomains?
    def hsts_subdomains?
      root_https_endpoint.hsts.include_subdomains?
    end

    def hsts_preload_ready?
      return false unless hsts_subdomains?

      root_https_endpoint.hsts.preload_ready?
    end

    def to_s
      host
    end

    def inspect
      "#<SiteInspector::Domain host=\"#{host}\">"
    end

    # We know most API calls to the domain model are going to require
    # that the root of all four endpoints are called. Rather than process them
    # in serial, let's grab them in parallel and cache the results to speed
    # up later calls.
    def prefetch
      endpoints.each do |endpoint|
        request = Typhoeus::Request.new(endpoint.uri, SiteInspector.typhoeus_defaults)
        SiteInspector.hydra.queue(request)
      end
      SiteInspector.hydra.run
    end

    # Converts the domain to a hash
    #
    # By default, it only returns domain-wide information and
    # information about the canonical endpoint
    #
    # It will also pass options along to each endpoint's to_h method
    #
    # options:
    #   :all - return information about all endpoints
    #          (accepted as either the Symbol :all or the String 'all')
    #
    # Returns a complete hash of the domain's information
    def to_h(options = {})
      prefetch

      hash = {
        host: host,
        up: up?,
        responds: responds?,
        www: www?,
        root: root?,
        https: https?,
        enforces_https: enforces_https?,
        downgrades_https: downgrades_https?,
        canonically_www: canonically_www?,
        canonically_https: canonically_https?,
        redirect: redirect?,
        hsts: hsts?,
        hsts_subdomains: hsts_subdomains?,
        hsts_preload_ready: hsts_preload_ready?,
        canonical_endpoint: canonical_endpoint.to_h(options)
      }

      # The string key is what the original code checked; the symbol key is
      # what the documentation above advertises. Honor both.
      if options['all'] || options[:all]
        https_root, https_www, http_root, http_www = endpoints
        hash[:endpoints] = {
          https: {
            root: https_root.to_h(options),
            www: https_www.to_h(options)
          },
          http: {
            root: http_root.to_h(options),
            www: http_www.to_h(options)
          }
        }
      end

      hash
    end

    # JSON form of #to_h with default options; generator arguments are ignored.
    def to_json(*_args)
      to_h.to_json
    end

    private

    # Endpoints served over plain http.
    def http_endpoints
      endpoints.select(&:http?)
    end

    # Endpoints without the www prefix.
    def root_endpoints
      endpoints.select(&:root?)
    end

    # The https endpoint without the www prefix.
    def root_https_endpoint
      endpoints.find { |e| e.root? && e.https? }
    end
  end
end