site-inspector 3.1.1 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +18 -10
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -1
  6. data/Gemfile +4 -0
  7. data/Guardfile +2 -0
  8. data/Rakefile +2 -0
  9. data/bin/site-inspector +7 -6
  10. data/lib/cliver/dependency_ext.rb +6 -3
  11. data/lib/site-inspector.rb +18 -11
  12. data/lib/site-inspector/cache.rb +2 -0
  13. data/lib/site-inspector/checks/accessibility.rb +30 -22
  14. data/lib/site-inspector/checks/check.rb +4 -2
  15. data/lib/site-inspector/checks/content.rb +15 -4
  16. data/lib/site-inspector/checks/cookies.rb +5 -3
  17. data/lib/site-inspector/checks/dns.rb +13 -11
  18. data/lib/site-inspector/checks/headers.rb +8 -6
  19. data/lib/site-inspector/checks/hsts.rb +16 -12
  20. data/lib/site-inspector/checks/https.rb +3 -1
  21. data/lib/site-inspector/checks/sniffer.rb +10 -7
  22. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  23. data/lib/site-inspector/checks/whois.rb +36 -0
  24. data/lib/site-inspector/disk_cache.rb +2 -0
  25. data/lib/site-inspector/domain.rb +36 -30
  26. data/lib/site-inspector/endpoint.rb +22 -23
  27. data/lib/site-inspector/rails_cache.rb +2 -0
  28. data/lib/site-inspector/version.rb +3 -1
  29. data/package-lock.json +505 -0
  30. data/package.json +1 -1
  31. data/script/pa11y-version +1 -0
  32. data/site-inspector.gemspec +24 -17
  33. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +15 -13
  34. data/spec/checks/site_inspector_endpoint_check_spec.rb +9 -7
  35. data/spec/checks/site_inspector_endpoint_content_spec.rb +30 -21
  36. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +17 -15
  37. data/spec/checks/site_inspector_endpoint_dns_spec.rb +42 -40
  38. data/spec/checks/site_inspector_endpoint_headers_spec.rb +12 -10
  39. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +27 -25
  40. data/spec/checks/site_inspector_endpoint_https_spec.rb +12 -10
  41. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +33 -31
  42. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  43. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  44. data/spec/fixtures/wappalyzer.json +125 -0
  45. data/spec/site_inspector_cache_spec.rb +2 -0
  46. data/spec/site_inspector_disk_cache_spec.rb +8 -6
  47. data/spec/site_inspector_domain_spec.rb +34 -34
  48. data/spec/site_inspector_endpoint_spec.rb +44 -43
  49. data/spec/site_inspector_spec.rb +15 -13
  50. data/spec/spec_helper.rb +2 -0
  51. metadata +125 -55
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Check
@@ -40,11 +42,11 @@ class SiteInspector
40
42
  end
41
43
 
42
44
  def enabled?
43
- !!(@@enabled)
45
+ !!@@enabled
44
46
  end
45
47
 
46
48
  def enabled=(value)
47
- @@enabled = !!(value)
49
+ @@enabled = !!value
48
50
  end
49
51
  end
50
52
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Content < Check
@@ -16,7 +18,7 @@ class SiteInspector
16
18
  require 'nokogiri'
17
19
  @doc ||= Nokogiri::HTML response.body if response
18
20
  end
19
- alias_method :doc, :document
21
+ alias doc document
20
22
 
21
23
  def body
22
24
  @body ||= document.to_s.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
@@ -38,8 +40,16 @@ class SiteInspector
38
40
  document.internal_subset.external_id
39
41
  end
40
42
 
43
+ def generator
44
+ @generator ||= begin
45
+ tag = document.at('meta[name="generator"]')
46
+ tag['content'] if tag
47
+ end
48
+ end
49
+
41
50
  def prefetch
42
51
  return unless endpoint.up?
52
+
43
53
  options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
44
54
  ['robots.txt', 'sitemap.xml', 'humans.txt', random_path].each do |path|
45
55
  request = Typhoeus::Request.new(URI.join(endpoint.uri, path), options)
@@ -55,10 +65,11 @@ class SiteInspector
55
65
  def to_h
56
66
  prefetch
57
67
  {
58
- doctype: doctype,
68
+ doctype: doctype,
69
+ generator: generator,
59
70
  sitemap_xml: sitemap_xml?,
60
- robots_txt: robots_txt?,
61
- humans_txt: humans_txt?,
71
+ robots_txt: robots_txt?,
72
+ humans_txt: humans_txt?,
62
73
  proper_404s: proper_404s?
63
74
  }
64
75
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Cookies < Check
@@ -5,12 +7,12 @@ class SiteInspector
5
7
  if cookie_header.nil? || cookie_header.empty?
6
8
  false
7
9
  elsif block_given?
8
- all.any? { |cookie| block.call(cookie) }
10
+ all.any?(&block)
9
11
  else
10
12
  true
11
13
  end
12
14
  end
13
- alias_method :cookies?, :any?
15
+ alias cookies? any?
14
16
 
15
17
  def all
16
18
  @cookies ||= cookie_header.map { |c| CGI::Cookie.parse(c) } if cookies?
@@ -22,7 +24,7 @@ class SiteInspector
22
24
 
23
25
  def secure?
24
26
  pairs = cookie_header.join('; ').split('; ') # CGI::Cookies#Parse doesn't seem to like secure headers
25
- pairs.any? { |c| c.downcase == 'secure' } && pairs.any? { |c| c.downcase == 'httponly' }
27
+ pairs.any? { |c| c.casecmp('secure').zero? } && pairs.any? { |c| c.casecmp('httponly').zero? }
26
28
  end
27
29
 
28
30
  def to_h
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Dns < Check
@@ -25,7 +27,7 @@ class SiteInspector
25
27
  def record?(type)
26
28
  records.any? { |record| record.type == type } || query(type).count != 0
27
29
  end
28
- alias_method :has_record?, :record?
30
+ alias has_record? record?
29
31
 
30
32
  def dnssec?
31
33
  @dnssec ||= has_record? 'DNSKEY'
@@ -52,7 +54,7 @@ class SiteInspector
52
54
  end
53
55
 
54
56
  def google_apps?
55
- @google ||= records.any? do |record|
57
+ @google_apps ||= records.any? do |record|
56
58
  record.type == 'MX' && record.exchange.to_s =~ /google(mail)?\.com\.?\z/i
57
59
  end
58
60
  end
@@ -62,7 +64,6 @@ class SiteInspector
62
64
  end
63
65
 
64
66
  def ip
65
- require 'resolv'
66
67
  @ip ||= Resolv.getaddress host
67
68
  rescue Resolv::ResolvError
68
69
  nil
@@ -87,14 +88,15 @@ class SiteInspector
87
88
 
88
89
  def to_h
89
90
  return { error: LocalhostError } if localhost?
91
+
90
92
  {
91
- dnssec: dnssec?,
92
- ipv6: ipv6?,
93
- cdn: cdn,
93
+ dnssec: dnssec?,
94
+ ipv6: ipv6?,
95
+ cdn: cdn,
94
96
  cloud_provider: cloud_provider,
95
- google_apps: google_apps?,
96
- hostname: hostname,
97
- ip: ip
97
+ google_apps: google_apps?,
98
+ hostname: hostname,
99
+ ip: ip
98
100
  }
99
101
  end
100
102
 
@@ -118,7 +120,7 @@ class SiteInspector
118
120
  haystack = load_data(type)
119
121
  needle = haystack.find do |_name, domain|
120
122
  cnames.any? do |cname|
121
- domain == cname.tld || domain == "#{cname.sld}.#{cname.tld}"
123
+ [cname.tld, "#{cname.sld}.#{cname.tld}"].include? domain
122
124
  end
123
125
  end
124
126
 
@@ -126,7 +128,7 @@ class SiteInspector
126
128
  return nil unless hostname
127
129
 
128
130
  needle = haystack.find do |_name, domain|
129
- domain == hostname.tld || domain == "#{hostname.sld}.#{hostname.tld}"
131
+ [hostname.tld, "#{hostname.sld}.#{hostname.tld}"].include? domain
130
132
  end
131
133
 
132
134
  needle ? needle[0].to_sym : nil
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Headers < Check
@@ -44,9 +46,9 @@ class SiteInspector
44
46
 
45
47
  # Returns an array of hashes of downcased key/value header pairs (or an empty hash)
46
48
  def all
47
- @all ||= (response && response.headers) ? Hash[response.headers.map { |k, v| [k.downcase, v] }] : {}
49
+ @all ||= response&.headers ? response.headers.transform_keys(&:downcase) : {}
48
50
  end
49
- alias_method :headers, :all
51
+ alias headers all
50
52
 
51
53
  def [](header)
52
54
  headers[header]
@@ -55,10 +57,10 @@ class SiteInspector
55
57
  def to_h
56
58
  {
57
59
  strict_transport_security: strict_transport_security || false,
58
- content_security_policy: content_security_policy || false,
59
- click_jacking_protection: click_jacking_protection || false,
60
- server: server,
61
- xss_protection: xss_protection || false
60
+ content_security_policy: content_security_policy || false,
61
+ click_jacking_protection: click_jacking_protection || false,
62
+ server: server,
63
+ xss_protection: xss_protection || false
62
64
  }
63
65
  end
64
66
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  # Utility parser for HSTS headers.
@@ -5,7 +7,8 @@ class SiteInspector
5
7
  class Hsts < Check
6
8
  def valid?
7
9
  return false unless header
8
- pairs.none? { |key, value| "#{key}#{value}" =~ /[\s\'\"]/ }
10
+
11
+ pairs.none? { |key, value| "#{key}#{value}" =~ /[\s'"]/ }
9
12
  end
10
13
 
11
14
  def max_age
@@ -13,16 +16,17 @@ class SiteInspector
13
16
  end
14
17
 
15
18
  def include_subdomains?
16
- pairs.keys.include? :includesubdomains
19
+ pairs.key?(:includesubdomains)
17
20
  end
18
21
 
19
22
  def preload?
20
- pairs.keys.include? :preload
23
+ pairs.key?(:preload)
21
24
  end
22
25
 
23
26
  def enabled?
24
27
  return false unless max_age
25
- max_age > 0
28
+
29
+ max_age.positive?
26
30
  end
27
31
 
28
32
  # Google's minimum max-age for automatic preloading
@@ -32,12 +36,12 @@ class SiteInspector
32
36
 
33
37
  def to_h
34
38
  {
35
- valid: valid?,
36
- max_age: max_age,
39
+ valid: valid?,
40
+ max_age: max_age,
37
41
  include_subdomains: include_subdomains?,
38
- preload: preload?,
39
- enabled: enabled?,
40
- preload_ready: preload_ready?
42
+ preload: preload?,
43
+ enabled: enabled?,
44
+ preload_ready: preload_ready?
41
45
  }
42
46
  end
43
47
 
@@ -61,9 +65,9 @@ class SiteInspector
61
65
  directives.each do |directive|
62
66
  key, value = directive.downcase.split('=')
63
67
 
64
- if value =~ /\".*\"/
65
- value = value.sub(/^\"/, '')
66
- value = value.sub(/\"$/, '')
68
+ if /".*"/.match?(value)
69
+ value = value.sub(/^"/, '')
70
+ value = value.sub(/"$/, '')
67
71
  end
68
72
 
69
73
  pairs[key.to_sym] = value
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Https < Check
@@ -23,7 +25,7 @@ class SiteInspector
23
25
 
24
26
  def to_h
25
27
  {
26
- valid: valid?,
28
+ valid: valid?,
27
29
  return_code: response.return_code
28
30
  }
29
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Sniffer < Check
@@ -13,7 +15,7 @@ class SiteInspector
13
15
  :php,
14
16
  :expression_engine,
15
17
  :cowboy
16
- ]
18
+ ].freeze
17
19
 
18
20
  def framework
19
21
  cms = sniff :cms
@@ -21,7 +23,8 @@ class SiteInspector
21
23
  return :expression_engine if endpoint.cookies.any? { |c| c.keys.first =~ /^exp_/ }
22
24
  return :php if endpoint.cookies['PHPSESSID']
23
25
  return :coldfusion if endpoint.cookies['CFID'] && endpoint.cookies['CFTOKEN']
24
- return :cowboy if endpoint.headers.server.to_s.downcase == 'cowboy'
26
+ return :cowboy if endpoint.headers.server.to_s.casecmp('cowboy').zero?
27
+
25
28
  nil
26
29
  end
27
30
 
@@ -43,9 +46,9 @@ class SiteInspector
43
46
 
44
47
  def to_h
45
48
  {
46
- framework: framework,
47
- analytics: analytics,
48
- javascript: javascript,
49
+ framework: framework,
50
+ analytics: analytics,
51
+ javascript: javascript,
49
52
  advertising: advertising
50
53
  }
51
54
  end
@@ -55,8 +58,8 @@ class SiteInspector
55
58
  def sniff(type)
56
59
  require 'sniffles'
57
60
  results = Sniffles.sniff(endpoint.content.body, type).select { |_name, meta| meta[:found] }
58
- results.keys.first if results
59
- rescue
61
+ results&.keys&.first
62
+ rescue StandardError
60
63
  nil
61
64
  end
62
65
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Wappalyzer < Check
6
+ ENDPOINT = 'https://api.wappalyzer.com/lookup/v2/'
7
+
8
+ def to_h
9
+ return {} unless data['technologies']
10
+
11
+ @to_h ||= begin
12
+ technologies = {}
13
+ data['technologies'].each do |t|
14
+ category = t['categories'].first
15
+ category = category ? category['name'] : 'Other'
16
+ technologies[category] ||= []
17
+ technologies[category].push t['name']
18
+ end
19
+
20
+ technologies
21
+ end
22
+ end
23
+
24
+ private
25
+
26
+ def request
27
+ @request ||= begin
28
+ options = SiteInspector.typhoeus_defaults
29
+ headers = options[:headers].merge({ "x-api-key": api_key })
30
+ options = options.merge(method: :get, headers: headers)
31
+ Typhoeus::Request.new(url, options)
32
+ end
33
+ end
34
+
35
+ def data
36
+ return {} unless api_key && api_key != ''
37
+
38
+ @data ||= begin
39
+ SiteInspector.hydra.queue(request)
40
+ SiteInspector.hydra.run
41
+
42
+ response = request.response
43
+ if response.success?
44
+ JSON.parse(response.body).first
45
+ else
46
+ {}
47
+ end
48
+ end
49
+ end
50
+
51
+ def url
52
+ url = Addressable::URI.parse(ENDPOINT)
53
+ url.query_values = { urls: endpoint.uri }
54
+ url
55
+ end
56
+
57
+ def api_key
58
+ @api_key ||= ENV['WAPPALYZER_API_KEY']
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Whois < Check
6
+ def domain
7
+ @domain ||= whois.lookup host
8
+ end
9
+
10
+ def ip
11
+ @ip ||= whois.lookup ip_address
12
+ end
13
+
14
+ def to_h
15
+ {
16
+ domain: record_to_h(domain),
17
+ ip: record_to_h(ip)
18
+ }
19
+ end
20
+
21
+ private
22
+
23
+ def record_to_h(record)
24
+ record.content.scan(/^\s*(.*?):\s*(.*?)\r?\n/).to_h
25
+ end
26
+
27
+ def ip_address
28
+ @ip_address ||= Resolv.getaddress host
29
+ end
30
+
31
+ def whois
32
+ @whois ||= ::Whois::Client.new
33
+ end
34
+ end
35
+ end
36
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class DiskCache
3
5
  def initialize(dir = nil, replace = nil)
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Domain
3
5
  attr_reader :host
4
6
 
5
7
  def initialize(host)
6
8
  host = host.downcase
7
- host = host.sub(/^https?\:/, '')
9
+ host = host.sub(/^https?:/, '')
8
10
  host = host.sub(%r{^/+}, '')
9
11
  host = host.sub(/^www\./, '')
10
12
  uri = Addressable::URI.parse "//#{host}"
@@ -82,7 +84,8 @@ class SiteInspector
82
84
  # TODO: don't need to require that the HTTPS cert is valid for this purpose.
83
85
  def enforces_https?
84
86
  return false unless https?
85
- endpoints.select(&:http?).all? { |e| !e.up? || (e.redirect && e.redirect.https?) }
87
+
88
+ endpoints.select(&:http?).all? { |e| !e.up? || e.redirect&.https? }
86
89
  end
87
90
 
88
91
  # we can say that a canonical HTTPS site "defaults" to HTTPS,
@@ -91,7 +94,7 @@ class SiteInspector
91
94
  #
92
95
  # TODO: not implemented.
93
96
  def defaults_https?
94
- fail 'Not implemented. Halp?'
97
+ raise 'Not implemented. Halp?'
95
98
  end
96
99
 
97
100
  # HTTPS is "downgraded" if both:
@@ -102,6 +105,7 @@ class SiteInspector
102
105
  # TODO: the redirect must be internal.
103
106
  def downgrades_https?
104
107
  return false unless https?
108
+
105
109
  canonical_endpoint.redirect? && canonical_endpoint.redirect.http?
106
110
  end
107
111
 
@@ -129,7 +133,7 @@ class SiteInspector
129
133
  return true if endpoints.select(&:root?).all? { |e| !e.up? }
130
134
 
131
135
  # Does either root endpoint redirect to a www endpoint?
132
- endpoints.select(&:root?).any? { |e| e.redirect && e.redirect.www? }
136
+ endpoints.select(&:root?).any? { |e| e.redirect&.www? }
133
137
  end
134
138
 
135
139
  # A domain is "canonically" at https if:
@@ -160,7 +164,7 @@ class SiteInspector
160
164
  return true if endpoints.select(&:http?).all? { |e| !e.up? }
161
165
 
162
166
  # at least one http endpoint redirects immediately to https
163
- endpoints.select(&:http?).any? { |e| e.redirect && e.redirect.https? }
167
+ endpoints.select(&:http?).any? { |e| e.redirect&.https? }
164
168
  end
165
169
 
166
170
  # A domain redirects if
@@ -168,6 +172,7 @@ class SiteInspector
168
172
  # 2. All endpoints are either down or an external redirect
169
173
  def redirect?
170
174
  return false unless redirect
175
+
171
176
  endpoints.all? { |e| !e.up? || e.external_redirect? }
172
177
  end
173
178
 
@@ -178,7 +183,7 @@ class SiteInspector
178
183
 
179
184
  # HSTS on the canonical domain?
180
185
  def hsts?
181
- canonical_endpoint.hsts && canonical_endpoint.hsts.enabled?
186
+ canonical_endpoint.hsts&.enabled?
182
187
  end
183
188
 
184
189
  def hsts_subdomains?
@@ -187,6 +192,7 @@ class SiteInspector
187
192
 
188
193
  def hsts_preload_ready?
189
194
  return false unless hsts_subdomains?
195
+
190
196
  endpoints.find { |e| e.root? && e.https? }.hsts.preload_ready?
191
197
  end
192
198
 
@@ -225,40 +231,40 @@ class SiteInspector
225
231
  prefetch
226
232
 
227
233
  hash = {
228
- host: host,
229
- up: up?,
230
- responds: responds?,
231
- www: www?,
232
- root: root?,
233
- https: https?,
234
- enforces_https: enforces_https?,
235
- downgrades_https: downgrades_https?,
236
- canonically_www: canonically_www?,
237
- canonically_https: canonically_https?,
238
- redirect: redirect?,
239
- hsts: hsts?,
240
- hsts_subdomains: hsts_subdomains?,
234
+ host: host,
235
+ up: up?,
236
+ responds: responds?,
237
+ www: www?,
238
+ root: root?,
239
+ https: https?,
240
+ enforces_https: enforces_https?,
241
+ downgrades_https: downgrades_https?,
242
+ canonically_www: canonically_www?,
243
+ canonically_https: canonically_https?,
244
+ redirect: redirect?,
245
+ hsts: hsts?,
246
+ hsts_subdomains: hsts_subdomains?,
241
247
  hsts_preload_ready: hsts_preload_ready?,
242
248
  canonical_endpoint: canonical_endpoint.to_h(options)
243
249
  }
244
250
 
245
251
  if options['all']
246
- hash.merge!(endpoints: {
247
- https: {
248
- root: endpoints[0].to_h(options),
249
- www: endpoints[1].to_h(options)
250
- },
251
- http: {
252
- root: endpoints[2].to_h(options),
253
- www: endpoints[3].to_h(options)
254
- }
255
- })
252
+ hash[:endpoints] = {
253
+ https: {
254
+ root: endpoints[0].to_h(options),
255
+ www: endpoints[1].to_h(options)
256
+ },
257
+ http: {
258
+ root: endpoints[2].to_h(options),
259
+ www: endpoints[3].to_h(options)
260
+ }
261
+ }
256
262
  end
257
263
 
258
264
  hash
259
265
  end
260
266
 
261
- def to_json
267
+ def to_json(*_args)
262
268
  to_h.to_json
263
269
  end
264
270
  end