site-inspector 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +18 -10
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -1
  6. data/Gemfile +4 -0
  7. data/Guardfile +2 -0
  8. data/Rakefile +2 -0
  9. data/bin/site-inspector +7 -6
  10. data/lib/cliver/dependency_ext.rb +6 -3
  11. data/lib/site-inspector.rb +18 -11
  12. data/lib/site-inspector/cache.rb +2 -0
  13. data/lib/site-inspector/checks/accessibility.rb +30 -22
  14. data/lib/site-inspector/checks/check.rb +4 -2
  15. data/lib/site-inspector/checks/content.rb +15 -4
  16. data/lib/site-inspector/checks/cookies.rb +5 -3
  17. data/lib/site-inspector/checks/dns.rb +13 -11
  18. data/lib/site-inspector/checks/headers.rb +8 -6
  19. data/lib/site-inspector/checks/hsts.rb +16 -12
  20. data/lib/site-inspector/checks/https.rb +3 -1
  21. data/lib/site-inspector/checks/sniffer.rb +10 -7
  22. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  23. data/lib/site-inspector/checks/whois.rb +36 -0
  24. data/lib/site-inspector/disk_cache.rb +2 -0
  25. data/lib/site-inspector/domain.rb +36 -30
  26. data/lib/site-inspector/endpoint.rb +22 -23
  27. data/lib/site-inspector/rails_cache.rb +2 -0
  28. data/lib/site-inspector/version.rb +3 -1
  29. data/package-lock.json +505 -0
  30. data/package.json +1 -1
  31. data/script/pa11y-version +1 -0
  32. data/site-inspector.gemspec +24 -17
  33. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +15 -13
  34. data/spec/checks/site_inspector_endpoint_check_spec.rb +9 -7
  35. data/spec/checks/site_inspector_endpoint_content_spec.rb +30 -21
  36. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +17 -15
  37. data/spec/checks/site_inspector_endpoint_dns_spec.rb +42 -40
  38. data/spec/checks/site_inspector_endpoint_headers_spec.rb +12 -10
  39. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +27 -25
  40. data/spec/checks/site_inspector_endpoint_https_spec.rb +12 -10
  41. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +33 -31
  42. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  43. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  44. data/spec/fixtures/wappalyzer.json +125 -0
  45. data/spec/site_inspector_cache_spec.rb +2 -0
  46. data/spec/site_inspector_disk_cache_spec.rb +8 -6
  47. data/spec/site_inspector_domain_spec.rb +34 -34
  48. data/spec/site_inspector_endpoint_spec.rb +44 -43
  49. data/spec/site_inspector_spec.rb +15 -13
  50. data/spec/spec_helper.rb +2 -0
  51. metadata +125 -55
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Check
@@ -40,11 +42,11 @@ class SiteInspector
40
42
  end
41
43
 
42
44
  def enabled?
43
- !!(@@enabled)
45
+ !!@@enabled
44
46
  end
45
47
 
46
48
  def enabled=(value)
47
- @@enabled = !!(value)
49
+ @@enabled = !!value
48
50
  end
49
51
  end
50
52
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Content < Check
@@ -16,7 +18,7 @@ class SiteInspector
16
18
  require 'nokogiri'
17
19
  @doc ||= Nokogiri::HTML response.body if response
18
20
  end
19
- alias_method :doc, :document
21
+ alias doc document
20
22
 
21
23
  def body
22
24
  @body ||= document.to_s.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
@@ -38,8 +40,16 @@ class SiteInspector
38
40
  document.internal_subset.external_id
39
41
  end
40
42
 
43
+ def generator
44
+ @generator ||= begin
45
+ tag = document.at('meta[name="generator"]')
46
+ tag['content'] if tag
47
+ end
48
+ end
49
+
41
50
  def prefetch
42
51
  return unless endpoint.up?
52
+
43
53
  options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
44
54
  ['robots.txt', 'sitemap.xml', 'humans.txt', random_path].each do |path|
45
55
  request = Typhoeus::Request.new(URI.join(endpoint.uri, path), options)
@@ -55,10 +65,11 @@ class SiteInspector
55
65
  def to_h
56
66
  prefetch
57
67
  {
58
- doctype: doctype,
68
+ doctype: doctype,
69
+ generator: generator,
59
70
  sitemap_xml: sitemap_xml?,
60
- robots_txt: robots_txt?,
61
- humans_txt: humans_txt?,
71
+ robots_txt: robots_txt?,
72
+ humans_txt: humans_txt?,
62
73
  proper_404s: proper_404s?
63
74
  }
64
75
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Cookies < Check
@@ -5,12 +7,12 @@ class SiteInspector
5
7
  if cookie_header.nil? || cookie_header.empty?
6
8
  false
7
9
  elsif block_given?
8
- all.any? { |cookie| block.call(cookie) }
10
+ all.any?(&block)
9
11
  else
10
12
  true
11
13
  end
12
14
  end
13
- alias_method :cookies?, :any?
15
+ alias cookies? any?
14
16
 
15
17
  def all
16
18
  @cookies ||= cookie_header.map { |c| CGI::Cookie.parse(c) } if cookies?
@@ -22,7 +24,7 @@ class SiteInspector
22
24
 
23
25
  def secure?
24
26
  pairs = cookie_header.join('; ').split('; ') # CGI::Cookies#Parse doesn't seem to like secure headers
25
- pairs.any? { |c| c.downcase == 'secure' } && pairs.any? { |c| c.downcase == 'httponly' }
27
+ pairs.any? { |c| c.casecmp('secure').zero? } && pairs.any? { |c| c.casecmp('httponly').zero? }
26
28
  end
27
29
 
28
30
  def to_h
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Dns < Check
@@ -25,7 +27,7 @@ class SiteInspector
25
27
  def record?(type)
26
28
  records.any? { |record| record.type == type } || query(type).count != 0
27
29
  end
28
- alias_method :has_record?, :record?
30
+ alias has_record? record?
29
31
 
30
32
  def dnssec?
31
33
  @dnssec ||= has_record? 'DNSKEY'
@@ -52,7 +54,7 @@ class SiteInspector
52
54
  end
53
55
 
54
56
  def google_apps?
55
- @google ||= records.any? do |record|
57
+ @google_apps ||= records.any? do |record|
56
58
  record.type == 'MX' && record.exchange.to_s =~ /google(mail)?\.com\.?\z/i
57
59
  end
58
60
  end
@@ -62,7 +64,6 @@ class SiteInspector
62
64
  end
63
65
 
64
66
  def ip
65
- require 'resolv'
66
67
  @ip ||= Resolv.getaddress host
67
68
  rescue Resolv::ResolvError
68
69
  nil
@@ -87,14 +88,15 @@ class SiteInspector
87
88
 
88
89
  def to_h
89
90
  return { error: LocalhostError } if localhost?
91
+
90
92
  {
91
- dnssec: dnssec?,
92
- ipv6: ipv6?,
93
- cdn: cdn,
93
+ dnssec: dnssec?,
94
+ ipv6: ipv6?,
95
+ cdn: cdn,
94
96
  cloud_provider: cloud_provider,
95
- google_apps: google_apps?,
96
- hostname: hostname,
97
- ip: ip
97
+ google_apps: google_apps?,
98
+ hostname: hostname,
99
+ ip: ip
98
100
  }
99
101
  end
100
102
 
@@ -118,7 +120,7 @@ class SiteInspector
118
120
  haystack = load_data(type)
119
121
  needle = haystack.find do |_name, domain|
120
122
  cnames.any? do |cname|
121
- domain == cname.tld || domain == "#{cname.sld}.#{cname.tld}"
123
+ [cname.tld, "#{cname.sld}.#{cname.tld}"].include? domain
122
124
  end
123
125
  end
124
126
 
@@ -126,7 +128,7 @@ class SiteInspector
126
128
  return nil unless hostname
127
129
 
128
130
  needle = haystack.find do |_name, domain|
129
- domain == hostname.tld || domain == "#{hostname.sld}.#{hostname.tld}"
131
+ [hostname.tld, "#{hostname.sld}.#{hostname.tld}"].include? domain
130
132
  end
131
133
 
132
134
  needle ? needle[0].to_sym : nil
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Headers < Check
@@ -44,9 +46,9 @@ class SiteInspector
44
46
 
45
47
  # Returns an array of hashes of downcased key/value header pairs (or an empty hash)
46
48
  def all
47
- @all ||= (response && response.headers) ? Hash[response.headers.map { |k, v| [k.downcase, v] }] : {}
49
+ @all ||= response&.headers ? response.headers.transform_keys(&:downcase) : {}
48
50
  end
49
- alias_method :headers, :all
51
+ alias headers all
50
52
 
51
53
  def [](header)
52
54
  headers[header]
@@ -55,10 +57,10 @@ class SiteInspector
55
57
  def to_h
56
58
  {
57
59
  strict_transport_security: strict_transport_security || false,
58
- content_security_policy: content_security_policy || false,
59
- click_jacking_protection: click_jacking_protection || false,
60
- server: server,
61
- xss_protection: xss_protection || false
60
+ content_security_policy: content_security_policy || false,
61
+ click_jacking_protection: click_jacking_protection || false,
62
+ server: server,
63
+ xss_protection: xss_protection || false
62
64
  }
63
65
  end
64
66
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  # Utility parser for HSTS headers.
@@ -5,7 +7,8 @@ class SiteInspector
5
7
  class Hsts < Check
6
8
  def valid?
7
9
  return false unless header
8
- pairs.none? { |key, value| "#{key}#{value}" =~ /[\s\'\"]/ }
10
+
11
+ pairs.none? { |key, value| "#{key}#{value}" =~ /[\s'"]/ }
9
12
  end
10
13
 
11
14
  def max_age
@@ -13,16 +16,17 @@ class SiteInspector
13
16
  end
14
17
 
15
18
  def include_subdomains?
16
- pairs.keys.include? :includesubdomains
19
+ pairs.key?(:includesubdomains)
17
20
  end
18
21
 
19
22
  def preload?
20
- pairs.keys.include? :preload
23
+ pairs.key?(:preload)
21
24
  end
22
25
 
23
26
  def enabled?
24
27
  return false unless max_age
25
- max_age > 0
28
+
29
+ max_age.positive?
26
30
  end
27
31
 
28
32
  # Google's minimum max-age for automatic preloading
@@ -32,12 +36,12 @@ class SiteInspector
32
36
 
33
37
  def to_h
34
38
  {
35
- valid: valid?,
36
- max_age: max_age,
39
+ valid: valid?,
40
+ max_age: max_age,
37
41
  include_subdomains: include_subdomains?,
38
- preload: preload?,
39
- enabled: enabled?,
40
- preload_ready: preload_ready?
42
+ preload: preload?,
43
+ enabled: enabled?,
44
+ preload_ready: preload_ready?
41
45
  }
42
46
  end
43
47
 
@@ -61,9 +65,9 @@ class SiteInspector
61
65
  directives.each do |directive|
62
66
  key, value = directive.downcase.split('=')
63
67
 
64
- if value =~ /\".*\"/
65
- value = value.sub(/^\"/, '')
66
- value = value.sub(/\"$/, '')
68
+ if /".*"/.match?(value)
69
+ value = value.sub(/^"/, '')
70
+ value = value.sub(/"$/, '')
67
71
  end
68
72
 
69
73
  pairs[key.to_sym] = value
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Https < Check
@@ -23,7 +25,7 @@ class SiteInspector
23
25
 
24
26
  def to_h
25
27
  {
26
- valid: valid?,
28
+ valid: valid?,
27
29
  return_code: response.return_code
28
30
  }
29
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Endpoint
3
5
  class Sniffer < Check
@@ -13,7 +15,7 @@ class SiteInspector
13
15
  :php,
14
16
  :expression_engine,
15
17
  :cowboy
16
- ]
18
+ ].freeze
17
19
 
18
20
  def framework
19
21
  cms = sniff :cms
@@ -21,7 +23,8 @@ class SiteInspector
21
23
  return :expression_engine if endpoint.cookies.any? { |c| c.keys.first =~ /^exp_/ }
22
24
  return :php if endpoint.cookies['PHPSESSID']
23
25
  return :coldfusion if endpoint.cookies['CFID'] && endpoint.cookies['CFTOKEN']
24
- return :cowboy if endpoint.headers.server.to_s.downcase == 'cowboy'
26
+ return :cowboy if endpoint.headers.server.to_s.casecmp('cowboy').zero?
27
+
25
28
  nil
26
29
  end
27
30
 
@@ -43,9 +46,9 @@ class SiteInspector
43
46
 
44
47
  def to_h
45
48
  {
46
- framework: framework,
47
- analytics: analytics,
48
- javascript: javascript,
49
+ framework: framework,
50
+ analytics: analytics,
51
+ javascript: javascript,
49
52
  advertising: advertising
50
53
  }
51
54
  end
@@ -55,8 +58,8 @@ class SiteInspector
55
58
  def sniff(type)
56
59
  require 'sniffles'
57
60
  results = Sniffles.sniff(endpoint.content.body, type).select { |_name, meta| meta[:found] }
58
- results.keys.first if results
59
- rescue
61
+ results&.keys&.first
62
+ rescue StandardError
60
63
  nil
61
64
  end
62
65
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Wappalyzer < Check
6
+ ENDPOINT = 'https://api.wappalyzer.com/lookup/v2/'
7
+
8
+ def to_h
9
+ return {} unless data['technologies']
10
+
11
+ @to_h ||= begin
12
+ technologies = {}
13
+ data['technologies'].each do |t|
14
+ category = t['categories'].first
15
+ category = category ? category['name'] : 'Other'
16
+ technologies[category] ||= []
17
+ technologies[category].push t['name']
18
+ end
19
+
20
+ technologies
21
+ end
22
+ end
23
+
24
+ private
25
+
26
+ def request
27
+ @request ||= begin
28
+ options = SiteInspector.typhoeus_defaults
29
+ headers = options[:headers].merge({ "x-api-key": api_key })
30
+ options = options.merge(method: :get, headers: headers)
31
+ Typhoeus::Request.new(url, options)
32
+ end
33
+ end
34
+
35
+ def data
36
+ return {} unless api_key && api_key != ''
37
+
38
+ @data ||= begin
39
+ SiteInspector.hydra.queue(request)
40
+ SiteInspector.hydra.run
41
+
42
+ response = request.response
43
+ if response.success?
44
+ JSON.parse(response.body).first
45
+ else
46
+ {}
47
+ end
48
+ end
49
+ end
50
+
51
+ def url
52
+ url = Addressable::URI.parse(ENDPOINT)
53
+ url.query_values = { urls: endpoint.uri }
54
+ url
55
+ end
56
+
57
+ def api_key
58
+ @api_key ||= ENV['WAPPALYZER_API_KEY']
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ class SiteInspector
4
+ class Endpoint
5
+ class Whois < Check
6
+ def domain
7
+ @domain ||= whois.lookup host
8
+ end
9
+
10
+ def ip
11
+ @ip ||= whois.lookup ip_address
12
+ end
13
+
14
+ def to_h
15
+ {
16
+ domain: record_to_h(domain),
17
+ ip: record_to_h(ip)
18
+ }
19
+ end
20
+
21
+ private
22
+
23
+ def record_to_h(record)
24
+ record.content.scan(/^\s*(.*?):\s*(.*?)\r?\n/).to_h
25
+ end
26
+
27
+ def ip_address
28
+ @ip_address ||= Resolv.getaddress host
29
+ end
30
+
31
+ def whois
32
+ @whois ||= ::Whois::Client.new
33
+ end
34
+ end
35
+ end
36
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class DiskCache
3
5
  def initialize(dir = nil, replace = nil)
@@ -1,10 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class SiteInspector
2
4
  class Domain
3
5
  attr_reader :host
4
6
 
5
7
  def initialize(host)
6
8
  host = host.downcase
7
- host = host.sub(/^https?\:/, '')
9
+ host = host.sub(/^https?:/, '')
8
10
  host = host.sub(%r{^/+}, '')
9
11
  host = host.sub(/^www\./, '')
10
12
  uri = Addressable::URI.parse "//#{host}"
@@ -82,7 +84,8 @@ class SiteInspector
82
84
  # TODO: don't need to require that the HTTPS cert is valid for this purpose.
83
85
  def enforces_https?
84
86
  return false unless https?
85
- endpoints.select(&:http?).all? { |e| !e.up? || (e.redirect && e.redirect.https?) }
87
+
88
+ endpoints.select(&:http?).all? { |e| !e.up? || e.redirect&.https? }
86
89
  end
87
90
 
88
91
  # we can say that a canonical HTTPS site "defaults" to HTTPS,
@@ -91,7 +94,7 @@ class SiteInspector
91
94
  #
92
95
  # TODO: not implemented.
93
96
  def defaults_https?
94
- fail 'Not implemented. Halp?'
97
+ raise 'Not implemented. Halp?'
95
98
  end
96
99
 
97
100
  # HTTPS is "downgraded" if both:
@@ -102,6 +105,7 @@ class SiteInspector
102
105
  # TODO: the redirect must be internal.
103
106
  def downgrades_https?
104
107
  return false unless https?
108
+
105
109
  canonical_endpoint.redirect? && canonical_endpoint.redirect.http?
106
110
  end
107
111
 
@@ -129,7 +133,7 @@ class SiteInspector
129
133
  return true if endpoints.select(&:root?).all? { |e| !e.up? }
130
134
 
131
135
  # Does either root endpoint redirect to a www endpoint?
132
- endpoints.select(&:root?).any? { |e| e.redirect && e.redirect.www? }
136
+ endpoints.select(&:root?).any? { |e| e.redirect&.www? }
133
137
  end
134
138
 
135
139
  # A domain is "canonically" at https if:
@@ -160,7 +164,7 @@ class SiteInspector
160
164
  return true if endpoints.select(&:http?).all? { |e| !e.up? }
161
165
 
162
166
  # at least one http endpoint redirects immediately to https
163
- endpoints.select(&:http?).any? { |e| e.redirect && e.redirect.https? }
167
+ endpoints.select(&:http?).any? { |e| e.redirect&.https? }
164
168
  end
165
169
 
166
170
  # A domain redirects if
@@ -168,6 +172,7 @@ class SiteInspector
168
172
  # 2. All endpoints are either down or an external redirect
169
173
  def redirect?
170
174
  return false unless redirect
175
+
171
176
  endpoints.all? { |e| !e.up? || e.external_redirect? }
172
177
  end
173
178
 
@@ -178,7 +183,7 @@ class SiteInspector
178
183
 
179
184
  # HSTS on the canonical domain?
180
185
  def hsts?
181
- canonical_endpoint.hsts && canonical_endpoint.hsts.enabled?
186
+ canonical_endpoint.hsts&.enabled?
182
187
  end
183
188
 
184
189
  def hsts_subdomains?
@@ -187,6 +192,7 @@ class SiteInspector
187
192
 
188
193
  def hsts_preload_ready?
189
194
  return false unless hsts_subdomains?
195
+
190
196
  endpoints.find { |e| e.root? && e.https? }.hsts.preload_ready?
191
197
  end
192
198
 
@@ -225,40 +231,40 @@ class SiteInspector
225
231
  prefetch
226
232
 
227
233
  hash = {
228
- host: host,
229
- up: up?,
230
- responds: responds?,
231
- www: www?,
232
- root: root?,
233
- https: https?,
234
- enforces_https: enforces_https?,
235
- downgrades_https: downgrades_https?,
236
- canonically_www: canonically_www?,
237
- canonically_https: canonically_https?,
238
- redirect: redirect?,
239
- hsts: hsts?,
240
- hsts_subdomains: hsts_subdomains?,
234
+ host: host,
235
+ up: up?,
236
+ responds: responds?,
237
+ www: www?,
238
+ root: root?,
239
+ https: https?,
240
+ enforces_https: enforces_https?,
241
+ downgrades_https: downgrades_https?,
242
+ canonically_www: canonically_www?,
243
+ canonically_https: canonically_https?,
244
+ redirect: redirect?,
245
+ hsts: hsts?,
246
+ hsts_subdomains: hsts_subdomains?,
241
247
  hsts_preload_ready: hsts_preload_ready?,
242
248
  canonical_endpoint: canonical_endpoint.to_h(options)
243
249
  }
244
250
 
245
251
  if options['all']
246
- hash.merge!(endpoints: {
247
- https: {
248
- root: endpoints[0].to_h(options),
249
- www: endpoints[1].to_h(options)
250
- },
251
- http: {
252
- root: endpoints[2].to_h(options),
253
- www: endpoints[3].to_h(options)
254
- }
255
- })
252
+ hash[:endpoints] = {
253
+ https: {
254
+ root: endpoints[0].to_h(options),
255
+ www: endpoints[1].to_h(options)
256
+ },
257
+ http: {
258
+ root: endpoints[2].to_h(options),
259
+ www: endpoints[3].to_h(options)
260
+ }
261
+ }
256
262
  end
257
263
 
258
264
  hash
259
265
  end
260
266
 
261
- def to_json
267
+ def to_json(*_args)
262
268
  to_h.to_json
263
269
  end
264
270
  end