site-inspector 1.0.2 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +8 -0
- data/.rubocop.yml +42 -0
- data/.rubocop_todo.yml +139 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +7 -0
- data/Guardfile +10 -0
- data/README.md +189 -0
- data/Rakefile +10 -0
- data/bin/site-inspector +50 -22
- data/lib/cliver/dependency_ext.rb +24 -0
- data/lib/site-inspector.rb +62 -615
- data/lib/site-inspector/cache.rb +10 -51
- data/lib/site-inspector/checks/accessibility.rb +135 -0
- data/lib/site-inspector/checks/check.rb +54 -0
- data/lib/site-inspector/checks/content.rb +85 -0
- data/lib/site-inspector/checks/cookies.rb +45 -0
- data/lib/site-inspector/checks/dns.rb +138 -0
- data/lib/site-inspector/checks/headers.rb +68 -0
- data/lib/site-inspector/checks/hsts.rb +81 -0
- data/lib/site-inspector/checks/https.rb +40 -0
- data/lib/site-inspector/checks/sniffer.rb +67 -0
- data/lib/site-inspector/checks/wappalyzer.rb +62 -0
- data/lib/site-inspector/checks/whois.rb +36 -0
- data/lib/site-inspector/disk_cache.rb +42 -0
- data/lib/site-inspector/domain.rb +271 -0
- data/lib/site-inspector/endpoint.rb +217 -0
- data/lib/site-inspector/rails_cache.rb +13 -0
- data/lib/site-inspector/version.rb +5 -0
- data/package-lock.json +505 -0
- data/package.json +23 -0
- data/script/bootstrap +2 -0
- data/script/cibuild +11 -0
- data/script/console +3 -0
- data/script/pa11y-version +10 -0
- data/script/release +38 -0
- data/site-inspector.gemspec +42 -0
- data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
- data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
- data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
- data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
- data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
- data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
- data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
- data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
- data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
- data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
- data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
- data/spec/fixtures/wappalyzer.json +125 -0
- data/spec/site_inspector_cache_spec.rb +15 -0
- data/spec/site_inspector_disk_cache_spec.rb +39 -0
- data/spec/site_inspector_domain_spec.rb +271 -0
- data/spec/site_inspector_endpoint_spec.rb +252 -0
- data/spec/site_inspector_spec.rb +48 -0
- data/spec/spec_helper.rb +19 -0
- metadata +204 -63
- data/lib/site-inspector/compliance.rb +0 -19
- data/lib/site-inspector/dns.rb +0 -92
- data/lib/site-inspector/headers.rb +0 -59
- data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cliver
  # Convenience extensions to Cliver::Dependency: memoized access to the
  # detected executable path and to the version reported for that path.
  #
  # Memo state lives in dedicated `@detected_*` instance variables so it
  # cannot collide with instance variables the reopened class manages itself
  # (NOTE(review): Cliver appears to keep its search-path option in `@path`;
  # confirm against the installed cliver version).
  class Dependency
    # Memoized shortcut for `detect!`.
    #
    # @return the path to the detected dependency, as returned by `detect!`
    # @raise whatever `detect!` raises when the dependency is not satisfied
    def path
      @detected_path ||= detect!
    end

    # Returns the version of the resolved dependency.
    #
    # Looks up the entry of `installed_versions` whose path equals {#path}
    # and returns its version component. The result is memoized, including
    # a nil result, which is why the guard uses `defined?` rather than `||=`.
    #
    # @return the version paired with {#path}, or nil when no installed
    #   entry matches that path
    def version
      return @detected_version if defined?(@detected_version)

      match = installed_versions.find { |candidate, _version| candidate == path }
      @detected_version = match.nil? ? nil : match[1]
    end

    # Returns the leading dot-separated component of {#version}
    # (e.g. "12" for "12.4.1"), or nil when the version is unknown.
    # Assumes {#version} is a String when present.
    def major_version
      version&.split('.')&.first
    end
  end
end
|
data/lib/site-inspector.rb
CHANGED
@@ -1,636 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
|
-
# needed for HTTP analysis
|
3
3
|
require 'open-uri'
|
4
|
-
require
|
4
|
+
require 'addressable/uri'
|
5
5
|
require 'public_suffix'
|
6
6
|
require 'typhoeus'
|
7
|
+
require 'parallel'
|
8
|
+
require 'cliver'
|
9
|
+
require 'whois'
|
10
|
+
require 'cgi'
|
11
|
+
require 'resolv'
|
12
|
+
require 'dotenv/load'
|
7
13
|
|
8
14
|
require_relative 'site-inspector/cache'
|
9
|
-
require_relative 'site-inspector/
|
10
|
-
require_relative 'site-inspector/
|
11
|
-
require_relative 'site-inspector/
|
12
|
-
require_relative 'site-inspector/
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
15
|
+
require_relative 'site-inspector/disk_cache'
|
16
|
+
require_relative 'site-inspector/rails_cache'
|
17
|
+
require_relative 'site-inspector/domain'
|
18
|
+
require_relative 'site-inspector/checks/check'
|
19
|
+
require_relative 'site-inspector/checks/accessibility'
|
20
|
+
require_relative 'site-inspector/checks/content'
|
21
|
+
require_relative 'site-inspector/checks/dns'
|
22
|
+
require_relative 'site-inspector/checks/headers'
|
23
|
+
require_relative 'site-inspector/checks/hsts'
|
24
|
+
require_relative 'site-inspector/checks/https'
|
25
|
+
require_relative 'site-inspector/checks/sniffer'
|
26
|
+
require_relative 'site-inspector/checks/cookies'
|
27
|
+
require_relative 'site-inspector/checks/whois'
|
28
|
+
require_relative 'site-inspector/checks/wappalyzer'
|
29
|
+
require_relative 'site-inspector/endpoint'
|
30
|
+
require_relative 'site-inspector/version'
|
31
|
+
require_relative 'cliver/dependency_ext'
|
20
32
|
|
21
33
|
class SiteInspector
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
max_age: nil,
|
34
|
-
include_subdomains: false,
|
35
|
-
preload: false,
|
36
|
-
enabled: false,
|
37
|
-
preload_ready: false
|
38
|
-
}
|
39
|
-
|
40
|
-
return nothing unless header and header.is_a?(String)
|
41
|
-
|
42
|
-
directives = header.split(/\s*;\s*/)
|
43
|
-
|
44
|
-
pairs = []
|
45
|
-
directives.each do |directive|
|
46
|
-
name, value = directive.downcase.split("=")
|
47
|
-
|
48
|
-
if value and value.start_with?("\"") and value.end_with?("\"")
|
49
|
-
value = value.sub(/^\"/, '')
|
50
|
-
value = value.sub(/\"$/, '')
|
51
|
-
end
|
52
|
-
|
53
|
-
pairs.push([name, value])
|
34
|
+
class << self
|
35
|
+
attr_writer :timeout, :cache, :typhoeus_options
|
36
|
+
|
37
|
+
def cache
|
38
|
+
@cache ||= if ENV['CACHE']
|
39
|
+
SiteInspector::DiskCache.new
|
40
|
+
elsif Object.const_defined?('Rails')
|
41
|
+
SiteInspector::RailsCache.new
|
42
|
+
else
|
43
|
+
SiteInspector::Cache.new
|
44
|
+
end
|
54
45
|
end
|
55
46
|
|
56
|
-
|
57
|
-
|
58
|
-
# TODO: more comprehensive rejection of characters
|
59
|
-
invalid_chars = /[\s\'\"]/
|
60
|
-
(name =~ invalid_chars) or (value =~ invalid_chars)
|
47
|
+
def timeout
|
48
|
+
@timeout || 10
|
61
49
|
end
|
62
50
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
max_age_directive = pairs.find {|n, v| n == "max-age"}
|
67
|
-
max_age = max_age_directive ? max_age_directive[1].to_i : nil
|
68
|
-
include_subdomains = !!pairs.find {|n, v| n == "includesubdomains"}
|
69
|
-
preload = !!pairs.find {|n, v| n == "preload"}
|
70
|
-
|
71
|
-
enabled = !!(max_age and (max_age > 0))
|
72
|
-
|
73
|
-
# Google's minimum max-age for automatic preloading
|
74
|
-
eighteen_weeks = !!(max_age and (max_age >= 10886400))
|
75
|
-
preload_ready = !!(eighteen_weeks and include_subdomains and preload)
|
76
|
-
|
77
|
-
{
|
78
|
-
max_age: max_age,
|
79
|
-
include_subdomains: include_subdomains,
|
80
|
-
preload: preload,
|
81
|
-
enabled: enabled,
|
82
|
-
preload_ready: preload_ready
|
83
|
-
}
|
84
|
-
end
|
85
|
-
|
86
|
-
# makes no network requests
|
87
|
-
def initialize(domain, options = {})
|
88
|
-
domain = domain.downcase
|
89
|
-
domain = domain.sub /^https?\:/, ""
|
90
|
-
domain = domain.sub /^\/+/, ""
|
91
|
-
domain = domain.sub /^www\./, ""
|
92
|
-
@uri = Addressable::URI.parse "//#{domain}"
|
93
|
-
@domain = PublicSuffix.parse @uri.host
|
94
|
-
@timeout = options[:timeout] || 10
|
95
|
-
end
|
96
|
-
|
97
|
-
def inspect
|
98
|
-
"<SiteInspector domain=\"#{domain}\">"
|
99
|
-
end
|
100
|
-
|
101
|
-
def uri(ssl=enforce_https?,www=www?)
|
102
|
-
uri = @uri.clone
|
103
|
-
uri.host = www ? "www.#{uri.host}" : uri.host
|
104
|
-
uri.scheme = ssl ? "https" : "http"
|
105
|
-
uri
|
106
|
-
end
|
107
|
-
|
108
|
-
def domain
|
109
|
-
www? ? PublicSuffix.parse("www.#{@uri.host}") : @domain
|
110
|
-
end
|
111
|
-
|
112
|
-
def request(ssl=false, www=false, followlocation=true, ssl_verifypeer=true, ssl_verifyhost=true)
|
113
|
-
to_get = uri(ssl, www)
|
114
|
-
|
115
|
-
# debugging
|
116
|
-
# puts "fetching: #{to_get}, #{followlocation ? "follow" : "no follow"}, #{ssl_verifypeer ? "verify peer, " : ""}#{ssl_verifyhost ? "verify host" : ""}"
|
117
|
-
|
118
|
-
Typhoeus.get(to_get, followlocation: followlocation, ssl_verifypeer: ssl_verifypeer, ssl_verifyhost: (ssl_verifyhost ? 2 : 0), timeout: @timeout)
|
119
|
-
end
|
120
|
-
|
121
|
-
def response
|
122
|
-
@response ||= begin
|
123
|
-
if response = request(false, false) and response.success?
|
124
|
-
@non_www = true
|
125
|
-
response
|
126
|
-
elsif response = request(false, true) and response.success?
|
127
|
-
@non_www = false
|
128
|
-
response
|
129
|
-
else
|
130
|
-
false
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
def timed_out?
|
136
|
-
response && response.timed_out?
|
137
|
-
end
|
138
|
-
|
139
|
-
def doc
|
140
|
-
require 'nokogiri'
|
141
|
-
@doc ||= Nokogiri::HTML response.body if response
|
142
|
-
end
|
143
|
-
|
144
|
-
def body
|
145
|
-
doc.to_s.force_encoding("UTF-8").encode("UTF-8", :invalid => :replace, :replace => "")
|
146
|
-
end
|
147
|
-
|
148
|
-
def government?
|
149
|
-
require 'gman'
|
150
|
-
Gman.valid? domain.to_s
|
151
|
-
end
|
152
|
-
|
153
|
-
def https?
|
154
|
-
@https ||= request(true, www?).success?
|
155
|
-
end
|
156
|
-
alias_method :ssl?, :https?
|
157
|
-
|
158
|
-
def enforce_https?
|
159
|
-
return false unless https?
|
160
|
-
@enforce_https ||= begin
|
161
|
-
response = request(false, www?)
|
162
|
-
if response.effective_url
|
163
|
-
Addressable::URI.parse(response.effective_url).scheme == "https"
|
164
|
-
else
|
165
|
-
false
|
166
|
-
end
|
51
|
+
def inspect(domain)
|
52
|
+
Domain.new(domain)
|
167
53
|
end
|
168
|
-
end
|
169
|
-
|
170
|
-
def www?
|
171
|
-
response && response.effective_url && !!response.effective_url.match(/^https?:\/\/www\./)
|
172
|
-
end
|
173
|
-
|
174
|
-
def non_www?
|
175
|
-
response && @non_www
|
176
|
-
end
|
177
|
-
|
178
|
-
def redirect?
|
179
|
-
!!redirect
|
180
|
-
end
|
181
|
-
|
182
|
-
def redirect
|
183
|
-
@redirect ||= begin
|
184
|
-
if location = request(https?, www?, false).headers["location"]
|
185
|
-
redirect_domain = SiteInspector.new(location).domain
|
186
|
-
redirect_domain.to_s if redirect_domain.to_s != domain.to_s
|
187
|
-
end
|
188
|
-
rescue
|
189
|
-
nil
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
def http
|
194
|
-
details = {
|
195
|
-
endpoints: endpoints
|
196
|
-
}
|
197
|
-
|
198
|
-
# convenient shorthand for the extensive statements to come
|
199
|
-
combos = details[:endpoints]
|
200
|
-
|
201
|
-
# A domain is "canonically" at www if:
|
202
|
-
# * at least one of its www endpoints responds
|
203
|
-
# * both root endpoints are either down or redirect *somewhere*
|
204
|
-
# * either both root endpoints are down, *or* at least one
|
205
|
-
# root endpoint redirect should immediately go to
|
206
|
-
# an *internal* www endpoint
|
207
|
-
# This is meant to affirm situations like:
|
208
|
-
# http:// -> https:// -> https://www
|
209
|
-
# https:// -> http:// -> https://www
|
210
|
-
# and meant to avoid affirming situations like:
|
211
|
-
# http:// -> http://non-www,
|
212
|
-
# http://www -> http://non-www
|
213
|
-
# or like:
|
214
|
-
# https:// -> 200, http:// -> http://www
|
215
54
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
!combos[:https][:root][:status].to_s.start_with?("2")
|
226
|
-
) and (
|
227
|
-
combos[:http][:root][:redirect] or
|
228
|
-
!combos[:http][:root][:up] or
|
229
|
-
!combos[:http][:root][:status].to_s.start_with?("2")
|
230
|
-
)
|
231
|
-
) and (
|
232
|
-
(
|
233
|
-
(
|
234
|
-
!combos[:https][:root][:up] or
|
235
|
-
combos[:https][:root][:https_bad_name] or
|
236
|
-
!combos[:https][:root][:status].to_s.start_with?("2")
|
237
|
-
) and
|
238
|
-
(
|
239
|
-
!combos[:http][:root][:up] or
|
240
|
-
!combos[:http][:root][:status].to_s.start_with?("2")
|
241
|
-
)
|
242
|
-
) or
|
243
|
-
(
|
244
|
-
combos[:https][:root][:redirect_immediately_to_www] and
|
245
|
-
!combos[:https][:root][:redirect_immediately_external]
|
246
|
-
) or
|
247
|
-
(
|
248
|
-
combos[:http][:root][:redirect_immediately_to_www] and
|
249
|
-
!combos[:http][:root][:redirect_immediately_external]
|
250
|
-
)
|
251
|
-
)
|
252
|
-
)
|
253
|
-
|
254
|
-
# A domain is "canonically" at https if:
|
255
|
-
# * at least one of its https endpoints is live and
|
256
|
-
# doesn't have an invalid hostname
|
257
|
-
# * both http endpoints are either down or redirect *somewhere*
|
258
|
-
# * at least one http endpoint redirects immediately to
|
259
|
-
# an *internal* https endpoint
|
260
|
-
# This is meant to affirm situations like:
|
261
|
-
# http:// -> http://www -> https://
|
262
|
-
# https:// -> http:// -> https://www
|
263
|
-
# and meant to avoid affirming situations like:
|
264
|
-
# http:// -> http://non-www
|
265
|
-
# http://www -> http://non-www
|
266
|
-
# or:
|
267
|
-
# http:// -> 200, http://www -> https://www
|
268
|
-
#
|
269
|
-
# It allows a site to be canonically HTTPS if the cert has
|
270
|
-
# a valid hostname but invalid chain issues.
|
271
|
-
|
272
|
-
https = !!(
|
273
|
-
(
|
274
|
-
(
|
275
|
-
combos[:https][:root][:up] and
|
276
|
-
!combos[:https][:root][:https_bad_name]
|
277
|
-
) or
|
278
|
-
(
|
279
|
-
combos[:https][:www][:up] and
|
280
|
-
!combos[:https][:www][:https_bad_name]
|
281
|
-
)
|
282
|
-
) and (
|
283
|
-
(
|
284
|
-
combos[:http][:root][:redirect] or
|
285
|
-
!combos[:http][:root][:up] or
|
286
|
-
!combos[:http][:root][:status].to_s.start_with?("2")
|
287
|
-
) and (
|
288
|
-
combos[:http][:www][:redirect] or
|
289
|
-
!combos[:http][:www][:up] or
|
290
|
-
!combos[:http][:www][:status].to_s.start_with?("2")
|
291
|
-
)
|
292
|
-
) and (
|
293
|
-
(
|
294
|
-
combos[:http][:root][:redirect_immediately_to_https] and
|
295
|
-
!combos[:http][:root][:redirect_immediately_external]
|
296
|
-
) or (
|
297
|
-
combos[:http][:www][:redirect_immediately_to_https] and
|
298
|
-
!combos[:http][:www][:redirect_immediately_external]
|
299
|
-
)
|
300
|
-
)
|
301
|
-
)
|
302
|
-
|
303
|
-
details[:canonical_endpoint] = www ? :www : :root
|
304
|
-
details[:canonical_protocol] = https ? :https : :http
|
305
|
-
details[:canonical] = uri(https, www).to_s
|
306
|
-
|
307
|
-
# If any endpoint is up, the domain is up.
|
308
|
-
details[:up] = !!(
|
309
|
-
combos[:https][:www][:up] or
|
310
|
-
combos[:https][:root][:up] or
|
311
|
-
combos[:http][:www][:up] or
|
312
|
-
combos[:http][:root][:up]
|
313
|
-
)
|
314
|
-
|
315
|
-
# A domain's root is broken if neither protocol can connect.
|
316
|
-
details[:broken_root] = !!(
|
317
|
-
!combos[:https][:root][:up] and
|
318
|
-
!combos[:http][:root][:up]
|
319
|
-
)
|
320
|
-
|
321
|
-
# A domain's www is broken if neither protocol can connect.
|
322
|
-
details[:broken_www] = !!(
|
323
|
-
!combos[:https][:www][:up] and
|
324
|
-
!combos[:http][:www][:up]
|
325
|
-
)
|
326
|
-
|
327
|
-
# HTTPS is "supported" (different than "canonical" or "enforced") if:
|
328
|
-
#
|
329
|
-
# * Either of the HTTPS endpoints is listening, and doesn't have
|
330
|
-
# an invalid hostname.
|
331
|
-
details[:support_https] = !!(
|
332
|
-
(
|
333
|
-
(combos[:https][:root][:status] != 0) and
|
334
|
-
!combos[:https][:root][:https_bad_name]
|
335
|
-
) or (
|
336
|
-
(combos[:https][:www][:status] != 0) and
|
337
|
-
!combos[:https][:www][:https_bad_name]
|
338
|
-
)
|
339
|
-
)
|
340
|
-
|
341
|
-
# we can say that a canonical HTTPS site "defaults" to HTTPS,
|
342
|
-
# even if it doesn't *strictly* enforce it (e.g. having a www
|
343
|
-
# subdomain first to go HTTP root before HTTPS root).
|
344
|
-
details[:default_https] = https
|
345
|
-
|
346
|
-
# HTTPS is "downgraded" if both:
|
347
|
-
#
|
348
|
-
# * HTTPS is supported, and
|
349
|
-
# * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
|
350
|
-
|
351
|
-
details[:downgrade_https] = !!(
|
352
|
-
details[:support_https] and
|
353
|
-
(
|
354
|
-
combos[:https][details[:canonical_endpoint]][:redirect] and
|
355
|
-
!combos[:https][details[:canonical_endpoint]][:redirect_immediately_external] and
|
356
|
-
!combos[:https][details[:canonical_endpoint]][:redirect_immediately_to_https]
|
357
|
-
)
|
358
|
-
)
|
359
|
-
|
360
|
-
# HTTPS is enforced if one of the HTTPS endpoints is "live",
|
361
|
-
# and if both *HTTP* endpoints are either:
|
362
|
-
#
|
363
|
-
# * down, or
|
364
|
-
# * redirect immediately to HTTPS.
|
365
|
-
#
|
366
|
-
# This is different than whether a domain is "canonically" HTTPS.
|
367
|
-
#
|
368
|
-
# * an HTTP redirect can go to HTTPS on another domain, as long
|
369
|
-
# as it's immediate.
|
370
|
-
# * a domain with an invalid cert can still be enforcing HTTPS.
|
371
|
-
details[:enforce_https] = !!(
|
372
|
-
(
|
373
|
-
!combos[:http][:www][:up] or
|
374
|
-
(combos[:http][:www][:redirect_immediately_to_https])
|
375
|
-
) and
|
376
|
-
(
|
377
|
-
!combos[:http][:root][:up] or
|
378
|
-
(combos[:http][:root][:redirect_immediately_to_https])
|
379
|
-
) and
|
380
|
-
(
|
381
|
-
combos[:https][:www][:up] or
|
382
|
-
combos[:https][:root][:up]
|
383
|
-
)
|
384
|
-
)
|
385
|
-
|
386
|
-
# The domain is a redirect if at least one endpoint is up,
|
387
|
-
# and each one is *either* an external redirect or down entirely.
|
388
|
-
details[:redirect] = !!(
|
389
|
-
details[:up] and
|
390
|
-
(
|
391
|
-
combos[:http][:www][:redirect_external] or
|
392
|
-
!combos[:http][:www][:up] or
|
393
|
-
combos[:http][:www][:status] >= 400
|
394
|
-
) and
|
395
|
-
(
|
396
|
-
combos[:http][:root][:redirect_external] or
|
397
|
-
!combos[:http][:root][:up] or
|
398
|
-
combos[:http][:root][:status] >= 400
|
399
|
-
) and
|
400
|
-
(
|
401
|
-
combos[:https][:www][:redirect_external] or
|
402
|
-
!combos[:https][:www][:up] or
|
403
|
-
combos[:https][:www][:https_bad_name] or
|
404
|
-
combos[:https][:www][:status] >= 400
|
405
|
-
) and
|
406
|
-
(
|
407
|
-
combos[:https][:root][:redirect_external] or
|
408
|
-
!combos[:https][:root][:up] or
|
409
|
-
combos[:https][:root][:https_bad_name] or
|
410
|
-
combos[:https][:root][:status] >= 400
|
411
|
-
)
|
412
|
-
)
|
413
|
-
|
414
|
-
# OK, we've said a domain is a "redirect" domain.
|
415
|
-
# What does the domain redirect to?
|
416
|
-
if details[:redirect]
|
417
|
-
canon = combos[details[:canonical_protocol]][details[:canonical_endpoint]]
|
418
|
-
details[:redirect_to] = canon[:redirect_to]
|
419
|
-
else
|
420
|
-
details[:redirect_to] = nil
|
421
|
-
end
|
422
|
-
|
423
|
-
# HSTS on the canonical domain? (valid HTTPS checked in endpoint)
|
424
|
-
details[:hsts] = !!combos[:https][details[:canonical_endpoint]][:hsts]
|
425
|
-
details[:hsts_header] = combos[:https][details[:canonical_endpoint]][:hsts_header]
|
426
|
-
|
427
|
-
# HSTS on the entire domain?
|
428
|
-
details[:hsts_entire_domain] = !!(
|
429
|
-
combos[:https][:root][:hsts] and
|
430
|
-
combos[:https][:root][:hsts_details][:include_subdomains]
|
431
|
-
)
|
432
|
-
|
433
|
-
# HSTS preload-ready for the entire domain?
|
434
|
-
#
|
435
|
-
# Re-checks :hsts_entire_domain in case the :preload_ready
|
436
|
-
# flag ever changes its definition to not require include_subdomains.
|
437
|
-
|
438
|
-
details[:hsts_entire_domain_preload] = !!(
|
439
|
-
details[:hsts_entire_domain] and
|
440
|
-
combos[:https][:root][:hsts_details][:preload_ready]
|
441
|
-
)
|
442
|
-
|
443
|
-
details
|
444
|
-
end
|
445
|
-
|
446
|
-
def endpoints
|
447
|
-
https_www = http_endpoint(true, true)
|
448
|
-
http_www = http_endpoint(false, true)
|
449
|
-
https_root = http_endpoint(true, false)
|
450
|
-
http_root = http_endpoint(false, false)
|
451
|
-
|
452
|
-
{
|
453
|
-
https: {
|
454
|
-
www: https_www,
|
455
|
-
root: https_root
|
456
|
-
},
|
457
|
-
http: {
|
458
|
-
www: http_www,
|
459
|
-
root: http_root
|
55
|
+
def typhoeus_defaults
|
56
|
+
defaults = {
|
57
|
+
followlocation: false,
|
58
|
+
timeout: SiteInspector.timeout,
|
59
|
+
accept_encoding: 'gzip',
|
60
|
+
method: :head,
|
61
|
+
headers: {
|
62
|
+
'User-Agent' => "Mozilla/5.0 (compatible; SiteInspector/#{SiteInspector::VERSION}; +https://github.com/benbalter/site-inspector)"
|
63
|
+
}
|
460
64
|
}
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
# State of affairs at a particular endpoint.
|
465
|
-
def http_endpoint(ssl, www)
|
466
|
-
details = {}
|
467
|
-
|
468
|
-
# Don't follow redirects for first ping.
|
469
|
-
response = request(ssl, www, false)
|
470
|
-
|
471
|
-
|
472
|
-
# For HTTPS: examine the full range of possibilities.
|
473
|
-
if ssl
|
474
|
-
if response.return_code == :ok
|
475
|
-
details[:https_valid] = true
|
476
|
-
details[:https_bad_chain] = false
|
477
|
-
details[:https_bad_name] = false
|
478
|
-
|
479
|
-
# Bad certificate chain.
|
480
|
-
elsif response.return_code == :ssl_cacert
|
481
|
-
details[:https_valid] = false
|
482
|
-
details[:https_bad_chain] = true
|
483
|
-
response = request(ssl, www, false, false, true)
|
484
|
-
# Bad everything.
|
485
|
-
if response.return_code == :peer_failed_verification
|
486
|
-
details[:https_bad_name] = true
|
487
|
-
response = request(ssl, www, false, false, false)
|
488
|
-
end
|
489
|
-
|
490
|
-
# Bad hostname.
|
491
|
-
elsif response.return_code == :peer_failed_verification
|
492
|
-
details[:https_valid] = false
|
493
|
-
details[:https_bad_name] = true
|
494
|
-
response = request(ssl, www, false, true, false)
|
495
|
-
# Bad everything.
|
496
|
-
if response.return_code == :ssl_cacert
|
497
|
-
details[:https_bad_chain] = true
|
498
|
-
response = request(ssl, www, false, false, false)
|
499
|
-
end
|
500
|
-
|
501
|
-
# not sure what else would happen
|
502
|
-
elsif response.response_code != 0
|
503
|
-
details[:https_valid] = false
|
504
|
-
details[:https_unknown_issue] = response.return_code
|
505
|
-
end
|
65
|
+
defaults.merge! @typhoeus_options if @typhoeus_options
|
66
|
+
defaults
|
506
67
|
end
|
507
68
|
|
508
|
-
#
|
509
|
-
|
510
|
-
|
511
|
-
return details if !details[:up]
|
512
|
-
|
513
|
-
headers = Hash[response.headers.map{ |k,v| [k.downcase,v] }]
|
514
|
-
details[:headers] = headers
|
515
|
-
|
516
|
-
|
517
|
-
# HSTS only takes effect when delivered over valid HTTPS.
|
518
|
-
hsts = SiteInspector.hsts_parse(headers["strict-transport-security"])
|
519
|
-
|
520
|
-
details[:hsts] = !!(
|
521
|
-
ssl and
|
522
|
-
details[:https_valid] and
|
523
|
-
hsts[:enabled]
|
524
|
-
)
|
525
|
-
|
526
|
-
details[:hsts_header] = headers["strict-transport-security"]
|
527
|
-
details[:hsts_details] = hsts
|
528
|
-
|
529
|
-
|
530
|
-
# If it's a redirect, go find the ultimate response starting from this combo.
|
531
|
-
redirect_code = response.response_code.to_s.start_with?("3")
|
532
|
-
location_header = headers["location"]
|
533
|
-
if redirect_code and location_header
|
534
|
-
location_header = location_header.downcase
|
535
|
-
details[:redirect] = true
|
536
|
-
|
537
|
-
ultimate_response = request(ssl, www, true, !details[:https_bad_chain], !details[:https_bad_name])
|
538
|
-
uri_original = URI(ultimate_response.request.url)
|
539
|
-
|
540
|
-
# treat relative Location headers as having the original hostname
|
541
|
-
if location_header.start_with?("http:") or location_header.start_with?("https:")
|
542
|
-
uri_immediate = URI(URI.escape(location_header))
|
543
|
-
else
|
544
|
-
uri_immediate = URI.join(uri_original, URI.escape(location_header))
|
545
|
-
end
|
546
|
-
|
547
|
-
uri_eventual = URI(ultimate_response.effective_url.downcase)
|
548
|
-
|
549
|
-
# compare base domain names
|
550
|
-
base_original = PublicSuffix.parse(uri_original.hostname).domain
|
551
|
-
|
552
|
-
# if the redirects aren't to valid hostnames (e.g. IP addresses)
|
553
|
-
# then fine just compare them directly, they're not going to be
|
554
|
-
# identical anyway.
|
555
|
-
base_immediate = begin
|
556
|
-
PublicSuffix.parse(uri_immediate.hostname).domain
|
557
|
-
rescue PublicSuffix::DomainInvalid
|
558
|
-
uri_immediate.to_s
|
559
|
-
end
|
560
|
-
|
561
|
-
base_eventual = begin
|
562
|
-
PublicSuffix.parse(uri_eventual.hostname).domain
|
563
|
-
rescue PublicSuffix::DomainInvalid
|
564
|
-
uri_eventual.to_s
|
565
|
-
end
|
566
|
-
|
567
|
-
details[:redirect_immediately_to] = uri_immediate.to_s
|
568
|
-
details[:redirect_immediately_to_www] = !!uri_immediate.to_s.match(/^https?:\/\/www\./)
|
569
|
-
details[:redirect_immediately_to_https] = uri_immediate.to_s.start_with?("https://")
|
570
|
-
details[:redirect_immediately_external] = (base_original != base_immediate)
|
571
|
-
|
572
|
-
details[:redirect_to] = uri_eventual.to_s
|
573
|
-
details[:redirect_external] = (base_original != base_eventual)
|
574
|
-
|
575
|
-
# otherwise, mark all the redirect fields as false/null
|
576
|
-
else
|
577
|
-
details[:redirect] = false
|
578
|
-
details[:redirect_immediately_to] = nil
|
579
|
-
details[:redirect_immediately_to_www] = false
|
580
|
-
details[:redirect_immediately_to_https] = false
|
581
|
-
details[:redirect_immediately_external] = false
|
582
|
-
|
583
|
-
details[:redirect_to] = nil
|
584
|
-
details[:redirect_external] = false
|
69
|
+
# Returns a thread-safe, memoized hydra instance
|
70
|
+
def hydra
|
71
|
+
Typhoeus::Hydra.hydra
|
585
72
|
end
|
586
|
-
|
587
|
-
details
|
588
73
|
end
|
74
|
+
end
|
589
75
|
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
:uri => uri.to_s,
|
595
|
-
:live => !!response,
|
596
|
-
:ssl => https?,
|
597
|
-
:enforce_https => enforce_https?,
|
598
|
-
:non_www => non_www?,
|
599
|
-
:redirect => redirect,
|
600
|
-
:headers => headers
|
601
|
-
}
|
602
|
-
else
|
603
|
-
{
|
604
|
-
:domain => domain.to_s,
|
605
|
-
:uri => uri.to_s,
|
606
|
-
:government => government?,
|
607
|
-
:live => !!response,
|
608
|
-
:ssl => https?,
|
609
|
-
:enforce_https => enforce_https?,
|
610
|
-
:non_www => non_www?,
|
611
|
-
:redirect => redirect,
|
612
|
-
:ip => ip,
|
613
|
-
:hostname => hostname.to_s,
|
614
|
-
:ipv6 => ipv6?,
|
615
|
-
:dnssec => dnssec?,
|
616
|
-
:cdn => cdn,
|
617
|
-
:google_apps => google_apps?,
|
618
|
-
:cloud_provider => cloud_provider,
|
619
|
-
:server => server,
|
620
|
-
:cms => cms,
|
621
|
-
:analytics => analytics,
|
622
|
-
:javascript => javascript,
|
623
|
-
:advertising => advertising,
|
624
|
-
:slash_data => slash_data?,
|
625
|
-
:slash_developer => slash_developer?,
|
626
|
-
:data_dot_json => data_dot_json?,
|
627
|
-
:click_jacking_protection => click_jacking_protection?,
|
628
|
-
:content_security_policy => content_security_policy?,
|
629
|
-
:xss_protection => xss_protection?,
|
630
|
-
:secure_cookies => secure_cookies?,
|
631
|
-
:strict_transport_security => strict_transport_security?,
|
632
|
-
:headers => headers
|
633
|
-
}
|
634
|
-
end
|
635
|
-
end
|
76
|
+
if ENV['DEBUG']
|
77
|
+
Ethon.logger = Logger.new($stdout)
|
78
|
+
Ethon.logger.level = Logger::DEBUG
|
79
|
+
Typhoeus::Config.verbose = true
|
636
80
|
end
|
81
|
+
|
82
|
+
Typhoeus::Config.memoize = true
|
83
|
+
Typhoeus::Config.cache = SiteInspector.cache
|