site-inspector 1.0.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +8 -0
- data/.rubocop.yml +42 -0
- data/.rubocop_todo.yml +139 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +7 -0
- data/Guardfile +10 -0
- data/README.md +189 -0
- data/Rakefile +10 -0
- data/bin/site-inspector +50 -22
- data/lib/cliver/dependency_ext.rb +24 -0
- data/lib/site-inspector.rb +62 -615
- data/lib/site-inspector/cache.rb +10 -51
- data/lib/site-inspector/checks/accessibility.rb +135 -0
- data/lib/site-inspector/checks/check.rb +54 -0
- data/lib/site-inspector/checks/content.rb +85 -0
- data/lib/site-inspector/checks/cookies.rb +45 -0
- data/lib/site-inspector/checks/dns.rb +138 -0
- data/lib/site-inspector/checks/headers.rb +68 -0
- data/lib/site-inspector/checks/hsts.rb +81 -0
- data/lib/site-inspector/checks/https.rb +40 -0
- data/lib/site-inspector/checks/sniffer.rb +67 -0
- data/lib/site-inspector/checks/wappalyzer.rb +62 -0
- data/lib/site-inspector/checks/whois.rb +36 -0
- data/lib/site-inspector/disk_cache.rb +42 -0
- data/lib/site-inspector/domain.rb +271 -0
- data/lib/site-inspector/endpoint.rb +217 -0
- data/lib/site-inspector/rails_cache.rb +13 -0
- data/lib/site-inspector/version.rb +5 -0
- data/package-lock.json +505 -0
- data/package.json +23 -0
- data/script/bootstrap +2 -0
- data/script/cibuild +11 -0
- data/script/console +3 -0
- data/script/pa11y-version +10 -0
- data/script/release +38 -0
- data/site-inspector.gemspec +42 -0
- data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
- data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
- data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
- data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
- data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
- data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
- data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
- data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
- data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
- data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
- data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
- data/spec/fixtures/wappalyzer.json +125 -0
- data/spec/site_inspector_cache_spec.rb +15 -0
- data/spec/site_inspector_disk_cache_spec.rb +39 -0
- data/spec/site_inspector_domain_spec.rb +271 -0
- data/spec/site_inspector_endpoint_spec.rb +252 -0
- data/spec/site_inspector_spec.rb +48 -0
- data/spec/spec_helper.rb +19 -0
- metadata +204 -63
- data/lib/site-inspector/compliance.rb +0 -19
- data/lib/site-inspector/dns.rb +0 -92
- data/lib/site-inspector/headers.rb +0 -59
- data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Cliver
|
4
|
+
class Dependency
|
5
|
+
# Memoized shortcut for detect
|
6
|
+
# Returns the path to the detected dependency
|
7
|
+
# Raises an error if the dependency was not satisfied
|
8
|
+
def path
|
9
|
+
@path ||= detect!
|
10
|
+
end
|
11
|
+
|
12
|
+
# Returns the version of the resolved dependency
|
13
|
+
def version
|
14
|
+
return @version if defined? @version
|
15
|
+
|
16
|
+
version = installed_versions.find { |p, _v| p == path }
|
17
|
+
@detected_version = version.nil? ? nil : version[1]
|
18
|
+
end
|
19
|
+
|
20
|
+
def major_version
|
21
|
+
version&.split('.')&.first
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/site-inspector.rb
CHANGED
@@ -1,636 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
|
-
# needed for HTTP analysis
|
3
3
|
require 'open-uri'
|
4
|
-
require
|
4
|
+
require 'addressable/uri'
|
5
5
|
require 'public_suffix'
|
6
6
|
require 'typhoeus'
|
7
|
+
require 'parallel'
|
8
|
+
require 'cliver'
|
9
|
+
require 'whois'
|
10
|
+
require 'cgi'
|
11
|
+
require 'resolv'
|
12
|
+
require 'dotenv/load'
|
7
13
|
|
8
14
|
require_relative 'site-inspector/cache'
|
9
|
-
require_relative 'site-inspector/
|
10
|
-
require_relative 'site-inspector/
|
11
|
-
require_relative 'site-inspector/
|
12
|
-
require_relative 'site-inspector/
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
15
|
+
require_relative 'site-inspector/disk_cache'
|
16
|
+
require_relative 'site-inspector/rails_cache'
|
17
|
+
require_relative 'site-inspector/domain'
|
18
|
+
require_relative 'site-inspector/checks/check'
|
19
|
+
require_relative 'site-inspector/checks/accessibility'
|
20
|
+
require_relative 'site-inspector/checks/content'
|
21
|
+
require_relative 'site-inspector/checks/dns'
|
22
|
+
require_relative 'site-inspector/checks/headers'
|
23
|
+
require_relative 'site-inspector/checks/hsts'
|
24
|
+
require_relative 'site-inspector/checks/https'
|
25
|
+
require_relative 'site-inspector/checks/sniffer'
|
26
|
+
require_relative 'site-inspector/checks/cookies'
|
27
|
+
require_relative 'site-inspector/checks/whois'
|
28
|
+
require_relative 'site-inspector/checks/wappalyzer'
|
29
|
+
require_relative 'site-inspector/endpoint'
|
30
|
+
require_relative 'site-inspector/version'
|
31
|
+
require_relative 'cliver/dependency_ext'
|
20
32
|
|
21
33
|
class SiteInspector
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
max_age: nil,
|
34
|
-
include_subdomains: false,
|
35
|
-
preload: false,
|
36
|
-
enabled: false,
|
37
|
-
preload_ready: false
|
38
|
-
}
|
39
|
-
|
40
|
-
return nothing unless header and header.is_a?(String)
|
41
|
-
|
42
|
-
directives = header.split(/\s*;\s*/)
|
43
|
-
|
44
|
-
pairs = []
|
45
|
-
directives.each do |directive|
|
46
|
-
name, value = directive.downcase.split("=")
|
47
|
-
|
48
|
-
if value and value.start_with?("\"") and value.end_with?("\"")
|
49
|
-
value = value.sub(/^\"/, '')
|
50
|
-
value = value.sub(/\"$/, '')
|
51
|
-
end
|
52
|
-
|
53
|
-
pairs.push([name, value])
|
34
|
+
class << self
|
35
|
+
attr_writer :timeout, :cache, :typhoeus_options
|
36
|
+
|
37
|
+
def cache
|
38
|
+
@cache ||= if ENV['CACHE']
|
39
|
+
SiteInspector::DiskCache.new
|
40
|
+
elsif Object.const_defined?('Rails')
|
41
|
+
SiteInspector::RailsCache.new
|
42
|
+
else
|
43
|
+
SiteInspector::Cache.new
|
44
|
+
end
|
54
45
|
end
|
55
46
|
|
56
|
-
|
57
|
-
|
58
|
-
# TODO: more comprehensive rejection of characters
|
59
|
-
invalid_chars = /[\s\'\"]/
|
60
|
-
(name =~ invalid_chars) or (value =~ invalid_chars)
|
47
|
+
def timeout
|
48
|
+
@timeout || 10
|
61
49
|
end
|
62
50
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
max_age_directive = pairs.find {|n, v| n == "max-age"}
|
67
|
-
max_age = max_age_directive ? max_age_directive[1].to_i : nil
|
68
|
-
include_subdomains = !!pairs.find {|n, v| n == "includesubdomains"}
|
69
|
-
preload = !!pairs.find {|n, v| n == "preload"}
|
70
|
-
|
71
|
-
enabled = !!(max_age and (max_age > 0))
|
72
|
-
|
73
|
-
# Google's minimum max-age for automatic preloading
|
74
|
-
eighteen_weeks = !!(max_age and (max_age >= 10886400))
|
75
|
-
preload_ready = !!(eighteen_weeks and include_subdomains and preload)
|
76
|
-
|
77
|
-
{
|
78
|
-
max_age: max_age,
|
79
|
-
include_subdomains: include_subdomains,
|
80
|
-
preload: preload,
|
81
|
-
enabled: enabled,
|
82
|
-
preload_ready: preload_ready
|
83
|
-
}
|
84
|
-
end
|
85
|
-
|
86
|
-
# makes no network requests
|
87
|
-
def initialize(domain, options = {})
|
88
|
-
domain = domain.downcase
|
89
|
-
domain = domain.sub /^https?\:/, ""
|
90
|
-
domain = domain.sub /^\/+/, ""
|
91
|
-
domain = domain.sub /^www\./, ""
|
92
|
-
@uri = Addressable::URI.parse "//#{domain}"
|
93
|
-
@domain = PublicSuffix.parse @uri.host
|
94
|
-
@timeout = options[:timeout] || 10
|
95
|
-
end
|
96
|
-
|
97
|
-
def inspect
|
98
|
-
"<SiteInspector domain=\"#{domain}\">"
|
99
|
-
end
|
100
|
-
|
101
|
-
def uri(ssl=enforce_https?,www=www?)
|
102
|
-
uri = @uri.clone
|
103
|
-
uri.host = www ? "www.#{uri.host}" : uri.host
|
104
|
-
uri.scheme = ssl ? "https" : "http"
|
105
|
-
uri
|
106
|
-
end
|
107
|
-
|
108
|
-
def domain
|
109
|
-
www? ? PublicSuffix.parse("www.#{@uri.host}") : @domain
|
110
|
-
end
|
111
|
-
|
112
|
-
def request(ssl=false, www=false, followlocation=true, ssl_verifypeer=true, ssl_verifyhost=true)
|
113
|
-
to_get = uri(ssl, www)
|
114
|
-
|
115
|
-
# debugging
|
116
|
-
# puts "fetching: #{to_get}, #{followlocation ? "follow" : "no follow"}, #{ssl_verifypeer ? "verify peer, " : ""}#{ssl_verifyhost ? "verify host" : ""}"
|
117
|
-
|
118
|
-
Typhoeus.get(to_get, followlocation: followlocation, ssl_verifypeer: ssl_verifypeer, ssl_verifyhost: (ssl_verifyhost ? 2 : 0), timeout: @timeout)
|
119
|
-
end
|
120
|
-
|
121
|
-
def response
|
122
|
-
@response ||= begin
|
123
|
-
if response = request(false, false) and response.success?
|
124
|
-
@non_www = true
|
125
|
-
response
|
126
|
-
elsif response = request(false, true) and response.success?
|
127
|
-
@non_www = false
|
128
|
-
response
|
129
|
-
else
|
130
|
-
false
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
def timed_out?
|
136
|
-
response && response.timed_out?
|
137
|
-
end
|
138
|
-
|
139
|
-
def doc
|
140
|
-
require 'nokogiri'
|
141
|
-
@doc ||= Nokogiri::HTML response.body if response
|
142
|
-
end
|
143
|
-
|
144
|
-
def body
|
145
|
-
doc.to_s.force_encoding("UTF-8").encode("UTF-8", :invalid => :replace, :replace => "")
|
146
|
-
end
|
147
|
-
|
148
|
-
def government?
|
149
|
-
require 'gman'
|
150
|
-
Gman.valid? domain.to_s
|
151
|
-
end
|
152
|
-
|
153
|
-
def https?
|
154
|
-
@https ||= request(true, www?).success?
|
155
|
-
end
|
156
|
-
alias_method :ssl?, :https?
|
157
|
-
|
158
|
-
def enforce_https?
|
159
|
-
return false unless https?
|
160
|
-
@enforce_https ||= begin
|
161
|
-
response = request(false, www?)
|
162
|
-
if response.effective_url
|
163
|
-
Addressable::URI.parse(response.effective_url).scheme == "https"
|
164
|
-
else
|
165
|
-
false
|
166
|
-
end
|
51
|
+
def inspect(domain)
|
52
|
+
Domain.new(domain)
|
167
53
|
end
|
168
|
-
end
|
169
|
-
|
170
|
-
def www?
|
171
|
-
response && response.effective_url && !!response.effective_url.match(/^https?:\/\/www\./)
|
172
|
-
end
|
173
|
-
|
174
|
-
def non_www?
|
175
|
-
response && @non_www
|
176
|
-
end
|
177
|
-
|
178
|
-
def redirect?
|
179
|
-
!!redirect
|
180
|
-
end
|
181
|
-
|
182
|
-
def redirect
|
183
|
-
@redirect ||= begin
|
184
|
-
if location = request(https?, www?, false).headers["location"]
|
185
|
-
redirect_domain = SiteInspector.new(location).domain
|
186
|
-
redirect_domain.to_s if redirect_domain.to_s != domain.to_s
|
187
|
-
end
|
188
|
-
rescue
|
189
|
-
nil
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
def http
|
194
|
-
details = {
|
195
|
-
endpoints: endpoints
|
196
|
-
}
|
197
|
-
|
198
|
-
# convenient shorthand for the extensive statements to come
|
199
|
-
combos = details[:endpoints]
|
200
|
-
|
201
|
-
# A domain is "canonically" at www if:
|
202
|
-
# * at least one of its www endpoints responds
|
203
|
-
# * both root endpoints are either down or redirect *somewhere*
|
204
|
-
# * either both root endpoints are down, *or* at least one
|
205
|
-
# root endpoint redirect should immediately go to
|
206
|
-
# an *internal* www endpoint
|
207
|
-
# This is meant to affirm situations like:
|
208
|
-
# http:// -> https:// -> https://www
|
209
|
-
# https:// -> http:// -> https://www
|
210
|
-
# and meant to avoid affirming situations like:
|
211
|
-
# http:// -> http://non-www,
|
212
|
-
# http://www -> http://non-www
|
213
|
-
# or like:
|
214
|
-
# https:// -> 200, http:// -> http://www
|
215
54
|
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
!combos[:https][:root][:status].to_s.start_with?("2")
|
226
|
-
) and (
|
227
|
-
combos[:http][:root][:redirect] or
|
228
|
-
!combos[:http][:root][:up] or
|
229
|
-
!combos[:http][:root][:status].to_s.start_with?("2")
|
230
|
-
)
|
231
|
-
) and (
|
232
|
-
(
|
233
|
-
(
|
234
|
-
!combos[:https][:root][:up] or
|
235
|
-
combos[:https][:root][:https_bad_name] or
|
236
|
-
!combos[:https][:root][:status].to_s.start_with?("2")
|
237
|
-
) and
|
238
|
-
(
|
239
|
-
!combos[:http][:root][:up] or
|
240
|
-
!combos[:http][:root][:status].to_s.start_with?("2")
|
241
|
-
)
|
242
|
-
) or
|
243
|
-
(
|
244
|
-
combos[:https][:root][:redirect_immediately_to_www] and
|
245
|
-
!combos[:https][:root][:redirect_immediately_external]
|
246
|
-
) or
|
247
|
-
(
|
248
|
-
combos[:http][:root][:redirect_immediately_to_www] and
|
249
|
-
!combos[:http][:root][:redirect_immediately_external]
|
250
|
-
)
|
251
|
-
)
|
252
|
-
)
|
253
|
-
|
254
|
-
# A domain is "canonically" at https if:
|
255
|
-
# * at least one of its https endpoints is live and
|
256
|
-
# doesn't have an invalid hostname
|
257
|
-
# * both http endpoints are either down or redirect *somewhere*
|
258
|
-
# * at least one http endpoint redirects immediately to
|
259
|
-
# an *internal* https endpoint
|
260
|
-
# This is meant to affirm situations like:
|
261
|
-
# http:// -> http://www -> https://
|
262
|
-
# https:// -> http:// -> https://www
|
263
|
-
# and meant to avoid affirming situations like:
|
264
|
-
# http:// -> http://non-www
|
265
|
-
# http://www -> http://non-www
|
266
|
-
# or:
|
267
|
-
# http:// -> 200, http://www -> https://www
|
268
|
-
#
|
269
|
-
# It allows a site to be canonically HTTPS if the cert has
|
270
|
-
# a valid hostname but invalid chain issues.
|
271
|
-
|
272
|
-
https = !!(
|
273
|
-
(
|
274
|
-
(
|
275
|
-
combos[:https][:root][:up] and
|
276
|
-
!combos[:https][:root][:https_bad_name]
|
277
|
-
) or
|
278
|
-
(
|
279
|
-
combos[:https][:www][:up] and
|
280
|
-
!combos[:https][:www][:https_bad_name]
|
281
|
-
)
|
282
|
-
) and (
|
283
|
-
(
|
284
|
-
combos[:http][:root][:redirect] or
|
285
|
-
!combos[:http][:root][:up] or
|
286
|
-
!combos[:http][:root][:status].to_s.start_with?("2")
|
287
|
-
) and (
|
288
|
-
combos[:http][:www][:redirect] or
|
289
|
-
!combos[:http][:www][:up] or
|
290
|
-
!combos[:http][:www][:status].to_s.start_with?("2")
|
291
|
-
)
|
292
|
-
) and (
|
293
|
-
(
|
294
|
-
combos[:http][:root][:redirect_immediately_to_https] and
|
295
|
-
!combos[:http][:root][:redirect_immediately_external]
|
296
|
-
) or (
|
297
|
-
combos[:http][:www][:redirect_immediately_to_https] and
|
298
|
-
!combos[:http][:www][:redirect_immediately_external]
|
299
|
-
)
|
300
|
-
)
|
301
|
-
)
|
302
|
-
|
303
|
-
details[:canonical_endpoint] = www ? :www : :root
|
304
|
-
details[:canonical_protocol] = https ? :https : :http
|
305
|
-
details[:canonical] = uri(https, www).to_s
|
306
|
-
|
307
|
-
# If any endpoint is up, the domain is up.
|
308
|
-
details[:up] = !!(
|
309
|
-
combos[:https][:www][:up] or
|
310
|
-
combos[:https][:root][:up] or
|
311
|
-
combos[:http][:www][:up] or
|
312
|
-
combos[:http][:root][:up]
|
313
|
-
)
|
314
|
-
|
315
|
-
# A domain's root is broken if neither protocol can connect.
|
316
|
-
details[:broken_root] = !!(
|
317
|
-
!combos[:https][:root][:up] and
|
318
|
-
!combos[:http][:root][:up]
|
319
|
-
)
|
320
|
-
|
321
|
-
# A domain's www is broken if neither protocol can connect.
|
322
|
-
details[:broken_www] = !!(
|
323
|
-
!combos[:https][:www][:up] and
|
324
|
-
!combos[:http][:www][:up]
|
325
|
-
)
|
326
|
-
|
327
|
-
# HTTPS is "supported" (different than "canonical" or "enforced") if:
|
328
|
-
#
|
329
|
-
# * Either of the HTTPS endpoints is listening, and doesn't have
|
330
|
-
# an invalid hostname.
|
331
|
-
details[:support_https] = !!(
|
332
|
-
(
|
333
|
-
(combos[:https][:root][:status] != 0) and
|
334
|
-
!combos[:https][:root][:https_bad_name]
|
335
|
-
) or (
|
336
|
-
(combos[:https][:www][:status] != 0) and
|
337
|
-
!combos[:https][:www][:https_bad_name]
|
338
|
-
)
|
339
|
-
)
|
340
|
-
|
341
|
-
# we can say that a canonical HTTPS site "defaults" to HTTPS,
|
342
|
-
# even if it doesn't *strictly* enforce it (e.g. having a www
|
343
|
-
# subdomain first to go HTTP root before HTTPS root).
|
344
|
-
details[:default_https] = https
|
345
|
-
|
346
|
-
# HTTPS is "downgraded" if both:
|
347
|
-
#
|
348
|
-
# * HTTPS is supported, and
|
349
|
-
# * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
|
350
|
-
|
351
|
-
details[:downgrade_https] = !!(
|
352
|
-
details[:support_https] and
|
353
|
-
(
|
354
|
-
combos[:https][details[:canonical_endpoint]][:redirect] and
|
355
|
-
!combos[:https][details[:canonical_endpoint]][:redirect_immediately_external] and
|
356
|
-
!combos[:https][details[:canonical_endpoint]][:redirect_immediately_to_https]
|
357
|
-
)
|
358
|
-
)
|
359
|
-
|
360
|
-
# HTTPS is enforced if one of the HTTPS endpoints is "live",
|
361
|
-
# and if both *HTTP* endpoints are either:
|
362
|
-
#
|
363
|
-
# * down, or
|
364
|
-
# * redirect immediately to HTTPS.
|
365
|
-
#
|
366
|
-
# This is different than whether a domain is "canonically" HTTPS.
|
367
|
-
#
|
368
|
-
# * an HTTP redirect can go to HTTPS on another domain, as long
|
369
|
-
# as it's immediate.
|
370
|
-
# * a domain with an invalid cert can still be enforcing HTTPS.
|
371
|
-
details[:enforce_https] = !!(
|
372
|
-
(
|
373
|
-
!combos[:http][:www][:up] or
|
374
|
-
(combos[:http][:www][:redirect_immediately_to_https])
|
375
|
-
) and
|
376
|
-
(
|
377
|
-
!combos[:http][:root][:up] or
|
378
|
-
(combos[:http][:root][:redirect_immediately_to_https])
|
379
|
-
) and
|
380
|
-
(
|
381
|
-
combos[:https][:www][:up] or
|
382
|
-
combos[:https][:root][:up]
|
383
|
-
)
|
384
|
-
)
|
385
|
-
|
386
|
-
# The domain is a redirect if at least one endpoint is up,
|
387
|
-
# and each one is *either* an external redirect or down entirely.
|
388
|
-
details[:redirect] = !!(
|
389
|
-
details[:up] and
|
390
|
-
(
|
391
|
-
combos[:http][:www][:redirect_external] or
|
392
|
-
!combos[:http][:www][:up] or
|
393
|
-
combos[:http][:www][:status] >= 400
|
394
|
-
) and
|
395
|
-
(
|
396
|
-
combos[:http][:root][:redirect_external] or
|
397
|
-
!combos[:http][:root][:up] or
|
398
|
-
combos[:http][:root][:status] >= 400
|
399
|
-
) and
|
400
|
-
(
|
401
|
-
combos[:https][:www][:redirect_external] or
|
402
|
-
!combos[:https][:www][:up] or
|
403
|
-
combos[:https][:www][:https_bad_name] or
|
404
|
-
combos[:https][:www][:status] >= 400
|
405
|
-
) and
|
406
|
-
(
|
407
|
-
combos[:https][:root][:redirect_external] or
|
408
|
-
!combos[:https][:root][:up] or
|
409
|
-
combos[:https][:root][:https_bad_name] or
|
410
|
-
combos[:https][:root][:status] >= 400
|
411
|
-
)
|
412
|
-
)
|
413
|
-
|
414
|
-
# OK, we've said a domain is a "redirect" domain.
|
415
|
-
# What does the domain redirect to?
|
416
|
-
if details[:redirect]
|
417
|
-
canon = combos[details[:canonical_protocol]][details[:canonical_endpoint]]
|
418
|
-
details[:redirect_to] = canon[:redirect_to]
|
419
|
-
else
|
420
|
-
details[:redirect_to] = nil
|
421
|
-
end
|
422
|
-
|
423
|
-
# HSTS on the canonical domain? (valid HTTPS checked in endpoint)
|
424
|
-
details[:hsts] = !!combos[:https][details[:canonical_endpoint]][:hsts]
|
425
|
-
details[:hsts_header] = combos[:https][details[:canonical_endpoint]][:hsts_header]
|
426
|
-
|
427
|
-
# HSTS on the entire domain?
|
428
|
-
details[:hsts_entire_domain] = !!(
|
429
|
-
combos[:https][:root][:hsts] and
|
430
|
-
combos[:https][:root][:hsts_details][:include_subdomains]
|
431
|
-
)
|
432
|
-
|
433
|
-
# HSTS preload-ready for the entire domain?
|
434
|
-
#
|
435
|
-
# Re-checks :hsts_entire_domain in case the :preload_ready
|
436
|
-
# flag ever changes its definition to not require include_subdomains.
|
437
|
-
|
438
|
-
details[:hsts_entire_domain_preload] = !!(
|
439
|
-
details[:hsts_entire_domain] and
|
440
|
-
combos[:https][:root][:hsts_details][:preload_ready]
|
441
|
-
)
|
442
|
-
|
443
|
-
details
|
444
|
-
end
|
445
|
-
|
446
|
-
def endpoints
|
447
|
-
https_www = http_endpoint(true, true)
|
448
|
-
http_www = http_endpoint(false, true)
|
449
|
-
https_root = http_endpoint(true, false)
|
450
|
-
http_root = http_endpoint(false, false)
|
451
|
-
|
452
|
-
{
|
453
|
-
https: {
|
454
|
-
www: https_www,
|
455
|
-
root: https_root
|
456
|
-
},
|
457
|
-
http: {
|
458
|
-
www: http_www,
|
459
|
-
root: http_root
|
55
|
+
def typhoeus_defaults
|
56
|
+
defaults = {
|
57
|
+
followlocation: false,
|
58
|
+
timeout: SiteInspector.timeout,
|
59
|
+
accept_encoding: 'gzip',
|
60
|
+
method: :head,
|
61
|
+
headers: {
|
62
|
+
'User-Agent' => "Mozilla/5.0 (compatible; SiteInspector/#{SiteInspector::VERSION}; +https://github.com/benbalter/site-inspector)"
|
63
|
+
}
|
460
64
|
}
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
# State of affairs at a particular endpoint.
|
465
|
-
def http_endpoint(ssl, www)
|
466
|
-
details = {}
|
467
|
-
|
468
|
-
# Don't follow redirects for first ping.
|
469
|
-
response = request(ssl, www, false)
|
470
|
-
|
471
|
-
|
472
|
-
# For HTTPS: examine the full range of possibilities.
|
473
|
-
if ssl
|
474
|
-
if response.return_code == :ok
|
475
|
-
details[:https_valid] = true
|
476
|
-
details[:https_bad_chain] = false
|
477
|
-
details[:https_bad_name] = false
|
478
|
-
|
479
|
-
# Bad certificate chain.
|
480
|
-
elsif response.return_code == :ssl_cacert
|
481
|
-
details[:https_valid] = false
|
482
|
-
details[:https_bad_chain] = true
|
483
|
-
response = request(ssl, www, false, false, true)
|
484
|
-
# Bad everything.
|
485
|
-
if response.return_code == :peer_failed_verification
|
486
|
-
details[:https_bad_name] = true
|
487
|
-
response = request(ssl, www, false, false, false)
|
488
|
-
end
|
489
|
-
|
490
|
-
# Bad hostname.
|
491
|
-
elsif response.return_code == :peer_failed_verification
|
492
|
-
details[:https_valid] = false
|
493
|
-
details[:https_bad_name] = true
|
494
|
-
response = request(ssl, www, false, true, false)
|
495
|
-
# Bad everything.
|
496
|
-
if response.return_code == :ssl_cacert
|
497
|
-
details[:https_bad_chain] = true
|
498
|
-
response = request(ssl, www, false, false, false)
|
499
|
-
end
|
500
|
-
|
501
|
-
# not sure what else would happen
|
502
|
-
elsif response.response_code != 0
|
503
|
-
details[:https_valid] = false
|
504
|
-
details[:https_unknown_issue] = response.return_code
|
505
|
-
end
|
65
|
+
defaults.merge! @typhoeus_options if @typhoeus_options
|
66
|
+
defaults
|
506
67
|
end
|
507
68
|
|
508
|
-
#
|
509
|
-
|
510
|
-
|
511
|
-
return details if !details[:up]
|
512
|
-
|
513
|
-
headers = Hash[response.headers.map{ |k,v| [k.downcase,v] }]
|
514
|
-
details[:headers] = headers
|
515
|
-
|
516
|
-
|
517
|
-
# HSTS only takes effect when delivered over valid HTTPS.
|
518
|
-
hsts = SiteInspector.hsts_parse(headers["strict-transport-security"])
|
519
|
-
|
520
|
-
details[:hsts] = !!(
|
521
|
-
ssl and
|
522
|
-
details[:https_valid] and
|
523
|
-
hsts[:enabled]
|
524
|
-
)
|
525
|
-
|
526
|
-
details[:hsts_header] = headers["strict-transport-security"]
|
527
|
-
details[:hsts_details] = hsts
|
528
|
-
|
529
|
-
|
530
|
-
# If it's a redirect, go find the ultimate response starting from this combo.
|
531
|
-
redirect_code = response.response_code.to_s.start_with?("3")
|
532
|
-
location_header = headers["location"]
|
533
|
-
if redirect_code and location_header
|
534
|
-
location_header = location_header.downcase
|
535
|
-
details[:redirect] = true
|
536
|
-
|
537
|
-
ultimate_response = request(ssl, www, true, !details[:https_bad_chain], !details[:https_bad_name])
|
538
|
-
uri_original = URI(ultimate_response.request.url)
|
539
|
-
|
540
|
-
# treat relative Location headers as having the original hostname
|
541
|
-
if location_header.start_with?("http:") or location_header.start_with?("https:")
|
542
|
-
uri_immediate = URI(URI.escape(location_header))
|
543
|
-
else
|
544
|
-
uri_immediate = URI.join(uri_original, URI.escape(location_header))
|
545
|
-
end
|
546
|
-
|
547
|
-
uri_eventual = URI(ultimate_response.effective_url.downcase)
|
548
|
-
|
549
|
-
# compare base domain names
|
550
|
-
base_original = PublicSuffix.parse(uri_original.hostname).domain
|
551
|
-
|
552
|
-
# if the redirects aren't to valid hostnames (e.g. IP addresses)
|
553
|
-
# then fine just compare them directly, they're not going to be
|
554
|
-
# identical anyway.
|
555
|
-
base_immediate = begin
|
556
|
-
PublicSuffix.parse(uri_immediate.hostname).domain
|
557
|
-
rescue PublicSuffix::DomainInvalid
|
558
|
-
uri_immediate.to_s
|
559
|
-
end
|
560
|
-
|
561
|
-
base_eventual = begin
|
562
|
-
PublicSuffix.parse(uri_eventual.hostname).domain
|
563
|
-
rescue PublicSuffix::DomainInvalid
|
564
|
-
uri_eventual.to_s
|
565
|
-
end
|
566
|
-
|
567
|
-
details[:redirect_immediately_to] = uri_immediate.to_s
|
568
|
-
details[:redirect_immediately_to_www] = !!uri_immediate.to_s.match(/^https?:\/\/www\./)
|
569
|
-
details[:redirect_immediately_to_https] = uri_immediate.to_s.start_with?("https://")
|
570
|
-
details[:redirect_immediately_external] = (base_original != base_immediate)
|
571
|
-
|
572
|
-
details[:redirect_to] = uri_eventual.to_s
|
573
|
-
details[:redirect_external] = (base_original != base_eventual)
|
574
|
-
|
575
|
-
# otherwise, mark all the redirect fields as false/null
|
576
|
-
else
|
577
|
-
details[:redirect] = false
|
578
|
-
details[:redirect_immediately_to] = nil
|
579
|
-
details[:redirect_immediately_to_www] = false
|
580
|
-
details[:redirect_immediately_to_https] = false
|
581
|
-
details[:redirect_immediately_external] = false
|
582
|
-
|
583
|
-
details[:redirect_to] = nil
|
584
|
-
details[:redirect_external] = false
|
69
|
+
# Returns a thread-safe, memoized hydra instance
|
70
|
+
def hydra
|
71
|
+
Typhoeus::Hydra.hydra
|
585
72
|
end
|
586
|
-
|
587
|
-
details
|
588
73
|
end
|
74
|
+
end
|
589
75
|
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
:uri => uri.to_s,
|
595
|
-
:live => !!response,
|
596
|
-
:ssl => https?,
|
597
|
-
:enforce_https => enforce_https?,
|
598
|
-
:non_www => non_www?,
|
599
|
-
:redirect => redirect,
|
600
|
-
:headers => headers
|
601
|
-
}
|
602
|
-
else
|
603
|
-
{
|
604
|
-
:domain => domain.to_s,
|
605
|
-
:uri => uri.to_s,
|
606
|
-
:government => government?,
|
607
|
-
:live => !!response,
|
608
|
-
:ssl => https?,
|
609
|
-
:enforce_https => enforce_https?,
|
610
|
-
:non_www => non_www?,
|
611
|
-
:redirect => redirect,
|
612
|
-
:ip => ip,
|
613
|
-
:hostname => hostname.to_s,
|
614
|
-
:ipv6 => ipv6?,
|
615
|
-
:dnssec => dnssec?,
|
616
|
-
:cdn => cdn,
|
617
|
-
:google_apps => google_apps?,
|
618
|
-
:cloud_provider => cloud_provider,
|
619
|
-
:server => server,
|
620
|
-
:cms => cms,
|
621
|
-
:analytics => analytics,
|
622
|
-
:javascript => javascript,
|
623
|
-
:advertising => advertising,
|
624
|
-
:slash_data => slash_data?,
|
625
|
-
:slash_developer => slash_developer?,
|
626
|
-
:data_dot_json => data_dot_json?,
|
627
|
-
:click_jacking_protection => click_jacking_protection?,
|
628
|
-
:content_security_policy => content_security_policy?,
|
629
|
-
:xss_protection => xss_protection?,
|
630
|
-
:secure_cookies => secure_cookies?,
|
631
|
-
:strict_transport_security => strict_transport_security?,
|
632
|
-
:headers => headers
|
633
|
-
}
|
634
|
-
end
|
635
|
-
end
|
76
|
+
if ENV['DEBUG']
|
77
|
+
Ethon.logger = Logger.new($stdout)
|
78
|
+
Ethon.logger.level = Logger::DEBUG
|
79
|
+
Typhoeus::Config.verbose = true
|
636
80
|
end
|
81
|
+
|
82
|
+
Typhoeus::Config.memoize = true
|
83
|
+
Typhoeus::Config.cache = SiteInspector.cache
|