site-inspector 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 081857089e6dd0832b6fd0553615c02c74ba271b
4
- data.tar.gz: aaa19892e68cf24c05898e44e2314413976819a1
3
+ metadata.gz: e4e41e2a1639e9f5f6e7f018ef58c664c834b2c2
4
+ data.tar.gz: 3205114fcaaaa11cf03ec1eb8fa2f5736b6a99f7
5
5
  SHA512:
6
- metadata.gz: 613c90276d90b35a8d74502893f6d4bf2579c84f4f399c34c7ee33c3722f92af29e12796d7c733a8503903e63eb85e49f940379ca40d41c55ce1aa5660437181
7
- data.tar.gz: 6ddf58171d7c76d8a0f5b029b94878a76e1b68b3becb6dc6c48b972389c9c94e48f5e7ff56ad52b3b00dab8b02aa6620c083920dbe97e2228de6d513ad52c8d1
6
+ metadata.gz: ffa69fcc3949abe434a476bf8bff0c65dbd4085b5c40f379c0f7b3bbb9eaf43ffc41527ea571fe211ac71d3911b58deaebf8446ee96a69d8a279c2877e87e3bb
7
+ data.tar.gz: b369d00e140c4b258b02f02e5b841f4ce26d84c0b7b32f10e6015a99673355b3a505194495fbf2bf5e9ecfabfed6e486db18572a56418bcfaf329b6acb746778
data/bin/site-inspector CHANGED
@@ -1,15 +1,29 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require_relative "../lib/site-inspector"
4
- require "json"
4
+
5
+ require "oj"
5
6
 
6
7
  domain = ARGV[0]
8
+ http_mode = (ARGV[1] == "--http")
7
9
 
8
10
  if domain.to_s.empty?
9
- puts "Usage: site-inspector [DOMAIN]"
11
+ puts "Usage: site-inspector [DOMAIN] [--http]"
10
12
  exit 1
11
13
  end
12
14
 
13
- details = SiteInspector.new(domain).to_hash
15
+ # HTTP mode:
16
+ # * all details for possible endpoints
17
+ # * don't follow redirects
18
+ # * shorter timeout
19
+ if http_mode
20
+ site = SiteInspector.new(domain)
21
+ details = site.http
22
+
23
+ # Normal mode: autodetect canonical domain, sweep every attribute.
24
+ else
25
+ site = SiteInspector.new(domain)
26
+ details = site.to_hash
27
+ end
14
28
 
15
- puts JSON.pretty_generate(details)
29
+ puts Oj.dump(details, indent: 2, mode: :compat)
@@ -11,3 +11,48 @@ class SiteInspectorCache
11
11
  @memory[request] = response
12
12
  end
13
13
  end
14
+
15
+ class SiteInspectorDiskCache
16
+ def initialize(dir = nil, replace = false)
17
+ @dir = dir
18
+ @memory = {}
19
+ @replace = replace
20
+ end
21
+
22
+ def path(request)
23
+ File.join(@dir, request.cache_key)
24
+ end
25
+
26
+ def fetch(request)
27
+ if File.exist?(path(request))
28
+
29
+ if @replace
30
+ FileUtils.rm(path(request))
31
+ nil
32
+ else
33
+ contents = File.read(path(request))
34
+ begin
35
+ Marshal.load(contents)
36
+ rescue ArgumentError
37
+ FileUtils.rm(path(request))
38
+ nil
39
+ end
40
+ end
41
+ end
42
+ end
43
+
44
+ def store(request, response)
45
+ File.open(File.join(@dir, request.cache_key), "w") do |f|
46
+ f.write Marshal.dump(response)
47
+ end
48
+ end
49
+
50
+ def get(request)
51
+ @memory[request] || fetch(request)
52
+ end
53
+
54
+ def set(request, response)
55
+ store(request, response)
56
+ @memory[request] = response
57
+ end
58
+ end
@@ -1,7 +1,12 @@
1
1
  class SiteInspector
2
2
 
3
3
  def resolver
4
- @resolver ||= Dnsruby::Resolver.new
4
+ require "dnsruby"
5
+ @resolver ||= begin
6
+ resolver = Dnsruby::Resolver.new
7
+ resolver.config.nameserver = ["8.8.8.8", "8.8.4.4"]
8
+ resolver
9
+ end
5
10
  end
6
11
 
7
12
  def query(type="ANY")
@@ -14,12 +19,16 @@ class SiteInspector
14
19
  @dns ||= query
15
20
  end
16
21
 
22
+ def has_record?(type)
23
+ dns.any? { |record| record.type == type } || query(type).count != 0
24
+ end
25
+
17
26
  def dnssec?
18
- @dnssec ||= query("DNSKEY").count != 0
27
+ @dnssec ||= has_record? "DNSKEY"
19
28
  end
20
29
 
21
30
  def ipv6?
22
- @ipv6 ||= query("AAAA").count != 0
31
+ @ipv6 ||= has_record? "AAAA"
23
32
  end
24
33
 
25
34
  def detect_by_hostname(type)
@@ -64,12 +73,14 @@ class SiteInspector
64
73
  end
65
74
 
66
75
  def ip
76
+ require 'resolv'
67
77
  @ip ||= Resolv.getaddress domain.to_s
68
78
  rescue Resolv::ResolvError
69
79
  nil
70
80
  end
71
81
 
72
82
  def hostname
83
+ require 'resolv'
73
84
  @hostname ||= PublicSuffix.parse(Resolv.getname(ip))
74
85
  rescue Exception => e
75
86
  nil
@@ -1,5 +1,6 @@
1
1
  class SiteInspector
2
2
  def sniff(type)
3
+ require 'sniffles'
3
4
  results = Sniffles.sniff(body, type).select { |name, meta| meta[:found] == true }
4
5
  results.each { |name, result| result.delete :found} if results
5
6
  results
@@ -1,46 +1,106 @@
1
- require 'nokogiri'
1
+
2
+ # needed for HTTP analysis
2
3
  require 'open-uri'
3
- require 'public_suffix'
4
- require 'gman'
5
- require 'net/http'
6
- require "dnsruby"
7
- require 'yaml'
8
- require 'sniffles'
9
4
  require "addressable/uri"
5
+ require 'public_suffix'
10
6
  require 'typhoeus'
11
- require 'json'
12
- require 'resolv'
13
7
 
14
8
  require_relative 'site-inspector/cache'
9
+ require_relative 'site-inspector/headers'
15
10
  require_relative 'site-inspector/sniffer'
16
11
  require_relative 'site-inspector/dns'
17
12
  require_relative 'site-inspector/compliance'
18
- require_relative 'site-inspector/headers'
19
13
 
20
- Typhoeus::Config.cache = SiteInspectorCache.new
14
+
15
+ if ENV['CACHE']
16
+ Typhoeus::Config.cache = SiteInspectorDiskCache.new(ENV['CACHE'], ENV['CACHE_REPLACE'])
17
+ else
18
+ Typhoeus::Config.cache = SiteInspectorCache.new
19
+ end
21
20
 
22
21
  class SiteInspector
23
22
 
24
23
  def self.load_data(name)
24
+ require 'yaml'
25
25
  YAML.load_file File.expand_path "./data/#{name}.yml", File.dirname(__FILE__)
26
26
  end
27
27
 
28
- def initialize(domain)
28
+ # Utility parser for HSTS headers.
29
+ # RFC: http://tools.ietf.org/html/rfc6797
30
+ def self.hsts_parse(header)
31
+ # no hsts for you
32
+ nothing = {
33
+ max_age: nil,
34
+ include_subdomains: false,
35
+ preload: false,
36
+ enabled: false,
37
+ preload_ready: false
38
+ }
39
+
40
+ return nothing unless header and header.is_a?(String)
41
+
42
+ directives = header.split(/\s*;\s*/)
43
+
44
+ pairs = []
45
+ directives.each do |directive|
46
+ name, value = directive.downcase.split("=")
47
+
48
+ if value and value.start_with?("\"") and value.end_with?("\"")
49
+ value = value.sub(/^\"/, '')
50
+ value = value.sub(/\"$/, '')
51
+ end
52
+
53
+ pairs.push([name, value])
54
+ end
55
+
56
+ # reject invalid directives
57
+ fatal = pairs.any? do |name, value|
58
+ # TODO: more comprehensive rejection of characters
59
+ invalid_chars = /[\s\'\"]/
60
+ (name =~ invalid_chars) or (value =~ invalid_chars)
61
+ end
62
+
63
+ # good DAY, sir
64
+ return nothing if fatal
65
+
66
+ max_age_directive = pairs.find {|n, v| n == "max-age"}
67
+ max_age = max_age_directive ? max_age_directive[1].to_i : nil
68
+ include_subdomains = !!pairs.find {|n, v| n == "includesubdomains"}
69
+ preload = !!pairs.find {|n, v| n == "preload"}
70
+
71
+ enabled = !!(max_age and (max_age > 0))
72
+
73
+ # Google's minimum max-age for automatic preloading
74
+ eighteen_weeks = !!(max_age and (max_age >= 10886400))
75
+ preload_ready = !!(eighteen_weeks and include_subdomains and preload)
76
+
77
+ {
78
+ max_age: max_age,
79
+ include_subdomains: include_subdomains,
80
+ preload: preload,
81
+ enabled: enabled,
82
+ preload_ready: preload_ready
83
+ }
84
+ end
85
+
86
+ # makes no network requests
87
+ def initialize(domain, options = {})
29
88
  domain = domain.downcase
30
89
  domain = domain.sub /^https?\:/, ""
31
90
  domain = domain.sub /^\/+/, ""
32
91
  domain = domain.sub /^www\./, ""
33
92
  @uri = Addressable::URI.parse "//#{domain}"
34
93
  @domain = PublicSuffix.parse @uri.host
94
+ @timeout = options[:timeout] || 10
35
95
  end
36
96
 
37
97
  def inspect
38
98
  "<SiteInspector domain=\"#{domain}\">"
39
99
  end
40
100
 
41
- def uri(ssl=https?,www=www?)
101
+ def uri(ssl=enforce_https?,www=www?)
42
102
  uri = @uri.clone
43
- uri.host = "www.#{uri.host}" if www
103
+ uri.host = www ? "www.#{uri.host}" : uri.host
44
104
  uri.scheme = ssl ? "https" : "http"
45
105
  uri
46
106
  end
@@ -49,8 +109,13 @@ class SiteInspector
49
109
  www? ? PublicSuffix.parse("www.#{@uri.host}") : @domain
50
110
  end
51
111
 
52
- def request(ssl=false, www=false, followlocation=true)
53
- Typhoeus.get(uri(ssl, www), followlocation: followlocation, timeout: 10)
112
+ def request(ssl=false, www=false, followlocation=true, ssl_verifypeer=true, ssl_verifyhost=true)
113
+ to_get = uri(ssl, www)
114
+
115
+ # debugging
116
+ # puts "fetching: #{to_get}, #{followlocation ? "follow" : "no follow"}, #{ssl_verifypeer ? "verify peer, " : ""}#{ssl_verifyhost ? "verify host" : ""}"
117
+
118
+ Typhoeus.get(to_get, followlocation: followlocation, ssl_verifypeer: ssl_verifypeer, ssl_verifyhost: (ssl_verifyhost ? 2 : 0), timeout: @timeout)
54
119
  end
55
120
 
56
121
  def response
@@ -72,6 +137,7 @@ class SiteInspector
72
137
  end
73
138
 
74
139
  def doc
140
+ require 'nokogiri'
75
141
  @doc ||= Nokogiri::HTML response.body if response
76
142
  end
77
143
 
@@ -80,6 +146,7 @@ class SiteInspector
80
146
  end
81
147
 
82
148
  def government?
149
+ require 'gman'
83
150
  Gman.valid? domain.to_s
84
151
  end
85
152
 
@@ -101,7 +168,7 @@ class SiteInspector
101
168
  end
102
169
 
103
170
  def www?
104
- response && response.effective_url && !!response.effective_url.match(/https?:\/\/www\./)
171
+ response && response.effective_url && !!response.effective_url.match(/^https?:\/\/www\./)
105
172
  end
106
173
 
107
174
  def non_www?
@@ -123,40 +190,447 @@ class SiteInspector
123
190
  end
124
191
  end
125
192
 
126
- def to_json
127
- to_hash.to_json
193
+ def http
194
+ details = {
195
+ endpoints: endpoints
196
+ }
197
+
198
+ # convenient shorthand for the extensive statements to come
199
+ combos = details[:endpoints]
200
+
201
+ # A domain is "canonically" at www if:
202
+ # * at least one of its www endpoints responds
203
+ # * both root endpoints are either down or redirect *somewhere*
204
+ # * either both root endpoints are down, *or* at least one
205
+ # root endpoint redirect should immediately go to
206
+ # an *internal* www endpoint
207
+ # This is meant to affirm situations like:
208
+ # http:// -> https:// -> https://www
209
+ # https:// -> http:// -> https://www
210
+ # and meant to avoid affirming situations like:
211
+ # http:// -> http://non-www,
212
+ # http://www -> http://non-www
213
+ # or like:
214
+ # https:// -> 200, http:// -> http://www
215
+
216
+ www = !!(
217
+ (
218
+ combos[:https][:www][:up] or
219
+ combos[:http][:www][:up]
220
+ ) and (
221
+ (
222
+ combos[:https][:root][:redirect] or
223
+ !combos[:https][:root][:up] or
224
+ combos[:https][:root][:https_bad_name] or
225
+ !combos[:https][:root][:status].to_s.start_with?("2")
226
+ ) and (
227
+ combos[:http][:root][:redirect] or
228
+ !combos[:http][:root][:up] or
229
+ !combos[:http][:root][:status].to_s.start_with?("2")
230
+ )
231
+ ) and (
232
+ (
233
+ (
234
+ !combos[:https][:root][:up] or
235
+ combos[:https][:root][:https_bad_name] or
236
+ !combos[:https][:root][:status].to_s.start_with?("2")
237
+ ) and
238
+ (
239
+ !combos[:http][:root][:up] or
240
+ !combos[:http][:root][:status].to_s.start_with?("2")
241
+ )
242
+ ) or
243
+ (
244
+ combos[:https][:root][:redirect_immediately_to_www] and
245
+ !combos[:https][:root][:redirect_immediately_external]
246
+ ) or
247
+ (
248
+ combos[:http][:root][:redirect_immediately_to_www] and
249
+ !combos[:http][:root][:redirect_immediately_external]
250
+ )
251
+ )
252
+ )
253
+
254
+ # A domain is "canonically" at https if:
255
+ # * at least one of its https endpoints is live and
256
+ # doesn't have an invalid hostname
257
+ # * both http endpoints are either down or redirect *somewhere*
258
+ # * at least one http endpoint redirects immediately to
259
+ # an *internal* https endpoint
260
+ # This is meant to affirm situations like:
261
+ # http:// -> http://www -> https://
262
+ # https:// -> http:// -> https://www
263
+ # and meant to avoid affirming situations like:
264
+ # http:// -> http://non-www
265
+ # http://www -> http://non-www
266
+ # or:
267
+ # http:// -> 200, http://www -> https://www
268
+ #
269
+ # It allows a site to be canonically HTTPS if the cert has
270
+ # a valid hostname but invalid chain issues.
271
+
272
+ https = !!(
273
+ (
274
+ (
275
+ combos[:https][:root][:up] and
276
+ !combos[:https][:root][:https_bad_name]
277
+ ) or
278
+ (
279
+ combos[:https][:www][:up] and
280
+ !combos[:https][:www][:https_bad_name]
281
+ )
282
+ ) and (
283
+ (
284
+ combos[:http][:root][:redirect] or
285
+ !combos[:http][:root][:up] or
286
+ !combos[:http][:root][:status].to_s.start_with?("2")
287
+ ) and (
288
+ combos[:http][:www][:redirect] or
289
+ !combos[:http][:www][:up] or
290
+ !combos[:http][:www][:status].to_s.start_with?("2")
291
+ )
292
+ ) and (
293
+ (
294
+ combos[:http][:root][:redirect_immediately_to_https] and
295
+ !combos[:http][:root][:redirect_immediately_external]
296
+ ) or (
297
+ combos[:http][:www][:redirect_immediately_to_https] and
298
+ !combos[:http][:www][:redirect_immediately_external]
299
+ )
300
+ )
301
+ )
302
+
303
+ details[:canonical_endpoint] = www ? :www : :root
304
+ details[:canonical_protocol] = https ? :https : :http
305
+ details[:canonical] = uri(https, www).to_s
306
+
307
+ # If any endpoint is up, the domain is up.
308
+ details[:up] = !!(
309
+ combos[:https][:www][:up] or
310
+ combos[:https][:root][:up] or
311
+ combos[:http][:www][:up] or
312
+ combos[:http][:root][:up]
313
+ )
314
+
315
+ # A domain's root is broken if neither protocol can connect.
316
+ details[:broken_root] = !!(
317
+ !combos[:https][:root][:up] and
318
+ !combos[:http][:root][:up]
319
+ )
320
+
321
+ # A domain's www is broken if neither protocol can connect.
322
+ details[:broken_www] = !!(
323
+ !combos[:https][:www][:up] and
324
+ !combos[:http][:www][:up]
325
+ )
326
+
327
+ # HTTPS is "supported" (different than "canonical" or "enforced") if:
328
+ #
329
+ # * Either of the HTTPS endpoints is listening, and doesn't have
330
+ # an invalid hostname.
331
+ details[:support_https] = !!(
332
+ (
333
+ (combos[:https][:root][:status] != 0) and
334
+ !combos[:https][:root][:https_bad_name]
335
+ ) or (
336
+ (combos[:https][:www][:status] != 0) and
337
+ !combos[:https][:www][:https_bad_name]
338
+ )
339
+ )
340
+
341
+ # we can say that a canonical HTTPS site "defaults" to HTTPS,
342
+ # even if it doesn't *strictly* enforce it (e.g. having a www
343
+ # subdomain first to go HTTP root before HTTPS root).
344
+ details[:default_https] = https
345
+
346
+ # HTTPS is "downgraded" if both:
347
+ #
348
+ # * HTTPS is supported, and
349
+ # * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
350
+
351
+ details[:downgrade_https] = !!(
352
+ details[:support_https] and
353
+ (
354
+ combos[:https][details[:canonical_endpoint]][:redirect] and
355
+ !combos[:https][details[:canonical_endpoint]][:redirect_immediately_external] and
356
+ !combos[:https][details[:canonical_endpoint]][:redirect_immediately_to_https]
357
+ )
358
+ )
359
+
360
+ # HTTPS is enforced if one of the HTTPS endpoints is "live",
361
+ # and if both *HTTP* endpoints are either:
362
+ #
363
+ # * down, or
364
+ # * redirect immediately to HTTPS.
365
+ #
366
+ # This is different than whether a domain is "canonically" HTTPS.
367
+ #
368
+ # * an HTTP redirect can go to HTTPS on another domain, as long
369
+ # as it's immediate.
370
+ # * a domain with an invalid cert can still be enforcing HTTPS.
371
+ details[:enforce_https] = !!(
372
+ (
373
+ !combos[:http][:www][:up] or
374
+ (combos[:http][:www][:redirect_immediately_to_https])
375
+ ) and
376
+ (
377
+ !combos[:http][:root][:up] or
378
+ (combos[:http][:root][:redirect_immediately_to_https])
379
+ ) and
380
+ (
381
+ combos[:https][:www][:up] or
382
+ combos[:https][:root][:up]
383
+ )
384
+ )
385
+
386
+ # The domain is a redirect if at least one endpoint is up,
387
+ # and each one is *either* an external redirect or down entirely.
388
+ details[:redirect] = !!(
389
+ details[:up] and
390
+ (
391
+ combos[:http][:www][:redirect_external] or
392
+ !combos[:http][:www][:up] or
393
+ combos[:http][:www][:status] >= 400
394
+ ) and
395
+ (
396
+ combos[:http][:root][:redirect_external] or
397
+ !combos[:http][:root][:up] or
398
+ combos[:http][:root][:status] >= 400
399
+ ) and
400
+ (
401
+ combos[:https][:www][:redirect_external] or
402
+ !combos[:https][:www][:up] or
403
+ combos[:https][:www][:https_bad_name] or
404
+ combos[:https][:www][:status] >= 400
405
+ ) and
406
+ (
407
+ combos[:https][:root][:redirect_external] or
408
+ !combos[:https][:root][:up] or
409
+ combos[:https][:root][:https_bad_name] or
410
+ combos[:https][:root][:status] >= 400
411
+ )
412
+ )
413
+
414
+ # OK, we've said a domain is a "redirect" domain.
415
+ # What does the domain redirect to?
416
+ if details[:redirect]
417
+ canon = combos[details[:canonical_protocol]][details[:canonical_endpoint]]
418
+ details[:redirect_to] = canon[:redirect_to]
419
+ else
420
+ details[:redirect_to] = nil
421
+ end
422
+
423
+ # HSTS on the canonical domain? (valid HTTPS checked in endpoint)
424
+ details[:hsts] = !!combos[:https][details[:canonical_endpoint]][:hsts]
425
+ details[:hsts_header] = combos[:https][details[:canonical_endpoint]][:hsts_header]
426
+
427
+ # HSTS on the entire domain?
428
+ details[:hsts_entire_domain] = !!(
429
+ combos[:https][:root][:hsts] and
430
+ combos[:https][:root][:hsts_details][:include_subdomains]
431
+ )
432
+
433
+ # HSTS preload-ready for the entire domain?
434
+ #
435
+ # Re-checks :hsts_entire_domain in case the :preload_ready
436
+ # flag ever changes its definition to not require include_subdomains.
437
+
438
+ details[:hsts_entire_domain_preload] = !!(
439
+ details[:hsts_entire_domain] and
440
+ combos[:https][:root][:hsts_details][:preload_ready]
441
+ )
442
+
443
+ details
128
444
  end
129
445
 
130
- def to_hash
446
+ def endpoints
447
+ https_www = http_endpoint(true, true)
448
+ http_www = http_endpoint(false, true)
449
+ https_root = http_endpoint(true, false)
450
+ http_root = http_endpoint(false, false)
451
+
131
452
  {
132
- :domain => domain.to_s,
133
- :uri => uri.to_s,
134
- :government => government?,
135
- :live => !!response,
136
- :ssl => https?,
137
- :enforce_https => enforce_https?,
138
- :non_www => non_www?,
139
- :redirect => redirect,
140
- :ip => ip,
141
- :hostname => hostname.to_s,
142
- :ipv6 => ipv6?,
143
- :dnssec => dnssec?,
144
- :cdn => cdn,
145
- :google_apps => google_apps?,
146
- :cloud_provider => cloud_provider,
147
- :server => server,
148
- :cms => cms,
149
- :analytics => analytics,
150
- :javascript => javascript,
151
- :advertising => advertising,
152
- :slash_data => slash_data?,
153
- :slash_developer => slash_developer?,
154
- :data_dot_json => data_dot_json?,
155
- :click_jacking_protection => click_jacking_protection?,
156
- :content_security_policy => content_security_policy?,
157
- :xss_protection => xss_protection?,
158
- :secure_cookies => secure_cookies?,
159
- :strict_transport_security => strict_transport_security?
453
+ https: {
454
+ www: https_www,
455
+ root: https_root
456
+ },
457
+ http: {
458
+ www: http_www,
459
+ root: http_root
460
+ }
160
461
  }
161
462
  end
463
+
464
+ # State of affairs at a particular endpoint.
465
+ def http_endpoint(ssl, www)
466
+ details = {}
467
+
468
+ # Don't follow redirects for first ping.
469
+ response = request(ssl, www, false)
470
+
471
+
472
+ # For HTTPS: examine the full range of possibilities.
473
+ if ssl
474
+ if response.return_code == :ok
475
+ details[:https_valid] = true
476
+ details[:https_bad_chain] = false
477
+ details[:https_bad_name] = false
478
+
479
+ # Bad certificate chain.
480
+ elsif response.return_code == :ssl_cacert
481
+ details[:https_valid] = false
482
+ details[:https_bad_chain] = true
483
+ response = request(ssl, www, false, false, true)
484
+ # Bad everything.
485
+ if response.return_code == :peer_failed_verification
486
+ details[:https_bad_name] = true
487
+ response = request(ssl, www, false, false, false)
488
+ end
489
+
490
+ # Bad hostname.
491
+ elsif response.return_code == :peer_failed_verification
492
+ details[:https_valid] = false
493
+ details[:https_bad_name] = true
494
+ response = request(ssl, www, false, true, false)
495
+ # Bad everything.
496
+ if response.return_code == :ssl_cacert
497
+ details[:https_bad_chain] = true
498
+ response = request(ssl, www, false, false, false)
499
+ end
500
+
501
+ # not sure what else would happen
502
+ elsif response.response_code != 0
503
+ details[:https_valid] = false
504
+ details[:https_unknown_issue] = response.return_code
505
+ end
506
+ end
507
+
508
+ # If we ended up with a failure, return it.
509
+ details[:status] = response.response_code
510
+ details[:up] = (response.response_code != 0)
511
+ return details if !details[:up]
512
+
513
+ headers = Hash[response.headers.map{ |k,v| [k.downcase,v] }]
514
+ details[:headers] = headers
515
+
516
+
517
+ # HSTS only takes effect when delivered over valid HTTPS.
518
+ hsts = SiteInspector.hsts_parse(headers["strict-transport-security"])
519
+
520
+ details[:hsts] = !!(
521
+ ssl and
522
+ details[:https_valid] and
523
+ hsts[:enabled]
524
+ )
525
+
526
+ details[:hsts_header] = headers["strict-transport-security"]
527
+ details[:hsts_details] = hsts
528
+
529
+
530
+ # If it's a redirect, go find the ultimate response starting from this combo.
531
+ redirect_code = response.response_code.to_s.start_with?("3")
532
+ location_header = headers["location"]
533
+ if redirect_code and location_header
534
+ location_header = location_header.downcase
535
+ details[:redirect] = true
536
+
537
+ ultimate_response = request(ssl, www, true, !details[:https_bad_chain], !details[:https_bad_name])
538
+ uri_original = URI(ultimate_response.request.url)
539
+
540
+ # treat relative Location headers as having the original hostname
541
+ if location_header.start_with?("http:") or location_header.start_with?("https:")
542
+ uri_immediate = URI(URI.escape(location_header))
543
+ else
544
+ uri_immediate = URI.join(uri_original, URI.escape(location_header))
545
+ end
546
+
547
+ uri_eventual = URI(ultimate_response.effective_url.downcase)
548
+
549
+ # compare base domain names
550
+ base_original = PublicSuffix.parse(uri_original.hostname).domain
551
+
552
+ # if the redirects aren't to valid hostnames (e.g. IP addresses)
553
+ # then fine just compare them directly, they're not going to be
554
+ # identical anyway.
555
+ base_immediate = begin
556
+ PublicSuffix.parse(uri_immediate.hostname).domain
557
+ rescue PublicSuffix::DomainInvalid
558
+ uri_immediate.to_s
559
+ end
560
+
561
+ base_eventual = begin
562
+ PublicSuffix.parse(uri_eventual.hostname).domain
563
+ rescue PublicSuffix::DomainInvalid
564
+ uri_eventual.to_s
565
+ end
566
+
567
+ details[:redirect_immediately_to] = uri_immediate.to_s
568
+ details[:redirect_immediately_to_www] = !!uri_immediate.to_s.match(/^https?:\/\/www\./)
569
+ details[:redirect_immediately_to_https] = uri_immediate.to_s.start_with?("https://")
570
+ details[:redirect_immediately_external] = (base_original != base_immediate)
571
+
572
+ details[:redirect_to] = uri_eventual.to_s
573
+ details[:redirect_external] = (base_original != base_eventual)
574
+
575
+ # otherwise, mark all the redirect fields as false/null
576
+ else
577
+ details[:redirect] = false
578
+ details[:redirect_immediately_to] = nil
579
+ details[:redirect_immediately_to_www] = false
580
+ details[:redirect_immediately_to_https] = false
581
+ details[:redirect_immediately_external] = false
582
+
583
+ details[:redirect_to] = nil
584
+ details[:redirect_external] = false
585
+ end
586
+
587
+ details
588
+ end
589
+
590
+ def to_hash(http_only=false)
591
+ if http_only
592
+ {
593
+ :domain => domain.to_s,
594
+ :uri => uri.to_s,
595
+ :live => !!response,
596
+ :ssl => https?,
597
+ :enforce_https => enforce_https?,
598
+ :non_www => non_www?,
599
+ :redirect => redirect,
600
+ :headers => headers
601
+ }
602
+ else
603
+ {
604
+ :domain => domain.to_s,
605
+ :uri => uri.to_s,
606
+ :government => government?,
607
+ :live => !!response,
608
+ :ssl => https?,
609
+ :enforce_https => enforce_https?,
610
+ :non_www => non_www?,
611
+ :redirect => redirect,
612
+ :ip => ip,
613
+ :hostname => hostname.to_s,
614
+ :ipv6 => ipv6?,
615
+ :dnssec => dnssec?,
616
+ :cdn => cdn,
617
+ :google_apps => google_apps?,
618
+ :cloud_provider => cloud_provider,
619
+ :server => server,
620
+ :cms => cms,
621
+ :analytics => analytics,
622
+ :javascript => javascript,
623
+ :advertising => advertising,
624
+ :slash_data => slash_data?,
625
+ :slash_developer => slash_developer?,
626
+ :data_dot_json => data_dot_json?,
627
+ :click_jacking_protection => click_jacking_protection?,
628
+ :content_security_policy => content_security_policy?,
629
+ :xss_protection => xss_protection?,
630
+ :secure_cookies => secure_cookies?,
631
+ :strict_transport_security => strict_transport_security?,
632
+ :headers => headers
633
+ }
634
+ end
635
+ end
162
636
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site-inspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-13 00:00:00.000000000 Z
11
+ date: 2015-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0.6'
97
+ - !ruby/object:Gem::Dependency
98
+ name: oj
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '2.11'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '2.11'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: pry
99
113
  requirement: !ruby/object:Gem::Requirement