site-inspector 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 081857089e6dd0832b6fd0553615c02c74ba271b
4
- data.tar.gz: aaa19892e68cf24c05898e44e2314413976819a1
3
+ metadata.gz: e4e41e2a1639e9f5f6e7f018ef58c664c834b2c2
4
+ data.tar.gz: 3205114fcaaaa11cf03ec1eb8fa2f5736b6a99f7
5
5
  SHA512:
6
- metadata.gz: 613c90276d90b35a8d74502893f6d4bf2579c84f4f399c34c7ee33c3722f92af29e12796d7c733a8503903e63eb85e49f940379ca40d41c55ce1aa5660437181
7
- data.tar.gz: 6ddf58171d7c76d8a0f5b029b94878a76e1b68b3becb6dc6c48b972389c9c94e48f5e7ff56ad52b3b00dab8b02aa6620c083920dbe97e2228de6d513ad52c8d1
6
+ metadata.gz: ffa69fcc3949abe434a476bf8bff0c65dbd4085b5c40f379c0f7b3bbb9eaf43ffc41527ea571fe211ac71d3911b58deaebf8446ee96a69d8a279c2877e87e3bb
7
+ data.tar.gz: b369d00e140c4b258b02f02e5b841f4ce26d84c0b7b32f10e6015a99673355b3a505194495fbf2bf5e9ecfabfed6e486db18572a56418bcfaf329b6acb746778
data/bin/site-inspector CHANGED
@@ -1,15 +1,29 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require_relative "../lib/site-inspector"
4
- require "json"
4
+
5
+ require "oj"
5
6
 
6
7
  domain = ARGV[0]
8
+ http_mode = (ARGV[1] == "--http")
7
9
 
8
10
  if domain.to_s.empty?
9
- puts "Usage: site-inspector [DOMAIN]"
11
+ puts "Usage: site-inspector [DOMAIN] [--http]"
10
12
  exit 1
11
13
  end
12
14
 
13
- details = SiteInspector.new(domain).to_hash
15
+ # HTTP mode:
16
+ # * all details for possible endpoints
17
+ # * don't follow redirects
18
+ # * shorter timeout
19
+ if http_mode
20
+ site = SiteInspector.new(domain)
21
+ details = site.http
22
+
23
+ # Normal mode: autodetect canonical domain, sweep every attribute.
24
+ else
25
+ site = SiteInspector.new(domain)
26
+ details = site.to_hash
27
+ end
14
28
 
15
- puts JSON.pretty_generate(details)
29
+ puts Oj.dump(details, indent: 2, mode: :compat)
@@ -11,3 +11,48 @@ class SiteInspectorCache
11
11
  @memory[request] = response
12
12
  end
13
13
  end
14
+
15
+ class SiteInspectorDiskCache
16
+ def initialize(dir = nil, replace = false)
17
+ @dir = dir
18
+ @memory = {}
19
+ @replace = replace
20
+ end
21
+
22
+ def path(request)
23
+ File.join(@dir, request.cache_key)
24
+ end
25
+
26
+ def fetch(request)
27
+ if File.exist?(path(request))
28
+
29
+ if @replace
30
+ FileUtils.rm(path(request))
31
+ nil
32
+ else
33
+ contents = File.read(path(request))
34
+ begin
35
+ Marshal.load(contents)
36
+ rescue ArgumentError
37
+ FileUtils.rm(path(request))
38
+ nil
39
+ end
40
+ end
41
+ end
42
+ end
43
+
44
+ def store(request, response)
45
+ File.open(File.join(@dir, request.cache_key), "w") do |f|
46
+ f.write Marshal.dump(response)
47
+ end
48
+ end
49
+
50
+ def get(request)
51
+ @memory[request] || fetch(request)
52
+ end
53
+
54
+ def set(request, response)
55
+ store(request, response)
56
+ @memory[request] = response
57
+ end
58
+ end
@@ -1,7 +1,12 @@
1
1
  class SiteInspector
2
2
 
3
3
  def resolver
4
- @resolver ||= Dnsruby::Resolver.new
4
+ require "dnsruby"
5
+ @resolver ||= begin
6
+ resolver = Dnsruby::Resolver.new
7
+ resolver.config.nameserver = ["8.8.8.8", "8.8.4.4"]
8
+ resolver
9
+ end
5
10
  end
6
11
 
7
12
  def query(type="ANY")
@@ -14,12 +19,16 @@ class SiteInspector
14
19
  @dns ||= query
15
20
  end
16
21
 
22
+ def has_record?(type)
23
+ dns.any? { |record| record.type == type } || query(type).count != 0
24
+ end
25
+
17
26
  def dnssec?
18
- @dnssec ||= query("DNSKEY").count != 0
27
+ @dnssec ||= has_record? "DNSKEY"
19
28
  end
20
29
 
21
30
  def ipv6?
22
- @ipv6 ||= query("AAAA").count != 0
31
+ @ipv6 ||= has_record? "AAAA"
23
32
  end
24
33
 
25
34
  def detect_by_hostname(type)
@@ -64,12 +73,14 @@ class SiteInspector
64
73
  end
65
74
 
66
75
  def ip
76
+ require 'resolv'
67
77
  @ip ||= Resolv.getaddress domain.to_s
68
78
  rescue Resolv::ResolvError
69
79
  nil
70
80
  end
71
81
 
72
82
  def hostname
83
+ require 'resolv'
73
84
  @hostname ||= PublicSuffix.parse(Resolv.getname(ip))
74
85
  rescue Exception => e
75
86
  nil
@@ -1,5 +1,6 @@
1
1
  class SiteInspector
2
2
  def sniff(type)
3
+ require 'sniffles'
3
4
  results = Sniffles.sniff(body, type).select { |name, meta| meta[:found] == true }
4
5
  results.each { |name, result| result.delete :found} if results
5
6
  results
@@ -1,46 +1,106 @@
1
- require 'nokogiri'
1
+
2
+ # needed for HTTP analysis
2
3
  require 'open-uri'
3
- require 'public_suffix'
4
- require 'gman'
5
- require 'net/http'
6
- require "dnsruby"
7
- require 'yaml'
8
- require 'sniffles'
9
4
  require "addressable/uri"
5
+ require 'public_suffix'
10
6
  require 'typhoeus'
11
- require 'json'
12
- require 'resolv'
13
7
 
14
8
  require_relative 'site-inspector/cache'
9
+ require_relative 'site-inspector/headers'
15
10
  require_relative 'site-inspector/sniffer'
16
11
  require_relative 'site-inspector/dns'
17
12
  require_relative 'site-inspector/compliance'
18
- require_relative 'site-inspector/headers'
19
13
 
20
- Typhoeus::Config.cache = SiteInspectorCache.new
14
+
15
+ if ENV['CACHE']
16
+ Typhoeus::Config.cache = SiteInspectorDiskCache.new(ENV['CACHE'], ENV['CACHE_REPLACE'])
17
+ else
18
+ Typhoeus::Config.cache = SiteInspectorCache.new
19
+ end
21
20
 
22
21
  class SiteInspector
23
22
 
24
23
  def self.load_data(name)
24
+ require 'yaml'
25
25
  YAML.load_file File.expand_path "./data/#{name}.yml", File.dirname(__FILE__)
26
26
  end
27
27
 
28
- def initialize(domain)
28
+ # Utility parser for HSTS headers.
29
+ # RFC: http://tools.ietf.org/html/rfc6797
30
+ def self.hsts_parse(header)
31
+ # no hsts for you
32
+ nothing = {
33
+ max_age: nil,
34
+ include_subdomains: false,
35
+ preload: false,
36
+ enabled: false,
37
+ preload_ready: false
38
+ }
39
+
40
+ return nothing unless header and header.is_a?(String)
41
+
42
+ directives = header.split(/\s*;\s*/)
43
+
44
+ pairs = []
45
+ directives.each do |directive|
46
+ name, value = directive.downcase.split("=")
47
+
48
+ if value and value.start_with?("\"") and value.end_with?("\"")
49
+ value = value.sub(/^\"/, '')
50
+ value = value.sub(/\"$/, '')
51
+ end
52
+
53
+ pairs.push([name, value])
54
+ end
55
+
56
+ # reject invalid directives
57
+ fatal = pairs.any? do |name, value|
58
+ # TODO: more comprehensive rejection of characters
59
+ invalid_chars = /[\s\'\"]/
60
+ (name =~ invalid_chars) or (value =~ invalid_chars)
61
+ end
62
+
63
+ # good DAY, sir
64
+ return nothing if fatal
65
+
66
+ max_age_directive = pairs.find {|n, v| n == "max-age"}
67
+ max_age = max_age_directive ? max_age_directive[1].to_i : nil
68
+ include_subdomains = !!pairs.find {|n, v| n == "includesubdomains"}
69
+ preload = !!pairs.find {|n, v| n == "preload"}
70
+
71
+ enabled = !!(max_age and (max_age > 0))
72
+
73
+ # Google's minimum max-age for automatic preloading
74
+ eighteen_weeks = !!(max_age and (max_age >= 10886400))
75
+ preload_ready = !!(eighteen_weeks and include_subdomains and preload)
76
+
77
+ {
78
+ max_age: max_age,
79
+ include_subdomains: include_subdomains,
80
+ preload: preload,
81
+ enabled: enabled,
82
+ preload_ready: preload_ready
83
+ }
84
+ end
85
+
86
+ # makes no network requests
87
+ def initialize(domain, options = {})
29
88
  domain = domain.downcase
30
89
  domain = domain.sub /^https?\:/, ""
31
90
  domain = domain.sub /^\/+/, ""
32
91
  domain = domain.sub /^www\./, ""
33
92
  @uri = Addressable::URI.parse "//#{domain}"
34
93
  @domain = PublicSuffix.parse @uri.host
94
+ @timeout = options[:timeout] || 10
35
95
  end
36
96
 
37
97
  def inspect
38
98
  "<SiteInspector domain=\"#{domain}\">"
39
99
  end
40
100
 
41
- def uri(ssl=https?,www=www?)
101
+ def uri(ssl=enforce_https?,www=www?)
42
102
  uri = @uri.clone
43
- uri.host = "www.#{uri.host}" if www
103
+ uri.host = www ? "www.#{uri.host}" : uri.host
44
104
  uri.scheme = ssl ? "https" : "http"
45
105
  uri
46
106
  end
@@ -49,8 +109,13 @@ class SiteInspector
49
109
  www? ? PublicSuffix.parse("www.#{@uri.host}") : @domain
50
110
  end
51
111
 
52
- def request(ssl=false, www=false, followlocation=true)
53
- Typhoeus.get(uri(ssl, www), followlocation: followlocation, timeout: 10)
112
+ def request(ssl=false, www=false, followlocation=true, ssl_verifypeer=true, ssl_verifyhost=true)
113
+ to_get = uri(ssl, www)
114
+
115
+ # debugging
116
+ # puts "fetching: #{to_get}, #{followlocation ? "follow" : "no follow"}, #{ssl_verifypeer ? "verify peer, " : ""}#{ssl_verifyhost ? "verify host" : ""}"
117
+
118
+ Typhoeus.get(to_get, followlocation: followlocation, ssl_verifypeer: ssl_verifypeer, ssl_verifyhost: (ssl_verifyhost ? 2 : 0), timeout: @timeout)
54
119
  end
55
120
 
56
121
  def response
@@ -72,6 +137,7 @@ class SiteInspector
72
137
  end
73
138
 
74
139
  def doc
140
+ require 'nokogiri'
75
141
  @doc ||= Nokogiri::HTML response.body if response
76
142
  end
77
143
 
@@ -80,6 +146,7 @@ class SiteInspector
80
146
  end
81
147
 
82
148
  def government?
149
+ require 'gman'
83
150
  Gman.valid? domain.to_s
84
151
  end
85
152
 
@@ -101,7 +168,7 @@ class SiteInspector
101
168
  end
102
169
 
103
170
  def www?
104
- response && response.effective_url && !!response.effective_url.match(/https?:\/\/www\./)
171
+ response && response.effective_url && !!response.effective_url.match(/^https?:\/\/www\./)
105
172
  end
106
173
 
107
174
  def non_www?
@@ -123,40 +190,447 @@ class SiteInspector
123
190
  end
124
191
  end
125
192
 
126
- def to_json
127
- to_hash.to_json
193
+ def http
194
+ details = {
195
+ endpoints: endpoints
196
+ }
197
+
198
+ # convenient shorthand for the extensive statements to come
199
+ combos = details[:endpoints]
200
+
201
+ # A domain is "canonically" at www if:
202
+ # * at least one of its www endpoints responds
203
+ # * both root endpoints are either down or redirect *somewhere*
204
+ # * either both root endpoints are down, *or* at least one
205
+ # root endpoint redirect should immediately go to
206
+ # an *internal* www endpoint
207
+ # This is meant to affirm situations like:
208
+ # http:// -> https:// -> https://www
209
+ # https:// -> http:// -> https://www
210
+ # and meant to avoid affirming situations like:
211
+ # http:// -> http://non-www,
212
+ # http://www -> http://non-www
213
+ # or like:
214
+ # https:// -> 200, http:// -> http://www
215
+
216
+ www = !!(
217
+ (
218
+ combos[:https][:www][:up] or
219
+ combos[:http][:www][:up]
220
+ ) and (
221
+ (
222
+ combos[:https][:root][:redirect] or
223
+ !combos[:https][:root][:up] or
224
+ combos[:https][:root][:https_bad_name] or
225
+ !combos[:https][:root][:status].to_s.start_with?("2")
226
+ ) and (
227
+ combos[:http][:root][:redirect] or
228
+ !combos[:http][:root][:up] or
229
+ !combos[:http][:root][:status].to_s.start_with?("2")
230
+ )
231
+ ) and (
232
+ (
233
+ (
234
+ !combos[:https][:root][:up] or
235
+ combos[:https][:root][:https_bad_name] or
236
+ !combos[:https][:root][:status].to_s.start_with?("2")
237
+ ) and
238
+ (
239
+ !combos[:http][:root][:up] or
240
+ !combos[:http][:root][:status].to_s.start_with?("2")
241
+ )
242
+ ) or
243
+ (
244
+ combos[:https][:root][:redirect_immediately_to_www] and
245
+ !combos[:https][:root][:redirect_immediately_external]
246
+ ) or
247
+ (
248
+ combos[:http][:root][:redirect_immediately_to_www] and
249
+ !combos[:http][:root][:redirect_immediately_external]
250
+ )
251
+ )
252
+ )
253
+
254
+ # A domain is "canonically" at https if:
255
+ # * at least one of its https endpoints is live and
256
+ # doesn't have an invalid hostname
257
+ # * both http endpoints are either down or redirect *somewhere*
258
+ # * at least one http endpoint redirects immediately to
259
+ # an *internal* https endpoint
260
+ # This is meant to affirm situations like:
261
+ # http:// -> http://www -> https://
262
+ # https:// -> http:// -> https://www
263
+ # and meant to avoid affirming situations like:
264
+ # http:// -> http://non-www
265
+ # http://www -> http://non-www
266
+ # or:
267
+ # http:// -> 200, http://www -> https://www
268
+ #
269
+ # It allows a site to be canonically HTTPS if the cert has
270
+ # a valid hostname but invalid chain issues.
271
+
272
+ https = !!(
273
+ (
274
+ (
275
+ combos[:https][:root][:up] and
276
+ !combos[:https][:root][:https_bad_name]
277
+ ) or
278
+ (
279
+ combos[:https][:www][:up] and
280
+ !combos[:https][:www][:https_bad_name]
281
+ )
282
+ ) and (
283
+ (
284
+ combos[:http][:root][:redirect] or
285
+ !combos[:http][:root][:up] or
286
+ !combos[:http][:root][:status].to_s.start_with?("2")
287
+ ) and (
288
+ combos[:http][:www][:redirect] or
289
+ !combos[:http][:www][:up] or
290
+ !combos[:http][:www][:status].to_s.start_with?("2")
291
+ )
292
+ ) and (
293
+ (
294
+ combos[:http][:root][:redirect_immediately_to_https] and
295
+ !combos[:http][:root][:redirect_immediately_external]
296
+ ) or (
297
+ combos[:http][:www][:redirect_immediately_to_https] and
298
+ !combos[:http][:www][:redirect_immediately_external]
299
+ )
300
+ )
301
+ )
302
+
303
+ details[:canonical_endpoint] = www ? :www : :root
304
+ details[:canonical_protocol] = https ? :https : :http
305
+ details[:canonical] = uri(https, www).to_s
306
+
307
+ # If any endpoint is up, the domain is up.
308
+ details[:up] = !!(
309
+ combos[:https][:www][:up] or
310
+ combos[:https][:root][:up] or
311
+ combos[:http][:www][:up] or
312
+ combos[:http][:root][:up]
313
+ )
314
+
315
+ # A domain's root is broken if neither protocol can connect.
316
+ details[:broken_root] = !!(
317
+ !combos[:https][:root][:up] and
318
+ !combos[:http][:root][:up]
319
+ )
320
+
321
+ # A domain's www is broken if neither protocol can connect.
322
+ details[:broken_www] = !!(
323
+ !combos[:https][:www][:up] and
324
+ !combos[:http][:www][:up]
325
+ )
326
+
327
+ # HTTPS is "supported" (different than "canonical" or "enforced") if:
328
+ #
329
+ # * Either of the HTTPS endpoints is listening, and doesn't have
330
+ # an invalid hostname.
331
+ details[:support_https] = !!(
332
+ (
333
+ (combos[:https][:root][:status] != 0) and
334
+ !combos[:https][:root][:https_bad_name]
335
+ ) or (
336
+ (combos[:https][:www][:status] != 0) and
337
+ !combos[:https][:www][:https_bad_name]
338
+ )
339
+ )
340
+
341
+ # we can say that a canonical HTTPS site "defaults" to HTTPS,
342
+ # even if it doesn't *strictly* enforce it (e.g. having a www
343
+ # subdomain first to go HTTP root before HTTPS root).
344
+ details[:default_https] = https
345
+
346
+ # HTTPS is "downgraded" if both:
347
+ #
348
+ # * HTTPS is supported, and
349
+ # * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
350
+
351
+ details[:downgrade_https] = !!(
352
+ details[:support_https] and
353
+ (
354
+ combos[:https][details[:canonical_endpoint]][:redirect] and
355
+ !combos[:https][details[:canonical_endpoint]][:redirect_immediately_external] and
356
+ !combos[:https][details[:canonical_endpoint]][:redirect_immediately_to_https]
357
+ )
358
+ )
359
+
360
+ # HTTPS is enforced if one of the HTTPS endpoints is "live",
361
+ # and if both *HTTP* endpoints are either:
362
+ #
363
+ # * down, or
364
+ # * redirect immediately to HTTPS.
365
+ #
366
+ # This is different than whether a domain is "canonically" HTTPS.
367
+ #
368
+ # * an HTTP redirect can go to HTTPS on another domain, as long
369
+ # as it's immediate.
370
+ # * a domain with an invalid cert can still be enforcing HTTPS.
371
+ details[:enforce_https] = !!(
372
+ (
373
+ !combos[:http][:www][:up] or
374
+ (combos[:http][:www][:redirect_immediately_to_https])
375
+ ) and
376
+ (
377
+ !combos[:http][:root][:up] or
378
+ (combos[:http][:root][:redirect_immediately_to_https])
379
+ ) and
380
+ (
381
+ combos[:https][:www][:up] or
382
+ combos[:https][:root][:up]
383
+ )
384
+ )
385
+
386
+ # The domain is a redirect if at least one endpoint is up,
387
+ # and each one is *either* an external redirect or down entirely.
388
+ details[:redirect] = !!(
389
+ details[:up] and
390
+ (
391
+ combos[:http][:www][:redirect_external] or
392
+ !combos[:http][:www][:up] or
393
+ combos[:http][:www][:status] >= 400
394
+ ) and
395
+ (
396
+ combos[:http][:root][:redirect_external] or
397
+ !combos[:http][:root][:up] or
398
+ combos[:http][:root][:status] >= 400
399
+ ) and
400
+ (
401
+ combos[:https][:www][:redirect_external] or
402
+ !combos[:https][:www][:up] or
403
+ combos[:https][:www][:https_bad_name] or
404
+ combos[:https][:www][:status] >= 400
405
+ ) and
406
+ (
407
+ combos[:https][:root][:redirect_external] or
408
+ !combos[:https][:root][:up] or
409
+ combos[:https][:root][:https_bad_name] or
410
+ combos[:https][:root][:status] >= 400
411
+ )
412
+ )
413
+
414
+ # OK, we've said a domain is a "redirect" domain.
415
+ # What does the domain redirect to?
416
+ if details[:redirect]
417
+ canon = combos[details[:canonical_protocol]][details[:canonical_endpoint]]
418
+ details[:redirect_to] = canon[:redirect_to]
419
+ else
420
+ details[:redirect_to] = nil
421
+ end
422
+
423
+ # HSTS on the canonical domain? (valid HTTPS checked in endpoint)
424
+ details[:hsts] = !!combos[:https][details[:canonical_endpoint]][:hsts]
425
+ details[:hsts_header] = combos[:https][details[:canonical_endpoint]][:hsts_header]
426
+
427
+ # HSTS on the entire domain?
428
+ details[:hsts_entire_domain] = !!(
429
+ combos[:https][:root][:hsts] and
430
+ combos[:https][:root][:hsts_details][:include_subdomains]
431
+ )
432
+
433
+ # HSTS preload-ready for the entire domain?
434
+ #
435
+ # Re-checks :hsts_entire_domain in case the :preload_ready
436
+ # flag ever changes its definition to not require include_subdomains.
437
+
438
+ details[:hsts_entire_domain_preload] = !!(
439
+ details[:hsts_entire_domain] and
440
+ combos[:https][:root][:hsts_details][:preload_ready]
441
+ )
442
+
443
+ details
128
444
  end
129
445
 
130
- def to_hash
446
+ def endpoints
447
+ https_www = http_endpoint(true, true)
448
+ http_www = http_endpoint(false, true)
449
+ https_root = http_endpoint(true, false)
450
+ http_root = http_endpoint(false, false)
451
+
131
452
  {
132
- :domain => domain.to_s,
133
- :uri => uri.to_s,
134
- :government => government?,
135
- :live => !!response,
136
- :ssl => https?,
137
- :enforce_https => enforce_https?,
138
- :non_www => non_www?,
139
- :redirect => redirect,
140
- :ip => ip,
141
- :hostname => hostname.to_s,
142
- :ipv6 => ipv6?,
143
- :dnssec => dnssec?,
144
- :cdn => cdn,
145
- :google_apps => google_apps?,
146
- :cloud_provider => cloud_provider,
147
- :server => server,
148
- :cms => cms,
149
- :analytics => analytics,
150
- :javascript => javascript,
151
- :advertising => advertising,
152
- :slash_data => slash_data?,
153
- :slash_developer => slash_developer?,
154
- :data_dot_json => data_dot_json?,
155
- :click_jacking_protection => click_jacking_protection?,
156
- :content_security_policy => content_security_policy?,
157
- :xss_protection => xss_protection?,
158
- :secure_cookies => secure_cookies?,
159
- :strict_transport_security => strict_transport_security?
453
+ https: {
454
+ www: https_www,
455
+ root: https_root
456
+ },
457
+ http: {
458
+ www: http_www,
459
+ root: http_root
460
+ }
160
461
  }
161
462
  end
463
+
464
+ # State of affairs at a particular endpoint.
465
+ def http_endpoint(ssl, www)
466
+ details = {}
467
+
468
+ # Don't follow redirects for first ping.
469
+ response = request(ssl, www, false)
470
+
471
+
472
+ # For HTTPS: examine the full range of possibilities.
473
+ if ssl
474
+ if response.return_code == :ok
475
+ details[:https_valid] = true
476
+ details[:https_bad_chain] = false
477
+ details[:https_bad_name] = false
478
+
479
+ # Bad certificate chain.
480
+ elsif response.return_code == :ssl_cacert
481
+ details[:https_valid] = false
482
+ details[:https_bad_chain] = true
483
+ response = request(ssl, www, false, false, true)
484
+ # Bad everything.
485
+ if response.return_code == :peer_failed_verification
486
+ details[:https_bad_name] = true
487
+ response = request(ssl, www, false, false, false)
488
+ end
489
+
490
+ # Bad hostname.
491
+ elsif response.return_code == :peer_failed_verification
492
+ details[:https_valid] = false
493
+ details[:https_bad_name] = true
494
+ response = request(ssl, www, false, true, false)
495
+ # Bad everything.
496
+ if response.return_code == :ssl_cacert
497
+ details[:https_bad_chain] = true
498
+ response = request(ssl, www, false, false, false)
499
+ end
500
+
501
+ # not sure what else would happen
502
+ elsif response.response_code != 0
503
+ details[:https_valid] = false
504
+ details[:https_unknown_issue] = response.return_code
505
+ end
506
+ end
507
+
508
+ # If we ended up with a failure, return it.
509
+ details[:status] = response.response_code
510
+ details[:up] = (response.response_code != 0)
511
+ return details if !details[:up]
512
+
513
+ headers = Hash[response.headers.map{ |k,v| [k.downcase,v] }]
514
+ details[:headers] = headers
515
+
516
+
517
+ # HSTS only takes effect when delivered over valid HTTPS.
518
+ hsts = SiteInspector.hsts_parse(headers["strict-transport-security"])
519
+
520
+ details[:hsts] = !!(
521
+ ssl and
522
+ details[:https_valid] and
523
+ hsts[:enabled]
524
+ )
525
+
526
+ details[:hsts_header] = headers["strict-transport-security"]
527
+ details[:hsts_details] = hsts
528
+
529
+
530
+ # If it's a redirect, go find the ultimate response starting from this combo.
531
+ redirect_code = response.response_code.to_s.start_with?("3")
532
+ location_header = headers["location"]
533
+ if redirect_code and location_header
534
+ location_header = location_header.downcase
535
+ details[:redirect] = true
536
+
537
+ ultimate_response = request(ssl, www, true, !details[:https_bad_chain], !details[:https_bad_name])
538
+ uri_original = URI(ultimate_response.request.url)
539
+
540
+ # treat relative Location headers as having the original hostname
541
+ if location_header.start_with?("http:") or location_header.start_with?("https:")
542
+ uri_immediate = URI(URI.escape(location_header))
543
+ else
544
+ uri_immediate = URI.join(uri_original, URI.escape(location_header))
545
+ end
546
+
547
+ uri_eventual = URI(ultimate_response.effective_url.downcase)
548
+
549
+ # compare base domain names
550
+ base_original = PublicSuffix.parse(uri_original.hostname).domain
551
+
552
+ # if the redirects aren't to valid hostnames (e.g. IP addresses)
553
+ # then fine just compare them directly, they're not going to be
554
+ # identical anyway.
555
+ base_immediate = begin
556
+ PublicSuffix.parse(uri_immediate.hostname).domain
557
+ rescue PublicSuffix::DomainInvalid
558
+ uri_immediate.to_s
559
+ end
560
+
561
+ base_eventual = begin
562
+ PublicSuffix.parse(uri_eventual.hostname).domain
563
+ rescue PublicSuffix::DomainInvalid
564
+ uri_eventual.to_s
565
+ end
566
+
567
+ details[:redirect_immediately_to] = uri_immediate.to_s
568
+ details[:redirect_immediately_to_www] = !!uri_immediate.to_s.match(/^https?:\/\/www\./)
569
+ details[:redirect_immediately_to_https] = uri_immediate.to_s.start_with?("https://")
570
+ details[:redirect_immediately_external] = (base_original != base_immediate)
571
+
572
+ details[:redirect_to] = uri_eventual.to_s
573
+ details[:redirect_external] = (base_original != base_eventual)
574
+
575
+ # otherwise, mark all the redirect fields as false/null
576
+ else
577
+ details[:redirect] = false
578
+ details[:redirect_immediately_to] = nil
579
+ details[:redirect_immediately_to_www] = false
580
+ details[:redirect_immediately_to_https] = false
581
+ details[:redirect_immediately_external] = false
582
+
583
+ details[:redirect_to] = nil
584
+ details[:redirect_external] = false
585
+ end
586
+
587
+ details
588
+ end
589
+
590
+ def to_hash(http_only=false)
591
+ if http_only
592
+ {
593
+ :domain => domain.to_s,
594
+ :uri => uri.to_s,
595
+ :live => !!response,
596
+ :ssl => https?,
597
+ :enforce_https => enforce_https?,
598
+ :non_www => non_www?,
599
+ :redirect => redirect,
600
+ :headers => headers
601
+ }
602
+ else
603
+ {
604
+ :domain => domain.to_s,
605
+ :uri => uri.to_s,
606
+ :government => government?,
607
+ :live => !!response,
608
+ :ssl => https?,
609
+ :enforce_https => enforce_https?,
610
+ :non_www => non_www?,
611
+ :redirect => redirect,
612
+ :ip => ip,
613
+ :hostname => hostname.to_s,
614
+ :ipv6 => ipv6?,
615
+ :dnssec => dnssec?,
616
+ :cdn => cdn,
617
+ :google_apps => google_apps?,
618
+ :cloud_provider => cloud_provider,
619
+ :server => server,
620
+ :cms => cms,
621
+ :analytics => analytics,
622
+ :javascript => javascript,
623
+ :advertising => advertising,
624
+ :slash_data => slash_data?,
625
+ :slash_developer => slash_developer?,
626
+ :data_dot_json => data_dot_json?,
627
+ :click_jacking_protection => click_jacking_protection?,
628
+ :content_security_policy => content_security_policy?,
629
+ :xss_protection => xss_protection?,
630
+ :secure_cookies => secure_cookies?,
631
+ :strict_transport_security => strict_transport_security?,
632
+ :headers => headers
633
+ }
634
+ end
635
+ end
162
636
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site-inspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-13 00:00:00.000000000 Z
11
+ date: 2015-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0.6'
97
+ - !ruby/object:Gem::Dependency
98
+ name: oj
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '2.11'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '2.11'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: pry
99
113
  requirement: !ruby/object:Gem::Requirement