site-inspector 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/site-inspector +18 -4
- data/lib/site-inspector/cache.rb +45 -0
- data/lib/site-inspector/dns.rb +14 -3
- data/lib/site-inspector/sniffer.rb +1 -0
- data/lib/site-inspector.rb +522 -48
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4e41e2a1639e9f5f6e7f018ef58c664c834b2c2
|
4
|
+
data.tar.gz: 3205114fcaaaa11cf03ec1eb8fa2f5736b6a99f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffa69fcc3949abe434a476bf8bff0c65dbd4085b5c40f379c0f7b3bbb9eaf43ffc41527ea571fe211ac71d3911b58deaebf8446ee96a69d8a279c2877e87e3bb
|
7
|
+
data.tar.gz: b369d00e140c4b258b02f02e5b841f4ce26d84c0b7b32f10e6015a99673355b3a505194495fbf2bf5e9ecfabfed6e486db18572a56418bcfaf329b6acb746778
|
data/bin/site-inspector
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require_relative "../lib/site-inspector"
|
4
|
-
|
4
|
+
|
5
|
+
require "oj"
|
5
6
|
|
6
7
|
domain = ARGV[0]
|
8
|
+
http_mode = (ARGV[1] == "--http")
|
7
9
|
|
8
10
|
if domain.to_s.empty?
|
9
|
-
puts "Usage: site-inspector [DOMAIN]"
|
11
|
+
puts "Usage: site-inspector [DOMAIN] [--http]"
|
10
12
|
exit 1
|
11
13
|
end
|
12
14
|
|
13
|
-
|
15
|
+
# HTTP mode:
|
16
|
+
# * all details for possible endpoints
|
17
|
+
# * don't follow redirects
|
18
|
+
# * shorter timeout
|
19
|
+
if http_mode
|
20
|
+
site = SiteInspector.new(domain)
|
21
|
+
details = site.http
|
22
|
+
|
23
|
+
# Normal mode: autodetect canonical domain, sweep every attribute.
|
24
|
+
else
|
25
|
+
site = SiteInspector.new(domain)
|
26
|
+
details = site.to_hash
|
27
|
+
end
|
14
28
|
|
15
|
-
puts
|
29
|
+
puts Oj.dump(details, indent: 2, mode: :compat)
|
data/lib/site-inspector/cache.rb
CHANGED
@@ -11,3 +11,48 @@ class SiteInspectorCache
|
|
11
11
|
@memory[request] = response
|
12
12
|
end
|
13
13
|
end
|
14
|
+
|
15
|
+
# Response cache that persists entries to disk, fronted by an in-process hash.
#
# Each response is serialized with Marshal into a file named after the
# request's +cache_key+ inside +dir+. Entries set or fetched through this
# object are also kept in memory under the request itself.
#
# SECURITY NOTE(review): entries are read back with Marshal.load, which can
# instantiate arbitrary objects. Only point the cache at a directory that
# untrusted users cannot write to.
class SiteInspectorDiskCache
  # dir     - directory that holds the cache files. When nil, nothing is read
  #           from or written to disk and the cache is memory-only.
  # replace - when truthy, an entry found on disk is deleted instead of being
  #           returned, so the caller sees a cache miss.
  def initialize(dir = nil, replace = false)
    @dir = dir
    @memory = {}
    @replace = replace
  end

  # Returns the cache file location for +request+ (needs a configured dir).
  def path(request)
    File.join(@dir, request.cache_key)
  end

  # Reads the entry for +request+ from disk.
  #
  # Returns the deserialized response, or nil when there is no usable entry
  # (no dir configured, no file, replace mode, or unreadable contents).
  def fetch(request)
    return nil unless @dir

    file = path(request)
    return nil unless File.exist?(file)

    if @replace
      File.delete(file)
      return nil
    end

    begin
      # Marshal output is binary, so read it without newline/encoding translation.
      Marshal.load(File.binread(file))
    rescue ArgumentError, TypeError
      # Truncated data raises ArgumentError; a foreign or garbled format
      # raises TypeError. Either way the entry is useless: drop it.
      File.delete(file)
      nil
    end
  end

  # Serializes +response+ to the cache file for +request+.
  # Does nothing when no dir is configured.
  def store(request, response)
    return nil unless @dir

    File.open(path(request), "wb") do |f|
      f.write Marshal.dump(response)
    end
  end

  # Cache lookup: memory first, then disk.
  def get(request)
    @memory[request] || fetch(request)
  end

  # Cache write: persists to disk, then remembers the response in memory.
  def set(request, response)
    store(request, response)
    @memory[request] = response
  end
end
|
data/lib/site-inspector/dns.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
class SiteInspector
|
2
2
|
|
3
3
|
def resolver
|
4
|
-
|
4
|
+
require "dnsruby"
|
5
|
+
@resolver ||= begin
|
6
|
+
resolver = Dnsruby::Resolver.new
|
7
|
+
resolver.config.nameserver = ["8.8.8.8", "8.8.4.4"]
|
8
|
+
resolver
|
9
|
+
end
|
5
10
|
end
|
6
11
|
|
7
12
|
def query(type="ANY")
|
@@ -14,12 +19,16 @@ class SiteInspector
|
|
14
19
|
@dns ||= query
|
15
20
|
end
|
16
21
|
|
22
|
+
# Reports whether a DNS record of the given type is present.
#
# The records already returned by +dns+ are checked first; only when none of
# them has the requested type is a dedicated +query+ for that type issued.
#
# type - record type to look for (callers in this file pass "DNSKEY" and "AAAA").
#
# Returns true or false.
def has_record?(type)
  return true if dns.any? { |record| record.type == type }

  query(type).count != 0
end
|
25
|
+
|
17
26
|
# Reports whether the domain publishes a DNSKEY record.
#
# The answer is computed once and remembered, including a negative answer:
# `||=` would re-run the lookup on every call whenever the result is false.
def dnssec?
  return @dnssec if defined?(@dnssec)

  @dnssec = has_record?("DNSKEY")
end
|
20
29
|
|
21
30
|
# Reports whether the domain publishes an AAAA record.
#
# The answer is computed once and remembered, including a negative answer:
# `||=` would re-run the lookup on every call whenever the result is false.
def ipv6?
  return @ipv6 if defined?(@ipv6)

  @ipv6 = has_record?("AAAA")
end
|
24
33
|
|
25
34
|
def detect_by_hostname(type)
|
@@ -64,12 +73,14 @@ class SiteInspector
|
|
64
73
|
end
|
65
74
|
|
66
75
|
def ip
|
76
|
+
require 'resolv'
|
67
77
|
@ip ||= Resolv.getaddress domain.to_s
|
68
78
|
rescue Resolv::ResolvError
|
69
79
|
nil
|
70
80
|
end
|
71
81
|
|
72
82
|
def hostname
|
83
|
+
require 'resolv'
|
73
84
|
@hostname ||= PublicSuffix.parse(Resolv.getname(ip))
|
74
85
|
rescue Exception => e
|
75
86
|
nil
|
data/lib/site-inspector.rb
CHANGED
@@ -1,46 +1,106 @@
|
|
1
|
-
|
1
|
+
|
2
|
+
# needed for HTTP analysis
|
2
3
|
require 'open-uri'
|
3
|
-
require 'public_suffix'
|
4
|
-
require 'gman'
|
5
|
-
require 'net/http'
|
6
|
-
require "dnsruby"
|
7
|
-
require 'yaml'
|
8
|
-
require 'sniffles'
|
9
4
|
require "addressable/uri"
|
5
|
+
require 'public_suffix'
|
10
6
|
require 'typhoeus'
|
11
|
-
require 'json'
|
12
|
-
require 'resolv'
|
13
7
|
|
14
8
|
require_relative 'site-inspector/cache'
|
9
|
+
require_relative 'site-inspector/headers'
|
15
10
|
require_relative 'site-inspector/sniffer'
|
16
11
|
require_relative 'site-inspector/dns'
|
17
12
|
require_relative 'site-inspector/compliance'
|
18
|
-
require_relative 'site-inspector/headers'
|
19
13
|
|
20
|
-
|
14
|
+
|
15
|
+
if ENV['CACHE']
|
16
|
+
Typhoeus::Config.cache = SiteInspectorDiskCache.new(ENV['CACHE'], ENV['CACHE_REPLACE'])
|
17
|
+
else
|
18
|
+
Typhoeus::Config.cache = SiteInspectorCache.new
|
19
|
+
end
|
21
20
|
|
22
21
|
class SiteInspector
|
23
22
|
|
24
23
|
def self.load_data(name)
|
24
|
+
require 'yaml'
|
25
25
|
YAML.load_file File.expand_path "./data/#{name}.yml", File.dirname(__FILE__)
|
26
26
|
end
|
27
27
|
|
28
|
-
|
28
|
+
# Utility parser for HSTS (Strict-Transport-Security) header values.
# RFC: http://tools.ietf.org/html/rfc6797
#
# header - the raw header value. Anything that is not a String (nil, an
#          Array, ...) is treated as "no HSTS".
#
# Returns a Hash with these keys:
#   :max_age            - Integer from the max-age directive, or nil if absent
#   :include_subdomains - true if the includeSubDomains directive is present
#   :preload            - true if the preload directive is present
#   :enabled            - true if max-age is greater than zero
#   :preload_ready      - true if max-age >= 10886400 and both
#                         includeSubDomains and preload are present
# A header containing an invalid directive yields the all-off result.
def self.hsts_parse(header)
  # no hsts for you
  nothing = {
    max_age: nil,
    include_subdomains: false,
    preload: false,
    enabled: false,
    preload_ready: false
  }

  return nothing unless header.is_a?(String)

  # Directive names are case-insensitive. Whitespace around the header,
  # around ";" and around "=" is insignificant, so it is trimmed before
  # validation instead of being mistaken for an invalid character.
  pairs = header.strip.split(/\s*;\s*/).map do |directive|
    # Limit of 2 keeps any further "=" inside the value.
    name, value = directive.downcase.split("=", 2).map(&:strip)

    # Unwrap a quoted-string value.
    if value && value.length >= 2 && value.start_with?('"') && value.end_with?('"')
      value = value[1..-2]
    end

    [name, value]
  end

  # reject invalid directives
  # TODO: more comprehensive rejection of characters
  invalid_chars = /[\s\'\"]/
  fatal = pairs.any? do |name, value|
    (name =~ invalid_chars) || (value =~ invalid_chars)
  end

  # good DAY, sir
  return nothing if fatal

  max_age_directive = pairs.find { |name, _| name == "max-age" }
  max_age = max_age_directive ? max_age_directive[1].to_i : nil
  include_subdomains = pairs.any? { |name, _| name == "includesubdomains" }
  preload = pairs.any? { |name, _| name == "preload" }

  enabled = !!(max_age && max_age > 0)

  # Google's minimum max-age for automatic preloading
  eighteen_weeks = !!(max_age && max_age >= 10886400)
  preload_ready = eighteen_weeks && include_subdomains && preload

  {
    max_age: max_age,
    include_subdomains: include_subdomains,
    preload: preload,
    enabled: enabled,
    preload_ready: preload_ready
  }
end
|
85
|
+
|
86
|
+
# makes no network requests
|
87
|
+
def initialize(domain, options = {})
|
29
88
|
domain = domain.downcase
|
30
89
|
domain = domain.sub /^https?\:/, ""
|
31
90
|
domain = domain.sub /^\/+/, ""
|
32
91
|
domain = domain.sub /^www\./, ""
|
33
92
|
@uri = Addressable::URI.parse "//#{domain}"
|
34
93
|
@domain = PublicSuffix.parse @uri.host
|
94
|
+
@timeout = options[:timeout] || 10
|
35
95
|
end
|
36
96
|
|
37
97
|
def inspect
|
38
98
|
"<SiteInspector domain=\"#{domain}\">"
|
39
99
|
end
|
40
100
|
|
41
|
-
def uri(ssl=
|
101
|
+
def uri(ssl=enforce_https?,www=www?)
|
42
102
|
uri = @uri.clone
|
43
|
-
uri.host = "www.#{uri.host}"
|
103
|
+
uri.host = www ? "www.#{uri.host}" : uri.host
|
44
104
|
uri.scheme = ssl ? "https" : "http"
|
45
105
|
uri
|
46
106
|
end
|
@@ -49,8 +109,13 @@ class SiteInspector
|
|
49
109
|
www? ? PublicSuffix.parse("www.#{@uri.host}") : @domain
|
50
110
|
end
|
51
111
|
|
52
|
-
def request(ssl=false, www=false, followlocation=true)
|
53
|
-
|
112
|
+
def request(ssl=false, www=false, followlocation=true, ssl_verifypeer=true, ssl_verifyhost=true)
|
113
|
+
to_get = uri(ssl, www)
|
114
|
+
|
115
|
+
# debugging
|
116
|
+
# puts "fetching: #{to_get}, #{followlocation ? "follow" : "no follow"}, #{ssl_verifypeer ? "verify peer, " : ""}#{ssl_verifyhost ? "verify host" : ""}"
|
117
|
+
|
118
|
+
Typhoeus.get(to_get, followlocation: followlocation, ssl_verifypeer: ssl_verifypeer, ssl_verifyhost: (ssl_verifyhost ? 2 : 0), timeout: @timeout)
|
54
119
|
end
|
55
120
|
|
56
121
|
def response
|
@@ -72,6 +137,7 @@ class SiteInspector
|
|
72
137
|
end
|
73
138
|
|
74
139
|
def doc
|
140
|
+
require 'nokogiri'
|
75
141
|
@doc ||= Nokogiri::HTML response.body if response
|
76
142
|
end
|
77
143
|
|
@@ -80,6 +146,7 @@ class SiteInspector
|
|
80
146
|
end
|
81
147
|
|
82
148
|
def government?
|
149
|
+
require 'gman'
|
83
150
|
Gman.valid? domain.to_s
|
84
151
|
end
|
85
152
|
|
@@ -101,7 +168,7 @@ class SiteInspector
|
|
101
168
|
end
|
102
169
|
|
103
170
|
def www?
|
104
|
-
response && response.effective_url && !!response.effective_url.match(
|
171
|
+
response && response.effective_url && !!response.effective_url.match(/^https?:\/\/www\./)
|
105
172
|
end
|
106
173
|
|
107
174
|
def non_www?
|
@@ -123,40 +190,447 @@ class SiteInspector
|
|
123
190
|
end
|
124
191
|
end
|
125
192
|
|
126
|
-
def
|
127
|
-
|
193
|
+
def http
|
194
|
+
details = {
|
195
|
+
endpoints: endpoints
|
196
|
+
}
|
197
|
+
|
198
|
+
# convenient shorthand for the extensive statements to come
|
199
|
+
combos = details[:endpoints]
|
200
|
+
|
201
|
+
# A domain is "canonically" at www if:
|
202
|
+
# * at least one of its www endpoints responds
|
203
|
+
# * both root endpoints are either down or redirect *somewhere*
|
204
|
+
# * either both root endpoints are down, *or* at least one
|
205
|
+
# root endpoint redirect should immediately go to
|
206
|
+
# an *internal* www endpoint
|
207
|
+
# This is meant to affirm situations like:
|
208
|
+
# http:// -> https:// -> https://www
|
209
|
+
# https:// -> http:// -> https://www
|
210
|
+
# and meant to avoid affirming situations like:
|
211
|
+
# http:// -> http://non-www,
|
212
|
+
# http://www -> http://non-www
|
213
|
+
# or like:
|
214
|
+
# https:// -> 200, http:// -> http://www
|
215
|
+
|
216
|
+
www = !!(
|
217
|
+
(
|
218
|
+
combos[:https][:www][:up] or
|
219
|
+
combos[:http][:www][:up]
|
220
|
+
) and (
|
221
|
+
(
|
222
|
+
combos[:https][:root][:redirect] or
|
223
|
+
!combos[:https][:root][:up] or
|
224
|
+
combos[:https][:root][:https_bad_name] or
|
225
|
+
!combos[:https][:root][:status].to_s.start_with?("2")
|
226
|
+
) and (
|
227
|
+
combos[:http][:root][:redirect] or
|
228
|
+
!combos[:http][:root][:up] or
|
229
|
+
!combos[:http][:root][:status].to_s.start_with?("2")
|
230
|
+
)
|
231
|
+
) and (
|
232
|
+
(
|
233
|
+
(
|
234
|
+
!combos[:https][:root][:up] or
|
235
|
+
combos[:https][:root][:https_bad_name] or
|
236
|
+
!combos[:https][:root][:status].to_s.start_with?("2")
|
237
|
+
) and
|
238
|
+
(
|
239
|
+
!combos[:http][:root][:up] or
|
240
|
+
!combos[:http][:root][:status].to_s.start_with?("2")
|
241
|
+
)
|
242
|
+
) or
|
243
|
+
(
|
244
|
+
combos[:https][:root][:redirect_immediately_to_www] and
|
245
|
+
!combos[:https][:root][:redirect_immediately_external]
|
246
|
+
) or
|
247
|
+
(
|
248
|
+
combos[:http][:root][:redirect_immediately_to_www] and
|
249
|
+
!combos[:http][:root][:redirect_immediately_external]
|
250
|
+
)
|
251
|
+
)
|
252
|
+
)
|
253
|
+
|
254
|
+
# A domain is "canonically" at https if:
|
255
|
+
# * at least one of its https endpoints is live and
|
256
|
+
# doesn't have an invalid hostname
|
257
|
+
# * both http endpoints are either down or redirect *somewhere*
|
258
|
+
# * at least one http endpoint redirects immediately to
|
259
|
+
# an *internal* https endpoint
|
260
|
+
# This is meant to affirm situations like:
|
261
|
+
# http:// -> http://www -> https://
|
262
|
+
# https:// -> http:// -> https://www
|
263
|
+
# and meant to avoid affirming situations like:
|
264
|
+
# http:// -> http://non-www
|
265
|
+
# http://www -> http://non-www
|
266
|
+
# or:
|
267
|
+
# http:// -> 200, http://www -> https://www
|
268
|
+
#
|
269
|
+
# It allows a site to be canonically HTTPS if the cert has
|
270
|
+
# a valid hostname but invalid chain issues.
|
271
|
+
|
272
|
+
https = !!(
|
273
|
+
(
|
274
|
+
(
|
275
|
+
combos[:https][:root][:up] and
|
276
|
+
!combos[:https][:root][:https_bad_name]
|
277
|
+
) or
|
278
|
+
(
|
279
|
+
combos[:https][:www][:up] and
|
280
|
+
!combos[:https][:www][:https_bad_name]
|
281
|
+
)
|
282
|
+
) and (
|
283
|
+
(
|
284
|
+
combos[:http][:root][:redirect] or
|
285
|
+
!combos[:http][:root][:up] or
|
286
|
+
!combos[:http][:root][:status].to_s.start_with?("2")
|
287
|
+
) and (
|
288
|
+
combos[:http][:www][:redirect] or
|
289
|
+
!combos[:http][:www][:up] or
|
290
|
+
!combos[:http][:www][:status].to_s.start_with?("2")
|
291
|
+
)
|
292
|
+
) and (
|
293
|
+
(
|
294
|
+
combos[:http][:root][:redirect_immediately_to_https] and
|
295
|
+
!combos[:http][:root][:redirect_immediately_external]
|
296
|
+
) or (
|
297
|
+
combos[:http][:www][:redirect_immediately_to_https] and
|
298
|
+
!combos[:http][:www][:redirect_immediately_external]
|
299
|
+
)
|
300
|
+
)
|
301
|
+
)
|
302
|
+
|
303
|
+
details[:canonical_endpoint] = www ? :www : :root
|
304
|
+
details[:canonical_protocol] = https ? :https : :http
|
305
|
+
details[:canonical] = uri(https, www).to_s
|
306
|
+
|
307
|
+
# If any endpoint is up, the domain is up.
|
308
|
+
details[:up] = !!(
|
309
|
+
combos[:https][:www][:up] or
|
310
|
+
combos[:https][:root][:up] or
|
311
|
+
combos[:http][:www][:up] or
|
312
|
+
combos[:http][:root][:up]
|
313
|
+
)
|
314
|
+
|
315
|
+
# A domain's root is broken if neither protocol can connect.
|
316
|
+
details[:broken_root] = !!(
|
317
|
+
!combos[:https][:root][:up] and
|
318
|
+
!combos[:http][:root][:up]
|
319
|
+
)
|
320
|
+
|
321
|
+
# A domain's www is broken if neither protocol can connect.
|
322
|
+
details[:broken_www] = !!(
|
323
|
+
!combos[:https][:www][:up] and
|
324
|
+
!combos[:http][:www][:up]
|
325
|
+
)
|
326
|
+
|
327
|
+
# HTTPS is "supported" (different than "canonical" or "enforced") if:
|
328
|
+
#
|
329
|
+
# * Either of the HTTPS endpoints is listening, and doesn't have
|
330
|
+
# an invalid hostname.
|
331
|
+
details[:support_https] = !!(
|
332
|
+
(
|
333
|
+
(combos[:https][:root][:status] != 0) and
|
334
|
+
!combos[:https][:root][:https_bad_name]
|
335
|
+
) or (
|
336
|
+
(combos[:https][:www][:status] != 0) and
|
337
|
+
!combos[:https][:www][:https_bad_name]
|
338
|
+
)
|
339
|
+
)
|
340
|
+
|
341
|
+
# we can say that a canonical HTTPS site "defaults" to HTTPS,
|
342
|
+
# even if it doesn't *strictly* enforce it (e.g. having a www
|
343
|
+
# subdomain first to go HTTP root before HTTPS root).
|
344
|
+
details[:default_https] = https
|
345
|
+
|
346
|
+
# HTTPS is "downgraded" if both:
|
347
|
+
#
|
348
|
+
# * HTTPS is supported, and
|
349
|
+
# * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
|
350
|
+
|
351
|
+
details[:downgrade_https] = !!(
|
352
|
+
details[:support_https] and
|
353
|
+
(
|
354
|
+
combos[:https][details[:canonical_endpoint]][:redirect] and
|
355
|
+
!combos[:https][details[:canonical_endpoint]][:redirect_immediately_external] and
|
356
|
+
!combos[:https][details[:canonical_endpoint]][:redirect_immediately_to_https]
|
357
|
+
)
|
358
|
+
)
|
359
|
+
|
360
|
+
# HTTPS is enforced if one of the HTTPS endpoints is "live",
|
361
|
+
# and if both *HTTP* endpoints are either:
|
362
|
+
#
|
363
|
+
# * down, or
|
364
|
+
# * redirect immediately to HTTPS.
|
365
|
+
#
|
366
|
+
# This is different than whether a domain is "canonically" HTTPS.
|
367
|
+
#
|
368
|
+
# * an HTTP redirect can go to HTTPS on another domain, as long
|
369
|
+
# as it's immediate.
|
370
|
+
# * a domain with an invalid cert can still be enforcing HTTPS.
|
371
|
+
details[:enforce_https] = !!(
|
372
|
+
(
|
373
|
+
!combos[:http][:www][:up] or
|
374
|
+
(combos[:http][:www][:redirect_immediately_to_https])
|
375
|
+
) and
|
376
|
+
(
|
377
|
+
!combos[:http][:root][:up] or
|
378
|
+
(combos[:http][:root][:redirect_immediately_to_https])
|
379
|
+
) and
|
380
|
+
(
|
381
|
+
combos[:https][:www][:up] or
|
382
|
+
combos[:https][:root][:up]
|
383
|
+
)
|
384
|
+
)
|
385
|
+
|
386
|
+
# The domain is a redirect if at least one endpoint is up,
|
387
|
+
# and each one is *either* an external redirect or down entirely.
|
388
|
+
details[:redirect] = !!(
|
389
|
+
details[:up] and
|
390
|
+
(
|
391
|
+
combos[:http][:www][:redirect_external] or
|
392
|
+
!combos[:http][:www][:up] or
|
393
|
+
combos[:http][:www][:status] >= 400
|
394
|
+
) and
|
395
|
+
(
|
396
|
+
combos[:http][:root][:redirect_external] or
|
397
|
+
!combos[:http][:root][:up] or
|
398
|
+
combos[:http][:root][:status] >= 400
|
399
|
+
) and
|
400
|
+
(
|
401
|
+
combos[:https][:www][:redirect_external] or
|
402
|
+
!combos[:https][:www][:up] or
|
403
|
+
combos[:https][:www][:https_bad_name] or
|
404
|
+
combos[:https][:www][:status] >= 400
|
405
|
+
) and
|
406
|
+
(
|
407
|
+
combos[:https][:root][:redirect_external] or
|
408
|
+
!combos[:https][:root][:up] or
|
409
|
+
combos[:https][:root][:https_bad_name] or
|
410
|
+
combos[:https][:root][:status] >= 400
|
411
|
+
)
|
412
|
+
)
|
413
|
+
|
414
|
+
# OK, we've said a domain is a "redirect" domain.
|
415
|
+
# What does the domain redirect to?
|
416
|
+
if details[:redirect]
|
417
|
+
canon = combos[details[:canonical_protocol]][details[:canonical_endpoint]]
|
418
|
+
details[:redirect_to] = canon[:redirect_to]
|
419
|
+
else
|
420
|
+
details[:redirect_to] = nil
|
421
|
+
end
|
422
|
+
|
423
|
+
# HSTS on the canonical domain? (valid HTTPS checked in endpoint)
|
424
|
+
details[:hsts] = !!combos[:https][details[:canonical_endpoint]][:hsts]
|
425
|
+
details[:hsts_header] = combos[:https][details[:canonical_endpoint]][:hsts_header]
|
426
|
+
|
427
|
+
# HSTS on the entire domain?
|
428
|
+
details[:hsts_entire_domain] = !!(
|
429
|
+
combos[:https][:root][:hsts] and
|
430
|
+
combos[:https][:root][:hsts_details][:include_subdomains]
|
431
|
+
)
|
432
|
+
|
433
|
+
# HSTS preload-ready for the entire domain?
|
434
|
+
#
|
435
|
+
# Re-checks :hsts_entire_domain in case the :preload_ready
|
436
|
+
# flag ever changes its definition to not require include_subdomains.
|
437
|
+
|
438
|
+
details[:hsts_entire_domain_preload] = !!(
|
439
|
+
details[:hsts_entire_domain] and
|
440
|
+
combos[:https][:root][:hsts_details][:preload_ready]
|
441
|
+
)
|
442
|
+
|
443
|
+
details
|
128
444
|
end
|
129
445
|
|
130
|
-
def
|
446
|
+
def endpoints
|
447
|
+
https_www = http_endpoint(true, true)
|
448
|
+
http_www = http_endpoint(false, true)
|
449
|
+
https_root = http_endpoint(true, false)
|
450
|
+
http_root = http_endpoint(false, false)
|
451
|
+
|
131
452
|
{
|
132
|
-
:
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
:
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
:ip => ip,
|
141
|
-
:hostname => hostname.to_s,
|
142
|
-
:ipv6 => ipv6?,
|
143
|
-
:dnssec => dnssec?,
|
144
|
-
:cdn => cdn,
|
145
|
-
:google_apps => google_apps?,
|
146
|
-
:cloud_provider => cloud_provider,
|
147
|
-
:server => server,
|
148
|
-
:cms => cms,
|
149
|
-
:analytics => analytics,
|
150
|
-
:javascript => javascript,
|
151
|
-
:advertising => advertising,
|
152
|
-
:slash_data => slash_data?,
|
153
|
-
:slash_developer => slash_developer?,
|
154
|
-
:data_dot_json => data_dot_json?,
|
155
|
-
:click_jacking_protection => click_jacking_protection?,
|
156
|
-
:content_security_policy => content_security_policy?,
|
157
|
-
:xss_protection => xss_protection?,
|
158
|
-
:secure_cookies => secure_cookies?,
|
159
|
-
:strict_transport_security => strict_transport_security?
|
453
|
+
https: {
|
454
|
+
www: https_www,
|
455
|
+
root: https_root
|
456
|
+
},
|
457
|
+
http: {
|
458
|
+
www: http_www,
|
459
|
+
root: http_root
|
460
|
+
}
|
160
461
|
}
|
161
462
|
end
|
463
|
+
|
464
|
+
# State of affairs at a particular endpoint.
|
465
|
+
def http_endpoint(ssl, www)
|
466
|
+
details = {}
|
467
|
+
|
468
|
+
# Don't follow redirects for first ping.
|
469
|
+
response = request(ssl, www, false)
|
470
|
+
|
471
|
+
|
472
|
+
# For HTTPS: examine the full range of possibilities.
|
473
|
+
if ssl
|
474
|
+
if response.return_code == :ok
|
475
|
+
details[:https_valid] = true
|
476
|
+
details[:https_bad_chain] = false
|
477
|
+
details[:https_bad_name] = false
|
478
|
+
|
479
|
+
# Bad certificate chain.
|
480
|
+
elsif response.return_code == :ssl_cacert
|
481
|
+
details[:https_valid] = false
|
482
|
+
details[:https_bad_chain] = true
|
483
|
+
response = request(ssl, www, false, false, true)
|
484
|
+
# Bad everything.
|
485
|
+
if response.return_code == :peer_failed_verification
|
486
|
+
details[:https_bad_name] = true
|
487
|
+
response = request(ssl, www, false, false, false)
|
488
|
+
end
|
489
|
+
|
490
|
+
# Bad hostname.
|
491
|
+
elsif response.return_code == :peer_failed_verification
|
492
|
+
details[:https_valid] = false
|
493
|
+
details[:https_bad_name] = true
|
494
|
+
response = request(ssl, www, false, true, false)
|
495
|
+
# Bad everything.
|
496
|
+
if response.return_code == :ssl_cacert
|
497
|
+
details[:https_bad_chain] = true
|
498
|
+
response = request(ssl, www, false, false, false)
|
499
|
+
end
|
500
|
+
|
501
|
+
# not sure what else would happen
|
502
|
+
elsif response.response_code != 0
|
503
|
+
details[:https_valid] = false
|
504
|
+
details[:https_unknown_issue] = response.return_code
|
505
|
+
end
|
506
|
+
end
|
507
|
+
|
508
|
+
# If we ended up with a failure, return it.
|
509
|
+
details[:status] = response.response_code
|
510
|
+
details[:up] = (response.response_code != 0)
|
511
|
+
return details if !details[:up]
|
512
|
+
|
513
|
+
headers = Hash[response.headers.map{ |k,v| [k.downcase,v] }]
|
514
|
+
details[:headers] = headers
|
515
|
+
|
516
|
+
|
517
|
+
# HSTS only takes effect when delivered over valid HTTPS.
|
518
|
+
hsts = SiteInspector.hsts_parse(headers["strict-transport-security"])
|
519
|
+
|
520
|
+
details[:hsts] = !!(
|
521
|
+
ssl and
|
522
|
+
details[:https_valid] and
|
523
|
+
hsts[:enabled]
|
524
|
+
)
|
525
|
+
|
526
|
+
details[:hsts_header] = headers["strict-transport-security"]
|
527
|
+
details[:hsts_details] = hsts
|
528
|
+
|
529
|
+
|
530
|
+
# If it's a redirect, go find the ultimate response starting from this combo.
|
531
|
+
redirect_code = response.response_code.to_s.start_with?("3")
|
532
|
+
location_header = headers["location"]
|
533
|
+
if redirect_code and location_header
|
534
|
+
location_header = location_header.downcase
|
535
|
+
details[:redirect] = true
|
536
|
+
|
537
|
+
ultimate_response = request(ssl, www, true, !details[:https_bad_chain], !details[:https_bad_name])
|
538
|
+
uri_original = URI(ultimate_response.request.url)
|
539
|
+
|
540
|
+
# treat relative Location headers as having the original hostname
|
541
|
+
if location_header.start_with?("http:") or location_header.start_with?("https:")
|
542
|
+
uri_immediate = URI(URI.escape(location_header))
|
543
|
+
else
|
544
|
+
uri_immediate = URI.join(uri_original, URI.escape(location_header))
|
545
|
+
end
|
546
|
+
|
547
|
+
uri_eventual = URI(ultimate_response.effective_url.downcase)
|
548
|
+
|
549
|
+
# compare base domain names
|
550
|
+
base_original = PublicSuffix.parse(uri_original.hostname).domain
|
551
|
+
|
552
|
+
# if the redirects aren't to valid hostnames (e.g. IP addresses)
|
553
|
+
# then fine just compare them directly, they're not going to be
|
554
|
+
# identical anyway.
|
555
|
+
base_immediate = begin
|
556
|
+
PublicSuffix.parse(uri_immediate.hostname).domain
|
557
|
+
rescue PublicSuffix::DomainInvalid
|
558
|
+
uri_immediate.to_s
|
559
|
+
end
|
560
|
+
|
561
|
+
base_eventual = begin
|
562
|
+
PublicSuffix.parse(uri_eventual.hostname).domain
|
563
|
+
rescue PublicSuffix::DomainInvalid
|
564
|
+
uri_eventual.to_s
|
565
|
+
end
|
566
|
+
|
567
|
+
details[:redirect_immediately_to] = uri_immediate.to_s
|
568
|
+
details[:redirect_immediately_to_www] = !!uri_immediate.to_s.match(/^https?:\/\/www\./)
|
569
|
+
details[:redirect_immediately_to_https] = uri_immediate.to_s.start_with?("https://")
|
570
|
+
details[:redirect_immediately_external] = (base_original != base_immediate)
|
571
|
+
|
572
|
+
details[:redirect_to] = uri_eventual.to_s
|
573
|
+
details[:redirect_external] = (base_original != base_eventual)
|
574
|
+
|
575
|
+
# otherwise, mark all the redirect fields as false/null
|
576
|
+
else
|
577
|
+
details[:redirect] = false
|
578
|
+
details[:redirect_immediately_to] = nil
|
579
|
+
details[:redirect_immediately_to_www] = false
|
580
|
+
details[:redirect_immediately_to_https] = false
|
581
|
+
details[:redirect_immediately_external] = false
|
582
|
+
|
583
|
+
details[:redirect_to] = nil
|
584
|
+
details[:redirect_external] = false
|
585
|
+
end
|
586
|
+
|
587
|
+
details
|
588
|
+
end
|
589
|
+
|
590
|
+
def to_hash(http_only=false)
|
591
|
+
if http_only
|
592
|
+
{
|
593
|
+
:domain => domain.to_s,
|
594
|
+
:uri => uri.to_s,
|
595
|
+
:live => !!response,
|
596
|
+
:ssl => https?,
|
597
|
+
:enforce_https => enforce_https?,
|
598
|
+
:non_www => non_www?,
|
599
|
+
:redirect => redirect,
|
600
|
+
:headers => headers
|
601
|
+
}
|
602
|
+
else
|
603
|
+
{
|
604
|
+
:domain => domain.to_s,
|
605
|
+
:uri => uri.to_s,
|
606
|
+
:government => government?,
|
607
|
+
:live => !!response,
|
608
|
+
:ssl => https?,
|
609
|
+
:enforce_https => enforce_https?,
|
610
|
+
:non_www => non_www?,
|
611
|
+
:redirect => redirect,
|
612
|
+
:ip => ip,
|
613
|
+
:hostname => hostname.to_s,
|
614
|
+
:ipv6 => ipv6?,
|
615
|
+
:dnssec => dnssec?,
|
616
|
+
:cdn => cdn,
|
617
|
+
:google_apps => google_apps?,
|
618
|
+
:cloud_provider => cloud_provider,
|
619
|
+
:server => server,
|
620
|
+
:cms => cms,
|
621
|
+
:analytics => analytics,
|
622
|
+
:javascript => javascript,
|
623
|
+
:advertising => advertising,
|
624
|
+
:slash_data => slash_data?,
|
625
|
+
:slash_developer => slash_developer?,
|
626
|
+
:data_dot_json => data_dot_json?,
|
627
|
+
:click_jacking_protection => click_jacking_protection?,
|
628
|
+
:content_security_policy => content_security_policy?,
|
629
|
+
:xss_protection => xss_protection?,
|
630
|
+
:secure_cookies => secure_cookies?,
|
631
|
+
:strict_transport_security => strict_transport_security?,
|
632
|
+
:headers => headers
|
633
|
+
}
|
634
|
+
end
|
635
|
+
end
|
162
636
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: site-inspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0.6'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: oj
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '2.11'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '2.11'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: pry
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|