site-inspector 1.0.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cliver
4
+ class Dependency
5
+ # Memoized shortcut for detect
6
+ # Returns the path to the detected dependency
7
+ # Raises an error if the dependency was not satisfied
8
+ def path
9
+ @path ||= detect!
10
+ end
11
+
12
+ # Returns the version of the resolved dependency
13
+ def version
14
+ return @version if defined? @version
15
+
16
+ version = installed_versions.find { |p, _v| p == path }
17
+ @detected_version = version.nil? ? nil : version[1]
18
+ end
19
+
20
+ def major_version
21
+ version&.split('.')&.first
22
+ end
23
+ end
24
+ end
@@ -1,636 +1,83 @@
1
+ # frozen_string_literal: true
1
2
 
2
- # needed for HTTP analysis
3
3
  require 'open-uri'
4
- require "addressable/uri"
4
+ require 'addressable/uri'
5
5
  require 'public_suffix'
6
6
  require 'typhoeus'
7
+ require 'parallel'
8
+ require 'cliver'
9
+ require 'whois'
10
+ require 'cgi'
11
+ require 'resolv'
12
+ require 'dotenv/load'
7
13
 
8
14
  require_relative 'site-inspector/cache'
9
- require_relative 'site-inspector/headers'
10
- require_relative 'site-inspector/sniffer'
11
- require_relative 'site-inspector/dns'
12
- require_relative 'site-inspector/compliance'
13
-
14
-
15
- if ENV['CACHE']
16
- Typhoeus::Config.cache = SiteInspectorDiskCache.new(ENV['CACHE'], ENV['CACHE_REPLACE'])
17
- else
18
- Typhoeus::Config.cache = SiteInspectorCache.new
19
- end
15
+ require_relative 'site-inspector/disk_cache'
16
+ require_relative 'site-inspector/rails_cache'
17
+ require_relative 'site-inspector/domain'
18
+ require_relative 'site-inspector/checks/check'
19
+ require_relative 'site-inspector/checks/accessibility'
20
+ require_relative 'site-inspector/checks/content'
21
+ require_relative 'site-inspector/checks/dns'
22
+ require_relative 'site-inspector/checks/headers'
23
+ require_relative 'site-inspector/checks/hsts'
24
+ require_relative 'site-inspector/checks/https'
25
+ require_relative 'site-inspector/checks/sniffer'
26
+ require_relative 'site-inspector/checks/cookies'
27
+ require_relative 'site-inspector/checks/whois'
28
+ require_relative 'site-inspector/checks/wappalyzer'
29
+ require_relative 'site-inspector/endpoint'
30
+ require_relative 'site-inspector/version'
31
+ require_relative 'cliver/dependency_ext'
20
32
 
21
33
  class SiteInspector
22
-
23
- def self.load_data(name)
24
- require 'yaml'
25
- YAML.load_file File.expand_path "./data/#{name}.yml", File.dirname(__FILE__)
26
- end
27
-
28
- # Utility parser for HSTS headers.
29
- # RFC: http://tools.ietf.org/html/rfc6797
30
- def self.hsts_parse(header)
31
- # no hsts for you
32
- nothing = {
33
- max_age: nil,
34
- include_subdomains: false,
35
- preload: false,
36
- enabled: false,
37
- preload_ready: false
38
- }
39
-
40
- return nothing unless header and header.is_a?(String)
41
-
42
- directives = header.split(/\s*;\s*/)
43
-
44
- pairs = []
45
- directives.each do |directive|
46
- name, value = directive.downcase.split("=")
47
-
48
- if value and value.start_with?("\"") and value.end_with?("\"")
49
- value = value.sub(/^\"/, '')
50
- value = value.sub(/\"$/, '')
51
- end
52
-
53
- pairs.push([name, value])
34
+ class << self
35
+ attr_writer :timeout, :cache, :typhoeus_options
36
+
37
+ def cache
38
+ @cache ||= if ENV['CACHE']
39
+ SiteInspector::DiskCache.new
40
+ elsif Object.const_defined?('Rails')
41
+ SiteInspector::RailsCache.new
42
+ else
43
+ SiteInspector::Cache.new
44
+ end
54
45
  end
55
46
 
56
- # reject invalid directives
57
- fatal = pairs.any? do |name, value|
58
- # TODO: more comprehensive rejection of characters
59
- invalid_chars = /[\s\'\"]/
60
- (name =~ invalid_chars) or (value =~ invalid_chars)
47
+ def timeout
48
+ @timeout || 10
61
49
  end
62
50
 
63
- # good DAY, sir
64
- return nothing if fatal
65
-
66
- max_age_directive = pairs.find {|n, v| n == "max-age"}
67
- max_age = max_age_directive ? max_age_directive[1].to_i : nil
68
- include_subdomains = !!pairs.find {|n, v| n == "includesubdomains"}
69
- preload = !!pairs.find {|n, v| n == "preload"}
70
-
71
- enabled = !!(max_age and (max_age > 0))
72
-
73
- # Google's minimum max-age for automatic preloading
74
- eighteen_weeks = !!(max_age and (max_age >= 10886400))
75
- preload_ready = !!(eighteen_weeks and include_subdomains and preload)
76
-
77
- {
78
- max_age: max_age,
79
- include_subdomains: include_subdomains,
80
- preload: preload,
81
- enabled: enabled,
82
- preload_ready: preload_ready
83
- }
84
- end
85
-
86
- # makes no network requests
87
- def initialize(domain, options = {})
88
- domain = domain.downcase
89
- domain = domain.sub /^https?\:/, ""
90
- domain = domain.sub /^\/+/, ""
91
- domain = domain.sub /^www\./, ""
92
- @uri = Addressable::URI.parse "//#{domain}"
93
- @domain = PublicSuffix.parse @uri.host
94
- @timeout = options[:timeout] || 10
95
- end
96
-
97
- def inspect
98
- "<SiteInspector domain=\"#{domain}\">"
99
- end
100
-
101
- def uri(ssl=enforce_https?,www=www?)
102
- uri = @uri.clone
103
- uri.host = www ? "www.#{uri.host}" : uri.host
104
- uri.scheme = ssl ? "https" : "http"
105
- uri
106
- end
107
-
108
- def domain
109
- www? ? PublicSuffix.parse("www.#{@uri.host}") : @domain
110
- end
111
-
112
- def request(ssl=false, www=false, followlocation=true, ssl_verifypeer=true, ssl_verifyhost=true)
113
- to_get = uri(ssl, www)
114
-
115
- # debugging
116
- # puts "fetching: #{to_get}, #{followlocation ? "follow" : "no follow"}, #{ssl_verifypeer ? "verify peer, " : ""}#{ssl_verifyhost ? "verify host" : ""}"
117
-
118
- Typhoeus.get(to_get, followlocation: followlocation, ssl_verifypeer: ssl_verifypeer, ssl_verifyhost: (ssl_verifyhost ? 2 : 0), timeout: @timeout)
119
- end
120
-
121
- def response
122
- @response ||= begin
123
- if response = request(false, false) and response.success?
124
- @non_www = true
125
- response
126
- elsif response = request(false, true) and response.success?
127
- @non_www = false
128
- response
129
- else
130
- false
131
- end
132
- end
133
- end
134
-
135
- def timed_out?
136
- response && response.timed_out?
137
- end
138
-
139
- def doc
140
- require 'nokogiri'
141
- @doc ||= Nokogiri::HTML response.body if response
142
- end
143
-
144
- def body
145
- doc.to_s.force_encoding("UTF-8").encode("UTF-8", :invalid => :replace, :replace => "")
146
- end
147
-
148
- def government?
149
- require 'gman'
150
- Gman.valid? domain.to_s
151
- end
152
-
153
- def https?
154
- @https ||= request(true, www?).success?
155
- end
156
- alias_method :ssl?, :https?
157
-
158
- def enforce_https?
159
- return false unless https?
160
- @enforce_https ||= begin
161
- response = request(false, www?)
162
- if response.effective_url
163
- Addressable::URI.parse(response.effective_url).scheme == "https"
164
- else
165
- false
166
- end
51
+ def inspect(domain)
52
+ Domain.new(domain)
167
53
  end
168
- end
169
-
170
- def www?
171
- response && response.effective_url && !!response.effective_url.match(/^https?:\/\/www\./)
172
- end
173
-
174
- def non_www?
175
- response && @non_www
176
- end
177
-
178
- def redirect?
179
- !!redirect
180
- end
181
-
182
- def redirect
183
- @redirect ||= begin
184
- if location = request(https?, www?, false).headers["location"]
185
- redirect_domain = SiteInspector.new(location).domain
186
- redirect_domain.to_s if redirect_domain.to_s != domain.to_s
187
- end
188
- rescue
189
- nil
190
- end
191
- end
192
-
193
- def http
194
- details = {
195
- endpoints: endpoints
196
- }
197
-
198
- # convenient shorthand for the extensive statements to come
199
- combos = details[:endpoints]
200
-
201
- # A domain is "canonically" at www if:
202
- # * at least one of its www endpoints responds
203
- # * both root endpoints are either down or redirect *somewhere*
204
- # * either both root endpoints are down, *or* at least one
205
- # root endpoint redirect should immediately go to
206
- # an *internal* www endpoint
207
- # This is meant to affirm situations like:
208
- # http:// -> https:// -> https://www
209
- # https:// -> http:// -> https://www
210
- # and meant to avoid affirming situations like:
211
- # http:// -> http://non-www,
212
- # http://www -> http://non-www
213
- # or like:
214
- # https:// -> 200, http:// -> http://www
215
54
 
216
- www = !!(
217
- (
218
- combos[:https][:www][:up] or
219
- combos[:http][:www][:up]
220
- ) and (
221
- (
222
- combos[:https][:root][:redirect] or
223
- !combos[:https][:root][:up] or
224
- combos[:https][:root][:https_bad_name] or
225
- !combos[:https][:root][:status].to_s.start_with?("2")
226
- ) and (
227
- combos[:http][:root][:redirect] or
228
- !combos[:http][:root][:up] or
229
- !combos[:http][:root][:status].to_s.start_with?("2")
230
- )
231
- ) and (
232
- (
233
- (
234
- !combos[:https][:root][:up] or
235
- combos[:https][:root][:https_bad_name] or
236
- !combos[:https][:root][:status].to_s.start_with?("2")
237
- ) and
238
- (
239
- !combos[:http][:root][:up] or
240
- !combos[:http][:root][:status].to_s.start_with?("2")
241
- )
242
- ) or
243
- (
244
- combos[:https][:root][:redirect_immediately_to_www] and
245
- !combos[:https][:root][:redirect_immediately_external]
246
- ) or
247
- (
248
- combos[:http][:root][:redirect_immediately_to_www] and
249
- !combos[:http][:root][:redirect_immediately_external]
250
- )
251
- )
252
- )
253
-
254
- # A domain is "canonically" at https if:
255
- # * at least one of its https endpoints is live and
256
- # doesn't have an invalid hostname
257
- # * both http endpoints are either down or redirect *somewhere*
258
- # * at least one http endpoint redirects immediately to
259
- # an *internal* https endpoint
260
- # This is meant to affirm situations like:
261
- # http:// -> http://www -> https://
262
- # https:// -> http:// -> https://www
263
- # and meant to avoid affirming situations like:
264
- # http:// -> http://non-www
265
- # http://www -> http://non-www
266
- # or:
267
- # http:// -> 200, http://www -> https://www
268
- #
269
- # It allows a site to be canonically HTTPS if the cert has
270
- # a valid hostname but invalid chain issues.
271
-
272
- https = !!(
273
- (
274
- (
275
- combos[:https][:root][:up] and
276
- !combos[:https][:root][:https_bad_name]
277
- ) or
278
- (
279
- combos[:https][:www][:up] and
280
- !combos[:https][:www][:https_bad_name]
281
- )
282
- ) and (
283
- (
284
- combos[:http][:root][:redirect] or
285
- !combos[:http][:root][:up] or
286
- !combos[:http][:root][:status].to_s.start_with?("2")
287
- ) and (
288
- combos[:http][:www][:redirect] or
289
- !combos[:http][:www][:up] or
290
- !combos[:http][:www][:status].to_s.start_with?("2")
291
- )
292
- ) and (
293
- (
294
- combos[:http][:root][:redirect_immediately_to_https] and
295
- !combos[:http][:root][:redirect_immediately_external]
296
- ) or (
297
- combos[:http][:www][:redirect_immediately_to_https] and
298
- !combos[:http][:www][:redirect_immediately_external]
299
- )
300
- )
301
- )
302
-
303
- details[:canonical_endpoint] = www ? :www : :root
304
- details[:canonical_protocol] = https ? :https : :http
305
- details[:canonical] = uri(https, www).to_s
306
-
307
- # If any endpoint is up, the domain is up.
308
- details[:up] = !!(
309
- combos[:https][:www][:up] or
310
- combos[:https][:root][:up] or
311
- combos[:http][:www][:up] or
312
- combos[:http][:root][:up]
313
- )
314
-
315
- # A domain's root is broken if neither protocol can connect.
316
- details[:broken_root] = !!(
317
- !combos[:https][:root][:up] and
318
- !combos[:http][:root][:up]
319
- )
320
-
321
- # A domain's www is broken if neither protocol can connect.
322
- details[:broken_www] = !!(
323
- !combos[:https][:www][:up] and
324
- !combos[:http][:www][:up]
325
- )
326
-
327
- # HTTPS is "supported" (different than "canonical" or "enforced") if:
328
- #
329
- # * Either of the HTTPS endpoints is listening, and doesn't have
330
- # an invalid hostname.
331
- details[:support_https] = !!(
332
- (
333
- (combos[:https][:root][:status] != 0) and
334
- !combos[:https][:root][:https_bad_name]
335
- ) or (
336
- (combos[:https][:www][:status] != 0) and
337
- !combos[:https][:www][:https_bad_name]
338
- )
339
- )
340
-
341
- # we can say that a canonical HTTPS site "defaults" to HTTPS,
342
- # even if it doesn't *strictly* enforce it (e.g. having a www
343
- # subdomain first to go HTTP root before HTTPS root).
344
- details[:default_https] = https
345
-
346
- # HTTPS is "downgraded" if both:
347
- #
348
- # * HTTPS is supported, and
349
- # * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
350
-
351
- details[:downgrade_https] = !!(
352
- details[:support_https] and
353
- (
354
- combos[:https][details[:canonical_endpoint]][:redirect] and
355
- !combos[:https][details[:canonical_endpoint]][:redirect_immediately_external] and
356
- !combos[:https][details[:canonical_endpoint]][:redirect_immediately_to_https]
357
- )
358
- )
359
-
360
- # HTTPS is enforced if one of the HTTPS endpoints is "live",
361
- # and if both *HTTP* endpoints are either:
362
- #
363
- # * down, or
364
- # * redirect immediately to HTTPS.
365
- #
366
- # This is different than whether a domain is "canonically" HTTPS.
367
- #
368
- # * an HTTP redirect can go to HTTPS on another domain, as long
369
- # as it's immediate.
370
- # * a domain with an invalid cert can still be enforcing HTTPS.
371
- details[:enforce_https] = !!(
372
- (
373
- !combos[:http][:www][:up] or
374
- (combos[:http][:www][:redirect_immediately_to_https])
375
- ) and
376
- (
377
- !combos[:http][:root][:up] or
378
- (combos[:http][:root][:redirect_immediately_to_https])
379
- ) and
380
- (
381
- combos[:https][:www][:up] or
382
- combos[:https][:root][:up]
383
- )
384
- )
385
-
386
- # The domain is a redirect if at least one endpoint is up,
387
- # and each one is *either* an external redirect or down entirely.
388
- details[:redirect] = !!(
389
- details[:up] and
390
- (
391
- combos[:http][:www][:redirect_external] or
392
- !combos[:http][:www][:up] or
393
- combos[:http][:www][:status] >= 400
394
- ) and
395
- (
396
- combos[:http][:root][:redirect_external] or
397
- !combos[:http][:root][:up] or
398
- combos[:http][:root][:status] >= 400
399
- ) and
400
- (
401
- combos[:https][:www][:redirect_external] or
402
- !combos[:https][:www][:up] or
403
- combos[:https][:www][:https_bad_name] or
404
- combos[:https][:www][:status] >= 400
405
- ) and
406
- (
407
- combos[:https][:root][:redirect_external] or
408
- !combos[:https][:root][:up] or
409
- combos[:https][:root][:https_bad_name] or
410
- combos[:https][:root][:status] >= 400
411
- )
412
- )
413
-
414
- # OK, we've said a domain is a "redirect" domain.
415
- # What does the domain redirect to?
416
- if details[:redirect]
417
- canon = combos[details[:canonical_protocol]][details[:canonical_endpoint]]
418
- details[:redirect_to] = canon[:redirect_to]
419
- else
420
- details[:redirect_to] = nil
421
- end
422
-
423
- # HSTS on the canonical domain? (valid HTTPS checked in endpoint)
424
- details[:hsts] = !!combos[:https][details[:canonical_endpoint]][:hsts]
425
- details[:hsts_header] = combos[:https][details[:canonical_endpoint]][:hsts_header]
426
-
427
- # HSTS on the entire domain?
428
- details[:hsts_entire_domain] = !!(
429
- combos[:https][:root][:hsts] and
430
- combos[:https][:root][:hsts_details][:include_subdomains]
431
- )
432
-
433
- # HSTS preload-ready for the entire domain?
434
- #
435
- # Re-checks :hsts_entire_domain in case the :preload_ready
436
- # flag ever changes its definition to not require include_subdomains.
437
-
438
- details[:hsts_entire_domain_preload] = !!(
439
- details[:hsts_entire_domain] and
440
- combos[:https][:root][:hsts_details][:preload_ready]
441
- )
442
-
443
- details
444
- end
445
-
446
- def endpoints
447
- https_www = http_endpoint(true, true)
448
- http_www = http_endpoint(false, true)
449
- https_root = http_endpoint(true, false)
450
- http_root = http_endpoint(false, false)
451
-
452
- {
453
- https: {
454
- www: https_www,
455
- root: https_root
456
- },
457
- http: {
458
- www: http_www,
459
- root: http_root
55
+ def typhoeus_defaults
56
+ defaults = {
57
+ followlocation: false,
58
+ timeout: SiteInspector.timeout,
59
+ accept_encoding: 'gzip',
60
+ method: :head,
61
+ headers: {
62
+ 'User-Agent' => "Mozilla/5.0 (compatible; SiteInspector/#{SiteInspector::VERSION}; +https://github.com/benbalter/site-inspector)"
63
+ }
460
64
  }
461
- }
462
- end
463
-
464
- # State of affairs at a particular endpoint.
465
- def http_endpoint(ssl, www)
466
- details = {}
467
-
468
- # Don't follow redirects for first ping.
469
- response = request(ssl, www, false)
470
-
471
-
472
- # For HTTPS: examine the full range of possibilities.
473
- if ssl
474
- if response.return_code == :ok
475
- details[:https_valid] = true
476
- details[:https_bad_chain] = false
477
- details[:https_bad_name] = false
478
-
479
- # Bad certificate chain.
480
- elsif response.return_code == :ssl_cacert
481
- details[:https_valid] = false
482
- details[:https_bad_chain] = true
483
- response = request(ssl, www, false, false, true)
484
- # Bad everything.
485
- if response.return_code == :peer_failed_verification
486
- details[:https_bad_name] = true
487
- response = request(ssl, www, false, false, false)
488
- end
489
-
490
- # Bad hostname.
491
- elsif response.return_code == :peer_failed_verification
492
- details[:https_valid] = false
493
- details[:https_bad_name] = true
494
- response = request(ssl, www, false, true, false)
495
- # Bad everything.
496
- if response.return_code == :ssl_cacert
497
- details[:https_bad_chain] = true
498
- response = request(ssl, www, false, false, false)
499
- end
500
-
501
- # not sure what else would happen
502
- elsif response.response_code != 0
503
- details[:https_valid] = false
504
- details[:https_unknown_issue] = response.return_code
505
- end
65
+ defaults.merge! @typhoeus_options if @typhoeus_options
66
+ defaults
506
67
  end
507
68
 
508
- # If we ended up with a failure, return it.
509
- details[:status] = response.response_code
510
- details[:up] = (response.response_code != 0)
511
- return details if !details[:up]
512
-
513
- headers = Hash[response.headers.map{ |k,v| [k.downcase,v] }]
514
- details[:headers] = headers
515
-
516
-
517
- # HSTS only takes effect when delivered over valid HTTPS.
518
- hsts = SiteInspector.hsts_parse(headers["strict-transport-security"])
519
-
520
- details[:hsts] = !!(
521
- ssl and
522
- details[:https_valid] and
523
- hsts[:enabled]
524
- )
525
-
526
- details[:hsts_header] = headers["strict-transport-security"]
527
- details[:hsts_details] = hsts
528
-
529
-
530
- # If it's a redirect, go find the ultimate response starting from this combo.
531
- redirect_code = response.response_code.to_s.start_with?("3")
532
- location_header = headers["location"]
533
- if redirect_code and location_header
534
- location_header = location_header.downcase
535
- details[:redirect] = true
536
-
537
- ultimate_response = request(ssl, www, true, !details[:https_bad_chain], !details[:https_bad_name])
538
- uri_original = URI(ultimate_response.request.url)
539
-
540
- # treat relative Location headers as having the original hostname
541
- if location_header.start_with?("http:") or location_header.start_with?("https:")
542
- uri_immediate = URI(URI.escape(location_header))
543
- else
544
- uri_immediate = URI.join(uri_original, URI.escape(location_header))
545
- end
546
-
547
- uri_eventual = URI(ultimate_response.effective_url.downcase)
548
-
549
- # compare base domain names
550
- base_original = PublicSuffix.parse(uri_original.hostname).domain
551
-
552
- # if the redirects aren't to valid hostnames (e.g. IP addresses)
553
- # then fine just compare them directly, they're not going to be
554
- # identical anyway.
555
- base_immediate = begin
556
- PublicSuffix.parse(uri_immediate.hostname).domain
557
- rescue PublicSuffix::DomainInvalid
558
- uri_immediate.to_s
559
- end
560
-
561
- base_eventual = begin
562
- PublicSuffix.parse(uri_eventual.hostname).domain
563
- rescue PublicSuffix::DomainInvalid
564
- uri_eventual.to_s
565
- end
566
-
567
- details[:redirect_immediately_to] = uri_immediate.to_s
568
- details[:redirect_immediately_to_www] = !!uri_immediate.to_s.match(/^https?:\/\/www\./)
569
- details[:redirect_immediately_to_https] = uri_immediate.to_s.start_with?("https://")
570
- details[:redirect_immediately_external] = (base_original != base_immediate)
571
-
572
- details[:redirect_to] = uri_eventual.to_s
573
- details[:redirect_external] = (base_original != base_eventual)
574
-
575
- # otherwise, mark all the redirect fields as false/null
576
- else
577
- details[:redirect] = false
578
- details[:redirect_immediately_to] = nil
579
- details[:redirect_immediately_to_www] = false
580
- details[:redirect_immediately_to_https] = false
581
- details[:redirect_immediately_external] = false
582
-
583
- details[:redirect_to] = nil
584
- details[:redirect_external] = false
69
+ # Returns a thread-safe, memoized hydra instance
70
+ def hydra
71
+ Typhoeus::Hydra.hydra
585
72
  end
586
-
587
- details
588
73
  end
74
+ end
589
75
 
590
- def to_hash(http_only=false)
591
- if http_only
592
- {
593
- :domain => domain.to_s,
594
- :uri => uri.to_s,
595
- :live => !!response,
596
- :ssl => https?,
597
- :enforce_https => enforce_https?,
598
- :non_www => non_www?,
599
- :redirect => redirect,
600
- :headers => headers
601
- }
602
- else
603
- {
604
- :domain => domain.to_s,
605
- :uri => uri.to_s,
606
- :government => government?,
607
- :live => !!response,
608
- :ssl => https?,
609
- :enforce_https => enforce_https?,
610
- :non_www => non_www?,
611
- :redirect => redirect,
612
- :ip => ip,
613
- :hostname => hostname.to_s,
614
- :ipv6 => ipv6?,
615
- :dnssec => dnssec?,
616
- :cdn => cdn,
617
- :google_apps => google_apps?,
618
- :cloud_provider => cloud_provider,
619
- :server => server,
620
- :cms => cms,
621
- :analytics => analytics,
622
- :javascript => javascript,
623
- :advertising => advertising,
624
- :slash_data => slash_data?,
625
- :slash_developer => slash_developer?,
626
- :data_dot_json => data_dot_json?,
627
- :click_jacking_protection => click_jacking_protection?,
628
- :content_security_policy => content_security_policy?,
629
- :xss_protection => xss_protection?,
630
- :secure_cookies => secure_cookies?,
631
- :strict_transport_security => strict_transport_security?,
632
- :headers => headers
633
- }
634
- end
635
- end
76
+ if ENV['DEBUG']
77
+ Ethon.logger = Logger.new($stdout)
78
+ Ethon.logger.level = Logger::DEBUG
79
+ Typhoeus::Config.verbose = true
636
80
  end
81
+
82
+ Typhoeus::Config.memoize = true
83
+ Typhoeus::Config.cache = SiteInspector.cache