site-inspector 1.0.2 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,24 @@
# frozen_string_literal: true

# Convenience helpers added to Cliver's dependency object: memoized access to
# the resolved executable's path and to the version found at that path.
module Cliver
  class Dependency
    # Memoized shortcut for detect
    # Returns the path to the detected dependency
    # Raises an error if the dependency was not satisfied
    def path
      @path ||= detect!
    end

    # Returns the version of the resolved dependency, or nil when none of the
    # installed versions is located at `path`.
    #
    # Memoized with `defined?` rather than `||=` so that a nil result is
    # remembered as well. The guard and the assignment must use the same
    # instance variable, otherwise `installed_versions` is re-enumerated on
    # every call.
    def version
      return @detected_version if defined? @detected_version

      match = installed_versions.find { |p, _v| p == path }
      @detected_version = match.nil? ? nil : match[1]
    end

    # Returns the leading dot-separated component of `version`
    # (e.g. "2" for "2.3.4"), or nil when no version was detected.
    def major_version
      version&.split('.')&.first
    end
  end
end
@@ -1,636 +1,83 @@
1
+ # frozen_string_literal: true
1
2
 
2
- # needed for HTTP analysis
3
3
  require 'open-uri'
4
- require "addressable/uri"
4
+ require 'addressable/uri'
5
5
  require 'public_suffix'
6
6
  require 'typhoeus'
7
+ require 'parallel'
8
+ require 'cliver'
9
+ require 'whois'
10
+ require 'cgi'
11
+ require 'resolv'
12
+ require 'dotenv/load'
7
13
 
8
14
  require_relative 'site-inspector/cache'
9
- require_relative 'site-inspector/headers'
10
- require_relative 'site-inspector/sniffer'
11
- require_relative 'site-inspector/dns'
12
- require_relative 'site-inspector/compliance'
13
-
14
-
15
- if ENV['CACHE']
16
- Typhoeus::Config.cache = SiteInspectorDiskCache.new(ENV['CACHE'], ENV['CACHE_REPLACE'])
17
- else
18
- Typhoeus::Config.cache = SiteInspectorCache.new
19
- end
15
+ require_relative 'site-inspector/disk_cache'
16
+ require_relative 'site-inspector/rails_cache'
17
+ require_relative 'site-inspector/domain'
18
+ require_relative 'site-inspector/checks/check'
19
+ require_relative 'site-inspector/checks/accessibility'
20
+ require_relative 'site-inspector/checks/content'
21
+ require_relative 'site-inspector/checks/dns'
22
+ require_relative 'site-inspector/checks/headers'
23
+ require_relative 'site-inspector/checks/hsts'
24
+ require_relative 'site-inspector/checks/https'
25
+ require_relative 'site-inspector/checks/sniffer'
26
+ require_relative 'site-inspector/checks/cookies'
27
+ require_relative 'site-inspector/checks/whois'
28
+ require_relative 'site-inspector/checks/wappalyzer'
29
+ require_relative 'site-inspector/endpoint'
30
+ require_relative 'site-inspector/version'
31
+ require_relative 'cliver/dependency_ext'
20
32
 
21
33
  class SiteInspector
22
-
23
- def self.load_data(name)
24
- require 'yaml'
25
- YAML.load_file File.expand_path "./data/#{name}.yml", File.dirname(__FILE__)
26
- end
27
-
28
- # Utility parser for HSTS headers.
29
- # RFC: http://tools.ietf.org/html/rfc6797
30
- def self.hsts_parse(header)
31
- # no hsts for you
32
- nothing = {
33
- max_age: nil,
34
- include_subdomains: false,
35
- preload: false,
36
- enabled: false,
37
- preload_ready: false
38
- }
39
-
40
- return nothing unless header and header.is_a?(String)
41
-
42
- directives = header.split(/\s*;\s*/)
43
-
44
- pairs = []
45
- directives.each do |directive|
46
- name, value = directive.downcase.split("=")
47
-
48
- if value and value.start_with?("\"") and value.end_with?("\"")
49
- value = value.sub(/^\"/, '')
50
- value = value.sub(/\"$/, '')
51
- end
52
-
53
- pairs.push([name, value])
34
+ class << self
35
+ attr_writer :timeout, :cache, :typhoeus_options
36
+
37
+ def cache
38
+ @cache ||= if ENV['CACHE']
39
+ SiteInspector::DiskCache.new
40
+ elsif Object.const_defined?('Rails')
41
+ SiteInspector::RailsCache.new
42
+ else
43
+ SiteInspector::Cache.new
44
+ end
54
45
  end
55
46
 
56
- # reject invalid directives
57
- fatal = pairs.any? do |name, value|
58
- # TODO: more comprehensive rejection of characters
59
- invalid_chars = /[\s\'\"]/
60
- (name =~ invalid_chars) or (value =~ invalid_chars)
47
+ def timeout
48
+ @timeout || 10
61
49
  end
62
50
 
63
- # good DAY, sir
64
- return nothing if fatal
65
-
66
- max_age_directive = pairs.find {|n, v| n == "max-age"}
67
- max_age = max_age_directive ? max_age_directive[1].to_i : nil
68
- include_subdomains = !!pairs.find {|n, v| n == "includesubdomains"}
69
- preload = !!pairs.find {|n, v| n == "preload"}
70
-
71
- enabled = !!(max_age and (max_age > 0))
72
-
73
- # Google's minimum max-age for automatic preloading
74
- eighteen_weeks = !!(max_age and (max_age >= 10886400))
75
- preload_ready = !!(eighteen_weeks and include_subdomains and preload)
76
-
77
- {
78
- max_age: max_age,
79
- include_subdomains: include_subdomains,
80
- preload: preload,
81
- enabled: enabled,
82
- preload_ready: preload_ready
83
- }
84
- end
85
-
86
- # makes no network requests
87
- def initialize(domain, options = {})
88
- domain = domain.downcase
89
- domain = domain.sub /^https?\:/, ""
90
- domain = domain.sub /^\/+/, ""
91
- domain = domain.sub /^www\./, ""
92
- @uri = Addressable::URI.parse "//#{domain}"
93
- @domain = PublicSuffix.parse @uri.host
94
- @timeout = options[:timeout] || 10
95
- end
96
-
97
- def inspect
98
- "<SiteInspector domain=\"#{domain}\">"
99
- end
100
-
101
- def uri(ssl=enforce_https?,www=www?)
102
- uri = @uri.clone
103
- uri.host = www ? "www.#{uri.host}" : uri.host
104
- uri.scheme = ssl ? "https" : "http"
105
- uri
106
- end
107
-
108
- def domain
109
- www? ? PublicSuffix.parse("www.#{@uri.host}") : @domain
110
- end
111
-
112
- def request(ssl=false, www=false, followlocation=true, ssl_verifypeer=true, ssl_verifyhost=true)
113
- to_get = uri(ssl, www)
114
-
115
- # debugging
116
- # puts "fetching: #{to_get}, #{followlocation ? "follow" : "no follow"}, #{ssl_verifypeer ? "verify peer, " : ""}#{ssl_verifyhost ? "verify host" : ""}"
117
-
118
- Typhoeus.get(to_get, followlocation: followlocation, ssl_verifypeer: ssl_verifypeer, ssl_verifyhost: (ssl_verifyhost ? 2 : 0), timeout: @timeout)
119
- end
120
-
121
- def response
122
- @response ||= begin
123
- if response = request(false, false) and response.success?
124
- @non_www = true
125
- response
126
- elsif response = request(false, true) and response.success?
127
- @non_www = false
128
- response
129
- else
130
- false
131
- end
132
- end
133
- end
134
-
135
- def timed_out?
136
- response && response.timed_out?
137
- end
138
-
139
- def doc
140
- require 'nokogiri'
141
- @doc ||= Nokogiri::HTML response.body if response
142
- end
143
-
144
- def body
145
- doc.to_s.force_encoding("UTF-8").encode("UTF-8", :invalid => :replace, :replace => "")
146
- end
147
-
148
- def government?
149
- require 'gman'
150
- Gman.valid? domain.to_s
151
- end
152
-
153
- def https?
154
- @https ||= request(true, www?).success?
155
- end
156
- alias_method :ssl?, :https?
157
-
158
- def enforce_https?
159
- return false unless https?
160
- @enforce_https ||= begin
161
- response = request(false, www?)
162
- if response.effective_url
163
- Addressable::URI.parse(response.effective_url).scheme == "https"
164
- else
165
- false
166
- end
51
+ def inspect(domain)
52
+ Domain.new(domain)
167
53
  end
168
- end
169
-
170
- def www?
171
- response && response.effective_url && !!response.effective_url.match(/^https?:\/\/www\./)
172
- end
173
-
174
- def non_www?
175
- response && @non_www
176
- end
177
-
178
- def redirect?
179
- !!redirect
180
- end
181
-
182
- def redirect
183
- @redirect ||= begin
184
- if location = request(https?, www?, false).headers["location"]
185
- redirect_domain = SiteInspector.new(location).domain
186
- redirect_domain.to_s if redirect_domain.to_s != domain.to_s
187
- end
188
- rescue
189
- nil
190
- end
191
- end
192
-
193
- def http
194
- details = {
195
- endpoints: endpoints
196
- }
197
-
198
- # convenient shorthand for the extensive statements to come
199
- combos = details[:endpoints]
200
-
201
- # A domain is "canonically" at www if:
202
- # * at least one of its www endpoints responds
203
- # * both root endpoints are either down or redirect *somewhere*
204
- # * either both root endpoints are down, *or* at least one
205
- # root endpoint redirect should immediately go to
206
- # an *internal* www endpoint
207
- # This is meant to affirm situations like:
208
- # http:// -> https:// -> https://www
209
- # https:// -> http:// -> https://www
210
- # and meant to avoid affirming situations like:
211
- # http:// -> http://non-www,
212
- # http://www -> http://non-www
213
- # or like:
214
- # https:// -> 200, http:// -> http://www
215
54
 
216
- www = !!(
217
- (
218
- combos[:https][:www][:up] or
219
- combos[:http][:www][:up]
220
- ) and (
221
- (
222
- combos[:https][:root][:redirect] or
223
- !combos[:https][:root][:up] or
224
- combos[:https][:root][:https_bad_name] or
225
- !combos[:https][:root][:status].to_s.start_with?("2")
226
- ) and (
227
- combos[:http][:root][:redirect] or
228
- !combos[:http][:root][:up] or
229
- !combos[:http][:root][:status].to_s.start_with?("2")
230
- )
231
- ) and (
232
- (
233
- (
234
- !combos[:https][:root][:up] or
235
- combos[:https][:root][:https_bad_name] or
236
- !combos[:https][:root][:status].to_s.start_with?("2")
237
- ) and
238
- (
239
- !combos[:http][:root][:up] or
240
- !combos[:http][:root][:status].to_s.start_with?("2")
241
- )
242
- ) or
243
- (
244
- combos[:https][:root][:redirect_immediately_to_www] and
245
- !combos[:https][:root][:redirect_immediately_external]
246
- ) or
247
- (
248
- combos[:http][:root][:redirect_immediately_to_www] and
249
- !combos[:http][:root][:redirect_immediately_external]
250
- )
251
- )
252
- )
253
-
254
- # A domain is "canonically" at https if:
255
- # * at least one of its https endpoints is live and
256
- # doesn't have an invalid hostname
257
- # * both http endpoints are either down or redirect *somewhere*
258
- # * at least one http endpoint redirects immediately to
259
- # an *internal* https endpoint
260
- # This is meant to affirm situations like:
261
- # http:// -> http://www -> https://
262
- # https:// -> http:// -> https://www
263
- # and meant to avoid affirming situations like:
264
- # http:// -> http://non-www
265
- # http://www -> http://non-www
266
- # or:
267
- # http:// -> 200, http://www -> https://www
268
- #
269
- # It allows a site to be canonically HTTPS if the cert has
270
- # a valid hostname but invalid chain issues.
271
-
272
- https = !!(
273
- (
274
- (
275
- combos[:https][:root][:up] and
276
- !combos[:https][:root][:https_bad_name]
277
- ) or
278
- (
279
- combos[:https][:www][:up] and
280
- !combos[:https][:www][:https_bad_name]
281
- )
282
- ) and (
283
- (
284
- combos[:http][:root][:redirect] or
285
- !combos[:http][:root][:up] or
286
- !combos[:http][:root][:status].to_s.start_with?("2")
287
- ) and (
288
- combos[:http][:www][:redirect] or
289
- !combos[:http][:www][:up] or
290
- !combos[:http][:www][:status].to_s.start_with?("2")
291
- )
292
- ) and (
293
- (
294
- combos[:http][:root][:redirect_immediately_to_https] and
295
- !combos[:http][:root][:redirect_immediately_external]
296
- ) or (
297
- combos[:http][:www][:redirect_immediately_to_https] and
298
- !combos[:http][:www][:redirect_immediately_external]
299
- )
300
- )
301
- )
302
-
303
- details[:canonical_endpoint] = www ? :www : :root
304
- details[:canonical_protocol] = https ? :https : :http
305
- details[:canonical] = uri(https, www).to_s
306
-
307
- # If any endpoint is up, the domain is up.
308
- details[:up] = !!(
309
- combos[:https][:www][:up] or
310
- combos[:https][:root][:up] or
311
- combos[:http][:www][:up] or
312
- combos[:http][:root][:up]
313
- )
314
-
315
- # A domain's root is broken if neither protocol can connect.
316
- details[:broken_root] = !!(
317
- !combos[:https][:root][:up] and
318
- !combos[:http][:root][:up]
319
- )
320
-
321
- # A domain's www is broken if neither protocol can connect.
322
- details[:broken_www] = !!(
323
- !combos[:https][:www][:up] and
324
- !combos[:http][:www][:up]
325
- )
326
-
327
- # HTTPS is "supported" (different than "canonical" or "enforced") if:
328
- #
329
- # * Either of the HTTPS endpoints is listening, and doesn't have
330
- # an invalid hostname.
331
- details[:support_https] = !!(
332
- (
333
- (combos[:https][:root][:status] != 0) and
334
- !combos[:https][:root][:https_bad_name]
335
- ) or (
336
- (combos[:https][:www][:status] != 0) and
337
- !combos[:https][:www][:https_bad_name]
338
- )
339
- )
340
-
341
- # we can say that a canonical HTTPS site "defaults" to HTTPS,
342
- # even if it doesn't *strictly* enforce it (e.g. having a www
343
- # subdomain first to go HTTP root before HTTPS root).
344
- details[:default_https] = https
345
-
346
- # HTTPS is "downgraded" if both:
347
- #
348
- # * HTTPS is supported, and
349
- # * The 'canonical' endpoint gets an immediate internal redirect to HTTP.
350
-
351
- details[:downgrade_https] = !!(
352
- details[:support_https] and
353
- (
354
- combos[:https][details[:canonical_endpoint]][:redirect] and
355
- !combos[:https][details[:canonical_endpoint]][:redirect_immediately_external] and
356
- !combos[:https][details[:canonical_endpoint]][:redirect_immediately_to_https]
357
- )
358
- )
359
-
360
- # HTTPS is enforced if one of the HTTPS endpoints is "live",
361
- # and if both *HTTP* endpoints are either:
362
- #
363
- # * down, or
364
- # * redirect immediately to HTTPS.
365
- #
366
- # This is different than whether a domain is "canonically" HTTPS.
367
- #
368
- # * an HTTP redirect can go to HTTPS on another domain, as long
369
- # as it's immediate.
370
- # * a domain with an invalid cert can still be enforcing HTTPS.
371
- details[:enforce_https] = !!(
372
- (
373
- !combos[:http][:www][:up] or
374
- (combos[:http][:www][:redirect_immediately_to_https])
375
- ) and
376
- (
377
- !combos[:http][:root][:up] or
378
- (combos[:http][:root][:redirect_immediately_to_https])
379
- ) and
380
- (
381
- combos[:https][:www][:up] or
382
- combos[:https][:root][:up]
383
- )
384
- )
385
-
386
- # The domain is a redirect if at least one endpoint is up,
387
- # and each one is *either* an external redirect or down entirely.
388
- details[:redirect] = !!(
389
- details[:up] and
390
- (
391
- combos[:http][:www][:redirect_external] or
392
- !combos[:http][:www][:up] or
393
- combos[:http][:www][:status] >= 400
394
- ) and
395
- (
396
- combos[:http][:root][:redirect_external] or
397
- !combos[:http][:root][:up] or
398
- combos[:http][:root][:status] >= 400
399
- ) and
400
- (
401
- combos[:https][:www][:redirect_external] or
402
- !combos[:https][:www][:up] or
403
- combos[:https][:www][:https_bad_name] or
404
- combos[:https][:www][:status] >= 400
405
- ) and
406
- (
407
- combos[:https][:root][:redirect_external] or
408
- !combos[:https][:root][:up] or
409
- combos[:https][:root][:https_bad_name] or
410
- combos[:https][:root][:status] >= 400
411
- )
412
- )
413
-
414
- # OK, we've said a domain is a "redirect" domain.
415
- # What does the domain redirect to?
416
- if details[:redirect]
417
- canon = combos[details[:canonical_protocol]][details[:canonical_endpoint]]
418
- details[:redirect_to] = canon[:redirect_to]
419
- else
420
- details[:redirect_to] = nil
421
- end
422
-
423
- # HSTS on the canonical domain? (valid HTTPS checked in endpoint)
424
- details[:hsts] = !!combos[:https][details[:canonical_endpoint]][:hsts]
425
- details[:hsts_header] = combos[:https][details[:canonical_endpoint]][:hsts_header]
426
-
427
- # HSTS on the entire domain?
428
- details[:hsts_entire_domain] = !!(
429
- combos[:https][:root][:hsts] and
430
- combos[:https][:root][:hsts_details][:include_subdomains]
431
- )
432
-
433
- # HSTS preload-ready for the entire domain?
434
- #
435
- # Re-checks :hsts_entire_domain in case the :preload_ready
436
- # flag ever changes its definition to not require include_subdomains.
437
-
438
- details[:hsts_entire_domain_preload] = !!(
439
- details[:hsts_entire_domain] and
440
- combos[:https][:root][:hsts_details][:preload_ready]
441
- )
442
-
443
- details
444
- end
445
-
446
- def endpoints
447
- https_www = http_endpoint(true, true)
448
- http_www = http_endpoint(false, true)
449
- https_root = http_endpoint(true, false)
450
- http_root = http_endpoint(false, false)
451
-
452
- {
453
- https: {
454
- www: https_www,
455
- root: https_root
456
- },
457
- http: {
458
- www: http_www,
459
- root: http_root
55
+ def typhoeus_defaults
56
+ defaults = {
57
+ followlocation: false,
58
+ timeout: SiteInspector.timeout,
59
+ accept_encoding: 'gzip',
60
+ method: :head,
61
+ headers: {
62
+ 'User-Agent' => "Mozilla/5.0 (compatible; SiteInspector/#{SiteInspector::VERSION}; +https://github.com/benbalter/site-inspector)"
63
+ }
460
64
  }
461
- }
462
- end
463
-
464
- # State of affairs at a particular endpoint.
465
- def http_endpoint(ssl, www)
466
- details = {}
467
-
468
- # Don't follow redirects for first ping.
469
- response = request(ssl, www, false)
470
-
471
-
472
- # For HTTPS: examine the full range of possibilities.
473
- if ssl
474
- if response.return_code == :ok
475
- details[:https_valid] = true
476
- details[:https_bad_chain] = false
477
- details[:https_bad_name] = false
478
-
479
- # Bad certificate chain.
480
- elsif response.return_code == :ssl_cacert
481
- details[:https_valid] = false
482
- details[:https_bad_chain] = true
483
- response = request(ssl, www, false, false, true)
484
- # Bad everything.
485
- if response.return_code == :peer_failed_verification
486
- details[:https_bad_name] = true
487
- response = request(ssl, www, false, false, false)
488
- end
489
-
490
- # Bad hostname.
491
- elsif response.return_code == :peer_failed_verification
492
- details[:https_valid] = false
493
- details[:https_bad_name] = true
494
- response = request(ssl, www, false, true, false)
495
- # Bad everything.
496
- if response.return_code == :ssl_cacert
497
- details[:https_bad_chain] = true
498
- response = request(ssl, www, false, false, false)
499
- end
500
-
501
- # not sure what else would happen
502
- elsif response.response_code != 0
503
- details[:https_valid] = false
504
- details[:https_unknown_issue] = response.return_code
505
- end
65
+ defaults.merge! @typhoeus_options if @typhoeus_options
66
+ defaults
506
67
  end
507
68
 
508
- # If we ended up with a failure, return it.
509
- details[:status] = response.response_code
510
- details[:up] = (response.response_code != 0)
511
- return details if !details[:up]
512
-
513
- headers = Hash[response.headers.map{ |k,v| [k.downcase,v] }]
514
- details[:headers] = headers
515
-
516
-
517
- # HSTS only takes effect when delivered over valid HTTPS.
518
- hsts = SiteInspector.hsts_parse(headers["strict-transport-security"])
519
-
520
- details[:hsts] = !!(
521
- ssl and
522
- details[:https_valid] and
523
- hsts[:enabled]
524
- )
525
-
526
- details[:hsts_header] = headers["strict-transport-security"]
527
- details[:hsts_details] = hsts
528
-
529
-
530
- # If it's a redirect, go find the ultimate response starting from this combo.
531
- redirect_code = response.response_code.to_s.start_with?("3")
532
- location_header = headers["location"]
533
- if redirect_code and location_header
534
- location_header = location_header.downcase
535
- details[:redirect] = true
536
-
537
- ultimate_response = request(ssl, www, true, !details[:https_bad_chain], !details[:https_bad_name])
538
- uri_original = URI(ultimate_response.request.url)
539
-
540
- # treat relative Location headers as having the original hostname
541
- if location_header.start_with?("http:") or location_header.start_with?("https:")
542
- uri_immediate = URI(URI.escape(location_header))
543
- else
544
- uri_immediate = URI.join(uri_original, URI.escape(location_header))
545
- end
546
-
547
- uri_eventual = URI(ultimate_response.effective_url.downcase)
548
-
549
- # compare base domain names
550
- base_original = PublicSuffix.parse(uri_original.hostname).domain
551
-
552
- # if the redirects aren't to valid hostnames (e.g. IP addresses)
553
- # then fine just compare them directly, they're not going to be
554
- # identical anyway.
555
- base_immediate = begin
556
- PublicSuffix.parse(uri_immediate.hostname).domain
557
- rescue PublicSuffix::DomainInvalid
558
- uri_immediate.to_s
559
- end
560
-
561
- base_eventual = begin
562
- PublicSuffix.parse(uri_eventual.hostname).domain
563
- rescue PublicSuffix::DomainInvalid
564
- uri_eventual.to_s
565
- end
566
-
567
- details[:redirect_immediately_to] = uri_immediate.to_s
568
- details[:redirect_immediately_to_www] = !!uri_immediate.to_s.match(/^https?:\/\/www\./)
569
- details[:redirect_immediately_to_https] = uri_immediate.to_s.start_with?("https://")
570
- details[:redirect_immediately_external] = (base_original != base_immediate)
571
-
572
- details[:redirect_to] = uri_eventual.to_s
573
- details[:redirect_external] = (base_original != base_eventual)
574
-
575
- # otherwise, mark all the redirect fields as false/null
576
- else
577
- details[:redirect] = false
578
- details[:redirect_immediately_to] = nil
579
- details[:redirect_immediately_to_www] = false
580
- details[:redirect_immediately_to_https] = false
581
- details[:redirect_immediately_external] = false
582
-
583
- details[:redirect_to] = nil
584
- details[:redirect_external] = false
69
+ # Returns a thread-safe, memoized hydra instance
70
+ def hydra
71
+ Typhoeus::Hydra.hydra
585
72
  end
586
-
587
- details
588
73
  end
74
+ end
589
75
 
590
- def to_hash(http_only=false)
591
- if http_only
592
- {
593
- :domain => domain.to_s,
594
- :uri => uri.to_s,
595
- :live => !!response,
596
- :ssl => https?,
597
- :enforce_https => enforce_https?,
598
- :non_www => non_www?,
599
- :redirect => redirect,
600
- :headers => headers
601
- }
602
- else
603
- {
604
- :domain => domain.to_s,
605
- :uri => uri.to_s,
606
- :government => government?,
607
- :live => !!response,
608
- :ssl => https?,
609
- :enforce_https => enforce_https?,
610
- :non_www => non_www?,
611
- :redirect => redirect,
612
- :ip => ip,
613
- :hostname => hostname.to_s,
614
- :ipv6 => ipv6?,
615
- :dnssec => dnssec?,
616
- :cdn => cdn,
617
- :google_apps => google_apps?,
618
- :cloud_provider => cloud_provider,
619
- :server => server,
620
- :cms => cms,
621
- :analytics => analytics,
622
- :javascript => javascript,
623
- :advertising => advertising,
624
- :slash_data => slash_data?,
625
- :slash_developer => slash_developer?,
626
- :data_dot_json => data_dot_json?,
627
- :click_jacking_protection => click_jacking_protection?,
628
- :content_security_policy => content_security_policy?,
629
- :xss_protection => xss_protection?,
630
- :secure_cookies => secure_cookies?,
631
- :strict_transport_security => strict_transport_security?,
632
- :headers => headers
633
- }
634
- end
635
- end
76
+ if ENV['DEBUG']
77
+ Ethon.logger = Logger.new($stdout)
78
+ Ethon.logger.level = Logger::DEBUG
79
+ Typhoeus::Config.verbose = true
636
80
  end
81
+
82
+ Typhoeus::Config.memoize = true
83
+ Typhoeus::Config.cache = SiteInspector.cache