RubyGems - site-inspector - Versions diffs - 1.0.2 → 3.2.0 - Mend

site-inspector 1.0.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

checksums.yaml +5 -5
data/.gitignore +8 -0
data/.rubocop.yml +42 -0
data/.rubocop_todo.yml +139 -0
data/.ruby-version +1 -0
data/.travis.yml +9 -0
data/Gemfile +7 -0
data/Guardfile +10 -0
data/README.md +189 -0
data/Rakefile +10 -0
data/bin/site-inspector +50 -22
data/lib/cliver/dependency_ext.rb +24 -0
data/lib/site-inspector.rb +62 -615
data/lib/site-inspector/cache.rb +10 -51
data/lib/site-inspector/checks/accessibility.rb +135 -0
data/lib/site-inspector/checks/check.rb +54 -0
data/lib/site-inspector/checks/content.rb +85 -0
data/lib/site-inspector/checks/cookies.rb +45 -0
data/lib/site-inspector/checks/dns.rb +138 -0
data/lib/site-inspector/checks/headers.rb +68 -0
data/lib/site-inspector/checks/hsts.rb +81 -0
data/lib/site-inspector/checks/https.rb +40 -0
data/lib/site-inspector/checks/sniffer.rb +67 -0
data/lib/site-inspector/checks/wappalyzer.rb +62 -0
data/lib/site-inspector/checks/whois.rb +36 -0
data/lib/site-inspector/disk_cache.rb +42 -0
data/lib/site-inspector/domain.rb +271 -0
data/lib/site-inspector/endpoint.rb +217 -0
data/lib/site-inspector/rails_cache.rb +13 -0
data/lib/site-inspector/version.rb +5 -0
data/package-lock.json +505 -0
data/package.json +23 -0
data/script/bootstrap +2 -0
data/script/cibuild +11 -0
data/script/console +3 -0
data/script/pa11y-version +10 -0
data/script/release +38 -0
data/site-inspector.gemspec +42 -0
data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
data/spec/fixtures/wappalyzer.json +125 -0
data/spec/site_inspector_cache_spec.rb +15 -0
data/spec/site_inspector_disk_cache_spec.rb +39 -0
data/spec/site_inspector_domain_spec.rb +271 -0
data/spec/site_inspector_endpoint_spec.rb +252 -0
data/spec/site_inspector_spec.rb +48 -0
data/spec/spec_helper.rb +19 -0
metadata +204 -63
data/lib/site-inspector/compliance.rb +0 -19
data/lib/site-inspector/dns.rb +0 -92
data/lib/site-inspector/headers.rb +0 -59
data/lib/site-inspector/sniffer.rb +0 -26

data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb ADDED

@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+require 'spec_helper'
+describe SiteInspector::Endpoint::Wappalyzer do
+  subject { described_class.new(endpoint) }
+  let(:domain) { 'http://ben.balter.com.com' }
+  let(:endpoint) { SiteInspector::Endpoint.new(domain) }
+  let(:url) { "https://api.wappalyzer.com/lookup/v2/?urls=#{domain}/" }
+  before do
+    path = File.expand_path '../fixtures/wappalyzer.json', __dir__
+    body = File.read path
+    stub_request(:get, url).to_return(status: 200, body: body)
+  end
+  it 'returns the API response' do
+    expected = {
+      'Analytics' => ['Google Analytics'],
+      'CDN' => %w[Cloudflare Fastly],
+      'Caching' => ['Varnish'],
+      'Other' => %w[Disqus Jekyll],
+      'PaaS' => ['GitHub Pages'],
+      'Web frameworks' => ['Ruby on Rails']
+    }
+    expect(subject.to_h).to eql(expected)
+  end
+  it 'fails gracefully' do
+    stub_request(:get, url).to_return(status: 400, body: '')
+    expect(subject.to_h).to eql({})
+  end
+end

data/spec/checks/site_inspector_endpoint_whois_spec.rb ADDED

@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+require 'spec_helper'
+describe SiteInspector::Endpoint::Whois do
+  subject do
+    stub_request(:head, site).to_return(status: 200)
+    endpoint = SiteInspector::Endpoint.new(site)
+    described_class.new(endpoint)
+  end
+  let(:site) { 'https://example.com' }
+  it 'returns the whois for the IP' do
+    expect(subject.ip).to match(/Derrick Sawyer/)
+  end
+  it 'returns the whois for the domain' do
+    expect(subject.domain).to match(/Domain Name: EXAMPLE\.COM/)
+  end
+  it 'returns the hash' do
+    expect(subject.to_h[:domain].keys.first).to eql('Domain Name')
+    expect(subject.to_h[:domain].values.first).to eql('EXAMPLE.COM')
+  end
+end

data/spec/fixtures/wappalyzer.json ADDED

@@ -0,0 +1,125 @@
+[
+   {
+      "url":"https://ben.balter.com",
+      "technologies":[
+         {
+            "slug":"cloudflare",
+            "name":"Cloudflare",
+            "versions":[
+            ],
+            "trafficRank":11,
+            "categories":[
+               {
+                  "id":31,
+                  "slug":"cdn",
+                  "name":"CDN"
+               }
+            ]
+         },
+         {
+            "slug":"varnish",
+            "name":"Varnish",
+            "versions":[
+            ],
+            "trafficRank":11,
+            "categories":[
+               {
+                  "id":23,
+                  "slug":"caching",
+                  "name":"Caching"
+               }
+            ]
+         },
+         {
+            "slug":"disqus",
+            "name":"Disqus",
+            "versions":[
+            ],
+            "trafficRank":11,
+            "categories":[
+            ]
+         },
+         {
+            "slug":"google-analytics",
+            "name":"Google Analytics",
+            "versions":[
+            ],
+            "trafficRank":11,
+            "categories":[
+               {
+                  "id":10,
+                  "slug":"analytics",
+                  "name":"Analytics"
+               },
+               {
+                  "id":61,
+                  "slug":"saas",
+                  "name":"SaaS"
+               }
+            ]
+         },
+         {
+            "slug":"jekyll",
+            "name":"Jekyll",
+            "versions":[
+               "v3.9.0"
+            ],
+            "trafficRank":11,
+            "categories":[
+            ]
+         },
+         {
+            "slug":"ruby-on-rails",
+            "name":"Ruby on Rails",
+            "versions":[
+            ],
+            "trafficRank":11,
+            "categories":[
+               {
+                  "id":18,
+                  "slug":"web-frameworks",
+                  "name":"Web frameworks"
+               }
+            ]
+         },
+         {
+            "slug":"fastly",
+            "name":"Fastly",
+            "versions":[
+            ],
+            "trafficRank":11,
+            "categories":[
+               {
+                  "id":31,
+                  "slug":"cdn",
+                  "name":"CDN"
+               }
+            ]
+         },
+         {
+            "slug":"github-pages",
+            "name":"GitHub Pages",
+            "versions":[
+            ],
+            "trafficRank":11,
+            "categories":[
+               {
+                  "id":62,
+                  "slug":"paas",
+                  "name":"PaaS"
+               }
+            ]
+         }
+      ],
+      "crawl":true
+   }
+]

data/spec/site_inspector_cache_spec.rb ADDED

@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+require 'spec_helper'
+describe SiteInspector::Cache do
+  it 'stores a cache value' do
+    subject.set 'foo', 'bar'
+    expect(subject.instance_variable_get('@memory')['foo']).to eql('bar')
+  end
+  it 'retrieves values from the cache' do
+    subject.instance_variable_set('@memory', 'foo' => 'bar')
+    expect(subject.get('foo')).to eql('bar')
+  end
+end

data/spec/site_inspector_disk_cache_spec.rb ADDED

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+require 'spec_helper'
+describe SiteInspector::DiskCache do
+  subject { described_class.new(tmpdir) }
+  before do
+    FileUtils.rm_rf(tmpdir)
+    Dir.mkdir(tmpdir)
+  end
+  it 'writes a value to disk' do
+    foo = Typhoeus::Request.new('foo')
+    path = File.expand_path foo.cache_key, tmpdir
+    expect(File.exist?(path)).to be(false)
+    subject.set foo, 'bar'
+    expect(File.exist?(path)).to be(true)
+    expect(File.open(path).read).to eql("I\"bar:ET")
+  end
+  it 'reads a value from disk' do
+    foo = Typhoeus::Request.new('foo')
+    path = File.expand_path foo.cache_key, tmpdir
+    File.write(path, "I\"bar:ET")
+    expect(subject.get(foo)).to eql('bar')
+  end
+  it "calculates a file's path" do
+    foo = Typhoeus::Request.new('foo')
+    path = File.expand_path foo.cache_key, tmpdir
+    expect(subject.send(:path, foo)).to eql(path)
+  end
+end

data/spec/site_inspector_domain_spec.rb ADDED

@@ -0,0 +1,271 @@
+# frozen_string_literal: true
+require 'spec_helper'
+describe SiteInspector::Domain do
+  subject { described_class.new('example.com') }
+  context 'domain parsing' do
+    it 'downcases the domain' do
+      domain = described_class.new('EXAMPLE.com')
+      expect(domain.host).to eql('example.com')
+    end
+    it 'strips http from the domain' do
+      domain = described_class.new('http://example.com')
+      expect(domain.host).to eql('example.com')
+    end
+    it 'strips https from the domain' do
+      domain = described_class.new('https://example.com')
+      expect(domain.host).to eql('example.com')
+    end
+    it 'strips www from the domain' do
+      domain = described_class.new('www.example.com')
+      expect(domain.host).to eql('example.com')
+    end
+    it 'strips http://www from the domain' do
+      domain = described_class.new('http://www.example.com')
+      expect(domain.host).to eql('example.com')
+    end
+    it 'strips paths from the domain' do
+      domain = described_class.new('http://www.example.com/foo')
+      expect(domain.host).to eql('example.com')
+    end
+    it 'strips trailing slashes from the domain' do
+      domain = described_class.new('http://www.example.com/')
+      expect(domain.host).to eql('example.com')
+    end
+  end
+  context 'endpoints' do
+    it 'generates the endpoints' do
+      endpoints = subject.endpoints
+      expect(endpoints.count).to be(4)
+      expect(endpoints[0].to_s).to eql('https://example.com/')
+      expect(endpoints[1].to_s).to eql('https://www.example.com/')
+      expect(endpoints[2].to_s).to eql('http://example.com/')
+      expect(endpoints[3].to_s).to eql('http://www.example.com/')
+    end
+  end
+  it 'knows the canonical domain' do
+    stub_request(:head, 'https://example.com/').to_return(status: 500)
+    stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+    stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+    stub_request(:head, 'http://example.com/').to_return(status: 200)
+    expect(subject.canonical_endpoint.to_s).to eql('http://example.com/')
+  end
+  it 'knows if a domain is a government domain' do
+    expect(subject.government?).to be(false)
+    domain = described_class.new('whitehouse.gov')
+    expect(domain.government?).to be(true)
+  end
+  context 'up' do
+    it 'considers a domain up if at least one endpoint is up' do
+      subject.endpoints.each do |endpoint|
+        allow(endpoint).to receive(:response) { Typhoeus::Response.new(code: 0) } unless endpoint.uri.to_s.start_with?('http://www')
+      end
+      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+      expect(subject.up?).to be(true)
+    end
+    it "doesn't consider a domain up when all endpoints are down" do
+      subject.endpoints.each do |endpoint|
+        allow(endpoint).to receive(:response) { Typhoeus::Response.new(code: 0) }
+      end
+      expect(subject.up?).to be(false)
+    end
+  end
+  context 'up' do
+    it 'considers a domain up if at least one endpoint is up' do
+      stub_request(:head, 'https://example.com/').to_return(status: 500)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 500)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+      expect(subject.up?).to be(true)
+    end
+    it "doesn't consider a domain up if all endpoints are down" do
+      stub_request(:head, 'https://example.com/').to_return(status: 500)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 500)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 500)
+      expect(subject.up?).to be(false)
+    end
+  end
+  context 'www' do
+    it 'considers a site www when at least one endpoint is www' do
+      stub_request(:head, 'https://example.com/').to_return(status: 200)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 500)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+      expect(subject.www?).to be(true)
+    end
+    it "doesn't consider a site www when no endpoint is www" do
+      stub_request(:head, 'https://example.com/').to_return(status: 200)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 200)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 500)
+      expect(subject.www?).to be(false)
+    end
+  end
+  context 'root' do
+    it 'considers a domain root if you can connect without www' do
+      stub_request(:head, 'https://example.com/').to_return(status: 200)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 500)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 500)
+      expect(subject.root?).to be(true)
+    end
+    it "doesn't call a www-only domain root" do
+      stub_request(:head, 'https://example.com/').to_return(status: 500)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 200)
+      stub_request(:head, 'http://example.com/').to_return(status: 500)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+      expect(subject.root?).to be(false)
+    end
+  end
+  context 'https' do
+    it 'knows when a domain supports https' do
+      stub_request(:head, 'https://example.com/').to_return(status: 200)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 200)
+      stub_request(:head, 'http://example.com/').to_return(status: 200)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+      allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)
+      expect(subject.https?).to be(true)
+    end
+    it "knows when a domain doesn't support https" do
+      stub_request(:head, 'https://example.com/').to_return(status: 500)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 200)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+      expect(subject.https?).to be(false)
+    end
+    it 'considers HTTPS inforced when no http endpoint responds' do
+      stub_request(:head, 'https://example.com/').to_return(status: 200)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 500)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 500)
+      # expect(subject.enforces_https?).to eql(true)
+    end
+    it "doesn't consider HTTPS inforced when an http endpoint responds" do
+      stub_request(:head, 'https://example.com/').to_return(status: 200)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/').to_return(status: 500)
+      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+      expect(subject.enforces_https?).to be(false)
+    end
+    it 'detects when a domain downgrades to http' do
+      # TODO
+    end
+    it 'detects when a domain enforces https' do
+      # TODO
+    end
+  end
+  context 'canonical' do
+    context 'www' do
+      it 'detects a domain as canonically www when root is down' do
+        stub_request(:head, 'https://example.com/').to_return(status: 500)
+        stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+        stub_request(:head, 'http://example.com/').to_return(status: 500)
+        stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+        expect(subject.canonically_www?).to be(true)
+      end
+      it 'detects a domain as canonically www when root redirects' do
+        stub_request(:head, 'https://example.com/').to_return(status: 500)
+        stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+        stub_request(:head, 'http://example.com/')
+          .to_return(status: 301, headers: { location: 'http://www.example.com' })
+        stub_request(:head, 'http://www.example.com/').to_return(status: 200)
+        expect(subject.canonically_www?).to be(true)
+      end
+    end
+    context 'https' do
+      it 'detects a domain as canonically https when http is down' do
+        stub_request(:head, 'https://example.com/').to_return(status: 200)
+        stub_request(:head, 'https://www.example.com/').to_return(status: 200)
+        stub_request(:head, 'http://example.com/').to_return(status: 500)
+        stub_request(:head, 'http://www.example.com/').to_return(status: 500)
+        allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)
+        expect(subject.canonically_https?).to be(true)
+      end
+      it 'detects a domain as canonically https when http redirect' do
+        stub_request(:head, 'https://example.com/').to_return(status: 200)
+        stub_request(:head, 'https://www.example.com/').to_return(status: 200)
+        stub_request(:head, 'http://example.com/')
+          .to_return(status: 301, headers: { location: 'https://example.com' })
+        stub_request(:head, 'http://www.example.com/').to_return(status: 500)
+        allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)
+        expect(subject.canonically_https?).to be(true)
+      end
+    end
+  end
+  context 'redirects' do
+    it 'knows when a domain redirects' do
+      stub_request(:head, 'https://example.com/').to_return(status: 500)
+      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://example.com/')
+        .to_return(status: 301, headers: { location: 'http://foo.example.com' })
+      stub_request(:head, 'http://www.example.com/').to_return(status: 500)
+      stub_request(:head, 'http://foo.example.com/').to_return(status: 200)
+      expect(subject.redirect?).to be(true)
+    end
+  end
+  context 'hsts' do
+    it 'enabled' do
+    end
+    it 'subdomains' do
+    end
+    it 'preload ready' do
+    end
+  end
+  it 'returns the host as a string' do
+    expect(subject.to_s).to eql('example.com')
+  end
+end