site-inspector 1.0.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,23 @@
1
+ {
2
+ "name": "site-inspector",
3
+ "version": "2.0.0",
4
+ "description": "Returns information about a domain's technology and capabilities",
5
+ "main": "site-inspector",
6
+ "dependencies": {
7
+ "pa11y": "^5.0.0"
8
+ },
9
+ "devDependencies": {},
10
+ "scripts": {
11
+ "test": "script/cibuild"
12
+ },
13
+ "repository": {
14
+ "type": "git",
15
+ "url": "git+https://github.com/benbalter/site-inspector.git"
16
+ },
17
+ "author": "",
18
+ "license": "MIT",
19
+ "bugs": {
20
+ "url": "https://github.com/benbalter/site-inspector/issues"
21
+ },
22
+ "homepage": "https://github.com/benbalter/site-inspector#readme"
23
+ }
@@ -0,0 +1,2 @@
1
+ bundle install
2
+ npm install
@@ -0,0 +1,11 @@
1
+ #!/bin/sh
2
+
3
+ set -e
4
+
5
+ script/pa11y-version
6
+
7
+ bundle exec rake spec
8
+
9
+ bundle exec rubocop
10
+
11
+ gem build site-inspector.gemspec
@@ -0,0 +1,3 @@
1
+ #! /bin/sh
2
+
3
+ DEBUG=1 bundle exec pry -r './lib/site-inspector'
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require './lib/site-inspector'
5
+
6
+ if SiteInspector::Endpoint::Accessibility.pa11y?
7
+ puts "Pa11y version: #{SiteInspector::Endpoint::Accessibility.pa11y_version}"
8
+ else
9
+ puts '**RUNNING TESTS WITHOUT PA11Y**'
10
+ end
@@ -0,0 +1,38 @@
1
+ #!/bin/sh
2
+ # Tag and push a release.
3
+
4
+ set -e
5
+
6
+ # Make sure we're in the project root.
7
+
8
+ cd "$(dirname "$0")/.." # quoted so a checkout path containing spaces is not word-split
9
+
10
+ # Build a new gem archive.
11
+
12
+ rm -rf site-inspector-*.gem
13
+ gem build -q site-inspector.gemspec
14
+
15
+ # Make sure we're on the master branch.
16
+
17
+ (git branch | grep -qxF '* master') || { # -xF: whole-line literal match, so e.g. "* master-hotfix" is rejected
18
+ echo "Only release from the master branch."
19
+ exit 1
20
+ }
21
+
22
+ # Figure out what version we're releasing.
23
+
24
+ tag=v`ls site-inspector-*.gem | sed 's/^site-inspector-\(.*\)\.gem$/\1/'`
25
+
26
+ # Make sure we haven't released this version before.
27
+
28
+ git fetch -t origin
29
+
30
+ (git tag -l | grep -qxF "$tag") && { # -xF: exact literal tag; dots are not wildcards and v3.2.1 does not match v3.2.10
31
+ echo "Whoops, there's already a '${tag}' tag."
32
+ exit 1
33
+ }
34
+
35
+ # Tag it and bag it.
36
+
37
+ gem push site-inspector-*.gem && git tag "$tag" &&
38
+ git push origin master && git push origin "$tag"
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path './lib/site-inspector/version', File.dirname(__FILE__)
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = 'site-inspector'
7
+ s.version = SiteInspector::VERSION
8
+ s.summary = 'A Ruby port and v2 of Site Inspector (https://github.com/benbalter/site-inspector)'
9
+ s.description = "Returns information about a domain's technology and capabilities"
10
+ s.authors = 'Ben Balter'
11
+ s.email = 'ben@balter.com'
12
+ s.homepage = 'https://github.com/benbalter/site-inspector'
13
+ s.license = 'MIT'
14
+
15
+ s.files = `git ls-files -z`.split("\x0")
16
+ s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
18
+ s.require_paths = ['lib']
19
+
20
+ s.add_dependency('cliver', '~> 0.0')
21
+ s.add_dependency('colorator', '~> 1.1')
22
+ s.add_dependency('dnsruby', '~> 1.0')
23
+ s.add_dependency('dotenv', '~> 2.0')
24
+ s.add_dependency('gman', '~> 7.0', '>= 7.0.4')
25
+ s.add_dependency('mercenary', '~> 0.0')
26
+ s.add_dependency('nokogiri', '~> 1.0')
27
+ s.add_dependency('oj', '~> 3.0')
28
+ s.add_dependency('parallel', '~> 1.0')
29
+ s.add_dependency('public_suffix', '~> 4.0')
30
+ s.add_dependency('sniffles', '~> 0.0')
31
+ s.add_dependency('typhoeus', '~> 1.0')
32
+ s.add_dependency('urlscan', '~> 0.6')
33
+ s.add_dependency('whois', '~> 5.0')
34
+
35
+ s.add_development_dependency('pry', '~> 0.0')
36
+ s.add_development_dependency('rake', '~> 13.0')
37
+ s.add_development_dependency('rspec', '~> 3.0')
38
+ s.add_development_dependency('rubocop', '~> 1.0')
39
+ s.add_development_dependency('rubocop-performance', '~> 1.5')
40
+ s.add_development_dependency('rubocop-rspec', '~> 2.0')
41
+ s.add_development_dependency('webmock', '~> 3.0')
42
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ describe SiteInspector::Endpoint::Accessibility do
6
+ subject do
7
+ endpoint = SiteInspector::Endpoint.new('http://example.com')
8
+ described_class.new(endpoint)
9
+ end
10
+
11
+ it "retrieve's pa11y's version" do
12
+ pending('Pa11y not installed') unless described_class.pa11y?
13
+ expect(subject.class.pa11y_version).to match(/\d\.\d\.\d/)
14
+ end
15
+
16
+ it 'responds to valid standards' do
17
+ expect(subject.respond_to?(:section508)).to be(true)
18
+ end
19
+
20
+ it 'knows the level' do
21
+ expect(subject.level).to be(:error)
22
+ end
23
+
24
+ it 'allows the user to set the level' do
25
+ subject.level = :warning
26
+ expect(subject.level).to be(:warning)
27
+ end
28
+
29
+ it 'errors on invalid levels' do
30
+ expect { subject.level = 'foo' }.to raise_error(ArgumentError)
31
+ end
32
+
33
+ it 'knows the standard' do
34
+ expect(subject.standard).to be(:wcag2a)
35
+ end
36
+
37
+ it 'allows the user to set the standard' do
38
+ subject.standard = :wcag2a
39
+ expect(subject.standard).to be(:wcag2a)
40
+ end
41
+
42
+ it 'errors on invalid standards' do
43
+ expect { subject.standard = :foo }.to raise_error(ArgumentError)
44
+ end
45
+
46
+ context 'with pa11y installed' do
47
+ before do
48
+ stub_request(:head, 'http://example.com/').to_return(status: 200)
49
+ end
50
+ end
51
+
52
+ context "with pa11y stub'd" do
53
+ before do
54
+ output = '[{"code":"Section508.L.NoContentAnchor","context":"<a href=\"foo\"></a>","message":"Anchor element found with a valid href attribute, but no link content has been supplied.","selector":"html > body > a","type":"error","typeCode":1}]'
55
+ allow(subject).to receive(:run_command) { [output, 2] }
56
+ end
57
+
58
+ it 'knows if a site is valid' do
59
+ with_env 'SKIP_PA11Y_CHECK', 'true' do
60
+ expect(subject.valid?).to be(false)
61
+ end
62
+ end
63
+
64
+ it 'counts the errors' do
65
+ with_env 'SKIP_PA11Y_CHECK', 'true' do
66
+ expect(subject.errors).to be(1)
67
+ end
68
+ end
69
+
70
+ it 'runs the check' do
71
+ with_env 'SKIP_PA11Y_CHECK', 'true' do
72
+ expect(subject.check[:valid]).to be(false)
73
+ expect(subject.check[:results].first['code']).to eql('WCAG2A.Principle3.Guideline3_1.3_1_1.H57.2')
74
+ end
75
+ end
76
+
77
+ it 'runs a named check' do
78
+ with_env 'SKIP_PA11Y_CHECK', 'true' do
79
+ expect(subject.check[:valid]).to be(false)
80
+ expect(subject.check[:results].first['code']).to eql('WCAG2A.Principle3.Guideline3_1.3_1_1.H57.2')
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ describe SiteInspector::Endpoint::Check do
6
+ subject do
7
+ stub_request(:head, 'http://example.com/').to_return(status: 200)
8
+ endpoint = SiteInspector::Endpoint.new('http://example.com')
9
+ described_class.new(endpoint)
10
+ end
11
+
12
+ it 'returns the endpoint' do
13
+ expect(subject.endpoint.class).to eql(SiteInspector::Endpoint)
14
+ end
15
+
16
+ it 'returns the response' do
17
+ expect(subject.response.class).to eql(Typhoeus::Response)
18
+ end
19
+
20
+ it 'returns the request' do
21
+ expect(subject.request.class).to eql(Typhoeus::Request)
22
+ end
23
+
24
+ it 'returns the host' do
25
+ expect(subject.host).to eql('example.com')
26
+ end
27
+
28
+ it 'returns its name' do
29
+ expect(subject.name).to be(:check)
30
+ end
31
+
32
+ it 'returns the instance name' do
33
+ expect(described_class.name).to be(:check)
34
+ end
35
+
36
+ it 'enables and disables the check' do
37
+ expect(described_class.enabled?).to be(true)
38
+ described_class.enabled = false
39
+ expect(described_class.enabled?).to be(false)
40
+ described_class.enabled = true
41
+ end
42
+ end
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ describe SiteInspector::Endpoint::Content do
6
+ subject do
7
+ body = <<-BODY
8
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
9
+ <html>
10
+ <head>
11
+ <meta name="generator" content="Jekyll v3.8.5" />
12
+ </head>
13
+ <body>
14
+ <h1>Some page</h1>
15
+ </body>
16
+ </html>
17
+ BODY
18
+
19
+ stub_request(:get, 'http://example.com/')
20
+ .to_return(status: 200, body: body)
21
+ stub_request(:head, 'http://example.com/')
22
+ .to_return(status: 200)
23
+ endpoint = SiteInspector::Endpoint.new('http://example.com')
24
+ described_class.new(endpoint)
25
+ end
26
+
27
+ it 'returns the doc' do
28
+ expect(subject.document.class).to eql(Nokogiri::HTML::Document)
29
+ expect(subject.document.css('h1').text).to eql('Some page')
30
+ end
31
+
32
+ it 'returns the body' do
33
+ expect(subject.body).to match('<h1>Some page</h1>')
34
+ end
35
+
36
+ it 'returns the doctype' do
37
+ expect(subject.doctype).to eql('-//W3C//DTD XHTML 1.0 Transitional//EN')
38
+ end
39
+
40
+ it 'knows when robots.txt exists' do
41
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i).to_return(status: 404)
42
+
43
+ stub_request(:head, 'http://example.com/robots.txt')
44
+ .to_return(status: 200)
45
+ expect(subject.robots_txt?).to be(true)
46
+ end
47
+
48
+ it "knows when robots.txt doesn't exist" do
49
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i).to_return(status: 404)
50
+
51
+ stub_request(:head, 'http://example.com/robots.txt')
52
+ .to_return(status: 404)
53
+ expect(subject.robots_txt?).to be(false)
54
+ end
55
+
56
+ it 'knows when sitemap.xml exists' do
57
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i).to_return(status: 404)
58
+
59
+ stub_request(:head, 'http://example.com/sitemap.xml')
60
+ .to_return(status: 200)
61
+ expect(subject.sitemap_xml?).to be(true)
62
+ end
63
+
64
+ it 'knows when sitemap.xml exists' do
65
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i).to_return(status: 404)
66
+
67
+ stub_request(:head, 'http://example.com/sitemap.xml')
68
+ .to_return(status: 404)
69
+ expect(subject.sitemap_xml?).to be(false)
70
+ end
71
+
72
+ it 'knows when humans.txt exists' do
73
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i).to_return(status: 404)
74
+
75
+ stub_request(:head, 'http://example.com/humans.txt')
76
+ .to_return(status: 200)
77
+ expect(subject.humans_txt?).to be(true)
78
+ end
79
+
80
+ it "knows when humans.txt doesn't exist" do
81
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i).to_return(status: 404)
82
+
83
+ stub_request(:head, 'http://example.com/humans.txt')
84
+ .to_return(status: 200)
85
+ expect(subject.humans_txt?).to be(true)
86
+ end
87
+
88
+ it 'returns the generator' do
89
+ expect(subject.generator).to eql('Jekyll v3.8.5')
90
+ end
91
+
92
+ context '404s' do
93
+ it 'knows when an endpoint returns a proper 404' do
94
+ stub_request(:head, %r{http://example.com/.*})
95
+ .to_return(status: 404)
96
+ expect(subject.proper_404s?).to be(true)
97
+ end
98
+
99
+ it "knows when an endpoint doesn't return a proper 404" do
100
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i)
101
+ .to_return(status: 200)
102
+ expect(subject.proper_404s?).to be(false)
103
+ end
104
+
105
+ it 'generates a random path' do
106
+ path = subject.send(:random_path)
107
+ expect(path).to match(/[a-z0-9]{32}/i)
108
+ expect(subject.send(:random_path)).to eql(path)
109
+ end
110
+
111
+ it "doesn't say something exists when there are no 404s" do
112
+ stub_request(:head, %r{http://example.com/[a-z0-9]{32}}i).to_return(status: 200)
113
+ stub_request(:head, 'http://example.com/humans.txt').to_return(status: 200)
114
+ expect(subject.humans_txt?).to be(nil)
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ describe SiteInspector::Endpoint::Cookies do
6
+ context 'without cookies' do
7
+ subject do
8
+ stub_request(:head, 'http://example.com/')
9
+ .to_return(status: 200, body: '')
10
+ endpoint = SiteInspector::Endpoint.new('http://example.com')
11
+ described_class.new(endpoint)
12
+ end
13
+
14
+ it 'knows when there are no cookies' do
15
+ expect(subject.cookies?).to be(false)
16
+ expect(subject.all).to be(nil)
17
+ end
18
+ end
19
+
20
+ context 'with cookies' do
21
+ subject do
22
+ cookies = [
23
+ CGI::Cookie.new(
24
+ 'name' => 'foo',
25
+ 'value' => 'bar',
26
+ 'domain' => 'example.com',
27
+ 'path' => '/'
28
+ ),
29
+ CGI::Cookie.new(
30
+ 'name' => 'foo2',
31
+ 'value' => 'bar2',
32
+ 'domain' => 'example.com',
33
+ 'path' => '/'
34
+ )
35
+ ].map(&:to_s)
36
+
37
+ stub_request(:head, 'http://example.com/')
38
+ .to_return(status: 200, body: '', headers: { 'set-cookie' => cookies })
39
+ endpoint = SiteInspector::Endpoint.new('http://example.com')
40
+ described_class.new(endpoint)
41
+ end
42
+
43
+ it 'knows when there are cookies' do
44
+ expect(subject.cookies?).to be(true)
45
+ expect(subject.all.count).to be(2)
46
+ end
47
+
48
+ it 'returns a cookie by name' do
49
+ expect(subject['foo'].to_s).to match(/foo=bar/)
50
+ end
51
+
52
+ it "knows cookies aren't secure" do
53
+ expect(subject.secure?).to be(false)
54
+ end
55
+ end
56
+
57
+ context 'with secure cookies' do
58
+ subject do
59
+ cookies = [
60
+ 'foo=bar; domain=example.com; path=/; secure; HttpOnly',
61
+ 'foo2=bar2; domain=example.com; path=/'
62
+ ]
63
+ stub_request(:head, 'http://example.com/')
64
+ .to_return(status: 200, body: '', headers: { 'set-cookie' => cookies })
65
+ endpoint = SiteInspector::Endpoint.new('http://example.com')
66
+ described_class.new(endpoint)
67
+ end
68
+
69
+ it 'knows cookies are secure' do
70
+ expect(subject.secure?).to be(true)
71
+ end
72
+ end
73
+ end