site-inspector 1.0.2 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61)
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,34 @@
# frozen_string_literal: true

require 'spec_helper'

# Exercises the Wappalyzer check against a stubbed lookup API so that the
# category => technology-names hash it builds can be compared exactly.
describe SiteInspector::Endpoint::Wappalyzer do
  subject { described_class.new(endpoint) }

  let(:domain) { 'http://ben.balter.com.com' }
  let(:endpoint) { SiteInspector::Endpoint.new(domain) }
  let(:url) { "https://api.wappalyzer.com/lookup/v2/?urls=#{domain}/" }

  # Default stub: the lookup URL answers 200 with the canned JSON fixture.
  before do
    fixture = File.expand_path('../fixtures/wappalyzer.json', __dir__)
    stub_request(:get, url).to_return(status: 200, body: File.read(fixture))
  end

  it 'returns the API response' do
    # Technology names grouped under their category name; per this
    # expectation, fixture entries without a category end up under 'Other'.
    technologies_by_category = {
      'Analytics' => ['Google Analytics'],
      'CDN' => %w[Cloudflare Fastly],
      'Caching' => ['Varnish'],
      'Other' => %w[Disqus Jekyll],
      'PaaS' => ['GitHub Pages'],
      'Web frameworks' => ['Ruby on Rails']
    }

    expect(subject.to_h).to eql(technologies_by_category)
  end

  it 'fails gracefully' do
    # Re-stub the same URL with an error status and an empty body.
    stub_request(:get, url).to_return(status: 400, body: '')

    expect(subject.to_h).to eql({})
  end
end
@@ -0,0 +1,26 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for the whois check built on top of an endpoint.
# NOTE(review): no whois stub is visible in this file, so the expected strings
# presumably come from live whois data or from setup in spec_helper - confirm.
describe SiteInspector::Endpoint::Whois do
  subject { described_class.new(SiteInspector::Endpoint.new(site)) }

  let(:site) { 'https://example.com' }

  # The endpoint's HEAD request is stubbed before each example.
  before { stub_request(:head, site).to_return(status: 200) }

  it 'returns the whois for the IP' do
    ip_whois = subject.ip

    expect(ip_whois).to match(/Derrick Sawyer/)
  end

  it 'returns the whois for the domain' do
    domain_whois = subject.domain

    expect(domain_whois).to match(/Domain Name: EXAMPLE\.COM/)
  end

  it 'returns the hash' do
    # Only the first key/value pair of the domain section is pinned down.
    domain_record = subject.to_h[:domain]

    expect(domain_record.keys.first).to eql('Domain Name')
    expect(domain_record.values.first).to eql('EXAMPLE.COM')
  end
end
@@ -0,0 +1,125 @@
1
+ [
2
+ {
3
+ "url":"https://ben.balter.com",
4
+ "technologies":[
5
+ {
6
+ "slug":"cloudflare",
7
+ "name":"Cloudflare",
8
+ "versions":[
9
+
10
+ ],
11
+ "trafficRank":11,
12
+ "categories":[
13
+ {
14
+ "id":31,
15
+ "slug":"cdn",
16
+ "name":"CDN"
17
+ }
18
+ ]
19
+ },
20
+ {
21
+ "slug":"varnish",
22
+ "name":"Varnish",
23
+ "versions":[
24
+
25
+ ],
26
+ "trafficRank":11,
27
+ "categories":[
28
+ {
29
+ "id":23,
30
+ "slug":"caching",
31
+ "name":"Caching"
32
+ }
33
+ ]
34
+ },
35
+ {
36
+ "slug":"disqus",
37
+ "name":"Disqus",
38
+ "versions":[
39
+
40
+ ],
41
+ "trafficRank":11,
42
+ "categories":[
43
+
44
+ ]
45
+ },
46
+ {
47
+ "slug":"google-analytics",
48
+ "name":"Google Analytics",
49
+ "versions":[
50
+
51
+ ],
52
+ "trafficRank":11,
53
+ "categories":[
54
+ {
55
+ "id":10,
56
+ "slug":"analytics",
57
+ "name":"Analytics"
58
+ },
59
+ {
60
+ "id":61,
61
+ "slug":"saas",
62
+ "name":"SaaS"
63
+ }
64
+ ]
65
+ },
66
+ {
67
+ "slug":"jekyll",
68
+ "name":"Jekyll",
69
+ "versions":[
70
+ "v3.9.0"
71
+ ],
72
+ "trafficRank":11,
73
+ "categories":[
74
+
75
+ ]
76
+ },
77
+ {
78
+ "slug":"ruby-on-rails",
79
+ "name":"Ruby on Rails",
80
+ "versions":[
81
+
82
+ ],
83
+ "trafficRank":11,
84
+ "categories":[
85
+ {
86
+ "id":18,
87
+ "slug":"web-frameworks",
88
+ "name":"Web frameworks"
89
+ }
90
+ ]
91
+ },
92
+ {
93
+ "slug":"fastly",
94
+ "name":"Fastly",
95
+ "versions":[
96
+
97
+ ],
98
+ "trafficRank":11,
99
+ "categories":[
100
+ {
101
+ "id":31,
102
+ "slug":"cdn",
103
+ "name":"CDN"
104
+ }
105
+ ]
106
+ },
107
+ {
108
+ "slug":"github-pages",
109
+ "name":"GitHub Pages",
110
+ "versions":[
111
+
112
+ ],
113
+ "trafficRank":11,
114
+ "categories":[
115
+ {
116
+ "id":62,
117
+ "slug":"paas",
118
+ "name":"PaaS"
119
+ }
120
+ ]
121
+ }
122
+ ],
123
+ "crawl":true
124
+ }
125
+ ]
@@ -0,0 +1,15 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for the cache; they reach into the @memory instance variable directly
# to verify each side of the get/set pair independently of the other.
describe SiteInspector::Cache do
  it 'stores a cache value' do
    subject.set('foo', 'bar')

    memory = subject.instance_variable_get(:@memory)
    expect(memory['foo']).to eql('bar')
  end

  it 'retrieves values from the cache' do
    subject.instance_variable_set(:@memory, { 'foo' => 'bar' })

    cached = subject.get('foo')
    expect(cached).to eql('bar')
  end
end
@@ -0,0 +1,39 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for the on-disk cache: each entry is expected to live in a file named
# after the request's cache key inside the cache directory.
describe SiteInspector::DiskCache do
  subject { described_class.new(tmpdir) }

  # Request used as the cache key in every example.
  let(:request) { Typhoeus::Request.new('foo') }

  # Where the entry for `request` is expected to be stored.
  let(:cache_path) { File.expand_path(request.cache_key, tmpdir) }

  # Serialised form of the cached value 'bar'.
  # NOTE(review): assumes DiskCache stores values with Marshal (the printable
  # bytes of the previous literal match Marshal's output) - confirm against
  # DiskCache#set. Building the payload with Marshal.dump avoids embedding raw
  # control bytes in a string literal.
  let(:payload) { Marshal.dump('bar') }

  # Start every example from an empty cache directory.
  before do
    FileUtils.rm_rf(tmpdir)
    Dir.mkdir(tmpdir)
  end

  it 'writes a value to disk' do
    expect(File.exist?(cache_path)).to be(false)

    subject.set request, 'bar'

    expect(File.exist?(cache_path)).to be(true)
    # File.read closes the file; File.open(...).read left the handle open.
    expect(File.read(cache_path)).to eql(payload)
  end

  it 'reads a value from disk' do
    File.write(cache_path, payload)

    expect(subject.get(request)).to eql('bar')
  end

  it "calculates a file's path" do
    # `path` is not called publicly here, hence `send`.
    expect(subject.send(:path, request)).to eql(cache_path)
  end
end
@@ -0,0 +1,271 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for SiteInspector::Domain: host normalisation, generation of the four
# scheme/www endpoint combinations, and the up/www/root/https/canonical/
# redirect predicates, driven by stubbed HEAD requests to those endpoints.
describe SiteInspector::Domain do
  subject { described_class.new('example.com') }

  context 'domain parsing' do
    it 'downcases the domain' do
      domain = described_class.new('EXAMPLE.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips http from the domain' do
      domain = described_class.new('http://example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips https from the domain' do
      domain = described_class.new('https://example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips www from the domain' do
      domain = described_class.new('www.example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips http://www from the domain' do
      domain = described_class.new('http://www.example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips paths from the domain' do
      domain = described_class.new('http://www.example.com/foo')
      expect(domain.host).to eql('example.com')
    end

    it 'strips trailing slashes from the domain' do
      domain = described_class.new('http://www.example.com/')
      expect(domain.host).to eql('example.com')
    end
  end

  context 'endpoints' do
    it 'generates the endpoints' do
      # Order matters: https before http, root before www.
      endpoints = subject.endpoints
      expect(endpoints.count).to be(4)
      expect(endpoints[0].to_s).to eql('https://example.com/')
      expect(endpoints[1].to_s).to eql('https://www.example.com/')
      expect(endpoints[2].to_s).to eql('http://example.com/')
      expect(endpoints[3].to_s).to eql('http://www.example.com/')
    end
  end

  it 'knows the canonical domain' do
    stub_request(:head, 'https://example.com/').to_return(status: 500)
    stub_request(:head, 'https://www.example.com/').to_return(status: 500)
    stub_request(:head, 'http://www.example.com/').to_return(status: 200)
    stub_request(:head, 'http://example.com/').to_return(status: 200)
    expect(subject.canonical_endpoint.to_s).to eql('http://example.com/')
  end

  it 'knows if a domain is a government domain' do
    expect(subject.government?).to be(false)

    domain = described_class.new('whitehouse.gov')
    expect(domain.government?).to be(true)
  end

  # Variant 1 of the "up" checks: endpoint responses are replaced with
  # Typhoeus responses carrying code 0 instead of stubbing the HTTP layer.
  context 'up (mocked endpoint responses)' do
    it 'considers a domain up if at least one endpoint is up' do
      subject.endpoints.each do |endpoint|
        # Leave the http://www endpoint alone; it is stubbed to 200 below.
        next if endpoint.uri.to_s.start_with?('http://www')

        allow(endpoint).to receive(:response) { Typhoeus::Response.new(code: 0) }
      end

      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.up?).to be(true)
    end

    it "doesn't consider a domain up when all endpoints are down" do
      subject.endpoints.each do |endpoint|
        allow(endpoint).to receive(:response) { Typhoeus::Response.new(code: 0) }
      end

      expect(subject.up?).to be(false)
    end
  end

  # Variant 2 of the "up" checks: every endpoint answers, but with a 500
  # status unless stated otherwise.
  context 'up (stubbed HTTP statuses)' do
    it 'considers a domain up if at least one endpoint returns 200' do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.up?).to be(true)
    end

    it "doesn't consider a domain up if all endpoints are down" do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      expect(subject.up?).to be(false)
    end
  end

  context 'www' do
    it 'considers a site www when at least one endpoint is www' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.www?).to be(true)
    end

    it "doesn't consider a site www when no endpoint is www" do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 200)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      expect(subject.www?).to be(false)
    end
  end

  context 'root' do
    it 'considers a domain root if you can connect without www' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      expect(subject.root?).to be(true)
    end

    it "doesn't call a www-only domain root" do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 200)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.root?).to be(false)
    end
  end

  context 'https' do
    it 'knows when a domain supports https' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 200)
      stub_request(:head, 'http://example.com/').to_return(status: 200)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
      # Force the first endpoint's (https://example.com/) HTTPS check to
      # report itself as valid.
      allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)

      expect(subject.https?).to be(true)
    end

    it "knows when a domain doesn't support https" do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 200)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.https?).to be(false)
    end

    it 'considers HTTPS enforced when no http endpoint responds' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      # NOTE(review): the only assertion of this example is disabled, so it
      # used to pass without checking anything. Report it as pending until the
      # expectation can be restored.
      skip 'enforces_https? expectation is disabled'
      # expect(subject.enforces_https?).to eql(true)
    end

    it "doesn't consider HTTPS enforced when an http endpoint responds" do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.enforces_https?).to be(false)
    end

    # TODO: not yet implemented; block-less examples are reported as pending
    # rather than as passing.
    it 'detects when a domain downgrades to http'

    # TODO: not yet implemented.
    it 'detects when a domain enforces https'
  end

  context 'canonical' do
    context 'www' do
      it 'detects a domain as canonically www when root is down' do
        stub_request(:head, 'https://example.com/').to_return(status: 500)
        stub_request(:head, 'https://www.example.com/').to_return(status: 500)
        stub_request(:head, 'http://example.com/').to_return(status: 500)
        stub_request(:head, 'http://www.example.com/').to_return(status: 200)

        expect(subject.canonically_www?).to be(true)
      end

      it 'detects a domain as canonically www when root redirects' do
        stub_request(:head, 'https://example.com/').to_return(status: 500)
        stub_request(:head, 'https://www.example.com/').to_return(status: 500)
        stub_request(:head, 'http://example.com/')
          .to_return(status: 301, headers: { location: 'http://www.example.com' })
        stub_request(:head, 'http://www.example.com/').to_return(status: 200)

        expect(subject.canonically_www?).to be(true)
      end
    end

    context 'https' do
      it 'detects a domain as canonically https when http is down' do
        stub_request(:head, 'https://example.com/').to_return(status: 200)
        stub_request(:head, 'https://www.example.com/').to_return(status: 200)
        stub_request(:head, 'http://example.com/').to_return(status: 500)
        stub_request(:head, 'http://www.example.com/').to_return(status: 500)
        allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)

        expect(subject.canonically_https?).to be(true)
      end

      it 'detects a domain as canonically https when http redirect' do
        stub_request(:head, 'https://example.com/').to_return(status: 200)
        stub_request(:head, 'https://www.example.com/').to_return(status: 200)
        stub_request(:head, 'http://example.com/')
          .to_return(status: 301, headers: { location: 'https://example.com' })
        stub_request(:head, 'http://www.example.com/').to_return(status: 500)
        allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)

        expect(subject.canonically_https?).to be(true)
      end
    end
  end

  context 'redirects' do
    it 'knows when a domain redirects' do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/')
        .to_return(status: 301, headers: { location: 'http://foo.example.com' })
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)
      # The redirect target itself must answer for the redirect to resolve.
      stub_request(:head, 'http://foo.example.com/').to_return(status: 200)

      expect(subject.redirect?).to be(true)
    end
  end

  # Placeholders: none of the HSTS examples has a body yet, so they are
  # declared without blocks and show up as pending.
  context 'hsts' do
    it 'enabled'

    it 'subdomains'

    it 'preload ready'
  end

  it 'returns the host as a string' do
    expect(subject.to_s).to eql('example.com')
  end
end