site-inspector 1.0.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +42 -0
  4. data/.rubocop_todo.yml +139 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +9 -0
  7. data/Gemfile +7 -0
  8. data/Guardfile +10 -0
  9. data/README.md +189 -0
  10. data/Rakefile +10 -0
  11. data/bin/site-inspector +50 -22
  12. data/lib/cliver/dependency_ext.rb +24 -0
  13. data/lib/site-inspector.rb +62 -615
  14. data/lib/site-inspector/cache.rb +10 -51
  15. data/lib/site-inspector/checks/accessibility.rb +135 -0
  16. data/lib/site-inspector/checks/check.rb +54 -0
  17. data/lib/site-inspector/checks/content.rb +85 -0
  18. data/lib/site-inspector/checks/cookies.rb +45 -0
  19. data/lib/site-inspector/checks/dns.rb +138 -0
  20. data/lib/site-inspector/checks/headers.rb +68 -0
  21. data/lib/site-inspector/checks/hsts.rb +81 -0
  22. data/lib/site-inspector/checks/https.rb +40 -0
  23. data/lib/site-inspector/checks/sniffer.rb +67 -0
  24. data/lib/site-inspector/checks/wappalyzer.rb +62 -0
  25. data/lib/site-inspector/checks/whois.rb +36 -0
  26. data/lib/site-inspector/disk_cache.rb +42 -0
  27. data/lib/site-inspector/domain.rb +271 -0
  28. data/lib/site-inspector/endpoint.rb +217 -0
  29. data/lib/site-inspector/rails_cache.rb +13 -0
  30. data/lib/site-inspector/version.rb +5 -0
  31. data/package-lock.json +505 -0
  32. data/package.json +23 -0
  33. data/script/bootstrap +2 -0
  34. data/script/cibuild +11 -0
  35. data/script/console +3 -0
  36. data/script/pa11y-version +10 -0
  37. data/script/release +38 -0
  38. data/site-inspector.gemspec +42 -0
  39. data/spec/checks/site_inspector_endpoint_accessibility_spec.rb +84 -0
  40. data/spec/checks/site_inspector_endpoint_check_spec.rb +42 -0
  41. data/spec/checks/site_inspector_endpoint_content_spec.rb +117 -0
  42. data/spec/checks/site_inspector_endpoint_cookies_spec.rb +73 -0
  43. data/spec/checks/site_inspector_endpoint_dns_spec.rb +184 -0
  44. data/spec/checks/site_inspector_endpoint_headers_spec.rb +65 -0
  45. data/spec/checks/site_inspector_endpoint_hsts_spec.rb +92 -0
  46. data/spec/checks/site_inspector_endpoint_https_spec.rb +49 -0
  47. data/spec/checks/site_inspector_endpoint_sniffer_spec.rb +150 -0
  48. data/spec/checks/site_inspector_endpoint_wappalyzer_spec.rb +34 -0
  49. data/spec/checks/site_inspector_endpoint_whois_spec.rb +26 -0
  50. data/spec/fixtures/wappalyzer.json +125 -0
  51. data/spec/site_inspector_cache_spec.rb +15 -0
  52. data/spec/site_inspector_disk_cache_spec.rb +39 -0
  53. data/spec/site_inspector_domain_spec.rb +271 -0
  54. data/spec/site_inspector_endpoint_spec.rb +252 -0
  55. data/spec/site_inspector_spec.rb +48 -0
  56. data/spec/spec_helper.rb +19 -0
  57. metadata +204 -63
  58. data/lib/site-inspector/compliance.rb +0 -19
  59. data/lib/site-inspector/dns.rb +0 -92
  60. data/lib/site-inspector/headers.rb +0 -59
  61. data/lib/site-inspector/sniffer.rb +0 -26
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Exercises the Wappalyzer check against a stubbed lookup API. The successful
# response body is read from the wappalyzer.json fixture one directory up.
describe SiteInspector::Endpoint::Wappalyzer do
  subject { described_class.new(endpoint) }

  let(:domain) { 'http://ben.balter.com.com' }
  let(:endpoint) { SiteInspector::Endpoint.new(domain) }
  let(:url) { "https://api.wappalyzer.com/lookup/v2/?urls=#{domain}/" }

  before do
    fixture = File.expand_path('../fixtures/wappalyzer.json', __dir__)
    stub_request(:get, url).to_return(status: 200, body: File.read(fixture))
  end

  it 'returns the API response' do
    # Technology names keyed by category name. Technologies the fixture lists
    # with an empty "categories" array (Disqus, Jekyll) are expected under
    # 'Other'.
    technologies_by_category = {
      'Analytics' => ['Google Analytics'],
      'CDN' => %w[Cloudflare Fastly],
      'Caching' => ['Varnish'],
      'Other' => %w[Disqus Jekyll],
      'PaaS' => ['GitHub Pages'],
      'Web frameworks' => ['Ruby on Rails']
    }

    expect(subject.to_h).to eql(technologies_by_category)
  end

  it 'fails gracefully' do
    # Declared after the 200 stub from the `before` hook, for the same URL.
    stub_request(:get, url).to_return(status: 400, body: '')

    expect(subject.to_h).to eql({})
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Covers the Whois check: the raw lookups returned by #ip and #domain, and the
# parsed structure returned by #to_h.
describe SiteInspector::Endpoint::Whois do
  subject { described_class.new(SiteInspector::Endpoint.new(site)) }

  let(:site) { 'https://example.com' }

  # Only the HEAD request to the site is stubbed here.
  # NOTE(review): the whois lookups themselves do not appear to be stubbed in
  # this spec -- confirm whether they reach the network.
  before { stub_request(:head, site).to_return(status: 200) }

  it 'returns the whois for the IP' do
    expect(subject.ip).to match(/Derrick Sawyer/)
  end

  it 'returns the whois for the domain' do
    expect(subject.domain).to match(/Domain Name: EXAMPLE\.COM/)
  end

  it 'returns the hash' do
    parsed_domain = subject.to_h[:domain]

    expect(parsed_domain.keys.first).to eql('Domain Name')
    expect(parsed_domain.values.first).to eql('EXAMPLE.COM')
  end
end
@@ -0,0 +1,125 @@
1
+ [
2
+ {
3
+ "url":"https://ben.balter.com",
4
+ "technologies":[
5
+ {
6
+ "slug":"cloudflare",
7
+ "name":"Cloudflare",
8
+ "versions":[
9
+
10
+ ],
11
+ "trafficRank":11,
12
+ "categories":[
13
+ {
14
+ "id":31,
15
+ "slug":"cdn",
16
+ "name":"CDN"
17
+ }
18
+ ]
19
+ },
20
+ {
21
+ "slug":"varnish",
22
+ "name":"Varnish",
23
+ "versions":[
24
+
25
+ ],
26
+ "trafficRank":11,
27
+ "categories":[
28
+ {
29
+ "id":23,
30
+ "slug":"caching",
31
+ "name":"Caching"
32
+ }
33
+ ]
34
+ },
35
+ {
36
+ "slug":"disqus",
37
+ "name":"Disqus",
38
+ "versions":[
39
+
40
+ ],
41
+ "trafficRank":11,
42
+ "categories":[
43
+
44
+ ]
45
+ },
46
+ {
47
+ "slug":"google-analytics",
48
+ "name":"Google Analytics",
49
+ "versions":[
50
+
51
+ ],
52
+ "trafficRank":11,
53
+ "categories":[
54
+ {
55
+ "id":10,
56
+ "slug":"analytics",
57
+ "name":"Analytics"
58
+ },
59
+ {
60
+ "id":61,
61
+ "slug":"saas",
62
+ "name":"SaaS"
63
+ }
64
+ ]
65
+ },
66
+ {
67
+ "slug":"jekyll",
68
+ "name":"Jekyll",
69
+ "versions":[
70
+ "v3.9.0"
71
+ ],
72
+ "trafficRank":11,
73
+ "categories":[
74
+
75
+ ]
76
+ },
77
+ {
78
+ "slug":"ruby-on-rails",
79
+ "name":"Ruby on Rails",
80
+ "versions":[
81
+
82
+ ],
83
+ "trafficRank":11,
84
+ "categories":[
85
+ {
86
+ "id":18,
87
+ "slug":"web-frameworks",
88
+ "name":"Web frameworks"
89
+ }
90
+ ]
91
+ },
92
+ {
93
+ "slug":"fastly",
94
+ "name":"Fastly",
95
+ "versions":[
96
+
97
+ ],
98
+ "trafficRank":11,
99
+ "categories":[
100
+ {
101
+ "id":31,
102
+ "slug":"cdn",
103
+ "name":"CDN"
104
+ }
105
+ ]
106
+ },
107
+ {
108
+ "slug":"github-pages",
109
+ "name":"GitHub Pages",
110
+ "versions":[
111
+
112
+ ],
113
+ "trafficRank":11,
114
+ "categories":[
115
+ {
116
+ "id":62,
117
+ "slug":"paas",
118
+ "name":"PaaS"
119
+ }
120
+ ]
121
+ }
122
+ ],
123
+ "crawl":true
124
+ }
125
+ ]
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Checks the cache's #set and #get by reading and seeding the @memory
# instance variable directly.
describe SiteInspector::Cache do
  it 'stores a cache value' do
    subject.set('foo', 'bar')

    stored = subject.instance_variable_get(:@memory)
    expect(stored['foo']).to eql('bar')
  end

  it 'retrieves values from the cache' do
    subject.instance_variable_set(:@memory, { 'foo' => 'bar' })

    expect(subject.get('foo')).to eql('bar')
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Verifies that DiskCache writes entries to, and reads them from, a file named
# after the request's cache key inside the cache directory.
describe SiteInspector::DiskCache do
  subject { described_class.new(tmpdir) }

  # `tmpdir` is not defined in this file; presumably a helper from spec_helper.
  let(:cached_request) { Typhoeus::Request.new('foo') }
  let(:path) { File.expand_path(cached_request.cache_key, tmpdir) }

  # Start every example from an empty cache directory.
  before do
    FileUtils.rm_rf(tmpdir)
    Dir.mkdir(tmpdir)
  end

  it 'writes a value to disk' do
    expect(File.exist?(path)).to be(false)

    subject.set cached_request, 'bar'

    expect(File.exist?(path)).to be(true)
    # File.read opens, reads and closes in one call; the previous
    # File.open(path).read never closed its file handle.
    # NOTE(review): this literal looks like Marshal.dump('bar') with its
    # non-printable bytes missing -- confirm against the canonical spec file.
    expect(File.read(path)).to eql("I\"bar:ET")
  end

  it 'reads a value from disk' do
    File.write(path, "I\"bar:ET")

    expect(subject.get(cached_request)).to eql('bar')
  end

  it "calculates a file's path" do
    # #path is invoked through `send`, as in the original spec.
    expect(subject.send(:path, cached_request)).to eql(path)
  end
end
@@ -0,0 +1,271 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
# Specs for SiteInspector::Domain: host normalisation, endpoint generation and
# the domain-level predicates derived from the four endpoints
# (https/http x root/www).
#
# Examples whose behaviour is not yet specified are written as block-less `it`
# calls so RSpec reports them as pending instead of passing vacuously.
describe SiteInspector::Domain do
  subject { described_class.new('example.com') }

  context 'domain parsing' do
    it 'downcases the domain' do
      domain = described_class.new('EXAMPLE.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips http from the domain' do
      domain = described_class.new('http://example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips https from the domain' do
      domain = described_class.new('https://example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips www from the domain' do
      domain = described_class.new('www.example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips http://www from the domain' do
      domain = described_class.new('http://www.example.com')
      expect(domain.host).to eql('example.com')
    end

    it 'strips paths from the domain' do
      domain = described_class.new('http://www.example.com/foo')
      expect(domain.host).to eql('example.com')
    end

    it 'strips trailing slashes from the domain' do
      domain = described_class.new('http://www.example.com/')
      expect(domain.host).to eql('example.com')
    end
  end

  context 'endpoints' do
    it 'generates the endpoints' do
      endpoints = subject.endpoints
      expect(endpoints.count).to be(4)
      expect(endpoints[0].to_s).to eql('https://example.com/')
      expect(endpoints[1].to_s).to eql('https://www.example.com/')
      expect(endpoints[2].to_s).to eql('http://example.com/')
      expect(endpoints[3].to_s).to eql('http://www.example.com/')
    end
  end

  it 'knows the canonical domain' do
    stub_request(:head, 'https://example.com/').to_return(status: 500)
    stub_request(:head, 'https://www.example.com/').to_return(status: 500)
    stub_request(:head, 'http://www.example.com/').to_return(status: 200)
    stub_request(:head, 'http://example.com/').to_return(status: 200)
    expect(subject.canonical_endpoint.to_s).to eql('http://example.com/')
  end

  it 'knows if a domain is a government domain' do
    expect(subject.government?).to be(false)

    domain = described_class.new('whitehouse.gov')
    expect(domain.government?).to be(true)
  end

  # Endpoints are made unreachable by replacing #response with a Typhoeus
  # response whose code is 0. This group and the next were both named 'up';
  # they are renamed so their output is distinguishable.
  context 'up (endpoint responses with code 0)' do
    it 'considers a domain up if at least one endpoint is up' do
      subject.endpoints.each do |endpoint|
        # Leave the http://www endpoint alone so it uses the stubbed 200 below.
        next if endpoint.uri.to_s.start_with?('http://www')

        allow(endpoint).to receive(:response) { Typhoeus::Response.new(code: 0) }
      end

      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.up?).to be(true)
    end

    it "doesn't consider a domain up when all endpoints are down" do
      subject.endpoints.each do |endpoint|
        allow(endpoint).to receive(:response) { Typhoeus::Response.new(code: 0) }
      end

      expect(subject.up?).to be(false)
    end
  end

  # Same predicate, but endpoints are made to fail with stubbed HTTP 500s.
  context 'up (stubbed HTTP statuses)' do
    it 'considers a domain up if at least one endpoint is up' do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.up?).to be(true)
    end

    it "doesn't consider a domain up if all endpoints are down" do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      expect(subject.up?).to be(false)
    end
  end

  context 'www' do
    it 'considers a site www when at least one endpoint is www' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.www?).to be(true)
    end

    it "doesn't consider a site www when no endpoint is www" do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 200)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      expect(subject.www?).to be(false)
    end
  end

  context 'root' do
    it 'considers a domain root if you can connect without www' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      expect(subject.root?).to be(true)
    end

    it "doesn't call a www-only domain root" do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 200)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.root?).to be(false)
    end
  end

  context 'https' do
    it 'knows when a domain supports https' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 200)
      stub_request(:head, 'http://example.com/').to_return(status: 200)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)
      allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)

      expect(subject.https?).to be(true)
    end

    it "knows when a domain doesn't support https" do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 200)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.https?).to be(false)
    end

    it 'considers HTTPS enforced when no http endpoint responds' do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)

      # The expectation below was commented out in the original spec, which
      # left this example asserting nothing. Skip explicitly so the gap is
      # reported; remove the skip once the expectation is known to hold.
      skip 'expectation was disabled in the original spec; reason not recorded'
      expect(subject.enforces_https?).to be(true)
    end

    it "doesn't consider HTTPS enforced when an http endpoint responds" do
      stub_request(:head, 'https://example.com/').to_return(status: 200)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/').to_return(status: 500)
      stub_request(:head, 'http://www.example.com/').to_return(status: 200)

      expect(subject.enforces_https?).to be(false)
    end

    # TODO: not yet written; block-less so RSpec lists them as pending.
    it 'detects when a domain downgrades to http'

    it 'detects when a domain enforces https'
  end

  context 'canonical' do
    context 'www' do
      it 'detects a domain as canonically www when root is down' do
        stub_request(:head, 'https://example.com/').to_return(status: 500)
        stub_request(:head, 'https://www.example.com/').to_return(status: 500)
        stub_request(:head, 'http://example.com/').to_return(status: 500)
        stub_request(:head, 'http://www.example.com/').to_return(status: 200)

        expect(subject.canonically_www?).to be(true)
      end

      it 'detects a domain as canonically www when root redirects' do
        stub_request(:head, 'https://example.com/').to_return(status: 500)
        stub_request(:head, 'https://www.example.com/').to_return(status: 500)
        stub_request(:head, 'http://example.com/')
          .to_return(status: 301, headers: { location: 'http://www.example.com' })
        stub_request(:head, 'http://www.example.com/').to_return(status: 200)

        expect(subject.canonically_www?).to be(true)
      end
    end

    context 'https' do
      it 'detects a domain as canonically https when http is down' do
        stub_request(:head, 'https://example.com/').to_return(status: 200)
        stub_request(:head, 'https://www.example.com/').to_return(status: 200)
        stub_request(:head, 'http://example.com/').to_return(status: 500)
        stub_request(:head, 'http://www.example.com/').to_return(status: 500)
        allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)

        expect(subject.canonically_https?).to be(true)
      end

      it 'detects a domain as canonically https when http redirect' do
        stub_request(:head, 'https://example.com/').to_return(status: 200)
        stub_request(:head, 'https://www.example.com/').to_return(status: 200)
        stub_request(:head, 'http://example.com/')
          .to_return(status: 301, headers: { location: 'https://example.com' })
        stub_request(:head, 'http://www.example.com/').to_return(status: 500)
        allow(subject.endpoints.first.https).to receive(:valid?).and_return(true)

        expect(subject.canonically_https?).to be(true)
      end
    end
  end

  context 'redirects' do
    it 'knows when a domain redirects' do
      stub_request(:head, 'https://example.com/').to_return(status: 500)
      stub_request(:head, 'https://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://example.com/')
        .to_return(status: 301, headers: { location: 'http://foo.example.com' })
      stub_request(:head, 'http://www.example.com/').to_return(status: 500)
      stub_request(:head, 'http://foo.example.com/').to_return(status: 200)

      expect(subject.redirect?).to be(true)
    end
  end

  # These examples had empty bodies; block-less form reports them as pending.
  context 'hsts' do
    it 'enabled'

    it 'subdomains'

    it 'preload ready'
  end

  it 'returns the host as a string' do
    expect(subject.to_s).to eql('example.com')
  end
end