ronin-web-spider 0.1.0.beta2 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/spec/archive_spec.rb DELETED
@@ -1,91 +0,0 @@
1
- require 'spec_helper'
2
- require 'ronin/web/spider/archive'
3
-
4
- require 'tmpdir'
5
-
6
- describe Ronin::Web::Spider::Archive do
7
- let(:root) { File.join(Dir.mktmpdir('ronin-web-spider')) }
8
-
9
- subject { described_class.new(root) }
10
-
11
- describe "#initialize" do
12
- it "must set #root" do
13
- expect(subject.root).to eq(root)
14
- end
15
- end
16
-
17
- describe ".open" do
18
- subject { described_class.open(root) }
19
-
20
- it "must return a new #{described_class}" do
21
- expect(subject).to be_kind_of(described_class)
22
- end
23
-
24
- context "when given a block" do
25
- it "must yield the new #{described_class}" do
26
- expect { |b|
27
- described_class.open(root,&b)
28
- }.to yield_with_args(described_class)
29
- end
30
- end
31
-
32
- context "when the root directory does not exist" do
33
- let(:root) { File.join(super(),'does-not-exist-yet') }
34
-
35
- it "must create the given root directory" do
36
- described_class.open(root)
37
-
38
- expect(File.directory?(root)).to be(true)
39
- end
40
- end
41
-
42
- context "when the root directory does exist" do
43
- let(:root) { File.join(super(),'does-not-exist-yet') }
44
-
45
- before { FileUtils.mkdir(root) }
46
-
47
- it "must not raise an error" do
48
- expect {
49
- described_class.open(root)
50
- }.to_not raise_error
51
- end
52
- end
53
- end
54
-
55
- describe "#write" do
56
- let(:url) { URI('https://example.com/foo/bar.html') }
57
- let(:body) { 'test file' }
58
-
59
- before { subject.write(url,body) }
60
-
61
- it "must automatically create parent directory" do
62
- expect(File.directory?(File.join(root,'foo'))).to be(true)
63
- end
64
-
65
- it "must write the body into the file" do
66
- expect(File.read(File.join(root,'foo','bar.html'))).to eq(body)
67
- end
68
-
69
- context "when the URL has a query string" do
70
- let(:url) { URI('https://example.com/foo/bar.php?q=1') }
71
-
72
- it "must include the query string as part of the file name" do
73
- expect(File.read(File.join(root,'foo','bar.php?q=1'))).to eq(body)
74
- end
75
- end
76
-
77
- context "when the URL path ends with a '/'" do
78
- let(:url) { URI('https://example.com/foo/bar/') }
79
-
80
- it "must write the body to an index.html file within the URL's path" do
81
- expect(File.read(File.join(root,'foo','bar','index.html'))).to eq(body)
82
- end
83
- end
84
- end
85
-
86
- describe "#to_s" do
87
- it "must return the root directory" do
88
- expect(subject.to_s).to eq(root)
89
- end
90
- end
91
- end
data/spec/example_app.rb DELETED
@@ -1,27 +0,0 @@
1
- require 'rspec'
2
- require 'sinatra/base'
3
- require 'webmock/rspec'
4
-
5
- require 'ronin/web/spider/agent'
6
-
7
- RSpec.shared_context "example App" do
8
- let(:host) { 'example.com' }
9
-
10
- subject { Ronin::Web::Spider::Agent.new(host: host) }
11
-
12
- def self.app(&block)
13
- let(:app) do
14
- klass = Class.new(Sinatra::Base)
15
- klass.set :host, host
16
- klass.set :port, 80
17
- klass.class_eval(&block)
18
- return klass
19
- end
20
-
21
- before do
22
- stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
23
-
24
- subject.start_at("http://#{host}/")
25
- end
26
- end
27
- end
data/spec/git_archive_spec.rb DELETED
@@ -1,137 +0,0 @@
1
- require 'spec_helper'
2
- require 'ronin/web/spider/git_archive'
3
-
4
- require 'tmpdir'
5
-
6
- describe Ronin::Web::Spider::GitArchive do
7
- let(:root) { File.join(Dir.mktmpdir('ronin-web-spider')) }
8
-
9
- describe ".open" do
10
- subject { described_class }
11
-
12
- context "when the root directory does not already exist" do
13
- let(:root) { File.join(Dir.tmpdir,'ronin-web-spider-new-dir') }
14
-
15
- it "must run `git init` on the new archive directory" do
16
- subject.open(root)
17
-
18
- expect(File.directory?(File.join(root,'.git'))).to be(true)
19
- end
20
-
21
- after { FileUtils.rm_r(root) }
22
- end
23
-
24
- context "when the root directory already exists" do
25
- context "but does not contain a .git directory" do
26
- it "must run `git init` within the root directory" do
27
- subject.open(root)
28
-
29
- expect(File.directory?(File.join(root,'.git'))).to be(true)
30
- end
31
- end
32
- end
33
- end
34
-
35
- subject { described_class.open(root) }
36
-
37
- describe "#git?" do
38
- subject { described_class.new(root) }
39
-
40
- context "when the archive directory contains a .git directory" do
41
- before do
42
- FileUtils.mkdir(File.join(root,'.git'))
43
- end
44
-
45
- it "must return true" do
46
- expect(subject.git?).to be(true)
47
- end
48
- end
49
-
50
- context "when the archive directory does not contains a .git directory" do
51
- it "must return false" do
52
- expect(subject.git?).to be(false)
53
- end
54
- end
55
- end
56
-
57
- describe "#init" do
58
- it "must run the 'git init' command" do
59
- expect(subject).to receive(:system).with('git','-C',root,'init').and_return(true)
60
-
61
- subject.init
62
- end
63
-
64
- context "when the 'git init' command fails" do
65
- it do
66
- allow(subject).to receive(:system).with('git','-C',root,'init').and_return(false)
67
-
68
- expect {
69
- subject.init
70
- }.to raise_error(Ronin::Web::Spider::GitError,"git command failed: git -C #{root} init")
71
- end
72
- end
73
-
74
- context "when 'git' is not installed" do
75
- it do
76
- allow(subject).to receive(:system).with('git','-C',root,'init').and_return(nil)
77
-
78
- expect {
79
- subject.init
80
- }.to raise_error(Ronin::Web::Spider::GitError,"the git command was not found")
81
- end
82
- end
83
- end
84
-
85
- describe "#write" do
86
- let(:url) { URI('https://example.com/foo/bar.html') }
87
- let(:body) { 'test file' }
88
-
89
- it "must automatically create parent directory" do
90
- subject.write(url,body)
91
-
92
- expect(File.directory?(File.join(root,'foo'))).to be(true)
93
- end
94
-
95
- it "must write the body into the file" do
96
- subject.write(url,body)
97
-
98
- expect(File.read(File.join(root,'foo','bar.html'))).to eq(body)
99
- end
100
-
101
- it "must add the file using `git add`" do
102
- absolute_path = File.join(root,'foo','bar.html')
103
-
104
- expect(subject).to receive(:system).with(
105
- 'git', '-C', root, 'add', absolute_path
106
- ).and_return(true)
107
-
108
- subject.write(url,body)
109
- end
110
- end
111
-
112
- describe "#commit" do
113
- let(:message) { 'commit message' }
114
-
115
- context "when a block is given" do
116
- it "must yield control before calling `git commit -m ...` with the commit message" do
117
- expect(subject).to receive(:system).with(
118
- 'git', '-C', root, 'commit', '-m', message
119
- ).and_return(true)
120
-
121
- expect { |b|
122
- subject.commit(message,&b)
123
- }.to yield_with_args(subject)
124
- end
125
- end
126
-
127
- context "when no block is given" do
128
- it "must not yield and call `git commit -m ...` with the commit message" do
129
- expect(subject).to receive(:system).with(
130
- 'git', '-C', root, 'commit', '-m', message
131
- ).and_return(true)
132
-
133
- subject.commit(message)
134
- end
135
- end
136
- end
137
- end
data/spec/spec_helper.rb DELETED
@@ -1,4 +0,0 @@
1
- require 'rspec'
2
- require 'simplecov'
3
-
4
- SimpleCov.start
data/spec/spider_spec.rb DELETED
@@ -1,252 +0,0 @@
1
- require 'spec_helper'
2
- require 'example_app'
3
-
4
- require 'ronin/web/spider'
5
-
6
- describe Ronin::Web::Spider do
7
- include_context "example App"
8
-
9
- describe ".start_at" do
10
- module TestAgentStartAt
11
- class ExampleApp < Sinatra::Base
12
-
13
- set :host, 'example.com'
14
- set :port, 80
15
-
16
- get '/' do
17
- '<html><body>should not get here</body></html>'
18
- end
19
-
20
- get '/entry-point' do
21
- <<~HTML
22
- <html>
23
- <body>
24
- <a href="/link1">link1</a>
25
- <a href="http://other.com/offsite-link">offsite link</a>
26
- <a href="/link2">link2</a>
27
- </body>
28
- </html>
29
- HTML
30
- end
31
-
32
- get '/link1' do
33
- '<html><body>got here</body></html>'
34
- end
35
-
36
- get '/link2' do
37
- '<html><body>got here</body></html>'
38
- end
39
- end
40
-
41
- class OtherApp < Sinatra::Base
42
-
43
- set :host, 'other.com'
44
- set :port, 80
45
-
46
- get '/offsite-link' do
47
- '<html><body>should not get here</body></html>'
48
- end
49
-
50
- end
51
- end
52
-
53
- subject { described_class }
54
-
55
- let(:host) { 'example.com' }
56
- let(:other_host) { 'other.com' }
57
- let(:url) { URI("http://#{host}/entry-point") }
58
-
59
- let(:app) { TestAgentStartAt::ExampleApp }
60
- let(:other_app) { TestAgentStartAt::OtherApp }
61
-
62
- before do
63
- stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
64
- stub_request(:any, /#{Regexp.escape(other_host)}/).to_rack(other_app)
65
- end
66
-
67
- it "must spider the website starting at the given URL" do
68
- agent = subject.start_at(url)
69
-
70
- expect(agent.history).to be == Set[
71
- URI("http://#{host}/entry-point"),
72
- URI("http://#{host}/link1"),
73
- URI("http://#{other_host}/offsite-link"),
74
- URI("http://#{host}/link2")
75
- ]
76
- end
77
- end
78
-
79
- describe ".site" do
80
- module TestAgentSite
81
- class ExampleApp < Sinatra::Base
82
-
83
- set :host, 'example.com'
84
- set :port, 80
85
-
86
- get '/' do
87
- '<html><body>should not get here</body></html>'
88
- end
89
-
90
- get '/entry-point' do
91
- <<~HTML
92
- <html>
93
- <body>
94
- <a href="/link1">link1</a>
95
- <a href="http://other.com/offsite-link">offsite link</a>
96
- <a href="/link2">link2</a>
97
- </body>
98
- </html>
99
- HTML
100
- end
101
-
102
- get '/link1' do
103
- '<html><body>got here</body></html>'
104
- end
105
-
106
- get '/link2' do
107
- '<html><body>got here</body></html>'
108
- end
109
-
110
- end
111
- end
112
-
113
- subject { described_class }
114
-
115
- let(:host) { 'example.com' }
116
- let(:url) { URI("http://#{host}/entry-point") }
117
-
118
- let(:app) { TestAgentSite::ExampleApp }
119
-
120
- before do
121
- stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
122
- end
123
-
124
- it "must spider the website starting at the given URL" do
125
- agent = subject.site(url)
126
-
127
- expect(agent.history).to be == Set[
128
- URI("http://#{host}/entry-point"),
129
- URI("http://#{host}/link1"),
130
- URI("http://#{host}/link2")
131
- ]
132
- end
133
- end
134
-
135
- describe ".host" do
136
- module TestAgentHost
137
- class ExampleApp < Sinatra::Base
138
-
139
- set :host, 'example.com'
140
- set :port, 80
141
-
142
- get '/' do
143
- <<~HTML
144
- <html>
145
- <body>
146
- <a href="/link1">link1</a>
147
- <a href="http://other.com/offsite-link">offsite link</a>
148
- <a href="/link2">link2</a>
149
- </body>
150
- </html>
151
- HTML
152
- end
153
-
154
- get '/link1' do
155
- '<html><body>got here</body></html>'
156
- end
157
-
158
- get '/link2' do
159
- '<html><body>got here</body></html>'
160
- end
161
-
162
- end
163
- end
164
-
165
- subject { described_class }
166
-
167
- let(:host) { 'example.com' }
168
- let(:app) { TestAgentHost::ExampleApp }
169
-
170
- before do
171
- stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
172
- end
173
-
174
- it "must spider the website starting at the given URL" do
175
- agent = subject.host(host)
176
-
177
- # XXX: for some reason Set#== was returning false, so convert to an Array
178
- expect(agent.history.to_a).to be == [
179
- URI("http://#{host}/"),
180
- URI("http://#{host}/link1"),
181
- URI("http://#{host}/link2")
182
- ]
183
- end
184
- end
185
-
186
- describe ".domain" do
187
- module TestAgentDomain
188
- class ExampleApp < Sinatra::Base
189
-
190
- set :host, 'example.com'
191
- set :port, 80
192
-
193
- get '/' do
194
- <<~HTML
195
- <html>
196
- <body>
197
- <a href="/link1">link1</a>
198
- <a href="http://sub.example.com/subdomain-link">subdomain link</a>
199
- <a href="/link2">link2</a>
200
- </body>
201
- </html>
202
- HTML
203
- end
204
-
205
- get '/link1' do
206
- '<html><body>got here</body></html>'
207
- end
208
-
209
- get '/link2' do
210
- '<html><body>got here</body></html>'
211
- end
212
-
213
- end
214
-
215
- class SubDomainApp < Sinatra::Base
216
-
217
- set :host, 'sub.example.com'
218
- set :port, 80
219
-
220
- get '/subdomain-link' do
221
- '<html><body>should get here</body></html>'
222
- end
223
-
224
- end
225
- end
226
-
227
- subject { described_class }
228
-
229
- let(:domain) { 'example.com' }
230
- let(:domain_app) { TestAgentDomain::ExampleApp }
231
-
232
- let(:subdomain) { 'sub.example.com' }
233
- let(:subdomain_app) { TestAgentDomain::SubDomainApp }
234
-
235
- before do
236
- stub_request(:any, /#{Regexp.escape(subdomain)}/).to_rack(subdomain_app)
237
- stub_request(:any, /#{Regexp.escape(domain)}/).to_rack(domain_app)
238
- end
239
-
240
- it "must spider the domain and subdomains starting at the given domain" do
241
- agent = subject.domain(domain)
242
-
243
- # XXX: for some reason Set#== was returning false, so convert to an Array
244
- expect(agent.history.to_a).to be == [
245
- URI("http://#{domain}/"),
246
- URI("http://#{domain}/link1"),
247
- URI("http://#{subdomain}/subdomain-link"),
248
- URI("http://#{domain}/link2")
249
- ]
250
- end
251
- end
252
- end