ronin-web-spider 0.1.0.beta2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,137 +0,0 @@
1
- require 'spec_helper'
2
- require 'ronin/web/spider/git_archive'
3
-
4
- require 'tmpdir'
5
-
6
- describe Ronin::Web::Spider::GitArchive do
7
- let(:root) { File.join(Dir.mktmpdir('ronin-web-spider')) }
8
-
9
- describe ".open" do
10
- subject { described_class }
11
-
12
- context "when the root directory does not already exist" do
13
- let(:root) { File.join(Dir.tmpdir,'ronin-web-spider-new-dir') }
14
-
15
- it "must run `git init` on the new archive directory" do
16
- subject.open(root)
17
-
18
- expect(File.directory?(File.join(root,'.git'))).to be(true)
19
- end
20
-
21
- after { FileUtils.rm_r(root) }
22
- end
23
-
24
- context "when the root directory already exists" do
25
- context "but does not contain a .git directory" do
26
- it "must run `git init` within the root directory" do
27
- subject.open(root)
28
-
29
- expect(File.directory?(File.join(root,'.git'))).to be(true)
30
- end
31
- end
32
- end
33
- end
34
-
35
- subject { described_class.open(root) }
36
-
37
- describe "#git?" do
38
- subject { described_class.new(root) }
39
-
40
- context "when the archive directory contains a .git directory" do
41
- before do
42
- FileUtils.mkdir(File.join(root,'.git'))
43
- end
44
-
45
- it "must return true" do
46
- expect(subject.git?).to be(true)
47
- end
48
- end
49
-
50
- context "when the archive directory does not contains a .git directory" do
51
- it "must return false" do
52
- expect(subject.git?).to be(false)
53
- end
54
- end
55
- end
56
-
57
- describe "#init" do
58
- it "must run the 'git init' command" do
59
- expect(subject).to receive(:system).with('git','-C',root,'init').and_return(true)
60
-
61
- subject.init
62
- end
63
-
64
- context "when the 'git init' command fails" do
65
- it do
66
- allow(subject).to receive(:system).with('git','-C',root,'init').and_return(false)
67
-
68
- expect {
69
- subject.init
70
- }.to raise_error(Ronin::Web::Spider::GitError,"git command failed: git -C #{root} init")
71
- end
72
- end
73
-
74
- context "when 'git' is not installed" do
75
- it do
76
- allow(subject).to receive(:system).with('git','-C',root,'init').and_return(nil)
77
-
78
- expect {
79
- subject.init
80
- }.to raise_error(Ronin::Web::Spider::GitError,"the git command was not found")
81
- end
82
- end
83
- end
84
-
85
- describe "#write" do
86
- let(:url) { URI('https://example.com/foo/bar.html') }
87
- let(:body) { 'test file' }
88
-
89
- it "must automatically create parent directory" do
90
- subject.write(url,body)
91
-
92
- expect(File.directory?(File.join(root,'foo'))).to be(true)
93
- end
94
-
95
- it "must write the body into the file" do
96
- subject.write(url,body)
97
-
98
- expect(File.read(File.join(root,'foo','bar.html'))).to eq(body)
99
- end
100
-
101
- it "must add the file using `git add`" do
102
- absolute_path = File.join(root,'foo','bar.html')
103
-
104
- expect(subject).to receive(:system).with(
105
- 'git', '-C', root, 'add', absolute_path
106
- ).and_return(true)
107
-
108
- subject.write(url,body)
109
- end
110
- end
111
-
112
- describe "#commit" do
113
- let(:message) { 'commit message' }
114
-
115
- context "when a block is given" do
116
- it "must yield control before calling `git commit -m ...` with the commit message" do
117
- expect(subject).to receive(:system).with(
118
- 'git', '-C', root, 'commit', '-m', message
119
- ).and_return(true)
120
-
121
- expect { |b|
122
- subject.commit(message,&b)
123
- }.to yield_with_args(subject)
124
- end
125
- end
126
-
127
- context "when no block is given" do
128
- it "must not yield and call `git commit -m ...` with the commit message" do
129
- expect(subject).to receive(:system).with(
130
- 'git', '-C', root, 'commit', '-m', message
131
- ).and_return(true)
132
-
133
- subject.commit(message)
134
- end
135
- end
136
- end
137
- end
data/spec/spec_helper.rb DELETED
@@ -1,4 +0,0 @@
1
- require 'rspec'
2
- require 'simplecov'
3
-
4
- SimpleCov.start
data/spec/spider_spec.rb DELETED
@@ -1,252 +0,0 @@
1
- require 'spec_helper'
2
- require 'example_app'
3
-
4
- require 'ronin/web/spider'
5
-
6
- describe Ronin::Web::Spider do
7
- include_context "example App"
8
-
9
- describe ".start_at" do
10
- module TestAgentStartAt
11
- class ExampleApp < Sinatra::Base
12
-
13
- set :host, 'example.com'
14
- set :port, 80
15
-
16
- get '/' do
17
- '<html><body>should not get here</body></html>'
18
- end
19
-
20
- get '/entry-point' do
21
- <<~HTML
22
- <html>
23
- <body>
24
- <a href="/link1">link1</a>
25
- <a href="http://other.com/offsite-link">offsite link</a>
26
- <a href="/link2">link2</a>
27
- </body>
28
- </html>
29
- HTML
30
- end
31
-
32
- get '/link1' do
33
- '<html><body>got here</body></html>'
34
- end
35
-
36
- get '/link2' do
37
- '<html><body>got here</body></html>'
38
- end
39
- end
40
-
41
- class OtherApp < Sinatra::Base
42
-
43
- set :host, 'other.com'
44
- set :port, 80
45
-
46
- get '/offsite-link' do
47
- '<html><body>should not get here</body></html>'
48
- end
49
-
50
- end
51
- end
52
-
53
- subject { described_class }
54
-
55
- let(:host) { 'example.com' }
56
- let(:other_host) { 'other.com' }
57
- let(:url) { URI("http://#{host}/entry-point") }
58
-
59
- let(:app) { TestAgentStartAt::ExampleApp }
60
- let(:other_app) { TestAgentStartAt::OtherApp }
61
-
62
- before do
63
- stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
64
- stub_request(:any, /#{Regexp.escape(other_host)}/).to_rack(other_app)
65
- end
66
-
67
- it "must spider the website starting at the given URL" do
68
- agent = subject.start_at(url)
69
-
70
- expect(agent.history).to be == Set[
71
- URI("http://#{host}/entry-point"),
72
- URI("http://#{host}/link1"),
73
- URI("http://#{other_host}/offsite-link"),
74
- URI("http://#{host}/link2")
75
- ]
76
- end
77
- end
78
-
79
- describe ".site" do
80
- module TestAgentSite
81
- class ExampleApp < Sinatra::Base
82
-
83
- set :host, 'example.com'
84
- set :port, 80
85
-
86
- get '/' do
87
- '<html><body>should not get here</body></html>'
88
- end
89
-
90
- get '/entry-point' do
91
- <<~HTML
92
- <html>
93
- <body>
94
- <a href="/link1">link1</a>
95
- <a href="http://other.com/offsite-link">offsite link</a>
96
- <a href="/link2">link2</a>
97
- </body>
98
- </html>
99
- HTML
100
- end
101
-
102
- get '/link1' do
103
- '<html><body>got here</body></html>'
104
- end
105
-
106
- get '/link2' do
107
- '<html><body>got here</body></html>'
108
- end
109
-
110
- end
111
- end
112
-
113
- subject { described_class }
114
-
115
- let(:host) { 'example.com' }
116
- let(:url) { URI("http://#{host}/entry-point") }
117
-
118
- let(:app) { TestAgentSite::ExampleApp }
119
-
120
- before do
121
- stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
122
- end
123
-
124
- it "must spider the website starting at the given URL" do
125
- agent = subject.site(url)
126
-
127
- expect(agent.history).to be == Set[
128
- URI("http://#{host}/entry-point"),
129
- URI("http://#{host}/link1"),
130
- URI("http://#{host}/link2")
131
- ]
132
- end
133
- end
134
-
135
- describe ".host" do
136
- module TestAgentHost
137
- class ExampleApp < Sinatra::Base
138
-
139
- set :host, 'example.com'
140
- set :port, 80
141
-
142
- get '/' do
143
- <<~HTML
144
- <html>
145
- <body>
146
- <a href="/link1">link1</a>
147
- <a href="http://other.com/offsite-link">offsite link</a>
148
- <a href="/link2">link2</a>
149
- </body>
150
- </html>
151
- HTML
152
- end
153
-
154
- get '/link1' do
155
- '<html><body>got here</body></html>'
156
- end
157
-
158
- get '/link2' do
159
- '<html><body>got here</body></html>'
160
- end
161
-
162
- end
163
- end
164
-
165
- subject { described_class }
166
-
167
- let(:host) { 'example.com' }
168
- let(:app) { TestAgentHost::ExampleApp }
169
-
170
- before do
171
- stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
172
- end
173
-
174
- it "must spider the website starting at the given URL" do
175
- agent = subject.host(host)
176
-
177
- # XXX: for some reason Set#== was returning false, so convert to an Array
178
- expect(agent.history.to_a).to be == [
179
- URI("http://#{host}/"),
180
- URI("http://#{host}/link1"),
181
- URI("http://#{host}/link2")
182
- ]
183
- end
184
- end
185
-
186
- describe ".domain" do
187
- module TestAgentDomain
188
- class ExampleApp < Sinatra::Base
189
-
190
- set :host, 'example.com'
191
- set :port, 80
192
-
193
- get '/' do
194
- <<~HTML
195
- <html>
196
- <body>
197
- <a href="/link1">link1</a>
198
- <a href="http://sub.example.com/subdomain-link">subdomain link</a>
199
- <a href="/link2">link2</a>
200
- </body>
201
- </html>
202
- HTML
203
- end
204
-
205
- get '/link1' do
206
- '<html><body>got here</body></html>'
207
- end
208
-
209
- get '/link2' do
210
- '<html><body>got here</body></html>'
211
- end
212
-
213
- end
214
-
215
- class SubDomainApp < Sinatra::Base
216
-
217
- set :host, 'sub.example.com'
218
- set :port, 80
219
-
220
- get '/subdomain-link' do
221
- '<html><body>should get here</body></html>'
222
- end
223
-
224
- end
225
- end
226
-
227
- subject { described_class }
228
-
229
- let(:domain) { 'example.com' }
230
- let(:domain_app) { TestAgentDomain::ExampleApp }
231
-
232
- let(:subdomain) { 'sub.example.com' }
233
- let(:subdomain_app) { TestAgentDomain::SubDomainApp }
234
-
235
- before do
236
- stub_request(:any, /#{Regexp.escape(subdomain)}/).to_rack(subdomain_app)
237
- stub_request(:any, /#{Regexp.escape(domain)}/).to_rack(domain_app)
238
- end
239
-
240
- it "must spider the domain and subdomains starting at the given domain" do
241
- agent = subject.domain(domain)
242
-
243
- # XXX: for some reason Set#== was returning false, so convert to an Array
244
- expect(agent.history.to_a).to be == [
245
- URI("http://#{domain}/"),
246
- URI("http://#{domain}/link1"),
247
- URI("http://#{subdomain}/subdomain-link"),
248
- URI("http://#{domain}/link2")
249
- ]
250
- end
251
- end
252
- end