ronin-web-spider 0.1.0.beta1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -4
- data/.yardopts +1 -1
- data/ChangeLog.md +3 -0
- data/Gemfile +2 -2
- data/README.md +302 -30
- data/gemspec.yml +2 -2
- data/lib/ronin/web/spider/agent.rb +62 -2
- data/lib/ronin/web/spider/archive.rb +3 -0
- data/lib/ronin/web/spider/exceptions.rb +1 -1
- data/lib/ronin/web/spider/git_archive.rb +1 -1
- data/lib/ronin/web/spider/version.rb +2 -2
- data/lib/ronin/web/spider.rb +289 -1
- data/ronin-web-spider.gemspec +2 -1
- metadata +5 -15
- data/spec/agent_spec.rb +0 -585
- data/spec/archive_spec.rb +0 -91
- data/spec/example_app.rb +0 -27
- data/spec/git_archive_spec.rb +0 -137
- data/spec/spec_helper.rb +0 -4
- data/spec/spider_spec.rb +0 -252
data/spec/git_archive_spec.rb
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'ronin/web/spider/git_archive'
|
3
|
-
|
4
|
-
require 'tmpdir'
|
5
|
-
|
6
|
-
describe Ronin::Web::Spider::GitArchive do
|
7
|
-
let(:root) { File.join(Dir.mktmpdir('ronin-web-spider')) }
|
8
|
-
|
9
|
-
describe ".open" do
|
10
|
-
subject { described_class }
|
11
|
-
|
12
|
-
context "when the root directory does not already exist" do
|
13
|
-
let(:root) { File.join(Dir.tmpdir,'ronin-web-spider-new-dir') }
|
14
|
-
|
15
|
-
it "must run `git init` on the new archive directory" do
|
16
|
-
subject.open(root)
|
17
|
-
|
18
|
-
expect(File.directory?(File.join(root,'.git'))).to be(true)
|
19
|
-
end
|
20
|
-
|
21
|
-
after { FileUtils.rm_r(root) }
|
22
|
-
end
|
23
|
-
|
24
|
-
context "when the root directory already exists" do
|
25
|
-
context "but does not contain a .git directory" do
|
26
|
-
it "must run `git init` within the root directory" do
|
27
|
-
subject.open(root)
|
28
|
-
|
29
|
-
expect(File.directory?(File.join(root,'.git'))).to be(true)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
subject { described_class.open(root) }
|
36
|
-
|
37
|
-
describe "#git?" do
|
38
|
-
subject { described_class.new(root) }
|
39
|
-
|
40
|
-
context "when the archive directory contains a .git directory" do
|
41
|
-
before do
|
42
|
-
FileUtils.mkdir(File.join(root,'.git'))
|
43
|
-
end
|
44
|
-
|
45
|
-
it "must return true" do
|
46
|
-
expect(subject.git?).to be(true)
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
context "when the archive directory does not contains a .git directory" do
|
51
|
-
it "must return false" do
|
52
|
-
expect(subject.git?).to be(false)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
describe "#init" do
|
58
|
-
it "must run the 'git init' command" do
|
59
|
-
expect(subject).to receive(:system).with('git','-C',root,'init').and_return(true)
|
60
|
-
|
61
|
-
subject.init
|
62
|
-
end
|
63
|
-
|
64
|
-
context "when the 'git init' command fails" do
|
65
|
-
it do
|
66
|
-
allow(subject).to receive(:system).with('git','-C',root,'init').and_return(false)
|
67
|
-
|
68
|
-
expect {
|
69
|
-
subject.init
|
70
|
-
}.to raise_error(Ronin::Web::Spider::GitError,"git command failed: git -C #{root} init")
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
context "when 'git' is not installed" do
|
75
|
-
it do
|
76
|
-
allow(subject).to receive(:system).with('git','-C',root,'init').and_return(nil)
|
77
|
-
|
78
|
-
expect {
|
79
|
-
subject.init
|
80
|
-
}.to raise_error(Ronin::Web::Spider::GitError,"the git command was not found")
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
describe "#write" do
|
86
|
-
let(:url) { URI('https://example.com/foo/bar.html') }
|
87
|
-
let(:body) { 'test file' }
|
88
|
-
|
89
|
-
it "must automatically create parent directory" do
|
90
|
-
subject.write(url,body)
|
91
|
-
|
92
|
-
expect(File.directory?(File.join(root,'foo'))).to be(true)
|
93
|
-
end
|
94
|
-
|
95
|
-
it "must write the body into the file" do
|
96
|
-
subject.write(url,body)
|
97
|
-
|
98
|
-
expect(File.read(File.join(root,'foo','bar.html'))).to eq(body)
|
99
|
-
end
|
100
|
-
|
101
|
-
it "must add the file using `git add`" do
|
102
|
-
absolute_path = File.join(root,'foo','bar.html')
|
103
|
-
|
104
|
-
expect(subject).to receive(:system).with(
|
105
|
-
'git', '-C', root, 'add', absolute_path
|
106
|
-
).and_return(true)
|
107
|
-
|
108
|
-
subject.write(url,body)
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
describe "#commit" do
|
113
|
-
let(:message) { 'commit message' }
|
114
|
-
|
115
|
-
context "when a block is given" do
|
116
|
-
it "must yield control before calling `git commit -m ...` with the commit message" do
|
117
|
-
expect(subject).to receive(:system).with(
|
118
|
-
'git', '-C', root, 'commit', '-m', message
|
119
|
-
).and_return(true)
|
120
|
-
|
121
|
-
expect { |b|
|
122
|
-
subject.commit(message,&b)
|
123
|
-
}.to yield_with_args(subject)
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
context "when no block is given" do
|
128
|
-
it "must not yield and call `git commit -m ...` with the commit message" do
|
129
|
-
expect(subject).to receive(:system).with(
|
130
|
-
'git', '-C', root, 'commit', '-m', message
|
131
|
-
).and_return(true)
|
132
|
-
|
133
|
-
subject.commit(message)
|
134
|
-
end
|
135
|
-
end
|
136
|
-
end
|
137
|
-
end
|
data/spec/spec_helper.rb
DELETED
data/spec/spider_spec.rb
DELETED
@@ -1,252 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'example_app'
|
3
|
-
|
4
|
-
require 'ronin/web/spider'
|
5
|
-
|
6
|
-
describe Ronin::Web::Spider do
|
7
|
-
include_context "example App"
|
8
|
-
|
9
|
-
describe ".start_at" do
|
10
|
-
module TestAgentStartAt
|
11
|
-
class ExampleApp < Sinatra::Base
|
12
|
-
|
13
|
-
set :host, 'example.com'
|
14
|
-
set :port, 80
|
15
|
-
|
16
|
-
get '/' do
|
17
|
-
'<html><body>should not get here</body></html>'
|
18
|
-
end
|
19
|
-
|
20
|
-
get '/entry-point' do
|
21
|
-
<<~HTML
|
22
|
-
<html>
|
23
|
-
<body>
|
24
|
-
<a href="/link1">link1</a>
|
25
|
-
<a href="http://other.com/offsite-link">offsite link</a>
|
26
|
-
<a href="/link2">link2</a>
|
27
|
-
</body>
|
28
|
-
</html>
|
29
|
-
HTML
|
30
|
-
end
|
31
|
-
|
32
|
-
get '/link1' do
|
33
|
-
'<html><body>got here</body></html>'
|
34
|
-
end
|
35
|
-
|
36
|
-
get '/link2' do
|
37
|
-
'<html><body>got here</body></html>'
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
class OtherApp < Sinatra::Base
|
42
|
-
|
43
|
-
set :host, 'other.com'
|
44
|
-
set :port, 80
|
45
|
-
|
46
|
-
get '/offsite-link' do
|
47
|
-
'<html><body>should not get here</body></html>'
|
48
|
-
end
|
49
|
-
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
subject { described_class }
|
54
|
-
|
55
|
-
let(:host) { 'example.com' }
|
56
|
-
let(:other_host) { 'other.com' }
|
57
|
-
let(:url) { URI("http://#{host}/entry-point") }
|
58
|
-
|
59
|
-
let(:app) { TestAgentStartAt::ExampleApp }
|
60
|
-
let(:other_app) { TestAgentStartAt::OtherApp }
|
61
|
-
|
62
|
-
before do
|
63
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
|
64
|
-
stub_request(:any, /#{Regexp.escape(other_host)}/).to_rack(other_app)
|
65
|
-
end
|
66
|
-
|
67
|
-
it "must spider the website starting at the given URL" do
|
68
|
-
agent = subject.start_at(url)
|
69
|
-
|
70
|
-
expect(agent.history).to be == Set[
|
71
|
-
URI("http://#{host}/entry-point"),
|
72
|
-
URI("http://#{host}/link1"),
|
73
|
-
URI("http://#{other_host}/offsite-link"),
|
74
|
-
URI("http://#{host}/link2")
|
75
|
-
]
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
describe ".site" do
|
80
|
-
module TestAgentSite
|
81
|
-
class ExampleApp < Sinatra::Base
|
82
|
-
|
83
|
-
set :host, 'example.com'
|
84
|
-
set :port, 80
|
85
|
-
|
86
|
-
get '/' do
|
87
|
-
'<html><body>should not get here</body></html>'
|
88
|
-
end
|
89
|
-
|
90
|
-
get '/entry-point' do
|
91
|
-
<<~HTML
|
92
|
-
<html>
|
93
|
-
<body>
|
94
|
-
<a href="/link1">link1</a>
|
95
|
-
<a href="http://other.com/offsite-link">offsite link</a>
|
96
|
-
<a href="/link2">link2</a>
|
97
|
-
</body>
|
98
|
-
</html>
|
99
|
-
HTML
|
100
|
-
end
|
101
|
-
|
102
|
-
get '/link1' do
|
103
|
-
'<html><body>got here</body></html>'
|
104
|
-
end
|
105
|
-
|
106
|
-
get '/link2' do
|
107
|
-
'<html><body>got here</body></html>'
|
108
|
-
end
|
109
|
-
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
subject { described_class }
|
114
|
-
|
115
|
-
let(:host) { 'example.com' }
|
116
|
-
let(:url) { URI("http://#{host}/entry-point") }
|
117
|
-
|
118
|
-
let(:app) { TestAgentSite::ExampleApp }
|
119
|
-
|
120
|
-
before do
|
121
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
|
122
|
-
end
|
123
|
-
|
124
|
-
it "must spider the website starting at the given URL" do
|
125
|
-
agent = subject.site(url)
|
126
|
-
|
127
|
-
expect(agent.history).to be == Set[
|
128
|
-
URI("http://#{host}/entry-point"),
|
129
|
-
URI("http://#{host}/link1"),
|
130
|
-
URI("http://#{host}/link2")
|
131
|
-
]
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
describe ".host" do
|
136
|
-
module TestAgentHost
|
137
|
-
class ExampleApp < Sinatra::Base
|
138
|
-
|
139
|
-
set :host, 'example.com'
|
140
|
-
set :port, 80
|
141
|
-
|
142
|
-
get '/' do
|
143
|
-
<<~HTML
|
144
|
-
<html>
|
145
|
-
<body>
|
146
|
-
<a href="/link1">link1</a>
|
147
|
-
<a href="http://other.com/offsite-link">offsite link</a>
|
148
|
-
<a href="/link2">link2</a>
|
149
|
-
</body>
|
150
|
-
</html>
|
151
|
-
HTML
|
152
|
-
end
|
153
|
-
|
154
|
-
get '/link1' do
|
155
|
-
'<html><body>got here</body></html>'
|
156
|
-
end
|
157
|
-
|
158
|
-
get '/link2' do
|
159
|
-
'<html><body>got here</body></html>'
|
160
|
-
end
|
161
|
-
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
subject { described_class }
|
166
|
-
|
167
|
-
let(:host) { 'example.com' }
|
168
|
-
let(:app) { TestAgentHost::ExampleApp }
|
169
|
-
|
170
|
-
before do
|
171
|
-
stub_request(:any, /#{Regexp.escape(host)}/).to_rack(app)
|
172
|
-
end
|
173
|
-
|
174
|
-
it "must spider the website starting at the given URL" do
|
175
|
-
agent = subject.host(host)
|
176
|
-
|
177
|
-
# XXX: for some reason Set#== was returning false, so convert to an Array
|
178
|
-
expect(agent.history.to_a).to be == [
|
179
|
-
URI("http://#{host}/"),
|
180
|
-
URI("http://#{host}/link1"),
|
181
|
-
URI("http://#{host}/link2")
|
182
|
-
]
|
183
|
-
end
|
184
|
-
end
|
185
|
-
|
186
|
-
describe ".domain" do
|
187
|
-
module TestAgentDomain
|
188
|
-
class ExampleApp < Sinatra::Base
|
189
|
-
|
190
|
-
set :host, 'example.com'
|
191
|
-
set :port, 80
|
192
|
-
|
193
|
-
get '/' do
|
194
|
-
<<~HTML
|
195
|
-
<html>
|
196
|
-
<body>
|
197
|
-
<a href="/link1">link1</a>
|
198
|
-
<a href="http://sub.example.com/subdomain-link">subdomain link</a>
|
199
|
-
<a href="/link2">link2</a>
|
200
|
-
</body>
|
201
|
-
</html>
|
202
|
-
HTML
|
203
|
-
end
|
204
|
-
|
205
|
-
get '/link1' do
|
206
|
-
'<html><body>got here</body></html>'
|
207
|
-
end
|
208
|
-
|
209
|
-
get '/link2' do
|
210
|
-
'<html><body>got here</body></html>'
|
211
|
-
end
|
212
|
-
|
213
|
-
end
|
214
|
-
|
215
|
-
class SubDomainApp < Sinatra::Base
|
216
|
-
|
217
|
-
set :host, 'sub.example.com'
|
218
|
-
set :port, 80
|
219
|
-
|
220
|
-
get '/subdomain-link' do
|
221
|
-
'<html><body>should get here</body></html>'
|
222
|
-
end
|
223
|
-
|
224
|
-
end
|
225
|
-
end
|
226
|
-
|
227
|
-
subject { described_class }
|
228
|
-
|
229
|
-
let(:domain) { 'example.com' }
|
230
|
-
let(:domain_app) { TestAgentDomain::ExampleApp }
|
231
|
-
|
232
|
-
let(:subdomain) { 'sub.example.com' }
|
233
|
-
let(:subdomain_app) { TestAgentDomain::SubDomainApp }
|
234
|
-
|
235
|
-
before do
|
236
|
-
stub_request(:any, /#{Regexp.escape(subdomain)}/).to_rack(subdomain_app)
|
237
|
-
stub_request(:any, /#{Regexp.escape(domain)}/).to_rack(domain_app)
|
238
|
-
end
|
239
|
-
|
240
|
-
it "must spider the domain and subdomains starting at the given domain" do
|
241
|
-
agent = subject.domain(domain)
|
242
|
-
|
243
|
-
# XXX: for some reason Set#== was returning false, so convert to an Array
|
244
|
-
expect(agent.history.to_a).to be == [
|
245
|
-
URI("http://#{domain}/"),
|
246
|
-
URI("http://#{domain}/link1"),
|
247
|
-
URI("http://#{subdomain}/subdomain-link"),
|
248
|
-
URI("http://#{domain}/link2")
|
249
|
-
]
|
250
|
-
end
|
251
|
-
end
|
252
|
-
end
|