spidr 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +14 -0
- data/ChangeLog.md +20 -2
- data/Gemfile +2 -2
- data/README.md +4 -2
- data/Rakefile +1 -0
- data/gemspec.yml +1 -1
- data/lib/spidr/agent.rb +145 -85
- data/lib/spidr/agent/filters.rb +1 -9
- data/lib/spidr/agent/robots.rb +36 -0
- data/lib/spidr/page.rb +76 -28
- data/lib/spidr/page/{headers.rb → content_types.rb} +2 -147
- data/lib/spidr/page/cookies.rb +60 -0
- data/lib/spidr/page/{links.rb → html.rb} +47 -23
- data/lib/spidr/page/status_codes.rb +112 -0
- data/lib/spidr/proxy.rb +56 -0
- data/lib/spidr/session_cache.rb +60 -24
- data/lib/spidr/settings.rb +3 -0
- data/lib/spidr/settings/proxy.rb +61 -0
- data/lib/spidr/settings/timeouts.rb +33 -0
- data/lib/spidr/settings/user_agent.rb +14 -0
- data/lib/spidr/spidr.rb +15 -79
- data/lib/spidr/version.rb +1 -1
- data/spec/agent/actions_spec.rb +158 -32
- data/spec/agent/filters_spec.rb +46 -29
- data/spec/agent/sanitizers_spec.rb +25 -31
- data/spec/agent_spec.rb +772 -50
- data/spec/example_app.rb +27 -0
- data/spec/example_page.rb +33 -0
- data/spec/page/content_types_spec.rb +150 -0
- data/spec/page/cookies_spec.rb +58 -0
- data/spec/page/html_spec.rb +524 -0
- data/spec/page/status_codes_spec.rb +87 -0
- data/spec/page_spec.rb +114 -78
- data/spec/proxy_spec.rb +45 -0
- data/spec/session_cache.rb +103 -2
- data/spec/settings/proxy_examples.rb +82 -0
- data/spec/settings/timeouts_examples.rb +93 -0
- data/spec/settings/user_agent_examples.rb +25 -0
- data/spec/spidr_spec.rb +6 -29
- data/spidr.gemspec +38 -109
- metadata +35 -31
- data/lib/spidr/page/body.rb +0 -98
- data/spec/helpers/history.rb +0 -34
- data/spec/helpers/page.rb +0 -8
- data/spec/helpers/wsoc.rb +0 -83
- data/spec/page_examples.rb +0 -21
data/lib/spidr/version.rb
CHANGED
data/spec/agent/actions_spec.rb
CHANGED
@@ -1,60 +1,186 @@
|
|
1
|
-
require 'spidr/agent'
|
2
|
-
|
3
1
|
require 'spec_helper'
|
2
|
+
require 'example_app'
|
3
|
+
|
4
|
+
require 'spidr/agent'
|
4
5
|
|
5
6
|
describe Agent do
|
6
|
-
describe "
|
7
|
-
|
7
|
+
describe "#continue!" do
|
8
|
+
before { subject.pause = true }
|
9
|
+
before { subject.continue! }
|
10
|
+
|
11
|
+
it "should un-pause the Agent" do
|
12
|
+
expect(subject.paused?).to be false
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe "#pause=" do
|
17
|
+
it "should change the paused state" do
|
18
|
+
subject.pause = true
|
19
|
+
|
20
|
+
expect(subject.paused?).to be true
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
describe "#pause!" do
|
25
|
+
it "should raise Actions::Paused" do
|
26
|
+
expect {
|
27
|
+
subject.pause!
|
28
|
+
}.to raise_error(described_class::Actions::Paused)
|
29
|
+
end
|
30
|
+
end
|
8
31
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
32
|
+
describe "#paused?" do
|
33
|
+
context "when the agent is paused" do
|
34
|
+
before do
|
35
|
+
begin
|
36
|
+
subject.pause!
|
37
|
+
rescue described_class::Actions::Paused
|
15
38
|
end
|
16
39
|
end
|
17
40
|
|
18
|
-
expect(
|
19
|
-
|
41
|
+
it { expect(subject.paused?).to be true }
|
42
|
+
end
|
43
|
+
|
44
|
+
context "when the agent is not paused" do
|
45
|
+
it { expect(subject.paused?).to be false }
|
20
46
|
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe "#skip_link!" do
|
50
|
+
it "should raise Actions::SkipLink" do
|
51
|
+
expect {
|
52
|
+
subject.skip_link!
|
53
|
+
}.to raise_error(described_class::Actions::SkipLink)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe "#skip_page!" do
|
58
|
+
it "should raise Actions::SkipPage" do
|
59
|
+
expect {
|
60
|
+
subject.skip_page!
|
61
|
+
}.to raise_error(described_class::Actions::SkipPage)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context "when spidering" do
|
66
|
+
include_context "example App"
|
67
|
+
|
68
|
+
context "when pause! is called" do
|
69
|
+
app do
|
70
|
+
get '/' do
|
71
|
+
%{<html><body><a href="/link">link</a></body></html>}
|
72
|
+
end
|
21
73
|
|
22
|
-
|
23
|
-
|
24
|
-
spider.every_page do |page|
|
25
|
-
spider.pause!
|
74
|
+
get '/link' do
|
75
|
+
%{<html><body>should not get here</body></html>}
|
26
76
|
end
|
27
77
|
end
|
28
78
|
|
29
|
-
|
30
|
-
|
79
|
+
subject do
|
80
|
+
described_class.new(host: host) do |agent|
|
81
|
+
agent.every_page do |page|
|
82
|
+
if page.url.path == '/'
|
83
|
+
agent.pause!
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should pause spidering" do
|
90
|
+
expect(subject).to be_paused
|
91
|
+
expect(subject.history).to be == Set[
|
92
|
+
URI("http://#{host}/")
|
93
|
+
]
|
94
|
+
end
|
31
95
|
|
32
|
-
|
96
|
+
context "and continue! is called afterwards" do
|
97
|
+
before do
|
98
|
+
subject.enqueue "http://#{host}/link"
|
99
|
+
subject.continue!
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should continue spidering" do
|
103
|
+
expect(subject.history).to be == Set[
|
104
|
+
URI("http://#{host}/"),
|
105
|
+
URI("http://#{host}/link")
|
106
|
+
]
|
107
|
+
end
|
108
|
+
end
|
33
109
|
end
|
34
110
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
111
|
+
context "when skip_link! is called" do
|
112
|
+
app do
|
113
|
+
get '/' do
|
114
|
+
%{<html><body><a href="/link1">link1</a> <a href="/link2">link2</a> <a href="/link3">link3</a></body></html>}
|
115
|
+
end
|
116
|
+
|
117
|
+
get '/link1' do
|
118
|
+
%{<html><body>link1</body></html>}
|
119
|
+
end
|
120
|
+
|
121
|
+
get '/link2' do
|
122
|
+
%{<html><body>link2</body></html>}
|
123
|
+
end
|
124
|
+
|
125
|
+
get '/link3' do
|
126
|
+
%{<html><body>link3</body></html>}
|
39
127
|
end
|
40
128
|
end
|
41
129
|
|
42
|
-
|
130
|
+
subject do
|
131
|
+
described_class.new(host: host) do |agent|
|
132
|
+
agent.every_url do |url|
|
133
|
+
if url.path == '/link2'
|
134
|
+
agent.skip_link!
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
43
139
|
|
44
|
-
|
140
|
+
it "should skip only the matching link" do
|
141
|
+
expect(subject.history).to be == Set[
|
142
|
+
URI("http://#{host}/"),
|
143
|
+
URI("http://#{host}/link1"),
|
144
|
+
URI("http://#{host}/link3")
|
145
|
+
]
|
146
|
+
end
|
45
147
|
end
|
46
148
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
149
|
+
context "when skip_page! is called" do
|
150
|
+
app do
|
151
|
+
get '/' do
|
152
|
+
%{<html><body><a href="/link">entry link</a></body></html>}
|
153
|
+
end
|
154
|
+
|
155
|
+
get '/link' do
|
156
|
+
%{<html><body><a href="/link1">link1</a> <a href="/link2">link2</a></body></html>}
|
157
|
+
end
|
158
|
+
|
159
|
+
get '/link1' do
|
160
|
+
%{<html><body>should not get here</body></html>}
|
161
|
+
end
|
162
|
+
|
163
|
+
get '/link2' do
|
164
|
+
%{<html><body>should not get here</body></html>}
|
51
165
|
end
|
52
166
|
end
|
53
167
|
|
54
|
-
|
168
|
+
subject do
|
169
|
+
described_class.new(host: host) do |agent|
|
170
|
+
agent.every_page do |page|
|
171
|
+
if page.url.path == '/link'
|
172
|
+
agent.skip_page!
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
55
177
|
|
56
|
-
|
57
|
-
|
178
|
+
it "should skip all links on the page" do
|
179
|
+
expect(subject.history).to be == Set[
|
180
|
+
URI("http://#{host}/"),
|
181
|
+
URI("http://#{host}/link")
|
182
|
+
]
|
183
|
+
end
|
58
184
|
end
|
59
185
|
end
|
60
186
|
end
|
data/spec/agent/filters_spec.rb
CHANGED
@@ -3,60 +3,77 @@ require 'spidr/agent'
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe Agent do
|
6
|
-
describe "
|
7
|
-
|
8
|
-
|
6
|
+
describe "#initialize_filters" do
|
7
|
+
describe ":schemes" do
|
8
|
+
it "should override the default schemes" do
|
9
|
+
agent = described_class.new(schemes: [:https])
|
9
10
|
|
10
|
-
|
11
|
-
|
11
|
+
expect(agent.schemes).to be == ['https']
|
12
|
+
end
|
12
13
|
end
|
13
14
|
|
14
|
-
|
15
|
-
|
15
|
+
describe ":hosts" do
|
16
|
+
it "should set the hosts that will be visited" do
|
17
|
+
agent = described_class.new(hosts: ['spidr.rubyforge.org'])
|
16
18
|
|
17
|
-
|
19
|
+
expect(agent.visit_hosts).to be == ['spidr.rubyforge.org']
|
20
|
+
end
|
18
21
|
end
|
19
22
|
|
20
|
-
|
21
|
-
|
23
|
+
describe ":ignore_hosts" do
|
24
|
+
it "should set the hosts that will not be visited" do
|
25
|
+
agent = described_class.new(ignore_hosts: ['example.com'])
|
22
26
|
|
23
|
-
|
27
|
+
expect(agent.ignore_hosts).to be == ['example.com']
|
28
|
+
end
|
24
29
|
end
|
25
30
|
|
26
|
-
|
27
|
-
|
31
|
+
describe ":ports" do
|
32
|
+
it "should set the ports that will be visited" do
|
33
|
+
agent = described_class.new(ports: [80, 443, 8000])
|
28
34
|
|
29
|
-
|
35
|
+
expect(agent.visit_ports).to be == [80, 443, 8000]
|
36
|
+
end
|
30
37
|
end
|
31
38
|
|
32
|
-
|
33
|
-
|
39
|
+
describe ":ignore_ports" do
|
40
|
+
it "should set the ports that will not be visited" do
|
41
|
+
agent = described_class.new(ignore_ports: [8000, 8080])
|
34
42
|
|
35
|
-
|
43
|
+
expect(agent.ignore_ports).to be == [8000, 8080]
|
44
|
+
end
|
36
45
|
end
|
37
46
|
|
38
|
-
|
39
|
-
|
47
|
+
describe ":links" do
|
48
|
+
it "should set the links that will be visited" do
|
49
|
+
agent = described_class.new(links: ['index.php'])
|
40
50
|
|
41
|
-
|
51
|
+
expect(agent.visit_links).to be == ['index.php']
|
52
|
+
end
|
42
53
|
end
|
43
54
|
|
44
|
-
|
45
|
-
|
55
|
+
describe ":ignore_links" do
|
56
|
+
it "should set the links that will not be visited" do
|
57
|
+
agent = described_class.new(ignore_links: [/login/])
|
46
58
|
|
47
|
-
|
59
|
+
expect(agent.ignore_links).to be == [/login/]
|
60
|
+
end
|
48
61
|
end
|
49
62
|
|
50
|
-
|
51
|
-
|
63
|
+
describe ":exts" do
|
64
|
+
it "should set the exts that will be visited" do
|
65
|
+
agent = described_class.new(exts: ['htm'])
|
52
66
|
|
53
|
-
|
67
|
+
expect(agent.visit_exts).to be == ['htm']
|
68
|
+
end
|
54
69
|
end
|
55
70
|
|
56
|
-
|
57
|
-
|
71
|
+
describe ":ignore_exts" do
|
72
|
+
it "should set the exts that will not be visited" do
|
73
|
+
agent = described_class.new(ignore_exts: ['cfm'])
|
58
74
|
|
59
|
-
|
75
|
+
expect(agent.ignore_exts).to be == ['cfm']
|
76
|
+
end
|
60
77
|
end
|
61
78
|
end
|
62
79
|
end
|
data/spec/agent/sanitizers_spec.rb
CHANGED
@@ -4,58 +4,52 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Agent do
|
6
6
|
describe "sanitizers" do
|
7
|
-
describe "sanitize_url" do
|
8
|
-
let(:url) { 'http://
|
9
|
-
|
7
|
+
describe "#sanitize_url" do
|
8
|
+
let(:url) { 'http://example.com/page?q=1#fragment' }
|
9
|
+
let(:uri) { URI(url) }
|
10
10
|
|
11
|
-
it "should sanitize
|
12
|
-
|
13
|
-
clean_url = agent.sanitize_url(URI(url))
|
11
|
+
it "should sanitize URIs" do
|
12
|
+
clean_url = subject.sanitize_url(uri)
|
14
13
|
|
15
|
-
expect(clean_url.host).to eq('
|
14
|
+
expect(clean_url.host).to eq('example.com')
|
16
15
|
end
|
17
16
|
|
18
17
|
it "should sanitize URLs given as Strings" do
|
19
|
-
|
20
|
-
clean_url = agent.sanitize_url(url)
|
18
|
+
clean_url = subject.sanitize_url(url)
|
21
19
|
|
22
|
-
expect(clean_url.host).to eq('
|
20
|
+
expect(clean_url.host).to eq('example.com')
|
23
21
|
end
|
24
|
-
end
|
25
|
-
|
26
|
-
describe "strip_fragments" do
|
27
|
-
let(:url) { URI("http://host.com/page#lol") }
|
28
22
|
|
29
23
|
it "should strip fragment components by default" do
|
30
|
-
|
31
|
-
clean_url = agent.sanitize_url(url)
|
24
|
+
clean_url = subject.sanitize_url(url)
|
32
25
|
|
33
26
|
expect(clean_url.fragment).to be_nil
|
34
27
|
end
|
35
28
|
|
36
|
-
it "should
|
37
|
-
|
38
|
-
clean_url = agent.sanitize_url(url)
|
29
|
+
it "should not strip query components by default" do
|
30
|
+
clean_url = subject.sanitize_url(uri)
|
39
31
|
|
40
|
-
expect(clean_url.
|
32
|
+
expect(clean_url.query).to eq('q=1')
|
41
33
|
end
|
42
|
-
end
|
43
34
|
|
44
|
-
|
45
|
-
|
35
|
+
context "when strip_fragments is disabled" do
|
36
|
+
subject { described_class.new(strip_fragments: false) }
|
46
37
|
|
47
|
-
|
48
|
-
|
49
|
-
clean_url = agent.sanitize_url(url)
|
38
|
+
it "should preserve the fragment components" do
|
39
|
+
clean_url = subject.sanitize_url(uri)
|
50
40
|
|
51
|
-
|
41
|
+
expect(clean_url.fragment).to eq('fragment')
|
42
|
+
end
|
52
43
|
end
|
53
44
|
|
54
|
-
|
55
|
-
|
56
|
-
|
45
|
+
context "when strip_query is enabled" do
|
46
|
+
subject { described_class.new(strip_query: true) }
|
47
|
+
|
48
|
+
it "should allow stripping of query components" do
|
49
|
+
clean_url = subject.sanitize_url(uri)
|
57
50
|
|
58
|
-
|
51
|
+
expect(clean_url.query).to be_nil
|
52
|
+
end
|
59
53
|
end
|
60
54
|
end
|
61
55
|
end
|
data/spec/agent_spec.rb
CHANGED
@@ -1,81 +1,803 @@
|
|
1
|
-
require 'spidr/agent'
|
2
|
-
|
3
1
|
require 'spec_helper'
|
4
|
-
require '
|
2
|
+
require 'example_app'
|
3
|
+
require 'settings/user_agent_examples'
|
4
|
+
|
5
|
+
require 'spidr/agent'
|
5
6
|
|
6
7
|
describe Agent do
|
7
|
-
|
8
|
+
it_should_behave_like "includes Spidr::Settings::UserAgent"
|
9
|
+
|
10
|
+
describe "#initialize" do
|
11
|
+
it "should not be running" do
|
12
|
+
expect(subject).to_not be_running
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should default :delay to 0" do
|
16
|
+
expect(subject.delay).to be 0
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should initialize #history" do
|
20
|
+
expect(subject.history).to be_empty
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should initialize #failures" do
|
24
|
+
expect(subject.failures).to be_empty
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should initialize #queue" do
|
28
|
+
expect(subject.queue).to be_empty
|
29
|
+
end
|
8
30
|
|
9
|
-
|
10
|
-
|
31
|
+
it "should initialize the #session_cache" do
|
32
|
+
expect(subject.sessions).to be_kind_of(SessionCache)
|
33
|
+
end
|
34
|
+
|
35
|
+
it "should initialize the #cookie_jar" do
|
36
|
+
expect(subject.cookies).to be_kind_of(CookieJar)
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should initialize the #auth_store" do
|
40
|
+
expect(subject.authorized).to be_kind_of(AuthStore)
|
41
|
+
end
|
11
42
|
end
|
12
43
|
|
13
|
-
|
14
|
-
|
44
|
+
describe "#history=" do
|
45
|
+
let(:previous_history) { Set[URI('http://example.com')] }
|
46
|
+
|
47
|
+
before { subject.history = previous_history }
|
48
|
+
|
49
|
+
it "should be able to restore the history" do
|
50
|
+
expect(subject.history).to eq(previous_history)
|
51
|
+
end
|
52
|
+
|
53
|
+
context "when given an Array of URIs" do
|
54
|
+
let(:previous_history) { [URI('http://example.com')] }
|
55
|
+
let(:converted_history) { Set.new(previous_history) }
|
56
|
+
|
57
|
+
it "should convert the Array to a Set" do
|
58
|
+
expect(subject.history).to eq(converted_history)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
context "when given a Set of Strings" do
|
63
|
+
let(:previous_history) { Set['http://example.com'] }
|
64
|
+
let(:converted_history) do
|
65
|
+
previous_history.map { |url| URI(url) }.to_set
|
66
|
+
end
|
67
|
+
|
68
|
+
it "should convert the Strings to URIs" do
|
69
|
+
expect(subject.history).to eq(converted_history)
|
70
|
+
end
|
71
|
+
end
|
15
72
|
end
|
16
73
|
|
17
|
-
|
18
|
-
|
74
|
+
describe "#failures=" do
|
75
|
+
let(:previous_failures) { Set[URI('http://example.com')] }
|
76
|
+
|
77
|
+
before { subject.failures = previous_failures }
|
78
|
+
|
79
|
+
it "should be able to restore the failures" do
|
80
|
+
expect(subject.failures).to eq(previous_failures)
|
81
|
+
end
|
82
|
+
|
83
|
+
context "when given an Array of URIs" do
|
84
|
+
let(:previous_failures) { [URI('http://example.com')] }
|
85
|
+
let(:converted_failures) { Set.new(previous_failures) }
|
86
|
+
|
87
|
+
it "should convert the Array to a Set" do
|
88
|
+
expect(subject.failures).to eq(converted_failures)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
context "when given a Set of Strings" do
|
93
|
+
let(:previous_failures) { Set['http://example.com'] }
|
94
|
+
let(:converted_failures) do
|
95
|
+
previous_failures.map { |url| URI(url) }.to_set
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should convert the Strings to URIs" do
|
99
|
+
expect(subject.failures).to eq(converted_failures)
|
100
|
+
end
|
101
|
+
end
|
19
102
|
end
|
20
103
|
|
21
|
-
|
22
|
-
|
23
|
-
|
104
|
+
describe "#queue=" do
|
105
|
+
let(:previous_queue) { [URI('http://example.com')] }
|
106
|
+
|
107
|
+
before { subject.queue = previous_queue }
|
108
|
+
|
109
|
+
it "should be able to restore the queue" do
|
110
|
+
expect(subject.queue).to eq(previous_queue)
|
111
|
+
end
|
24
112
|
|
25
|
-
|
26
|
-
|
113
|
+
context "when given a Set of URIs" do
|
114
|
+
let(:previous_queue) { Set[URI('http://example.com')] }
|
115
|
+
let(:converted_queue) { previous_queue.to_a }
|
116
|
+
|
117
|
+
it "should convert the Set to an Array" do
|
118
|
+
expect(subject.queue).to eq(converted_queue)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
context "when given a Set of Strings" do
|
123
|
+
let(:previous_queue) { Set['http://example.com'] }
|
124
|
+
let(:converted_queue) { previous_queue.map { |url| URI(url) } }
|
125
|
+
|
126
|
+
it "should convert the Strings to URIs" do
|
127
|
+
expect(subject.queue).to eq(converted_queue)
|
128
|
+
end
|
129
|
+
end
|
27
130
|
end
|
28
131
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
132
|
+
describe "#to_hash" do
|
133
|
+
let(:queue) { [URI("http://example.com/link")] }
|
134
|
+
let(:history) { Set[URI("http://example.com/")] }
|
135
|
+
|
136
|
+
subject do
|
137
|
+
described_class.new do |agent|
|
138
|
+
agent.queue = queue
|
139
|
+
agent.history = history
|
140
|
+
end
|
141
|
+
end
|
33
142
|
|
34
|
-
|
35
|
-
|
36
|
-
|
143
|
+
it "should return the queue and history" do
|
144
|
+
expect(subject.to_hash).to be == {
|
145
|
+
history: history,
|
146
|
+
queue: queue
|
147
|
+
}
|
148
|
+
end
|
37
149
|
end
|
38
150
|
|
39
|
-
|
40
|
-
|
41
|
-
|
151
|
+
context "when spidering" do
|
152
|
+
include_context "example App"
|
153
|
+
|
154
|
+
context "local links" do
|
155
|
+
context "relative paths" do
|
156
|
+
app do
|
157
|
+
get '/' do
|
158
|
+
%{<html><body><a href="link">relative link</a></body></html>}
|
159
|
+
end
|
160
|
+
|
161
|
+
get '/link' do
|
162
|
+
'<html><body>got here</body></html>'
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
it "should expand relative paths of links" do
|
167
|
+
expect(subject.history).to be == Set[
|
168
|
+
URI("http://#{host}/"),
|
169
|
+
URI("http://#{host}/link")
|
170
|
+
]
|
171
|
+
end
|
172
|
+
|
173
|
+
context "that contain directory escapes" do
|
174
|
+
app do
|
175
|
+
get '/' do
|
176
|
+
%{<html><body><a href="foo/./../../../../link">link</a></body></html>}
|
177
|
+
end
|
178
|
+
|
179
|
+
get '/link' do
|
180
|
+
'<html><body>got here</body></html>'
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
it "should expand relative paths before visiting them" do
|
185
|
+
expect(subject.history).to be == Set[
|
186
|
+
URI("http://#{host}/"),
|
187
|
+
URI("http://#{host}/link")
|
188
|
+
]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
context "absolute paths" do
|
194
|
+
app do
|
195
|
+
get '/' do
|
196
|
+
%{<html><body><a href="/link">absolute path</a></body></html>}
|
197
|
+
end
|
198
|
+
|
199
|
+
get '/link' do
|
200
|
+
'<html><body>got here</body></html>'
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
it "should visit links with absolute paths" do
|
205
|
+
expect(subject.history).to be == Set[
|
206
|
+
URI("http://#{host}/"),
|
207
|
+
URI("http://#{host}/link")
|
208
|
+
]
|
209
|
+
end
|
210
|
+
|
211
|
+
context "that contain directory escapes" do
|
212
|
+
app do
|
213
|
+
get '/' do
|
214
|
+
%{<html><body><a href="/foo/./../../../../link">link</a></body></html>}
|
215
|
+
end
|
216
|
+
|
217
|
+
get '/link' do
|
218
|
+
'<html><body>got here</body></html>'
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
it "should expand absolute links before visiting them" do
|
223
|
+
expect(subject.history).to be == Set[
|
224
|
+
URI("http://#{host}/"),
|
225
|
+
URI("http://#{host}/link")
|
226
|
+
]
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
context "remote links" do
|
234
|
+
app do
|
235
|
+
get '/' do
|
236
|
+
%{<html><body><a href="http://#{settings.host}/link">absolute link</a></body></html>}
|
237
|
+
end
|
238
|
+
|
239
|
+
get '/link' do
|
240
|
+
'<html><body>got here</body></html>'
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
it "should visit absolute links" do
|
245
|
+
expect(subject.history).to be == Set[
|
246
|
+
URI("http://#{host}/"),
|
247
|
+
URI("http://#{host}/link")
|
248
|
+
]
|
249
|
+
end
|
250
|
+
|
251
|
+
context "that contain directory escapes" do
|
252
|
+
app do
|
253
|
+
get '/' do
|
254
|
+
%{<html><body><a href="http://#{settings.host}/foo/./../../../../link">link</a></body></html>}
|
255
|
+
end
|
256
|
+
|
257
|
+
get '/link' do
|
258
|
+
'<html><body>got here</body></html>'
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
it "should expand absolute links before visiting them" do
|
263
|
+
expect(subject.history).to be == Set[
|
264
|
+
URI("http://#{host}/"),
|
265
|
+
URI("http://#{host}/link")
|
266
|
+
]
|
267
|
+
end
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
271
|
+
context "self-referential links" do
|
272
|
+
app do
|
273
|
+
get '/' do
|
274
|
+
%{<html><body><a href="/">same page</a></body></html>}
|
275
|
+
end
|
276
|
+
end
|
277
|
+
|
278
|
+
it "should ignore self-referential links" do
|
279
|
+
expect(subject.history).to be == Set[
|
280
|
+
URI("http://#{host}/")
|
281
|
+
]
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
context "circular links" do
|
286
|
+
app do
|
287
|
+
get '/' do
|
288
|
+
%{<html><body><a href="/link">link</a></body></html>}
|
289
|
+
end
|
290
|
+
|
291
|
+
get '/link' do
|
292
|
+
%{<html><body><a href="/">previous page</a></body></html>}
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
it "should ignore links that have been previously visited" do
|
297
|
+
expect(subject.history).to be == Set[
|
298
|
+
URI("http://#{host}/"),
|
299
|
+
URI("http://#{host}/link")
|
300
|
+
]
|
301
|
+
end
|
302
|
+
end
|
303
|
+
|
304
|
+
context "link cycles" do
|
305
|
+
app do
|
306
|
+
get '/' do
|
307
|
+
%{<html><body><a href="/link1">first link</a></body></html>}
|
308
|
+
end
|
309
|
+
|
310
|
+
get '/link1' do
|
311
|
+
%{<html><body><a href="/link2">next link</a></body></html>}
|
312
|
+
end
|
313
|
+
|
314
|
+
get '/link2' do
|
315
|
+
%{<html><body><a href="/">back to the beginning</a></body></html>}
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
it "should ignore links that have been previously visited" do
|
320
|
+
expect(subject.history).to be == Set[
|
321
|
+
URI("http://#{host}/"),
|
322
|
+
URI("http://#{host}/link1"),
|
323
|
+
URI("http://#{host}/link2"),
|
324
|
+
]
|
325
|
+
end
|
326
|
+
end
|
327
|
+
|
328
|
+
context "fragment links" do
|
329
|
+
app do
|
330
|
+
get '/' do
|
331
|
+
%{<html><body><a href="#fragment">fragment link</a></body></html>}
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
335
|
+
it "should ignore fragment links" do
|
336
|
+
expect(subject.history).to be == Set[
|
337
|
+
URI("http://#{host}/")
|
338
|
+
]
|
339
|
+
end
|
340
|
+
end
|
341
|
+
|
342
|
+
context "empty links" do
|
343
|
+
context "empty href" do
|
344
|
+
app do
|
345
|
+
get '/' do
|
346
|
+
%{<html><body><a href="">empty link</a> <a href=" ">blank link</a> <a>no href</a></body></html>}
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
it "should ignore links with empty hrefs" do
|
351
|
+
expect(subject.history).to be == Set[
|
352
|
+
URI("http://#{host}/")
|
353
|
+
]
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
357
|
+
context "whitespace href" do
|
358
|
+
app do
|
359
|
+
get '/' do
|
360
|
+
%{<html><body><a href=" ">blank link</a></body></html>}
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
it "should ignore links containing only whitespace" do
|
365
|
+
expect(subject.history).to be == Set[
|
366
|
+
URI("http://#{host}/")
|
367
|
+
]
|
368
|
+
end
|
369
|
+
end
|
370
|
+
|
371
|
+
context "missing href" do
|
372
|
+
app do
|
373
|
+
get '/' do
|
374
|
+
%{<html><body><a>no href</a></body></html>}
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
378
|
+
it "should ignore links with no href" do
|
379
|
+
expect(subject.history).to be == Set[
|
380
|
+
URI("http://#{host}/")
|
381
|
+
]
|
382
|
+
end
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
context "frames" do
|
387
|
+
app do
|
388
|
+
get '/' do
|
389
|
+
%{<html><body><frameset><frame src="/frame" /></frameset></body></html>}
|
390
|
+
end
|
42
391
|
|
43
|
-
|
44
|
-
|
392
|
+
get '/frame' do
|
393
|
+
%{<html><body><a href="/link">link</a></body></html>}
|
394
|
+
end
|
395
|
+
|
396
|
+
get '/link' do
|
397
|
+
%{<html><body>got here</body></html>}
|
398
|
+
end
|
399
|
+
end
|
400
|
+
|
401
|
+
it "should visit the frame and links within the frame" do
|
402
|
+
expect(subject.history).to be == Set[
|
403
|
+
URI("http://#{host}/"),
|
404
|
+
URI("http://#{host}/frame"),
|
405
|
+
URI("http://#{host}/link")
|
406
|
+
]
|
407
|
+
end
|
408
|
+
end
|
409
|
+
|
410
|
+
context "iframes" do
|
411
|
+
app do
|
412
|
+
get '/' do
|
413
|
+
%{<html><body><iframe src="/iframe" /></body></html>}
|
414
|
+
end
|
415
|
+
|
416
|
+
get '/iframe' do
|
417
|
+
%{<html><body><a href="/link">link</a></body></html>}
|
418
|
+
end
|
419
|
+
|
420
|
+
get '/link' do
|
421
|
+
%{<html><body>got here</body></html>}
|
422
|
+
end
|
423
|
+
end
|
424
|
+
|
425
|
+
it "should visit the iframe and links within the iframe" do
|
426
|
+
expect(subject.history).to be == Set[
|
427
|
+
URI("http://#{host}/"),
|
428
|
+
URI("http://#{host}/iframe"),
|
429
|
+
URI("http://#{host}/link")
|
430
|
+
]
|
431
|
+
end
|
432
|
+
end
|
433
|
+
|
434
|
+
context "javascript links" do
|
435
|
+
app do
|
436
|
+
get '/' do
|
437
|
+
%{<html><body><a href="javascript:fail();">javascript link</a></body></html>}
|
438
|
+
end
|
439
|
+
end
|
440
|
+
|
441
|
+
it "should ignore javascript: links" do
|
442
|
+
expect(subject.history).to be == Set[
|
443
|
+
URI("http://#{host}/")
|
444
|
+
]
|
445
|
+
end
|
446
|
+
|
447
|
+
context "when the link has an onclick action" do
|
448
|
+
app do
|
449
|
+
get '/' do
|
450
|
+
%{<html><body><a href="#" onclick="javascript:fail();">onclick link</a></body></html>}
|
451
|
+
end
|
452
|
+
end
|
453
|
+
|
454
|
+
it "should ignore links with onclick actions" do
|
455
|
+
expect(subject.history).to be == Set[
|
456
|
+
URI("http://#{host}/")
|
457
|
+
]
|
458
|
+
end
|
459
|
+
end
|
460
|
+
end
|
461
|
+
|
462
|
+
context "cookies" do
|
463
|
+
app do
|
464
|
+
get '/' do
|
465
|
+
response.set_cookie 'visited', 'true'
|
466
|
+
|
467
|
+
%{<html><body><a href="/link">link</a></body></html>}
|
468
|
+
end
|
469
|
+
|
470
|
+
get '/link' do
|
471
|
+
if request.cookies['visited'] == 'true'
|
472
|
+
%{<html><body>got here</body></html>}
|
473
|
+
else
|
474
|
+
halt 401, "Cookie not set"
|
475
|
+
end
|
476
|
+
end
|
477
|
+
end
|
478
|
+
|
479
|
+
it "should record cookies and send them with each request" do
|
480
|
+
expect(subject.history).to be == Set[
|
481
|
+
URI("http://#{host}/"),
|
482
|
+
URI("http://#{host}/link"),
|
483
|
+
]
|
484
|
+
|
485
|
+
expect(subject.cookies[host]).to be == {'visited' => 'true'}
|
486
|
+
end
|
487
|
+
end
|
488
|
+
|
489
|
+
context "redirects" do
|
490
|
+
context "300" do
|
491
|
+
app do
|
492
|
+
get '/' do
|
493
|
+
%{<html><body><a href="/redirect">redirect</a></body></html>}
|
494
|
+
end
|
495
|
+
|
496
|
+
get '/redirect' do
|
497
|
+
redirect to('/link'), 300
|
498
|
+
end
|
499
|
+
|
500
|
+
get '/link' do
|
501
|
+
%{<html><body>got here</body></html>}
|
502
|
+
end
|
503
|
+
end
|
504
|
+
|
505
|
+
it "should follow HTTP 300 redirects" do
|
506
|
+
expect(subject.history).to be == Set[
|
507
|
+
URI("http://#{host}/"),
|
508
|
+
URI("http://#{host}/redirect"),
|
509
|
+
URI("http://#{host}/link"),
|
510
|
+
]
|
511
|
+
end
|
512
|
+
end
|
513
|
+
|
514
|
+
context "301" do
|
515
|
+
app do
|
516
|
+
get '/' do
|
517
|
+
%{<html><body><a href="/redirect">redirect</a></body></html>}
|
518
|
+
end
|
519
|
+
|
520
|
+
get '/redirect' do
|
521
|
+
redirect to('/link'), 301
|
522
|
+
end
|
523
|
+
|
524
|
+
get '/link' do
|
525
|
+
%{<html><body>got here</body></html>}
|
526
|
+
end
|
527
|
+
end
|
528
|
+
|
529
|
+
it "should follow HTTP 301 redirects" do
|
530
|
+
expect(subject.history).to be == Set[
|
531
|
+
URI("http://#{host}/"),
|
532
|
+
URI("http://#{host}/redirect"),
|
533
|
+
URI("http://#{host}/link"),
|
534
|
+
]
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
context "302" do
|
539
|
+
app do
|
540
|
+
get '/' do
|
541
|
+
%{<html><body><a href="/redirect">redirect</a></body></html>}
|
542
|
+
end
|
543
|
+
|
544
|
+
get '/redirect' do
|
545
|
+
redirect to('/link'), 302
|
546
|
+
end
|
547
|
+
|
548
|
+
get '/link' do
|
549
|
+
%{<html><body>got here</body></html>}
|
550
|
+
end
|
551
|
+
end
|
552
|
+
|
553
|
+
it "should follow HTTP 302 redirects" do
|
554
|
+
expect(subject.history).to be == Set[
|
555
|
+
URI("http://#{host}/"),
|
556
|
+
URI("http://#{host}/redirect"),
|
557
|
+
URI("http://#{host}/link"),
|
558
|
+
]
|
559
|
+
end
|
560
|
+
end
|
561
|
+
|
562
|
+
context "303" do
|
563
|
+
app do
|
564
|
+
get '/' do
|
565
|
+
%{<html><body><a href="/redirect">redirect</a></body></html>}
|
566
|
+
end
|
567
|
+
|
568
|
+
get '/redirect' do
|
569
|
+
redirect to('/link'), 303
|
570
|
+
end
|
571
|
+
|
572
|
+
get '/link' do
|
573
|
+
%{<html><body>got here</body></html>}
|
574
|
+
end
|
575
|
+
end
|
576
|
+
|
577
|
+
it "should follow HTTP 303 redirects" do
|
578
|
+
expect(subject.history).to be == Set[
|
579
|
+
URI("http://#{host}/"),
|
580
|
+
URI("http://#{host}/redirect"),
|
581
|
+
URI("http://#{host}/link"),
|
582
|
+
]
|
583
|
+
end
|
584
|
+
end
|
585
|
+
|
586
|
+
context "307" do
|
587
|
+
app do
|
588
|
+
get '/' do
|
589
|
+
%{<html><body><a href="/redirect">redirect</a></body></html>}
|
590
|
+
end
|
591
|
+
|
592
|
+
get '/redirect' do
|
593
|
+
redirect to('/link'), 307
|
594
|
+
end
|
595
|
+
|
596
|
+
get '/link' do
|
597
|
+
%{<html><body>got here</body></html>}
|
598
|
+
end
|
599
|
+
end
|
600
|
+
|
601
|
+
it "should follow HTTP 307 redirects" do
|
602
|
+
expect(subject.history).to be == Set[
|
603
|
+
URI("http://#{host}/"),
|
604
|
+
URI("http://#{host}/redirect"),
|
605
|
+
URI("http://#{host}/link"),
|
606
|
+
]
|
607
|
+
end
|
608
|
+
end
|
609
|
+
|
610
|
+
context "meta-refresh" do
|
611
|
+
app do
|
612
|
+
get '/' do
|
613
|
+
%{<html><body><a href="/redirect">redirect</a></body></html>}
|
614
|
+
end
|
615
|
+
|
616
|
+
get '/redirect' do
|
617
|
+
%{<html><head><meta http-equiv="refresh" content="0; url=http://#{settings.host}/link" /></head><body>Redirecting...</body></html>}
|
618
|
+
end
|
619
|
+
|
620
|
+
get '/link' do
|
621
|
+
%{<html><body>got here</body></html>}
|
622
|
+
end
|
623
|
+
end
|
624
|
+
|
625
|
+
it "should follow meta-refresh redirects" do
|
626
|
+
expect(subject.history).to be == Set[
|
627
|
+
URI("http://#{host}/"),
|
628
|
+
URI("http://#{host}/redirect"),
|
629
|
+
URI("http://#{host}/link"),
|
630
|
+
]
|
631
|
+
end
|
632
|
+
end
|
633
|
+
end
|
634
|
+
|
635
|
+
context "Basic-Auth" do
|
636
|
+
app do
|
637
|
+
set :user, 'admin'
|
638
|
+
set :password, 'swordfish'
|
639
|
+
|
640
|
+
get '/' do
|
641
|
+
%{<html><body><a href="/private">private link</a></body></html>}
|
642
|
+
end
|
643
|
+
|
644
|
+
get '/private' do
|
645
|
+
auth = Rack::Auth::Basic::Request.new(request.env)
|
646
|
+
|
647
|
+
if auth.provided? && auth.basic? && auth.credentials && \
|
648
|
+
auth.credentials == [settings.user, settings.password]
|
649
|
+
%{<html><body>got here</body></html>}
|
650
|
+
else
|
651
|
+
headers['WWW-Authenticate'] = %{Basic realm="Restricted Area"}
|
652
|
+
halt 401, "<html><body><h1>Not authorized</h1></body></html>"
|
653
|
+
end
|
654
|
+
end
|
655
|
+
end
|
656
|
+
|
657
|
+
before do
|
658
|
+
subject.authorized.add("http://#{host}/private", app.user, app.password)
|
659
|
+
end
|
660
|
+
|
661
|
+
it "should send HTTP Basic-Auth credentials for protected URLs" do
|
662
|
+
expect(subject.history).to be == Set[
|
663
|
+
URI("http://#{host}/"),
|
664
|
+
URI("http://#{host}/private")
|
665
|
+
]
|
666
|
+
end
|
667
|
+
end
|
45
668
|
end
|
46
669
|
|
47
|
-
|
48
|
-
|
49
|
-
previous_failures = ['http://localhost/']
|
50
|
-
expected_failures = Set[URI('http://localhost/')]
|
670
|
+
context "when :host is specified" do
|
671
|
+
include_context "example App"
|
51
672
|
|
52
|
-
|
53
|
-
|
54
|
-
|
673
|
+
subject { described_class.new(host: host) }
|
674
|
+
|
675
|
+
app do
|
676
|
+
get '/' do
|
677
|
+
%{<html><body><a href="http://google.com/">external link</a> <a href="/link">local link</a></body></html>}
|
678
|
+
end
|
679
|
+
|
680
|
+
get '/link' do
|
681
|
+
%{<html><body>got here</body></html>}
|
682
|
+
end
|
683
|
+
end
|
684
|
+
|
685
|
+
it "should only visit links on the host" do
|
686
|
+
expect(subject.history).to be == Set[
|
687
|
+
URI("http://#{host}/"),
|
688
|
+
URI("http://#{host}/link")
|
689
|
+
]
|
690
|
+
end
|
55
691
|
end
|
56
692
|
|
57
|
-
|
58
|
-
|
59
|
-
|
693
|
+
context "when :limit is set" do
|
694
|
+
include_context "example App"
|
695
|
+
|
696
|
+
let(:limit) { 10 }
|
60
697
|
|
61
|
-
|
62
|
-
|
698
|
+
subject { described_class.new(host: host, limit: limit) }
|
699
|
+
|
700
|
+
app do
|
701
|
+
get '/' do
|
702
|
+
i = Integer(params['i'] || 0)
|
703
|
+
|
704
|
+
%{<html><body><a href="/?i=#{i+1}">next link</a></body></html>}
|
705
|
+
end
|
706
|
+
end
|
707
|
+
|
708
|
+
it "must only visit the maximum number of links" do
|
709
|
+
expect(subject.history).to be == Set[
|
710
|
+
URI("http://#{host}/"),
|
711
|
+
URI("http://#{host}/?i=1"),
|
712
|
+
URI("http://#{host}/?i=2"),
|
713
|
+
URI("http://#{host}/?i=3"),
|
714
|
+
URI("http://#{host}/?i=4"),
|
715
|
+
URI("http://#{host}/?i=5"),
|
716
|
+
URI("http://#{host}/?i=6"),
|
717
|
+
URI("http://#{host}/?i=7"),
|
718
|
+
URI("http://#{host}/?i=8"),
|
719
|
+
URI("http://#{host}/?i=9"),
|
720
|
+
]
|
721
|
+
end
|
63
722
|
end
|
64
723
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
724
|
+
context "when :depth is set" do
|
725
|
+
include_context "example App"
|
726
|
+
|
727
|
+
app do
|
728
|
+
get '/' do
|
729
|
+
%{<html><body><a href="/left?d=1">left</a><a href="/right?d=1">right</a></body></html>}
|
730
|
+
end
|
731
|
+
|
732
|
+
get %r{^/left|/right} do
|
733
|
+
d = Integer(params['d'])
|
734
|
+
|
735
|
+
%{<html><body><a href="/left?d=#{d+1}">left</a><a href="/right?d=#{d+1}">right</a></body></html>}
|
736
|
+
end
|
737
|
+
end
|
738
|
+
|
739
|
+
context "depth 0" do
|
740
|
+
subject { described_class.new(host: host, max_depth: 0) }
|
741
|
+
|
742
|
+
it "must only visit the first page" do
|
743
|
+
expect(subject.history).to be == Set[URI("http://#{host}/")]
|
744
|
+
end
|
745
|
+
end
|
69
746
|
|
70
|
-
|
71
|
-
|
72
|
-
|
747
|
+
context "depth > 0" do
|
748
|
+
subject { described_class.new(host: host, max_depth: 2) }
|
749
|
+
|
750
|
+
it "must visit links below the maximum depth" do
|
751
|
+
expect(subject.history).to be == Set[
|
752
|
+
URI("http://#{host}/"),
|
753
|
+
URI("http://#{host}/left?d=1"),
|
754
|
+
URI("http://#{host}/right?d=1"),
|
755
|
+
URI("http://#{host}/left?d=2"),
|
756
|
+
URI("http://#{host}/right?d=2")
|
757
|
+
]
|
758
|
+
end
|
759
|
+
end
|
73
760
|
end
|
74
761
|
|
75
|
-
|
76
|
-
|
762
|
+
context "when :robots is enabled" do
|
763
|
+
include_context "example App"
|
764
|
+
|
765
|
+
let(:user_agent) { 'Ruby' }
|
766
|
+
|
767
|
+
subject do
|
768
|
+
described_class.new(
|
769
|
+
host: host,
|
770
|
+
user_agent: user_agent,
|
771
|
+
robots: true
|
772
|
+
)
|
773
|
+
end
|
774
|
+
|
775
|
+
app do
|
776
|
+
get '/' do
|
777
|
+
%{<html><body><a href="/secret">don't follow this link</a> <a href="/pub">follow this link</a></body></html>}
|
778
|
+
end
|
779
|
+
|
780
|
+
get '/pub' do
|
781
|
+
%{<html><body>got here</body></html>}
|
782
|
+
end
|
783
|
+
|
784
|
+
get '/robots.txt' do
|
785
|
+
content_type 'text/plain'
|
786
|
+
|
787
|
+
[
|
788
|
+
"User-agent: *",
|
789
|
+
'Disallow: /',
|
790
|
+
].join($/)
|
791
|
+
end
|
792
|
+
end
|
793
|
+
|
794
|
+
it "should not follow links Disallowed by robots.txt" do
|
795
|
+
pending "https://github.com/bblimke/webmock/issues/642"
|
77
796
|
|
78
|
-
|
79
|
-
|
797
|
+
expect(subject.history).to be == Set[
|
798
|
+
URI("http://#{host}/"),
|
799
|
+
URI("http://#{host}/pub")
|
800
|
+
]
|
801
|
+
end
|
80
802
|
end
|
81
803
|
end
|