anemone 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +16 -0
- data/lib/anemone/core.rb +1 -1
- data/lib/anemone/http.rb +8 -1
- data/spec/core_spec.rb +12 -1
- data/spec/fakeweb_helper.rb +18 -10
- metadata +2 -2
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,19 @@
|
|
1
|
+
== 0.3.2 / 2010-02-04
|
2
|
+
|
3
|
+
* Bug fixes
|
4
|
+
|
5
|
+
* Fixed issue that allowed following redirects off the original domain
|
6
|
+
|
7
|
+
== 0.3.1 / 2010-01-22
|
8
|
+
|
9
|
+
* Minor enhancements
|
10
|
+
|
11
|
+
* Added an attr_accessor to Page for the HTTP response body
|
12
|
+
|
13
|
+
* Bug fixes
|
14
|
+
|
15
|
+
* Fixed incorrect method calls in CLI scripts
|
16
|
+
|
1
17
|
== 0.3.0 / 2009-12-15
|
2
18
|
|
3
19
|
* Major enhancements
|
data/lib/anemone/core.rb
CHANGED
data/lib/anemone/http.rb
CHANGED
@@ -62,7 +62,7 @@ module Anemone
|
|
62
62
|
yield response, code, loc, redirect_to, response_time
|
63
63
|
|
64
64
|
limit = redirect_limit
|
65
|
-
while
|
65
|
+
while redirect_to && allowed?(redirect_to, url) && limit > 0
|
66
66
|
loc = redirect_to
|
67
67
|
loc = url.merge(loc) if loc.relative?
|
68
68
|
response, response_time = get_response(loc, referer)
|
@@ -128,5 +128,12 @@ module Anemone
|
|
128
128
|
@opts[:verbose]
|
129
129
|
end
|
130
130
|
|
131
|
+
#
|
132
|
+
# Allowed to connect to the requested url?
|
133
|
+
#
|
134
|
+
def allowed?(to_url, from_url)
|
135
|
+
to_url.host.nil? || (to_url.host == from_url.host)
|
136
|
+
end
|
137
|
+
|
131
138
|
end
|
132
139
|
end
|
data/spec/core_spec.rb
CHANGED
@@ -19,7 +19,7 @@ module Anemone
|
|
19
19
|
Anemone.crawl(pages[0].url, @opts).should have(4).pages
|
20
20
|
end
|
21
21
|
|
22
|
-
it "should not leave the original domain" do
|
22
|
+
it "should not follow links that leave the original domain" do
|
23
23
|
pages = []
|
24
24
|
pages << FakePage.new('0', :links => ['1'], :hrefs => 'http://www.other.com/')
|
25
25
|
pages << FakePage.new('1')
|
@@ -30,6 +30,17 @@ module Anemone
|
|
30
30
|
core.pages.keys.should_not include('http://www.other.com/')
|
31
31
|
end
|
32
32
|
|
33
|
+
it "should not follow redirects that leave the original domain" do
|
34
|
+
pages = []
|
35
|
+
pages << FakePage.new('0', :links => ['1'], :redirect => 'http://www.other.com/')
|
36
|
+
pages << FakePage.new('1')
|
37
|
+
|
38
|
+
core = Anemone.crawl(pages[0].url, @opts)
|
39
|
+
|
40
|
+
core.should have(2).pages
|
41
|
+
core.pages.keys.should_not include('http://www.other.com/')
|
42
|
+
end
|
43
|
+
|
33
44
|
it "should follow http redirects" do
|
34
45
|
pages = []
|
35
46
|
pages << FakePage.new('0', :links => ['1'])
|
data/spec/fakeweb_helper.rb
CHANGED
@@ -9,12 +9,12 @@ FakeWeb.allow_net_connect = false
|
|
9
9
|
|
10
10
|
module Anemone
|
11
11
|
SPEC_DOMAIN = "http://www.example.com/"
|
12
|
-
|
12
|
+
|
13
13
|
class FakePage
|
14
14
|
attr_accessor :links
|
15
15
|
attr_accessor :hrefs
|
16
16
|
attr_accessor :body
|
17
|
-
|
17
|
+
|
18
18
|
def initialize(name = '', options = {})
|
19
19
|
@name = name
|
20
20
|
@links = [options[:links]].flatten if options.has_key?(:links)
|
@@ -22,30 +22,38 @@ module Anemone
|
|
22
22
|
@redirect = options[:redirect] if options.has_key?(:redirect)
|
23
23
|
@content_type = options[:content_type] || "text/html"
|
24
24
|
@body = options[:body]
|
25
|
-
|
25
|
+
|
26
26
|
create_body unless @body
|
27
27
|
add_to_fakeweb
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
def url
|
31
31
|
SPEC_DOMAIN + @name
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
private
|
35
|
-
|
35
|
+
|
36
36
|
def create_body
|
37
37
|
@body = "<html><body>"
|
38
38
|
@links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
|
39
39
|
@hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
|
40
40
|
@body += "</body></html>"
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
def add_to_fakeweb
|
44
44
|
options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}
|
45
|
-
|
45
|
+
|
46
46
|
if @redirect
|
47
|
-
options[:status] = [301, "Permanently Moved"]
|
48
|
-
|
47
|
+
options[:status] = [301, "Permanently Moved"]
|
48
|
+
|
49
|
+
# only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
|
50
|
+
redirect_url = (@redirect =~ /http/) ? @redirect : SPEC_DOMAIN + @redirect
|
51
|
+
options[:location] = redirect_url
|
52
|
+
|
53
|
+
# register the page this one redirects to
|
54
|
+
FakeWeb.register_uri(:get, redirect_url, {:body => '',
|
55
|
+
:content_type => @content_type,
|
56
|
+
:status => [200, "OK"]})
|
49
57
|
end
|
50
58
|
|
51
59
|
FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anemone
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Kite
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-22 00:00:00 -06:00
|
12
|
+
date: 2010-02-04 00:00:00 -06:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|