tomtaylor-geo-spider 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/geo-spider.rb +1 -0
- data/lib/geo-spider/extractors/base.rb +1 -0
- data/lib/geo-spider/site.rb +1 -1
- data/lib/geo-spider/version.rb +1 -1
- data/spec/geo-spider/page_spec.rb +13 -0
- metadata +1 -1
data/lib/geo-spider.rb
CHANGED
data/lib/geo-spider/site.rb
CHANGED
@@ -28,7 +28,7 @@ module GeoSpider
|
|
28
28
|
seen << url
|
29
29
|
next_links = (page.internal_links - seen - queue) # only add internal links that we've not seen or already have queued.
|
30
30
|
queue.concat(next_links)
|
31
|
-
rescue Timeout::Error, OpenURI::HTTPError
|
31
|
+
rescue Timeout::Error, OpenURI::HTTPError, InvalidElement
|
32
32
|
next
|
33
33
|
end
|
34
34
|
end
|
data/lib/geo-spider/version.rb
CHANGED
data/spec/geo-spider/page_spec.rb
CHANGED
@@ -135,4 +135,17 @@ describe Page, "which is parsing a page with a string in a URL that happens to m
|
|
135
135
|
@page.locations.should be_empty
|
136
136
|
end
|
137
137
|
|
138
|
+
end
|
139
|
+
|
140
|
+
describe Page, "which is a parsing a page which doesn't contain the specific content_css_selector" do
|
141
|
+
|
142
|
+
before(:each) do
|
143
|
+
OpenURI.should_receive(:open_uri).and_return(page_as_string('page_with_links.html'))
|
144
|
+
@page = Page.new("http://www.example.com", :content_css_selector => "notreal")
|
145
|
+
end
|
146
|
+
|
147
|
+
it "should raise InvalidElement" do
|
148
|
+
lambda { @page.locations }.should raise_error(InvalidElement)
|
149
|
+
end
|
150
|
+
|
138
151
|
end
|