guitsaru-scraper 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/scraper/link.rb +5 -1
- data/scraper.gemspec +1 -1
- data/test/test_helper.rb +1 -0
- data/test/test_scraper.rb +24 -0
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
-0.2.0
+0.2.1
data/lib/scraper/link.rb
CHANGED
@@ -54,7 +54,11 @@ module Scrape
     elsif url =~ /^#/
       url = @url.gsub(/#.*/, '').gsub(/\/$/, '') + url
     else
-      url = (File.dirname(@url) + '/' + (url || ''))
+      if @url =~ /\/$/
+        url = @url + (url || '')
+      else
+        url = (File.dirname(@url) + '/' + (url || ''))
+      end
     end

     # Don't add this link if it matches a pattern in ignore
data/scraper.gemspec
CHANGED
data/test/test_helper.rb
CHANGED
@@ -9,6 +9,7 @@ require 'scraper'

 class Test::Unit::TestCase
   FakeWeb.register_uri(:get, "http://example.com/main.html", :body => File.join(File.dirname(__FILE__), 'fake_pages/main.html'))
+  FakeWeb.register_uri(:get, "http://example.com/folder/", :body => File.join(File.dirname(__FILE__), 'fake_pages/main.html'))
   FakeWeb.register_uri(:get, "http://example.com/first_page.html", :body => File.join(File.dirname(__FILE__), 'fake_pages/first_page.html'))
   FakeWeb.register_uri(:get, "http://example.com/first_child_page.html", :body => File.join(File.dirname(__FILE__), 'fake_pages/first_child_page.html'))
   FakeWeb.register_uri(:get, "http://example.com/not_added.html", :body => File.join(File.dirname(__FILE__), 'fake_pages/not_added.html'))
data/test/test_scraper.rb
CHANGED
@@ -14,6 +14,30 @@ class TestScraper < Test::Unit::TestCase
   end

   context "scraping" do
+    setup do
+      @scraper = Scraper.new('http://example.com/main.html', :recursive => false)
+      @results = @scraper.scrape
+    end
+
+    should "Include a list of links on the pages." do
+      assert(@results.include?(Link.new('http://example.com/first_page.html')))
+      assert(@results.include?(Link.new('http://example.com/not_added.html')))
+    end
+  end
+
+  context "scraping from folder" do
+    setup do
+      @scraper = Scraper.new('http://example.com/folder/', :recursive => false)
+      @results = @scraper.scrape
+    end
+
+    should "Include a list of links on the pages." do
+      assert(@results.include?(Link.new('http://example.com/folder/first_page.html')))
+      assert(@results.include?(Link.new('http://example.com/folder/not_added.html')))
+    end
+  end
+
+  context "scraping with div" do
     setup do
       @scraper = Scraper.new('http://example.com/main.html')
       @results = @scraper.scrape(:div => '#content')