guitsaru-scraper 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/scraper/link.rb +11 -1
- data/scraper.gemspec +57 -0
- data/test/fake_pages/first_child_page.html +4 -1
- data/test/test_scraper.rb +1 -0
- metadata +2 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.1.1
|
data/lib/scraper/link.rb
CHANGED
@@ -18,6 +18,14 @@ module Scrape
|
|
18
18
|
@url == other.url
|
19
19
|
end
|
20
20
|
|
21
|
+
def eql?(other)
|
22
|
+
return self == other
|
23
|
+
end
|
24
|
+
|
25
|
+
def hash
|
26
|
+
@url.hash
|
27
|
+
end
|
28
|
+
|
21
29
|
private
|
22
30
|
def get_links(div)
|
23
31
|
links = []
|
@@ -27,9 +35,11 @@ module Scrape
|
|
27
35
|
url = link['href']
|
28
36
|
if url =~ /^\/(.*)/
|
29
37
|
components = URI::split(@url)
|
30
|
-
url = "#{components[0] || 'http'}://#{components[2]}
|
38
|
+
url = "#{components[0] || 'http'}://#{components[2]}#{url}"
|
31
39
|
elsif url =~ /^http:\/\//i
|
32
40
|
url = url
|
41
|
+
elsif url =~ /^#/
|
42
|
+
url = @url.gsub(/#.*/, '').gsub(/\/$/, '') + url
|
33
43
|
else
|
34
44
|
url = (File.dirname(@url) + '/' + (url || ''))
|
35
45
|
end
|
data/scraper.gemspec
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{scraper}
|
5
|
+
s.version = "0.1.1"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Matt Pruitt"]
|
9
|
+
s.date = %q{2009-06-17}
|
10
|
+
s.email = %q{guitsaru@gmail.com}
|
11
|
+
s.extra_rdoc_files = [
|
12
|
+
"LICENSE",
|
13
|
+
"README.rdoc"
|
14
|
+
]
|
15
|
+
s.files = [
|
16
|
+
".document",
|
17
|
+
".gitignore",
|
18
|
+
"LICENSE",
|
19
|
+
"README.rdoc",
|
20
|
+
"Rakefile",
|
21
|
+
"VERSION",
|
22
|
+
"lib/scraper.rb",
|
23
|
+
"lib/scraper/link.rb",
|
24
|
+
"scraper.gemspec",
|
25
|
+
"test/fake_pages/first_child_page.html",
|
26
|
+
"test/fake_pages/first_page.html",
|
27
|
+
"test/fake_pages/main.html",
|
28
|
+
"test/fake_pages/not_added.html",
|
29
|
+
"test/test_helper.rb",
|
30
|
+
"test/test_link.rb",
|
31
|
+
"test/test_scraper.rb"
|
32
|
+
]
|
33
|
+
s.homepage = %q{http://github.com/guitsaru/scraper}
|
34
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
35
|
+
s.require_paths = ["lib"]
|
36
|
+
s.rubyforge_project = %q{scraper}
|
37
|
+
s.rubygems_version = %q{1.3.4}
|
38
|
+
s.summary = %q{Collects all links on a webpage recursively.}
|
39
|
+
s.test_files = [
|
40
|
+
"test/test_helper.rb",
|
41
|
+
"test/test_link.rb",
|
42
|
+
"test/test_scraper.rb"
|
43
|
+
]
|
44
|
+
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
47
|
+
s.specification_version = 3
|
48
|
+
|
49
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
50
|
+
s.add_runtime_dependency(%q<hpricot>, [">= 0.6.161"])
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<hpricot>, [">= 0.6.161"])
|
53
|
+
end
|
54
|
+
else
|
55
|
+
s.add_dependency(%q<hpricot>, [">= 0.6.161"])
|
56
|
+
end
|
57
|
+
end
|
data/test/test_scraper.rb
CHANGED
@@ -22,6 +22,7 @@ class TestScraper < Test::Unit::TestCase
|
|
22
22
|
should "Include a list of links on the pages." do
|
23
23
|
assert(@results.include?(Link.new('http://example.com/first_page.html')))
|
24
24
|
assert(@results.include?(Link.new('http://example.com/first_child_page.html')))
|
25
|
+
assert(@results.include?(Link.new('http://example.com/first_child_page.html#content')))
|
25
26
|
assert(@results.include?(Link.new('http://example.com/main.html')))
|
26
27
|
end
|
27
28
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: guitsaru-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Pruitt
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- VERSION
|
41
41
|
- lib/scraper.rb
|
42
42
|
- lib/scraper/link.rb
|
43
|
+
- scraper.gemspec
|
43
44
|
- test/fake_pages/first_child_page.html
|
44
45
|
- test/fake_pages/first_page.html
|
45
46
|
- test/fake_pages/main.html
|