guitsaru-scraper 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
data/lib/scraper/link.rb CHANGED
@@ -18,6 +18,14 @@ module Scrape
18
18
  @url == other.url
19
19
  end
20
20
 
21
+ def eql?(other)
22
+ return self == other
23
+ end
24
+
25
+ def hash
26
+ @url.hash
27
+ end
28
+
21
29
  private
22
30
  def get_links(div)
23
31
  links = []
@@ -27,9 +35,11 @@ module Scrape
27
35
  url = link['href']
28
36
  if url =~ /^\/(.*)/
29
37
  components = URI::split(@url)
30
- url = "#{components[0] || 'http'}://#{components[2]}/url"
38
+ url = "#{components[0] || 'http'}://#{components[2]}#{url}"
31
39
  elsif url =~ /^http:\/\//i
32
40
  url = url
41
+ elsif url =~ /^#/
42
+ url = @url.gsub(/#.*/, '').gsub(/\/$/, '') + url
33
43
  else
34
44
  url = (File.dirname(@url) + '/' + (url || ''))
35
45
  end
data/scraper.gemspec ADDED
@@ -0,0 +1,57 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{scraper}
5
+ s.version = "0.1.1"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Matt Pruitt"]
9
+ s.date = %q{2009-06-17}
10
+ s.email = %q{guitsaru@gmail.com}
11
+ s.extra_rdoc_files = [
12
+ "LICENSE",
13
+ "README.rdoc"
14
+ ]
15
+ s.files = [
16
+ ".document",
17
+ ".gitignore",
18
+ "LICENSE",
19
+ "README.rdoc",
20
+ "Rakefile",
21
+ "VERSION",
22
+ "lib/scraper.rb",
23
+ "lib/scraper/link.rb",
24
+ "scraper.gemspec",
25
+ "test/fake_pages/first_child_page.html",
26
+ "test/fake_pages/first_page.html",
27
+ "test/fake_pages/main.html",
28
+ "test/fake_pages/not_added.html",
29
+ "test/test_helper.rb",
30
+ "test/test_link.rb",
31
+ "test/test_scraper.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/guitsaru/scraper}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubyforge_project = %q{scraper}
37
+ s.rubygems_version = %q{1.3.4}
38
+ s.summary = %q{Collects all links on a webpage recursively.}
39
+ s.test_files = [
40
+ "test/test_helper.rb",
41
+ "test/test_link.rb",
42
+ "test/test_scraper.rb"
43
+ ]
44
+
45
+ if s.respond_to? :specification_version then
46
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
47
+ s.specification_version = 3
48
+
49
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
50
+ s.add_runtime_dependency(%q<hpricot>, [">= 0.6.161"])
51
+ else
52
+ s.add_dependency(%q<hpricot>, [">= 0.6.161"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<hpricot>, [">= 0.6.161"])
56
+ end
57
+ end
data/test/fake_pages/first_child_page.html CHANGED
@@ -10,6 +10,9 @@
10
10
  <!-- Date: 2009-06-17 -->
11
11
  </head>
12
12
  <body>
13
- <div id="content"><a href="/main.html">Main</a></div>
13
+ <div id="content">
14
+ <a href="/main.html">Main</a>
15
+ <a href="#content">Content</a>
16
+ </div>
14
17
  </body>
15
18
  </html>
data/test/test_scraper.rb CHANGED
@@ -22,6 +22,7 @@ class TestScraper < Test::Unit::TestCase
22
22
  should "Include a list of links on the pages." do
23
23
  assert(@results.include?(Link.new('http://example.com/first_page.html')))
24
24
  assert(@results.include?(Link.new('http://example.com/first_child_page.html')))
25
+ assert(@results.include?(Link.new('http://example.com/first_child_page.html#content')))
25
26
  assert(@results.include?(Link.new('http://example.com/main.html')))
26
27
  end
27
28
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: guitsaru-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Pruitt
@@ -40,6 +40,7 @@ files:
40
40
  - VERSION
41
41
  - lib/scraper.rb
42
42
  - lib/scraper/link.rb
43
+ - scraper.gemspec
43
44
  - test/fake_pages/first_child_page.html
44
45
  - test/fake_pages/first_page.html
45
46
  - test/fake_pages/main.html