guitsaru-scraper 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
data/lib/scraper/link.rb CHANGED
@@ -18,6 +18,14 @@ module Scrape
18
18
  @url == other.url
19
19
  end
20
20
 
21
+ def eql?(other)
22
+ return self == other
23
+ end
24
+
25
+ def hash
26
+ @url.hash
27
+ end
28
+
21
29
  private
22
30
  def get_links(div)
23
31
  links = []
@@ -27,9 +35,11 @@ module Scrape
27
35
  url = link['href']
28
36
  if url =~ /^\/(.*)/
29
37
  components = URI::split(@url)
30
- url = "#{components[0] || 'http'}://#{components[2]}/url"
38
+ url = "#{components[0] || 'http'}://#{components[2]}#{url}"
31
39
  elsif url =~ /^http:\/\//i
32
40
  url = url
41
+ elsif url =~ /^#/
42
+ url = @url.gsub(/#.*/, '').gsub(/\/$/, '') + url
33
43
  else
34
44
  url = (File.dirname(@url) + '/' + (url || ''))
35
45
  end
data/scraper.gemspec ADDED
@@ -0,0 +1,57 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{scraper}
5
+ s.version = "0.1.1"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Matt Pruitt"]
9
+ s.date = %q{2009-06-17}
10
+ s.email = %q{guitsaru@gmail.com}
11
+ s.extra_rdoc_files = [
12
+ "LICENSE",
13
+ "README.rdoc"
14
+ ]
15
+ s.files = [
16
+ ".document",
17
+ ".gitignore",
18
+ "LICENSE",
19
+ "README.rdoc",
20
+ "Rakefile",
21
+ "VERSION",
22
+ "lib/scraper.rb",
23
+ "lib/scraper/link.rb",
24
+ "scraper.gemspec",
25
+ "test/fake_pages/first_child_page.html",
26
+ "test/fake_pages/first_page.html",
27
+ "test/fake_pages/main.html",
28
+ "test/fake_pages/not_added.html",
29
+ "test/test_helper.rb",
30
+ "test/test_link.rb",
31
+ "test/test_scraper.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/guitsaru/scraper}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubyforge_project = %q{scraper}
37
+ s.rubygems_version = %q{1.3.4}
38
+ s.summary = %q{Collects all links on a webpage recursively.}
39
+ s.test_files = [
40
+ "test/test_helper.rb",
41
+ "test/test_link.rb",
42
+ "test/test_scraper.rb"
43
+ ]
44
+
45
+ if s.respond_to? :specification_version then
46
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
47
+ s.specification_version = 3
48
+
49
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
50
+ s.add_runtime_dependency(%q<hpricot>, [">= 0.6.161"])
51
+ else
52
+ s.add_dependency(%q<hpricot>, [">= 0.6.161"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<hpricot>, [">= 0.6.161"])
56
+ end
57
+ end
data/test/fake_pages/first_child_page.html CHANGED
@@ -10,6 +10,9 @@
10
10
  <!-- Date: 2009-06-17 -->
11
11
  </head>
12
12
  <body>
13
- <div id="content"><a href="/main.html">Main</a></div>
13
+ <div id="content">
14
+ <a href="/main.html">Main</a>
15
+ <a href="#content">Content</a>
16
+ </div>
14
17
  </body>
15
18
  </html>
data/test/test_scraper.rb CHANGED
@@ -22,6 +22,7 @@ class TestScraper < Test::Unit::TestCase
22
22
  should "Include a list of links on the pages." do
23
23
  assert(@results.include?(Link.new('http://example.com/first_page.html')))
24
24
  assert(@results.include?(Link.new('http://example.com/first_child_page.html')))
25
+ assert(@results.include?(Link.new('http://example.com/first_child_page.html#content')))
25
26
  assert(@results.include?(Link.new('http://example.com/main.html')))
26
27
  end
27
28
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: guitsaru-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Pruitt
@@ -40,6 +40,7 @@ files:
40
40
  - VERSION
41
41
  - lib/scraper.rb
42
42
  - lib/scraper/link.rb
43
+ - scraper.gemspec
43
44
  - test/fake_pages/first_child_page.html
44
45
  - test/fake_pages/first_page.html
45
46
  - test/fake_pages/main.html