rawler 0.0.1 → 0.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,4 +1,10 @@
1
- === 1.0.0 / 2011-01-10
1
+ === 0.0.2 / 2011-01-10
2
+
3
+ * 1 major enhancement
4
+
5
+ * Handle relative urls
6
+
7
+ === 0.0.1 / 2011-01-10
2
8
 
3
9
  * 1 major enhancement
4
10
 
data/lib/rawler.rb CHANGED
@@ -6,7 +6,7 @@ $:.unshift(File.dirname(__FILE__)) unless
6
6
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
7
7
 
8
8
  module Rawler
9
- VERSION = '0.0.1'
9
+ VERSION = '0.0.2'
10
10
 
11
11
  autoload :Base, "rawler/base"
12
12
  autoload :Crawler, "rawler/crawler"
@@ -12,11 +12,17 @@ module Rawler
12
12
  content = Net::HTTP.get(URI.parse(url))
13
13
 
14
14
  doc = Nokogiri::HTML(content)
15
- doc.css('a').map { |a| a['href'] }
15
+ doc.css('a').map { |a| absolute_url(a['href']) }
16
16
  rescue Errno::ECONNREFUSED
17
17
  $output.puts "Couldn't connect to #{url}"
18
18
  []
19
19
  end
20
+
21
+ private
22
+
23
+ def absolute_url(path)
24
+ URI.parse(url).merge(path.to_s).to_s
25
+ end
20
26
 
21
27
  end
22
28
 
@@ -3,12 +3,40 @@ require File.dirname(__FILE__) + '/../spec_helper.rb'
3
3
  describe Rawler::Crawler do
4
4
 
5
5
  it "should parse all links" do
6
- url = 'http://example.com'
6
+ url = 'http://example.com/'
7
7
  register(url, site)
8
8
 
9
9
  Rawler::Crawler.new(url).links.should == ['http://example.com/foo', 'http://external.com/bar']
10
10
  end
11
11
 
12
+ it "should return an empty array when raising Errno::ECONNREFUSED" do
13
+ url = 'http://example.com'
14
+ register(url, site)
15
+
16
+ Net::HTTP.should_receive(:get).and_raise Errno::ECONNREFUSED
17
+
18
+ crawler = Rawler::Crawler.new(url).links.should == []
19
+ end
20
+
21
+ it "should parse relative links" do
22
+ url = 'http://example.com/path'
23
+ register(url, '<a href="/foo">foo</a>')
24
+
25
+ Rawler::Crawler.new(url).links.should == ['http://example.com/foo']
26
+ end
27
+
28
+ # it "should print a message when raising Errno::ECONNREFUSED" do
29
+ # pending "refactor output. Don't use a global variable"
30
+ # url = 'http://example.com'
31
+ # register(url, site)
32
+ #
33
+ # Net::HTTP.should_receive(:get).and_raise Errno::ECONNREFUSED
34
+ #
35
+ # $stdout.should_receive(:puts).with("Couldn't connect to #{url}")
36
+ #
37
+ # Rawler::Crawler.new(url).links
38
+ # end
39
+
12
40
  private
13
41
 
14
42
  def site
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rawler
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Oscar Del Ben