rawler 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,4 +1,10 @@
1
- === 1.0.0 / 2011-01-10
1
+ === 0.0.2 / 2011-01-10
2
+
3
+ * 1 major enhancement
4
+
5
+ * Handle relative urls
6
+
7
+ === 0.0.1 / 2011-01-10
2
8
 
3
9
  * 1 major enhancement
4
10
 
data/lib/rawler.rb CHANGED
@@ -6,7 +6,7 @@ $:.unshift(File.dirname(__FILE__)) unless
6
6
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
7
7
 
8
8
  module Rawler
9
- VERSION = '0.0.1'
9
+ VERSION = '0.0.2'
10
10
 
11
11
  autoload :Base, "rawler/base"
12
12
  autoload :Crawler, "rawler/crawler"
@@ -12,11 +12,17 @@ module Rawler
12
12
  content = Net::HTTP.get(URI.parse(url))
13
13
 
14
14
  doc = Nokogiri::HTML(content)
15
- doc.css('a').map { |a| a['href'] }
15
+ doc.css('a').map { |a| absolute_url(a['href']) }
16
16
  rescue Errno::ECONNREFUSED
17
17
  $output.puts "Couldn't connect to #{url}"
18
18
  []
19
19
  end
20
+
21
+ private
22
+
23
+ def absolute_url(path)
24
+ URI.parse(url).merge(path.to_s).to_s
25
+ end
20
26
 
21
27
  end
22
28
 
@@ -3,12 +3,40 @@ require File.dirname(__FILE__) + '/../spec_helper.rb'
3
3
  describe Rawler::Crawler do
4
4
 
5
5
  it "should parse all links" do
6
- url = 'http://example.com'
6
+ url = 'http://example.com/'
7
7
  register(url, site)
8
8
 
9
9
  Rawler::Crawler.new(url).links.should == ['http://example.com/foo', 'http://external.com/bar']
10
10
  end
11
11
 
12
+ it "should return an empty array when raising Errno::ECONNREFUSED" do
13
+ url = 'http://example.com'
14
+ register(url, site)
15
+
16
+ Net::HTTP.should_receive(:get).and_raise Errno::ECONNREFUSED
17
+
18
+ crawler = Rawler::Crawler.new(url).links.should == []
19
+ end
20
+
21
+ it "should parse relative links" do
22
+ url = 'http://example.com/path'
23
+ register(url, '<a href="/foo">foo</a>')
24
+
25
+ Rawler::Crawler.new(url).links.should == ['http://example.com/foo']
26
+ end
27
+
28
+ # it "should print a message when raising Errno::ECONNREFUSED" do
29
+ # pending "refactor output. Don't use a global variable"
30
+ # url = 'http://example.com'
31
+ # register(url, site)
32
+ #
33
+ # Net::HTTP.should_receive(:get).and_raise Errno::ECONNREFUSED
34
+ #
35
+ # $stdout.should_receive(:puts).with("Couldn't connect to #{url}")
36
+ #
37
+ # Rawler::Crawler.new(url).links
38
+ # end
39
+
12
40
  private
13
41
 
14
42
  def site
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rawler
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Oscar Del Ben