RubyGems - rawler - Versions diffs - 0.0.5 → 0.0.6 - Mend

rawler 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

data/Gemfile +9 -0
data/Gemfile.lock +22 -0
data/Manifest.txt +7 -7
data/README.txt +26 -4
data/Rakefile +11 -0
data/bin/rawler +2 -1
data/lib/rawler.rb +13 -2
data/lib/rawler/base.rb +27 -6
data/lib/rawler/crawler.rb +24 -7
data/spec/lib/base_spec.rb +5 -0
data/spec/lib/rawler/base_spec.rb +0 -0
data/spec/lib/rawler/crawler_spec.rb +214 -0
data/spec/{unit/base_spec.rb → lib/rawler_spec.rb} +79 -8
data/spec/spec_helper.rb +1 -1
data/specs.watchr +59 -0
metadata +11 -13
data/.autotest +0 -23
data/spec/unit/crawler/base_spec.rb +0 -75
data/spec/unit/crawler/content_type_spec.rb +0 -23
data/spec/unit/crawler/exceptions_spec.rb +0 -54
data/spec/unit/crawler/http_basic_spec.rb +0 -25
data/spec/unit/crawler/url_domain_spec.rb +0 -26

data/Gemfile ADDED Viewed

@@ -0,0 +1,9 @@
+source "http://rubygems.org"
+gem "nokogiri", "1.4.4"
+group :development, :test do
+  gem "hoe", "2.6.2"
+  gem "rspec", "2.4.0"
+  gem "fakeweb", "1.3.0"
+end

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,22 @@
+GEM
+  remote: http://rubygems.org/
+  specs:
+    diff-lcs (1.1.2)
+    fakeweb (1.3.0)
+    nokogiri (1.4.4)
+    rspec (2.4.0)
+      rspec-core (~> 2.4.0)
+      rspec-expectations (~> 2.4.0)
+      rspec-mocks (~> 2.4.0)
+    rspec-core (2.4.0)
+    rspec-expectations (2.4.0)
+      diff-lcs (~> 1.1.2)
+    rspec-mocks (2.4.0)
+PLATFORMS
+  ruby
+DEPENDENCIES
+  fakeweb (= 1.3.0)
+  nokogiri (= 1.4.4)
+  rspec (= 2.4.0)

data/Manifest.txt CHANGED Viewed

@@ -1,4 +1,5 @@
-.autotest
+Gemfile
+Gemfile.lock
 History.txt
 Manifest.txt
 README.txt
@@ -10,13 +11,12 @@ lib/rawler/core_extensions.rb
 lib/rawler/core_extensions/module.rb
 lib/rawler/crawler.rb
 lib/rawler/request.rb
+spec/lib/base_spec.rb
+spec/lib/rawler/base_spec.rb
+spec/lib/rawler/crawler_spec.rb
+spec/lib/rawler_spec.rb
 spec/spec.opts
 spec/spec_helper.rb
-spec/unit/base_spec.rb
-spec/unit/crawler/base_spec.rb
-spec/unit/crawler/content_type_spec.rb
-spec/unit/crawler/exceptions_spec.rb
-spec/unit/crawler/http_basic_spec.rb
-spec/unit/crawler/url_domain_spec.rb
+specs.watchr
 tasks/rspec.rake
 vendor/lib-trollop.rb

data/README.txt CHANGED Viewed

@@ -8,8 +8,6 @@ Rawler is a Ruby library that crawls your website and checks the status code for
 Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
-Please note: I had to temporarily remove url encoding in order to resolve some issues, so if you find any issue, please let me know. I'm also going to use Mechanizer for parsing pages with the next release.
 == SYNOPSIS:
   rawler http://example.com [options]
@@ -24,16 +22,40 @@ Please note: I had to temporarily remove url encoding in order to resolve some i
 gem install rawler
+== DEVELOPMENT:
+Run bundle install to install everything you need
+  rake test
+To package and run the gem locally:
+  rake package
+  cd pkg
+  gem install rawler-#{version}.gem
+If you add files, run:
+  rake check_manifest
+And add them to the Manifest file.
 == TODO
+* Add logger levels
 * Follow redirects, but still inform about them
 * Respect robots.txt
 * Export to html
 == CONTRIBUTORS:
-* Vesa Vänskä https://github.com/vesan
+* bcoob
 * Hugh Sasse
+* Ken Egozi
+* Robert Glaser
+* Vesa Vänskä
+See also https://github.com/oscardelben/rawler/contributors
 == LICENSE:
@@ -58,4 +80,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/Rakefile CHANGED Viewed

@@ -3,6 +3,12 @@
 require 'rubygems'
 require 'hoe'
+# require 'bundler'
+# Bundler::GemHelper.install_tasks
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new(:test)
 # Hoe.plugin :compiler
 # Hoe.plugin :gem_prelude_sucks
 # Hoe.plugin :inline
@@ -21,4 +27,9 @@ Hoe.spec 'rawler' do
   extra_deps << ['nokogiri']
 end
+desc 'Console'
+task :console do
+  exec 'irb -rubygems -I lib -r rawler.rb'
+end
 # vim: syntax=ruby

data/bin/rawler CHANGED Viewed

@@ -19,7 +19,8 @@ EOS
   opt :password, "HTT Basic Password", :type => :string
 end
-domain = ARGV.shift
+# Use dup to unfrozen string
+domain = ARGV.shift.dup
 if domain.nil?
   Trollop::die "Domain name is mandatory. Type --help for help"

data/lib/rawler.rb CHANGED Viewed

@@ -2,11 +2,12 @@ require 'rubygems'
 require 'net/http'
 require 'net/https'
 require 'nokogiri'
+require 'logger'
 require 'rawler/core_extensions'
 module Rawler
-  VERSION = '0.0.5'
+  VERSION = '0.0.6'
   mattr_accessor :output
   mattr_accessor :url
@@ -16,4 +17,14 @@ module Rawler
   autoload :Base, "rawler/base"
   autoload :Crawler, "rawler/crawler"
   autoload :Request, "rawler/request"
-end
+  def self.url=(url)
+    url.strip!
+    if (url =~ /http:\/\//) != 0
+      url = 'http://' + url
+    end
+    @@url = url
+  end
+end

data/lib/rawler/base.rb CHANGED Viewed

@@ -8,7 +8,7 @@ module Rawler
       @responses = {}
       Rawler.url      = url
-      Rawler.output   = output
+      Rawler.output   = Logger.new(output)
       Rawler.username = username
       Rawler.password = password
     end
@@ -37,13 +37,13 @@ module Rawler
     def add_status_code(link)
       response = Rawler::Request.get(link)
-      write("#{response.code} - #{link}")
+      record_response(response.code, link)
       responses[link] = { :status => response.code.to_i }
     rescue Errno::ECONNREFUSED
-      write("Connection refused - '#{link}'")
+      Rawler.output.error("Connection refused - '#{link}'")
     rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT,
       EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
-      write("Connection problems - '#{link}'")
+      Rawler.output.error("Connection problems - '#{link}'")
     end
     def same_domain?(link)
@@ -55,9 +55,30 @@ module Rawler
     end
     def write(message)
-      Rawler.output.puts(message)
+      # TODO: This may not always be an error message,
+      # but that will make it show up most of the time
+      Rawler.output.error(message)
+    end
+    def record_response(code, link)
+      message = "#{code} - #{link}"
+      code = code.to_i
+      case code / 100
+      when 1
+        # TODO: check that if a 100 is received
+        # then there is another status code as well
+        Rawler.output.info(message)
+      when 2 then
+        Rawler.output.info(message)
+      when 3 then
+        Rawler.output.warn(message)
+      when 4,5 then
+        Rawler.output.error(message)
+      else
+        Rawler.output.error("Unknown code #{message}")
+      end
     end
   end
-end
+end

data/lib/rawler/crawler.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module Rawler
     attr_accessor :url, :links
     def initialize(url)
-      @url = url
+      @url = url.strip
     end
     def links
@@ -16,7 +16,7 @@ module Rawler
       response = Rawler::Request.get(url)
       doc = Nokogiri::HTML(response.body)
-      doc.css('a').map { |a| absolute_url(a['href']) }.select { |url| valid_url?(url) }
+      doc.css('a').map { |a| a['href'] }.map { |url| absolute_url(url) }.select { |url| valid_url?(url) }
     rescue Errno::ECONNREFUSED
       write("Couldn't connect to #{url}")
       []
@@ -28,11 +28,16 @@ module Rawler
     private
     def absolute_url(path)
-      URI.parse(url).merge(path.to_s).to_s
+      path.strip!
+      if path[0].chr == '/'
+        URI.parse(url).merge(path.to_s).to_s
+      else
+        path
+      end
     end
     def write(message)
-      Rawler.output.puts(message)
+      Rawler.output.error(message)
     end
     def different_domain?(url_1, url_2)
@@ -44,11 +49,23 @@ module Rawler
     end
     def valid_url?(url)
+      return false unless url
+      url.strip!
       scheme = URI.parse(url).scheme
+      if ['http', 'https'].include?(scheme)
+        true
+      else
+        write("Invalid url - #{url}")
+        false
+      end
-      ['http', 'https'].include?(scheme)
+    rescue URI::InvalidURIError
+      false
+       write("Invalid url - #{url}")
     end
   end
-end
+end

data/spec/lib/base_spec.rb ADDED Viewed

@@ -0,0 +1,5 @@
+require File.dirname(__FILE__) + '/../spec_helper.rb'
+describe Rawler do
+end

data/spec/lib/rawler/base_spec.rb ADDED Viewed

File without changes

data/spec/lib/rawler/crawler_spec.rb ADDED Viewed

@@ -0,0 +1,214 @@
+require File.dirname(__FILE__) + '/../../spec_helper.rb'
+describe Rawler::Crawler do
+  let(:url)    { 'http://example.com' }
+  let(:output) { double("output", :error => nil) }
+  before(:each) do
+    Rawler.stub!(:url).and_return(url)
+    Rawler.stub!(:output).and_return(output)
+  end
+  context "basic functionality" do
+    let(:url) { 'http://example.com' }
+    let(:crawler) { Rawler::Crawler.new(url) }
+    let(:content) {
+      content = <<-content
+        <p><a href="http://example.com/foo">foo</a></p>
+    		<p><a href="http://external.com/bar">bar</a></p>
+    	content
+    }
+    before(:each) do
+      register(url, content)
+    end
+    it "should parse all links" do
+      crawler.links.should == ['http://example.com/foo', 'http://external.com/bar']
+    end
+  end
+  context "relative paths" do
+    let(:url)     { 'http://example.com/path' }
+    let(:crawler) { Rawler::Crawler.new(url) }
+    let(:content) { '<a href="/foo">foo</a>' }
+    before(:each) do
+      register(url, content)
+    end
+    it "should parse relative links" do
+      crawler.links.should == ['http://example.com/foo']
+    end
+  end
+  context "different domains" do
+    let(:url)     { 'http://external.com/path' }
+    let(:crawler) { Rawler::Crawler.new(url) }
+    let(:content) { '<a href="/foo">foo</a>' }
+    before(:each) do
+      Rawler.stub!(:url).and_return('http://example.com')
+      register(url, content)
+    end
+    it "should parse relative links" do
+      crawler.links.should == []
+    end
+  end
+  context "urls with hash tags" do
+    let(:url)     { 'http://example.com/path' }
+    let(:crawler) { Rawler::Crawler.new(url) }
+    let(:content) { '<a href="/foo#bar">foo</a>' }
+    before(:each) do
+      register(url, content)
+    end
+    it "should parse relative links" do
+      crawler.links.should == ['http://example.com/foo#bar']
+    end
+  end
+  context "invalid urls" do
+    let(:url)     { 'http://example.com/path' }
+    let(:crawler) { Rawler::Crawler.new(url) }
+    let(:js_url)  { "javascript:fn('nbjmup;jhfs.esf{fio/dpn');" }
+    let(:content) { "<a href=\"#{js_url}\">foo</a>" }
+    before(:each) do
+      register(url, content)
+    end
+    it "should parse relative links" do
+      crawler.links.should == []
+    end
+    it "should report the error" do
+      crawler.should_receive(:write).with("Invalid url - #{js_url}")
+      crawler.links
+    end
+  end
+  context "content type" do
+    ['text/plain', 'text/css', 'image/jpeg'].each do |content_type|
+      let(:url)     { 'http://example.com' }
+      let(:crawler) { Rawler::Crawler.new(url) }
+      before(:each) do
+        register(url, '', 200, :content_type => content_type)
+      end
+      it "should ignore '#{content_type}'" do
+        crawler.links.should == []
+      end
+    end
+  end
+  context "Exceptions" do
+    let(:url)     { 'http://example.com' }
+    let(:crawler) { Rawler::Crawler.new(url) }
+    let(:output)  { double('output', :error => nil) }
+    before(:each) do
+      register(url, '')
+      Rawler.stub!(:output).and_return(output)
+    end
+    context "Errno::ECONNREFUSED" do
+      before(:each) do
+        Rawler::Request.stub!(:get).and_raise Errno::ECONNREFUSED
+      end
+      it "should return an empty array" do
+        crawler.links.should == []
+      end
+      it "should print a message when raising Errno::ECONNREFUSED" do
+        output.should_receive(:error).with("Couldn't connect to #{url}")
+        crawler.links
+      end
+    end
+    context "Errno::ETIMEDOUT" do
+      before(:each) do
+        Rawler::Request.stub!(:get).and_raise Errno::ETIMEDOUT
+      end
+      it "should return an empty array when raising Errno::ETIMEDOUT" do
+        crawler.links.should == []
+      end
+      it "should print a message when raising Errno::ETIMEDOUT" do
+        output.should_receive(:error).with("Connection to #{url} timed out")
+        crawler.links
+      end
+    end
+  end
+  context "http basic" do
+    let(:url)     { 'http://example.com' }
+    let(:content) { '<a href="http://example.com/secret-path">foo</a>' }
+    let(:crawler) { Rawler::Crawler.new('http://example.com/secret') }
+    before(:each) do
+      register('http://example.com/secret', '', :status => ["401", "Unauthorized"])
+      register('http://foo:bar@example.com/secret', content)
+      Rawler.stub!(:username).and_return('foo')
+      Rawler.stub!(:password).and_return('bar')
+    end
+    it "should crawl http basic pages" do
+      crawler.links.should == ['http://example.com/secret-path']
+    end
+  end
+  context "url domain" do
+    let(:content) {
+      content = <<-content
+        <a href="http://example.com/valid">foo</a>
+        <a href="mailto:info@example.com">invalid</a>
+        <a href="https://foo.com">valid</a>
+        <a href=" http://fooo.com ">valid with illegal whitespaces</a>
+      content
+    }
+    let(:url)     { 'http://example.com' }
+    let(:crawler) { Rawler::Crawler.new(url) }
+    before(:each) do
+      register(url, content)
+    end
+    it "should ignore links other than http or https" do
+      crawler.links.should == ['http://example.com/valid', 'https://foo.com', 'http://fooo.com']
+    end
+  end
+end

data/spec/{unit/base_spec.rb → lib/rawler_spec.rb} RENAMED Viewed

@@ -6,6 +6,7 @@ describe Rawler::Base do
   let(:rawler)  { Rawler::Base.new('http://example.com', output) }
   before(:each) do
+    Rawler.stub!(:output).and_return(output)
     register('http://example.com', site)
   end
@@ -42,10 +43,10 @@ describe Rawler::Base do
       register('http://external.com', '')
       register('http://external.com/foo', '', 302)
-      output.should_receive(:puts).with('200 - http://example.com/foo1')
-      output.should_receive(:puts).with('200 - http://example.com/foo2')
-      output.should_receive(:puts).with('200 - http://external.com')
-      output.should_receive(:puts).with('302 - http://external.com/foo')
+      output.should_receive(:info).with('200 - http://example.com/foo1')
+      output.should_receive(:info).with('200 - http://example.com/foo2')
+      output.should_receive(:info).with('200 - http://external.com')
+      output.should_receive(:warn).with('302 - http://external.com/foo')
       rawler.validate
     end
@@ -54,7 +55,7 @@ describe Rawler::Base do
       register('http://example.com/foo1', '<a href="http://example.com/page-with#hashtag">x</a>')
       register('http://example.com/page-with', '')
-      output.should_receive(:puts).with('200 - http://example.com/page-with#hashtag')
+      output.should_receive(:info).with('200 - http://example.com/page-with#hashtag')
       rawler.validate
     end
@@ -93,7 +94,7 @@ describe Rawler::Base do
       Rawler::Request.should_receive(:get).and_raise Errno::ECONNREFUSED
-      output.should_receive(:puts).with("Connection refused - '#{url}'")
+      output.should_receive(:error).with("Connection refused - '#{url}'")
       rawler.send(:add_status_code, url)
     end
@@ -105,7 +106,7 @@ describe Rawler::Base do
          Rawler::Request.should_receive(:get).and_raise error
-         output.should_receive(:puts).with("Connection problems - '#{url}'")
+         output.should_receive(:error).with("Connection problems - '#{url}'")
          rawler.send(:add_status_code, url)
        end
@@ -113,6 +114,76 @@ describe Rawler::Base do
   end
+  describe "record_response" do
+    let(:message) { 'foo' }
+    context "response code 100" do
+      %w!100, 150, 199!.each do |code|
+        it "logger should receive info" do
+          output.should_receive(:info).with("#{code} - #{message}")
+          rawler.send(:record_response, code, message)
+        end
+      end
+    end
+    context "response code 200" do
+      %w!200, 250, 299!.each do |code|
+        it "logger should receive info" do
+          output.should_receive(:info).with("#{code} - #{message}")
+          rawler.send(:record_response, code, message)
+        end
+      end
+    end
+    context "response code 300" do
+      %w!300, 350, 399!.each do |code|
+        it "logger should receive warn" do
+          output.should_receive(:warn).with("#{code} - #{message}")
+          rawler.send(:record_response, code, message)
+        end
+      end
+    end
+    context "response code 400" do
+      %w!400, 450, 499!.each do |code|
+        it "logger should receive info" do
+          output.should_receive(:error).with("#{code} - #{message}")
+          rawler.send(:record_response, code, message)
+        end
+      end
+    end
+    context "response code 500" do
+      %w!400, 550, 599!.each do |code|
+        it "logger should receive info" do
+          output.should_receive(:error).with("#{code} - #{message}")
+          rawler.send(:record_response, code, message)
+        end
+      end
+    end
+    context "response code invalid" do
+      let(:code) { 600 }
+      it "logger should receive eror" do
+        output.should_receive(:error).with("Unknown code #{code} - #{message}")
+        rawler.send(:record_response, code, message)
+      end
+    end
+  end
   private
@@ -129,4 +200,4 @@ describe Rawler::Base do
     site
   end
-end
+end

data/spec/spec_helper.rb CHANGED Viewed

@@ -15,4 +15,4 @@ FakeWeb.allow_net_connect = false
 def register(uri, content, status=200, options={})
   FakeWeb.register_uri(:any, uri, { :body => content, :status => status, :content_type => 'text/html' }.merge(options))
-end
+end

data/specs.watchr ADDED Viewed

@@ -0,0 +1,59 @@
+# Run me with:
+#
+#   $ watchr specs.watchr
+# --------------------------------------------------
+# Convenience Methods
+# --------------------------------------------------
+def all_test_files
+  Dir['spec/**/*_spec.rb']
+end
+def run_test_matching(thing_to_match)
+  matches = all_test_files.grep(/#{thing_to_match}/i)
+  if matches.empty?
+    puts "Sorry, thanks for playing, but there were no matches for #{thing_to_match}"
+  else
+    run matches.join(' ')
+  end
+end
+def run(files_to_run)
+  puts("Running: #{files_to_run}")
+  system("clear;rspec -cfs #{files_to_run}")
+  no_int_for_you
+end
+def run_all_tests
+  puts "foo"
+  run(all_test_files.join(' '))
+end
+# --------------------------------------------------
+# Watchr Rules
+# --------------------------------------------------
+watch('^spec/(.*)_spec\.rb'  )   { |m| run_test_matching(m[1]) }
+watch('^lib/(.*)\.rb'               )   { |m| run_test_matching(m[1]) }
+watch('^spec/spec_helper\.rb')   { run_all_tests }
+# --------------------------------------------------
+# Signal Handling
+# --------------------------------------------------
+def no_int_for_you
+  @sent_an_int = nil
+end
+Signal.trap 'INT' do
+  if @sent_an_int then
+    puts "   A second INT?  Ok, I get the message.  Shutting down now."
+    exit
+  else
+    puts "   Did you just send me an INT? Ugh.  I'll quit for real if you do it again."
+    @sent_an_int = true
+    Kernel.sleep 1.5
+    run_all_tests
+  end
+end
+# vim:ft=ruby

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rawler
 version: !ruby/object:Gem::Version
-  hash: 21
+  hash: 19
   prerelease:
   segments:
   - 0
   - 0
-  - 5
-  version: 0.0.5
+  - 6
+  version: 0.0.6
 platform: ruby
 authors:
 - Oscar Del Ben
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-01-21 00:00:00 +01:00
+date: 2011-02-04 00:00:00 +01:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -52,8 +52,6 @@ description: |-
   Rawler is a Ruby library that crawls your website and checks the status code for each of your links. Useful for finding dead links.
   Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
-  Please note: I had to temporarily remove url encoding in order to resolve some issues, so if you find any issue, please let me know. I'm also going to use Mechanizer for parsing pages with the next release.
 email:
 - info@oscardelben.com
 executables:
@@ -65,7 +63,8 @@ extra_rdoc_files:
 - Manifest.txt
 - README.txt
 files:
-- .autotest
+- Gemfile
+- Gemfile.lock
 - History.txt
 - Manifest.txt
 - README.txt
@@ -77,14 +76,13 @@ files:
 - lib/rawler/core_extensions/module.rb
 - lib/rawler/crawler.rb
 - lib/rawler/request.rb
+- spec/lib/base_spec.rb
+- spec/lib/rawler/base_spec.rb
+- spec/lib/rawler/crawler_spec.rb
+- spec/lib/rawler_spec.rb
 - spec/spec.opts
 - spec/spec_helper.rb
-- spec/unit/base_spec.rb
-- spec/unit/crawler/base_spec.rb
-- spec/unit/crawler/content_type_spec.rb
-- spec/unit/crawler/exceptions_spec.rb
-- spec/unit/crawler/http_basic_spec.rb
-- spec/unit/crawler/url_domain_spec.rb
+- specs.watchr
 - tasks/rspec.rake
 - vendor/lib-trollop.rb
 has_rdoc: true

data/.autotest DELETED Viewed

@@ -1,23 +0,0 @@
-# -*- ruby -*-
-require 'autotest/restart'
-# Autotest.add_hook :initialize do |at|
-#   at.extra_files << "../some/external/dependency.rb"
-#
-#   at.libs << ":../some/external"
-#
-#   at.add_exception 'vendor'
-#
-#   at.add_mapping(/dependency.rb/) do |f, _|
-#     at.files_matching(/test_.*rb$/)
-#   end
-#
-#   %w(TestA TestB).each do |klass|
-#     at.extra_class_map[klass] = "test/test_misc.rb"
-#   end
-# end
-# Autotest.add_hook :run_command do |at|
-#   system "rake build"
-# end

data/spec/unit/crawler/base_spec.rb DELETED Viewed

@@ -1,75 +0,0 @@
-require File.dirname(__FILE__) + '/../../spec_helper.rb'
-describe Rawler::Crawler do
-  context "basic functionality" do
-    let(:url) { 'http://example.com' }
-    let(:crawler) { Rawler::Crawler.new(url) }
-    let(:content) {
-      content = <<-content
-        <p><a href="http://example.com/foo">foo</a></p>
-    		<p><a href="http://external.com/bar">bar</a></p>
-    	content
-    }
-    before(:each) do
-      register(url, content)
-    end
-    it "should parse all links" do
-      crawler.links.should == ['http://example.com/foo', 'http://external.com/bar']
-    end
-  end
-  context "relative paths" do
-    let(:url)     { 'http://example.com/path' }
-    let(:crawler) { Rawler::Crawler.new(url) }
-    let(:content) { '<a href="/foo">foo</a>' }
-    before(:each) do
-      register(url, content)
-    end
-    it "should parse relative links" do
-      crawler.links.should == ['http://example.com/foo']
-    end
-  end
-  context "different domains" do
-    let(:url)     { 'http://external.com/path' }
-    let(:crawler) { Rawler::Crawler.new(url) }
-    let(:content) { '<a href="/foo">foo</a>' }
-    before(:each) do
-      register(url, content)
-    end
-    it "should parse relative links" do
-      crawler.links.should == []
-    end
-  end
-  context "urls with hash tags" do
-    let(:url)     { 'http://example.com/path' }
-    let(:crawler) { Rawler::Crawler.new(url) }
-    let(:content) { '<a href="/foo#bar">foo</a>' }
-    before(:each) do
-      register(url, content)
-    end
-    it "should parse relative links" do
-      crawler.links.should == ['http://example.com/foo#bar']
-    end
-  end
-end

data/spec/unit/crawler/content_type_spec.rb DELETED Viewed

@@ -1,23 +0,0 @@
-require File.dirname(__FILE__) + '/../../spec_helper.rb'
-describe Rawler::Crawler do
-  context "content type" do
-    ['text/plain', 'text/css', 'image/jpeg'].each do |content_type|
-      let(:url)     { 'http://example.com' }
-      let(:crawler) { Rawler::Crawler.new(url) }
-      before(:each) do
-        register(url, '', 200, :content_type => content_type)
-      end
-      it "should ignore '#{content_type}'" do
-        crawler.links.should == []
-      end
-    end
-  end
-end

data/spec/unit/crawler/exceptions_spec.rb DELETED Viewed

@@ -1,54 +0,0 @@
-require File.dirname(__FILE__) + '/../../spec_helper.rb'
-describe Rawler::Crawler do
-  context "Exceptions" do
-    let(:url)     { 'http://example.com' }
-    let(:crawler) { Rawler::Crawler.new(url) }
-    let(:output)  { double('output', :puts => nil) }
-    before(:each) do
-      register(url, '')
-      Rawler.stub!(:output).and_return(output)
-    end
-    context "Errno::ECONNREFUSED" do
-      before(:each) do
-        Rawler::Request.stub!(:get).and_raise Errno::ECONNREFUSED
-      end
-      it "should return an empty array" do
-        crawler.links.should == []
-      end
-      it "should print a message when raising Errno::ECONNREFUSED" do
-        output.should_receive(:puts).with("Couldn't connect to #{url}")
-        crawler.links
-      end
-    end
-    context "Errno::ETIMEDOUT" do
-      before(:each) do
-        Rawler::Request.stub!(:get).and_raise Errno::ETIMEDOUT
-      end
-      it "should return an empty array when raising Errno::ETIMEDOUT" do
-        crawler.links.should == []
-      end
-      it "should print a message when raising Errno::ETIMEDOUT" do
-        output.should_receive(:puts).with("Connection to #{url} timed out")
-        crawler.links
-      end
-    end
-  end
-end

data/spec/unit/crawler/http_basic_spec.rb DELETED Viewed

@@ -1,25 +0,0 @@
-require File.dirname(__FILE__) + '/../../spec_helper.rb'
-describe Rawler::Crawler do
-  context "http basic" do
-    let(:url)     { 'http://example.com' }
-    let(:content) { '<a href="http://example.com/secret-path">foo</a>' }
-    let(:crawler) { Rawler::Crawler.new('http://example.com/secret') }
-    before(:each) do
-      register('http://example.com/secret', '', :status => ["401", "Unauthorized"])
-      register('http://foo:bar@example.com/secret', content)
-      Rawler.stub!(:username).and_return('foo')
-      Rawler.stub!(:password).and_return('bar')
-    end
-    it "should crawl http basic pages" do
-      crawler.links.should == ['http://example.com/secret-path']
-    end
-  end
-end

data/spec/unit/crawler/url_domain_spec.rb DELETED Viewed

@@ -1,26 +0,0 @@
-require File.dirname(__FILE__) + '/../../spec_helper.rb'
-describe Rawler::Crawler do
-  context "url domain" do
-    let(:content) {
-      content = <<-content
-        <a href="http://example.com/valid">foo</a>
-        <a href="mailto:info@example.com">invalid</a>
-        <a href="https://foo.com">valid</a>
-      content
-    }
-    let(:url)     { 'http://example.com' }
-    let(:crawler) { Rawler::Crawler.new(url) }
-    before(:each) do
-      register(url, content)
-    end
-    it "should ignore links other than http or https" do
-      crawler.links.should == ['http://example.com/valid', 'https://foo.com']
-    end
-  end
-end