RubyGems - rawler - Versions diffs - 0.1.0 → 0.1.1 - Mend

rawler 0.1.0 → 0.1.1

Files changed (20) hide show

data/Gemfile +12 -5
data/Gemfile.lock +14 -22
data/LICENSE.txt +20 -0
data/{README.txt → README.rdoc} +6 -35
data/Rakefile +58 -24
data/VERSION +1 -0
data/bin/rawler +1 -1
data/lib/rawler.rb +52 -2
data/lib/rawler/base.rb +45 -5
data/lib/rawler/core_extensions.rb +3 -1
data/lib/rawler/core_extensions/module.rb +3 -1
data/lib/rawler/crawler.rb +28 -4
data/lib/rawler/request.rb +3 -1
data/spec/lib/rawler_spec.rb +12 -0
data/test/helper.rb +18 -0
data/test/test_rawler.rb +7 -0
metadata +68 -38
data/Manifest.txt +0 -21
data/specs.watchr +0 -58
data/tasks/rspec.rake +0 -21

data/Gemfile CHANGED

@@ -1,9 +1,16 @@
 source "http://rubygems.org"
+# Add dependencies required to use your gem here.
+# Example:
+#   gem "activesupport", ">= 2.3.5"
-gem "nokogiri", "1.4.4"
+# Add dependencies to develop your gem here.
+# Include everything needed to run rake, tests, features, etc.
+gem 'nokogiri'
-group :development, :test do
-  gem "hoe", "2.6.2"
-  gem "rspec", "2.4.0"
-  gem "fakeweb", "1.3.0"
+group :development do
+  gem "shoulda", ">= 0"
+  gem "bundler", "~> 1.0.0"
+  gem "jeweler", "~> 1.6.4"
+  gem "rcov", ">= 0"
 end

data/Gemfile.lock CHANGED

@@ -1,30 +1,22 @@
 GEM
   remote: http://rubygems.org/
   specs:
-    diff-lcs (1.1.2)
-    fakeweb (1.3.0)
-    hoe (2.6.2)
-      rake (>= 0.8.7)
-      rubyforge (>= 2.0.4)
-    json_pure (1.5.1)
-    nokogiri (1.4.4)
-    rake (0.8.7)
-    rspec (2.4.0)
-      rspec-core (~> 2.4.0)
-      rspec-expectations (~> 2.4.0)
-      rspec-mocks (~> 2.4.0)
-    rspec-core (2.4.0)
-    rspec-expectations (2.4.0)
-      diff-lcs (~> 1.1.2)
-    rspec-mocks (2.4.0)
-    rubyforge (2.0.4)
-      json_pure (>= 1.1.7)
+    git (1.2.5)
+    jeweler (1.6.4)
+      bundler (~> 1.0)
+      git (>= 1.2.5)
+      rake
+    nokogiri (1.5.0)
+    rake (0.9.2)
+    rcov (0.9.9)
+    shoulda (2.11.3)
 PLATFORMS
   ruby
 DEPENDENCIES
-  fakeweb (= 1.3.0)
-  hoe (= 2.6.2)
-  nokogiri (= 1.4.4)
-  rspec (= 2.4.0)
+  bundler (~> 1.0.0)
+  jeweler (~> 1.6.4)
+  nokogiri
+  rcov
+  shoulda

data/LICENSE.txt ADDED

@@ -0,0 +1,20 @@
+Copyright (c) 2011 Oscar Del Ben
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/{README.txt → README.rdoc} RENAMED

@@ -1,14 +1,10 @@
-= rawler
-* http://github.com/oscardelben/rawler
-== DESCRIPTION:
+### rawler
 Rawler is a Ruby library that crawls your website and checks the status code for each of your links. Useful for finding dead links.
 Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
-== SYNOPSIS:
+### SYNOPSIS:
   rawler http://example.com [options]
@@ -19,40 +15,15 @@ Rawler will only parse pages with content type 'text/html', but it will check fo
 	       --version, -v:   Print version and exit
 	          --help, -h:   Show this message
-== INSTALL:
+### INSTALL:
 gem install rawler
-== DEVELOPMENT:
-Run bundle install to install everything you need
-  rake test
-To package and run the gem locally:
-  rake package
-  cd pkg
-  gem install rawler-#{version}.gem
-If you add files, run:
-  rake check_manifest
-And add them to the Manifest file.
-== CONTRIBUTORS:
-* bcoob
-* Hugh Sasse
-* Ken Egozi
-* Robert Glaser
-* Stefan Schüßler
-* Vesa Vänskä
+### CONTRIBUTORS:
-See also https://github.com/oscardelben/rawler/contributors
+Many. See [https://github.com/oscardelben/rawler/contributors](https://github.com/oscardelben/rawler/contributors)
-== LICENSE:
+### LICENSE:
 (The MIT License)

data/Rakefile CHANGED

@@ -1,35 +1,69 @@
-# -*- ruby -*-
+# encoding: utf-8
 require 'rubygems'
-require 'hoe'
+require 'bundler'
+require 'fileutils'
-# require 'bundler'
-# Bundler::GemHelper.install_tasks
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
+require 'rake'
-require 'rspec/core/rake_task'
-RSpec::Core::RakeTask.new(:test)
+require 'jeweler'
+Jeweler::Tasks.new do |gem|
+  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
+  gem.name = "rawler"
+  gem.homepage = "http://github.com/oscardelben/rawler"
+  gem.license = "MIT"
+  gem.summary = %Q{Rawler is a tool that crawls the links of your website}
+  gem.description = %Q{Rawler is a tool that crawls the links of your website}
+  gem.email = "info@oscardelben.com"
+  gem.authors = ["Oscar Del Ben"]
+  gem.executables = ['rawler']
+  # dependencies defined in Gemfile
+end
+Jeweler::RubygemsDotOrgTasks.new
-# Hoe.plugin :compiler
-# Hoe.plugin :gem_prelude_sucks
-# Hoe.plugin :inline
-# Hoe.plugin :racc
-# Hoe.plugin :rubyforge
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
-Hoe.spec 'rawler' do
-  # HEY! If you fill these out in ~/.hoe_template/Rakefile.erb then
-  # you'll never have to touch them again!
-  # (delete this comment too, of course)
+require 'rcov/rcovtask'
+Rcov::RcovTask.new do |test|
+  test.libs << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+  test.rcov_opts << '--exclude "gems/*"'
+end
-  developer('Oscar Del Ben', 'info@oscardelben.com')
+task :default => :test
-  self.rubyforge_name = 'oscardelben'
-  extra_deps << ['nokogiri']
-end
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
-desc 'Console'
-task :console do
-  exec 'irb -rubygems -I lib -r rawler.rb'
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "rawler #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
 end
-# vim: syntax=ruby
+desc 'generate docs'
+task :rocco do
+  #%x!rm -r html/*!
+  Dir.chdir "lib"
+  files = Dir['**/*.*']
+  files.each do |file|
+    %x!rocco #{file} -o ../html!
+  end
+end

data/VERSION ADDED

@@ -0,0 +1 @@
+0.1.1

data/bin/rawler CHANGED

@@ -4,7 +4,7 @@ require 'rawler'
 require File.join(File.dirname(__FILE__), '..', '/vendor/lib-trollop.rb')
 opts = Trollop::options do
-  version "rawler 0.0.3 (c) 2011 Oscar Del Ben"
+  version "rawler #{Rawler::VERSION} (c) 2011 Oscar Del Ben"
   banner <<-EOS
 Rawler is a command line utility for parsing links on a website

data/lib/rawler.rb CHANGED

@@ -1,24 +1,74 @@
+# **Rawler** is a command line tool for finding broken links on your website.
+# You can install Rawler by running:
+#
+#   gem install rawler
+#
+# To use Rawler type:
+#
+#   rawler example.com
+#
+# Type `rawler -h` to see all the available options (including basic auth support).
+#### Prerequisites
 require 'rubygems'
-require 'net/http'
+# We use [net/https](http://www.ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html) for making requests.
 require 'net/https'
+# We use [nokogiri](http://nokogiri.org/) for parsing web pages.
 require 'nokogiri'
+# We use the [logger](http://www.ruby-doc.org/stdlib/libdoc/logger/rdoc/) utility for handling the output.
 require 'logger'
+# We require [rawler/core_extensions](rawler/core_extensions.html) which includes some core extensions we need.
 require 'rawler/core_extensions'
+#### The Rawler module
+# The Rawler module itself is very simple, and it's only used for storing configuration data like the url that we want to fetch, basic username and password.
 module Rawler
-  VERSION = '0.1.0'
+  VERSION = '0.1.1'
+  # `output` is where we want to direct output. It's set to `$stdout` by default.
   mattr_accessor :output
+  # `url` is the url that we want to fetch. We need to keep track of it when parsing other pages to see if they are of the same domain.
   mattr_accessor :url
+  # The time we wait between requests, default 3. We don't want to send too many requests to your website!
   mattr_accessor :wait
+  # Username and Password for basic auth, if needed.
   mattr_accessor :username, :password
+  # Here we autoload the specific namespaces when they are needed.
+  # [Rawler::Base](rawler/base.html) is responsible for validating all the pages in a domain. It's where all the magic happens.
   autoload :Base, "rawler/base"
+  # [Rawler::Crawler](rawler/crawler.html) is responsible for parsing links inside a page.
   autoload :Crawler, "rawler/crawler"
+  # [Rawler::Request](rawler/request.html) contains some helper methods for performing requests.
   autoload :Request, "rawler/request"
+  # We overwrite url= to automatically add `http://` if needed so that you can simply type `rawler example.com` in the command line.
   def self.url=(url)
     url.strip!

data/lib/rawler/base.rb CHANGED

@@ -1,32 +1,50 @@
+#### Rawler workflow
+# `Rawler::Base` is where all the heavy work is done.
+# When you call `rawler somesite.com`, we create an instance of Rawler::Base and then call `validate`, which recursively validates all the links relative to the domain that we specified.
 module Rawler
   class Base
+    # `responses` is used to keep track of which links we have already parsed, so that we won't parse them again and again.
+    # TODO: rename `responses` to something more meaningful.
     attr_accessor :responses
+    # When we instantiate `Rawler::Base` we set some options according to what you specified on the command line.
     def initialize(url, output, options={})
       @responses = {}
       Rawler.url      = URI.escape(url)
+      output.sync     = true
       Rawler.output   = Logger.new(output)
       Rawler.username = options[:username]
       Rawler.password = options[:password]
       Rawler.wait     = options[:wait].to_i
     end
+    # The method used to start the real validation process
     def validate
       validate_links_in_page(Rawler.url)
     end
     private
-    def validate_links_in_page(current_url)
-      Rawler::Crawler.new(current_url).links.each do |page_url|
-        validate_page(page_url, current_url)
+    # We ask [Rawler::Crawler](crawler.html) for all the links in page and then validate each of them individually.
+    # We then sleep for the value of `Rawler.wait` (default 3) between each request to avoid DoS-ing your server.
+    def validate_links_in_page(page)
+      Rawler::Crawler.new(page).links.each do |page_url|
+        validate_page(page_url, page)
         sleep(Rawler.wait)
       end
     end
+    # If we haven't validated the page yet, we check its status code and then validate all the links in the page if it's in the same domain
     def validate_page(page_url, from_url)
       if not_yet_parsed?(page_url)
         add_status_code(page_url, from_url)
@@ -34,20 +52,33 @@ module Rawler
       end
     end
+    # This is where we check the specific page status.
     def add_status_code(link, from_url)
       response = Rawler::Request.get(link)
+      # We follow a redirect if necessary.
       validate_page(response['Location'], from_url) if response['Location']
+      # We inform the user about what we got.
       record_response(response.code, link, from_url, response['Location'])
+      # We add the current page to `responses` to avoid parsing it again/
       responses[link] = { :status => response.code.to_i }
     rescue Errno::ECONNREFUSED
       error("Connection refused - #{link} - Called from: #{from_url}")
     rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT,
       EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError, SocketError
       error("Connection problems - #{link} - Called from: #{from_url}")
+    rescue Exception
+      error("Unknown error - #{link} - Called from: #{from_url}")
     end
+    # Some helper methods
     def same_domain?(link)
       URI.parse(Rawler.url).host == URI.parse(link).host
     end
@@ -59,21 +90,30 @@ module Rawler
     def error(message)
       Rawler.output.error(message)
     end
+    # We use this method to inform the user of a page status
     def record_response(code, link, from_url, redirection=nil)
+      # By default, we just give the status code and the page url
       message = "#{code} - #{link}"
+      # If the status code is greater than or equal to 300, we also add which url linked to the current page
       if code.to_i >= 300
         message += " - Called from: #{from_url}"
       end
+      # We add information about redirects, if a redirect was set
       message += " - Following redirection to: #{redirection}" if redirection
+      # Depending on the status code, we use a different logger method.
       code = code.to_i
       case code / 100
       when 1
-        # TODO: check that if a 100 is received
-        # then there is another status code as well
         Rawler.output.info(message)
       when 2 then
         Rawler.output.info(message)

data/lib/rawler/core_extensions.rb CHANGED

@@ -1 +1,3 @@
-require 'rawler/core_extensions/module'
+# We load the [module](core_extensions/module.html) core extension
+require 'rawler/core_extensions/module'

data/lib/rawler/core_extensions/module.rb CHANGED

@@ -1,3 +1,5 @@
+# Add `attr_accessor` like methods to modules
 class Module
   def mattr_reader(*syms)
     syms.each do |sym|
@@ -42,4 +44,4 @@ class Module
     mattr_reader(*syms)
     mattr_writer(*syms)
   end
-end
+end

data/lib/rawler/crawler.rb CHANGED

@@ -1,39 +1,62 @@
+# `Rawler::Crawler` is responsible for parsing links inside a page
 module Rawler
   class Crawler
-    attr_accessor :url, :links
+    # An instance of Rawler::Crawler has a url which represents the url for which we want to parse links.
+    attr_accessor :url
+    # We want to skip some kind of formats
     SKIP_FORMATS = /^(javascript|mailto)/
+    # To use this class, just pass it a url
     def initialize(url)
       @url = url.strip
     end
+    # And then call `links` to get its links.
     def links
+      # If the url is on a different domain than the main Rawler.url, or if the page is not html, we return an empty array
       if different_domain?(url, Rawler.url) || not_html?(url)
         return []
       end
+      # Otherwise we fetch the page
       response = Rawler::Request.get(url)
+      # And kindly ask nokogiri to convert it for us
       doc = Nokogiri::HTML(response.body)
+      # We then do some magic, search all the links in the document that contain a valid link, and return them.
       doc.css('a').map { |a| a['href'] }.select { |url| !url.nil? }.map { |url| absolute_url(url) }.select { |url| valid_url?(url) }
-    rescue Errno::ECONNREFUSED # TODO: add called from
+    rescue Errno::ECONNREFUSED
       write("Couldn't connect to #{url}")
       []
-    rescue Errno::ETIMEDOUT # TODO: add called from
+    rescue Errno::ETIMEDOUT
       write("Connection to #{url} timed out")
       []
     end
     private
+    # Here's how we transform a relative url to an absolute url
     def absolute_url(path)
+      # First, encode the url
       path = URI.encode(path.strip, Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}#]"))
+      # if the url contains a scheme that means it's already absolute
       if URI.parse(path).scheme
         path
       else
+        # Otherwise we merge `url` to get the absolute url
         URI.parse(url).merge(path).to_s
       end
     rescue URI::InvalidURIError
@@ -41,7 +64,8 @@ module Rawler
       nil
     end
-    # TODO: add 'called from in a more pragmatic way as an optional parameter
+    # Some helper methods
     def write(message)
       Rawler.output.error(message)
     end

data/lib/rawler/request.rb CHANGED

@@ -1,3 +1,5 @@
+# `Rawler::Request` contains some abstraction for making web requests, like automatically adding ssl and basic auth.
 module Rawler
   class Request
@@ -32,4 +34,4 @@ module Rawler
   end
-end
+end

data/spec/lib/rawler_spec.rb CHANGED

@@ -135,6 +135,18 @@ describe Rawler::Base do
          rawler.send(:add_status_code, url, from)
        end
     end
+    it "should rescue from general errors" do
+      url = 'http://example.com'
+      from = 'http://other.com'
+      Rawler::Request.should_receive(:get).and_raise
+      output.should_receive(:error).with("Unknown error - #{url} - Called from: #{from}")
+      rawler.send(:add_status_code, url, from)
+    end
   end

data/test/helper.rb ADDED

@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'bundler'
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
+require 'test/unit'
+require 'shoulda'
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'rawler'
+class Test::Unit::TestCase
+end

data/test/test_rawler.rb ADDED

@@ -0,0 +1,7 @@
+require 'helper'
+class TestRawler < Test::Unit::TestCase
+  should "probably rename this file and start testing for real" do
+    flunk "hey buddy, you should probably rename this file and start testing for real"
+  end
+end

metadata CHANGED

@@ -1,12 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rawler
 version: !ruby/object:Gem::Version
-  prerelease: false
+  hash: 25
+  prerelease:
   segments:
   - 0
   - 1
-  - 0
-  version: 0.1.0
+  - 1
+  version: 0.1.1
 platform: ruby
 authors:
 - Oscar Del Ben
@@ -14,71 +15,99 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-03-29 00:00:00 +02:00
-default_executable:
+date: 2011-09-16 00:00:00 +02:00
+default_executable: rawler
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: nokogiri
   prerelease: false
+  type: :runtime
   requirement: &id001 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 3
         segments:
         - 0
         version: "0"
-  type: :runtime
+  name: nokogiri
   version_requirements: *id001
 - !ruby/object:Gem::Dependency
-  name: rubyforge
   prerelease: false
+  type: :development
   requirement: &id002 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 3
         segments:
-        - 2
         - 0
-        - 4
-        version: 2.0.4
-  type: :development
+        version: "0"
+  name: shoulda
   version_requirements: *id002
 - !ruby/object:Gem::Dependency
-  name: hoe
   prerelease: false
+  type: :development
   requirement: &id003 !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ">="
+    - - ~>
+      - !ruby/object:Gem::Version
+        hash: 23
+        segments:
+        - 1
+        - 0
+        - 0
+        version: 1.0.0
+  name: bundler
+  version_requirements: *id003
+- !ruby/object:Gem::Dependency
+  prerelease: false
+  type: :development
+  requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
       - !ruby/object:Gem::Version
+        hash: 7
         segments:
-        - 2
+        - 1
         - 6
-        - 2
-        version: 2.6.2
+        - 4
+        version: 1.6.4
+  name: jeweler
+  version_requirements: *id004
+- !ruby/object:Gem::Dependency
+  prerelease: false
   type: :development
-  version_requirements: *id003
-description: |-
-  Rawler is a Ruby library that crawls your website and checks the status code for each of your links. Useful for finding dead links.
-  Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
-email:
-- info@oscardelben.com
+  requirement: &id005 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  name: rcov
+  version_requirements: *id005
+description: Rawler is a tool that crawls the links of your website
+email: info@oscardelben.com
 executables:
 - rawler
 extensions: []
 extra_rdoc_files:
-- Manifest.txt
-- README.txt
+- LICENSE.txt
+- README.rdoc
 files:
 - Gemfile
 - Gemfile.lock
-- Manifest.txt
-- README.txt
+- LICENSE.txt
+- README.rdoc
 - Rakefile
+- VERSION
 - bin/rawler
 - lib/rawler.rb
 - lib/rawler/base.rb
@@ -92,17 +121,16 @@ files:
 - spec/lib/rawler_spec.rb
 - spec/spec.opts
 - spec/spec_helper.rb
-- specs.watchr
-- tasks/rspec.rake
+- test/helper.rb
+- test/test_rawler.rb
 - vendor/lib-trollop.rb
 has_rdoc: true
 homepage: http://github.com/oscardelben/rawler
-licenses: []
+licenses:
+- MIT
 post_install_message:
-rdoc_options:
-- --main
-- README.txt
+rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
@@ -110,6 +138,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
       segments:
       - 0
       version: "0"
@@ -118,15 +147,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
       segments:
       - 0
       version: "0"
 requirements: []
-rubyforge_project: oscardelben
-rubygems_version: 1.3.7
+rubyforge_project:
+rubygems_version: 1.6.2
 signing_key:
 specification_version: 3
-summary: Rawler is a Ruby library that crawls your website and checks the status code for each of your links
+summary: Rawler is a tool that crawls the links of your website
 test_files: []

data/Manifest.txt DELETED

@@ -1,21 +0,0 @@
-Gemfile
-Gemfile.lock
-Manifest.txt
-README.txt
-Rakefile
-bin/rawler
-lib/rawler.rb
-lib/rawler/base.rb
-lib/rawler/core_extensions.rb
-lib/rawler/core_extensions/module.rb
-lib/rawler/crawler.rb
-lib/rawler/request.rb
-spec/lib/base_spec.rb
-spec/lib/rawler/base_spec.rb
-spec/lib/rawler/crawler_spec.rb
-spec/lib/rawler_spec.rb
-spec/spec.opts
-spec/spec_helper.rb
-specs.watchr
-tasks/rspec.rake
-vendor/lib-trollop.rb

data/specs.watchr DELETED

@@ -1,58 +0,0 @@
-# Run me with:
-#
-#   $ watchr specs.watchr
-# --------------------------------------------------
-# Convenience Methods
-# --------------------------------------------------
-def all_test_files
-  Dir['spec/**/*_spec.rb']
-end
-def run_test_matching(thing_to_match)
-  matches = all_test_files.grep(/#{thing_to_match}/i)
-  if matches.empty?
-    puts "Sorry, thanks for playing, but there were no matches for #{thing_to_match}"
-  else
-    run matches.join(' ')
-  end
-end
-def run(files_to_run)
-  puts("Running: #{files_to_run}")
-  system("clear;rspec -cfs #{files_to_run}")
-  no_int_for_you
-end
-def run_all_tests
-  run(all_test_files.join(' '))
-end
-# --------------------------------------------------
-# Watchr Rules
-# --------------------------------------------------
-watch('^spec/(.*)_spec\.rb'  )   { |m| run_test_matching(m[1]) }
-watch('^lib/(.*)\.rb'               )   { |m| run_test_matching(m[1]) }
-watch('^spec/spec_helper\.rb')   { run_all_tests }
-# --------------------------------------------------
-# Signal Handling
-# --------------------------------------------------
-def no_int_for_you
-  @sent_an_int = nil
-end
-Signal.trap 'INT' do
-  if @sent_an_int then
-    puts "   A second INT?  Ok, I get the message.  Shutting down now."
-    exit
-  else
-    puts "   Did you just send me an INT? Ugh.  I'll quit for real if you do it again."
-    @sent_an_int = true
-    Kernel.sleep 1.5
-    run_all_tests
-  end
-end
-# vim:ft=ruby

data/tasks/rspec.rake DELETED

@@ -1,21 +0,0 @@
-begin
-  require 'spec'
-rescue LoadError
-  require 'rubygems' unless ENV['NO_RUBYGEMS']
-  require 'spec'
-end
-begin
-  require 'spec/rake/spectask'
-rescue LoadError
-  puts <<-EOS
-To use rspec for testing you must install rspec gem:
-    gem install rspec
-EOS
-  exit(0)
-end
-desc "Run the specs under spec/models"
-Spec::Rake::SpecTask.new do |t|
-  t.spec_opts = ['--options', "spec/spec.opts"]
-  t.spec_files = FileList['spec/**/*_spec.rb']
-end