rawler 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,9 +1,16 @@
1
1
  source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
2
5
 
3
- gem "nokogiri", "1.4.4"
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ gem 'nokogiri'
4
9
 
5
- group :development, :test do
6
- gem "hoe", "2.6.2"
7
- gem "rspec", "2.4.0"
8
- gem "fakeweb", "1.3.0"
10
+
11
+ group :development do
12
+ gem "shoulda", ">= 0"
13
+ gem "bundler", "~> 1.0.0"
14
+ gem "jeweler", "~> 1.6.4"
15
+ gem "rcov", ">= 0"
9
16
  end
@@ -1,30 +1,22 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- diff-lcs (1.1.2)
5
- fakeweb (1.3.0)
6
- hoe (2.6.2)
7
- rake (>= 0.8.7)
8
- rubyforge (>= 2.0.4)
9
- json_pure (1.5.1)
10
- nokogiri (1.4.4)
11
- rake (0.8.7)
12
- rspec (2.4.0)
13
- rspec-core (~> 2.4.0)
14
- rspec-expectations (~> 2.4.0)
15
- rspec-mocks (~> 2.4.0)
16
- rspec-core (2.4.0)
17
- rspec-expectations (2.4.0)
18
- diff-lcs (~> 1.1.2)
19
- rspec-mocks (2.4.0)
20
- rubyforge (2.0.4)
21
- json_pure (>= 1.1.7)
4
+ git (1.2.5)
5
+ jeweler (1.6.4)
6
+ bundler (~> 1.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ nokogiri (1.5.0)
10
+ rake (0.9.2)
11
+ rcov (0.9.9)
12
+ shoulda (2.11.3)
22
13
 
23
14
  PLATFORMS
24
15
  ruby
25
16
 
26
17
  DEPENDENCIES
27
- fakeweb (= 1.3.0)
28
- hoe (= 2.6.2)
29
- nokogiri (= 1.4.4)
30
- rspec (= 2.4.0)
18
+ bundler (~> 1.0.0)
19
+ jeweler (~> 1.6.4)
20
+ nokogiri
21
+ rcov
22
+ shoulda
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Oscar Del Ben
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,14 +1,10 @@
1
- = rawler
2
-
3
- * http://github.com/oscardelben/rawler
4
-
5
- == DESCRIPTION:
1
+ ### rawler
6
2
 
7
3
  Rawler is a Ruby library that crawls your website and checks the status code for each of your links. Useful for finding dead links.
8
4
 
9
5
  Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
10
6
 
11
- == SYNOPSIS:
7
+ ### SYNOPSIS:
12
8
 
13
9
  rawler http://example.com [options]
14
10
 
@@ -19,40 +15,15 @@ Rawler will only parse pages with content type 'text/html', but it will check fo
19
15
  --version, -v: Print version and exit
20
16
  --help, -h: Show this message
21
17
 
22
- == INSTALL:
18
+ ### INSTALL:
23
19
 
24
20
  gem install rawler
25
21
 
26
- == DEVELOPMENT:
27
-
28
- Run bundle install to install everything you need
29
-
30
- rake test
31
-
32
- To package and run the gem locally:
33
-
34
- rake package
35
- cd pkg
36
- gem install rawler-#{version}.gem
37
-
38
- If you add files, run:
39
-
40
- rake check_manifest
41
-
42
- And add them to the Manifest file.
43
-
44
- == CONTRIBUTORS:
45
-
46
- * bcoob
47
- * Hugh Sasse
48
- * Ken Egozi
49
- * Robert Glaser
50
- * Stefan Schüßler
51
- * Vesa Vänskä
22
+ ### CONTRIBUTORS:
52
23
 
53
- See also https://github.com/oscardelben/rawler/contributors
24
+ Many. See [https://github.com/oscardelben/rawler/contributors](https://github.com/oscardelben/rawler/contributors)
54
25
 
55
- == LICENSE:
26
+ ### LICENSE:
56
27
 
57
28
  (The MIT License)
58
29
 
data/Rakefile CHANGED
@@ -1,35 +1,69 @@
1
- # -*- ruby -*-
1
+ # encoding: utf-8
2
2
 
3
3
  require 'rubygems'
4
- require 'hoe'
4
+ require 'bundler'
5
+ require 'fileutils'
5
6
 
6
- # require 'bundler'
7
- # Bundler::GemHelper.install_tasks
7
+ begin
8
+ Bundler.setup(:default, :development)
9
+ rescue Bundler::BundlerError => e
10
+ $stderr.puts e.message
11
+ $stderr.puts "Run `bundle install` to install missing gems"
12
+ exit e.status_code
13
+ end
14
+ require 'rake'
8
15
 
9
- require 'rspec/core/rake_task'
10
- RSpec::Core::RakeTask.new(:test)
16
+ require 'jeweler'
17
+ Jeweler::Tasks.new do |gem|
18
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
19
+ gem.name = "rawler"
20
+ gem.homepage = "http://github.com/oscardelben/rawler"
21
+ gem.license = "MIT"
22
+ gem.summary = %Q{Rawler is a tool that crawls the links of your website}
23
+ gem.description = %Q{Rawler is a tool that crawls the links of your website}
24
+ gem.email = "info@oscardelben.com"
25
+ gem.authors = ["Oscar Del Ben"]
26
+ gem.executables = ['rawler']
27
+ # dependencies defined in Gemfile
28
+ end
29
+ Jeweler::RubygemsDotOrgTasks.new
11
30
 
12
- # Hoe.plugin :compiler
13
- # Hoe.plugin :gem_prelude_sucks
14
- # Hoe.plugin :inline
15
- # Hoe.plugin :racc
16
- # Hoe.plugin :rubyforge
31
+ require 'rake/testtask'
32
+ Rake::TestTask.new(:test) do |test|
33
+ test.libs << 'lib' << 'test'
34
+ test.pattern = 'test/**/test_*.rb'
35
+ test.verbose = true
36
+ end
17
37
 
18
- Hoe.spec 'rawler' do
19
- # HEY! If you fill these out in ~/.hoe_template/Rakefile.erb then
20
- # you'll never have to touch them again!
21
- # (delete this comment too, of course)
38
+ require 'rcov/rcovtask'
39
+ Rcov::RcovTask.new do |test|
40
+ test.libs << 'test'
41
+ test.pattern = 'test/**/test_*.rb'
42
+ test.verbose = true
43
+ test.rcov_opts << '--exclude "gems/*"'
44
+ end
22
45
 
23
- developer('Oscar Del Ben', 'info@oscardelben.com')
46
+ task :default => :test
24
47
 
25
- self.rubyforge_name = 'oscardelben'
26
-
27
- extra_deps << ['nokogiri']
28
- end
48
+ require 'rake/rdoctask'
49
+ Rake::RDocTask.new do |rdoc|
50
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
29
51
 
30
- desc 'Console'
31
- task :console do
32
- exec 'irb -rubygems -I lib -r rawler.rb'
52
+ rdoc.rdoc_dir = 'rdoc'
53
+ rdoc.title = "rawler #{version}"
54
+ rdoc.rdoc_files.include('README*')
55
+ rdoc.rdoc_files.include('lib/**/*.rb')
33
56
  end
34
57
 
35
- # vim: syntax=ruby
58
+ desc 'generate docs'
59
+ task :rocco do
60
+ #%x!rm -r html/*!
61
+
62
+ Dir.chdir "lib"
63
+
64
+ files = Dir['**/*.*']
65
+
66
+ files.each do |file|
67
+ %x!rocco #{file} -o ../html!
68
+ end
69
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
data/bin/rawler CHANGED
@@ -4,7 +4,7 @@ require 'rawler'
4
4
  require File.join(File.dirname(__FILE__), '..', '/vendor/lib-trollop.rb')
5
5
 
6
6
  opts = Trollop::options do
7
- version "rawler 0.0.3 (c) 2011 Oscar Del Ben"
7
+ version "rawler #{Rawler::VERSION} (c) 2011 Oscar Del Ben"
8
8
  banner <<-EOS
9
9
  Rawler is a command line utility for parsing links on a website
10
10
 
@@ -1,24 +1,74 @@
1
+ # **Rawler** is a command line tool for finding broken links on your website.
2
+ # You can install Rawler by running:
3
+ #
4
+ # gem install rawler
5
+ #
6
+ # To use Rawler type:
7
+ #
8
+ # rawler example.com
9
+ #
10
+ # Type `rawler -h` to see all the available options (including basic auth support).
11
+
12
+ #### Prerequisites
13
+
14
+
1
15
  require 'rubygems'
2
- require 'net/http'
16
+
17
+ # We use [net/https](http://www.ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html) for making requests.
18
+
3
19
  require 'net/https'
20
+
21
+ # We use [nokogiri](http://nokogiri.org/) for parsing web pages.
22
+
4
23
  require 'nokogiri'
24
+
25
+ # We use the [logger](http://www.ruby-doc.org/stdlib/libdoc/logger/rdoc/) utility for handling the output.
26
+
5
27
  require 'logger'
6
28
 
29
+ # We require [rawler/core_extensions](rawler/core_extensions.html) which includes some core extensions we need.
30
+
7
31
  require 'rawler/core_extensions'
8
32
 
33
+ #### The Rawler module
34
+
35
+ # The Rawler module itself is very simple, and it's only used for storing configuration data like the url that we want to fetch, basic username and password.
36
+
9
37
  module Rawler
10
- VERSION = '0.1.0'
38
+ VERSION = '0.1.1'
11
39
 
40
+ # `output` is where we want to direct output. It's set to `$stdout` by default.
41
+
12
42
  mattr_accessor :output
43
+
44
+ # `url` is the url that we want to fetch. We need to keep track of it when parsing other pages to see if they are of the same domain.
45
+
13
46
  mattr_accessor :url
47
+
48
+ # The time we wait between requests, default 3. We don't want to send too many requests to your website!
49
+
14
50
  mattr_accessor :wait
15
51
 
52
+ # Username and Password for basic auth, if needed.
53
+
16
54
  mattr_accessor :username, :password
17
55
 
56
+ # Here we autoload when needed the specific namespaces.
57
+
58
+ # [Rawler::Base](rawler/base.html) is responsible for validating all the pages in a domain. It's where all the magic happens.
59
+
18
60
  autoload :Base, "rawler/base"
61
+
62
+ # [Rawler::Crawler](rawler/crawler.html) is responsible for parsing links inside a page.
63
+
19
64
  autoload :Crawler, "rawler/crawler"
65
+
66
+ # [Rawler::Request](rawler/request.html) contains some helper methods for performing requests.
67
+
20
68
  autoload :Request, "rawler/request"
21
69
 
70
+ # We overwrite url= to automatically add `http://` if needed so that you can simply type `rawler example.com` in the command line.
71
+
22
72
  def self.url=(url)
23
73
  url.strip!
24
74
 
@@ -1,32 +1,50 @@
1
+ #### Rawler workflow
2
+
3
+ # `Rawler::Base` is where all the heavy work is done.
4
+ # When you call `rawler somesite.com`, we create an instance of Rawler::Base and then call `validate`, which recursively validates all the links relative to the domain that we specified.
5
+
1
6
  module Rawler
2
7
 
3
8
  class Base
4
9
 
10
+ # `responses` is used to keep track of which links we have already parsed, so that we won't parse them again and again.
11
+ # TODO: rename `responses` to something more meaningful.
12
+
5
13
  attr_accessor :responses
6
14
 
15
+ # When we instantiate `Rawler::Base` we set some options according to what you specified on the command line.
16
+
7
17
  def initialize(url, output, options={})
8
18
  @responses = {}
9
19
 
10
20
  Rawler.url = URI.escape(url)
21
+ output.sync = true
11
22
  Rawler.output = Logger.new(output)
12
23
  Rawler.username = options[:username]
13
24
  Rawler.password = options[:password]
14
25
  Rawler.wait = options[:wait].to_i
15
26
  end
16
27
 
28
+ # The method used to start the real validation process
29
+
17
30
  def validate
18
31
  validate_links_in_page(Rawler.url)
19
32
  end
20
33
 
21
34
  private
22
35
 
23
- def validate_links_in_page(current_url)
24
- Rawler::Crawler.new(current_url).links.each do |page_url|
25
- validate_page(page_url, current_url)
36
+ # We ask [Rawler::Crawler](crawler.html) for all the links in page and then validate each of them individually.
37
+ # We then sleep for the value of `Rawler.wait` (default 3) between each request to avoid flooding your server with requests.
38
+
39
+ def validate_links_in_page(page)
40
+ Rawler::Crawler.new(page).links.each do |page_url|
41
+ validate_page(page_url, page)
26
42
  sleep(Rawler.wait)
27
43
  end
28
44
  end
29
45
 
46
+ # If we haven't validated the page yet, we check its status code and then validate all the links in the page if it's in the same domain
47
+
30
48
  def validate_page(page_url, from_url)
31
49
  if not_yet_parsed?(page_url)
32
50
  add_status_code(page_url, from_url)
@@ -34,20 +52,33 @@ module Rawler
34
52
  end
35
53
  end
36
54
 
55
+ # This is where we check the specific page status.
56
+
37
57
  def add_status_code(link, from_url)
38
58
  response = Rawler::Request.get(link)
39
59
 
60
+ # We follow a redirect if necessary.
61
+
40
62
  validate_page(response['Location'], from_url) if response['Location']
41
63
 
64
+ # We inform the user about what we got.
65
+
42
66
  record_response(response.code, link, from_url, response['Location'])
67
+
68
+ # We add the current page to `responses` to avoid parsing it again.
69
+
43
70
  responses[link] = { :status => response.code.to_i }
44
71
  rescue Errno::ECONNREFUSED
45
72
  error("Connection refused - #{link} - Called from: #{from_url}")
46
73
  rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT,
47
74
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError, SocketError
48
75
  error("Connection problems - #{link} - Called from: #{from_url}")
76
+ rescue Exception
77
+ error("Unknown error - #{link} - Called from: #{from_url}")
49
78
  end
50
79
 
80
+ # Some helper methods
81
+
51
82
  def same_domain?(link)
52
83
  URI.parse(Rawler.url).host == URI.parse(link).host
53
84
  end
@@ -59,21 +90,30 @@ module Rawler
59
90
  def error(message)
60
91
  Rawler.output.error(message)
61
92
  end
93
+
94
+ # We use this method to inform the user of a page status
62
95
 
63
96
  def record_response(code, link, from_url, redirection=nil)
97
+
98
+ # By default, we just give the status code and the page url
99
+
64
100
  message = "#{code} - #{link}"
65
101
 
102
+ # If the status code is greater than or equal to 300, we also add which url linked to the current page
103
+
66
104
  if code.to_i >= 300
67
105
  message += " - Called from: #{from_url}"
68
106
  end
107
+
108
+ # We add information about redirects, if a redirect was set
69
109
 
70
110
  message += " - Following redirection to: #{redirection}" if redirection
71
111
 
112
+ # Depending on the status code, we use a different method of logger.
113
+
72
114
  code = code.to_i
73
115
  case code / 100
74
116
  when 1
75
- # TODO: check that if a 100 is received
76
- # then there is another status code as well
77
117
  Rawler.output.info(message)
78
118
  when 2 then
79
119
  Rawler.output.info(message)
@@ -1 +1,3 @@
1
- require 'rawler/core_extensions/module'
1
+ # We load the [module](core_extensions/module.html) core extension
2
+
3
+ require 'rawler/core_extensions/module'
@@ -1,3 +1,5 @@
1
+ # Add `attr_accessor` like methods to modules
2
+
1
3
  class Module
2
4
  def mattr_reader(*syms)
3
5
  syms.each do |sym|
@@ -42,4 +44,4 @@ class Module
42
44
  mattr_reader(*syms)
43
45
  mattr_writer(*syms)
44
46
  end
45
- end
47
+ end
@@ -1,39 +1,62 @@
1
+ # `Rawler::Crawler` is responsible for parsing links inside a page
2
+
1
3
  module Rawler
2
4
 
3
5
  class Crawler
4
6
 
5
- attr_accessor :url, :links
7
+ # An instance of Rawler::Crawler has a url which represents the url for which we want to parse links.
8
+
9
+ attr_accessor :url
10
+
11
+ # We want to skip some kind of formats
6
12
 
7
13
  SKIP_FORMATS = /^(javascript|mailto)/
14
+
15
+ # To use this class, just pass it a url
8
16
 
9
17
  def initialize(url)
10
18
  @url = url.strip
11
19
  end
20
+
21
+ # And then call `links` to get its links.
12
22
 
13
23
  def links
24
+ # If the url is on a different domain than the main Rawler.url, or if the page is not html, we return an empty array
14
25
  if different_domain?(url, Rawler.url) || not_html?(url)
15
26
  return []
16
27
  end
17
28
 
29
+ # Otherwise we fetch the page
30
+
18
31
  response = Rawler::Request.get(url)
32
+
33
+ # And kindly ask nokogiri to convert it for us
19
34
 
20
35
  doc = Nokogiri::HTML(response.body)
36
+
37
+ # We then do some magic, search all the links in the document that contain a valid link, and return them.
21
38
  doc.css('a').map { |a| a['href'] }.select { |url| !url.nil? }.map { |url| absolute_url(url) }.select { |url| valid_url?(url) }
22
- rescue Errno::ECONNREFUSED # TODO: add called from
39
+ rescue Errno::ECONNREFUSED
23
40
  write("Couldn't connect to #{url}")
24
41
  []
25
- rescue Errno::ETIMEDOUT # TODO: add called from
42
+ rescue Errno::ETIMEDOUT
26
43
  write("Connection to #{url} timed out")
27
44
  []
28
45
  end
29
46
 
30
47
  private
31
48
 
49
+ # Here's how we transform a relative url to an absolute url
50
+
32
51
  def absolute_url(path)
52
+ # First, encode the url
33
53
  path = URI.encode(path.strip, Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}#]"))
54
+
55
+ # if the url contains a scheme that means it's already absolute
34
56
  if URI.parse(path).scheme
35
57
  path
36
58
  else
59
+ # Otherwise we merge `url` to get the absolute url
37
60
  URI.parse(url).merge(path).to_s
38
61
  end
39
62
  rescue URI::InvalidURIError
@@ -41,7 +64,8 @@ module Rawler
41
64
  nil
42
65
  end
43
66
 
44
- # TODO: add 'called from in a more pragmatic way as an optional parameter
67
+ # Some helper methods
68
+
45
69
  def write(message)
46
70
  Rawler.output.error(message)
47
71
  end
@@ -1,3 +1,5 @@
1
+ # `Rawler::Request` contains some abstraction for making web requests, like automatically adding ssl and basic auth.
2
+
1
3
  module Rawler
2
4
 
3
5
  class Request
@@ -32,4 +34,4 @@ module Rawler
32
34
 
33
35
  end
34
36
 
35
- end
37
+ end
@@ -135,6 +135,18 @@ describe Rawler::Base do
135
135
  rawler.send(:add_status_code, url, from)
136
136
  end
137
137
  end
138
+
139
+ it "should rescue from general errors" do
140
+ url = 'http://example.com'
141
+ from = 'http://other.com'
142
+
143
+ Rawler::Request.should_receive(:get).and_raise
144
+
145
+ output.should_receive(:error).with("Unknown error - #{url} - Called from: #{from}")
146
+
147
+ rawler.send(:add_status_code, url, from)
148
+ end
149
+
138
150
 
139
151
  end
140
152
 
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'rawler'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestRawler < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rawler
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 25
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
8
  - 1
8
- - 0
9
- version: 0.1.0
9
+ - 1
10
+ version: 0.1.1
10
11
  platform: ruby
11
12
  authors:
12
13
  - Oscar Del Ben
@@ -14,71 +15,99 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-03-29 00:00:00 +02:00
18
- default_executable:
18
+ date: 2011-09-16 00:00:00 +02:00
19
+ default_executable: rawler
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
21
- name: nokogiri
22
22
  prerelease: false
23
+ type: :runtime
23
24
  requirement: &id001 !ruby/object:Gem::Requirement
24
25
  none: false
25
26
  requirements:
26
27
  - - ">="
27
28
  - !ruby/object:Gem::Version
29
+ hash: 3
28
30
  segments:
29
31
  - 0
30
32
  version: "0"
31
- type: :runtime
33
+ name: nokogiri
32
34
  version_requirements: *id001
33
35
  - !ruby/object:Gem::Dependency
34
- name: rubyforge
35
36
  prerelease: false
37
+ type: :development
36
38
  requirement: &id002 !ruby/object:Gem::Requirement
37
39
  none: false
38
40
  requirements:
39
41
  - - ">="
40
42
  - !ruby/object:Gem::Version
43
+ hash: 3
41
44
  segments:
42
- - 2
43
45
  - 0
44
- - 4
45
- version: 2.0.4
46
- type: :development
46
+ version: "0"
47
+ name: shoulda
47
48
  version_requirements: *id002
48
49
  - !ruby/object:Gem::Dependency
49
- name: hoe
50
50
  prerelease: false
51
+ type: :development
51
52
  requirement: &id003 !ruby/object:Gem::Requirement
52
53
  none: false
53
54
  requirements:
54
- - - ">="
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ hash: 23
58
+ segments:
59
+ - 1
60
+ - 0
61
+ - 0
62
+ version: 1.0.0
63
+ name: bundler
64
+ version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ prerelease: false
67
+ type: :development
68
+ requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ~>
55
72
  - !ruby/object:Gem::Version
73
+ hash: 7
56
74
  segments:
57
- - 2
75
+ - 1
58
76
  - 6
59
- - 2
60
- version: 2.6.2
77
+ - 4
78
+ version: 1.6.4
79
+ name: jeweler
80
+ version_requirements: *id004
81
+ - !ruby/object:Gem::Dependency
82
+ prerelease: false
61
83
  type: :development
62
- version_requirements: *id003
63
- description: |-
64
- Rawler is a Ruby library that crawls your website and checks the status code for each of your links. Useful for finding dead links.
65
-
66
- Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
67
- email:
68
- - info@oscardelben.com
84
+ requirement: &id005 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ name: rcov
94
+ version_requirements: *id005
95
+ description: Rawler is a tool that crawls the links of your website
96
+ email: info@oscardelben.com
69
97
  executables:
70
98
  - rawler
71
99
  extensions: []
72
100
 
73
101
  extra_rdoc_files:
74
- - Manifest.txt
75
- - README.txt
102
+ - LICENSE.txt
103
+ - README.rdoc
76
104
  files:
77
105
  - Gemfile
78
106
  - Gemfile.lock
79
- - Manifest.txt
80
- - README.txt
107
+ - LICENSE.txt
108
+ - README.rdoc
81
109
  - Rakefile
110
+ - VERSION
82
111
  - bin/rawler
83
112
  - lib/rawler.rb
84
113
  - lib/rawler/base.rb
@@ -92,17 +121,16 @@ files:
92
121
  - spec/lib/rawler_spec.rb
93
122
  - spec/spec.opts
94
123
  - spec/spec_helper.rb
95
- - specs.watchr
96
- - tasks/rspec.rake
124
+ - test/helper.rb
125
+ - test/test_rawler.rb
97
126
  - vendor/lib-trollop.rb
98
127
  has_rdoc: true
99
128
  homepage: http://github.com/oscardelben/rawler
100
- licenses: []
101
-
129
+ licenses:
130
+ - MIT
102
131
  post_install_message:
103
- rdoc_options:
104
- - --main
105
- - README.txt
132
+ rdoc_options: []
133
+
106
134
  require_paths:
107
135
  - lib
108
136
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -110,6 +138,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
110
138
  requirements:
111
139
  - - ">="
112
140
  - !ruby/object:Gem::Version
141
+ hash: 3
113
142
  segments:
114
143
  - 0
115
144
  version: "0"
@@ -118,15 +147,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
147
  requirements:
119
148
  - - ">="
120
149
  - !ruby/object:Gem::Version
150
+ hash: 3
121
151
  segments:
122
152
  - 0
123
153
  version: "0"
124
154
  requirements: []
125
155
 
126
- rubyforge_project: oscardelben
127
- rubygems_version: 1.3.7
156
+ rubyforge_project:
157
+ rubygems_version: 1.6.2
128
158
  signing_key:
129
159
  specification_version: 3
130
- summary: Rawler is a Ruby library that crawls your website and checks the status code for each of your links
160
+ summary: Rawler is a tool that crawls the links of your website
131
161
  test_files: []
132
162
 
@@ -1,21 +0,0 @@
1
- Gemfile
2
- Gemfile.lock
3
- Manifest.txt
4
- README.txt
5
- Rakefile
6
- bin/rawler
7
- lib/rawler.rb
8
- lib/rawler/base.rb
9
- lib/rawler/core_extensions.rb
10
- lib/rawler/core_extensions/module.rb
11
- lib/rawler/crawler.rb
12
- lib/rawler/request.rb
13
- spec/lib/base_spec.rb
14
- spec/lib/rawler/base_spec.rb
15
- spec/lib/rawler/crawler_spec.rb
16
- spec/lib/rawler_spec.rb
17
- spec/spec.opts
18
- spec/spec_helper.rb
19
- specs.watchr
20
- tasks/rspec.rake
21
- vendor/lib-trollop.rb
@@ -1,58 +0,0 @@
1
- # Run me with:
2
- #
3
- # $ watchr specs.watchr
4
-
5
- # --------------------------------------------------
6
- # Convenience Methods
7
- # --------------------------------------------------
8
- def all_test_files
9
- Dir['spec/**/*_spec.rb']
10
- end
11
-
12
- def run_test_matching(thing_to_match)
13
- matches = all_test_files.grep(/#{thing_to_match}/i)
14
- if matches.empty?
15
- puts "Sorry, thanks for playing, but there were no matches for #{thing_to_match}"
16
- else
17
- run matches.join(' ')
18
- end
19
- end
20
-
21
- def run(files_to_run)
22
- puts("Running: #{files_to_run}")
23
- system("clear;rspec -cfs #{files_to_run}")
24
- no_int_for_you
25
- end
26
-
27
- def run_all_tests
28
- run(all_test_files.join(' '))
29
- end
30
-
31
- # --------------------------------------------------
32
- # Watchr Rules
33
- # --------------------------------------------------
34
- watch('^spec/(.*)_spec\.rb' ) { |m| run_test_matching(m[1]) }
35
- watch('^lib/(.*)\.rb' ) { |m| run_test_matching(m[1]) }
36
- watch('^spec/spec_helper\.rb') { run_all_tests }
37
- # --------------------------------------------------
38
- # Signal Handling
39
- # --------------------------------------------------
40
-
41
- def no_int_for_you
42
- @sent_an_int = nil
43
- end
44
-
45
- Signal.trap 'INT' do
46
- if @sent_an_int then
47
- puts " A second INT? Ok, I get the message. Shutting down now."
48
- exit
49
- else
50
- puts " Did you just send me an INT? Ugh. I'll quit for real if you do it again."
51
- @sent_an_int = true
52
- Kernel.sleep 1.5
53
- run_all_tests
54
- end
55
- end
56
-
57
- # vim:ft=ruby
58
-
@@ -1,21 +0,0 @@
1
- begin
2
- require 'spec'
3
- rescue LoadError
4
- require 'rubygems' unless ENV['NO_RUBYGEMS']
5
- require 'spec'
6
- end
7
- begin
8
- require 'spec/rake/spectask'
9
- rescue LoadError
10
- puts <<-EOS
11
- To use rspec for testing you must install rspec gem:
12
- gem install rspec
13
- EOS
14
- exit(0)
15
- end
16
-
17
- desc "Run the specs under spec/models"
18
- Spec::Rake::SpecTask.new do |t|
19
- t.spec_opts = ['--options', "spec/spec.opts"]
20
- t.spec_files = FileList['spec/**/*_spec.rb']
21
- end