rawler 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,9 +1,16 @@
1
1
  source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
2
5
 
3
- gem "nokogiri", "1.4.4"
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ gem 'nokogiri'
4
9
 
5
- group :development, :test do
6
- gem "hoe", "2.6.2"
7
- gem "rspec", "2.4.0"
8
- gem "fakeweb", "1.3.0"
10
+
11
+ group :development do
12
+ gem "shoulda", ">= 0"
13
+ gem "bundler", "~> 1.0.0"
14
+ gem "jeweler", "~> 1.6.4"
15
+ gem "rcov", ">= 0"
9
16
  end
@@ -1,30 +1,22 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- diff-lcs (1.1.2)
5
- fakeweb (1.3.0)
6
- hoe (2.6.2)
7
- rake (>= 0.8.7)
8
- rubyforge (>= 2.0.4)
9
- json_pure (1.5.1)
10
- nokogiri (1.4.4)
11
- rake (0.8.7)
12
- rspec (2.4.0)
13
- rspec-core (~> 2.4.0)
14
- rspec-expectations (~> 2.4.0)
15
- rspec-mocks (~> 2.4.0)
16
- rspec-core (2.4.0)
17
- rspec-expectations (2.4.0)
18
- diff-lcs (~> 1.1.2)
19
- rspec-mocks (2.4.0)
20
- rubyforge (2.0.4)
21
- json_pure (>= 1.1.7)
4
+ git (1.2.5)
5
+ jeweler (1.6.4)
6
+ bundler (~> 1.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ nokogiri (1.5.0)
10
+ rake (0.9.2)
11
+ rcov (0.9.9)
12
+ shoulda (2.11.3)
22
13
 
23
14
  PLATFORMS
24
15
  ruby
25
16
 
26
17
  DEPENDENCIES
27
- fakeweb (= 1.3.0)
28
- hoe (= 2.6.2)
29
- nokogiri (= 1.4.4)
30
- rspec (= 2.4.0)
18
+ bundler (~> 1.0.0)
19
+ jeweler (~> 1.6.4)
20
+ nokogiri
21
+ rcov
22
+ shoulda
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Oscar Del Ben
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,14 +1,10 @@
1
- = rawler
2
-
3
- * http://github.com/oscardelben/rawler
4
-
5
- == DESCRIPTION:
1
+ ### rawler
6
2
 
7
3
  Rawler is a Ruby library that crawls your website and checks the status code for each of your links. Useful for finding dead links.
8
4
 
9
5
  Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
10
6
 
11
- == SYNOPSIS:
7
+ ### SYNOPSIS:
12
8
 
13
9
  rawler http://example.com [options]
14
10
 
@@ -19,40 +15,15 @@ Rawler will only parse pages with content type 'text/html', but it will check fo
19
15
  --version, -v: Print version and exit
20
16
  --help, -h: Show this message
21
17
 
22
- == INSTALL:
18
+ ### INSTALL:
23
19
 
24
20
  gem install rawler
25
21
 
26
- == DEVELOPMENT:
27
-
28
- Run bundle install to install everything you need
29
-
30
- rake test
31
-
32
- To package and run the gem locally:
33
-
34
- rake package
35
- cd pkg
36
- gem install rawler-#{version}.gem
37
-
38
- If you add files, run:
39
-
40
- rake check_manifest
41
-
42
- And add them to the Manifest file.
43
-
44
- == CONTRIBUTORS:
45
-
46
- * bcoob
47
- * Hugh Sasse
48
- * Ken Egozi
49
- * Robert Glaser
50
- * Stefan Schüßler
51
- * Vesa Vänskä
22
+ ### CONTRIBUTORS:
52
23
 
53
- See also https://github.com/oscardelben/rawler/contributors
24
+ Many. See [https://github.com/oscardelben/rawler/contributors](https://github.com/oscardelben/rawler/contributors)
54
25
 
55
- == LICENSE:
26
+ ### LICENSE:
56
27
 
57
28
  (The MIT License)
58
29
 
data/Rakefile CHANGED
@@ -1,35 +1,69 @@
1
- # -*- ruby -*-
1
+ # encoding: utf-8
2
2
 
3
3
  require 'rubygems'
4
- require 'hoe'
4
+ require 'bundler'
5
+ require 'fileutils'
5
6
 
6
- # require 'bundler'
7
- # Bundler::GemHelper.install_tasks
7
+ begin
8
+ Bundler.setup(:default, :development)
9
+ rescue Bundler::BundlerError => e
10
+ $stderr.puts e.message
11
+ $stderr.puts "Run `bundle install` to install missing gems"
12
+ exit e.status_code
13
+ end
14
+ require 'rake'
8
15
 
9
- require 'rspec/core/rake_task'
10
- RSpec::Core::RakeTask.new(:test)
16
+ require 'jeweler'
17
+ Jeweler::Tasks.new do |gem|
18
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
19
+ gem.name = "rawler"
20
+ gem.homepage = "http://github.com/oscardelben/rawler"
21
+ gem.license = "MIT"
22
+ gem.summary = %Q{Rawler is a tool that crawls the links of your website}
23
+ gem.description = %Q{Rawler is a tool that crawls the links of your website}
24
+ gem.email = "info@oscardelben.com"
25
+ gem.authors = ["Oscar Del Ben"]
26
+ gem.executables = ['rawler']
27
+ # dependencies defined in Gemfile
28
+ end
29
+ Jeweler::RubygemsDotOrgTasks.new
11
30
 
12
- # Hoe.plugin :compiler
13
- # Hoe.plugin :gem_prelude_sucks
14
- # Hoe.plugin :inline
15
- # Hoe.plugin :racc
16
- # Hoe.plugin :rubyforge
31
+ require 'rake/testtask'
32
+ Rake::TestTask.new(:test) do |test|
33
+ test.libs << 'lib' << 'test'
34
+ test.pattern = 'test/**/test_*.rb'
35
+ test.verbose = true
36
+ end
17
37
 
18
- Hoe.spec 'rawler' do
19
- # HEY! If you fill these out in ~/.hoe_template/Rakefile.erb then
20
- # you'll never have to touch them again!
21
- # (delete this comment too, of course)
38
+ require 'rcov/rcovtask'
39
+ Rcov::RcovTask.new do |test|
40
+ test.libs << 'test'
41
+ test.pattern = 'test/**/test_*.rb'
42
+ test.verbose = true
43
+ test.rcov_opts << '--exclude "gems/*"'
44
+ end
22
45
 
23
- developer('Oscar Del Ben', 'info@oscardelben.com')
46
+ task :default => :test
24
47
 
25
- self.rubyforge_name = 'oscardelben'
26
-
27
- extra_deps << ['nokogiri']
28
- end
48
+ require 'rake/rdoctask'
49
+ Rake::RDocTask.new do |rdoc|
50
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
29
51
 
30
- desc 'Console'
31
- task :console do
32
- exec 'irb -rubygems -I lib -r rawler.rb'
52
+ rdoc.rdoc_dir = 'rdoc'
53
+ rdoc.title = "rawler #{version}"
54
+ rdoc.rdoc_files.include('README*')
55
+ rdoc.rdoc_files.include('lib/**/*.rb')
33
56
  end
34
57
 
35
- # vim: syntax=ruby
58
+ desc 'generate docs'
59
+ task :rocco do
60
+ #%x!rm -r html/*!
61
+
62
+ Dir.chdir "lib"
63
+
64
+ files = Dir['**/*.*']
65
+
66
+ files.each do |file|
67
+ %x!rocco #{file} -o ../html!
68
+ end
69
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
data/bin/rawler CHANGED
@@ -4,7 +4,7 @@ require 'rawler'
4
4
  require File.join(File.dirname(__FILE__), '..', '/vendor/lib-trollop.rb')
5
5
 
6
6
  opts = Trollop::options do
7
- version "rawler 0.0.3 (c) 2011 Oscar Del Ben"
7
+ version "rawler #{Rawler::VERSION} (c) 2011 Oscar Del Ben"
8
8
  banner <<-EOS
9
9
  Rawler is a command line utility for parsing links on a website
10
10
 
@@ -1,24 +1,74 @@
1
+ # **Rawler** is a command line tool for finding broken links on your website.
2
+ # You can install Rawler by running:
3
+ #
4
+ # gem install rawler
5
+ #
6
+ # To use Rawler type:
7
+ #
8
+ # rawler example.com
9
+ #
10
+ # Type `rawler -h` to see all the available options (including basic auth support).
11
+
12
+ #### Prerequisites
13
+
14
+
1
15
  require 'rubygems'
2
- require 'net/http'
16
+
17
+ # We use [net/https](http://www.ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html) for making requests.
18
+
3
19
  require 'net/https'
20
+
21
+ # We use [nokogiri](http://nokogiri.org/) for parsing web pages.
22
+
4
23
  require 'nokogiri'
24
+
25
+ # We use the [logger](http://www.ruby-doc.org/stdlib/libdoc/logger/rdoc/) utility for handling the output.
26
+
5
27
  require 'logger'
6
28
 
29
+ # We require [rawler/core_extensions](rawler/core_extensions.html) which includes some core extensions we need.
30
+
7
31
  require 'rawler/core_extensions'
8
32
 
33
+ #### The Rawler module
34
+
35
+ # The Rawler module itself is very simple, and it's only used for storing configuration data like the url that we want to fetch, basic username and password.
36
+
9
37
  module Rawler
10
- VERSION = '0.1.0'
38
+ VERSION = '0.1.1'
11
39
 
40
+ # `output` is where we want to direct output. It's set to `$stdout` by default.
41
+
12
42
  mattr_accessor :output
43
+
44
+ # `url` is the url that we want to fetch. We need to keep track of it when parsing other pages to see if they are of the same domain.
45
+
13
46
  mattr_accessor :url
47
+
48
+ # The time we wait between requests, default 3. We don't want to send too many requests to your website!
49
+
14
50
  mattr_accessor :wait
15
51
 
52
+ # Username and Password for basic auth, if needed.
53
+
16
54
  mattr_accessor :username, :password
17
55
 
56
+ # Here we autoload the specific namespaces when they are needed.
57
+
58
+ # [Rawler::Base](rawler/base.html) is responsible for validating all the pages in a domain. It's where all the magic happens.
59
+
18
60
  autoload :Base, "rawler/base"
61
+
62
+ # [Rawler::Crawler](rawler/crawler.html) is responsible for parsing links inside a page.
63
+
19
64
  autoload :Crawler, "rawler/crawler"
65
+
66
+ # [Rawler::Request](rawler/request.html) contains some helper methods for performing requests.
67
+
20
68
  autoload :Request, "rawler/request"
21
69
 
70
+ # We overwrite url= to automatically add `http://` if needed so that you can simply type `rawler example.com` in the command line.
71
+
22
72
  def self.url=(url)
23
73
  url.strip!
24
74
 
@@ -1,32 +1,50 @@
1
+ #### Rawler workflow
2
+
3
+ # `Rawler::Base` is where all the heavy work is done.
4
+ # When you call `rawler somesite.com`, we create an instance of Rawler::Base and then call `validate`, which recursively validates all the links relative to the domain that we specified.
5
+
1
6
  module Rawler
2
7
 
3
8
  class Base
4
9
 
10
+ # `responses` is used to keep track of which links we have already parsed, so that we won't parse them again and again.
11
+ # TODO: rename `responses` to something more meaningful.
12
+
5
13
  attr_accessor :responses
6
14
 
15
+ # When we instantiate `Rawler::Base` we set some options according to what you specified on the command line.
16
+
7
17
  def initialize(url, output, options={})
8
18
  @responses = {}
9
19
 
10
20
  Rawler.url = URI.escape(url)
21
+ output.sync = true
11
22
  Rawler.output = Logger.new(output)
12
23
  Rawler.username = options[:username]
13
24
  Rawler.password = options[:password]
14
25
  Rawler.wait = options[:wait].to_i
15
26
  end
16
27
 
28
+ # The method used to start the real validation process
29
+
17
30
  def validate
18
31
  validate_links_in_page(Rawler.url)
19
32
  end
20
33
 
21
34
  private
22
35
 
23
- def validate_links_in_page(current_url)
24
- Rawler::Crawler.new(current_url).links.each do |page_url|
25
- validate_page(page_url, current_url)
36
+ # We ask [Rawler::Crawler](crawler.html) for all the links in page and then validate each of them individually.
37
+ # We then sleep for the value of `Rawler.wait` (default 3) between each request to avoid DoSing your server.
38
+
39
+ def validate_links_in_page(page)
40
+ Rawler::Crawler.new(page).links.each do |page_url|
41
+ validate_page(page_url, page)
26
42
  sleep(Rawler.wait)
27
43
  end
28
44
  end
29
45
 
46
+ # If we haven't validated the page yet, we check its status code and then validate all the links in the page if it's in the same domain
47
+
30
48
  def validate_page(page_url, from_url)
31
49
  if not_yet_parsed?(page_url)
32
50
  add_status_code(page_url, from_url)
@@ -34,20 +52,33 @@ module Rawler
34
52
  end
35
53
  end
36
54
 
55
+ # This is where we check the specific page status.
56
+
37
57
  def add_status_code(link, from_url)
38
58
  response = Rawler::Request.get(link)
39
59
 
60
+ # We follow a redirect if necessary.
61
+
40
62
  validate_page(response['Location'], from_url) if response['Location']
41
63
 
64
+ # We inform the user about what we got.
65
+
42
66
  record_response(response.code, link, from_url, response['Location'])
67
+
68
+ # We add the current page to `responses` to avoid parsing it again.
69
+
43
70
  responses[link] = { :status => response.code.to_i }
44
71
  rescue Errno::ECONNREFUSED
45
72
  error("Connection refused - #{link} - Called from: #{from_url}")
46
73
  rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT,
47
74
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError, SocketError
48
75
  error("Connection problems - #{link} - Called from: #{from_url}")
76
+ rescue Exception
77
+ error("Unknown error - #{link} - Called from: #{from_url}")
49
78
  end
50
79
 
80
+ # Some helper methods
81
+
51
82
  def same_domain?(link)
52
83
  URI.parse(Rawler.url).host == URI.parse(link).host
53
84
  end
@@ -59,21 +90,30 @@ module Rawler
59
90
  def error(message)
60
91
  Rawler.output.error(message)
61
92
  end
93
+
94
+ # We use this method to inform the user of a page status
62
95
 
63
96
  def record_response(code, link, from_url, redirection=nil)
97
+
98
+ # By default, we just give the status code and the page url
99
+
64
100
  message = "#{code} - #{link}"
65
101
 
102
+ # If the status code is greater than or equal to 300, we also add which url linked to the current page
103
+
66
104
  if code.to_i >= 300
67
105
  message += " - Called from: #{from_url}"
68
106
  end
107
+
108
+ # We add information about redirects, if a redirect was set
69
109
 
70
110
  message += " - Following redirection to: #{redirection}" if redirection
71
111
 
112
+ # Depending on the status code, we use a different method of logger.
113
+
72
114
  code = code.to_i
73
115
  case code / 100
74
116
  when 1
75
- # TODO: check that if a 100 is received
76
- # then there is another status code as well
77
117
  Rawler.output.info(message)
78
118
  when 2 then
79
119
  Rawler.output.info(message)
@@ -1 +1,3 @@
1
- require 'rawler/core_extensions/module'
1
+ # We load the [module](core_extensions/module.html) core extension
2
+
3
+ require 'rawler/core_extensions/module'
@@ -1,3 +1,5 @@
1
+ # Add `attr_accessor` like methods to modules
2
+
1
3
  class Module
2
4
  def mattr_reader(*syms)
3
5
  syms.each do |sym|
@@ -42,4 +44,4 @@ class Module
42
44
  mattr_reader(*syms)
43
45
  mattr_writer(*syms)
44
46
  end
45
- end
47
+ end
@@ -1,39 +1,62 @@
1
+ # `Rawler::Crawler` is responsible for parsing links inside a page
2
+
1
3
  module Rawler
2
4
 
3
5
  class Crawler
4
6
 
5
- attr_accessor :url, :links
7
+ # An instance of Rawler::Crawler has a url which represents the url for which we want to parse links.
8
+
9
+ attr_accessor :url
10
+
11
+ # We want to skip some kinds of links (javascript and mailto)
6
12
 
7
13
  SKIP_FORMATS = /^(javascript|mailto)/
14
+
15
+ # To use this class, just pass it a url
8
16
 
9
17
  def initialize(url)
10
18
  @url = url.strip
11
19
  end
20
+
21
+ # And then call `links` to get its links.
12
22
 
13
23
  def links
24
+ # If the url is on a different domain than the main Rawler.url, or if the page is not html, we return an empty array
14
25
  if different_domain?(url, Rawler.url) || not_html?(url)
15
26
  return []
16
27
  end
17
28
 
29
+ # Otherwise we fetch the page
30
+
18
31
  response = Rawler::Request.get(url)
32
+
33
+ # And kindly ask nokogiri to convert it for us
19
34
 
20
35
  doc = Nokogiri::HTML(response.body)
36
+
37
+ # We then do some magic, search all the links in the document that contain a valid link, and return them.
21
38
  doc.css('a').map { |a| a['href'] }.select { |url| !url.nil? }.map { |url| absolute_url(url) }.select { |url| valid_url?(url) }
22
- rescue Errno::ECONNREFUSED # TODO: add called from
39
+ rescue Errno::ECONNREFUSED
23
40
  write("Couldn't connect to #{url}")
24
41
  []
25
- rescue Errno::ETIMEDOUT # TODO: add called from
42
+ rescue Errno::ETIMEDOUT
26
43
  write("Connection to #{url} timed out")
27
44
  []
28
45
  end
29
46
 
30
47
  private
31
48
 
49
+ # Here's how we transform a relative url to an absolute url
50
+
32
51
  def absolute_url(path)
52
+ # First, encode the url
33
53
  path = URI.encode(path.strip, Regexp.new("[^#{URI::PATTERN::UNRESERVED}#{URI::PATTERN::RESERVED}#]"))
54
+
55
+ # if the url contains a scheme that means it's already absolute
34
56
  if URI.parse(path).scheme
35
57
  path
36
58
  else
59
+ # Otherwise we merge `url` to get the absolute url
37
60
  URI.parse(url).merge(path).to_s
38
61
  end
39
62
  rescue URI::InvalidURIError
@@ -41,7 +64,8 @@ module Rawler
41
64
  nil
42
65
  end
43
66
 
44
- # TODO: add 'called from in a more pragmatic way as an optional parameter
67
+ # Some helper methods
68
+
45
69
  def write(message)
46
70
  Rawler.output.error(message)
47
71
  end
@@ -1,3 +1,5 @@
1
+ # `Rawler::Request` contains some abstraction for making web requests, like automatically adding ssl and basic auth.
2
+
1
3
  module Rawler
2
4
 
3
5
  class Request
@@ -32,4 +34,4 @@ module Rawler
32
34
 
33
35
  end
34
36
 
35
- end
37
+ end
@@ -135,6 +135,18 @@ describe Rawler::Base do
135
135
  rawler.send(:add_status_code, url, from)
136
136
  end
137
137
  end
138
+
139
+ it "should rescue from general errors" do
140
+ url = 'http://example.com'
141
+ from = 'http://other.com'
142
+
143
+ Rawler::Request.should_receive(:get).and_raise
144
+
145
+ output.should_receive(:error).with("Unknown error - #{url} - Called from: #{from}")
146
+
147
+ rawler.send(:add_status_code, url, from)
148
+ end
149
+
138
150
 
139
151
  end
140
152
 
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'rawler'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestRawler < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rawler
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 25
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
8
  - 1
8
- - 0
9
- version: 0.1.0
9
+ - 1
10
+ version: 0.1.1
10
11
  platform: ruby
11
12
  authors:
12
13
  - Oscar Del Ben
@@ -14,71 +15,99 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-03-29 00:00:00 +02:00
18
- default_executable:
18
+ date: 2011-09-16 00:00:00 +02:00
19
+ default_executable: rawler
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
21
- name: nokogiri
22
22
  prerelease: false
23
+ type: :runtime
23
24
  requirement: &id001 !ruby/object:Gem::Requirement
24
25
  none: false
25
26
  requirements:
26
27
  - - ">="
27
28
  - !ruby/object:Gem::Version
29
+ hash: 3
28
30
  segments:
29
31
  - 0
30
32
  version: "0"
31
- type: :runtime
33
+ name: nokogiri
32
34
  version_requirements: *id001
33
35
  - !ruby/object:Gem::Dependency
34
- name: rubyforge
35
36
  prerelease: false
37
+ type: :development
36
38
  requirement: &id002 !ruby/object:Gem::Requirement
37
39
  none: false
38
40
  requirements:
39
41
  - - ">="
40
42
  - !ruby/object:Gem::Version
43
+ hash: 3
41
44
  segments:
42
- - 2
43
45
  - 0
44
- - 4
45
- version: 2.0.4
46
- type: :development
46
+ version: "0"
47
+ name: shoulda
47
48
  version_requirements: *id002
48
49
  - !ruby/object:Gem::Dependency
49
- name: hoe
50
50
  prerelease: false
51
+ type: :development
51
52
  requirement: &id003 !ruby/object:Gem::Requirement
52
53
  none: false
53
54
  requirements:
54
- - - ">="
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ hash: 23
58
+ segments:
59
+ - 1
60
+ - 0
61
+ - 0
62
+ version: 1.0.0
63
+ name: bundler
64
+ version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ prerelease: false
67
+ type: :development
68
+ requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ~>
55
72
  - !ruby/object:Gem::Version
73
+ hash: 7
56
74
  segments:
57
- - 2
75
+ - 1
58
76
  - 6
59
- - 2
60
- version: 2.6.2
77
+ - 4
78
+ version: 1.6.4
79
+ name: jeweler
80
+ version_requirements: *id004
81
+ - !ruby/object:Gem::Dependency
82
+ prerelease: false
61
83
  type: :development
62
- version_requirements: *id003
63
- description: |-
64
- Rawler is a Ruby library that crawls your website and checks the status code for each of your links. Useful for finding dead links.
65
-
66
- Rawler will only parse pages with content type 'text/html', but it will check for the response code of every link.
67
- email:
68
- - info@oscardelben.com
84
+ requirement: &id005 !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ name: rcov
94
+ version_requirements: *id005
95
+ description: Rawler is a tool that crawls the links of your website
96
+ email: info@oscardelben.com
69
97
  executables:
70
98
  - rawler
71
99
  extensions: []
72
100
 
73
101
  extra_rdoc_files:
74
- - Manifest.txt
75
- - README.txt
102
+ - LICENSE.txt
103
+ - README.rdoc
76
104
  files:
77
105
  - Gemfile
78
106
  - Gemfile.lock
79
- - Manifest.txt
80
- - README.txt
107
+ - LICENSE.txt
108
+ - README.rdoc
81
109
  - Rakefile
110
+ - VERSION
82
111
  - bin/rawler
83
112
  - lib/rawler.rb
84
113
  - lib/rawler/base.rb
@@ -92,17 +121,16 @@ files:
92
121
  - spec/lib/rawler_spec.rb
93
122
  - spec/spec.opts
94
123
  - spec/spec_helper.rb
95
- - specs.watchr
96
- - tasks/rspec.rake
124
+ - test/helper.rb
125
+ - test/test_rawler.rb
97
126
  - vendor/lib-trollop.rb
98
127
  has_rdoc: true
99
128
  homepage: http://github.com/oscardelben/rawler
100
- licenses: []
101
-
129
+ licenses:
130
+ - MIT
102
131
  post_install_message:
103
- rdoc_options:
104
- - --main
105
- - README.txt
132
+ rdoc_options: []
133
+
106
134
  require_paths:
107
135
  - lib
108
136
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -110,6 +138,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
110
138
  requirements:
111
139
  - - ">="
112
140
  - !ruby/object:Gem::Version
141
+ hash: 3
113
142
  segments:
114
143
  - 0
115
144
  version: "0"
@@ -118,15 +147,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
147
  requirements:
119
148
  - - ">="
120
149
  - !ruby/object:Gem::Version
150
+ hash: 3
121
151
  segments:
122
152
  - 0
123
153
  version: "0"
124
154
  requirements: []
125
155
 
126
- rubyforge_project: oscardelben
127
- rubygems_version: 1.3.7
156
+ rubyforge_project:
157
+ rubygems_version: 1.6.2
128
158
  signing_key:
129
159
  specification_version: 3
130
- summary: Rawler is a Ruby library that crawls your website and checks the status code for each of your links
160
+ summary: Rawler is a tool that crawls the links of your website
131
161
  test_files: []
132
162
 
@@ -1,21 +0,0 @@
1
- Gemfile
2
- Gemfile.lock
3
- Manifest.txt
4
- README.txt
5
- Rakefile
6
- bin/rawler
7
- lib/rawler.rb
8
- lib/rawler/base.rb
9
- lib/rawler/core_extensions.rb
10
- lib/rawler/core_extensions/module.rb
11
- lib/rawler/crawler.rb
12
- lib/rawler/request.rb
13
- spec/lib/base_spec.rb
14
- spec/lib/rawler/base_spec.rb
15
- spec/lib/rawler/crawler_spec.rb
16
- spec/lib/rawler_spec.rb
17
- spec/spec.opts
18
- spec/spec_helper.rb
19
- specs.watchr
20
- tasks/rspec.rake
21
- vendor/lib-trollop.rb
@@ -1,58 +0,0 @@
1
- # Run me with:
2
- #
3
- # $ watchr specs.watchr
4
-
5
- # --------------------------------------------------
6
- # Convenience Methods
7
- # --------------------------------------------------
8
- def all_test_files
9
- Dir['spec/**/*_spec.rb']
10
- end
11
-
12
- def run_test_matching(thing_to_match)
13
- matches = all_test_files.grep(/#{thing_to_match}/i)
14
- if matches.empty?
15
- puts "Sorry, thanks for playing, but there were no matches for #{thing_to_match}"
16
- else
17
- run matches.join(' ')
18
- end
19
- end
20
-
21
- def run(files_to_run)
22
- puts("Running: #{files_to_run}")
23
- system("clear;rspec -cfs #{files_to_run}")
24
- no_int_for_you
25
- end
26
-
27
- def run_all_tests
28
- run(all_test_files.join(' '))
29
- end
30
-
31
- # --------------------------------------------------
32
- # Watchr Rules
33
- # --------------------------------------------------
34
- watch('^spec/(.*)_spec\.rb' ) { |m| run_test_matching(m[1]) }
35
- watch('^lib/(.*)\.rb' ) { |m| run_test_matching(m[1]) }
36
- watch('^spec/spec_helper\.rb') { run_all_tests }
37
- # --------------------------------------------------
38
- # Signal Handling
39
- # --------------------------------------------------
40
-
41
- def no_int_for_you
42
- @sent_an_int = nil
43
- end
44
-
45
- Signal.trap 'INT' do
46
- if @sent_an_int then
47
- puts " A second INT? Ok, I get the message. Shutting down now."
48
- exit
49
- else
50
- puts " Did you just send me an INT? Ugh. I'll quit for real if you do it again."
51
- @sent_an_int = true
52
- Kernel.sleep 1.5
53
- run_all_tests
54
- end
55
- end
56
-
57
- # vim:ft=ruby
58
-
@@ -1,21 +0,0 @@
1
- begin
2
- require 'spec'
3
- rescue LoadError
4
- require 'rubygems' unless ENV['NO_RUBYGEMS']
5
- require 'spec'
6
- end
7
- begin
8
- require 'spec/rake/spectask'
9
- rescue LoadError
10
- puts <<-EOS
11
- To use rspec for testing you must install rspec gem:
12
- gem install rspec
13
- EOS
14
- exit(0)
15
- end
16
-
17
- desc "Run the specs under spec/models"
18
- Spec::Rake::SpecTask.new do |t|
19
- t.spec_opts = ['--options', "spec/spec.opts"]
20
- t.spec_files = FileList['spec/**/*_spec.rb']
21
- end