shelob 0.1.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +8 -0
- data/bin/shelob +38 -0
- data/lib/extractor.rb +23 -0
- data/lib/link_result.rb +13 -0
- data/lib/resolver.rb +16 -0
- data/lib/shelob/version.rb +3 -0
- data/lib/shelob.rb +64 -0
- data/shelob.gemspec +28 -0
- data/test/test_extractor.rb +37 -0
- data/test/test_link_result.rb +29 -0
- data/test/test_resolver.rb +31 -0
- data/test/test_shelob.rb +97 -0
- metadata +165 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4fd39cfeaa059a074821a0d60ee93b464ff48819
|
4
|
+
data.tar.gz: fc04fd19c13c1a970abfed9c126854da9ad6eaeb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 72b0645723887dfb1942108c93e5cfda5f50de550d5a240f5da0119763fbd02745fdce0f88038b42849b863e54a81cf214db6285badab74eed6dbb082debd8ab
|
7
|
+
data.tar.gz: e53ddc74da61a78b19acde7d9e87859c9682dc48bd044f736f51942d1ee51d79b573b868ee5c72e3c87e9ab1e4e922f6c3760357947a2497492f1b1ce9ecd693
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Benjamin Nicholas
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Shelob
|
2
|
+
|
3
|
+
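A giant spider that starts on a given page, finds all links on the page, ensures they resolve, and recurses if the link is underneath the starting url.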
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'shelob'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install shelob
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
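Run `shelob [options] root_url`. Pass `-v` to print a dot per checked URL, `-r` to print every URL as it is checked, or `-h` to show the help message.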
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bin/shelob
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'shelob'
|
5
|
+
|
6
|
+
# Crawls the site rooted at args[0] and prints every failed link.
#
# args    - Array of command-line arguments; the first element is the root URL.
# options - Hash of parsed options; :verbose is 0 (quiet), 1 (progress dots)
#           or 2 (print each URL). It is passed in explicitly because
#           top-level locals are not visible inside a method body.
#
# Returns true when no failed links were found, false otherwise, so the
# result can be handed straight to Kernel#exit.
def main(args, options = { verbose: 0 })
  failures = Shelob::Spider.new(args[0], verbose: options[:verbose]).check
  puts failures
  failures.empty?
end

options = { verbose: 0 }

# Keep the parser itself (not the return value of #parse!) so the usage
# banner can be printed later.
optparse = OptionParser.new do |opts|
  opts.banner = "Usage: shelob [options] root_url"

  # The block receives false for the --no- form, which switches output off.
  opts.on('-v', "--[no-]verbose", "Print simple information(overrides -r)") do |enabled|
    options[:verbose] = enabled ? 1 : 0
  end

  opts.on('-r', '--[no-]really-verbose', "Print lots of information(overrides -v)") do |enabled|
    options[:verbose] = enabled ? 2 : 0
  end

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
end

begin
  optparse.parse!
rescue OptionParser::ParseError => ex
  # Unknown or malformed switches: explain and show usage instead of a backtrace.
  STDERR.puts ex.message
  STDERR.puts optparse
  exit 1
end

if ARGV.empty?
  puts optparse
  exit 1
end

begin
  # exit(true) reports success, exit(false) reports failure.
  exit main(ARGV, options)
rescue => ex
  STDERR.puts ex.message
  exit 1
end
|
38
|
+
|
data/lib/extractor.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Shelob
  # Pulls the link targets out of a fetched page.
  class Extractor
    # fetched - an object responding to #url (the page address) and #body
    #           (the HTML source).
    def initialize(fetched)
      @fetched = fetched
    end

    # Collects the href of every <a> element in the page body.
    #
    # Anchors without an href attribute are skipped. Root-relative links
    # ("/path") are prefixed with the page's scheme, host and non-default
    # port; protocol-relative links ("//host/path") only receive the page's
    # scheme. All other hrefs are returned untouched.
    #
    # Returns an Array of Strings in document order.
    def extract
      document = Nokogiri::HTML(@fetched.body)
      hrefs = document.css('a').map { |anchor| anchor['href'] }.compact
      hrefs.map { |href| href.start_with?('/') ? absolute(href) : href }
    end

    private

    # Turns a "/path" or "//host/path" href into an absolute URL string.
    def absolute(href)
      return "#{page_uri.scheme}:#{href}" if href.start_with?('//')

      "#{origin}#{href}"
    end

    # scheme://host[:port] of the fetched page; the port is only spelled out
    # when it differs from the scheme's default.
    def origin
      @origin ||= begin
        uri = page_uri
        port = uri.port && uri.port != uri.default_port ? ":#{uri.port}" : ''
        "#{uri.scheme}://#{uri.host}#{port}"
      end
    end

    # Parsed lazily so pages without relative links never parse the URL.
    def page_uri
      @page_uri ||= URI(@fetched.url)
    end
  end
end
|
data/lib/link_result.rb
ADDED
data/lib/resolver.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'link_result'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
module Shelob
  # Issues a single HTTP GET for one URL and wraps the outcome.
  class Resolver
    # url - String address to request; parsed into a URI up front.
    def initialize(url)
      @uri = URI(url)
    end

    # Performs the request.
    #
    # Returns a LinkResult built from the requested URL, the numeric
    # response status and the response body.
    def resolve
      response = Net::HTTP.get_response(@uri)
      address = @uri.to_s
      status = response.code.to_i

      LinkResult.new(address, status, response.body)
    end
  end
end
|
data/lib/shelob.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require "shelob/version"
|
2
|
+
require "resolver"
|
3
|
+
require "extractor"
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module Shelob
  # Walks every page underneath a starting URL and records links that fail.
  class Spider
    attr_accessor :hostname

    # hostname - String URL to start from; only links that begin with this
    #            prefix are followed.
    # options  - Hash; :verbose of 1 prints a dot per request, 2 prints each
    #            URL as it is checked. Anything else is silent.
    def initialize(hostname, options = {})
      @hostname = hostname
      @queue = [hostname]
      @urls = Set.new(@queue)
      # Every URL ever queued. Tracking this separately from @urls (the URLs
      # already requested) keeps a link that appears on several pages from
      # being queued, requested and reported more than once.
      @seen = Set.new(@queue)
      @failures = []
      @verbose = options[:verbose] == 1
      @chatty = options[:verbose] == 2
    end

    # Processes the queue until it is empty, requesting each URL once.
    #
    # Returns the Array of results whose status was 400 or higher.
    def check
      until @queue.empty?
        url = @queue.shift
        @urls << url

        print '.' if @verbose
        print "#{url}... " if @chatty

        fetch = Resolver.new(url).resolve
        @failures << fetch if fetch.status >= 400

        # Set#add? returns nil for a URL that was already queued, so each
        # in-scope link passes this filter only the first time it is seen.
        fresh = Extractor.new(fetch).extract.select do |link|
          link.start_with?(@hostname) && @seen.add?(link)
        end

        puts "checked!" if @chatty

        @queue.concat(fresh)
      end

      @failures
    end

    # Number of URLs still waiting to be requested.
    def remaining
      @queue.count
    end

    # Number of distinct URLs recorded as requested.
    def requests
      @urls.count
    end

    # The Set of URLs recorded as requested.
    def fetched
      @urls
    end
  end
end
|
data/shelob.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'shelob/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name = "shelob"
  spec.version = Shelob::VERSION
  spec.authors = ["Benjamin Nicholas"]
  spec.email = ["bnicholas@brandnetworksinc.com"]
  spec.description = %q{A giant spider that starts on a given page, finds all links on the page, ensure they resolve, and recurses if the link is underneath the starting url}
  spec.summary = %q{Spider a site and check links}
  spec.license = "MIT"

  # Package contents come from whatever git currently tracks.
  spec.files = `git ls-files`.split($/)
  spec.executables = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Tooling needed only while developing the gem.
  spec.add_development_dependency "bundler", "~> 1.3"
  %w[rake minitest webmock guard guard-minitest].each do |tool|
    spec.add_development_dependency tool
  end

  # Needed at runtime for HTML parsing.
  spec.add_runtime_dependency "nokogiri"
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'extractor'
|
3
|
+
require 'link_result'
|
4
|
+
|
5
|
+
describe Shelob::Extractor, "Link extracting module" do

  describe "when created" do
    it "should be created with a LinkResult" do
      result = LinkResult.new("http://google.com", 200, '<html><head><title>resume</title></head><body><a href="http://bmnick.com">home</a><a href="http://bmnick.com/resume/resume.pdf">pdf</a></body></html>')
      # Build the extractor itself; constructing only the LinkResult would
      # leave Extractor#initialize untested.
      Shelob::Extractor.new(result).wont_be_nil
    end
  end

  describe "when used" do
    before do
      # A page with two absolute links, and a page with one root-relative link.
      @result = LinkResult.new("http://google.com", 200, '<html><head><title>hi</title></head><body><a href="http://bing.com">bing</a><a href="http://yahoo.com">yahoo</a></body></html>')
      @result2 = LinkResult.new("http://google.com/something", 200, '<html><head><title>hi</title></head><body><a href="/about">about</a></body></html>')
      @le = Shelob::Extractor.new(@result)
      @le2 = Shelob::Extractor.new(@result2)
    end

    it "should return a list of the links in the page" do
      extracts = @le.extract
      extracts.must_be_kind_of Array
      extracts.must_equal ["http://bing.com", "http://yahoo.com"]
    end

    it "should transform relative links to absolute" do
      extracts = @le2.extract
      extracts.must_be_kind_of Array
      extracts.must_equal ["http://google.com/about"]
    end

  end # describe

end # describe
|
37
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'link_result'
|
3
|
+
|
4
|
+
describe LinkResult, "Link fetch result" do
  # Shared fixture values; the result is rebuilt lazily for every example.
  let(:address) { "http://google.com" }
  let(:markup) { '<html><head><title>hi</title></head><body><a href="http://bing.com">bing</a><a href="http://yahoo.com">yahoo</a></body></html>' }
  let(:result) { LinkResult.new(address, 200, markup) }

  describe "when created" do
    it "should take three arguments" do
      result.wont_be_nil
    end

    it "should save arguments" do
      result.url.must_equal "http://google.com"
      result.status.must_equal 200
      result.body.must_equal '<html><head><title>hi</title></head><body><a href="http://bing.com">bing</a><a href="http://yahoo.com">yahoo</a></body></html>'
    end

    it "should be immutable" do
      # No writer is exposed, so assignment must fail with NoMethodError.
      attempt = proc { result.status = 404 }
      attempt.must_raise NoMethodError
    end

    it "should have a clean string rep" do
      result.to_s.must_equal "200: http://google.com"
    end
  end
end
|
29
|
+
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'webmock/minitest'
|
3
|
+
require 'resolver'
|
4
|
+
require 'link_result'
|
5
|
+
|
6
|
+
# The request is stubbed rather than sent to the real site, so the suite
# gives the same result offline and does not depend on a third-party page.
describe Shelob::Resolver, "Link fetching module" do
  let(:target) { "http://bmnick.com/ruby-c-extensions" }

  before do
    stub_request(:get, target).to_return(status: 200, body: '<html><head><title>CExt</title></head><body>CExt</body></html>')
  end

  describe "when created" do
    it "should be created with a url" do
      Shelob::Resolver.new(target).wont_be_nil
    end
  end

  describe "when used" do
    before do
      @resolver = Shelob::Resolver.new(target)
      @result = @resolver.resolve
    end

    it "should return a LinkResult" do
      @result.must_be_kind_of LinkResult
    end

    it "should return the response body" do
      @result.body.must_match(/CExt/)
    end

    it "should report the requested url and status" do
      @result.url.must_equal target
      @result.status.must_equal 200
    end

  end

end
|
data/test/test_shelob.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'webmock/minitest'
|
3
|
+
require 'shelob'
|
4
|
+
|
5
|
+
# Stub out requests
|
6
|
+
|
7
|
+
describe Shelob, "Link checking module" do
  describe "when created" do
    it "should exist" do
      Shelob.wont_be_nil
    end
  end
end

# The spider is exercised against a small stubbed site:
#   /resume          -> links to the site root, resume.pdf and secret
#   /resume/secret   -> links to boring, back to /resume, and a root-relative link
#   /resume/resume.pdf (404), /resume/boring (500), /resume/relative (204)
#   /                -> outside the crawl prefix; must never be requested
describe Shelob::Spider, "Link checking spider" do
  before do
    stub_request(:any, 'http://bmnick.com/resume').to_return(body: '<html><head><title>resume</title></head><body><a href="http://bmnick.com">home</a><a href="http://bmnick.com/resume/resume.pdf">pdf</a><a href="http://bmnick.com/resume/secret">secret</a></body></html>')
    stub_request(:any, 'http://bmnick.com/').to_return(status: 200, body: '<html><head><title>pdf</title></head><body><a href="http://bmnick.com/resume/">resume</a><a href="http://bmnick.com/">home</a><a href="http://bmnick.com/resume/secret">no touchy!</a></body></html>')
    stub_request(:any, 'http://bmnick.com/resume/secret').to_return(body: '<html><head><title>secrets</title></head><body><a href="http://bmnick.com/resume/boring">boredom</a><a href="http://bmnick.com/resume">resume</a><a href="/resume/relative">relative</a></body></html>')
    stub_request(:any, 'http://bmnick.com/resume/resume.pdf').to_return(status: 404)
    stub_request(:any, 'http://bmnick.com/resume/boring').to_return(status: 500)
    stub_request(:any, 'http://bmnick.com/resume/relative').to_return(status: 204)
  end

  describe "when created" do
    it "should exist" do
      Shelob::Spider.wont_be_nil
    end

    it "should store the initial url" do
      spider = Shelob::Spider.new("https://openforum.com")
      spider.wont_be_nil
      spider.hostname.must_equal "https://openforum.com"
    end
  end

  describe "when checking links" do
    before do
      @spider = Shelob::Spider.new("http://bmnick.com/resume")
      @results = @spider.check
    end

    it "should return an array from check" do
      @results.must_be_kind_of Array
    end

    it "should return only error links" do
      @results.select { |r| r.status == 200 }.must_be_empty
    end

    it "should provide remaining counts" do
      @spider.remaining.must_equal 0
    end

    it "should fetch the original url" do
      @spider.fetched.must_include "http://bmnick.com/resume"
    end

    it "should provide a number of urls fetched" do
      # http://bmnick.com/resume
      # http://bmnick.com/resume/resume.pdf
      # http://bmnick.com/resume/secret
      # http://bmnick.com/resume/boring
      # http://bmnick.com/resume/relative
      @spider.requests.must_equal 5
    end

    it "should make a web request for the original url" do
      assert_requested :get, "http://bmnick.com/resume"
    end

    it "should make a web request for child urls" do
      # 404
      assert_requested :get, "http://bmnick.com/resume/resume.pdf"
      @spider.fetched.must_include "http://bmnick.com/resume/resume.pdf"

      # successful
      assert_requested :get, "http://bmnick.com/resume/secret"
      @spider.fetched.must_include "http://bmnick.com/resume/secret"
    end

    it "should return the failed request" do
      # http://bmnick.com/resume/resume.pdf => 404
      # http://bmnick.com/resume/boring => 500
      @results.count.must_equal 2
    end

    it "shouldn't request pages without the prefix" do
      assert_not_requested :get, "http://bmnick.com"
    end

    it "shouldn't request pages multiple times" do
      assert_requested :get, "http://bmnick.com/resume", times: 1
    end

    it "should continue to spider down the page" do
      assert_requested :get, "http://bmnick.com/resume/boring"
      @spider.fetched.must_include "http://bmnick.com/resume/boring"
    end

    it "should support relative links" do
      assert_requested :get, "http://bmnick.com/resume/relative"
      @spider.fetched.must_include "http://bmnick.com/resume/relative"
    end

    it "should format a string cleanly" do
      # An explicit "\n" keeps the expectation independent of source indentation.
      expected = "404: http://bmnick.com/resume/resume.pdf\n500: http://bmnick.com/resume/boring"
      @results.map { |r| r.to_s }.join("\n").must_equal expected
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: shelob
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0.beta1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Benjamin Nicholas
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-12-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: webmock
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: guard
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: guard-minitest
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: nokogiri
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: A giant spider that starts on a given page, finds all links on the page,
|
112
|
+
ensure they resolve, and recurses if the link is underneath the starting url
|
113
|
+
email:
|
114
|
+
- bnicholas@brandnetworksinc.com
|
115
|
+
executables:
|
116
|
+
- shelob
|
117
|
+
extensions: []
|
118
|
+
extra_rdoc_files: []
|
119
|
+
files:
|
120
|
+
- .gitignore
|
121
|
+
- Gemfile
|
122
|
+
- Guardfile
|
123
|
+
- LICENSE.txt
|
124
|
+
- README.md
|
125
|
+
- Rakefile
|
126
|
+
- bin/shelob
|
127
|
+
- lib/extractor.rb
|
128
|
+
- lib/link_result.rb
|
129
|
+
- lib/resolver.rb
|
130
|
+
- lib/shelob.rb
|
131
|
+
- lib/shelob/version.rb
|
132
|
+
- shelob.gemspec
|
133
|
+
- test/test_extractor.rb
|
134
|
+
- test/test_link_result.rb
|
135
|
+
- test/test_resolver.rb
|
136
|
+
- test/test_shelob.rb
|
137
|
+
homepage:
|
138
|
+
licenses:
|
139
|
+
- MIT
|
140
|
+
metadata: {}
|
141
|
+
post_install_message:
|
142
|
+
rdoc_options: []
|
143
|
+
require_paths:
|
144
|
+
- lib
|
145
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
146
|
+
requirements:
|
147
|
+
- - '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: '0'
|
150
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - '>'
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: 1.3.1
|
155
|
+
requirements: []
|
156
|
+
rubyforge_project:
|
157
|
+
rubygems_version: 2.0.3
|
158
|
+
signing_key:
|
159
|
+
specification_version: 4
|
160
|
+
summary: Spider a site and check links
|
161
|
+
test_files:
|
162
|
+
- test/test_extractor.rb
|
163
|
+
- test/test_link_result.rb
|
164
|
+
- test/test_resolver.rb
|
165
|
+
- test/test_shelob.rb
|