just_crawl 1.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 97e1b466ee0a52002c3e35a3f5ed622e5fa7208761dd831fe695c9b3104ed7db
4
+ data.tar.gz: 4ded6f28ab8e36e144969fba44f7d017fd0405fa2bece4efeb45f67e9b5985d5
5
+ SHA512:
6
+ metadata.gz: b0f0ea51add4876a3b397abb74c0b6a0d4c21585eae7ecdf58069178f38396c6ec9022f96af42dbc69ecb50755d7237d9a533c42777cc6cf9debd977e9362c1e
7
+ data.tar.gz: 16a5673b5aa8d7d727de7f4cfbdedb05abbd9dab576738f1bf53ed04494a842626f04a86e0ff59d8515879dbe7225938479fd0b3a7927991964f853d4f5bebba
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ spec/examples.txt
18
+ tmp
19
+ .DS_Store
20
+ .idea
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.5.1
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
source 'https://rubygems.org'

# Expands the :github shorthand into a full HTTPS repository URL.
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# The gem's dependencies are declared in its gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 AlphaSights
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # JustCrawl
2
+
3
+ JustCrawl crawls pages within a domain, reporting any page that returns a bad response code.
4
+
5
+ Usage:
6
+
7
+ > just_crawl [options] domain
8
+
9
+ Usage: just_crawl [options] domain
10
+ -s, --start /home,/about Starting path(s), defaults to /
11
+ -u, --username username Basic auth username
12
+ -p, --password password Basic auth password
13
+ -c, --connections count Max number of parallel connections to use. The default is 5.
14
+ -v, --verbose Give details when crawling
15
+ -h, --help Show this message
16
+ --version Print version
17
+
18
+
19
+
20
+ Example:
21
+
22
+ > just_crawl https://vaskohandmade.com --connections=5 --start=/ --verbose
23
+
24
+ Adding /
25
+ Fetching / ...
26
+ Adding index.html
27
+ Adding assets/custom/images/vasko/portfolio/reverb-con-modulo-belton.jpg
28
+ Adding assets/custom/images/vasko/portfolio/amplificador-fender-champ.jpg
29
+ Adding assets/custom/images/vasko/portfolio/overdrive-simil-zendrive.jpg
30
+ Adding assets/custom/images/vasko/portfolio/booster-simil-super-hardon.jpg
31
+ Adding assets/custom/images/vasko/portfolio/amplificador-valvular.jpg
32
+ Adding assets/custom/images/vasko/portfolio/rehousing-fender-superchamp.jpg
33
+ Adding assets/custom/images/vasko/portfolio/fuzz-simil-big-muff-violet-ram.jpg
34
+ Adding assets/custom/images/vasko/portfolio/amplificador-valvular-simil-marshall-1974x.jpg
35
+ Adding assets/custom/images/vasko/portfolio/distorsion-high-gain-simil-triple-wreck.jpg
36
+ Adding assets/custom/images/vasko/portfolio/booster-simil-rc-booster.jpg
37
+ Adding assets/custom/images/vasko/portfolio/amplificador-blues-mojo-7w-valvular.jpg
38
+ Adding assets/custom/images/vasko/portfolio/rehousing-fender-super-champ.jpg
39
+ Fetching index.html ...
40
+ Fetching assets/custom/images/vasko/portfolio/reverb-con-modulo-belton.jpg ...
41
+ Fetching assets/custom/images/vasko/portfolio/amplificador-fender-champ.jpg ...
42
+ Fetching assets/custom/images/vasko/portfolio/overdrive-simil-zendrive.jpg ...
43
+ Fetching assets/custom/images/vasko/portfolio/booster-simil-super-hardon.jpg ...
44
+ Fetching assets/custom/images/vasko/portfolio/amplificador-valvular.jpg ...
45
+ Fetching assets/custom/images/vasko/portfolio/rehousing-fender-superchamp.jpg ...
46
+ Fetching assets/custom/images/vasko/portfolio/fuzz-simil-big-muff-violet-ram.jpg ...
47
+ Fetching assets/custom/images/vasko/portfolio/amplificador-valvular-simil-marshall-1974x.jpg ...
48
+ Fetching assets/custom/images/vasko/portfolio/distorsion-high-gain-simil-triple-wreck.jpg ...
49
+ Fetching assets/custom/images/vasko/portfolio/booster-simil-rc-booster.jpg ...
50
+ Fetching assets/custom/images/vasko/portfolio/amplificador-blues-mojo-7w-valvular.jpg ...
51
+ Fetching assets/custom/images/vasko/portfolio/rehousing-fender-super-champ.jpg ...
52
+
53
+ 14 pages crawled without errors.
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/bin/just_crawl ADDED
@@ -0,0 +1,39 @@
1
#!/usr/bin/env ruby
# Command-line entry point for JustCrawl.
#
# Parses the command-line switches, builds a JustCrawl::Engine for the
# requested domain, runs the crawl and prints a summary. The process exits
# with a non-zero status when no domain is given, when the crawl is
# interrupted with SIGINT, when any page ended with an error, or when no
# links were found.
require 'optparse'
require_relative '../lib/just_crawl.rb'

# Default number of parallel connections; --connections overrides it.
EM.threadpool_size = 5

options = {}
# OptionParser#parse! removes the recognised switches from ARGV and returns
# what is left, so `optparse` ends up holding the positional arguments.
optparse = OptionParser.new do |opts|
  opts.banner = "JustCrawl pages within a domain, reporting any page that returns a bad response code\nUsage: just_crawl [options] domain"
  opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') { |o| options[:start] = o }
  opts.on('-u', '--username username', String, 'Basic auth username') { |o| options[:username] = o }
  opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
  opts.on('-c', '--connections count', Integer, "Max number of parallel connections to use. The default is #{EM.threadpool_size}.") { |o| EM.threadpool_size = o }
  opts.on('-v', '--verbose', 'Give details when crawling') { |o| $verbose = o }
  opts.on_tail('-h', '--help', 'Show this message') { |_o| puts opts; exit }
  # --version deliberately has no short form: -v already belongs to --verbose.
  opts.on_tail('--version', 'Print version') { |_o| puts JustCrawl::VERSION; exit }
end.parse!

# The first positional argument is the domain to crawl.
options.merge!(domain: optparse.first)

unless options[:domain]
  puts 'Must provide a domain'
  exit -1
end

# Default to plain HTTP when the domain was given without a scheme.
options[:domain] = "http://#{options[:domain]}" unless options[:domain].include?('://')

crawler = JustCrawl::Engine.new(options)

# On Ctrl-C, report what has been crawled so far before bailing out.
trap('SIGINT') do
  puts "\n\nAborting just_crawl.."
  crawler.summarize
  exit -1
end

crawler.run
crawler.summarize

exit -1 if crawler.errors? || crawler.no_links_found?
data/circle.yml ADDED
@@ -0,0 +1,3 @@
1
+ test:
2
+ override:
3
+ - RAILS_ENV=test bundle exec rspec -r rspec_junit_formatter --format RspecJunitFormatter -o $CIRCLE_TEST_REPORTS/rspec/junit.xml
@@ -0,0 +1,26 @@
1
# Gem specification for just_crawl. The version number comes from
# lib/just_crawl/version.rb and the packaged file lists come from git.
require File.expand_path('lib/just_crawl/version', __dir__)

Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'just_crawl'
  spec.version  = JustCrawl::VERSION
  spec.authors  = ['Nicolas Sebastian Vidal']
  spec.email    = ['nicolas.s.vidal@gmail.com']
  spec.licenses = ['MIT']
  spec.homepage = 'http://github.com/nisevi/just_crawl'
  spec.summary  = 'JustCrawl crawls pages within a domain, reporting any page that returns a bad response code'
  spec.description = 'JustCrawl crawls all pages on a domain, checking for errors'

  # Packaged files, as tracked by git
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  spec.required_ruby_version = '>= 2.5.1'

  # Runtime dependencies
  spec.add_dependency 'em-http-request', '~> 1.1', '>= 1.1.5'
  spec.add_dependency 'eventmachine', '~> 1.2', '>= 1.2.6'
  spec.add_dependency 'nokogiri', '~> 1.8', '>= 1.8.2'
  spec.add_dependency 'rest-client', '~> 2.0', '>= 2.0.2'

  # Development dependencies
  spec.add_development_dependency 'rspec-core', '~> 3.7', '>= 3.7.1'
  spec.add_development_dependency 'rspec-expectations', '~> 3.7'
  spec.add_development_dependency 'rspec_junit_formatter', '~> 0.3.0'
  spec.add_development_dependency 'simplecov', '~> 0.16.1'
end
@@ -0,0 +1,116 @@
1
# Drives the crawl of a single domain on top of EventMachine.
#
# Pages waiting to be fetched, pages in flight and finished pages are tracked
# by a JustCrawl::Register. The engine keeps fetching until the register
# reports that nothing is left, then stops the EventMachine reactor.
class JustCrawl::Engine
  DEFAULT_OPTIONS = {
    domain: '',
    start: ['/'],
    username: '',
    password: '',
    verbose: false,
    session_id: false
  }.freeze

  # Links matching any of these patterns are never followed.
  IGNORE = [/#/, /mailto:/, /skype:/, /logout/, /javascript:/, %r{/xhr/}, /https:/, /\.pdf$/, /^$/, /tel:/].freeze
  # Status codes that count as a successful fetch.
  VALID_RESPONSE_CODES = [200, 302].freeze
  MAX_REDIRECTS = 3
  LINE_WIDTH = 78

  attr_reader :options

  # @param caller_options [Hash] overrides merged on top of DEFAULT_OPTIONS
  def initialize(caller_options = {})
    @options = DEFAULT_OPTIONS.merge(caller_options)
    @authorization = Base64.encode64("#{options[:username]}:#{options[:password]}")
    @register = JustCrawl::Register.new

    # Array() also accepts a single path String, which has no #to_a.
    start_pages = Array(options[:start]).map { |page| Page.new(@register, page, '/') }

    @register.add(start_pages)
  end

  # Starts the EventMachine reactor and crawls until #process_next stops it.
  def run
    EventMachine.run do
      process_next
    end
  end

  # Starts fetching queued pages until EM.threadpool_size pages are in flight,
  # or stops the reactor once the register is finished.
  def process_next
    return if @register.processing_size >= EM.threadpool_size
    if @register.finished?
      EventMachine.stop
    elsif (page = @register.next_page)
      retrieve(page)
      process_next
    end
  end

  # Prints the register's summary of the crawl.
  def summarize
    @register.summarize
  end

  # @return [Boolean] whether any processed page ended with an error
  def errors?
    @register.errors?
  end

  # @return [Boolean] whether at most one page was processed
  def no_links_found?
    @register.no_links_found?
  end

  private

  # Issues an asynchronous GET for +page+ and records the outcome on the page
  # from the request's errback/callback. Either way the next page is scheduled.
  def retrieve(page)
    puts "Fetching #{page.url} ..." if $verbose

    absolute_url = options[:domain] + page.relative_url

    http = EventMachine::HttpRequest.new(absolute_url)
    req = http.get redirects: MAX_REDIRECTS,
                   connect_timeout: 20,
                   inactivity_timeout: 20,
                   head: request_headers
    req.errback do
      message = req.error
      if message
        page.intermittent(message)
      elsif req.response.nil? || req.response.empty?
        page.intermittent('Timeout?')
      else
        page.intermittent('Partial response: Server Broke Connection?')
      end
      process_next
    end

    req.callback do
      status_code = req.response_header.status
      if VALID_RESPONSE_CODES.include?(status_code)
        page.success
        # Only HTML responses are scanned for further links.
        if req.response_header['CONTENT_TYPE'] =~ %r{text/html}
          @register.add find_linked_pages(page, req.response.to_str)
        end
      elsif status_code == 503
        page.intermittent('Status code: 503')
      else
        page.fatal("Status code: #{status_code}")
      end
      process_next
    end
  end

  # Builds the request headers. Credentials are attached only when a username
  # or password was configured, so anonymous crawls send no Authorization
  # header at all.
  def request_headers
    username = options[:username].to_s
    password = options[:password].to_s
    return {} if username.empty? && password.empty?

    { 'authorization' => [options[:username], options[:password]] }
  end

  # Extracts the followable links from an HTML +body+ and wraps each one in a
  # Page whose source is the URL of +page+.
  def find_linked_pages(page, body)
    doc = Nokogiri::HTML(body)
    anchors = doc.css('a').reject { |anchor| skip_anchor?(anchor) }
    raw_links = anchors.map { |anchor| anchor['href'] }.compact
    # Strip the crawled domain so same-site absolute links become relative.
    raw_links.map! { |link| link.sub(options[:domain], '') }
    raw_links.delete_if { |link| link =~ %r{^http(s)?://} && !link.include?(options[:domain]) }
    raw_links.delete_if { |link| IGNORE.any? { |pattern| link =~ pattern } }
    raw_links.map { |url| Page.new(@register, url, page.url) }
  end

  # True for anchors whose attributes match any of the skip patterns below
  # (onclick, data-method, data-remote, class, rel).
  def skip_anchor?(anchor)
    anchor['onclick'].to_s =~ /f.method = 'POST'/ ||
      anchor['data-method'].to_s =~ /put|post|delete/ ||
      anchor['data-remote'].to_s =~ /true/ ||
      anchor['class'].to_s =~ /unobtrusive_/ ||
      anchor['rel'].to_s =~ /nofollow/
  end
end
@@ -0,0 +1,29 @@
1
# Value object describing a link that came back with a bad status code,
# together with the place it was linked from.
class JustCrawl::Failure
  attr_reader :link, :code, :from

  # @param link the failing link
  # @param code the status code that was returned
  # @param from where the link was found
  def initialize(link, code, from)
    @link, @code, @from = link, code, from
  end

  # Always true.
  def failure?
    true
  end

  # The opposite of #failure?, hence always false.
  def error?
    !failure?
  end

  # The failing link serves as the name.
  def name
    link
  end

  # Human-readable description of the status code.
  def message
    "Status code was #{code}"
  end

  # Human-readable description of where the link was found.
  def location
    "Linked from #{from}"
  end
end
@@ -0,0 +1,59 @@
1
+ require 'uri'
2
+
3
# A single URL discovered during a crawl, together with the page it was
# found on and the outcome of fetching it.
#
# Pages compare, hash and test for equality by their relative URL. Outcomes
# are reported back to the register that was passed to the constructor.
class Page
  include Comparable

  attr_reader :register, :url, :source, :error

  # Number of retries granted to a page that fails intermittently.
  ATTEMPTS = 3

  # @param register object notified through #completed and #retry
  # @param url [String] the link as it was found
  # @param source [String] URL of the page the link was found on
  def initialize(register, url, source)
    @register = register
    @url = url
    @source = source
    @attempts = 0
    @error = nil
  end

  # Path of +url+ resolved against +source+. The host is a placeholder; only
  # the resulting path is kept.
  def relative_url
    @relative_url ||= URI.join('http://example.com', source, url).path
  end

  def <=>(other)
    relative_url <=> other.relative_url
  end

  def eql?(other)
    relative_url.eql?(other.relative_url)
  end

  def hash
    relative_url.hash
  end

  # Marks the page as fetched without error.
  def success
    @error = nil
    @register.completed(self)
  end

  # Records a permanent failure; the page is not retried.
  def fatal(error)
    # $verbose is the crawler's own flag, not Ruby's warning-level $VERBOSE.
    puts " Fatal - #{error}" if $verbose
    @error = error
    @register.completed(self)
  end

  # Records a failure that may go away: the page is handed back for a retry
  # until ATTEMPTS retries have been used, after which the error sticks.
  def intermittent(error)
    puts " Intermittent - #{error}" if $verbose
    if @attempts >= ATTEMPTS
      @error = error
      @register.completed(self)
    else
      @attempts += 1
      @register.retry(self)
    end
  end

  def to_s
    "#{url} found on #{source} - #{error || 'OK'}"
  end
end
@@ -0,0 +1,69 @@
1
# Book-keeping for a crawl. Every page lives in exactly one of three sets:
# waiting to be fetched, currently being fetched, or done.
class JustCrawl::Register

  Result = Struct.new(:url, :object)

  def initialize
    @unprocessed, @processing, @processed = Set.new, Set.new, Set.new
  end

  # Queues every page in +pages+ that is not already known to the register.
  def add(pages)
    already_known = @processed | @processing | @unprocessed
    new_pages = pages.to_set - already_known
    new_pages.each { |new_page| puts " Adding #{new_page.url}" if $verbose }
    @unprocessed.merge(new_pages)
  end

  # Moves one waiting page into the in-flight set and returns it, or returns
  # nil when nothing is waiting. Warns when more pages are in flight than
  # EM.threadpool_size.
  def next_page
    page = @unprocessed.first
    @unprocessed.delete(page)
    @processing.add(page) if page
    if @processing.size > EM.threadpool_size
      puts "WARNING: #{@processing.size} pages are being process when EM threadpool only has #{EM.threadpool_size} threads."
    end
    page
  end

  # Puts an in-flight page back into the waiting set.
  def retry(page)
    @unprocessed.add(page)
    @processing.delete(page)
  end

  # Moves an in-flight page into the done set.
  def completed(page)
    @processed.add(page)
    @processing.delete(page)
  end

  # True once nothing is waiting and nothing is in flight.
  def finished?
    @unprocessed.empty? && @processing.empty?
  end

  def processing_size
    @processing.size
  end

  # Finished pages that carry an error.
  def error_pages
    @processed.select { |page| page.error }
  end

  def errors?
    @processed.any? { |page| page.error }
  end

  # Prints either the list of failed pages or the number of pages crawled.
  def summarize
    unless errors?
      puts "\n#{@processed.size} pages crawled without errors."
      return
    end

    puts "\nPages with errors:"
    error_pages.each { |page| puts page.to_s }
  end

  # True when at most one page was processed.
  def no_links_found?
    @processed.size < 2
  end
end
@@ -0,0 +1,7 @@
1
class String
  # Wraps every line longer than +line_width+ at whitespace so that no output
  # line exceeds that width where a break is possible; shorter lines are left
  # untouched.
  #
  # @param line_width [Integer] maximum line length, 80 by default
  # @return [String] the wrapped text, lines joined with "\n"
  def word_wrap(line_width = 80)
    wrapped_lines = split("\n").map do |line|
      next line unless line.length > line_width

      line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
    end
    wrapped_lines.join("\n")
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the gem.
module JustCrawl
  # Version of the gem; read by the gemspec and printed by `--version`.
  VERSION = '1.1.8'.freeze
end
data/lib/just_crawl.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'nokogiri'
2
+ require 'rest_client'
3
+ require 'eventmachine'
4
+ require 'em-http-request'
5
+ require 'base64'
6
+ require 'set'
7
+ require 'fileutils'
8
+ require 'digest/sha1'
9
+ require 'json'
10
+ require 'tempfile'
11
+ require 'tmpdir'
12
+
13
+ require_relative 'just_crawl/version'
14
+ require_relative 'just_crawl/engine'
15
+ require_relative 'just_crawl/string'
16
+ require_relative 'just_crawl/failure'
17
+ require_relative 'just_crawl/register'
18
+ require_relative 'just_crawl/page'
@@ -0,0 +1,9 @@
1
require_relative '../../spec_helper'

# Placeholder spec: every example is skipped (xit) and has no body yet.
RSpec.describe 'JustCrawl::Engine' do
  xit '#initialize'
  xit '#run'
  xit '#summarize'
  xit '#errors?'
  xit '#no_links_found?'
end
@@ -0,0 +1,10 @@
1
require_relative '../../spec_helper'

# Placeholder spec: every example is skipped (xit) and has no body yet.
RSpec.describe 'JustCrawl::Failure' do
  xit '#initialize'
  xit '#failure?'
  xit '#error?'
  xit '#name'
  xit '#message'
  xit '#location'
end
@@ -0,0 +1,24 @@
1
require_relative '../../spec_helper'
require './lib/just_crawl/page'

RSpec.describe Page do
  xit 'initialize'

  # Each entry is [url, source, expected relative_url]. A plain symbol stands
  # in for the register, which #relative_url never touches.
  it '#relative_url' do
    cases = [
      ['/', '/', '/'],
      ['./', '/', '/'],
      ['page.html', '', '/page.html'],
      ['/interview', '/', '/interview'],
      ['overview.html', '/', '/overview.html'],
      ['post-5.html', '/posts/index.html', '/posts/post-5.html'],
      ['https://staging.alphasights.com/careers/meet-us', '/posts/foo', '/careers/meet-us']
    ]

    cases.each do |url, source, expected|
      expect(Page.new(:register, url, source).relative_url).to eq expected
    end
  end

  xit '#<=>'
  xit '#eql?'
  xit '#hash'
  xit '#success'
  xit '#fatal'
  xit '#intermittent'
  xit '#to_s'
end
@@ -0,0 +1,15 @@
1
require_relative '../../spec_helper'

# Placeholder spec: every example is skipped (xit) and has no body yet.
RSpec.describe 'JustCrawl::Register' do
  xit '#initialize'
  xit '#add'
  xit '#next_page'
  xit '#retry'
  xit '#completed'
  xit '#finished?'
  xit '#processing_size'
  xit '#error_pages'
  xit '#errors?'
  xit '#summarize'
  xit '#no_links_found?'
end
@@ -0,0 +1,5 @@
1
require_relative '../../spec_helper'

# Placeholder spec: the only example is skipped (xit) and has no body yet.
RSpec.describe String do
  xit '#word_wrap'
end
@@ -0,0 +1,8 @@
1
require_relative '../../spec_helper'
require_relative '../../../lib/just_crawl/version'

# Pins the released version; the literal below has to be updated together
# with JustCrawl::VERSION.
RSpec.describe 'Gem version' do
  it 'should match the current gem version' do
    expect(JustCrawl::VERSION).to eq '1.1.8'
  end
end
@@ -0,0 +1,5 @@
1
require_relative '../spec_helper'

# Placeholder spec: the only example is skipped (xit) and has no body yet.
RSpec.describe 'JustCrawl require files.' do
  xit 'should load all required files'
end
@@ -0,0 +1,18 @@
1
# Shared RSpec configuration. SimpleCov is started first, before the
# RSpec configuration below runs.
require 'simplecov'
SimpleCov.start

RSpec.configure do |config|
  config.expect_with :rspec do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # Use the 'doc' formatter when only a single spec file is run.
  config.default_formatter = 'doc' if config.files_to_run.one?

  config.shared_context_metadata_behavior = :apply_to_host_groups
  config.filter_run_when_matching :focus
  # This file is listed in .gitignore.
  config.example_status_persistence_file_path = 'spec/examples.txt'
  config.disable_monkey_patching!
  config.warnings = true
  # Run examples in random order and seed Kernel's RNG with the same seed.
  config.order = :random
  Kernel.srand config.seed
end
metadata ADDED
@@ -0,0 +1,213 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: just_crawl
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.8
5
+ platform: ruby
6
+ authors:
7
+ - Nicolas Sebastian Vidal
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: em-http-request
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.1'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.5
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.1'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.1.5
33
+ - !ruby/object:Gem::Dependency
34
+ name: eventmachine
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.2'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.6
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '1.2'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.6
53
+ - !ruby/object:Gem::Dependency
54
+ name: nokogiri
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '1.8'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.8.2
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.8'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 1.8.2
73
+ - !ruby/object:Gem::Dependency
74
+ name: rest-client
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: '2.0'
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 2.0.2
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.0'
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 2.0.2
93
+ - !ruby/object:Gem::Dependency
94
+ name: rspec-core
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '3.7'
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 3.7.1
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '3.7'
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 3.7.1
113
+ - !ruby/object:Gem::Dependency
114
+ name: rspec-expectations
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: '3.7'
120
+ type: :development
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - "~>"
125
+ - !ruby/object:Gem::Version
126
+ version: '3.7'
127
+ - !ruby/object:Gem::Dependency
128
+ name: rspec_junit_formatter
129
+ requirement: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - "~>"
132
+ - !ruby/object:Gem::Version
133
+ version: 0.3.0
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - "~>"
139
+ - !ruby/object:Gem::Version
140
+ version: 0.3.0
141
+ - !ruby/object:Gem::Dependency
142
+ name: simplecov
143
+ requirement: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - "~>"
146
+ - !ruby/object:Gem::Version
147
+ version: 0.16.1
148
+ type: :development
149
+ prerelease: false
150
+ version_requirements: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - "~>"
153
+ - !ruby/object:Gem::Version
154
+ version: 0.16.1
155
+ description: JustCrawl crawls all pages on a domain, checking for errors
156
+ email:
157
+ - nicolas.s.vidal@gmail.com
158
+ executables:
159
+ - just_crawl
160
+ extensions: []
161
+ extra_rdoc_files: []
162
+ files:
163
+ - ".gitignore"
164
+ - ".rspec"
165
+ - ".ruby-version"
166
+ - Gemfile
167
+ - LICENSE.txt
168
+ - README.md
169
+ - Rakefile
170
+ - bin/just_crawl
171
+ - circle.yml
172
+ - just_crawl.gemspec
173
+ - lib/just_crawl.rb
174
+ - lib/just_crawl/engine.rb
175
+ - lib/just_crawl/failure.rb
176
+ - lib/just_crawl/page.rb
177
+ - lib/just_crawl/register.rb
178
+ - lib/just_crawl/string.rb
179
+ - lib/just_crawl/version.rb
180
+ - spec/lib/just_crawl/engine_spec.rb
181
+ - spec/lib/just_crawl/failure_spec.rb
182
+ - spec/lib/just_crawl/page_spec.rb
183
+ - spec/lib/just_crawl/register_spec.rb
184
+ - spec/lib/just_crawl/string_spec.rb
185
+ - spec/lib/just_crawl/version_spec.rb
186
+ - spec/lib/just_crawl_spec.rb
187
+ - spec/spec_helper.rb
188
+ homepage: http://github.com/nisevi/just_crawl
189
+ licenses:
190
+ - MIT
191
+ metadata: {}
192
+ post_install_message:
193
+ rdoc_options: []
194
+ require_paths:
195
+ - lib
196
+ required_ruby_version: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ">="
199
+ - !ruby/object:Gem::Version
200
+ version: 2.5.1
201
+ required_rubygems_version: !ruby/object:Gem::Requirement
202
+ requirements:
203
+ - - ">="
204
+ - !ruby/object:Gem::Version
205
+ version: '0'
206
+ requirements: []
207
+ rubyforge_project:
208
+ rubygems_version: 2.7.6
209
+ signing_key:
210
+ specification_version: 4
211
+ summary: JustCrawl crawls pages within a domain, reporting any page that returns a
212
+ bad response code
213
+ test_files: []