just_crawl 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 97e1b466ee0a52002c3e35a3f5ed622e5fa7208761dd831fe695c9b3104ed7db
4
+ data.tar.gz: 4ded6f28ab8e36e144969fba44f7d017fd0405fa2bece4efeb45f67e9b5985d5
5
+ SHA512:
6
+ metadata.gz: b0f0ea51add4876a3b397abb74c0b6a0d4c21585eae7ecdf58069178f38396c6ec9022f96af42dbc69ecb50755d7237d9a533c42777cc6cf9debd977e9362c1e
7
+ data.tar.gz: 16a5673b5aa8d7d727de7f4cfbdedb05abbd9dab576738f1bf53ed04494a842626f04a86e0ff59d8515879dbe7225938479fd0b3a7927991964f853d4f5bebba
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ spec/examples.txt
18
+ tmp
19
+ .DS_Store
20
+ .idea
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.5.1
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 AlphaSights
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # JustCrawl
2
+
3
+ JustCrawl crawls pages within a domain, reporting any page that returns a bad response code
4
+
5
+ Usage:
6
+
7
+ > just_crawl [options] domain
8
+
9
+ Usage: just_crawl [options] domain
10
+ -s, --start /home,/about Starting path(s), defaults to /
11
+ -u, --username username Basic auth username
12
+ -p, --password password Basic auth password
13
+ -c, --connections count Max number of parallel connections to use. The default is 5.
14
+ -v, --verbose Give details when crawling
15
+ -h, --help Show this message
16
+ --version Print version
17
+
18
+
19
+
20
+ Example:
21
+
22
+ > just_crawl https://vaskohandmade.com --connections=5 --start=/ --verbose
23
+
24
+ Adding /
25
+ Fetching / ...
26
+ Adding index.html
27
+ Adding assets/custom/images/vasko/portfolio/reverb-con-modulo-belton.jpg
28
+ Adding assets/custom/images/vasko/portfolio/amplificador-fender-champ.jpg
29
+ Adding assets/custom/images/vasko/portfolio/overdrive-simil-zendrive.jpg
30
+ Adding assets/custom/images/vasko/portfolio/booster-simil-super-hardon.jpg
31
+ Adding assets/custom/images/vasko/portfolio/amplificador-valvular.jpg
32
+ Adding assets/custom/images/vasko/portfolio/rehousing-fender-superchamp.jpg
33
+ Adding assets/custom/images/vasko/portfolio/fuzz-simil-big-muff-violet-ram.jpg
34
+ Adding assets/custom/images/vasko/portfolio/amplificador-valvular-simil-marshall-1974x.jpg
35
+ Adding assets/custom/images/vasko/portfolio/distorsion-high-gain-simil-triple-wreck.jpg
36
+ Adding assets/custom/images/vasko/portfolio/booster-simil-rc-booster.jpg
37
+ Adding assets/custom/images/vasko/portfolio/amplificador-blues-mojo-7w-valvular.jpg
38
+ Adding assets/custom/images/vasko/portfolio/rehousing-fender-super-champ.jpg
39
+ Fetching index.html ...
40
+ Fetching assets/custom/images/vasko/portfolio/reverb-con-modulo-belton.jpg ...
41
+ Fetching assets/custom/images/vasko/portfolio/amplificador-fender-champ.jpg ...
42
+ Fetching assets/custom/images/vasko/portfolio/overdrive-simil-zendrive.jpg ...
43
+ Fetching assets/custom/images/vasko/portfolio/booster-simil-super-hardon.jpg ...
44
+ Fetching assets/custom/images/vasko/portfolio/amplificador-valvular.jpg ...
45
+ Fetching assets/custom/images/vasko/portfolio/rehousing-fender-superchamp.jpg ...
46
+ Fetching assets/custom/images/vasko/portfolio/fuzz-simil-big-muff-violet-ram.jpg ...
47
+ Fetching assets/custom/images/vasko/portfolio/amplificador-valvular-simil-marshall-1974x.jpg ...
48
+ Fetching assets/custom/images/vasko/portfolio/distorsion-high-gain-simil-triple-wreck.jpg ...
49
+ Fetching assets/custom/images/vasko/portfolio/booster-simil-rc-booster.jpg ...
50
+ Fetching assets/custom/images/vasko/portfolio/amplificador-blues-mojo-7w-valvular.jpg ...
51
+ Fetching assets/custom/images/vasko/portfolio/rehousing-fender-super-champ.jpg ...
52
+
53
+ 14 pages crawled without errors.
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/bin/just_crawl ADDED
@@ -0,0 +1,39 @@
1
#!/usr/bin/env ruby
# Command-line entry point for just_crawl.
#
# Parses the command-line switches, crawls the requested domain and exits with
# a non-zero status when the crawl reported errors or found no links to follow.
require 'optparse'
require_relative '../lib/just_crawl.rb'

EM.threadpool_size = 5

options = {}
parser = OptionParser.new do |opts|
  opts.banner = "JustCrawl pages within a domain, reporting any page that returns a bad response code\nUsage: just_crawl [options] domain"
  opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') { |o| options[:start] = o }
  opts.on('-u', '--username username', String, 'Basic auth username') { |o| options[:username] = o }
  opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
  opts.on('-c', '--connections count', Integer, "Max number of parallel connections to use. The default is #{EM.threadpool_size}.") do |count|
    # With zero (or fewer) connections the engine never starts a request and
    # never stops the event loop, so reject the value up front.
    raise OptionParser::InvalidArgument, 'must be at least 1' if count < 1

    EM.threadpool_size = count
  end
  # The crawler classes read this global to decide whether to print progress.
  opts.on('-v', '--verbose', 'Give details when crawling') { |o| $verbose = o }
  opts.on_tail('-h', '--help', 'Show this message') { puts opts; exit }
  # No short switch here: '-v' already belongs to --verbose, and registering it
  # twice made the later definition (version) silently win.
  opts.on_tail('--version', 'Print version') { puts JustCrawl::VERSION; exit }
end

begin
  # parse! removes the recognised switches and returns what is left of ARGV.
  positional = parser.parse!
rescue OptionParser::ParseError => e
  warn e.message
  warn parser.help
  exit(-1)
end

options[:domain] = positional.first

unless options[:domain]
  puts 'Must provide a domain'
  exit(-1)
end

# Default to plain HTTP when the caller did not supply a scheme.
options[:domain] = "http://#{options[:domain]}" unless options[:domain].include?('://')

crawler = JustCrawl::Engine.new(options)

# Ctrl-C still prints whatever has been collected so far before exiting.
trap('SIGINT') do
  puts "\n\nAborting just_crawl.."
  crawler.summarize
  exit(-1)
end

crawler.run
crawler.summarize

exit(-1) if crawler.errors? || crawler.no_links_found?
data/circle.yml ADDED
@@ -0,0 +1,3 @@
1
+ test:
2
+ override:
3
+ - RAILS_ENV=test bundle exec rspec -r rspec_junit_formatter --format RspecJunitFormatter -o $CIRCLE_TEST_REPORTS/rspec/junit.xml
@@ -0,0 +1,26 @@
1
# Gem specification for just_crawl. The version constant is loaded straight
# from the library so the gemspec and the code can never disagree.
version_file = File.expand_path('../lib/just_crawl/version', __FILE__)
require version_file

Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'just_crawl'
  spec.version     = JustCrawl::VERSION
  spec.licenses    = ['MIT']
  spec.authors     = ['Nicolas Sebastian Vidal']
  spec.email       = ['nicolas.s.vidal@gmail.com']
  spec.homepage    = 'http://github.com/nisevi/just_crawl'
  spec.summary     = 'JustCrawl crawls pages within a domain, reporting any page that returns a bad response code'
  spec.description = 'JustCrawl crawls all pages on a domain, checking for errors'

  # Packaged files are whatever git tracks at build time.
  tracked_binaries = `git ls-files -- bin/*`.split("\n")
  spec.executables   = tracked_binaries.map { |path| File.basename(path) }
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.require_paths = ['lib']

  spec.required_ruby_version = '>= 2.5.1'

  # Runtime dependencies
  spec.add_dependency 'em-http-request', '~> 1.1', '>= 1.1.5'
  spec.add_dependency 'eventmachine', '~> 1.2', '>= 1.2.6'
  spec.add_dependency 'nokogiri', '~> 1.8', '>= 1.8.2'
  spec.add_dependency 'rest-client', '~> 2.0', '>= 2.0.2'

  # Development dependencies
  spec.add_development_dependency 'rspec-core', '~> 3.7', '>= 3.7.1'
  spec.add_development_dependency 'rspec-expectations', '~> 3.7'
  spec.add_development_dependency 'rspec_junit_formatter', '~> 0.3.0'
  spec.add_development_dependency 'simplecov', '~> 0.16.1'
end
@@ -0,0 +1,116 @@
1
module JustCrawl
  # Drives the crawl: seeds the register with the start pages, fetches pages
  # through EventMachine, records each outcome on the page and queues every
  # link found in HTML responses.
  class Engine
    DEFAULT_OPTIONS = {
      domain: '',
      start: ['/'],
      username: '',
      password: '',
      verbose: false,
      session_id: false
    }.freeze

    # Links matching any of these patterns are never queued.
    IGNORE = [/#/, /mailto:/, /skype:/, /logout/, /javascript:/, %r{/xhr/}, /https:/, /\.pdf$/, /^$/, /tel:/].freeze
    VALID_RESPONSE_CODES = [200, 302].freeze
    MAX_REDIRECTS = 3
    LINE_WIDTH = 78

    attr_reader :options

    # @param caller_options [Hash] overrides for DEFAULT_OPTIONS; :start may be
    #   a single path or a list of paths.
    def initialize(caller_options = {})
      @options = DEFAULT_OPTIONS.merge(caller_options)
      @register = JustCrawl::Register.new

      # Array() accepts nil, a single path or a list of paths.
      start_pages = Array(options[:start]).map { |page| Page.new(@register, page, '/') }

      @register.add(start_pages)
    end

    # Starts the event loop; it is stopped by process_next once the register
    # reports that nothing is left to do.
    def run
      EventMachine.run do
        process_next
      end
    end

    # Starts requests until the number of pages in flight reaches
    # EM.threadpool_size, or stops the event loop when the crawl is finished.
    def process_next
      return if @register.processing_size >= EM.threadpool_size
      if @register.finished?
        EventMachine.stop
      elsif (page = @register.next_page)
        retrieve(page)
        process_next
      end
    end

    def summarize
      @register.summarize
    end

    def errors?
      @register.errors?
    end

    def no_links_found?
      @register.no_links_found?
    end

    private

    # Configured domain without a trailing slash, so that joining it with a
    # path never yields "//" and stripping it from an absolute link leaves a
    # root-relative path rather than a bare relative one.
    def domain
      options[:domain].to_s.chomp('/')
    end

    # Issues the GET for +page+ and wires up the success/failure handlers.
    def retrieve(page)
      puts "Fetching #{page.url} ..." if $verbose

      request = EventMachine::HttpRequest.new(domain + page.relative_url).get(
        redirects: MAX_REDIRECTS,
        connect_timeout: 20,
        inactivity_timeout: 20,
        head: { 'authorization' => [options[:username], options[:password]] }
      )

      request.errback do
        page.intermittent(failure_reason(request))
        process_next
      end

      request.callback do
        handle_response(page, request)
        process_next
      end
    end

    # Best available description of why a request errored.
    def failure_reason(request)
      return request.error if request.error
      return 'Timeout?' if request.response.nil? || request.response.empty?

      'Partial response: Server Broke Connection?'
    end

    # Records the outcome of a completed request and queues linked pages when
    # the response is HTML.
    def handle_response(page, request)
      status_code = request.response_header.status
      if VALID_RESPONSE_CODES.include?(status_code)
        page.success
        html = request.response_header['CONTENT_TYPE'] =~ %r{text/html}
        @register.add(find_linked_pages(page, request.response.to_str)) if html
      elsif status_code == 503
        page.intermittent('Status code: 503')
      else
        page.fatal("Status code: #{status_code}")
      end
    end

    # True for anchors that should not be followed (non-GET, remote or
    # nofollow links).
    def skip_anchor?(anchor)
      anchor['onclick'].to_s =~ /f.method = 'POST'/ ||
        anchor['data-method'] =~ /put|post|delete/ ||
        anchor['data-remote'] =~ /true/ ||
        anchor['class'].to_s =~ /unobtrusive_/ ||
        anchor['rel'].to_s =~ /nofollow/
    end

    # Extracts the followable links from +body+ and wraps them in Page objects.
    #
    # @param page [Page] the page the body was fetched from
    # @param body [String] HTML document
    # @return [Array<Page>]
    def find_linked_pages(page, body)
      anchors = Nokogiri::HTML(body).css('a').reject { |anchor| skip_anchor?(anchor) }
      links = anchors.map { |anchor| anchor['href'] }.compact
      links.map! { |link| link.sub(domain, '') }
      links.reject! { |link| link =~ %r{^http(s)?://} && !link.include?(domain) }
      links.reject! { |link| IGNORE.any? { |pattern| link =~ pattern } }
      # Resolve against the parent's resolved path, not its raw href: a raw
      # relative href has lost the directory it was found in, which sent
      # relative links on nested pages to the wrong path.
      links.map { |url| Page.new(@register, url, page.relative_url) }
    end
  end
end
@@ -0,0 +1,29 @@
1
module JustCrawl
  # Value object describing a link that came back with a bad status code.
  #
  # Declared inside an explicit `module JustCrawl` so the file can be loaded
  # on its own; the compact `class JustCrawl::Failure` form raises NameError
  # whenever the namespace has not been defined yet.
  class Failure
    attr_reader :link, :code, :from

    # @param link [String] the link that failed
    # @param code [Integer, String] status code that was returned
    # @param from [String] page on which the link was found
    def initialize(link, code, from)
      @link = link
      @code = code
      @from = from
    end

    # Always true for this class.
    def failure?
      true
    end

    # Negation of #failure?, so always false for this class.
    def error?
      !failure?
    end

    def name
      link
    end

    def message
      "Status code was #{code}"
    end

    def location
      "Linked from #{from}"
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'uri'
2
+
3
# A single URL known to the crawler, together with the page it was found on
# and the outcome of fetching it.
#
# Two pages are considered the same (for sorting, equality and Set/Hash
# membership) when they resolve to the same path.
class Page
  include Comparable

  attr_reader :register, :url, :source, :error

  # Number of retries granted to a page that fails intermittently.
  ATTEMPTS = 3

  # @param register [#completed, #retry] receives the outcome of each fetch
  # @param url [String] the link as it appeared in the document
  # @param source [String] path of the page the link was found on
  def initialize(register, url, source)
    @register = register
    @url = url
    @source = source
    @attempts = 0
    @error = nil
  end

  # Path of the page, resolved against the page it was found on. The host is
  # a placeholder; only the path component is kept.
  def relative_url
    @relative_url ||= URI.join('http://example.com', source, url).path
  end

  def <=>(other)
    relative_url <=> other.relative_url
  end

  def eql?(other)
    relative_url.eql?(other.relative_url)
  end

  def hash
    relative_url.hash
  end

  # Marks the page as fetched without error.
  def success
    @error = nil
    @register.completed(self)
  end

  # Marks the page as failed for good.
  def fatal(error)
    # $verbose is the crawler's own switch; Ruby's built-in $VERBOSE only
    # controls interpreter warnings and is unrelated to --verbose.
    puts " Fatal - #{error}" if $verbose
    @error = error
    @register.completed(self)
  end

  # Records a failure that may be transient: the page is handed back for a
  # retry until ATTEMPTS retries have been used, then it is marked as failed.
  def intermittent(error)
    puts " Intermittent - #{error}" if $verbose
    if @attempts >= ATTEMPTS
      @error = error
      @register.completed(self)
    else
      @attempts += 1
      @register.retry(self)
    end
  end

  def to_s
    "#{url} found on #{source} - #{error || 'OK'}"
  end
end
@@ -0,0 +1,69 @@
1
module JustCrawl
  # Bookkeeping for the crawl: every known page lives in exactly one of three
  # sets (waiting, in flight, done).
  #
  # Declared inside an explicit `module JustCrawl` so the file can be loaded
  # on its own; the compact `class JustCrawl::Register` form raises NameError
  # whenever the namespace has not been defined yet.
  class Register
    Result = Struct.new(:url, :object)

    def initialize
      @unprocessed = Set.new
      @processing = Set.new
      @processed = Set.new
    end

    # Queues the pages that are not already known in any state.
    #
    # @param pages [Enumerable] candidate pages
    def add(pages)
      new_pages = pages.to_set - @processed - @processing - @unprocessed
      new_pages.each do |new_page|
        puts " Adding #{new_page.url}" if $verbose
      end
      @unprocessed.merge(new_pages)
    end

    # Moves one waiting page to the in-flight set and returns it.
    #
    # @return [Object, nil] the page, or nil when nothing is waiting
    def next_page
      page = @unprocessed.first
      @unprocessed.delete(page)
      @processing << page if page
      if @processing.size > EM.threadpool_size
        puts "WARNING: #{@processing.size} pages are being processed when EM threadpool only has #{EM.threadpool_size} threads."
      end
      page
    end

    # Puts an in-flight page back in the waiting set.
    def retry(page)
      @unprocessed << page
      @processing.delete(page)
    end

    # Moves an in-flight page to the done set.
    def completed(page)
      @processed << page
      @processing.delete(page)
    end

    # True when nothing is waiting and nothing is in flight.
    def finished?
      (@unprocessed.size + @processing.size).zero?
    end

    def processing_size
      @processing.size
    end

    # Done pages whose #error is set.
    def error_pages
      @processed.select(&:error)
    end

    def errors?
      !error_pages.empty?
    end

    # Prints either the failing pages or the number of pages crawled.
    def summarize
      if errors?
        puts "\nPages with errors:"
        error_pages.each do |page|
          puts page.to_s
        end
      else
        puts "\n#{@processed.size} pages crawled without errors."
      end
    end

    # True when at most one page was processed, i.e. the crawl never got past
    # its starting point.
    def no_links_found?
      @processed.size <= 1
    end
  end
end
@@ -0,0 +1,7 @@
1
# Core extension used by just_crawl for formatting text output.
class String
  # Wraps each line of the string so that it is at most +line_width+
  # characters long where whitespace allows; lines that already fit are left
  # untouched.
  #
  # @param line_width [Integer] maximum line length
  # @return [String]
  def word_wrap(line_width = 80)
    wrapped_lines = split("\n").map do |text|
      next text unless text.length > line_width

      text.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
    end
    wrapped_lines.join("\n")
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the just_crawl gem.
module JustCrawl
  # Version of the gem; read by the gemspec and printed by `--version`.
  VERSION = '1.1.8'.freeze
end
data/lib/just_crawl.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'nokogiri'
2
+ require 'rest_client'
3
+ require 'eventmachine'
4
+ require 'em-http-request'
5
+ require 'base64'
6
+ require 'set'
7
+ require 'fileutils'
8
+ require 'digest/sha1'
9
+ require 'json'
10
+ require 'tempfile'
11
+ require 'tmpdir'
12
+
13
+ require_relative 'just_crawl/version'
14
+ require_relative 'just_crawl/engine'
15
+ require_relative 'just_crawl/string'
16
+ require_relative 'just_crawl/failure'
17
+ require_relative 'just_crawl/register'
18
+ require_relative 'just_crawl/page'
@@ -0,0 +1,9 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Placeholder examples for JustCrawl::Engine; each one is reported as pending
# until it is given a body.
RSpec.describe 'JustCrawl::Engine' do
  %w[#initialize #run #summarize #errors? #no_links_found?].each do |method_name|
    xit method_name
  end
end
@@ -0,0 +1,10 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Placeholder examples for JustCrawl::Failure; each one is reported as pending
# until it is given a body.
RSpec.describe 'JustCrawl::Failure' do
  %w[#initialize #failure? #error? #name #message #location].each do |method_name|
    xit method_name
  end
end
@@ -0,0 +1,24 @@
1
+ require_relative '../../spec_helper'
2
+ require './lib/just_crawl/page'
3
+
4
# Examples for Page. Only #relative_url is exercised; the remaining entries
# are pending placeholders.
RSpec.describe Page do
  xit 'initialize'

  it '#relative_url' do
    # [url, source] => expected resolved path
    expectations = {
      ['/', '/'] => '/',
      ['./', '/'] => '/',
      ['page.html', ''] => '/page.html',
      ['/interview', '/'] => '/interview',
      ['overview.html', '/'] => '/overview.html',
      ['post-5.html', '/posts/index.html'] => '/posts/post-5.html',
      ['https://staging.alphasights.com/careers/meet-us', '/posts/foo'] => '/careers/meet-us'
    }

    expectations.each do |(url, source), resolved|
      expect(Page.new(:register, url, source).relative_url).to eq resolved
    end
  end

  %w[#<=> #eql? #hash #success #fatal #intermittent #to_s].each do |method_name|
    xit method_name
  end
end
@@ -0,0 +1,15 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Placeholder examples for JustCrawl::Register; each one is reported as
# pending until it is given a body.
RSpec.describe 'JustCrawl::Register' do
  %w[
    #initialize #add #next_page #retry #completed #finished?
    #processing_size #error_pages #errors? #summarize #no_links_found?
  ].each do |method_name|
    xit method_name
  end
end
@@ -0,0 +1,5 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Placeholder example for the String#word_wrap core extension; reported as
# pending until it is given a body.
RSpec.describe(String) do
  xit('#word_wrap')
end
@@ -0,0 +1,8 @@
1
+ require_relative '../../spec_helper'
2
+ require_relative '../../../lib/just_crawl/version'
3
+
4
# Guards against releasing without updating the version constant.
RSpec.describe('Gem version') do
  it('should match the current gem version') do
    current_version = JustCrawl::VERSION
    expect(current_version).to eq('1.1.8')
  end
end
@@ -0,0 +1,5 @@
1
+ require_relative '../spec_helper'
2
+
3
# Placeholder example for the top-level require file; reported as pending
# until it is given a body.
RSpec.describe('JustCrawl require files.') do
  xit('should load all required files')
end
@@ -0,0 +1,18 @@
1
# Shared RSpec setup. Coverage has to start before any application code is
# loaded, so SimpleCov comes first.
require 'simplecov'
SimpleCov.start

RSpec.configure do |rspec|
  rspec.expect_with(:rspec) do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # Use the documentation formatter when a single spec file is run.
  rspec.default_formatter = 'doc' if rspec.files_to_run.one?

  rspec.shared_context_metadata_behavior = :apply_to_host_groups
  rspec.filter_run_when_matching(:focus)
  rspec.example_status_persistence_file_path = 'spec/examples.txt'
  rspec.disable_monkey_patching!
  rspec.warnings = true

  # Run examples in random order and seed Kernel's RNG with the same seed.
  rspec.order = :random
  Kernel.srand(rspec.seed)
end
metadata ADDED
@@ -0,0 +1,213 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: just_crawl
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.8
5
+ platform: ruby
6
+ authors:
7
+ - Nicolas Sebastian Vidal
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-05-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: em-http-request
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.1'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.5
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.1'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.1.5
33
+ - !ruby/object:Gem::Dependency
34
+ name: eventmachine
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.2'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.6
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '1.2'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.6
53
+ - !ruby/object:Gem::Dependency
54
+ name: nokogiri
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '1.8'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.8.2
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.8'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 1.8.2
73
+ - !ruby/object:Gem::Dependency
74
+ name: rest-client
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: '2.0'
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 2.0.2
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.0'
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 2.0.2
93
+ - !ruby/object:Gem::Dependency
94
+ name: rspec-core
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: '3.7'
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 3.7.1
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '3.7'
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 3.7.1
113
+ - !ruby/object:Gem::Dependency
114
+ name: rspec-expectations
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: '3.7'
120
+ type: :development
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - "~>"
125
+ - !ruby/object:Gem::Version
126
+ version: '3.7'
127
+ - !ruby/object:Gem::Dependency
128
+ name: rspec_junit_formatter
129
+ requirement: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - "~>"
132
+ - !ruby/object:Gem::Version
133
+ version: 0.3.0
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - "~>"
139
+ - !ruby/object:Gem::Version
140
+ version: 0.3.0
141
+ - !ruby/object:Gem::Dependency
142
+ name: simplecov
143
+ requirement: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - "~>"
146
+ - !ruby/object:Gem::Version
147
+ version: 0.16.1
148
+ type: :development
149
+ prerelease: false
150
+ version_requirements: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - "~>"
153
+ - !ruby/object:Gem::Version
154
+ version: 0.16.1
155
+ description: JustCrawl crawls all pages on a domain, checking for errors
156
+ email:
157
+ - nicolas.s.vidal@gmail.com
158
+ executables:
159
+ - just_crawl
160
+ extensions: []
161
+ extra_rdoc_files: []
162
+ files:
163
+ - ".gitignore"
164
+ - ".rspec"
165
+ - ".ruby-version"
166
+ - Gemfile
167
+ - LICENSE.txt
168
+ - README.md
169
+ - Rakefile
170
+ - bin/just_crawl
171
+ - circle.yml
172
+ - just_crawl.gemspec
173
+ - lib/just_crawl.rb
174
+ - lib/just_crawl/engine.rb
175
+ - lib/just_crawl/failure.rb
176
+ - lib/just_crawl/page.rb
177
+ - lib/just_crawl/register.rb
178
+ - lib/just_crawl/string.rb
179
+ - lib/just_crawl/version.rb
180
+ - spec/lib/just_crawl/engine_spec.rb
181
+ - spec/lib/just_crawl/failure_spec.rb
182
+ - spec/lib/just_crawl/page_spec.rb
183
+ - spec/lib/just_crawl/register_spec.rb
184
+ - spec/lib/just_crawl/string_spec.rb
185
+ - spec/lib/just_crawl/version_spec.rb
186
+ - spec/lib/just_crawl_spec.rb
187
+ - spec/spec_helper.rb
188
+ homepage: http://github.com/nisevi/just_crawl
189
+ licenses:
190
+ - MIT
191
+ metadata: {}
192
+ post_install_message:
193
+ rdoc_options: []
194
+ require_paths:
195
+ - lib
196
+ required_ruby_version: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ">="
199
+ - !ruby/object:Gem::Version
200
+ version: 2.5.1
201
+ required_rubygems_version: !ruby/object:Gem::Requirement
202
+ requirements:
203
+ - - ">="
204
+ - !ruby/object:Gem::Version
205
+ version: '0'
206
+ requirements: []
207
+ rubyforge_project:
208
+ rubygems_version: 2.7.6
209
+ signing_key:
210
+ specification_version: 4
211
+ summary: JustCrawl crawls pages within a domain, reporting any page that returns a
212
+ bad response code
213
+ test_files: []