crawl 1.0.3 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4825e68f15b25c5f26947ea149b6c835ced7bdf8
4
- data.tar.gz: fdebd71a13a40f51cb20a635c5e02ea6dd9a4f6f
3
+ metadata.gz: 70e1653730fc26973b11c738e31b30655948fb79
4
+ data.tar.gz: 43506b21e7ce399c35bef3e7a48f6acff72632ae
5
5
  SHA512:
6
- metadata.gz: 25a485be0c41e3f23b60f5974806757c6a6ab6888bbadbf0df317b3c7bce1825616026a5d1cd0c2feaf223a7af844b3630de0d6e547108d87d77c00b89dd27c0
7
- data.tar.gz: abe1b5261102f0cd498d75caedef412e4bee3a7995401fe3c88ef0619b01c4d18a46904db4d5b778916fbf76f1ae7faf0935758463ab45e389ef98915ff633e7
6
+ metadata.gz: 1b96daad9bdfb00388beffec236f13f9463f5df139160fe504779a46881eabc56ee9d357beff2609e933549f8638fbb391d29caa06902cae0aa1b10c3e581e0d
7
+ data.tar.gz: e4c7521800d88c5984177a9b1801a9dcb507081a3f4d0a309de896422a1ddcc7e4ce8a038f7e0906a83cccd22a3c5d374355592fb6915a52ecfae094fff20e40
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
1
- source 'http://rubygems.org'
1
+ source 'https://rubygems.org'
2
2
 
3
3
  gemspec
data/README.md CHANGED
@@ -1,25 +1,36 @@
1
1
  # Crawl
2
2
 
3
- Crawl pages witin a domain, reporting any page that returns a bad response code
3
+ Crawl pages within a domain, reporting any page that returns a bad response code
4
4
 
5
5
  Usage:
6
6
 
7
7
  > crawl [options] domain
8
8
 
9
+ Usage: crawl [options] domain
9
10
  -s, --start /home,/about Starting path(s), defaults to /
10
11
  -u, --username username Basic auth username
11
12
  -p, --password password Basic auth password
13
+ -c, --connections count Max number of parallel connections to use. The default is 20.
12
14
  -v, --verbose Give details when crawling
13
- -m, --markup Validate markup
14
15
  -h, --help Show this message
16
+ --version Print version
17
+
18
+
15
19
 
16
20
  Example:
17
21
 
18
- > crawl http://alphasights.com --start=/no-such-page --verbose
22
+ > crawl https://engineering.alphasights.com --connections=5 --start=/ --verbose
19
23
 
20
- Adding /no-such-page
21
- Fetching /no-such-page ...
24
+ Adding /
25
+ Fetching / ...
26
+ Adding /positions/ruby-developer
27
+ Adding /positions/js-ember-developer
28
+ Adding /positions/ux-ui-designer
29
+ Adding /positions/support-specialist
30
+ Fetching /positions/ruby-developer ...
31
+ Fetching /positions/js-ember-developer ...
32
+ Fetching /positions/ux-ui-designer ...
33
+ Fetching /positions/support-specialist ...
22
34
 
23
- Pages with errors:
24
- /no-such-page found on the command line - Status code: 404
35
+ 5 pages crawled without errors.
25
36
 
data/bin/crawl CHANGED
@@ -2,12 +2,15 @@
2
2
  require 'optparse'
3
3
  require_relative '../lib/crawl.rb'
4
4
 
5
+ EM.threadpool_size = 5
6
+
5
7
  options = {}
6
8
  optparse = OptionParser.new do |opts|
7
9
  opts.banner = "Crawl pages witin a domain, reporting any page that returns a bad response code\nUsage: crawl [options] domain"
8
10
  opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') { |o| options[:start] = o }
9
11
  opts.on('-u', '--username username', String, 'Basic auth username') { |o| options[:username] = o }
10
12
  opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
13
+ opts.on('-c', '--connections count', Integer, "Max mumber of parallel connections to use. The default is #{EM.threadpool_size}.") { |o| EM.threadpool_size = o }
11
14
  opts.on('-v', '--verbose', 'Give details when crawling') { |o| $verbose = o }
12
15
  opts.on_tail("-h", "--help", "Show this message") { |o| puts opts; exit }
13
16
  opts.on_tail("-v", "--version", "Print version") { |o| puts Crawl::VERSION; exit }
data/crawl.gemspec CHANGED
@@ -14,8 +14,9 @@ Gem::Specification.new do |gem|
14
14
  gem.name = "crawl"
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Crawl::VERSION
17
- gem.add_dependency('nokogiri')
18
- gem.add_dependency('rest-client')
19
- gem.add_dependency('eventmachine', '1.0.1')
20
- gem.add_dependency('em-http-request', '1.0.3')
17
+ gem.licenses = ['MIT']
18
+ gem.add_dependency('nokogiri', '~> 1.6')
19
+ gem.add_dependency('rest-client', '~> 1.7')
20
+ gem.add_dependency('eventmachine', '~> 1.0')
21
+ gem.add_dependency('em-http-request', '~> 1.1')
21
22
  end
data/lib/crawl/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module Crawl
3
- VERSION = "1.0.3"
3
+ VERSION = "1.1.0"
4
4
  end
metadata CHANGED
@@ -1,71 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tor Erik Linnerud
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-15 00:00:00.000000000 Z
11
+ date: 2015-02-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1.6'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1.6'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rest-client
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '1.7'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '1.7'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: eventmachine
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '='
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 1.0.1
47
+ version: '1.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '='
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 1.0.1
54
+ version: '1.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: em-http-request
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '='
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 1.0.3
61
+ version: '1.1'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '='
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 1.0.3
68
+ version: '1.1'
69
69
  description: Crawl all pages on a domain, checking for errors
70
70
  email:
71
71
  - tor@alphasights.com
@@ -74,7 +74,7 @@ executables:
74
74
  extensions: []
75
75
  extra_rdoc_files: []
76
76
  files:
77
- - .gitignore
77
+ - ".gitignore"
78
78
  - Gemfile
79
79
  - README.md
80
80
  - Rakefile
@@ -88,7 +88,8 @@ files:
88
88
  - lib/crawl/string.rb
89
89
  - lib/crawl/version.rb
90
90
  homepage: http://github.com/alphasights/crawl
91
- licenses: []
91
+ licenses:
92
+ - MIT
92
93
  metadata: {}
93
94
  post_install_message:
94
95
  rdoc_options: []
@@ -96,19 +97,20 @@ require_paths:
96
97
  - lib
97
98
  required_ruby_version: !ruby/object:Gem::Requirement
98
99
  requirements:
99
- - - '>='
100
+ - - ">="
100
101
  - !ruby/object:Gem::Version
101
102
  version: '0'
102
103
  required_rubygems_version: !ruby/object:Gem::Requirement
103
104
  requirements:
104
- - - '>='
105
+ - - ">="
105
106
  - !ruby/object:Gem::Version
106
107
  version: '0'
107
108
  requirements: []
108
109
  rubyforge_project:
109
- rubygems_version: 2.0.0
110
+ rubygems_version: 2.2.2
110
111
  signing_key:
111
112
  specification_version: 4
112
113
  summary: Crawl pages within a domain, reporting any page that returns a bad response
113
114
  code
114
115
  test_files: []
116
+ has_rdoc: