crawl 1.0.3 → 1.1.0

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 4825e68f15b25c5f26947ea149b6c835ced7bdf8
- data.tar.gz: fdebd71a13a40f51cb20a635c5e02ea6dd9a4f6f
+ metadata.gz: 70e1653730fc26973b11c738e31b30655948fb79
+ data.tar.gz: 43506b21e7ce399c35bef3e7a48f6acff72632ae
  SHA512:
- metadata.gz: 25a485be0c41e3f23b60f5974806757c6a6ab6888bbadbf0df317b3c7bce1825616026a5d1cd0c2feaf223a7af844b3630de0d6e547108d87d77c00b89dd27c0
- data.tar.gz: abe1b5261102f0cd498d75caedef412e4bee3a7995401fe3c88ef0619b01c4d18a46904db4d5b778916fbf76f1ae7faf0935758463ab45e389ef98915ff633e7
+ metadata.gz: 1b96daad9bdfb00388beffec236f13f9463f5df139160fe504779a46881eabc56ee9d357beff2609e933549f8638fbb391d29caa06902cae0aa1b10c3e581e0d
+ data.tar.gz: e4c7521800d88c5984177a9b1801a9dcb507081a3f4d0a309de896422a1ddcc7e4ce8a038f7e0906a83cccd22a3c5d374355592fb6915a52ecfae094fff20e40
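
A recorded checksum can be spot-checked against a local copy of the gem. A minimal sketch, assuming crawl-1.1.0.gem has been fetched with "gem fetch crawl -v 1.1.0" and untarred (a .gem file is a tar archive containing metadata.gz, data.tar.gz, and checksums.yaml.gz):

    require 'digest'

    # SHA1 recorded above for data.tar.gz in 1.1.0
    expected = '43506b21e7ce399c35bef3e7a48f6acff72632ae'
    # Digest::SHA1.file streams the file from disk; assumes data.tar.gz
    # sits in the current directory after untarring the fetched .gem.
    actual = Digest::SHA1.file('data.tar.gz').hexdigest
    puts(actual == expected ? 'data.tar.gz: checksum OK' : 'data.tar.gz: MISMATCH')
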
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
- source 'http://rubygems.org'
+ source 'https://rubygems.org'

  gemspec
data/README.md CHANGED
@@ -1,25 +1,36 @@
  # Crawl

- Crawl pages witin a domain, reporting any page that returns a bad response code
+ Crawl pages within a domain, reporting any page that returns a bad response code

  Usage:

  > crawl [options] domain

+ Usage: crawl [options] domain
  -s, --start /home,/about Starting path(s), defaults to /
  -u, --username username Basic auth username
  -p, --password password Basic auth password
+ -c, --connections count Max mumber of parallel connections to use. The default is 20.
  -v, --verbose Give details when crawling
- -m, --markup Validate markup
  -h, --help Show this message
+ --version Print version
+
+

  Example:

- > crawl http://alphasights.com --start=/no-such-page --verbose
+ > crawl https://engineering.alphasights.com --connections=5 --start=/ --verbose

- Adding /no-such-page
- Fetching /no-such-page ...
+ Adding /
+ Fetching / ...
+ Adding /positions/ruby-developer
+ Adding /positions/js-ember-developer
+ Adding /positions/ux-ui-designer
+ Adding /positions/support-specialist
+ Fetching /positions/ruby-developer
+ Fetching /positions/js-ember-developer ...
+ Fetching /positions/ux-ui-designer ...
+ Fetching /positions/support-specialist ...

- Pages with errors:
- /no-such-page found on the command line - Status code: 404
+ 5 pages crawled without errors.

data/bin/crawl CHANGED
@@ -2,12 +2,15 @@
  require 'optparse'
  require_relative '../lib/crawl.rb'

+ EM.threadpool_size = 5
+
  options = {}
  optparse = OptionParser.new do |opts|
  opts.banner = "Crawl pages witin a domain, reporting any page that returns a bad response code\nUsage: crawl [options] domain"
  opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') { |o| options[:start] = o }
  opts.on('-u', '--username username', String, 'Basic auth username') { |o| options[:username] = o }
  opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
+ opts.on('-c', '--connections count', Integer, "Max mumber of parallel connections to use. The default is #{EM.threadpool_size}.") { |o| EM.threadpool_size = o }
  opts.on('-v', '--verbose', 'Give details when crawling') { |o| $verbose = o }
  opts.on_tail("-h", "--help", "Show this message") { |o| puts opts; exit }
  opts.on_tail("-v", "--version", "Print version") { |o| puts Crawl::VERSION; exit }
data/crawl.gemspec CHANGED
@@ -14,8 +14,9 @@ Gem::Specification.new do |gem|
  gem.name = "crawl"
  gem.require_paths = ["lib"]
  gem.version = Crawl::VERSION
- gem.add_dependency('nokogiri')
- gem.add_dependency('rest-client')
- gem.add_dependency('eventmachine', '1.0.1')
- gem.add_dependency('em-http-request', '1.0.3')
+ gem.licenses = ['MIT']
+ gem.add_dependency('nokogiri', '~> 1.6')
+ gem.add_dependency('rest-client', '~> 1.7')
+ gem.add_dependency('eventmachine', '~> 1.0')
+ gem.add_dependency('em-http-request', '~> 1.1')
  end
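
The move from unpinned ('nokogiri') and exact ('eventmachine', '1.0.1') dependencies to pessimistic constraints lets compatible minor and patch releases through while blocking the next major version. A quick illustration of how RubyGems evaluates "~> 1.6":

    require 'rubygems'

    req = Gem::Requirement.new('~> 1.6')
    req.satisfied_by?(Gem::Version.new('1.6.0'))  # => true
    req.satisfied_by?(Gem::Version.new('1.9.9'))  # => true
    req.satisfied_by?(Gem::Version.new('2.0.0'))  # => false
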
data/lib/crawl/version.rb CHANGED
@@ -1,4 +1,4 @@
  # encoding: utf-8
  module Crawl
- VERSION = "1.0.3"
+ VERSION = "1.1.0"
  end
metadata CHANGED
@@ -1,71 +1,71 @@
  --- !ruby/object:Gem::Specification
  name: crawl
  version: !ruby/object:Gem::Version
- version: 1.0.3
+ version: 1.1.0
  platform: ruby
  authors:
  - Tor Erik Linnerud
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-03-15 00:00:00.000000000 Z
+ date: 2015-02-11 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: nokogiri
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: '0'
+ version: '1.6'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: '0'
+ version: '1.6'
  - !ruby/object:Gem::Dependency
  name: rest-client
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: '0'
+ version: '1.7'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: '0'
+ version: '1.7'
  - !ruby/object:Gem::Dependency
  name: eventmachine
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: 1.0.1
+ version: '1.0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: 1.0.1
+ version: '1.0'
  - !ruby/object:Gem::Dependency
  name: em-http-request
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: 1.0.3
+ version: '1.1'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '='
+ - - "~>"
  - !ruby/object:Gem::Version
- version: 1.0.3
+ version: '1.1'
  description: Crawl all pages on a domain, checking for errors
  email:
  - tor@alphasights.com
@@ -74,7 +74,7 @@ executables:
  extensions: []
  extra_rdoc_files: []
  files:
- - .gitignore
+ - ".gitignore"
  - Gemfile
  - README.md
  - Rakefile
@@ -88,7 +88,8 @@ files:
  - lib/crawl/string.rb
  - lib/crawl/version.rb
  homepage: http://github.com/alphasights/crawl
- licenses: []
+ licenses:
+ - MIT
  metadata: {}
  post_install_message:
  rdoc_options: []
@@ -96,19 +97,20 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.0.0
+ rubygems_version: 2.2.2
  signing_key:
  specification_version: 4
  summary: Crawl pages witin a domain, reporting any page that returns a bad response
  code
  test_files: []
+ has_rdoc:
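
The quoting changes and the trailing has_rdoc: key above appear to come from the gem being repackaged with RubyGems 2.2.2 instead of 2.0.0, not from edits to the spec itself. A hedged sketch for inspecting the regenerated metadata locally, assuming the gem file has been fetched first:

    require 'rubygems/package'

    # Assumes `gem fetch crawl -v 1.1.0` has produced crawl-1.1.0.gem here.
    spec = Gem::Package.new('crawl-1.1.0.gem').spec
    puts spec.version          # => 1.1.0
    puts spec.licenses.inspect # => ["MIT"]
    spec.runtime_dependencies.each do |dep|
      puts "#{dep.name} #{dep.requirement}" # e.g. "nokogiri ~> 1.6"
    end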