crawl 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/README.md +18 -7
- data/bin/crawl +3 -0
- data/crawl.gemspec +5 -4
- data/lib/crawl/version.rb +1 -1
- metadata +25 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70e1653730fc26973b11c738e31b30655948fb79
|
4
|
+
data.tar.gz: 43506b21e7ce399c35bef3e7a48f6acff72632ae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b96daad9bdfb00388beffec236f13f9463f5df139160fe504779a46881eabc56ee9d357beff2609e933549f8638fbb391d29caa06902cae0aa1b10c3e581e0d
|
7
|
+
data.tar.gz: e4c7521800d88c5984177a9b1801a9dcb507081a3f4d0a309de896422a1ddcc7e4ce8a038f7e0906a83cccd22a3c5d374355592fb6915a52ecfae094fff20e40
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,25 +1,36 @@
|
|
1
1
|
# Crawl
|
2
2
|
|
3
|
-
Crawl pages
|
3
|
+
Crawl pages within a domain, reporting any page that returns a bad response code
|
4
4
|
|
5
5
|
Usage:
|
6
6
|
|
7
7
|
> crawl [options] domain
|
8
8
|
|
9
|
+
Usage: crawl [options] domain
|
9
10
|
-s, --start /home,/about Starting path(s), defaults to /
|
10
11
|
-u, --username username Basic auth username
|
11
12
|
-p, --password password Basic auth password
|
13
|
+
-c, --connections count Max number of parallel connections to use. The default is 20.
|
12
14
|
-v, --verbose Give details when crawling
|
13
|
-
-m, --markup Validate markup
|
14
15
|
-h, --help Show this message
|
16
|
+
--version Print version
|
17
|
+
|
18
|
+
|
15
19
|
|
16
20
|
Example:
|
17
21
|
|
18
|
-
> crawl
|
22
|
+
> crawl https://engineering.alphasights.com --connections=5 --start=/ --verbose
|
19
23
|
|
20
|
-
Adding /
|
21
|
-
Fetching /
|
24
|
+
Adding /
|
25
|
+
Fetching / ...
|
26
|
+
Adding /positions/ruby-developer
|
27
|
+
Adding /positions/js-ember-developer
|
28
|
+
Adding /positions/ux-ui-designer
|
29
|
+
Adding /positions/support-specialist
|
30
|
+
Fetching /positions/ruby-developer
|
31
|
+
Fetching /positions/js-ember-developer ...
|
32
|
+
Fetching /positions/ux-ui-designer ...
|
33
|
+
Fetching /positions/support-specialist ...
|
22
34
|
|
23
|
-
|
24
|
-
/no-such-page found on the command line - Status code: 404
|
35
|
+
5 pages crawled without errors.
|
25
36
|
|
data/bin/crawl
CHANGED
@@ -2,12 +2,15 @@
|
|
2
2
|
require 'optparse'
|
3
3
|
require_relative '../lib/crawl.rb'
|
4
4
|
|
5
|
+
EM.threadpool_size = 5
|
6
|
+
|
5
7
|
options = {}
|
6
8
|
optparse = OptionParser.new do |opts|
|
7
9
|
opts.banner = "Crawl pages witin a domain, reporting any page that returns a bad response code\nUsage: crawl [options] domain"
|
8
10
|
opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') { |o| options[:start] = o }
|
9
11
|
opts.on('-u', '--username username', String, 'Basic auth username') { |o| options[:username] = o }
|
10
12
|
opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
|
13
|
+
opts.on('-c', '--connections count', Integer, "Max mumber of parallel connections to use. The default is #{EM.threadpool_size}.") { |o| EM.threadpool_size = o }
|
11
14
|
opts.on('-v', '--verbose', 'Give details when crawling') { |o| $verbose = o }
|
12
15
|
opts.on_tail("-h", "--help", "Show this message") { |o| puts opts; exit }
|
13
16
|
opts.on_tail("-v", "--version", "Print version") { |o| puts Crawl::VERSION; exit }
|
data/crawl.gemspec
CHANGED
@@ -14,8 +14,9 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.name = "crawl"
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
gem.version = Crawl::VERSION
|
17
|
-
gem.
|
18
|
-
gem.add_dependency('
|
19
|
-
gem.add_dependency('
|
20
|
-
gem.add_dependency('
|
17
|
+
gem.licenses = ['MIT']
|
18
|
+
gem.add_dependency('nokogiri', '~> 1.6')
|
19
|
+
gem.add_dependency('rest-client', '~> 1.7')
|
20
|
+
gem.add_dependency('eventmachine', '~> 1.0')
|
21
|
+
gem.add_dependency('em-http-request', '~> 1.1')
|
21
22
|
end
|
data/lib/crawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,71 +1,71 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tor Erik Linnerud
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-02-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.6'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '1.6'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rest-client
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.7'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.7'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: eventmachine
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.0
|
47
|
+
version: '1.0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.0
|
54
|
+
version: '1.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: em-http-request
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 1.
|
61
|
+
version: '1.1'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 1.
|
68
|
+
version: '1.1'
|
69
69
|
description: Crawl all pages on a domain, checking for errors
|
70
70
|
email:
|
71
71
|
- tor@alphasights.com
|
@@ -74,7 +74,7 @@ executables:
|
|
74
74
|
extensions: []
|
75
75
|
extra_rdoc_files: []
|
76
76
|
files:
|
77
|
-
- .gitignore
|
77
|
+
- ".gitignore"
|
78
78
|
- Gemfile
|
79
79
|
- README.md
|
80
80
|
- Rakefile
|
@@ -88,7 +88,8 @@ files:
|
|
88
88
|
- lib/crawl/string.rb
|
89
89
|
- lib/crawl/version.rb
|
90
90
|
homepage: http://github.com/alphasights/crawl
|
91
|
-
licenses:
|
91
|
+
licenses:
|
92
|
+
- MIT
|
92
93
|
metadata: {}
|
93
94
|
post_install_message:
|
94
95
|
rdoc_options: []
|
@@ -96,19 +97,20 @@ require_paths:
|
|
96
97
|
- lib
|
97
98
|
required_ruby_version: !ruby/object:Gem::Requirement
|
98
99
|
requirements:
|
99
|
-
- -
|
100
|
+
- - ">="
|
100
101
|
- !ruby/object:Gem::Version
|
101
102
|
version: '0'
|
102
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
104
|
requirements:
|
104
|
-
- -
|
105
|
+
- - ">="
|
105
106
|
- !ruby/object:Gem::Version
|
106
107
|
version: '0'
|
107
108
|
requirements: []
|
108
109
|
rubyforge_project:
|
109
|
-
rubygems_version: 2.
|
110
|
+
rubygems_version: 2.2.2
|
110
111
|
signing_key:
|
111
112
|
specification_version: 4
|
112
113
|
summary: Crawl pages within a domain, reporting any page that returns a bad response
|
113
114
|
code
|
114
115
|
test_files: []
|
116
|
+
has_rdoc:
|