crawl 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/crawl +2 -2
- data/lib/crawl/engine.rb +12 -8
- data/lib/crawl/register.rb +4 -0
- data/lib/crawl/version.rb +1 -1
- metadata +31 -11
data/bin/crawl
CHANGED
data/lib/crawl/engine.rb
CHANGED
@@ -50,6 +50,10 @@ class Crawl::Engine
|
|
50
50
|
@register.errors?
|
51
51
|
end
|
52
52
|
|
53
|
+
def no_links_found?
|
54
|
+
@register.no_links_found?
|
55
|
+
end
|
56
|
+
|
53
57
|
private
|
54
58
|
|
55
59
|
def retrieve(page)
|
@@ -64,14 +68,14 @@ private
|
|
64
68
|
req.errback do
|
65
69
|
if req.nil?
|
66
70
|
page.intermittent("Req is nil. WAT?")
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
71
|
+
elsif msg = req.error
|
72
|
+
page.intermittent(msg)
|
73
|
+
elsif req.response.nil? || req.response.empty?
|
74
|
+
page.intermittent('Timeout?')
|
75
|
+
else
|
76
|
+
page.intermittent('Partial response: Server Broke Connection?')
|
77
|
+
end
|
78
|
+
process_next
|
75
79
|
end
|
76
80
|
|
77
81
|
req.callback do
|
data/lib/crawl/register.rb
CHANGED
data/lib/crawl/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: rest-client
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ! '>='
|
@@ -32,10 +37,15 @@ dependencies:
|
|
32
37
|
version: '0'
|
33
38
|
type: :runtime
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: eventmachine
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
39
49
|
none: false
|
40
50
|
requirements:
|
41
51
|
- - ~>
|
@@ -43,10 +53,15 @@ dependencies:
|
|
43
53
|
version: 1.0.0
|
44
54
|
type: :runtime
|
45
55
|
prerelease: false
|
46
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.0.0
|
47
62
|
- !ruby/object:Gem::Dependency
|
48
63
|
name: em-http-request
|
49
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
50
65
|
none: false
|
51
66
|
requirements:
|
52
67
|
- - ! '>='
|
@@ -54,7 +69,12 @@ dependencies:
|
|
54
69
|
version: '0'
|
55
70
|
type: :runtime
|
56
71
|
prerelease: false
|
57
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
58
78
|
description: Crawl all pages on a domain, checking for errors
|
59
79
|
email:
|
60
80
|
- tor@alphasights.com
|
@@ -96,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
116
|
version: '0'
|
97
117
|
requirements: []
|
98
118
|
rubyforge_project:
|
99
|
-
rubygems_version: 1.8.
|
119
|
+
rubygems_version: 1.8.23
|
100
120
|
signing_key:
|
101
121
|
specification_version: 3
|
102
122
|
summary: Crawl pages witin a domain, reporting any page that returns a bad response
|