cobweb 0.0.29 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +1 -1
- data/lib/cobweb.rb +7 -2
- metadata +20 -20
data/README.textile
CHANGED
data/lib/cobweb.rb
CHANGED
|
@@ -19,7 +19,7 @@ class Cobweb
|
|
|
19
19
|
# investigate using event machine for single threaded crawling
|
|
20
20
|
|
|
21
21
|
def self.version
|
|
22
|
-
"0.0.29"
|
|
22
|
+
"0.0.30"
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
def method_missing(method_sym, *arguments, &block)
|
|
@@ -103,6 +103,8 @@ class Cobweb
|
|
|
103
103
|
uri = Addressable::URI.parse(url.strip)
|
|
104
104
|
|
|
105
105
|
# retrieve data
|
|
106
|
+
ap uri.host
|
|
107
|
+
ap uri.inferred_port
|
|
106
108
|
unless @http && @http.address == uri.host && @http.port == uri.inferred_port
|
|
107
109
|
puts "Creating connection to #{uri.host}..." unless @options[:quiet]
|
|
108
110
|
@http = Net::HTTP.new(uri.host, uri.inferred_port)
|
|
@@ -112,6 +114,7 @@ class Cobweb
|
|
|
112
114
|
@http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
|
113
115
|
end
|
|
114
116
|
|
|
117
|
+
ap uri.request_uri
|
|
115
118
|
request_time = Time.now.to_f
|
|
116
119
|
@http.read_timeout = @options[:timeout].to_i
|
|
117
120
|
@http.open_timeout = @options[:timeout].to_i
|
|
@@ -125,7 +128,9 @@ class Cobweb
|
|
|
125
128
|
puts "redirected... " unless @options[:quiet]
|
|
126
129
|
|
|
127
130
|
# get location to redirect to
|
|
128
|
-
|
|
131
|
+
ap "redirecting to #{response['location']}"
|
|
132
|
+
url = Addressable::URI.join(uri, response['location']).to_s
|
|
133
|
+
ap url
|
|
129
134
|
|
|
130
135
|
# decrement redirect limit
|
|
131
136
|
redirect_limit = redirect_limit - 1
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cobweb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.29
|
|
4
|
+
version: 0.0.30
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,11 +9,11 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2012-03-
|
|
12
|
+
date: 2012-03-15 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: resque
|
|
16
|
-
requirement: &
|
|
16
|
+
requirement: &70105794152620 !ruby/object:Gem::Requirement
|
|
17
17
|
none: false
|
|
18
18
|
requirements:
|
|
19
19
|
- - ! '>='
|
|
@@ -21,10 +21,10 @@ dependencies:
|
|
|
21
21
|
version: '0'
|
|
22
22
|
type: :runtime
|
|
23
23
|
prerelease: false
|
|
24
|
-
version_requirements: *
|
|
24
|
+
version_requirements: *70105794152620
|
|
25
25
|
- !ruby/object:Gem::Dependency
|
|
26
26
|
name: redis
|
|
27
|
-
requirement: &
|
|
27
|
+
requirement: &70105794152180 !ruby/object:Gem::Requirement
|
|
28
28
|
none: false
|
|
29
29
|
requirements:
|
|
30
30
|
- - ! '>='
|
|
@@ -32,10 +32,10 @@ dependencies:
|
|
|
32
32
|
version: '0'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
|
-
version_requirements: *
|
|
35
|
+
version_requirements: *70105794152180
|
|
36
36
|
- !ruby/object:Gem::Dependency
|
|
37
37
|
name: nokogiri
|
|
38
|
-
requirement: &
|
|
38
|
+
requirement: &70105794151760 !ruby/object:Gem::Requirement
|
|
39
39
|
none: false
|
|
40
40
|
requirements:
|
|
41
41
|
- - ! '>='
|
|
@@ -43,10 +43,10 @@ dependencies:
|
|
|
43
43
|
version: '0'
|
|
44
44
|
type: :runtime
|
|
45
45
|
prerelease: false
|
|
46
|
-
version_requirements: *
|
|
46
|
+
version_requirements: *70105794151760
|
|
47
47
|
- !ruby/object:Gem::Dependency
|
|
48
48
|
name: addressable
|
|
49
|
-
requirement: &
|
|
49
|
+
requirement: &70105794151340 !ruby/object:Gem::Requirement
|
|
50
50
|
none: false
|
|
51
51
|
requirements:
|
|
52
52
|
- - ! '>='
|
|
@@ -54,10 +54,10 @@ dependencies:
|
|
|
54
54
|
version: '0'
|
|
55
55
|
type: :runtime
|
|
56
56
|
prerelease: false
|
|
57
|
-
version_requirements: *
|
|
57
|
+
version_requirements: *70105794151340
|
|
58
58
|
- !ruby/object:Gem::Dependency
|
|
59
59
|
name: rspec
|
|
60
|
-
requirement: &
|
|
60
|
+
requirement: &70105794150920 !ruby/object:Gem::Requirement
|
|
61
61
|
none: false
|
|
62
62
|
requirements:
|
|
63
63
|
- - ! '>='
|
|
@@ -65,10 +65,10 @@ dependencies:
|
|
|
65
65
|
version: '0'
|
|
66
66
|
type: :runtime
|
|
67
67
|
prerelease: false
|
|
68
|
-
version_requirements: *
|
|
68
|
+
version_requirements: *70105794150920
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: awesome_print
|
|
71
|
-
requirement: &
|
|
71
|
+
requirement: &70105794150500 !ruby/object:Gem::Requirement
|
|
72
72
|
none: false
|
|
73
73
|
requirements:
|
|
74
74
|
- - ! '>='
|
|
@@ -76,10 +76,10 @@ dependencies:
|
|
|
76
76
|
version: '0'
|
|
77
77
|
type: :runtime
|
|
78
78
|
prerelease: false
|
|
79
|
-
version_requirements: *
|
|
79
|
+
version_requirements: *70105794150500
|
|
80
80
|
- !ruby/object:Gem::Dependency
|
|
81
81
|
name: sinatra
|
|
82
|
-
requirement: &
|
|
82
|
+
requirement: &70105794166440 !ruby/object:Gem::Requirement
|
|
83
83
|
none: false
|
|
84
84
|
requirements:
|
|
85
85
|
- - ! '>='
|
|
@@ -87,10 +87,10 @@ dependencies:
|
|
|
87
87
|
version: '0'
|
|
88
88
|
type: :runtime
|
|
89
89
|
prerelease: false
|
|
90
|
-
version_requirements: *
|
|
90
|
+
version_requirements: *70105794166440
|
|
91
91
|
- !ruby/object:Gem::Dependency
|
|
92
92
|
name: thin
|
|
93
|
-
requirement: &
|
|
93
|
+
requirement: &70105794166020 !ruby/object:Gem::Requirement
|
|
94
94
|
none: false
|
|
95
95
|
requirements:
|
|
96
96
|
- - ! '>='
|
|
@@ -98,10 +98,10 @@ dependencies:
|
|
|
98
98
|
version: '0'
|
|
99
99
|
type: :runtime
|
|
100
100
|
prerelease: false
|
|
101
|
-
version_requirements: *
|
|
101
|
+
version_requirements: *70105794166020
|
|
102
102
|
- !ruby/object:Gem::Dependency
|
|
103
103
|
name: haml
|
|
104
|
-
requirement: &
|
|
104
|
+
requirement: &70105794165600 !ruby/object:Gem::Requirement
|
|
105
105
|
none: false
|
|
106
106
|
requirements:
|
|
107
107
|
- - ! '>='
|
|
@@ -109,7 +109,7 @@ dependencies:
|
|
|
109
109
|
version: '0'
|
|
110
110
|
type: :runtime
|
|
111
111
|
prerelease: false
|
|
112
|
-
version_requirements: *
|
|
112
|
+
version_requirements: *70105794165600
|
|
113
113
|
description: Web Crawler that uses resque background job engine to allow you to cluster
|
|
114
114
|
your crawl.
|
|
115
115
|
email: stewart@rockwellcottage.com
|