cobweb 0.0.75 → 0.0.76
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +1 -1
- data/lib/cobweb_version.rb +1 -1
- data/lib/crawl.rb +2 -1
- data/spec/cobweb/cobweb_crawl_spec.rb +36 -0
- data/spec/cobweb/cobweb_job_spec.rb +4 -4
- metadata +25 -24
data/README.textile
CHANGED
data/lib/cobweb_version.rb
CHANGED
data/lib/crawl.rb
CHANGED
@@ -5,7 +5,8 @@ module CobwebModule
|
|
5
5
|
@options = HashUtil.deep_symbolize_keys(options)
|
6
6
|
|
7
7
|
setup_defaults
|
8
|
-
|
8
|
+
|
9
|
+
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@options[:crawl_id]}", :redis => Redis.new(@options[:redis_options]))
|
9
10
|
@stats = Stats.new(@options)
|
10
11
|
@debug = @options[:debug]
|
11
12
|
@first_to_finish = false
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
describe CobwebModule::Crawl, :local_only => true do
|
4
|
+
|
5
|
+
before(:each) do
|
6
|
+
@local_redis = {:host => "127.0.0.1", :port => 6379}
|
7
|
+
@remote_redis = {:host => "192.168.100.16", :port => 6379}
|
8
|
+
|
9
|
+
@request = {:crawl_id => "test_crawl_id"}
|
10
|
+
end
|
11
|
+
|
12
|
+
describe "remote redis" do
|
13
|
+
before(:each) do
|
14
|
+
@local = CobwebModule::Crawl.new(:redis_options => @local_redis)
|
15
|
+
@remote = CobwebModule::Crawl.new(:redis_options => @remote_redis)
|
16
|
+
|
17
|
+
@local.redis.del("test_redis")
|
18
|
+
@remote.redis.del("test_redis")
|
19
|
+
|
20
|
+
end
|
21
|
+
it "should connect to the local redis" do
|
22
|
+
@local.redis.exists("test_redis").should be_false
|
23
|
+
@local.redis.set("test_redis", 1)
|
24
|
+
@local.redis.exists("test_redis").should be_true
|
25
|
+
|
26
|
+
@remote.redis.exists("test_redis").should be_false
|
27
|
+
end
|
28
|
+
it "should connect to the remote redis" do
|
29
|
+
@remote.redis.exists("test_redis").should be_false
|
30
|
+
@remote.redis.set("test_redis", 1)
|
31
|
+
@remote.redis.exists("test_redis").should be_true
|
32
|
+
|
33
|
+
@local.redis.exists("test_redis").should be_false
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -30,7 +30,7 @@ describe Cobweb, :local_only => true do
|
|
30
30
|
:debug => false,
|
31
31
|
:cache => nil
|
32
32
|
}
|
33
|
-
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", Redis.new)
|
33
|
+
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", :redis => Redis.new)
|
34
34
|
@cobweb = Cobweb.new @request
|
35
35
|
end
|
36
36
|
it "should not crawl anything if nothing has started" do
|
@@ -63,7 +63,7 @@ describe Cobweb, :local_only => true do
|
|
63
63
|
:debug => false,
|
64
64
|
:cache => nil
|
65
65
|
}
|
66
|
-
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", Redis.new)
|
66
|
+
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", :redis => Redis.new)
|
67
67
|
|
68
68
|
@cobweb = Cobweb.new @request
|
69
69
|
end
|
@@ -93,7 +93,7 @@ describe Cobweb, :local_only => true do
|
|
93
93
|
:cache => nil,
|
94
94
|
:valid_mime_types => ["text/html"]
|
95
95
|
}
|
96
|
-
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", Redis.new)
|
96
|
+
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", :redis => Redis.new)
|
97
97
|
@cobweb = Cobweb.new @request
|
98
98
|
end
|
99
99
|
|
@@ -118,7 +118,7 @@ describe Cobweb, :local_only => true do
|
|
118
118
|
:debug => false,
|
119
119
|
:cache => nil
|
120
120
|
}
|
121
|
-
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", Redis.new)
|
121
|
+
@redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", :redis => Redis.new)
|
122
122
|
end
|
123
123
|
|
124
124
|
# describe "crawling http://yepadeperrors.wordpress.com/ with limit of 20" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cobweb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.75
|
4
|
+
version: 0.0.76
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-10-
|
12
|
+
date: 2012-10-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: resque
|
16
|
-
requirement: &
|
16
|
+
requirement: &70301036171860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70301036171860
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: redis
|
27
|
-
requirement: &
|
27
|
+
requirement: &70301036170940 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70301036170940
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70301036169780 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70301036169780
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: addressable
|
49
|
-
requirement: &
|
49
|
+
requirement: &70301036169040 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70301036169040
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &70301036167760 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70301036167760
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: awesome_print
|
71
|
-
requirement: &
|
71
|
+
requirement: &70301036166240 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70301036166240
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: sinatra
|
82
|
-
requirement: &
|
82
|
+
requirement: &70301036164760 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *70301036164760
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: thin
|
93
|
-
requirement: &
|
93
|
+
requirement: &70301036163660 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ! '>='
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '0'
|
99
99
|
type: :runtime
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *70301036163660
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: haml
|
104
|
-
requirement: &
|
104
|
+
requirement: &70301036162760 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ! '>='
|
@@ -109,10 +109,10 @@ dependencies:
|
|
109
109
|
version: '0'
|
110
110
|
type: :runtime
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *70301036162760
|
113
113
|
- !ruby/object:Gem::Dependency
|
114
114
|
name: namespaced_redis
|
115
|
-
requirement: &
|
115
|
+
requirement: &70301036161620 !ruby/object:Gem::Requirement
|
116
116
|
none: false
|
117
117
|
requirements:
|
118
118
|
- - ! '>='
|
@@ -120,10 +120,10 @@ dependencies:
|
|
120
120
|
version: 1.0.2
|
121
121
|
type: :runtime
|
122
122
|
prerelease: false
|
123
|
-
version_requirements: *
|
123
|
+
version_requirements: *70301036161620
|
124
124
|
- !ruby/object:Gem::Dependency
|
125
125
|
name: json
|
126
|
-
requirement: &
|
126
|
+
requirement: &70301036161100 !ruby/object:Gem::Requirement
|
127
127
|
none: false
|
128
128
|
requirements:
|
129
129
|
- - ! '>='
|
@@ -131,7 +131,7 @@ dependencies:
|
|
131
131
|
version: '0'
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
|
-
version_requirements: *
|
134
|
+
version_requirements: *70301036161100
|
135
135
|
description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
|
136
136
|
crawl extremely large sites which is much more performant than multi-threaded crawlers. It
|
137
137
|
is also a standalone crawler that has a sophisticated statistics monitoring interface
|
@@ -143,6 +143,7 @@ extra_rdoc_files:
|
|
143
143
|
- README.textile
|
144
144
|
files:
|
145
145
|
- spec/cobweb/cobweb_crawl_helper_spec.rb
|
146
|
+
- spec/cobweb/cobweb_crawl_spec.rb
|
146
147
|
- spec/cobweb/cobweb_crawler_spec.rb
|
147
148
|
- spec/cobweb/cobweb_job_spec.rb
|
148
149
|
- spec/cobweb/cobweb_links_spec.rb
|