anemone 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +10 -0
- data/README.rdoc +2 -1
- data/Rakefile +2 -4
- data/VERSION +1 -1
- data/lib/anemone/core.rb +3 -3
- data/lib/anemone/storage.rb +1 -1
- data/spec/storage_spec.rb +54 -29
- metadata +62 -51
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
== 0.7.1 / 2012-01-20
|
2
|
+
|
3
|
+
* Minor enhancements
|
4
|
+
|
5
|
+
* Switch from robots gem (which people reported problems with) to new robotex gem
|
6
|
+
|
7
|
+
* Bug fixes
|
8
|
+
|
9
|
+
* Fix incorrect default file extension for KyotoCabinet
|
10
|
+
|
1
11
|
== 0.7.0 / 2012-01-19
|
2
12
|
|
3
13
|
* Major enhancements
|
data/README.rdoc
CHANGED
@@ -30,8 +30,9 @@ To test and develop this gem, additional requirements are:
|
|
30
30
|
* rspec
|
31
31
|
* fakeweb
|
32
32
|
* tokyocabinet
|
33
|
+
* kyotocabinet-ruby
|
33
34
|
* mongo
|
34
35
|
* redis
|
35
36
|
* sqlite3
|
36
37
|
|
37
|
-
You will need to have {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
|
38
|
+
You will need to have KyotoCabinet, {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
|
data/Rakefile
CHANGED
@@ -1,6 +1,5 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rake'
|
3
1
|
require 'rspec/core/rake_task'
|
2
|
+
require 'rake/rdoctask'
|
4
3
|
|
5
4
|
desc "Run all specs"
|
6
5
|
RSpec::Core::RakeTask.new(:rspec) do |spec|
|
@@ -14,8 +13,7 @@ end
|
|
14
13
|
|
15
14
|
task :default => :rspec
|
16
15
|
|
17
|
-
|
18
|
-
RDoc::Task.new do |rdoc|
|
16
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
19
17
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
20
18
|
|
21
19
|
rdoc.rdoc_dir = 'rdoc'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.0
|
1
|
+
0.7.1
|
data/lib/anemone/core.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'thread'
|
2
|
-
require 'robots'
|
2
|
+
require 'robotex'
|
3
3
|
require 'anemone/tentacle'
|
4
4
|
require 'anemone/page'
|
5
5
|
require 'anemone/exceptions'
|
@@ -9,7 +9,7 @@ require 'anemone/storage/base'
|
|
9
9
|
|
10
10
|
module Anemone
|
11
11
|
|
12
|
-
VERSION = '0.7.0';
|
12
|
+
VERSION = '0.7.1';
|
13
13
|
|
14
14
|
#
|
15
15
|
# Convenience method to start a crawl
|
@@ -199,7 +199,7 @@ module Anemone
|
|
199
199
|
@opts[:threads] = 1 if @opts[:delay] > 0
|
200
200
|
storage = Anemone::Storage::Base.new(@opts[:storage] || Anemone::Storage.Hash)
|
201
201
|
@pages = PageStore.new(storage)
|
202
|
-
@robots = Robots.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
|
202
|
+
@robots = Robotex.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
|
203
203
|
|
204
204
|
freeze_options
|
205
205
|
end
|
data/lib/anemone/storage.rb
CHANGED
data/spec/storage_spec.rb
CHANGED
@@ -6,45 +6,70 @@ require 'spec_helper'
|
|
6
6
|
module Anemone
|
7
7
|
describe Storage do
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
describe ".Hash" do
|
10
|
+
it "returns a Hash adapter" do
|
11
|
+
Anemone::Storage.Hash.should be_an_instance_of(Hash)
|
12
|
+
end
|
12
13
|
end
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
describe ".PStore" do
|
16
|
+
it "returns a PStore adapter" do
|
17
|
+
test_file = 'test.pstore'
|
18
|
+
Anemone::Storage.PStore(test_file).should be_an_instance_of(Anemone::Storage::PStore)
|
19
|
+
end
|
18
20
|
end
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
describe ".TokyoCabinet" do
|
23
|
+
it "returns a TokyoCabinet adapter" do
|
24
|
+
test_file = 'test.tch'
|
25
|
+
store = Anemone::Storage.TokyoCabinet(test_file)
|
26
|
+
store.should be_an_instance_of(Anemone::Storage::TokyoCabinet)
|
27
|
+
store.close
|
28
|
+
end
|
26
29
|
end
|
27
30
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
31
|
+
describe ".KyotoCabinet" do
|
32
|
+
context "when the file is specified" do
|
33
|
+
it "returns a KyotoCabinet adapter using that file" do
|
34
|
+
test_file = 'test.kch'
|
35
|
+
store = Anemone::Storage.KyotoCabinet(test_file)
|
36
|
+
store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
|
37
|
+
store.close
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "when no file is specified" do
|
42
|
+
it "returns a KyotoCabinet adapter using the default filename" do
|
43
|
+
store = Anemone::Storage.KyotoCabinet
|
44
|
+
store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
|
45
|
+
store.close
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe ".SQLite3" do
|
51
|
+
it "returns a SQLite3 adapter" do
|
52
|
+
test_file = 'test.db'
|
53
|
+
store = Anemone::Storage.SQLite3(test_file)
|
54
|
+
store.should be_an_instance_of(Anemone::Storage::SQLite3)
|
55
|
+
store.close
|
56
|
+
end
|
34
57
|
end
|
35
58
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
59
|
+
describe ".MongoDB" do
|
60
|
+
it "returns a MongoDB adapter" do
|
61
|
+
store = Anemone::Storage.MongoDB
|
62
|
+
store.should be_an_instance_of(Anemone::Storage::MongoDB)
|
63
|
+
store.close
|
64
|
+
end
|
41
65
|
end
|
42
66
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
67
|
+
describe ".Redis" do
|
68
|
+
it "returns a Redis adapter" do
|
69
|
+
store = Anemone::Storage.Redis
|
70
|
+
store.should be_an_instance_of(Anemone::Storage::Redis)
|
71
|
+
store.close
|
72
|
+
end
|
48
73
|
end
|
49
74
|
|
50
75
|
module Storage
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anemone
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.7.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-01-20 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &23166540 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,43 +21,54 @@ dependencies:
|
|
21
21
|
version: 1.3.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *23166540
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name: robots
|
27
|
-
requirement: &
|
26
|
+
name: robotex
|
27
|
+
requirement: &23166060 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 0.
|
32
|
+
version: 1.0.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *23166060
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rake
|
38
|
-
requirement: &
|
38
|
+
requirement: &23165600 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 0.
|
43
|
+
version: 0.9.2
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *23165600
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rdoc
|
49
|
+
requirement: &23165140 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.12'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *23165140
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: rspec
|
49
|
-
requirement: &
|
60
|
+
requirement: &23164680 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ! '>='
|
53
64
|
- !ruby/object:Gem::Version
|
54
|
-
version: 2.
|
65
|
+
version: 2.8.0
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *23164680
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: fakeweb
|
60
|
-
requirement: &
|
71
|
+
requirement: &18439940 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ! '>='
|
@@ -65,10 +76,10 @@ dependencies:
|
|
65
76
|
version: 1.3.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *18439940
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: redis
|
71
|
-
requirement: &
|
82
|
+
requirement: &21684560 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
85
|
- - ! '>='
|
@@ -76,10 +87,10 @@ dependencies:
|
|
76
87
|
version: 2.2.0
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *21684560
|
80
91
|
- !ruby/object:Gem::Dependency
|
81
92
|
name: mongo
|
82
|
-
requirement: &
|
93
|
+
requirement: &21856420 !ruby/object:Gem::Requirement
|
83
94
|
none: false
|
84
95
|
requirements:
|
85
96
|
- - ! '>='
|
@@ -87,10 +98,10 @@ dependencies:
|
|
87
98
|
version: 1.3.1
|
88
99
|
type: :development
|
89
100
|
prerelease: false
|
90
|
-
version_requirements: *
|
101
|
+
version_requirements: *21856420
|
91
102
|
- !ruby/object:Gem::Dependency
|
92
103
|
name: bson_ext
|
93
|
-
requirement: &
|
104
|
+
requirement: &21924200 !ruby/object:Gem::Requirement
|
94
105
|
none: false
|
95
106
|
requirements:
|
96
107
|
- - ! '>='
|
@@ -98,10 +109,10 @@ dependencies:
|
|
98
109
|
version: 1.3.1
|
99
110
|
type: :development
|
100
111
|
prerelease: false
|
101
|
-
version_requirements: *
|
112
|
+
version_requirements: *21924200
|
102
113
|
- !ruby/object:Gem::Dependency
|
103
114
|
name: tokyocabinet
|
104
|
-
requirement: &
|
115
|
+
requirement: &22177700 !ruby/object:Gem::Requirement
|
105
116
|
none: false
|
106
117
|
requirements:
|
107
118
|
- - ! '>='
|
@@ -109,10 +120,10 @@ dependencies:
|
|
109
120
|
version: '1.29'
|
110
121
|
type: :development
|
111
122
|
prerelease: false
|
112
|
-
version_requirements: *
|
123
|
+
version_requirements: *22177700
|
113
124
|
- !ruby/object:Gem::Dependency
|
114
125
|
name: kyotocabinet-ruby
|
115
|
-
requirement: &
|
126
|
+
requirement: &22411440 !ruby/object:Gem::Requirement
|
116
127
|
none: false
|
117
128
|
requirements:
|
118
129
|
- - ! '>='
|
@@ -120,10 +131,10 @@ dependencies:
|
|
120
131
|
version: 1.27.1
|
121
132
|
type: :development
|
122
133
|
prerelease: false
|
123
|
-
version_requirements: *
|
134
|
+
version_requirements: *22411440
|
124
135
|
- !ruby/object:Gem::Dependency
|
125
136
|
name: sqlite3
|
126
|
-
requirement: &
|
137
|
+
requirement: &22845660 !ruby/object:Gem::Requirement
|
127
138
|
none: false
|
128
139
|
requirements:
|
129
140
|
- - ! '>='
|
@@ -131,7 +142,7 @@ dependencies:
|
|
131
142
|
version: 1.3.4
|
132
143
|
type: :development
|
133
144
|
prerelease: false
|
134
|
-
version_requirements: *
|
145
|
+
version_requirements: *22845660
|
135
146
|
description:
|
136
147
|
email:
|
137
148
|
executables:
|
@@ -145,38 +156,38 @@ files:
|
|
145
156
|
- CHANGELOG.rdoc
|
146
157
|
- README.rdoc
|
147
158
|
- Rakefile
|
159
|
+
- lib/anemone.rb
|
160
|
+
- lib/anemone/cookie_store.rb
|
161
|
+
- lib/anemone/page.rb
|
162
|
+
- lib/anemone/storage.rb
|
163
|
+
- lib/anemone/page_store.rb
|
164
|
+
- lib/anemone/storage/tokyo_cabinet.rb
|
148
165
|
- lib/anemone/storage/pstore.rb
|
166
|
+
- lib/anemone/storage/kyoto_cabinet.rb
|
149
167
|
- lib/anemone/storage/mongodb.rb
|
150
|
-
- lib/anemone/storage/tokyo_cabinet.rb
|
151
168
|
- lib/anemone/storage/exceptions.rb
|
152
|
-
- lib/anemone/storage/redis.rb
|
153
|
-
- lib/anemone/storage/sqlite3.rb
|
154
169
|
- lib/anemone/storage/base.rb
|
155
|
-
- lib/anemone/storage/
|
156
|
-
- lib/anemone/
|
157
|
-
- lib/anemone/
|
170
|
+
- lib/anemone/storage/sqlite3.rb
|
171
|
+
- lib/anemone/storage/redis.rb
|
172
|
+
- lib/anemone/exceptions.rb
|
173
|
+
- lib/anemone/cli.rb
|
158
174
|
- lib/anemone/tentacle.rb
|
159
175
|
- lib/anemone/http.rb
|
160
|
-
- lib/anemone/cli.rb
|
161
|
-
- lib/anemone/page.rb
|
162
|
-
- lib/anemone/exceptions.rb
|
163
176
|
- lib/anemone/core.rb
|
164
|
-
- lib/anemone/cli/url_list.rb
|
165
177
|
- lib/anemone/cli/serialize.rb
|
166
|
-
- lib/anemone/cli/count.rb
|
167
|
-
- lib/anemone/cli/cron.rb
|
168
178
|
- lib/anemone/cli/pagedepth.rb
|
169
|
-
- lib/anemone/
|
170
|
-
- lib/anemone.rb
|
171
|
-
-
|
172
|
-
- spec/page_spec.rb
|
173
|
-
- spec/anemone_spec.rb
|
179
|
+
- lib/anemone/cli/cron.rb
|
180
|
+
- lib/anemone/cli/url_list.rb
|
181
|
+
- lib/anemone/cli/count.rb
|
174
182
|
- spec/core_spec.rb
|
175
|
-
- spec/
|
183
|
+
- spec/anemone_spec.rb
|
184
|
+
- spec/spec_helper.rb
|
176
185
|
- spec/page_store_spec.rb
|
186
|
+
- spec/page_spec.rb
|
177
187
|
- spec/cookie_store_spec.rb
|
188
|
+
- spec/fakeweb_helper.rb
|
178
189
|
- spec/http_spec.rb
|
179
|
-
- spec/
|
190
|
+
- spec/storage_spec.rb
|
180
191
|
- bin/anemone
|
181
192
|
homepage: http://anemone.rubyforge.org
|
182
193
|
licenses: []
|
@@ -207,12 +218,12 @@ signing_key:
|
|
207
218
|
specification_version: 3
|
208
219
|
summary: Anemone web-spider framework
|
209
220
|
test_files:
|
210
|
-
- spec/fakeweb_helper.rb
|
211
|
-
- spec/page_spec.rb
|
212
|
-
- spec/anemone_spec.rb
|
213
221
|
- spec/core_spec.rb
|
214
|
-
- spec/
|
222
|
+
- spec/anemone_spec.rb
|
223
|
+
- spec/spec_helper.rb
|
215
224
|
- spec/page_store_spec.rb
|
225
|
+
- spec/page_spec.rb
|
216
226
|
- spec/cookie_store_spec.rb
|
227
|
+
- spec/fakeweb_helper.rb
|
217
228
|
- spec/http_spec.rb
|
218
|
-
- spec/
|
229
|
+
- spec/storage_spec.rb
|