anemone 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +10 -0
- data/README.rdoc +2 -1
- data/Rakefile +2 -4
- data/VERSION +1 -1
- data/lib/anemone/core.rb +3 -3
- data/lib/anemone/storage.rb +1 -1
- data/spec/storage_spec.rb +54 -29
- metadata +62 -51
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
== 0.7.1 / 2012-01-20
|
2
|
+
|
3
|
+
* Minor enhancements
|
4
|
+
|
5
|
+
* Switch from robots gem (which people reported problems with) to new robotex gem
|
6
|
+
|
7
|
+
* Bug fixes
|
8
|
+
|
9
|
+
* Fix incorrect default file extension for KyotoCabinet
|
10
|
+
|
1
11
|
== 0.7.0 / 2012-01-19
|
2
12
|
|
3
13
|
* Major enhancements
|
data/README.rdoc
CHANGED
@@ -30,8 +30,9 @@ To test and develop this gem, additional requirements are:
|
|
30
30
|
* rspec
|
31
31
|
* fakeweb
|
32
32
|
* tokyocabinet
|
33
|
+
* kyotocabinet-ruby
|
33
34
|
* mongo
|
34
35
|
* redis
|
35
36
|
* sqlite3
|
36
37
|
|
37
|
-
You will need to have {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
|
38
|
+
You will need to have KyotoCabinet, {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
|
data/Rakefile
CHANGED
@@ -1,6 +1,5 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rake'
|
3
1
|
require 'rspec/core/rake_task'
|
2
|
+
require 'rake/rdoctask'
|
4
3
|
|
5
4
|
desc "Run all specs"
|
6
5
|
RSpec::Core::RakeTask.new(:rspec) do |spec|
|
@@ -14,8 +13,7 @@ end
|
|
14
13
|
|
15
14
|
task :default => :rspec
|
16
15
|
|
17
|
-
|
18
|
-
RDoc::Task.new do |rdoc|
|
16
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
19
17
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
20
18
|
|
21
19
|
rdoc.rdoc_dir = 'rdoc'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.0
|
1
|
+
0.7.1
|
data/lib/anemone/core.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'thread'
|
2
|
-
require 'robots'
|
2
|
+
require 'robotex'
|
3
3
|
require 'anemone/tentacle'
|
4
4
|
require 'anemone/page'
|
5
5
|
require 'anemone/exceptions'
|
@@ -9,7 +9,7 @@ require 'anemone/storage/base'
|
|
9
9
|
|
10
10
|
module Anemone
|
11
11
|
|
12
|
-
VERSION = '0.7.0';
|
12
|
+
VERSION = '0.7.1';
|
13
13
|
|
14
14
|
#
|
15
15
|
# Convenience method to start a crawl
|
@@ -199,7 +199,7 @@ module Anemone
|
|
199
199
|
@opts[:threads] = 1 if @opts[:delay] > 0
|
200
200
|
storage = Anemone::Storage::Base.new(@opts[:storage] || Anemone::Storage.Hash)
|
201
201
|
@pages = PageStore.new(storage)
|
202
|
-
@robots = Robots.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
|
202
|
+
@robots = Robotex.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
|
203
203
|
|
204
204
|
freeze_options
|
205
205
|
end
|
data/lib/anemone/storage.rb
CHANGED
data/spec/storage_spec.rb
CHANGED
@@ -6,45 +6,70 @@ require 'spec_helper'
|
|
6
6
|
module Anemone
|
7
7
|
describe Storage do
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
describe ".Hash" do
|
10
|
+
it "returns a Hash adapter" do
|
11
|
+
Anemone::Storage.Hash.should be_an_instance_of(Hash)
|
12
|
+
end
|
12
13
|
end
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
describe ".PStore" do
|
16
|
+
it "returns a PStore adapter" do
|
17
|
+
test_file = 'test.pstore'
|
18
|
+
Anemone::Storage.PStore(test_file).should be_an_instance_of(Anemone::Storage::PStore)
|
19
|
+
end
|
18
20
|
end
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
describe ".TokyoCabinet" do
|
23
|
+
it "returns a TokyoCabinet adapter" do
|
24
|
+
test_file = 'test.tch'
|
25
|
+
store = Anemone::Storage.TokyoCabinet(test_file)
|
26
|
+
store.should be_an_instance_of(Anemone::Storage::TokyoCabinet)
|
27
|
+
store.close
|
28
|
+
end
|
26
29
|
end
|
27
30
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
31
|
+
describe ".KyotoCabinet" do
|
32
|
+
context "when the file is specified" do
|
33
|
+
it "returns a KyotoCabinet adapter using that file" do
|
34
|
+
test_file = 'test.kch'
|
35
|
+
store = Anemone::Storage.KyotoCabinet(test_file)
|
36
|
+
store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
|
37
|
+
store.close
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "when no file is specified" do
|
42
|
+
it "returns a KyotoCabinet adapter using the default filename" do
|
43
|
+
store = Anemone::Storage.KyotoCabinet
|
44
|
+
store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
|
45
|
+
store.close
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe ".SQLite3" do
|
51
|
+
it "returns a SQLite3 adapter" do
|
52
|
+
test_file = 'test.db'
|
53
|
+
store = Anemone::Storage.SQLite3(test_file)
|
54
|
+
store.should be_an_instance_of(Anemone::Storage::SQLite3)
|
55
|
+
store.close
|
56
|
+
end
|
34
57
|
end
|
35
58
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
59
|
+
describe ".MongoDB" do
|
60
|
+
it "returns a MongoDB adapter" do
|
61
|
+
store = Anemone::Storage.MongoDB
|
62
|
+
store.should be_an_instance_of(Anemone::Storage::MongoDB)
|
63
|
+
store.close
|
64
|
+
end
|
41
65
|
end
|
42
66
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
67
|
+
describe ".MongoDB" do
|
68
|
+
it "returns a Redis adapter" do
|
69
|
+
store = Anemone::Storage.Redis
|
70
|
+
store.should be_an_instance_of(Anemone::Storage::Redis)
|
71
|
+
store.close
|
72
|
+
end
|
48
73
|
end
|
49
74
|
|
50
75
|
module Storage
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anemone
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.7.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-01-20 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &23166540 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,43 +21,54 @@ dependencies:
|
|
21
21
|
version: 1.3.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *23166540
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name: robots
|
27
|
-
requirement: &
|
26
|
+
name: robotex
|
27
|
+
requirement: &23166060 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 0.
|
32
|
+
version: 1.0.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *23166060
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rake
|
38
|
-
requirement: &
|
38
|
+
requirement: &23165600 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 0.
|
43
|
+
version: 0.9.2
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *23165600
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: rdoc
|
49
|
+
requirement: &23165140 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.12'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *23165140
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: rspec
|
49
|
-
requirement: &
|
60
|
+
requirement: &23164680 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ! '>='
|
53
64
|
- !ruby/object:Gem::Version
|
54
|
-
version: 2.
|
65
|
+
version: 2.8.0
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *23164680
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: fakeweb
|
60
|
-
requirement: &
|
71
|
+
requirement: &18439940 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ! '>='
|
@@ -65,10 +76,10 @@ dependencies:
|
|
65
76
|
version: 1.3.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *18439940
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: redis
|
71
|
-
requirement: &
|
82
|
+
requirement: &21684560 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
85
|
- - ! '>='
|
@@ -76,10 +87,10 @@ dependencies:
|
|
76
87
|
version: 2.2.0
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *21684560
|
80
91
|
- !ruby/object:Gem::Dependency
|
81
92
|
name: mongo
|
82
|
-
requirement: &
|
93
|
+
requirement: &21856420 !ruby/object:Gem::Requirement
|
83
94
|
none: false
|
84
95
|
requirements:
|
85
96
|
- - ! '>='
|
@@ -87,10 +98,10 @@ dependencies:
|
|
87
98
|
version: 1.3.1
|
88
99
|
type: :development
|
89
100
|
prerelease: false
|
90
|
-
version_requirements: *
|
101
|
+
version_requirements: *21856420
|
91
102
|
- !ruby/object:Gem::Dependency
|
92
103
|
name: bson_ext
|
93
|
-
requirement: &
|
104
|
+
requirement: &21924200 !ruby/object:Gem::Requirement
|
94
105
|
none: false
|
95
106
|
requirements:
|
96
107
|
- - ! '>='
|
@@ -98,10 +109,10 @@ dependencies:
|
|
98
109
|
version: 1.3.1
|
99
110
|
type: :development
|
100
111
|
prerelease: false
|
101
|
-
version_requirements: *
|
112
|
+
version_requirements: *21924200
|
102
113
|
- !ruby/object:Gem::Dependency
|
103
114
|
name: tokyocabinet
|
104
|
-
requirement: &
|
115
|
+
requirement: &22177700 !ruby/object:Gem::Requirement
|
105
116
|
none: false
|
106
117
|
requirements:
|
107
118
|
- - ! '>='
|
@@ -109,10 +120,10 @@ dependencies:
|
|
109
120
|
version: '1.29'
|
110
121
|
type: :development
|
111
122
|
prerelease: false
|
112
|
-
version_requirements: *
|
123
|
+
version_requirements: *22177700
|
113
124
|
- !ruby/object:Gem::Dependency
|
114
125
|
name: kyotocabinet-ruby
|
115
|
-
requirement: &
|
126
|
+
requirement: &22411440 !ruby/object:Gem::Requirement
|
116
127
|
none: false
|
117
128
|
requirements:
|
118
129
|
- - ! '>='
|
@@ -120,10 +131,10 @@ dependencies:
|
|
120
131
|
version: 1.27.1
|
121
132
|
type: :development
|
122
133
|
prerelease: false
|
123
|
-
version_requirements: *
|
134
|
+
version_requirements: *22411440
|
124
135
|
- !ruby/object:Gem::Dependency
|
125
136
|
name: sqlite3
|
126
|
-
requirement: &
|
137
|
+
requirement: &22845660 !ruby/object:Gem::Requirement
|
127
138
|
none: false
|
128
139
|
requirements:
|
129
140
|
- - ! '>='
|
@@ -131,7 +142,7 @@ dependencies:
|
|
131
142
|
version: 1.3.4
|
132
143
|
type: :development
|
133
144
|
prerelease: false
|
134
|
-
version_requirements: *
|
145
|
+
version_requirements: *22845660
|
135
146
|
description:
|
136
147
|
email:
|
137
148
|
executables:
|
@@ -145,38 +156,38 @@ files:
|
|
145
156
|
- CHANGELOG.rdoc
|
146
157
|
- README.rdoc
|
147
158
|
- Rakefile
|
159
|
+
- lib/anemone.rb
|
160
|
+
- lib/anemone/cookie_store.rb
|
161
|
+
- lib/anemone/page.rb
|
162
|
+
- lib/anemone/storage.rb
|
163
|
+
- lib/anemone/page_store.rb
|
164
|
+
- lib/anemone/storage/tokyo_cabinet.rb
|
148
165
|
- lib/anemone/storage/pstore.rb
|
166
|
+
- lib/anemone/storage/kyoto_cabinet.rb
|
149
167
|
- lib/anemone/storage/mongodb.rb
|
150
|
-
- lib/anemone/storage/tokyo_cabinet.rb
|
151
168
|
- lib/anemone/storage/exceptions.rb
|
152
|
-
- lib/anemone/storage/redis.rb
|
153
|
-
- lib/anemone/storage/sqlite3.rb
|
154
169
|
- lib/anemone/storage/base.rb
|
155
|
-
- lib/anemone/storage/
|
156
|
-
- lib/anemone/
|
157
|
-
- lib/anemone/
|
170
|
+
- lib/anemone/storage/sqlite3.rb
|
171
|
+
- lib/anemone/storage/redis.rb
|
172
|
+
- lib/anemone/exceptions.rb
|
173
|
+
- lib/anemone/cli.rb
|
158
174
|
- lib/anemone/tentacle.rb
|
159
175
|
- lib/anemone/http.rb
|
160
|
-
- lib/anemone/cli.rb
|
161
|
-
- lib/anemone/page.rb
|
162
|
-
- lib/anemone/exceptions.rb
|
163
176
|
- lib/anemone/core.rb
|
164
|
-
- lib/anemone/cli/url_list.rb
|
165
177
|
- lib/anemone/cli/serialize.rb
|
166
|
-
- lib/anemone/cli/count.rb
|
167
|
-
- lib/anemone/cli/cron.rb
|
168
178
|
- lib/anemone/cli/pagedepth.rb
|
169
|
-
- lib/anemone/
|
170
|
-
- lib/anemone.rb
|
171
|
-
-
|
172
|
-
- spec/page_spec.rb
|
173
|
-
- spec/anemone_spec.rb
|
179
|
+
- lib/anemone/cli/cron.rb
|
180
|
+
- lib/anemone/cli/url_list.rb
|
181
|
+
- lib/anemone/cli/count.rb
|
174
182
|
- spec/core_spec.rb
|
175
|
-
- spec/
|
183
|
+
- spec/anemone_spec.rb
|
184
|
+
- spec/spec_helper.rb
|
176
185
|
- spec/page_store_spec.rb
|
186
|
+
- spec/page_spec.rb
|
177
187
|
- spec/cookie_store_spec.rb
|
188
|
+
- spec/fakeweb_helper.rb
|
178
189
|
- spec/http_spec.rb
|
179
|
-
- spec/
|
190
|
+
- spec/storage_spec.rb
|
180
191
|
- bin/anemone
|
181
192
|
homepage: http://anemone.rubyforge.org
|
182
193
|
licenses: []
|
@@ -207,12 +218,12 @@ signing_key:
|
|
207
218
|
specification_version: 3
|
208
219
|
summary: Anemone web-spider framework
|
209
220
|
test_files:
|
210
|
-
- spec/fakeweb_helper.rb
|
211
|
-
- spec/page_spec.rb
|
212
|
-
- spec/anemone_spec.rb
|
213
221
|
- spec/core_spec.rb
|
214
|
-
- spec/
|
222
|
+
- spec/anemone_spec.rb
|
223
|
+
- spec/spec_helper.rb
|
215
224
|
- spec/page_store_spec.rb
|
225
|
+
- spec/page_spec.rb
|
216
226
|
- spec/cookie_store_spec.rb
|
227
|
+
- spec/fakeweb_helper.rb
|
217
228
|
- spec/http_spec.rb
|
218
|
-
- spec/
|
229
|
+
- spec/storage_spec.rb
|