anemone 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,13 @@
1
+ == 0.7.1 / 2012-01-20
2
+
3
+ * Minor enhancements
4
+
5
+ * Switch from robots gem (which people reported problems with) to new robotex gem
6
+
7
+ * Bug fixes
8
+
9
+ * Fix incorrect default file extension for KyotoCabinet
10
+
1
11
  == 0.7.0 / 2012-01-19
2
12
 
3
13
  * Major enhancements
@@ -30,8 +30,9 @@ To test and develop this gem, additional requirements are:
30
30
  * rspec
31
31
  * fakeweb
32
32
  * tokyocabinet
33
+ * kyotocabinet-ruby
33
34
  * mongo
34
35
  * redis
35
36
  * sqlite3
36
37
 
37
- You will need to have {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
38
+ You will need to have {Kyoto Cabinet}[http://fallabs.com/kyotocabinet/], {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
data/Rakefile CHANGED
@@ -1,6 +1,5 @@
1
- require 'rubygems'
2
- require 'rake'
3
1
  require 'rspec/core/rake_task'
2
+ require 'rake/rdoctask'
4
3
 
5
4
  desc "Run all specs"
6
5
  RSpec::Core::RakeTask.new(:rspec) do |spec|
@@ -14,8 +13,7 @@ end
14
13
 
15
14
  task :default => :rspec
16
15
 
17
- require 'rdoc/task'
18
- RDoc::Task.new do |rdoc|
16
+ Rake::RDocTask.new(:rdoc) do |rdoc|
19
17
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
20
18
 
21
19
  rdoc.rdoc_dir = 'rdoc'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.0
1
+ 0.7.1
@@ -1,5 +1,5 @@
1
1
  require 'thread'
2
- require 'robots'
2
+ require 'robotex'
3
3
  require 'anemone/tentacle'
4
4
  require 'anemone/page'
5
5
  require 'anemone/exceptions'
@@ -9,7 +9,7 @@ require 'anemone/storage/base'
9
9
 
10
10
  module Anemone
11
11
 
12
- VERSION = '0.7.0';
12
+ VERSION = '0.7.1';
13
13
 
14
14
  #
15
15
  # Convenience method to start a crawl
@@ -199,7 +199,7 @@ module Anemone
199
199
  @opts[:threads] = 1 if @opts[:delay] > 0
200
200
  storage = Anemone::Storage::Base.new(@opts[:storage] || Anemone::Storage.Hash)
201
201
  @pages = PageStore.new(storage)
202
- @robots = Robots.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
202
+ @robots = Robotex.new(@opts[:user_agent]) if @opts[:obey_robots_txt]
203
203
 
204
204
  freeze_options
205
205
  end
@@ -18,7 +18,7 @@ module Anemone
18
18
  self::TokyoCabinet.new(file)
19
19
  end
20
20
 
21
- def self.KyotoCabinet(file = 'anemone.tch')
21
+ def self.KyotoCabinet(file = 'anemone.kch')
22
22
  require 'anemone/storage/kyoto_cabinet'
23
23
  self::KyotoCabinet.new(file)
24
24
  end
@@ -6,45 +6,70 @@ require 'spec_helper'
6
6
  module Anemone
7
7
  describe Storage do
8
8
 
9
- it "should have a class method to produce a Hash" do
10
- Anemone::Storage.should respond_to(:Hash)
11
- Anemone::Storage.Hash.should be_an_instance_of(Hash)
9
+ describe ".Hash" do
10
+ it "returns a Hash adapter" do
11
+ Anemone::Storage.Hash.should be_an_instance_of(Hash)
12
+ end
12
13
  end
13
14
 
14
- it "should have a class method to produce a PStore" do
15
- test_file = 'test.pstore'
16
- Anemone::Storage.should respond_to(:PStore)
17
- Anemone::Storage.PStore(test_file).should be_an_instance_of(Anemone::Storage::PStore)
15
+ describe ".PStore" do
16
+ it "returns a PStore adapter" do
17
+ test_file = 'test.pstore'
18
+ Anemone::Storage.PStore(test_file).should be_an_instance_of(Anemone::Storage::PStore)
19
+ end
18
20
  end
19
21
 
20
- it "should have a class method to produce a TokyoCabinet" do
21
- test_file = 'test.tch'
22
- Anemone::Storage.should respond_to(:TokyoCabinet)
23
- store = Anemone::Storage.TokyoCabinet(test_file)
24
- store.should be_an_instance_of(Anemone::Storage::TokyoCabinet)
25
- store.close
22
+ describe ".TokyoCabinet" do
23
+ it "returns a TokyoCabinet adapter" do
24
+ test_file = 'test.tch'
25
+ store = Anemone::Storage.TokyoCabinet(test_file)
26
+ store.should be_an_instance_of(Anemone::Storage::TokyoCabinet)
27
+ store.close
28
+ end
26
29
  end
27
30
 
28
- it "should have a class method to produce a SQLite3" do
29
- test_file = 'test.db'
30
- Anemone::Storage.should respond_to(:SQLite3)
31
- store = Anemone::Storage.SQLite3(test_file)
32
- store.should be_an_instance_of(Anemone::Storage::SQLite3)
33
- store.close
31
+ describe ".KyotoCabinet" do
32
+ context "when the file is specified" do
33
+ it "returns a KyotoCabinet adapter using that file" do
34
+ test_file = 'test.kch'
35
+ store = Anemone::Storage.KyotoCabinet(test_file)
36
+ store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
37
+ store.close
38
+ end
39
+ end
40
+
41
+ context "when no file is specified" do
42
+ it "returns a KyotoCabinet adapter using the default filename" do
43
+ store = Anemone::Storage.KyotoCabinet
44
+ store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
45
+ store.close
46
+ end
47
+ end
48
+ end
49
+
50
+ describe ".SQLite3" do
51
+ it "returns a SQLite3 adapter" do
52
+ test_file = 'test.db'
53
+ store = Anemone::Storage.SQLite3(test_file)
54
+ store.should be_an_instance_of(Anemone::Storage::SQLite3)
55
+ store.close
56
+ end
34
57
  end
35
58
 
36
- it "should have a class method to produce a MongoDB" do
37
- Anemone::Storage.should respond_to(:MongoDB)
38
- store = Anemone::Storage.MongoDB
39
- store.should be_an_instance_of(Anemone::Storage::MongoDB)
40
- store.close
59
+ describe ".MongoDB" do
60
+ it "returns a MongoDB adapter" do
61
+ store = Anemone::Storage.MongoDB
62
+ store.should be_an_instance_of(Anemone::Storage::MongoDB)
63
+ store.close
64
+ end
41
65
  end
42
66
 
43
- it "should have a class method to produce a Redis" do
44
- Anemone::Storage.should respond_to(:Redis)
45
- store = Anemone::Storage.Redis
46
- store.should be_an_instance_of(Anemone::Storage::Redis)
47
- store.close
67
+ describe ".Redis" do
68
+ it "returns a Redis adapter" do
69
+ store = Anemone::Storage.Redis
70
+ store.should be_an_instance_of(Anemone::Storage::Redis)
71
+ store.close
72
+ end
48
73
  end
49
74
 
50
75
  module Storage
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anemone
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-01-20 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &19111780 !ruby/object:Gem::Requirement
16
+ requirement: &23166540 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,43 +21,54 @@ dependencies:
21
21
  version: 1.3.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *19111780
24
+ version_requirements: *23166540
25
25
  - !ruby/object:Gem::Dependency
26
- name: robots
27
- requirement: &19111300 !ruby/object:Gem::Requirement
26
+ name: robotex
27
+ requirement: &23166060 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
31
31
  - !ruby/object:Gem::Version
32
- version: 0.7.2
32
+ version: 1.0.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *19111300
35
+ version_requirements: *23166060
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rake
38
- requirement: &19141340 !ruby/object:Gem::Requirement
38
+ requirement: &23165600 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
42
42
  - !ruby/object:Gem::Version
43
- version: 0.8.7
43
+ version: 0.9.2
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *19141340
46
+ version_requirements: *23165600
47
+ - !ruby/object:Gem::Dependency
48
+ name: rdoc
49
+ requirement: &23165140 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '3.12'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *23165140
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: rspec
49
- requirement: &19140880 !ruby/object:Gem::Requirement
60
+ requirement: &23164680 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ! '>='
53
64
  - !ruby/object:Gem::Version
54
- version: 2.6.0
65
+ version: 2.8.0
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *19140880
68
+ version_requirements: *23164680
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: fakeweb
60
- requirement: &19140420 !ruby/object:Gem::Requirement
71
+ requirement: &18439940 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ! '>='
@@ -65,10 +76,10 @@ dependencies:
65
76
  version: 1.3.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *19140420
79
+ version_requirements: *18439940
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: redis
71
- requirement: &19139960 !ruby/object:Gem::Requirement
82
+ requirement: &21684560 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ! '>='
@@ -76,10 +87,10 @@ dependencies:
76
87
  version: 2.2.0
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *19139960
90
+ version_requirements: *21684560
80
91
  - !ruby/object:Gem::Dependency
81
92
  name: mongo
82
- requirement: &19139500 !ruby/object:Gem::Requirement
93
+ requirement: &21856420 !ruby/object:Gem::Requirement
83
94
  none: false
84
95
  requirements:
85
96
  - - ! '>='
@@ -87,10 +98,10 @@ dependencies:
87
98
  version: 1.3.1
88
99
  type: :development
89
100
  prerelease: false
90
- version_requirements: *19139500
101
+ version_requirements: *21856420
91
102
  - !ruby/object:Gem::Dependency
92
103
  name: bson_ext
93
- requirement: &19139040 !ruby/object:Gem::Requirement
104
+ requirement: &21924200 !ruby/object:Gem::Requirement
94
105
  none: false
95
106
  requirements:
96
107
  - - ! '>='
@@ -98,10 +109,10 @@ dependencies:
98
109
  version: 1.3.1
99
110
  type: :development
100
111
  prerelease: false
101
- version_requirements: *19139040
112
+ version_requirements: *21924200
102
113
  - !ruby/object:Gem::Dependency
103
114
  name: tokyocabinet
104
- requirement: &19138580 !ruby/object:Gem::Requirement
115
+ requirement: &22177700 !ruby/object:Gem::Requirement
105
116
  none: false
106
117
  requirements:
107
118
  - - ! '>='
@@ -109,10 +120,10 @@ dependencies:
109
120
  version: '1.29'
110
121
  type: :development
111
122
  prerelease: false
112
- version_requirements: *19138580
123
+ version_requirements: *22177700
113
124
  - !ruby/object:Gem::Dependency
114
125
  name: kyotocabinet-ruby
115
- requirement: &19138120 !ruby/object:Gem::Requirement
126
+ requirement: &22411440 !ruby/object:Gem::Requirement
116
127
  none: false
117
128
  requirements:
118
129
  - - ! '>='
@@ -120,10 +131,10 @@ dependencies:
120
131
  version: 1.27.1
121
132
  type: :development
122
133
  prerelease: false
123
- version_requirements: *19138120
134
+ version_requirements: *22411440
124
135
  - !ruby/object:Gem::Dependency
125
136
  name: sqlite3
126
- requirement: &19137660 !ruby/object:Gem::Requirement
137
+ requirement: &22845660 !ruby/object:Gem::Requirement
127
138
  none: false
128
139
  requirements:
129
140
  - - ! '>='
@@ -131,7 +142,7 @@ dependencies:
131
142
  version: 1.3.4
132
143
  type: :development
133
144
  prerelease: false
134
- version_requirements: *19137660
145
+ version_requirements: *22845660
135
146
  description:
136
147
  email:
137
148
  executables:
@@ -145,38 +156,38 @@ files:
145
156
  - CHANGELOG.rdoc
146
157
  - README.rdoc
147
158
  - Rakefile
159
+ - lib/anemone.rb
160
+ - lib/anemone/cookie_store.rb
161
+ - lib/anemone/page.rb
162
+ - lib/anemone/storage.rb
163
+ - lib/anemone/page_store.rb
164
+ - lib/anemone/storage/tokyo_cabinet.rb
148
165
  - lib/anemone/storage/pstore.rb
166
+ - lib/anemone/storage/kyoto_cabinet.rb
149
167
  - lib/anemone/storage/mongodb.rb
150
- - lib/anemone/storage/tokyo_cabinet.rb
151
168
  - lib/anemone/storage/exceptions.rb
152
- - lib/anemone/storage/redis.rb
153
- - lib/anemone/storage/sqlite3.rb
154
169
  - lib/anemone/storage/base.rb
155
- - lib/anemone/storage/kyoto_cabinet.rb
156
- - lib/anemone/page_store.rb
157
- - lib/anemone/storage.rb
170
+ - lib/anemone/storage/sqlite3.rb
171
+ - lib/anemone/storage/redis.rb
172
+ - lib/anemone/exceptions.rb
173
+ - lib/anemone/cli.rb
158
174
  - lib/anemone/tentacle.rb
159
175
  - lib/anemone/http.rb
160
- - lib/anemone/cli.rb
161
- - lib/anemone/page.rb
162
- - lib/anemone/exceptions.rb
163
176
  - lib/anemone/core.rb
164
- - lib/anemone/cli/url_list.rb
165
177
  - lib/anemone/cli/serialize.rb
166
- - lib/anemone/cli/count.rb
167
- - lib/anemone/cli/cron.rb
168
178
  - lib/anemone/cli/pagedepth.rb
169
- - lib/anemone/cookie_store.rb
170
- - lib/anemone.rb
171
- - spec/fakeweb_helper.rb
172
- - spec/page_spec.rb
173
- - spec/anemone_spec.rb
179
+ - lib/anemone/cli/cron.rb
180
+ - lib/anemone/cli/url_list.rb
181
+ - lib/anemone/cli/count.rb
174
182
  - spec/core_spec.rb
175
- - spec/storage_spec.rb
183
+ - spec/anemone_spec.rb
184
+ - spec/spec_helper.rb
176
185
  - spec/page_store_spec.rb
186
+ - spec/page_spec.rb
177
187
  - spec/cookie_store_spec.rb
188
+ - spec/fakeweb_helper.rb
178
189
  - spec/http_spec.rb
179
- - spec/spec_helper.rb
190
+ - spec/storage_spec.rb
180
191
  - bin/anemone
181
192
  homepage: http://anemone.rubyforge.org
182
193
  licenses: []
@@ -207,12 +218,12 @@ signing_key:
207
218
  specification_version: 3
208
219
  summary: Anemone web-spider framework
209
220
  test_files:
210
- - spec/fakeweb_helper.rb
211
- - spec/page_spec.rb
212
- - spec/anemone_spec.rb
213
221
  - spec/core_spec.rb
214
- - spec/storage_spec.rb
222
+ - spec/anemone_spec.rb
223
+ - spec/spec_helper.rb
215
224
  - spec/page_store_spec.rb
225
+ - spec/page_spec.rb
216
226
  - spec/cookie_store_spec.rb
227
+ - spec/fakeweb_helper.rb
217
228
  - spec/http_spec.rb
218
- - spec/spec_helper.rb
229
+ - spec/storage_spec.rb