sutch-anemone 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,91 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+ require 'spec_helper'
3
+
4
+ # Based on Anemone::Page (i.e., only tests HTML resources)
5
+
6
+ module Anemone
7
+ describe Resource do
8
+
9
+     before(:each) do # shared setup: runs before every example in this group
10
+       FakeWeb.clean_registry # start each example with an empty FakeWeb registry
11
+       @http = Anemone::HTTP.new(:page_class => Anemone::Page) # fetched documents are built as Anemone::Page
12
+
13
+       @page = @http.fetch_page(FakePage.new('home', :links => '1').url) # fixture page (no :base option) used by most examples
14
+     end
15
+
16
+     describe "#to_hash" do # serialising the fetched page into a plain hash
17
+       it "converts the page to a hash" do
18
+         hash = @page.to_hash
19
+         hash['url'].should == @page.url.to_s # keys are strings; URL values are compared in their string form
20
+         hash['referer'].should == @page.referer.to_s
21
+         hash['links'].should == @page.links.map(&:to_s) # every link is expected as a string
22
+       end
23
+     end
24
+
25
+ describe "#from_hash" do
26
+ it "converts from a hash to a Page" do
27
+ page = @page.dup
28
+ page.depth = 1
29
+ converted = Page.from_hash(page.to_hash)
30
+ converted.links.should == page.links
31
+ converted.depth.should == page.depth
32
+ end
33
+ end
34
+
35
+     describe "#links" do # link extraction from the page body
36
+       it "should not convert anchors to %23" do
37
+         page = @http.fetch_page(FakePage.new('', :body => '<a href="#top">Top</a>').url) # body holds one fragment-only anchor
38
+         page.links.should have(1).link
39
+         page.links.first.to_s.should == SPEC_DOMAIN # the single link must equal the bare domain, without an escaped "#"
40
+       end
41
+     end
42
+
43
+     it "should detect, store and expose the base url for the page head" do
44
+       base = "#{SPEC_DOMAIN}path/to/base_url/"
45
+       page = @http.fetch_page(FakePage.new('body_test', {:base => base}).url) # fake page created with a :base option
46
+       page.base.should == URI(base) # base is exposed as a URI, not a String
47
+       @page.base.should be_nil # the fixture page was created without a :base option
48
+     end
49
+
50
+     it "should have a method to convert a relative url to an absolute one" do
51
+       @page.should respond_to(:to_absolute)
52
+
53
+       # Identity: the page's own URL and the empty string both resolve to the page URL
54
+       @page.to_absolute(@page.url).should == @page.url
55
+       @page.to_absolute("").should == @page.url
56
+
57
+       # Root-ness: "/" resolves to the domain root
58
+       @page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
59
+
60
+       # Relativeness: a plain relative path is expected directly under the domain root
61
+       relative_path = "a/relative/path"
62
+       @page.to_absolute(relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}")
63
+
64
+       deep_page = @http.fetch_page(FakePage.new('home/deep', :links => '1').url) # page one path segment deeper than the fixture
65
+       upward_relative_path = "../a/relative/path"
66
+       deep_page.to_absolute(upward_relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}") # "../" is expected to climb back to the root
67
+
68
+       # The base URL case: same checks against a page created with a :base option
69
+       base_path = "path/to/base_url/"
70
+       base = "#{SPEC_DOMAIN}#{base_path}"
71
+       page = @http.fetch_page(FakePage.new('home', {:base => base}).url)
72
+
73
+       # Identity: an absolute URL is returned unchanged even when a base is present
74
+       page.to_absolute(page.url).should == page.url
75
+       # It should revert to the base url (so the empty string no longer maps to the page URL)
76
+       page.to_absolute("").should_not == page.url
77
+
78
+       # Root-ness: "/" still resolves to the domain root, not to the base
79
+       page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
80
+
81
+       # Relativeness: relative paths are now expected under the base URL
82
+       relative_path = "a/relative/path" # same value as above, restated for this section
83
+       page.to_absolute(relative_path).should == URI("#{base}#{relative_path}")
84
+
85
+       upward_relative_path = "../a/relative/path" # same value as above, restated for this section
86
+       upward_base = "#{SPEC_DOMAIN}path/to/" # the base URL with its last path segment removed
87
+       page.to_absolute(upward_relative_path).should == URI("#{upward_base}#{relative_path}")
88
+     end
89
+
90
+ end
91
+ end
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'fakeweb'
4
+ require File.dirname(__FILE__) + '/fakeweb_helper'
5
+
6
+ $:.unshift(File.dirname(__FILE__) + '/../lib/') # put the lib/ directory beside this spec directory first on the load path
7
+ require 'anemone'
8
+
9
+ SPEC_DOMAIN = 'http://www.example.com/' # root URL (with trailing slash) the specs prepend to paths when building URLs
@@ -0,0 +1,252 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+ require 'spec_helper'
3
+
4
+ %w[pstore tokyo_cabinet kyoto_cabinet sqlite3 mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
5
+
6
+ module Anemone
7
+ describe Storage do
8
+
9
+     describe ".Hash" do # factory method on the Storage module
10
+       it "returns a Hash adapter" do
11
+         Anemone::Storage.Hash.should be_an_instance_of(Hash) # only the returned object's class is checked
12
+       end
13
+     end
14
+
15
+     describe ".PStore" do # factory method on the Storage module
16
+       it "returns a PStore adapter" do
17
+         test_file = 'test.pstore' # relative path; this example never deletes it
18
+         Anemone::Storage.PStore(test_file).should be_an_instance_of(Anemone::Storage::PStore)
19
+       end
20
+     end
21
+
22
+     describe ".TokyoCabinet" do # factory method on the Storage module
23
+       it "returns a TokyoCabinet adapter" do
24
+         test_file = 'test.tch' # relative path; this example never deletes it
25
+         store = Anemone::Storage.TokyoCabinet(test_file)
26
+         store.should be_an_instance_of(Anemone::Storage::TokyoCabinet)
27
+         store.close # NOTE(review): not reached if the expectation above fails
28
+       end
29
+     end
30
+
31
+     describe ".KyotoCabinet" do # factory method on the Storage module, with and without a filename
32
+       context "when the file is specified" do
33
+         it "returns a KyotoCabinet adapter using that file" do
34
+           test_file = 'test.kch' # relative path; this example never deletes it
35
+           store = Anemone::Storage.KyotoCabinet(test_file)
36
+           store.should be_an_instance_of(Anemone::Storage::KyotoCabinet) # NOTE(review): checks the class only, not which file is in use
37
+           store.close
38
+         end
39
+       end
40
+
41
+       context "when no file is specified" do
42
+         it "returns a KyotoCabinet adapter using the default filename" do
43
+           store = Anemone::Storage.KyotoCabinet # no argument: the adapter picks its own filename
44
+           store.should be_an_instance_of(Anemone::Storage::KyotoCabinet) # NOTE(review): the default filename itself is not asserted
45
+           store.close
46
+         end
47
+       end
48
+     end
49
+
50
+     describe ".SQLite3" do # factory method on the Storage module
51
+       it "returns a SQLite3 adapter" do
52
+         test_file = 'test.db' # relative path; this example never deletes it
53
+         store = Anemone::Storage.SQLite3(test_file)
54
+         store.should be_an_instance_of(Anemone::Storage::SQLite3)
55
+         store.close # NOTE(review): not reached if the expectation above fails
56
+       end
57
+     end
58
+
59
+     describe ".MongoDB" do # factory method on the Storage module
60
+       it "returns a MongoDB adapter" do
61
+         store = Anemone::Storage.MongoDB # no arguments; NOTE(review): presumably needs a reachable MongoDB server - confirm
62
+         store.should be_an_instance_of(Anemone::Storage::MongoDB)
63
+         store.close
64
+       end
65
+     end
66
+
67
+ describe ".MongoDB" do
68
+ it "returns a Redis adapter" do
69
+ store = Anemone::Storage.Redis
70
+ store.should be_an_instance_of(Anemone::Storage::Redis)
71
+ store.close
72
+ end
73
+ end
74
+
75
+ module Storage
76
+       shared_examples_for "storage engine" do # contract shared by the adapters; the including group must assign @store
77
+
78
+         before(:each) do
79
+           @url = SPEC_DOMAIN # String key used for the store
80
+           @page = Page.new(URI(@url)) # page built directly from the URL, no HTTP fetch
81
+         end
82
+
83
+         it "should implement [] and []=" do
84
+           @store.should respond_to(:[])
85
+           @store.should respond_to(:[]=)
86
+
87
+           @store[@url] = @page
88
+           @store[@url].url.should == URI(@url) # compares the stored page's url, not object identity
89
+         end
90
+
91
+         it "should implement has_key?" do
92
+           @store.should respond_to(:has_key?)
93
+
94
+           @store[@url] = @page
95
+           @store.has_key?(@url).should == true # strict true/false is required, not just truthiness
96
+
97
+           @store.has_key?('missing').should == false
98
+         end
99
+
100
+         it "should implement delete" do
101
+           @store.should respond_to(:delete)
102
+
103
+           @store[@url] = @page
104
+           @store.delete(@url).url.should == @page.url # delete is expected to return the removed page
105
+           @store.has_key?(@url).should == false
106
+         end
107
+
108
+         it "should implement keys" do
109
+           @store.should respond_to(:keys)
110
+
111
+           urls = [SPEC_DOMAIN, SPEC_DOMAIN + 'test', SPEC_DOMAIN + 'another']
112
+           pages = urls.map { |url| Page.new(URI(url)) }
113
+           urls.zip(pages).each { |arr| @store[arr[0]] = arr[1] } # store each page under its url string
114
+
115
+           (@store.keys - urls).should == [] # NOTE(review): only rules out unexpected keys; missing keys would still pass
116
+         end
117
+
118
+         it "should implement each" do
119
+           @store.should respond_to(:each)
120
+
121
+           urls = [SPEC_DOMAIN, SPEC_DOMAIN + 'test', SPEC_DOMAIN + 'another']
122
+           pages = urls.map { |url| Page.new(URI(url)) }
123
+           urls.zip(pages).each { |arr| @store[arr[0]] = arr[1] } # store each page under its url string
124
+
125
+           result = {}
126
+           @store.each { |k, v| result[k] = v } # each must yield key/value pairs
127
+           (result.keys - urls).should == [] # NOTE(review): passes even if each yields nothing
128
+           (result.values.map { |page| page.url.to_s } - urls).should == []
129
+         end
130
+
131
+         it "should implement merge!, and return self" do
132
+           @store.should respond_to(:merge!)
133
+
134
+           hash = {SPEC_DOMAIN => Page.new(URI(SPEC_DOMAIN)),
135
+                   SPEC_DOMAIN + 'test' => Page.new(URI(SPEC_DOMAIN + 'test'))}
136
+           merged = @store.merge! hash
137
+           hash.each { |key, value| @store[key].url.to_s.should == key } # every merged key must be readable back
138
+
139
+           merged.should === @store # NOTE(review): uses ===; presumably meant to check that merge! returns the store itself
140
+         end
141
+
142
+         it "should correctly deserialize nil redirect_to when loading" do
143
+           @page.redirect_to.should be_nil # precondition: a freshly built page has no redirect
144
+           @store[@url] = @page
145
+           @store[@url].redirect_to.should be_nil # nil must still be nil after a store/load round-trip
146
+         end
147
+       end
148
+
149
+ describe PStore do
150
+ it_should_behave_like "storage engine"
151
+
152
+ before(:each) do
153
+ @test_file = 'test.pstore'
154
+ File.delete @test_file rescue nil
155
+ @store = Anemone::Storage.PStore(@test_file)
156
+ end
157
+
158
+ after(:all) do
159
+ File.delete @test_file rescue nil
160
+ end
161
+ end
162
+
163
+       describe TokyoCabinet do # runs the shared adapter contract against a TokyoCabinet file
164
+         it_should_behave_like "storage engine"
165
+
166
+         before(:each) do
167
+           @test_file = 'test.tch'
168
+           File.delete @test_file rescue nil # best effort: the file may not exist yet
169
+           @store = Anemone::Storage.TokyoCabinet(@test_file)
170
+         end
171
+
172
+         after(:each) do
173
+           @store.close # release the store opened in before(:each)
174
+         end
175
+
176
+         after(:all) do
177
+           File.delete('test.tch') if File.exist?('test.tch') # literal name: ivars set in before(:each) are not available in after(:all)
178
+         end
179
+
180
+         it "should raise an error if supplied with a file extension other than .tch" do
181
+           lambda { Anemone::Storage.TokyoCabinet('test.tmp') }.should raise_error(RuntimeError)
182
+         end
183
+       end
184
+
185
+       describe KyotoCabinet do # runs the shared adapter contract against a KyotoCabinet file
186
+         it_should_behave_like "storage engine"
187
+
188
+         before(:each) do
189
+           @test_file = 'test.kch'
190
+           File.delete @test_file rescue nil # best effort: the file may not exist yet
191
+           @store = Anemone::Storage.KyotoCabinet(@test_file)
192
+         end
193
+
194
+         after(:each) do
195
+           @store.close # release the store opened in before(:each)
196
+         end
197
+
198
+         after(:all) do
199
+           File.delete('test.kch') if File.exist?('test.kch') # literal name: ivars set in before(:each) are not available in after(:all)
200
+         end
201
+
202
+         it "should raise an error if supplied with a file extension other than .kch" do
203
+           lambda { Anemone::Storage.KyotoCabinet('test.tmp') }.should raise_error(RuntimeError)
204
+         end
205
+       end
206
+
207
+ describe SQLite3 do
208
+ it_should_behave_like "storage engine"
209
+
210
+ before(:each) do
211
+ @test_file = 'test.db'
212
+ File.delete @test_file rescue nil
213
+ @store = Anemone::Storage.SQLite3(@test_file)
214
+ end
215
+
216
+ after(:each) do
217
+ @store.close
218
+ end
219
+
220
+ after(:all) do
221
+ File.delete @test_file rescue nil
222
+ end
223
+
224
+ end
225
+
226
+       describe Storage::MongoDB do # runs the shared adapter contract against the MongoDB adapter
227
+         it_should_behave_like "storage engine"
228
+
229
+         before(:each) do
230
+           @store = Storage.MongoDB # no arguments; NOTE(review): presumably needs a reachable MongoDB server - confirm
231
+         end
232
+
233
+         after(:each) do
234
+           @store.close # release the store opened in before(:each)
235
+         end
236
+       end
237
+
238
+       describe Storage::Redis do # runs the shared adapter contract against the Redis adapter
239
+         it_should_behave_like "storage engine"
240
+
241
+         before(:each) do
242
+           @store = Storage.Redis # no arguments; NOTE(review): presumably needs a reachable Redis server - confirm
243
+         end
244
+
245
+         after(:each) do
246
+           @store.close # release the store opened in before(:each)
247
+         end
248
+       end
249
+
250
+ end
251
+ end
252
+ end
metadata ADDED
@@ -0,0 +1,281 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sutch-anemone
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.2
5
+ platform: ruby
6
+ authors:
7
+ - Chris Kite (Dennis Sutch's fork)
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.3.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.3.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: robotex
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.0.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.0.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: content_urls
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.8
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.1.8
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.9.2
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.9.2
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdoc
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '3.12'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '3.12'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: 2.8.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: 2.8.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: fakeweb
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: 1.3.0
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.3.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: redis
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: 2.2.0
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: 2.2.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: mongo
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: 1.3.1
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: 1.3.1
139
+ - !ruby/object:Gem::Dependency
140
+ name: bson_ext
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ! '>='
144
+ - !ruby/object:Gem::Version
145
+ version: 1.3.1
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ! '>='
151
+ - !ruby/object:Gem::Version
152
+ version: 1.3.1
153
+ - !ruby/object:Gem::Dependency
154
+ name: tokyocabinet
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '1.29'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ! '>='
165
+ - !ruby/object:Gem::Version
166
+ version: '1.29'
167
+ - !ruby/object:Gem::Dependency
168
+ name: kyotocabinet-ruby
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: 1.27.1
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ! '>='
179
+ - !ruby/object:Gem::Version
180
+ version: 1.27.1
181
+ - !ruby/object:Gem::Dependency
182
+ name: sqlite3
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ! '>='
186
+ - !ruby/object:Gem::Version
187
+ version: 1.3.4
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ! '>='
193
+ - !ruby/object:Gem::Version
194
+ version: 1.3.4
195
+ description:
196
+ email:
197
+ executables:
198
+ - anemone
199
+ extensions: []
200
+ extra_rdoc_files:
201
+ - README.rdoc
202
+ files:
203
+ - VERSION
204
+ - LICENSE.txt
205
+ - CHANGELOG.rdoc
206
+ - README.rdoc
207
+ - Rakefile
208
+ - lib/anemone/cli/count.rb
209
+ - lib/anemone/cli/cron.rb
210
+ - lib/anemone/cli/pagedepth.rb
211
+ - lib/anemone/cli/serialize.rb
212
+ - lib/anemone/cli/url_list.rb
213
+ - lib/anemone/cli.rb
214
+ - lib/anemone/cookie_store.rb
215
+ - lib/anemone/core.rb
216
+ - lib/anemone/exceptions.rb
217
+ - lib/anemone/http.rb
218
+ - lib/anemone/page.rb
219
+ - lib/anemone/page_store.rb
220
+ - lib/anemone/resource.rb
221
+ - lib/anemone/storage/base.rb
222
+ - lib/anemone/storage/exceptions.rb
223
+ - lib/anemone/storage/kyoto_cabinet.rb
224
+ - lib/anemone/storage/mongodb.rb
225
+ - lib/anemone/storage/pstore.rb
226
+ - lib/anemone/storage/redis.rb
227
+ - lib/anemone/storage/sqlite3.rb
228
+ - lib/anemone/storage/tokyo_cabinet.rb
229
+ - lib/anemone/storage.rb
230
+ - lib/anemone/tentacle.rb
231
+ - lib/anemone.rb
232
+ - spec/anemone_spec.rb
233
+ - spec/cookie_store_spec.rb
234
+ - spec/core_spec.rb
235
+ - spec/fakeweb_helper.rb
236
+ - spec/http_spec.rb
237
+ - spec/page_spec.rb
238
+ - spec/page_store_spec.rb
239
+ - spec/resource_spec.rb
240
+ - spec/spec_helper.rb
241
+ - spec/storage_spec.rb
242
+ - bin/anemone
243
+ homepage: http://anemone.rubyforge.org
244
+ licenses: []
245
+ metadata: {}
246
+ post_install_message:
247
+ rdoc_options:
248
+ - -m
249
+ - README.rdoc
250
+ - -t
251
+ - Anemone
252
+ require_paths:
253
+ - lib
254
+ required_ruby_version: !ruby/object:Gem::Requirement
255
+ requirements:
256
+ - - ! '>='
257
+ - !ruby/object:Gem::Version
258
+ version: '0'
259
+ required_rubygems_version: !ruby/object:Gem::Requirement
260
+ requirements:
261
+ - - ! '>='
262
+ - !ruby/object:Gem::Version
263
+ version: '0'
264
+ requirements: []
265
+ rubyforge_project: anemone
266
+ rubygems_version: 2.0.6
267
+ signing_key:
268
+ specification_version: 4
269
+ summary: Anemone web-spider framework (sutch's fork)
270
+ test_files:
271
+ - spec/anemone_spec.rb
272
+ - spec/cookie_store_spec.rb
273
+ - spec/core_spec.rb
274
+ - spec/fakeweb_helper.rb
275
+ - spec/http_spec.rb
276
+ - spec/page_spec.rb
277
+ - spec/page_store_spec.rb
278
+ - spec/resource_spec.rb
279
+ - spec/spec_helper.rb
280
+ - spec/storage_spec.rb
281
+ has_rdoc: true