sutch-anemone 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,91 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+ require 'spec_helper'
3
+
4
+ # Based on Anemone::Page (i.e., only tests HTML resources)
5
+
6
+ module Anemone
7
+ describe Resource do
8
+
9
+ before(:each) do
10
+ FakeWeb.clean_registry
11
+ @http = Anemone::HTTP.new(:page_class => Anemone::Page)
12
+
13
+ @page = @http.fetch_page(FakePage.new('home', :links => '1').url)
14
+ end
15
+
16
+ describe "#to_hash" do
17
+ it "converts the page to a hash" do
18
+ hash = @page.to_hash
19
+ hash['url'].should == @page.url.to_s
20
+ hash['referer'].should == @page.referer.to_s
21
+ hash['links'].should == @page.links.map(&:to_s)
22
+ end
23
+ end
24
+
25
+ describe "#from_hash" do
26
+ it "converts from a hash to a Page" do
27
+ page = @page.dup
28
+ page.depth = 1
29
+ converted = Page.from_hash(page.to_hash)
30
+ converted.links.should == page.links
31
+ converted.depth.should == page.depth
32
+ end
33
+ end
34
+
35
+ describe "#links" do
36
+ it "should not convert anchors to %23" do
37
+ page = @http.fetch_page(FakePage.new('', :body => '<a href="#top">Top</a>').url)
38
+ page.links.should have(1).link
39
+ page.links.first.to_s.should == SPEC_DOMAIN
40
+ end
41
+ end
42
+
43
+ it "should detect, store and expose the base url for the page head" do
44
+ base = "#{SPEC_DOMAIN}path/to/base_url/"
45
+ page = @http.fetch_page(FakePage.new('body_test', {:base => base}).url)
46
+ page.base.should == URI(base)
47
+ @page.base.should be_nil
48
+ end
49
+
50
+ it "should have a method to convert a relative url to an absolute one" do
51
+ @page.should respond_to(:to_absolute)
52
+
53
+ # Identity
54
+ @page.to_absolute(@page.url).should == @page.url
55
+ @page.to_absolute("").should == @page.url
56
+
57
+ # Root-ness
58
+ @page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
59
+
60
+ # Relativeness
61
+ relative_path = "a/relative/path"
62
+ @page.to_absolute(relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}")
63
+
64
+ deep_page = @http.fetch_page(FakePage.new('home/deep', :links => '1').url)
65
+ upward_relative_path = "../a/relative/path"
66
+ deep_page.to_absolute(upward_relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}")
67
+
68
+ # The base URL case
69
+ base_path = "path/to/base_url/"
70
+ base = "#{SPEC_DOMAIN}#{base_path}"
71
+ page = @http.fetch_page(FakePage.new('home', {:base => base}).url)
72
+
73
+ # Identity
74
+ page.to_absolute(page.url).should == page.url
75
+ # It should revert to the base url
76
+ page.to_absolute("").should_not == page.url
77
+
78
+ # Root-ness
79
+ page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
80
+
81
+ # Relativeness
82
+ relative_path = "a/relative/path"
83
+ page.to_absolute(relative_path).should == URI("#{base}#{relative_path}")
84
+
85
+ upward_relative_path = "../a/relative/path"
86
+ upward_base = "#{SPEC_DOMAIN}path/to/"
87
+ page.to_absolute(upward_relative_path).should == URI("#{upward_base}#{relative_path}")
88
+ end
89
+
90
+ end
91
+ end
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'fakeweb'
4
+ require File.dirname(__FILE__) + '/fakeweb_helper'
5
+
6
+ $:.unshift(File.dirname(__FILE__) + '/../lib/')
7
+ require 'anemone'
8
+
9
+ SPEC_DOMAIN = 'http://www.example.com/'
@@ -0,0 +1,252 @@
1
+ $:.unshift(File.dirname(__FILE__))
2
+ require 'spec_helper'
3
+
4
+ %w[pstore tokyo_cabinet kyoto_cabinet sqlite3 mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
5
+
6
+ module Anemone
7
+ describe Storage do
8
+
9
+ describe ".Hash" do
10
+ it "returns a Hash adapter" do
11
+ Anemone::Storage.Hash.should be_an_instance_of(Hash)
12
+ end
13
+ end
14
+
15
+ describe ".PStore" do
16
+ it "returns a PStore adapter" do
17
+ test_file = 'test.pstore'
18
+ Anemone::Storage.PStore(test_file).should be_an_instance_of(Anemone::Storage::PStore)
19
+ end
20
+ end
21
+
22
+ describe ".TokyoCabinet" do
23
+ it "returns a TokyoCabinet adapter" do
24
+ test_file = 'test.tch'
25
+ store = Anemone::Storage.TokyoCabinet(test_file)
26
+ store.should be_an_instance_of(Anemone::Storage::TokyoCabinet)
27
+ store.close
28
+ end
29
+ end
30
+
31
+ describe ".KyotoCabinet" do
32
+ context "when the file is specified" do
33
+ it "returns a KyotoCabinet adapter using that file" do
34
+ test_file = 'test.kch'
35
+ store = Anemone::Storage.KyotoCabinet(test_file)
36
+ store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
37
+ store.close
38
+ end
39
+ end
40
+
41
+ context "when no file is specified" do
42
+ it "returns a KyotoCabinet adapter using the default filename" do
43
+ store = Anemone::Storage.KyotoCabinet
44
+ store.should be_an_instance_of(Anemone::Storage::KyotoCabinet)
45
+ store.close
46
+ end
47
+ end
48
+ end
49
+
50
+ describe ".SQLite3" do
51
+ it "returns a SQLite3 adapter" do
52
+ test_file = 'test.db'
53
+ store = Anemone::Storage.SQLite3(test_file)
54
+ store.should be_an_instance_of(Anemone::Storage::SQLite3)
55
+ store.close
56
+ end
57
+ end
58
+
59
+ describe ".MongoDB" do
60
+ it "returns a MongoDB adapter" do
61
+ store = Anemone::Storage.MongoDB
62
+ store.should be_an_instance_of(Anemone::Storage::MongoDB)
63
+ store.close
64
+ end
65
+ end
66
+
67
+ describe ".Redis" do
68
+ it "returns a Redis adapter" do
69
+ store = Anemone::Storage.Redis
70
+ store.should be_an_instance_of(Anemone::Storage::Redis)
71
+ store.close
72
+ end
73
+ end
74
+
75
+ module Storage
76
+ shared_examples_for "storage engine" do
77
+
78
+ before(:each) do
79
+ @url = SPEC_DOMAIN
80
+ @page = Page.new(URI(@url))
81
+ end
82
+
83
+ it "should implement [] and []=" do
84
+ @store.should respond_to(:[])
85
+ @store.should respond_to(:[]=)
86
+
87
+ @store[@url] = @page
88
+ @store[@url].url.should == URI(@url)
89
+ end
90
+
91
+ it "should implement has_key?" do
92
+ @store.should respond_to(:has_key?)
93
+
94
+ @store[@url] = @page
95
+ @store.has_key?(@url).should == true
96
+
97
+ @store.has_key?('missing').should == false
98
+ end
99
+
100
+ it "should implement delete" do
101
+ @store.should respond_to(:delete)
102
+
103
+ @store[@url] = @page
104
+ @store.delete(@url).url.should == @page.url
105
+ @store.has_key?(@url).should == false
106
+ end
107
+
108
+ it "should implement keys" do
109
+ @store.should respond_to(:keys)
110
+
111
+ urls = [SPEC_DOMAIN, SPEC_DOMAIN + 'test', SPEC_DOMAIN + 'another']
112
+ pages = urls.map { |url| Page.new(URI(url)) }
113
+ urls.zip(pages).each { |arr| @store[arr[0]] = arr[1] }
114
+
115
+ (@store.keys - urls).should == []
116
+ end
117
+
118
+ it "should implement each" do
119
+ @store.should respond_to(:each)
120
+
121
+ urls = [SPEC_DOMAIN, SPEC_DOMAIN + 'test', SPEC_DOMAIN + 'another']
122
+ pages = urls.map { |url| Page.new(URI(url)) }
123
+ urls.zip(pages).each { |arr| @store[arr[0]] = arr[1] }
124
+
125
+ result = {}
126
+ @store.each { |k, v| result[k] = v }
127
+ (result.keys - urls).should == []
128
+ (result.values.map { |page| page.url.to_s } - urls).should == []
129
+ end
130
+
131
+ it "should implement merge!, and return self" do
132
+ @store.should respond_to(:merge!)
133
+
134
+ hash = {SPEC_DOMAIN => Page.new(URI(SPEC_DOMAIN)),
135
+ SPEC_DOMAIN + 'test' => Page.new(URI(SPEC_DOMAIN + 'test'))}
136
+ merged = @store.merge! hash
137
+ hash.each { |key, value| @store[key].url.to_s.should == key }
138
+
139
+ merged.should === @store
140
+ end
141
+
142
+ it "should correctly deserialize nil redirect_to when loading" do
143
+ @page.redirect_to.should be_nil
144
+ @store[@url] = @page
145
+ @store[@url].redirect_to.should be_nil
146
+ end
147
+ end
148
+
149
+ describe PStore do
150
+ it_should_behave_like "storage engine"
151
+
152
+ before(:each) do
153
+ @test_file = 'test.pstore'
154
+ File.delete @test_file rescue nil
155
+ @store = Anemone::Storage.PStore(@test_file)
156
+ end
157
+
158
+ after(:all) do
159
+ File.delete @test_file rescue nil
160
+ end
161
+ end
162
+
163
+ describe TokyoCabinet do
164
+ it_should_behave_like "storage engine"
165
+
166
+ before(:each) do
167
+ @test_file = 'test.tch'
168
+ File.delete @test_file rescue nil
169
+ @store = Anemone::Storage.TokyoCabinet(@test_file)
170
+ end
171
+
172
+ after(:each) do
173
+ @store.close
174
+ end
175
+
176
+ after(:all) do
177
+ File.delete @test_file rescue nil
178
+ end
179
+
180
+ it "should raise an error if supplied with a file extension other than .tch" do
181
+ lambda { Anemone::Storage.TokyoCabinet('test.tmp') }.should raise_error(RuntimeError)
182
+ end
183
+ end
184
+
185
+ describe KyotoCabinet do
186
+ it_should_behave_like "storage engine"
187
+
188
+ before(:each) do
189
+ @test_file = 'test.kch'
190
+ File.delete @test_file rescue nil
191
+ @store = Anemone::Storage.KyotoCabinet(@test_file)
192
+ end
193
+
194
+ after(:each) do
195
+ @store.close
196
+ end
197
+
198
+ after(:all) do
199
+ File.delete @test_file rescue nil
200
+ end
201
+
202
+ it "should raise an error if supplied with a file extension other than .kch" do
203
+ lambda { Anemone::Storage.KyotoCabinet('test.tmp') }.should raise_error(RuntimeError)
204
+ end
205
+ end
206
+
207
+ describe SQLite3 do
208
+ it_should_behave_like "storage engine"
209
+
210
+ before(:each) do
211
+ @test_file = 'test.db'
212
+ File.delete @test_file rescue nil
213
+ @store = Anemone::Storage.SQLite3(@test_file)
214
+ end
215
+
216
+ after(:each) do
217
+ @store.close
218
+ end
219
+
220
+ after(:all) do
221
+ File.delete @test_file rescue nil
222
+ end
223
+
224
+ end
225
+
226
+ describe Storage::MongoDB do
227
+ it_should_behave_like "storage engine"
228
+
229
+ before(:each) do
230
+ @store = Storage.MongoDB
231
+ end
232
+
233
+ after(:each) do
234
+ @store.close
235
+ end
236
+ end
237
+
238
+ describe Storage::Redis do
239
+ it_should_behave_like "storage engine"
240
+
241
+ before(:each) do
242
+ @store = Storage.Redis
243
+ end
244
+
245
+ after(:each) do
246
+ @store.close
247
+ end
248
+ end
249
+
250
+ end
251
+ end
252
+ end
metadata ADDED
@@ -0,0 +1,281 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sutch-anemone
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.2
5
+ platform: ruby
6
+ authors:
7
+ - Chris Kite (Dennis Sutch's fork)
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.3.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.3.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: robotex
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.0.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.0.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: content_urls
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.1.8
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.1.8
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.9.2
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.9.2
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdoc
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '3.12'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '3.12'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: 2.8.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: 2.8.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: fakeweb
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: 1.3.0
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.3.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: redis
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: 2.2.0
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: 2.2.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: mongo
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: 1.3.1
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: 1.3.1
139
+ - !ruby/object:Gem::Dependency
140
+ name: bson_ext
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ! '>='
144
+ - !ruby/object:Gem::Version
145
+ version: 1.3.1
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ! '>='
151
+ - !ruby/object:Gem::Version
152
+ version: 1.3.1
153
+ - !ruby/object:Gem::Dependency
154
+ name: tokyocabinet
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '1.29'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ! '>='
165
+ - !ruby/object:Gem::Version
166
+ version: '1.29'
167
+ - !ruby/object:Gem::Dependency
168
+ name: kyotocabinet-ruby
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: 1.27.1
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ! '>='
179
+ - !ruby/object:Gem::Version
180
+ version: 1.27.1
181
+ - !ruby/object:Gem::Dependency
182
+ name: sqlite3
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ! '>='
186
+ - !ruby/object:Gem::Version
187
+ version: 1.3.4
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ! '>='
193
+ - !ruby/object:Gem::Version
194
+ version: 1.3.4
195
+ description:
196
+ email:
197
+ executables:
198
+ - anemone
199
+ extensions: []
200
+ extra_rdoc_files:
201
+ - README.rdoc
202
+ files:
203
+ - VERSION
204
+ - LICENSE.txt
205
+ - CHANGELOG.rdoc
206
+ - README.rdoc
207
+ - Rakefile
208
+ - lib/anemone/cli/count.rb
209
+ - lib/anemone/cli/cron.rb
210
+ - lib/anemone/cli/pagedepth.rb
211
+ - lib/anemone/cli/serialize.rb
212
+ - lib/anemone/cli/url_list.rb
213
+ - lib/anemone/cli.rb
214
+ - lib/anemone/cookie_store.rb
215
+ - lib/anemone/core.rb
216
+ - lib/anemone/exceptions.rb
217
+ - lib/anemone/http.rb
218
+ - lib/anemone/page.rb
219
+ - lib/anemone/page_store.rb
220
+ - lib/anemone/resource.rb
221
+ - lib/anemone/storage/base.rb
222
+ - lib/anemone/storage/exceptions.rb
223
+ - lib/anemone/storage/kyoto_cabinet.rb
224
+ - lib/anemone/storage/mongodb.rb
225
+ - lib/anemone/storage/pstore.rb
226
+ - lib/anemone/storage/redis.rb
227
+ - lib/anemone/storage/sqlite3.rb
228
+ - lib/anemone/storage/tokyo_cabinet.rb
229
+ - lib/anemone/storage.rb
230
+ - lib/anemone/tentacle.rb
231
+ - lib/anemone.rb
232
+ - spec/anemone_spec.rb
233
+ - spec/cookie_store_spec.rb
234
+ - spec/core_spec.rb
235
+ - spec/fakeweb_helper.rb
236
+ - spec/http_spec.rb
237
+ - spec/page_spec.rb
238
+ - spec/page_store_spec.rb
239
+ - spec/resource_spec.rb
240
+ - spec/spec_helper.rb
241
+ - spec/storage_spec.rb
242
+ - bin/anemone
243
+ homepage: http://anemone.rubyforge.org
244
+ licenses: []
245
+ metadata: {}
246
+ post_install_message:
247
+ rdoc_options:
248
+ - -m
249
+ - README.rdoc
250
+ - -t
251
+ - Anemone
252
+ require_paths:
253
+ - lib
254
+ required_ruby_version: !ruby/object:Gem::Requirement
255
+ requirements:
256
+ - - ! '>='
257
+ - !ruby/object:Gem::Version
258
+ version: '0'
259
+ required_rubygems_version: !ruby/object:Gem::Requirement
260
+ requirements:
261
+ - - ! '>='
262
+ - !ruby/object:Gem::Version
263
+ version: '0'
264
+ requirements: []
265
+ rubyforge_project: anemone
266
+ rubygems_version: 2.0.6
267
+ signing_key:
268
+ specification_version: 4
269
+ summary: Anemone web-spider framework (sutch's fork)
270
+ test_files:
271
+ - spec/anemone_spec.rb
272
+ - spec/cookie_store_spec.rb
273
+ - spec/core_spec.rb
274
+ - spec/fakeweb_helper.rb
275
+ - spec/http_spec.rb
276
+ - spec/page_spec.rb
277
+ - spec/page_store_spec.rb
278
+ - spec/resource_spec.rb
279
+ - spec/spec_helper.rb
280
+ - spec/storage_spec.rb
281
+ has_rdoc: true