anemone 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,23 @@
1
+ == 0.7.0 / 2012-01-19
2
+
3
+ * Major enhancements
4
+
5
+ * Added support for SQLite3 and Kyoto Cabinet storage
6
+
7
+ * Minor enhancements
8
+
9
+ * Added Page#base to use base HTML element
10
+ * Use bundler for development dependencies
11
+
12
+ * Bug fixes
13
+
14
+ * Encode characters in URLs
15
+ * Fix specs to run under rake
16
+ * Fix handling of redirect_to in storage adapters
17
+
1
18
  == 0.6.1 / 2011-02-24
2
19
 
3
- *Bug fixes
20
+ * Bug fixes
4
21
 
5
22
  * Fix a bug preventing SSL connections from working
6
23
 
@@ -16,7 +16,7 @@ See http://anemone.rubyforge.org for more information.
16
16
  * Records response time for each page
17
17
  * CLI program can list all pages in a domain, calculate page depths, and more
18
18
  * Obey robots.txt
19
- * In-memory or persistent storage of pages during crawl, using TokyoCabinet, MongoDB, or Redis
19
+ * In-memory or persistent storage of pages during crawl, using TokyoCabinet, SQLite3, MongoDB, or Redis
20
20
 
21
21
  == Examples
22
22
  See the scripts under the <tt>lib/anemone/cli</tt> directory for examples of several useful Anemone tasks.
@@ -32,5 +32,6 @@ To test and develop this gem, additional requirements are:
32
32
  * tokyocabinet
33
33
  * mongo
34
34
  * redis
35
+ * sqlite3
35
36
 
36
37
  You will need to have {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
data/Rakefile CHANGED
@@ -1,26 +1,25 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'rspec/core/rake_task'
3
4
 
4
- require 'spec/rake/spectask'
5
- Spec::Rake::SpecTask.new(:spec) do |spec|
6
- spec.libs << 'lib' << 'spec'
7
- spec.spec_files = FileList['spec/**/*_spec.rb']
5
+ desc "Run all specs"
6
+ RSpec::Core::RakeTask.new(:rspec) do |spec|
7
+ spec.pattern = 'spec/**/*_spec.rb'
8
8
  end
9
9
 
10
- Spec::Rake::SpecTask.new(:rcov) do |spec|
11
- spec.libs << 'lib' << 'spec'
10
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
12
11
  spec.pattern = 'spec/**/*_spec.rb'
13
12
  spec.rcov = true
14
13
  end
15
14
 
16
- task :default => :spec
15
+ task :default => :rspec
17
16
 
18
- require 'rake/rdoctask'
19
- Rake::RDocTask.new do |rdoc|
17
+ require 'rdoc/task'
18
+ RDoc::Task.new do |rdoc|
20
19
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
21
20
 
22
21
  rdoc.rdoc_dir = 'rdoc'
23
22
  rdoc.title = "anemone #{version}"
24
23
  rdoc.rdoc_files.include('README*')
25
24
  rdoc.rdoc_files.include('lib/**/*.rb')
26
- end
25
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.1
1
+ 0.7.0
@@ -9,7 +9,7 @@ require 'anemone/storage/base'
9
9
 
10
10
  module Anemone
11
11
 
12
- VERSION = '0.6.1';
12
+ VERSION = '0.7.0';
13
13
 
14
14
  #
15
15
  # Convenience method to start a crawl
@@ -112,7 +112,7 @@ module Anemone
112
112
 
113
113
  response, response_time = get_response(loc, referer)
114
114
  code = Integer(response.code)
115
- redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']).normalize : nil
115
+ redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']).normalize : nil
116
116
  yield response, code, loc, redirect_to, response_time
117
117
  limit -= 1
118
118
  end while (loc = redirect_to) && allowed?(redirect_to, url) && limit > 0
@@ -62,7 +62,7 @@ module Anemone
62
62
  doc.search("//a[@href]").each do |a|
63
63
  u = a['href']
64
64
  next if u.nil? or u.empty?
65
- abs = to_absolute(URI(u)) rescue next
65
+ abs = to_absolute(URI(URI.escape(u))) rescue next
66
66
  @links << abs if in_domain?(abs)
67
67
  end
68
68
  @links.uniq!
@@ -131,6 +131,21 @@ module Anemone
131
131
  404 == @code
132
132
  end
133
133
 
134
#
# Base URI declared by the document's <base href="..."> element, or nil.
# http://www.w3.org/TR/html4/struct/links.html#edef-BASE
#
# Returns a URI when the page has a parsed document whose head contains a
# <base> element with a usable href; returns nil when there is no document,
# no <base> element, an empty href, or an href that is not a valid URI.
# A successfully parsed base is memoized in @base.
#
def base
  unless @base
    document = doc
    # Only the first href counts. Calling to_s on the whole result set
    # would concatenate the hrefs of every <base> element in the head.
    node = document.search('//head/base/@href').first if document
    href = node.to_s.strip # nil.to_s is "", so a missing element ends up empty
    @base = begin
      URI(href) unless href.empty?
    rescue URI::InvalidURIError
      # A malformed href is treated the same as no <base> element at all
      nil
    end
  end

  @base
end
147
+
148
+
134
149
  #
135
150
  # Converts relative URL *link* into an absolute URL based on the
136
151
  # location of the page
@@ -142,7 +157,7 @@ module Anemone
142
157
  link = URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/,'')))
143
158
 
144
159
  relative = URI(link)
145
- absolute = @url.merge(relative)
160
+ absolute = base ? base.merge(relative) : @url.merge(relative)
146
161
 
147
162
  absolute.path = '/' if absolute.path.empty?
148
163
 
@@ -190,7 +205,7 @@ module Anemone
190
205
  '@visited' => hash['visited'],
191
206
  '@depth' => hash['depth'].to_i,
192
207
  '@referer' => hash['referer'],
193
- '@redirect_to' => URI(hash['redirect_to']),
208
+ '@redirect_to' => (!!hash['redirect_to'] && !hash['redirect_to'].empty?) ? URI(hash['redirect_to']) : nil,
194
209
  '@response_time' => hash['response_time'].to_i,
195
210
  '@fetched' => hash['fetched']
196
211
  }.each do |var, value|
@@ -18,6 +18,11 @@ module Anemone
18
18
  self::TokyoCabinet.new(file)
19
19
  end
20
20
 
21
#
# Build a Kyoto Cabinet backed storage engine using the database at *file*.
# The adapter is loaded lazily so the kyotocabinet gem is only needed when
# this factory is actually called.
#
# The adapter rejects any filename that does not end in .kch, so the
# default must carry that extension (the previous 'anemone.tch' default
# could never succeed).
#
def self.KyotoCabinet(file = 'anemone.kch')
  require 'anemone/storage/kyoto_cabinet'
  self::KyotoCabinet.new(file)
end
25
+
21
26
  def self.MongoDB(mongo_db = nil, collection_name = 'pages')
22
27
  require 'anemone/storage/mongodb'
23
28
  mongo_db ||= Mongo::Connection.new.db('anemone')
@@ -29,6 +34,11 @@ module Anemone
29
34
  require 'anemone/storage/redis'
30
35
  self::Redis.new(opts)
31
36
  end
37
+
38
+ def self.SQLite3(file = 'anemone.db')
39
+ require 'anemone/storage/sqlite3'
40
+ self::SQLite3.new(file)
41
+ end
32
42
 
33
43
  end
34
44
  end
@@ -0,0 +1,72 @@
1
+ begin
2
+ require 'kyotocabinet'
3
+ rescue LoadError
4
+ puts $!
5
+ puts "You need the kyotocabinet-ruby gem to use Anemone::Storage::KyotoCabinet"
6
+ exit
7
+ end
8
+
9
+ require 'forwardable'
10
+
11
module Anemone
  module Storage
    #
    # Storage adapter that keeps values in a Kyoto Cabinet database file.
    # Every value is serialized with Marshal and base64-encoded (pack("m"))
    # before it is written, and decoded again when it is read back.
    #
    class KyotoCabinet
      extend Forwardable

      # #each is defined explicitly below because values must be decoded
      def_delegators :@db, :close, :size

      #
      # Open the database at *file* (creating it if needed) and empty it.
      # Raises RuntimeError unless *file* has a .kch extension.
      #
      def initialize(file)
        raise "KyotoCabinet filename must have .kch extension" if File.extname(file) != '.kch'
        @db = ::KyotoCabinet::DB::new
        @db.open(file, ::KyotoCabinet::DB::OWRITER | ::KyotoCabinet::DB::OCREATE)
        # Any records left over from a previous run are discarded
        @db.clear
      end

      #
      # Decoded value stored under *key*, or nil when there is none
      #
      def [](key)
        encoded = @db[key]
        encoded ? load_value(encoded) : nil
      end

      #
      # Serialize *value* and store it under *key*
      #
      def []=(key, value)
        @db[key] = [Marshal.dump(value)].pack("m")
      end

      #
      # Yield each key together with its decoded value
      #
      def each
        @db.each do |k, v|
          yield(k, load_value(v))
        end
      end

      #
      # True when a record exists for *key*.
      # Every stored value is a non-nil encoded String (see #[]=), so a nil
      # lookup means the key is absent. A direct lookup avoids listing every
      # key that merely starts with *key*.
      #
      def has_key?(key)
        !@db[key].nil?
      end

      #
      # Array of all keys in the database
      #
      def keys
        acc = []
        # NOTE(review): the block argument is unwrapped with #first, as in the
        # original code; presumably the binding yields a one-element array.
        @db.each_key { |key| acc << key.first }
        acc
      end

      #
      # Remove the record for *key* and return its decoded value
      # (nil when there was no such record)
      #
      def delete(key)
        value = self[key]
        @db.delete(key)
        value
      end

      #
      # Store every pair of *hash* and return self
      #
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      private

      #
      # Reverse of the encoding done in #[]=: base64-decode, then unmarshal
      #
      def load_value(value)
        Marshal.load(value.unpack("m")[0])
      end

    end
  end
end
@@ -0,0 +1,90 @@
1
+ begin
2
+ require 'sqlite3'
3
+ rescue LoadError
4
+ puts "You need the sqlite3 gem to use Anemone::Storage::SQLite3"
5
+ exit
6
+ end
7
+
8
module Anemone
  module Storage
    #
    # Storage adapter that keeps values in a single SQLite3 table
    # (anemone_storage). Each value is serialized with Marshal into the
    # data column and looked up by the string form of its key.
    #
    class SQLite3

      #
      # Open the database at *file* and make sure the table and index exist.
      # Existing rows are kept.
      #
      def initialize(file)
        @db = ::SQLite3::Database.new(file)
        create_schema
      end

      #
      # Deserialized value stored for *url*, or nil when there is none.
      # NOTE(review): Marshal.load runs on whatever the database file
      # contains, so only open files this adapter wrote itself.
      #
      def [](url)
        data = @db.get_first_value('SELECT data FROM anemone_storage WHERE key = ?', url.to_s)
        data ? Marshal.load(data) : nil
      end

      #
      # Serialize *value* and store it for *url*, replacing any existing row
      #
      def []=(url, value)
        data = Marshal.dump(value)
        key = url.to_s
        if has_key?(key)
          @db.execute('UPDATE anemone_storage SET data = ? WHERE key = ?', data, key)
        else
          @db.execute('INSERT INTO anemone_storage (data, key) VALUES(?, ?)', data, key)
        end
      end

      #
      # Remove the row for *url* and return the value it held
      # (nil when there was no such row)
      #
      def delete(url)
        page = self[url]
        @db.execute('DELETE FROM anemone_storage WHERE key = ?', url.to_s)
        page
      end

      #
      # Yield each key and its deserialized value, in insertion (id) order
      #
      def each
        @db.execute("SELECT key, data FROM anemone_storage ORDER BY id") do |row|
          yield row[0], Marshal.load(row[1])
        end
      end

      #
      # Store every pair of *hash* and return self
      #
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      #
      # Number of stored rows
      #
      def size
        @db.get_first_value('SELECT COUNT(id) FROM anemone_storage')
      end

      #
      # Array of all keys, in insertion (id) order
      #
      def keys
        @db.execute("SELECT key FROM anemone_storage ORDER BY id").map { |row| row[0] }
      end

      #
      # True when a row exists for *url*
      #
      def has_key?(url)
        !!@db.get_first_value('SELECT id FROM anemone_storage WHERE key = ?', url.to_s)
      end

      #
      # Close the underlying database handle
      #
      def close
        @db.close
      end

      private

      #
      # Create the storage table and its key index unless they already exist
      #
      def create_schema
        @db.execute_batch <<SQL
create table if not exists anemone_storage (
id INTEGER PRIMARY KEY ASC,
key TEXT,
data BLOB
);
create index if not exists anemone_key_idx on anemone_storage (key);
SQL
      end

    end
  end
end
90
+
@@ -1,6 +1,7 @@
1
1
  begin
2
2
  require 'tokyocabinet'
3
3
  rescue LoadError
4
+ puts $!
4
5
  puts "You need the tokyocabinet gem to use Anemone::Storage::TokyoCabinet"
5
6
  exit
6
7
  end
@@ -1,6 +1,6 @@
1
1
  $:.unshift(File.dirname(__FILE__))
2
2
  require 'spec_helper'
3
- %w[pstore tokyo_cabinet].each { |file| require "anemone/storage/#{file}.rb" }
3
+ %w[pstore tokyo_cabinet sqlite3].each { |file| require "anemone/storage/#{file}.rb" }
4
4
 
5
5
  module Anemone
6
6
  describe Core do
@@ -277,6 +277,27 @@ module Anemone
277
277
  end
278
278
  end
279
279
 
280
+ describe Storage::SQLite3 do
281
+ it_should_behave_like "crawl"
282
+
283
+ before(:all) do
284
+ @test_file = 'test.db'
285
+ end
286
+
287
+ before(:each) do
288
+ File.delete(@test_file) if File.exists?(@test_file)
289
+ @opts = {:storage => @store = Storage.SQLite3(@test_file)}
290
+ end
291
+
292
+ after(:each) do
293
+ @store.close
294
+ end
295
+
296
+ after(:each) do
297
+ File.delete(@test_file) if File.exists?(@test_file)
298
+ end
299
+ end
300
+
280
301
  describe "options" do
281
302
  it "should accept options for the crawl" do
282
303
  core = Anemone.crawl(SPEC_DOMAIN, :verbose => false,
@@ -1,10 +1,3 @@
1
- begin
2
- require 'fakeweb'
3
- rescue LoadError
4
- warn "You need the 'fakeweb' gem installed to test Anemone"
5
- exit
6
- end
7
-
8
1
  FakeWeb.allow_net_connect = false
9
2
 
10
3
  module Anemone
@@ -22,6 +15,7 @@ module Anemone
22
15
  @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
23
16
  @redirect = options[:redirect] if options.has_key?(:redirect)
24
17
  @auth = options[:auth] if options.has_key?(:auth)
18
+ @base = options[:base] if options.has_key?(:base)
25
19
  @content_type = options[:content_type] || "text/html"
26
20
  @body = options[:body]
27
21
 
@@ -40,7 +34,11 @@ module Anemone
40
34
  private
41
35
 
42
36
  def create_body
43
- @body = "<html><body>"
37
+ if @base
38
+ @body = "<html><head><base href=\"#{@base}\"></head><body>"
39
+ else
40
+ @body = "<html><body>"
41
+ end
44
42
  @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
45
43
  @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
46
44
  @body += "</body></html>"
@@ -1,4 +1,3 @@
1
- $:.unshift(File.dirname(__FILE__))
2
1
  require 'spec_helper'
3
2
 
4
3
  module Anemone
@@ -10,12 +9,7 @@ module Anemone
10
9
  end
11
10
 
12
11
  it "should still return a Page if an exception occurs during the HTTP connection" do
13
- class HTTP
14
- def refresh_connection
15
- raise "test exception"
16
- end
17
- end
18
-
12
+ HTTP.stub!(:refresh_connection).and_raise(StandardError)
19
13
  http = Anemone::HTTP.new
20
14
  http.fetch_page(SPEC_DOMAIN).should be_an_instance_of(Page)
21
15
  end
@@ -74,19 +74,103 @@ module Anemone
74
74
  @page.cookies.should == []
75
75
  end
76
76
 
77
- it "should have a to_hash method that converts the page to a hash" do
78
- hash = @page.to_hash
79
- hash['url'].should == @page.url.to_s
80
- hash['referer'].should == @page.referer.to_s
81
- hash['links'].should == @page.links.map(&:to_s)
82
- end
83
-
84
- it "should have a from_hash method to convert from a hash to a Page" do
85
- page = @page.dup
86
- page.depth = 1
87
- converted = Page.from_hash(page.to_hash)
88
- converted.links.should == page.links
89
- converted.depth.should == page.depth
77
+ describe "#to_hash" do
78
+ it "converts the page to a hash" do
79
+ hash = @page.to_hash
80
+ hash['url'].should == @page.url.to_s
81
+ hash['referer'].should == @page.referer.to_s
82
+ hash['links'].should == @page.links.map(&:to_s)
83
+ end
84
+
85
+ context "when redirect_to is nil" do
86
+ it "sets 'redirect_to' to nil in the hash" do
87
+ @page.redirect_to.should be_nil
88
+ @page.to_hash[:redirect_to].should be_nil
89
+ end
90
+ end
91
+
92
+ context "when redirect_to is a non-nil URI" do
93
+ it "sets 'redirect_to' to the URI string" do
94
+ new_page = Page.new(URI(SPEC_DOMAIN), {:redirect_to => URI(SPEC_DOMAIN + '1')})
95
+ new_page.redirect_to.to_s.should == SPEC_DOMAIN + '1'
96
+ new_page.to_hash['redirect_to'].should == SPEC_DOMAIN + '1'
97
+ end
98
+ end
99
+ end
100
+
101
+ describe "#from_hash" do
102
+ it "converts from a hash to a Page" do
103
+ page = @page.dup
104
+ page.depth = 1
105
+ converted = Page.from_hash(page.to_hash)
106
+ converted.links.should == page.links
107
+ converted.depth.should == page.depth
108
+ end
109
+
110
+ it 'handles a from_hash with a nil redirect_to' do
111
+ page_hash = @page.to_hash
112
+ page_hash['redirect_to'] = nil
113
+ lambda{Page.from_hash(page_hash)}.should_not raise_error(URI::InvalidURIError)
114
+ Page.from_hash(page_hash).redirect_to.should be_nil
115
+ end
116
+ end
117
+
118
+ describe "#redirect_to" do
119
+ context "when the page was a redirect" do
120
+ it "returns a URI of the page it redirects to" do
121
+ new_page = Page.new(URI(SPEC_DOMAIN), {:redirect_to => URI(SPEC_DOMAIN + '1')})
122
+ redirect = new_page.redirect_to
123
+ redirect.should be_a(URI)
124
+ redirect.to_s.should == SPEC_DOMAIN + '1'
125
+ end
126
+ end
127
+ end
128
+
129
+ it "should detect, store and expose the base url for the page head" do
130
+ base = "#{SPEC_DOMAIN}path/to/base_url/"
131
+ page = @http.fetch_page(FakePage.new('body_test', {:base => base}).url)
132
+ page.base.should == URI(base)
133
+ @page.base.should be_nil
134
+ end
135
+
136
+ it "should have a method to convert a relative url to an absolute one" do
137
+ @page.should respond_to(:to_absolute)
138
+
139
+ # Identity
140
+ @page.to_absolute(@page.url).should == @page.url
141
+ @page.to_absolute("").should == @page.url
142
+
143
+ # Root-ness
144
+ @page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
145
+
146
+ # Relativeness
147
+ relative_path = "a/relative/path"
148
+ @page.to_absolute(relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}")
149
+
150
+ deep_page = @http.fetch_page(FakePage.new('home/deep', :links => '1').url)
151
+ upward_relative_path = "../a/relative/path"
152
+ deep_page.to_absolute(upward_relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}")
153
+
154
+ # The base URL case
155
+ base_path = "path/to/base_url/"
156
+ base = "#{SPEC_DOMAIN}#{base_path}"
157
+ page = @http.fetch_page(FakePage.new('home', {:base => base}).url)
158
+
159
+ # Identity
160
+ page.to_absolute(page.url).should == page.url
161
+ # It should revert to the base url
162
+ page.to_absolute("").should_not == page.url
163
+
164
+ # Root-ness
165
+ page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
166
+
167
+ # Relativeness
168
+ relative_path = "a/relative/path"
169
+ page.to_absolute(relative_path).should == URI("#{base}#{relative_path}")
170
+
171
+ upward_relative_path = "../a/relative/path"
172
+ upward_base = "#{SPEC_DOMAIN}path/to/"
173
+ page.to_absolute(upward_relative_path).should == URI("#{upward_base}#{relative_path}")
90
174
  end
91
175
 
92
176
  end
@@ -1,6 +1,6 @@
1
1
  $:.unshift(File.dirname(__FILE__))
2
2
  require 'spec_helper'
3
- %w[pstore tokyo_cabinet mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
3
+ %w[pstore tokyo_cabinet sqlite3 mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
4
4
 
5
5
  module Anemone
6
6
  describe PageStore do
@@ -125,6 +125,24 @@ module Anemone
125
125
  end
126
126
  end
127
127
 
128
+ describe Storage::SQLite3 do
129
+ it_should_behave_like "page storage"
130
+
131
+ before(:each) do
132
+ @test_file = 'test.db'
133
+ File.delete(@test_file) if File.exists?(@test_file)
134
+ @opts = {:storage => @store = Storage.SQLite3(@test_file)}
135
+ end
136
+
137
+ after(:each) do
138
+ @store.close
139
+ end
140
+
141
+ after(:each) do
142
+ File.delete(@test_file) if File.exists?(@test_file)
143
+ end
144
+ end
145
+
128
146
  describe Storage::MongoDB do
129
147
  it_should_behave_like "page storage"
130
148
 
@@ -1,4 +1,6 @@
1
1
  require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'fakeweb'
2
4
  require File.dirname(__FILE__) + '/fakeweb_helper'
3
5
 
4
6
  $:.unshift(File.dirname(__FILE__) + '/../lib/')
@@ -1,7 +1,7 @@
1
1
  $:.unshift(File.dirname(__FILE__))
2
2
  require 'spec_helper'
3
3
 
4
- %w[pstore tokyo_cabinet mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
4
+ %w[pstore tokyo_cabinet kyoto_cabinet sqlite3 mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
5
5
 
6
6
  module Anemone
7
7
  describe Storage do
@@ -25,6 +25,14 @@ module Anemone
25
25
  store.close
26
26
  end
27
27
 
28
+ it "should have a class method to produce a SQLite3" do
29
+ test_file = 'test.db'
30
+ Anemone::Storage.should respond_to(:SQLite3)
31
+ store = Anemone::Storage.SQLite3(test_file)
32
+ store.should be_an_instance_of(Anemone::Storage::SQLite3)
33
+ store.close
34
+ end
35
+
28
36
  it "should have a class method to produce a MongoDB" do
29
37
  Anemone::Storage.should respond_to(:MongoDB)
30
38
  store = Anemone::Storage.MongoDB
@@ -105,6 +113,12 @@ module Anemone
105
113
 
106
114
  merged.should === @store
107
115
  end
116
+
117
+ it "should correctly deserialize nil redirect_to when loading" do
118
+ @page.redirect_to.should be_nil
119
+ @store[@url] = @page
120
+ @store[@url].redirect_to.should be_nil
121
+ end
108
122
  end
109
123
 
110
124
  describe PStore do
@@ -143,6 +157,47 @@ module Anemone
143
157
  end
144
158
  end
145
159
 
160
+ describe KyotoCabinet do
161
+ it_should_behave_like "storage engine"
162
+
163
+ before(:each) do
164
+ @test_file = 'test.kch'
165
+ File.delete @test_file rescue nil
166
+ @store = Anemone::Storage.KyotoCabinet(@test_file)
167
+ end
168
+
169
+ after(:each) do
170
+ @store.close
171
+ end
172
+
173
+ after(:all) do
174
+ File.delete @test_file rescue nil
175
+ end
176
+
177
+ it "should raise an error if supplied with a file extension other than .kch" do
178
+ lambda { Anemone::Storage.KyotoCabinet('test.tmp') }.should raise_error(RuntimeError)
179
+ end
180
+ end
181
+
182
+ describe SQLite3 do
183
+ it_should_behave_like "storage engine"
184
+
185
+ before(:each) do
186
+ @test_file = 'test.db'
187
+ File.delete @test_file rescue nil
188
+ @store = Anemone::Storage.SQLite3(@test_file)
189
+ end
190
+
191
+ after(:each) do
192
+ @store.close
193
+ end
194
+
195
+ after(:all) do
196
+ File.delete @test_file rescue nil
197
+ end
198
+
199
+ end
200
+
146
201
  describe Storage::MongoDB do
147
202
  it_should_behave_like "storage engine"
148
203
 
metadata CHANGED
@@ -1,139 +1,218 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: anemone
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 6
8
- - 1
9
- version: 0.6.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.0
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Chris Kite
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-02-24 00:00:00 -06:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-01-20 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &19111780 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 3
31
- - 0
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
32
21
  version: 1.3.0
33
22
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: robots
37
23
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *19111780
25
+ - !ruby/object:Gem::Dependency
26
+ name: robots
27
+ requirement: &19111300 !ruby/object:Gem::Requirement
39
28
  none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 0
45
- - 7
46
- - 2
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
47
32
  version: 0.7.2
48
33
  type: :runtime
49
- version_requirements: *id002
34
+ prerelease: false
35
+ version_requirements: *19111300
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &19141340 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: 0.8.7
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *19141340
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &19140880 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.6.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *19140880
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &19140420 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 1.3.0
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *19140420
69
+ - !ruby/object:Gem::Dependency
70
+ name: redis
71
+ requirement: &19139960 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: 2.2.0
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *19139960
80
+ - !ruby/object:Gem::Dependency
81
+ name: mongo
82
+ requirement: &19139500 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: 1.3.1
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *19139500
91
+ - !ruby/object:Gem::Dependency
92
+ name: bson_ext
93
+ requirement: &19139040 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: 1.3.1
99
+ type: :development
100
+ prerelease: false
101
+ version_requirements: *19139040
102
+ - !ruby/object:Gem::Dependency
103
+ name: tokyocabinet
104
+ requirement: &19138580 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '1.29'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: *19138580
113
+ - !ruby/object:Gem::Dependency
114
+ name: kyotocabinet-ruby
115
+ requirement: &19138120 !ruby/object:Gem::Requirement
116
+ none: false
117
+ requirements:
118
+ - - ! '>='
119
+ - !ruby/object:Gem::Version
120
+ version: 1.27.1
121
+ type: :development
122
+ prerelease: false
123
+ version_requirements: *19138120
124
+ - !ruby/object:Gem::Dependency
125
+ name: sqlite3
126
+ requirement: &19137660 !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: 1.3.4
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: *19137660
50
135
  description:
51
136
  email:
52
- executables:
137
+ executables:
53
138
  - anemone
54
139
  extensions: []
55
-
56
- extra_rdoc_files:
140
+ extra_rdoc_files:
57
141
  - README.rdoc
58
- files:
142
+ files:
59
143
  - VERSION
60
144
  - LICENSE.txt
61
145
  - CHANGELOG.rdoc
62
146
  - README.rdoc
63
147
  - Rakefile
64
- - lib/anemone.rb
65
- - lib/anemone/cookie_store.rb
66
- - lib/anemone/storage.rb
67
- - lib/anemone/core.rb
68
- - lib/anemone/cli.rb
69
- - lib/anemone/exceptions.rb
70
- - lib/anemone/tentacle.rb
71
- - lib/anemone/storage/tokyo_cabinet.rb
72
- - lib/anemone/storage/base.rb
73
- - lib/anemone/storage/exceptions.rb
74
148
  - lib/anemone/storage/pstore.rb
75
149
  - lib/anemone/storage/mongodb.rb
150
+ - lib/anemone/storage/tokyo_cabinet.rb
151
+ - lib/anemone/storage/exceptions.rb
76
152
  - lib/anemone/storage/redis.rb
77
- - lib/anemone/http.rb
153
+ - lib/anemone/storage/sqlite3.rb
154
+ - lib/anemone/storage/base.rb
155
+ - lib/anemone/storage/kyoto_cabinet.rb
78
156
  - lib/anemone/page_store.rb
79
- - lib/anemone/cli/cron.rb
80
- - lib/anemone/cli/pagedepth.rb
81
- - lib/anemone/cli/count.rb
157
+ - lib/anemone/storage.rb
158
+ - lib/anemone/tentacle.rb
159
+ - lib/anemone/http.rb
160
+ - lib/anemone/cli.rb
161
+ - lib/anemone/page.rb
162
+ - lib/anemone/exceptions.rb
163
+ - lib/anemone/core.rb
82
164
  - lib/anemone/cli/url_list.rb
83
165
  - lib/anemone/cli/serialize.rb
84
- - lib/anemone/page.rb
85
- - spec/http_spec.rb
86
- - spec/page_store_spec.rb
87
- - spec/core_spec.rb
166
+ - lib/anemone/cli/count.rb
167
+ - lib/anemone/cli/cron.rb
168
+ - lib/anemone/cli/pagedepth.rb
169
+ - lib/anemone/cookie_store.rb
170
+ - lib/anemone.rb
88
171
  - spec/fakeweb_helper.rb
89
172
  - spec/page_spec.rb
90
- - spec/cookie_store_spec.rb
91
173
  - spec/anemone_spec.rb
92
- - spec/spec_helper.rb
174
+ - spec/core_spec.rb
93
175
  - spec/storage_spec.rb
176
+ - spec/page_store_spec.rb
177
+ - spec/cookie_store_spec.rb
178
+ - spec/http_spec.rb
179
+ - spec/spec_helper.rb
94
180
  - bin/anemone
95
- has_rdoc: true
96
181
  homepage: http://anemone.rubyforge.org
97
182
  licenses: []
98
-
99
183
  post_install_message:
100
- rdoc_options:
184
+ rdoc_options:
101
185
  - -m
102
186
  - README.rdoc
103
187
  - -t
104
188
  - Anemone
105
- require_paths:
189
+ require_paths:
106
190
  - lib
107
- required_ruby_version: !ruby/object:Gem::Requirement
191
+ required_ruby_version: !ruby/object:Gem::Requirement
108
192
  none: false
109
- requirements:
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- segments:
113
- - 0
114
- version: "0"
115
- required_rubygems_version: !ruby/object:Gem::Requirement
193
+ requirements:
194
+ - - ! '>='
195
+ - !ruby/object:Gem::Version
196
+ version: '0'
197
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
198
  none: false
117
- requirements:
118
- - - ">="
119
- - !ruby/object:Gem::Version
120
- segments:
121
- - 0
122
- version: "0"
199
+ requirements:
200
+ - - ! '>='
201
+ - !ruby/object:Gem::Version
202
+ version: '0'
123
203
  requirements: []
124
-
125
204
  rubyforge_project: anemone
126
- rubygems_version: 1.3.7
205
+ rubygems_version: 1.8.15
127
206
  signing_key:
128
207
  specification_version: 3
129
208
  summary: Anemone web-spider framework
130
- test_files:
131
- - spec/http_spec.rb
132
- - spec/page_store_spec.rb
133
- - spec/core_spec.rb
209
+ test_files:
134
210
  - spec/fakeweb_helper.rb
135
211
  - spec/page_spec.rb
136
- - spec/cookie_store_spec.rb
137
212
  - spec/anemone_spec.rb
138
- - spec/spec_helper.rb
213
+ - spec/core_spec.rb
139
214
  - spec/storage_spec.rb
215
+ - spec/page_store_spec.rb
216
+ - spec/cookie_store_spec.rb
217
+ - spec/http_spec.rb
218
+ - spec/spec_helper.rb