anemone 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,23 @@
1
+ == 0.7.0 / 2012-01-19
2
+
3
+ * Major enhancements
4
+
5
+ * Added support for SQLite3 and Kyoto Cabinet storage
6
+
7
+ * Minor enhancements
8
+
9
+ * Added Page#base to use base HTML element
10
+ * Use bundler for development dependencies
11
+
12
+ * Bug fixes
13
+
14
+ * Encode characters in URLs
15
+ * Fix specs to run under rake
16
+ * Fix handling of redirect_to in storage adapters
17
+
1
18
  == 0.6.1 / 2011-02-24
2
19
 
3
- *Bug fixes
20
+ * Bug fixes
4
21
 
5
22
  * Fix a bug preventing SSL connections from working
6
23
 
@@ -16,7 +16,7 @@ See http://anemone.rubyforge.org for more information.
16
16
  * Records response time for each page
17
17
  * CLI program can list all pages in a domain, calculate page depths, and more
18
18
  * Obey robots.txt
19
- * In-memory or persistent storage of pages during crawl, using TokyoCabinet, MongoDB, or Redis
19
+ * In-memory or persistent storage of pages during crawl, using TokyoCabinet, SQLite3, MongoDB, or Redis
20
20
 
21
21
  == Examples
22
22
  See the scripts under the <tt>lib/anemone/cli</tt> directory for examples of several useful Anemone tasks.
@@ -32,5 +32,6 @@ To test and develop this gem, additional requirements are:
32
32
  * tokyocabinet
33
33
  * mongo
34
34
  * redis
35
+ * sqlite3
35
36
 
36
37
  You will need to have {Tokyo Cabinet}[http://fallabs.com/tokyocabinet/], {MongoDB}[http://www.mongodb.org/], and {Redis}[http://code.google.com/p/redis/] installed on your system and running.
data/Rakefile CHANGED
@@ -1,26 +1,25 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'rspec/core/rake_task'
3
4
 
4
- require 'spec/rake/spectask'
5
- Spec::Rake::SpecTask.new(:spec) do |spec|
6
- spec.libs << 'lib' << 'spec'
7
- spec.spec_files = FileList['spec/**/*_spec.rb']
5
+ desc "Run all specs"
6
+ RSpec::Core::RakeTask.new(:rspec) do |spec|
7
+ spec.pattern = 'spec/**/*_spec.rb'
8
8
  end
9
9
 
10
- Spec::Rake::SpecTask.new(:rcov) do |spec|
11
- spec.libs << 'lib' << 'spec'
10
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
12
11
  spec.pattern = 'spec/**/*_spec.rb'
13
12
  spec.rcov = true
14
13
  end
15
14
 
16
- task :default => :spec
15
+ task :default => :rspec
17
16
 
18
- require 'rake/rdoctask'
19
- Rake::RDocTask.new do |rdoc|
17
+ require 'rdoc/task'
18
+ RDoc::Task.new do |rdoc|
20
19
  version = File.exist?('VERSION') ? File.read('VERSION') : ""
21
20
 
22
21
  rdoc.rdoc_dir = 'rdoc'
23
22
  rdoc.title = "anemone #{version}"
24
23
  rdoc.rdoc_files.include('README*')
25
24
  rdoc.rdoc_files.include('lib/**/*.rb')
26
- end
25
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.1
1
+ 0.7.0
@@ -9,7 +9,7 @@ require 'anemone/storage/base'
9
9
 
10
10
  module Anemone
11
11
 
12
- VERSION = '0.6.1';
12
+ VERSION = '0.7.0';
13
13
 
14
14
  #
15
15
  # Convenience method to start a crawl
@@ -112,7 +112,7 @@ module Anemone
112
112
 
113
113
  response, response_time = get_response(loc, referer)
114
114
  code = Integer(response.code)
115
- redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']).normalize : nil
115
+ redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']).normalize : nil
116
116
  yield response, code, loc, redirect_to, response_time
117
117
  limit -= 1
118
118
  end while (loc = redirect_to) && allowed?(redirect_to, url) && limit > 0
@@ -62,7 +62,7 @@ module Anemone
62
62
  doc.search("//a[@href]").each do |a|
63
63
  u = a['href']
64
64
  next if u.nil? or u.empty?
65
- abs = to_absolute(URI(u)) rescue next
65
+ abs = to_absolute(URI(URI.escape(u))) rescue next
66
66
  @links << abs if in_domain?(abs)
67
67
  end
68
68
  @links.uniq!
@@ -131,6 +131,21 @@ module Anemone
131
131
  404 == @code
132
132
  end
133
133
 
134
+ #
135
+ # Base URI from the HTML doc head element
136
+ # http://www.w3.org/TR/html4/struct/links.html#edef-BASE
137
+ #
138
+ def base
139
+ @base = if doc
140
+ href = doc.search('//head/base/@href')
141
+ URI(href.to_s) unless href.nil? rescue nil
142
+ end unless @base
143
+
144
+ return nil if @base && @base.to_s().empty?
145
+ @base
146
+ end
147
+
148
+
134
149
  #
135
150
  # Converts relative URL *link* into an absolute URL based on the
136
151
  # location of the page
@@ -142,7 +157,7 @@ module Anemone
142
157
  link = URI.encode(URI.decode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/,'')))
143
158
 
144
159
  relative = URI(link)
145
- absolute = @url.merge(relative)
160
+ absolute = base ? base.merge(relative) : @url.merge(relative)
146
161
 
147
162
  absolute.path = '/' if absolute.path.empty?
148
163
 
@@ -190,7 +205,7 @@ module Anemone
190
205
  '@visited' => hash['visited'],
191
206
  '@depth' => hash['depth'].to_i,
192
207
  '@referer' => hash['referer'],
193
- '@redirect_to' => URI(hash['redirect_to']),
208
+ '@redirect_to' => (!!hash['redirect_to'] && !hash['redirect_to'].empty?) ? URI(hash['redirect_to']) : nil,
194
209
  '@response_time' => hash['response_time'].to_i,
195
210
  '@fetched' => hash['fetched']
196
211
  }.each do |var, value|
@@ -18,6 +18,11 @@ module Anemone
18
18
  self::TokyoCabinet.new(file)
19
19
  end
20
20
 
21
+ def self.KyotoCabinet(file = 'anemone.tch')
22
+ require 'anemone/storage/kyoto_cabinet'
23
+ self::KyotoCabinet.new(file)
24
+ end
25
+
21
26
  def self.MongoDB(mongo_db = nil, collection_name = 'pages')
22
27
  require 'anemone/storage/mongodb'
23
28
  mongo_db ||= Mongo::Connection.new.db('anemone')
@@ -29,6 +34,11 @@ module Anemone
29
34
  require 'anemone/storage/redis'
30
35
  self::Redis.new(opts)
31
36
  end
37
+
38
+ def self.SQLite3(file = 'anemone.db')
39
+ require 'anemone/storage/sqlite3'
40
+ self::SQLite3.new(file)
41
+ end
32
42
 
33
43
  end
34
44
  end
@@ -0,0 +1,72 @@
1
+ begin
2
+ require 'kyotocabinet'
3
+ rescue LoadError
4
+ puts $!
5
+ puts "You need the kyotocabinet-ruby gem to use Anemone::Storage::KyotoCabinet"
6
+ exit
7
+ end
8
+
9
+ require 'forwardable'
10
+
11
+ module Anemone
12
+ module Storage
13
+ class KyotoCabinet
14
+ extend Forwardable
15
+
16
+ def_delegators :@db, :close, :size, :each
17
+
18
+ def initialize(file)
19
+ raise "KyotoCabinet filename must have .kch extension" if File.extname(file) != '.kch'
20
+ @db = ::KyotoCabinet::DB::new
21
+ @db.open(file, ::KyotoCabinet::DB::OWRITER | ::KyotoCabinet::DB::OCREATE)
22
+ @db.clear
23
+ end
24
+
25
+ def [](key)
26
+ if value = @db[key]
27
+ load_value(value)
28
+ end
29
+ end
30
+
31
+ def []=(key, value)
32
+ @db[key] = [Marshal.dump(value)].pack("m")
33
+ end
34
+
35
+ def each
36
+ @db.each do |k, v|
37
+ yield(k, load_value(v))
38
+ end
39
+ end
40
+
41
+ def has_key?(key)
42
+ # Kyoto Cabinet doesn't have a way to query whether a key exists, so emulate it: fetch the keys that start with +key+ and look for an exact match
43
+ keys = @db.match_prefix(key)
44
+ !!keys && keys.include?(key)
45
+ end
46
+
47
+ def keys
48
+ acc = []
49
+ @db.each_key { |key| acc << key.first }
50
+ acc
51
+ end
52
+
53
+ def delete(key)
54
+ value = self[key]
55
+ @db.delete(key)
56
+ value
57
+ end
58
+
59
+ def merge!(hash)
60
+ hash.each { |key, value| self[key] = value }
61
+ self
62
+ end
63
+
64
+ private
65
+
66
+ def load_value(value)
67
+ Marshal.load(value.unpack("m")[0])
68
+ end
69
+
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,90 @@
1
+ begin
2
+ require 'sqlite3'
3
+ rescue LoadError
4
+ puts "You need the sqlite3 gem to use Anemone::Storage::SQLite3"
5
+ exit
6
+ end
7
+
8
+ module Anemone
9
+ module Storage
10
+ class SQLite3
11
+
12
+ def initialize(file)
13
+ @db = ::SQLite3::Database.new(file)
14
+ create_schema
15
+ end
16
+
17
+ def [](url)
18
+ value = @db.get_first_value('SELECT data FROM anemone_storage WHERE key = ?', url.to_s)
19
+ if value
20
+ Marshal.load(value)
21
+ end
22
+ end
23
+
24
+ def []=(url, value)
25
+ data = Marshal.dump(value)
26
+ if has_key?(url)
27
+ @db.execute('UPDATE anemone_storage SET data = ? WHERE key = ?', data, url.to_s)
28
+ else
29
+ @db.execute('INSERT INTO anemone_storage (data, key) VALUES(?, ?)', data, url.to_s)
30
+ end
31
+ end
32
+
33
+ def delete(url)
34
+ page = self[url]
35
+ @db.execute('DELETE FROM anemone_storage WHERE key = ?', url.to_s)
36
+ page
37
+ end
38
+
39
+ def each
40
+ @db.execute("SELECT key, data FROM anemone_storage ORDER BY id") do |row|
41
+ value = Marshal.load(row[1])
42
+ yield row[0], value
43
+ end
44
+ end
45
+
46
+ def merge!(hash)
47
+ hash.each { |key, value| self[key] = value }
48
+ self
49
+ end
50
+
51
+ def size
52
+ @db.get_first_value('SELECT COUNT(id) FROM anemone_storage')
53
+ end
54
+
55
+ def keys
56
+ @db.execute("SELECT key FROM anemone_storage ORDER BY id").map{|t| t[0]}
57
+ end
58
+
59
+ def has_key?(url)
60
+ !!@db.get_first_value('SELECT id FROM anemone_storage WHERE key = ?', url.to_s)
61
+ end
62
+
63
+ def close
64
+ @db.close
65
+ end
66
+
67
+ private
68
+
69
+ def create_schema
70
+ @db.execute_batch <<SQL
71
+ create table if not exists anemone_storage (
72
+ id INTEGER PRIMARY KEY ASC,
73
+ key TEXT,
74
+ data BLOB
75
+ );
76
+ create index if not exists anemone_key_idx on anemone_storage (key);
77
+ SQL
78
+ end
79
+
80
+ def load_page(hash)
81
+ BINARY_FIELDS.each do |field|
82
+ hash[field] = hash[field].to_s
83
+ end
84
+ Page.from_hash(hash)
85
+ end
86
+
87
+ end
88
+ end
89
+ end
90
+
@@ -1,6 +1,7 @@
1
1
  begin
2
2
  require 'tokyocabinet'
3
3
  rescue LoadError
4
+ puts $!
4
5
  puts "You need the tokyocabinet gem to use Anemone::Storage::TokyoCabinet"
5
6
  exit
6
7
  end
@@ -1,6 +1,6 @@
1
1
  $:.unshift(File.dirname(__FILE__))
2
2
  require 'spec_helper'
3
- %w[pstore tokyo_cabinet].each { |file| require "anemone/storage/#{file}.rb" }
3
+ %w[pstore tokyo_cabinet sqlite3].each { |file| require "anemone/storage/#{file}.rb" }
4
4
 
5
5
  module Anemone
6
6
  describe Core do
@@ -277,6 +277,27 @@ module Anemone
277
277
  end
278
278
  end
279
279
 
280
+ describe Storage::SQLite3 do
281
+ it_should_behave_like "crawl"
282
+
283
+ before(:all) do
284
+ @test_file = 'test.db'
285
+ end
286
+
287
+ before(:each) do
288
+ File.delete(@test_file) if File.exists?(@test_file)
289
+ @opts = {:storage => @store = Storage.SQLite3(@test_file)}
290
+ end
291
+
292
+ after(:each) do
293
+ @store.close
294
+ end
295
+
296
+ after(:each) do
297
+ File.delete(@test_file) if File.exists?(@test_file)
298
+ end
299
+ end
300
+
280
301
  describe "options" do
281
302
  it "should accept options for the crawl" do
282
303
  core = Anemone.crawl(SPEC_DOMAIN, :verbose => false,
@@ -1,10 +1,3 @@
1
- begin
2
- require 'fakeweb'
3
- rescue LoadError
4
- warn "You need the 'fakeweb' gem installed to test Anemone"
5
- exit
6
- end
7
-
8
1
  FakeWeb.allow_net_connect = false
9
2
 
10
3
  module Anemone
@@ -22,6 +15,7 @@ module Anemone
22
15
  @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
23
16
  @redirect = options[:redirect] if options.has_key?(:redirect)
24
17
  @auth = options[:auth] if options.has_key?(:auth)
18
+ @base = options[:base] if options.has_key?(:base)
25
19
  @content_type = options[:content_type] || "text/html"
26
20
  @body = options[:body]
27
21
 
@@ -40,7 +34,11 @@ module Anemone
40
34
  private
41
35
 
42
36
  def create_body
43
- @body = "<html><body>"
37
+ if @base
38
+ @body = "<html><head><base href=\"#{@base}\"></head><body>"
39
+ else
40
+ @body = "<html><body>"
41
+ end
44
42
  @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
45
43
  @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
46
44
  @body += "</body></html>"
@@ -1,4 +1,3 @@
1
- $:.unshift(File.dirname(__FILE__))
2
1
  require 'spec_helper'
3
2
 
4
3
  module Anemone
@@ -10,12 +9,7 @@ module Anemone
10
9
  end
11
10
 
12
11
  it "should still return a Page if an exception occurs during the HTTP connection" do
13
- class HTTP
14
- def refresh_connection
15
- raise "test exception"
16
- end
17
- end
18
-
12
+ HTTP.stub!(:refresh_connection).and_raise(StandardError)
19
13
  http = Anemone::HTTP.new
20
14
  http.fetch_page(SPEC_DOMAIN).should be_an_instance_of(Page)
21
15
  end
@@ -74,19 +74,103 @@ module Anemone
74
74
  @page.cookies.should == []
75
75
  end
76
76
 
77
- it "should have a to_hash method that converts the page to a hash" do
78
- hash = @page.to_hash
79
- hash['url'].should == @page.url.to_s
80
- hash['referer'].should == @page.referer.to_s
81
- hash['links'].should == @page.links.map(&:to_s)
82
- end
83
-
84
- it "should have a from_hash method to convert from a hash to a Page" do
85
- page = @page.dup
86
- page.depth = 1
87
- converted = Page.from_hash(page.to_hash)
88
- converted.links.should == page.links
89
- converted.depth.should == page.depth
77
+ describe "#to_hash" do
78
+ it "converts the page to a hash" do
79
+ hash = @page.to_hash
80
+ hash['url'].should == @page.url.to_s
81
+ hash['referer'].should == @page.referer.to_s
82
+ hash['links'].should == @page.links.map(&:to_s)
83
+ end
84
+
85
+ context "when redirect_to is nil" do
86
+ it "sets 'redirect_to' to nil in the hash" do
87
+ @page.redirect_to.should be_nil
88
+ @page.to_hash[:redirect_to].should be_nil
89
+ end
90
+ end
91
+
92
+ context "when redirect_to is a non-nil URI" do
93
+ it "sets 'redirect_to' to the URI string" do
94
+ new_page = Page.new(URI(SPEC_DOMAIN), {:redirect_to => URI(SPEC_DOMAIN + '1')})
95
+ new_page.redirect_to.to_s.should == SPEC_DOMAIN + '1'
96
+ new_page.to_hash['redirect_to'].should == SPEC_DOMAIN + '1'
97
+ end
98
+ end
99
+ end
100
+
101
+ describe "#from_hash" do
102
+ it "converts from a hash to a Page" do
103
+ page = @page.dup
104
+ page.depth = 1
105
+ converted = Page.from_hash(page.to_hash)
106
+ converted.links.should == page.links
107
+ converted.depth.should == page.depth
108
+ end
109
+
110
+ it 'handles a from_hash with a nil redirect_to' do
111
+ page_hash = @page.to_hash
112
+ page_hash['redirect_to'] = nil
113
+ lambda{Page.from_hash(page_hash)}.should_not raise_error(URI::InvalidURIError)
114
+ Page.from_hash(page_hash).redirect_to.should be_nil
115
+ end
116
+ end
117
+
118
+ describe "#redirect_to" do
119
+ context "when the page was a redirect" do
120
+ it "returns a URI of the page it redirects to" do
121
+ new_page = Page.new(URI(SPEC_DOMAIN), {:redirect_to => URI(SPEC_DOMAIN + '1')})
122
+ redirect = new_page.redirect_to
123
+ redirect.should be_a(URI)
124
+ redirect.to_s.should == SPEC_DOMAIN + '1'
125
+ end
126
+ end
127
+ end
128
+
129
+ it "should detect, store and expose the base url for the page head" do
130
+ base = "#{SPEC_DOMAIN}path/to/base_url/"
131
+ page = @http.fetch_page(FakePage.new('body_test', {:base => base}).url)
132
+ page.base.should == URI(base)
133
+ @page.base.should be_nil
134
+ end
135
+
136
+ it "should have a method to convert a relative url to an absolute one" do
137
+ @page.should respond_to(:to_absolute)
138
+
139
+ # Identity
140
+ @page.to_absolute(@page.url).should == @page.url
141
+ @page.to_absolute("").should == @page.url
142
+
143
+ # Root-ness
144
+ @page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
145
+
146
+ # Relativeness
147
+ relative_path = "a/relative/path"
148
+ @page.to_absolute(relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}")
149
+
150
+ deep_page = @http.fetch_page(FakePage.new('home/deep', :links => '1').url)
151
+ upward_relative_path = "../a/relative/path"
152
+ deep_page.to_absolute(upward_relative_path).should == URI("#{SPEC_DOMAIN}#{relative_path}")
153
+
154
+ # The base URL case
155
+ base_path = "path/to/base_url/"
156
+ base = "#{SPEC_DOMAIN}#{base_path}"
157
+ page = @http.fetch_page(FakePage.new('home', {:base => base}).url)
158
+
159
+ # Identity
160
+ page.to_absolute(page.url).should == page.url
161
+ # It should revert to the base url
162
+ page.to_absolute("").should_not == page.url
163
+
164
+ # Root-ness
165
+ page.to_absolute("/").should == URI("#{SPEC_DOMAIN}")
166
+
167
+ # Relativeness
168
+ relative_path = "a/relative/path"
169
+ page.to_absolute(relative_path).should == URI("#{base}#{relative_path}")
170
+
171
+ upward_relative_path = "../a/relative/path"
172
+ upward_base = "#{SPEC_DOMAIN}path/to/"
173
+ page.to_absolute(upward_relative_path).should == URI("#{upward_base}#{relative_path}")
90
174
  end
91
175
 
92
176
  end
@@ -1,6 +1,6 @@
1
1
  $:.unshift(File.dirname(__FILE__))
2
2
  require 'spec_helper'
3
- %w[pstore tokyo_cabinet mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
3
+ %w[pstore tokyo_cabinet sqlite3 mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
4
4
 
5
5
  module Anemone
6
6
  describe PageStore do
@@ -125,6 +125,24 @@ module Anemone
125
125
  end
126
126
  end
127
127
 
128
+ describe Storage::SQLite3 do
129
+ it_should_behave_like "page storage"
130
+
131
+ before(:each) do
132
+ @test_file = 'test.db'
133
+ File.delete(@test_file) if File.exists?(@test_file)
134
+ @opts = {:storage => @store = Storage.SQLite3(@test_file)}
135
+ end
136
+
137
+ after(:each) do
138
+ @store.close
139
+ end
140
+
141
+ after(:each) do
142
+ File.delete(@test_file) if File.exists?(@test_file)
143
+ end
144
+ end
145
+
128
146
  describe Storage::MongoDB do
129
147
  it_should_behave_like "page storage"
130
148
 
@@ -1,4 +1,6 @@
1
1
  require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'fakeweb'
2
4
  require File.dirname(__FILE__) + '/fakeweb_helper'
3
5
 
4
6
  $:.unshift(File.dirname(__FILE__) + '/../lib/')
@@ -1,7 +1,7 @@
1
1
  $:.unshift(File.dirname(__FILE__))
2
2
  require 'spec_helper'
3
3
 
4
- %w[pstore tokyo_cabinet mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
4
+ %w[pstore tokyo_cabinet kyoto_cabinet sqlite3 mongodb redis].each { |file| require "anemone/storage/#{file}.rb" }
5
5
 
6
6
  module Anemone
7
7
  describe Storage do
@@ -25,6 +25,14 @@ module Anemone
25
25
  store.close
26
26
  end
27
27
 
28
+ it "should have a class method to produce a SQLite3" do
29
+ test_file = 'test.db'
30
+ Anemone::Storage.should respond_to(:SQLite3)
31
+ store = Anemone::Storage.SQLite3(test_file)
32
+ store.should be_an_instance_of(Anemone::Storage::SQLite3)
33
+ store.close
34
+ end
35
+
28
36
  it "should have a class method to produce a MongoDB" do
29
37
  Anemone::Storage.should respond_to(:MongoDB)
30
38
  store = Anemone::Storage.MongoDB
@@ -105,6 +113,12 @@ module Anemone
105
113
 
106
114
  merged.should === @store
107
115
  end
116
+
117
+ it "should correctly deserialize nil redirect_to when loading" do
118
+ @page.redirect_to.should be_nil
119
+ @store[@url] = @page
120
+ @store[@url].redirect_to.should be_nil
121
+ end
108
122
  end
109
123
 
110
124
  describe PStore do
@@ -143,6 +157,47 @@ module Anemone
143
157
  end
144
158
  end
145
159
 
160
+ describe KyotoCabinet do
161
+ it_should_behave_like "storage engine"
162
+
163
+ before(:each) do
164
+ @test_file = 'test.kch'
165
+ File.delete @test_file rescue nil
166
+ @store = Anemone::Storage.KyotoCabinet(@test_file)
167
+ end
168
+
169
+ after(:each) do
170
+ @store.close
171
+ end
172
+
173
+ after(:all) do
174
+ File.delete @test_file rescue nil
175
+ end
176
+
177
+ it "should raise an error if supplied with a file extension other than .kch" do
178
+ lambda { Anemone::Storage.KyotoCabinet('test.tmp') }.should raise_error(RuntimeError)
179
+ end
180
+ end
181
+
182
+ describe SQLite3 do
183
+ it_should_behave_like "storage engine"
184
+
185
+ before(:each) do
186
+ @test_file = 'test.db'
187
+ File.delete @test_file rescue nil
188
+ @store = Anemone::Storage.SQLite3(@test_file)
189
+ end
190
+
191
+ after(:each) do
192
+ @store.close
193
+ end
194
+
195
+ after(:all) do
196
+ File.delete @test_file rescue nil
197
+ end
198
+
199
+ end
200
+
146
201
  describe Storage::MongoDB do
147
202
  it_should_behave_like "storage engine"
148
203
 
metadata CHANGED
@@ -1,139 +1,218 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: anemone
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 6
8
- - 1
9
- version: 0.6.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.7.0
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Chris Kite
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-02-24 00:00:00 -06:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-01-20 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &19111780 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 3
31
- - 0
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
32
21
  version: 1.3.0
33
22
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: robots
37
23
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *19111780
25
+ - !ruby/object:Gem::Dependency
26
+ name: robots
27
+ requirement: &19111300 !ruby/object:Gem::Requirement
39
28
  none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 0
45
- - 7
46
- - 2
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
47
32
  version: 0.7.2
48
33
  type: :runtime
49
- version_requirements: *id002
34
+ prerelease: false
35
+ version_requirements: *19111300
36
+ - !ruby/object:Gem::Dependency
37
+ name: rake
38
+ requirement: &19141340 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: 0.8.7
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *19141340
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &19140880 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.6.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *19140880
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &19140420 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 1.3.0
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *19140420
69
+ - !ruby/object:Gem::Dependency
70
+ name: redis
71
+ requirement: &19139960 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: 2.2.0
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *19139960
80
+ - !ruby/object:Gem::Dependency
81
+ name: mongo
82
+ requirement: &19139500 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: 1.3.1
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *19139500
91
+ - !ruby/object:Gem::Dependency
92
+ name: bson_ext
93
+ requirement: &19139040 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: 1.3.1
99
+ type: :development
100
+ prerelease: false
101
+ version_requirements: *19139040
102
+ - !ruby/object:Gem::Dependency
103
+ name: tokyocabinet
104
+ requirement: &19138580 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '1.29'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: *19138580
113
+ - !ruby/object:Gem::Dependency
114
+ name: kyotocabinet-ruby
115
+ requirement: &19138120 !ruby/object:Gem::Requirement
116
+ none: false
117
+ requirements:
118
+ - - ! '>='
119
+ - !ruby/object:Gem::Version
120
+ version: 1.27.1
121
+ type: :development
122
+ prerelease: false
123
+ version_requirements: *19138120
124
+ - !ruby/object:Gem::Dependency
125
+ name: sqlite3
126
+ requirement: &19137660 !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: 1.3.4
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: *19137660
50
135
  description:
51
136
  email:
52
- executables:
137
+ executables:
53
138
  - anemone
54
139
  extensions: []
55
-
56
- extra_rdoc_files:
140
+ extra_rdoc_files:
57
141
  - README.rdoc
58
- files:
142
+ files:
59
143
  - VERSION
60
144
  - LICENSE.txt
61
145
  - CHANGELOG.rdoc
62
146
  - README.rdoc
63
147
  - Rakefile
64
- - lib/anemone.rb
65
- - lib/anemone/cookie_store.rb
66
- - lib/anemone/storage.rb
67
- - lib/anemone/core.rb
68
- - lib/anemone/cli.rb
69
- - lib/anemone/exceptions.rb
70
- - lib/anemone/tentacle.rb
71
- - lib/anemone/storage/tokyo_cabinet.rb
72
- - lib/anemone/storage/base.rb
73
- - lib/anemone/storage/exceptions.rb
74
148
  - lib/anemone/storage/pstore.rb
75
149
  - lib/anemone/storage/mongodb.rb
150
+ - lib/anemone/storage/tokyo_cabinet.rb
151
+ - lib/anemone/storage/exceptions.rb
76
152
  - lib/anemone/storage/redis.rb
77
- - lib/anemone/http.rb
153
+ - lib/anemone/storage/sqlite3.rb
154
+ - lib/anemone/storage/base.rb
155
+ - lib/anemone/storage/kyoto_cabinet.rb
78
156
  - lib/anemone/page_store.rb
79
- - lib/anemone/cli/cron.rb
80
- - lib/anemone/cli/pagedepth.rb
81
- - lib/anemone/cli/count.rb
157
+ - lib/anemone/storage.rb
158
+ - lib/anemone/tentacle.rb
159
+ - lib/anemone/http.rb
160
+ - lib/anemone/cli.rb
161
+ - lib/anemone/page.rb
162
+ - lib/anemone/exceptions.rb
163
+ - lib/anemone/core.rb
82
164
  - lib/anemone/cli/url_list.rb
83
165
  - lib/anemone/cli/serialize.rb
84
- - lib/anemone/page.rb
85
- - spec/http_spec.rb
86
- - spec/page_store_spec.rb
87
- - spec/core_spec.rb
166
+ - lib/anemone/cli/count.rb
167
+ - lib/anemone/cli/cron.rb
168
+ - lib/anemone/cli/pagedepth.rb
169
+ - lib/anemone/cookie_store.rb
170
+ - lib/anemone.rb
88
171
  - spec/fakeweb_helper.rb
89
172
  - spec/page_spec.rb
90
- - spec/cookie_store_spec.rb
91
173
  - spec/anemone_spec.rb
92
- - spec/spec_helper.rb
174
+ - spec/core_spec.rb
93
175
  - spec/storage_spec.rb
176
+ - spec/page_store_spec.rb
177
+ - spec/cookie_store_spec.rb
178
+ - spec/http_spec.rb
179
+ - spec/spec_helper.rb
94
180
  - bin/anemone
95
- has_rdoc: true
96
181
  homepage: http://anemone.rubyforge.org
97
182
  licenses: []
98
-
99
183
  post_install_message:
100
- rdoc_options:
184
+ rdoc_options:
101
185
  - -m
102
186
  - README.rdoc
103
187
  - -t
104
188
  - Anemone
105
- require_paths:
189
+ require_paths:
106
190
  - lib
107
- required_ruby_version: !ruby/object:Gem::Requirement
191
+ required_ruby_version: !ruby/object:Gem::Requirement
108
192
  none: false
109
- requirements:
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- segments:
113
- - 0
114
- version: "0"
115
- required_rubygems_version: !ruby/object:Gem::Requirement
193
+ requirements:
194
+ - - ! '>='
195
+ - !ruby/object:Gem::Version
196
+ version: '0'
197
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
198
  none: false
117
- requirements:
118
- - - ">="
119
- - !ruby/object:Gem::Version
120
- segments:
121
- - 0
122
- version: "0"
199
+ requirements:
200
+ - - ! '>='
201
+ - !ruby/object:Gem::Version
202
+ version: '0'
123
203
  requirements: []
124
-
125
204
  rubyforge_project: anemone
126
- rubygems_version: 1.3.7
205
+ rubygems_version: 1.8.15
127
206
  signing_key:
128
207
  specification_version: 3
129
208
  summary: Anemone web-spider framework
130
- test_files:
131
- - spec/http_spec.rb
132
- - spec/page_store_spec.rb
133
- - spec/core_spec.rb
209
+ test_files:
134
210
  - spec/fakeweb_helper.rb
135
211
  - spec/page_spec.rb
136
- - spec/cookie_store_spec.rb
137
212
  - spec/anemone_spec.rb
138
- - spec/spec_helper.rb
213
+ - spec/core_spec.rb
139
214
  - spec/storage_spec.rb
215
+ - spec/page_store_spec.rb
216
+ - spec/cookie_store_spec.rb
217
+ - spec/http_spec.rb
218
+ - spec/spec_helper.rb