sutch-anemone 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,75 @@
1
+ require 'anemone/storage/exceptions'
2
+
3
module Anemone
  module Storage
    # Uniform front end for a storage adapter.
    #
    # Every public method forwards to the wrapped adapter and converts any
    # StandardError raised while doing so into one of the storage error
    # classes (RetrievalError, InsertionError, DeletionError, CloseError,
    # GenericError). Those classes are expected to be provided by
    # 'anemone/storage/exceptions'.
    class Base

      # Wraps +adapter+ after verifying that it answers to every public
      # method this class defines itself.
      #
      # Raises RuntimeError naming the missing method otherwise.
      def initialize(adapter)
        @adap = adapter

        # Only the methods defined directly on Base form the adapter
        # contract; inherited Object methods are not part of the check.
        Base.public_instance_methods(false).each do |method|
          unless @adap.respond_to?(method)
            raise "Storage adapter must support method #{method}"
          end
        end
      end

      # Returns whatever the adapter returns for +key+.
      # Raises RetrievalError if the adapter fails.
      def [](key)
        @adap[key]
      rescue => e
        raise RetrievalError, e
      end

      # Stores +value+ under +key+.
      # Raises InsertionError if the adapter fails.
      def []=(key, value)
        @adap[key] = value
      rescue => e
        raise InsertionError, e
      end

      # Removes +key+, returning the adapter's result.
      # Raises DeletionError if the adapter fails.
      def delete(key)
        @adap.delete(key)
      rescue => e
        raise DeletionError, e
      end

      # Yields each key/value pair produced by the adapter.
      # Errors raised by the adapter or by the caller's block are
      # re-raised as GenericError.
      def each
        @adap.each { |k, v| yield k, v }
      rescue => e
        raise GenericError, e
      end

      # Forwards +hash+ to the adapter's merge!.
      # Raises GenericError if the adapter fails.
      def merge!(hash)
        @adap.merge!(hash)
      rescue => e
        raise GenericError, e
      end

      # Closes the adapter.
      # Raises CloseError if the adapter fails.
      def close
        @adap.close
      rescue => e
        raise CloseError, e
      end

      # Returns the adapter's size.
      # Raises GenericError if the adapter fails.
      def size
        @adap.size
      rescue => e
        raise GenericError, e
      end

      # Returns the adapter's keys.
      # Raises GenericError if the adapter fails.
      def keys
        @adap.keys
      rescue => e
        raise GenericError, e
      end

      # Returns the adapter's answer to has_key?(key).
      # Raises GenericError if the adapter fails.
      def has_key?(key)
        @adap.has_key?(key)
      rescue => e
        raise GenericError, e
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
module Anemone
  module Storage

    # Error classes raised by the storage layer. All of them derive from
    # Error, which is not defined in this file (presumably Anemone::Error —
    # confirm it is loaded before this file).

    # Failure in an operation that has no more specific error class.
    class GenericError < Error; end

    class ConnectionError < Error; end

    # Failure while reading a value.
    class RetrievalError < Error; end

    # Failure while writing a value.
    class InsertionError < Error; end

    # Failure while deleting a value. Storage::Base#delete raises this
    # class, so it has to exist alongside the others.
    class DeletionError < Error; end

    # Failure while closing the underlying store.
    class CloseError < Error; end

  end
end
@@ -0,0 +1,72 @@
1
+ begin
2
+ require 'kyotocabinet'
3
+ rescue LoadError
4
+ puts $!
5
+ puts "You need the kyotocabinet-ruby gem to use Anemone::Storage::KyotoCabinet"
6
+ exit
7
+ end
8
+
9
+ require 'forwardable'
10
+
11
module Anemone
  module Storage
    # Storage adapter backed by a Kyoto Cabinet database file.
    #
    # Values are serialized with Marshal and base64-packed ("m") before
    # being written, and decoded the same way when read back.
    class KyotoCabinet
      extend Forwardable

      # #each is not delegated: it is defined below because every value
      # has to be decoded before it is yielded.
      def_delegators :@db, :close, :size

      # Opens (creating if needed) the database at +file+ and empties it.
      #
      # Raises RuntimeError if +file+ does not end in .kch or if the
      # database cannot be opened.
      def initialize(file)
        raise "KyotoCabinet filename must have .kch extension" if File.extname(file) != '.kch'
        @db = ::KyotoCabinet::DB::new
        # DB#open reports failure through its return value, so check it
        # instead of carrying on with an unusable handle.
        unless @db.open(file, ::KyotoCabinet::DB::OWRITER | ::KyotoCabinet::DB::OCREATE)
          raise "Unable to open KyotoCabinet database #{file}: #{@db.error}"
        end
        @db.clear
      end

      # Returns the decoded value stored under +key+, or nil when the
      # database has nothing for it.
      def [](key)
        encoded = @db[key]
        load_value(encoded) if encoded
      end

      # Stores +value+ under +key+ in its encoded form.
      def []=(key, value)
        @db[key] = [Marshal.dump(value)].pack("m")
      end

      # Yields every key together with its decoded value.
      def each
        @db.each do |k, v|
          yield(k, load_value(v))
        end
      end

      # True when a record exists for +key+. A direct lookup is used
      # rather than a prefix match, which would return every key that
      # merely starts with +key+.
      def has_key?(key)
        !@db[key].nil?
      end

      # Returns all keys as an Array.
      def keys
        acc = []
        # each_key hands the key over wrapped in an array, hence #first
        @db.each_key { |key| acc << key.first }
        acc
      end

      # Removes +key+ and returns the value it held (nil if absent).
      def delete(key)
        value = self[key]
        @db.delete(key)
        value
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      private

      # Reverses the encoding applied in #[]=.
      # NOTE(review): Marshal.load must only ever see data written by this
      # class; do not point it at a database from an untrusted source.
      def load_value(value)
        Marshal.load(value.unpack("m")[0])
      end

    end
  end
end
@@ -0,0 +1,89 @@
1
+ begin
2
+ require 'mongo'
3
+ rescue LoadError
4
+ puts "You need the mongo gem to use Anemone::Storage::MongoDB"
5
+ exit
6
+ end
7
+
8
module Anemone
  module Storage
    # Storage adapter that keeps one document per page in a MongoDB
    # collection, looked up by its 'url' field.
    class MongoDB

      # Page hash fields wrapped in BSON::Binary when written.
      BINARY_FIELDS = %w(body headers data).freeze

      # mongo_db        - database handle; indexed with the collection name
      # collection_name - collection to use; it is emptied and gets an
      #                   index on 'url'
      def initialize(mongo_db, collection_name)
        @db = mongo_db
        @collection = @db[collection_name]
        @collection.remove
        @collection.create_index 'url'
      end

      # Returns the page stored for +url+, or nil when no document matches.
      def [](url)
        doc = @collection.find_one('url' => url.to_s)
        load_page(doc) if doc
      end

      # Upserts +page+ under its own URL (page.url). Non-nil binary fields
      # are wrapped in BSON::Binary first.
      def []=(url, page)
        hash = page.to_hash
        BINARY_FIELDS.each do |field|
          hash[field] = BSON::Binary.new(hash[field]) unless hash[field].nil?
        end
        @collection.update(
          {'url' => page.url.to_s},
          hash,
          :upsert => true
        )
      end

      # Removes the document for +url+ and returns the page it held.
      def delete(url)
        page = self[url]
        @collection.remove('url' => url.to_s)
        page
      end

      # Yields the URL string and page for every stored document.
      def each
        @collection.find do |cursor|
          cursor.each do |doc|
            page = load_page(doc)
            yield page.url.to_s, page
          end
        end
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      # Number of documents in the collection.
      def size
        @collection.count
      end

      # URLs of all stored pages, as Strings.
      def keys
        urls = []
        each { |url, _page| urls << url.to_s }
        urls
      end

      # True when a document exists for +url+.
      def has_key?(url)
        !!@collection.find_one('url' => url.to_s)
      end

      # Closes the connection behind the database handle.
      def close
        @db.connection.close
      end

      private

      # Turns a stored document back into a Page. Binary fields are
      # converted to Strings; nil fields are left nil, mirroring #[]=
      # which never wraps nil, so a page round-trips unchanged.
      def load_page(hash)
        BINARY_FIELDS.each do |field|
          hash[field] = hash[field].to_s unless hash[field].nil?
        end
        Page.from_hash(hash)
      end

    end
  end
end
89
+
@@ -0,0 +1,50 @@
1
+ require 'pstore'
2
+ require 'forwardable'
3
+
4
module Anemone
  module Storage
    # Storage adapter backed by a ::PStore file. The set of stored keys is
    # mirrored in an in-memory Hash so has_key?, keys and size never have
    # to open the file.
    class PStore
      extend Forwardable

      def_delegators :@keys, :has_key?, :keys, :size

      # Starts from an empty store: an existing +file+ is deleted first.
      def initialize(file)
        # File.exists? no longer exists on current Rubies; exist? works
        # on every version.
        File.delete(file) if File.exist?(file)
        @store = ::PStore.new(file)
        @keys = {}
      end

      # Returns the value stored under +key+ (nil if absent). Uses a
      # read-only transaction because nothing is modified.
      def [](key)
        @store.transaction(true) { |s| s[key] }
      end

      # Stores +value+ under +key+ and records the key.
      def []=(key,value)
        @keys[key] = nil
        @store.transaction { |s| s[key] = value }
      end

      # Removes +key+ and returns the value it held.
      def delete(key)
        @keys.delete(key)
        @store.transaction { |s| s.delete key}
      end

      # Yields every recorded key with its stored value. Each value is
      # read in its own transaction, which is finished before the caller's
      # block runs.
      def each
        @keys.each_key do |key|
          value = @store.transaction(true) { |s| s[key] }
          yield key, value
        end
      end

      # Stores every pair of +hash+ in a single transaction; returns self.
      def merge!(hash)
        @store.transaction do |s|
          hash.each { |key, value| s[key] = value; @keys[key] = nil }
        end
        self
      end

      # Nothing to release.
      def close; end

    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'redis'
2
+
3
module Anemone
  module Storage
    # Storage adapter that keeps each page as a Redis hash stored under
    # "<key_prefix>:pages:<key>".
    class Redis

      # Page hash fields that are Marshal-dumped before being written.
      MARSHAL_FIELDS = %w(links visited fetched).freeze

      # opts - passed to ::Redis.new; :key_prefix (default 'anemone')
      #        namespaces the keys this adapter uses.
      #
      # Every page already stored under the prefix is deleted.
      def initialize(opts = {})
        @redis = ::Redis.new(opts)
        @key_prefix = opts[:key_prefix] || 'anemone'
        keys.each { |key| delete(key) }
      end

      # Returns the page stored under +key+, or nil when there is none.
      def [](key)
        rget(page_key(key))
      end

      # Writes every field of value.to_hash into the Redis hash for +key+.
      def []=(key, value)
        rkey = page_key(key)
        hash = value.to_hash
        MARSHAL_FIELDS.each do |field|
          hash[field] = Marshal.dump(hash[field])
        end
        hash.each do |field, field_value|
          @redis.hset(rkey, field, field_value)
        end
      end

      # Removes +key+ and returns the page it held.
      def delete(key)
        page = self[key]
        @redis.del(page_key(key))
        page
      end

      # Yields the URL string and page for every stored page.
      def each
        rkeys = @redis.keys("#{@key_prefix}:pages:*")
        rkeys.each do |rkey|
          page = rget(rkey)
          # the key may have vanished since it was listed
          next if page.nil?
          yield page.url.to_s, page
        end
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      # Number of Redis keys under the pages prefix.
      def size
        @redis.keys("#{@key_prefix}:pages:*").size
      end

      # URLs of all stored pages, as Strings.
      def keys
        urls = []
        each { |url, _page| urls << url.to_s }
        urls
      end

      # True when a page is stored under +key+. The client's answer is
      # normalized because an Integer count of 0 would be truthy in Ruby.
      def has_key?(key)
        found = @redis.exists(page_key(key))
        found.is_a?(Integer) ? found > 0 : !!found
      end

      # Ends the Redis connection.
      def close
        @redis.quit
      end

      private

      # Redis key under which the page for +key+ is stored.
      def page_key(key)
        "#{@key_prefix}:pages:#{key.to_s}"
      end

      # Rebuilds a Page from a Redis hash, undoing the Marshal.dump applied
      # in #[]= for fields that are present and non-empty.
      # NOTE(review): Marshal.load must only see data written by this
      # class; do not share the key prefix with untrusted writers.
      def load_value(hash)
        MARSHAL_FIELDS.each do |field|
          unless hash[field].nil? || hash[field] == ''
            hash[field] = Marshal.load(hash[field])
          end
        end
        Page.from_hash(hash)
      end

      # Fetches and decodes the hash at +rkey+. An empty result means the
      # key does not exist, so nil is returned instead of building a Page
      # from nothing.
      def rget(rkey)
        hash = @redis.hgetall(rkey)
        return nil if hash.nil? || hash.empty?
        load_value(hash)
      end

    end
  end
end
@@ -0,0 +1,90 @@
1
+ begin
2
+ require 'sqlite3'
3
+ rescue LoadError
4
+ puts "You need the sqlite3 gem to use Anemone::Storage::SQLite3"
5
+ exit
6
+ end
7
+
8
module Anemone
  module Storage
    # Storage adapter that keeps Marshal-dumped values in the
    # anemone_storage table of a SQLite3 database file.
    class SQLite3

      # Opens the database at +file+ and makes sure the table and its key
      # index exist. Rows already in the table are left in place.
      def initialize(file)
        @db = ::SQLite3::Database.new(file)
        create_schema
      end

      # Returns the value stored for +url+, or nil when there is no row.
      # NOTE(review): Marshal.load must only see data written by this
      # class; do not open a database file from an untrusted source.
      def [](url)
        value = @db.get_first_value('SELECT data FROM anemone_storage WHERE key = ?', url.to_s)
        Marshal.load(value) if value
      end

      # Stores +value+ for +url+, updating the existing row when there is
      # one and inserting a new row otherwise.
      def []=(url, value)
        data = Marshal.dump(value)
        if has_key?(url)
          @db.execute('UPDATE anemone_storage SET data = ? WHERE key = ?', data, url.to_s)
        else
          @db.execute('INSERT INTO anemone_storage (data, key) VALUES(?, ?)', data, url.to_s)
        end
      end

      # Removes the row for +url+ and returns the value it held.
      def delete(url)
        page = self[url]
        @db.execute('DELETE FROM anemone_storage WHERE key = ?', url.to_s)
        page
      end

      # Yields key and decoded value for every row, in id order.
      def each
        @db.execute("SELECT key, data FROM anemone_storage ORDER BY id") do |row|
          value = Marshal.load(row[1])
          yield row[0], value
        end
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      # Number of rows in the table.
      def size
        @db.get_first_value('SELECT COUNT(id) FROM anemone_storage')
      end

      # All keys, in id order.
      def keys
        @db.execute("SELECT key FROM anemone_storage ORDER BY id").map{|t| t[0]}
      end

      # True when a row exists for +url+.
      def has_key?(url)
        !!@db.get_first_value('SELECT id FROM anemone_storage WHERE key = ?', url.to_s)
      end

      # Closes the database handle.
      def close
        @db.close
      end

      private

      # Creates the storage table and the index on key if they are missing.
      def create_schema
        @db.execute_batch <<-SQL
          create table if not exists anemone_storage (
            id INTEGER PRIMARY KEY ASC,
            key TEXT,
            data BLOB
          );
          create index if not exists anemone_key_idx on anemone_storage (key);
        SQL
      end

    end
  end
end
90
+