sutch-anemone 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ require 'anemone/storage/exceptions'
2
+
3
module Anemone
  module Storage
    # Wraps a storage adapter and forwards every storage operation to it.
    # Any StandardError raised by the adapter is re-raised as one of the
    # storage error classes, carrying the original error's message.
    class Base

      # Stores the adapter after verifying that it responds to every public
      # method this wrapper defines.
      #
      # @param adapter [Object] backend that performs the actual storage
      # @raise [RuntimeError] if the adapter lacks one of the wrapped methods
      def initialize(adapter)
        @adap = adapter

        # Only the storage interface declared on Base is checked; methods
        # inherited from Object say nothing about adapter conformance.
        Base.public_instance_methods(false).each do |method|
          next if @adap.respond_to?(method)

          raise "Storage adapter must support method #{method}"
        end
      end

      # Looks up +key+ in the adapter.
      #
      # @raise [RetrievalError] if the adapter raises
      def [](key)
        @adap[key]
      rescue StandardError => e
        raise RetrievalError, e.message
      end

      # Stores +value+ under +key+ in the adapter.
      #
      # @raise [InsertionError] if the adapter raises
      def []=(key, value)
        @adap[key] = value
      rescue StandardError => e
        raise InsertionError, e.message
      end

      # Removes +key+ from the adapter and returns whatever the adapter returns.
      #
      # NOTE(review): this rescue needs a DeletionError constant to be
      # resolvable from Anemone::Storage (expected to come from
      # anemone/storage/exceptions) - confirm it is defined.
      #
      # @raise [DeletionError] if the adapter raises
      def delete(key)
        @adap.delete(key)
      rescue StandardError => e
        raise DeletionError, e.message
      end

      # Yields each key/value pair provided by the adapter. Errors raised by
      # the caller's block are wrapped as well, since the block runs inside
      # the rescued region.
      #
      # @raise [GenericError] if the adapter or the block raises
      def each
        @adap.each { |k, v| yield k, v }
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Passes +hash+ to the adapter's merge!.
      #
      # @raise [GenericError] if the adapter raises
      def merge!(hash)
        @adap.merge!(hash)
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Closes the adapter.
      #
      # @raise [CloseError] if the adapter raises
      def close
        @adap.close
      rescue StandardError => e
        raise CloseError, e.message
      end

      # Returns the adapter's size.
      #
      # @raise [GenericError] if the adapter raises
      def size
        @adap.size
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Returns the adapter's keys.
      #
      # @raise [GenericError] if the adapter raises
      def keys
        @adap.keys
      rescue StandardError => e
        raise GenericError, e.message
      end

      # Asks the adapter whether +key+ is present.
      #
      # @raise [GenericError] if the adapter raises
      def has_key?(key)
        @adap.has_key?(key)
      rescue StandardError => e
        raise GenericError, e.message
      end

    end
  end
end
@@ -0,0 +1,15 @@
1
module Anemone
  module Storage

    # Error classes for the storage layer. Error itself is not defined in
    # this file; it is presumably Anemone::Error and has to be loaded
    # first - TODO confirm.

    # Storage failure that has no more specific class.
    class GenericError < Error; end

    class ConnectionError < Error; end

    # Reading a record failed.
    class RetrievalError < Error; end

    # Writing a record failed.
    class InsertionError < Error; end

    # Removing a record failed. Storage::Base#delete raises this class, so
    # it has to exist for that rescue path to work.
    class DeletionError < Error; end

    # Closing the storage failed.
    class CloseError < Error; end

  end
end
@@ -0,0 +1,72 @@
1
+ begin
2
+ require 'kyotocabinet'
3
+ rescue LoadError
4
+ puts $!
5
+ puts "You need the kyotocabinet-ruby gem to use Anemone::Storage::KyotoCabinet"
6
+ exit
7
+ end
8
+
9
+ require 'forwardable'
10
+
11
module Anemone
  module Storage
    # Storage adapter backed by a Kyoto Cabinet database file. Values are
    # marshalled and Base64-encoded ("m" pack format) before being written,
    # and decoded again when read.
    class KyotoCabinet
      extend Forwardable

      # `each` is not delegated: it is defined below because stored values
      # have to be decoded before they are yielded.
      def_delegators :@db, :close, :size

      # Opens (creating if necessary) the database at +file+ and clears it.
      #
      # @param file [String] database path; must end in ".kch"
      # @raise [RuntimeError] if the extension is not ".kch"
      def initialize(file)
        raise "KyotoCabinet filename must have .kch extension" if File.extname(file) != '.kch'

        @db = ::KyotoCabinet::DB::new
        @db.open(file, ::KyotoCabinet::DB::OWRITER | ::KyotoCabinet::DB::OCREATE)
        @db.clear
      end

      # Returns the decoded value stored under +key+, or nil when the
      # database has no value for it.
      def [](key)
        encoded = @db[key]
        load_value(encoded) if encoded
      end

      # Stores +value+ under +key+ as Base64-encoded Marshal data.
      def []=(key, value)
        @db[key] = [Marshal.dump(value)].pack("m")
      end

      # Yields every key together with its decoded value.
      def each
        @db.each do |key, encoded|
          yield(key, load_value(encoded))
        end
      end

      # Reports whether a value is stored under +key+. Uses the same direct
      # lookup as #[] rather than collecting every key that shares the prefix.
      def has_key?(key)
        @db[key] ? true : false
      end

      # Returns all keys in the database.
      def keys
        acc = []
        # presumably each_key yields a one-element array, hence `first` -
        # verify against the kyotocabinet binding in use.
        @db.each_key { |key| acc << key.first }
        acc
      end

      # Removes +key+ and returns the value it held (nil if there was none).
      def delete(key)
        value = self[key]
        @db.delete(key)
        value
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      private

      # Reverses the encoding applied in #[]=.
      # NOTE(review): Marshal.load is only safe on data this application
      # wrote itself; do not point this adapter at untrusted database files.
      def load_value(value)
        Marshal.load(value.unpack("m")[0])
      end

    end
  end
end
@@ -0,0 +1,89 @@
1
+ begin
2
+ require 'mongo'
3
+ rescue LoadError
4
+ puts "You need the mongo gem to use Anemone::Storage::MongoDB"
5
+ exit
6
+ end
7
+
8
module Anemone
  module Storage
    # Storage adapter that keeps pages as documents in a MongoDB collection,
    # looked up by their 'url' field.
    class MongoDB

      # Page hash fields wrapped in BSON::Binary when written and turned
      # back into strings when read.
      BINARY_FIELDS = %w(body headers data)

      # Selects the collection, removes its existing documents and creates
      # an index on 'url'.
      #
      # @param mongo_db [Object] database handle; indexed by collection name
      # @param collection_name [String] collection used for page documents
      def initialize(mongo_db, collection_name)
        @db = mongo_db
        @collection = @db[collection_name]
        @collection.remove
        @collection.create_index 'url'
      end

      # Returns the page stored for +url+, or nil when no document matches.
      def [](url)
        doc = @collection.find_one('url' => url.to_s)
        load_page(doc) if doc
      end

      # Upserts +page+ under its own url (page.url); non-nil binary fields
      # are wrapped in BSON::Binary first.
      def []=(url, page)
        doc = page.to_hash
        BINARY_FIELDS.each do |field|
          raw = doc[field]
          doc[field] = BSON::Binary.new(raw) unless raw.nil?
        end
        selector = {'url' => page.url.to_s}
        @collection.update(selector, doc, :upsert => true)
      end

      # Removes the document for +url+ and returns the page it held.
      def delete(url)
        removed = self[url]
        @collection.remove('url' => url.to_s)
        removed
      end

      # Yields each stored page's url string together with the page.
      def each
        @collection.find do |cursor|
          cursor.each do |doc|
            loaded = load_page(doc)
            yield loaded.url.to_s, loaded
          end
        end
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each do |key, value|
          self[key] = value
        end
        self
      end

      # Number of documents in the collection.
      def size
        @collection.count
      end

      # Url strings of all stored pages, gathered by iterating with #each.
      def keys
        urls = []
        each { |url, _page| urls << url.to_s }
        urls
      end

      # True when a document exists for +url+.
      def has_key?(url)
        @collection.find_one('url' => url.to_s) ? true : false
      end

      # Closes the connection of the underlying database handle.
      def close
        @db.connection.close
      end

      private

      # Converts the binary fields of +hash+ to strings in place and builds
      # a Page from the result.
      def load_page(hash)
        BINARY_FIELDS.each { |field| hash[field] = hash[field].to_s }
        Page.from_hash(hash)
      end

    end
  end
end
89
+
@@ -0,0 +1,50 @@
1
+ require 'pstore'
2
+ require 'forwardable'
3
+
4
module Anemone
  module Storage
    # Storage adapter backed by a PStore file. The set of stored keys is
    # tracked in an in-memory Hash (@keys) that serves has_key?, keys and
    # size without opening a transaction.
    class PStore
      extend Forwardable

      def_delegators :@keys, :has_key?, :keys, :size

      # Deletes any existing file at +file+ and opens a fresh PStore there.
      #
      # @param file [String] path of the PStore file
      def initialize(file)
        # File.exist? is used because File.exists? is no longer available
        # on current Ruby versions.
        File.delete(file) if File.exist?(file)
        @store = ::PStore.new(file)
        @keys = {}
      end

      # Returns the value stored under +key+.
      def [](key)
        @store.transaction { |s| s[key] }
      end

      # Records +key+ in the key index and stores +value+ under it.
      def []=(key, value)
        @keys[key] = nil
        @store.transaction { |s| s[key] = value }
      end

      # Drops +key+ from the key index and from the store; returns the
      # result of the store's delete.
      def delete(key)
        @keys.delete(key)
        @store.transaction { |s| s.delete(key) }
      end

      # Yields every tracked key with its stored value. Each value is read
      # in its own transaction, so no transaction is open while the
      # caller's block runs.
      def each
        @keys.each_key do |key|
          value = nil
          @store.transaction { |s| value = s[key] }
          yield key, value
        end
      end

      # Stores all pairs of +hash+ within a single transaction and returns
      # self.
      def merge!(hash)
        @store.transaction do |s|
          hash.each do |key, value|
            s[key] = value
            @keys[key] = nil
          end
        end
        self
      end

      # Nothing to release; defined so the adapter offers `close`.
      def close; end

    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'redis'
2
+
3
module Anemone
  module Storage
    # Storage adapter that keeps each page as a Redis hash stored under
    # "<key_prefix>:pages:<key>".
    class Redis

      # Page hash fields that are marshalled before being written and
      # unmarshalled after being read.
      MARSHAL_FIELDS = %w(links visited fetched)

      # Connects to Redis and deletes every page already stored under the
      # prefix.
      #
      # @param opts [Hash] options for ::Redis.new, plus an optional
      #   :key_prefix (default 'anemone')
      def initialize(opts = {})
        # :key_prefix belongs to this adapter, not to the connection, so it
        # is removed from a copy instead of being forwarded; the caller's
        # hash is left untouched.
        redis_opts = opts.dup
        @key_prefix = redis_opts.delete(:key_prefix) || 'anemone'
        @redis = ::Redis.new(redis_opts)
        keys.each { |key| delete(key) }
      end

      # Returns the page stored for +key+, or nil when there is none.
      def [](key)
        rget(page_key(key))
      end

      # Writes every field of value.to_hash into the Redis hash for +key+.
      def []=(key, value)
        rkey = page_key(key)
        hash = value.to_hash
        MARSHAL_FIELDS.each do |field|
          hash[field] = Marshal.dump(hash[field])
        end
        hash.each do |field, field_value|
          @redis.hset(rkey, field, field_value)
        end
      end

      # Removes the page for +key+ and returns it (nil if it was absent).
      def delete(key)
        page = self[key]
        @redis.del(page_key(key))
        page
      end

      # Yields each stored page's url string together with the page.
      def each
        rkeys = @redis.keys("#{@key_prefix}:pages:*")
        rkeys.each do |rkey|
          page = rget(rkey)
          yield page.url.to_s, page
        end
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      # Number of Redis keys under the pages prefix.
      def size
        @redis.keys("#{@key_prefix}:pages:*").size
      end

      # Url strings of all stored pages, gathered by iterating with #each.
      def keys
        keys = []
        self.each { |k, v| keys << k.to_s }
        keys
      end

      # Reports whether a page is stored for +key+. Always returns true or
      # false: depending on the client version `exists` answers with a
      # boolean or with an integer count, and 0 is truthy in Ruby.
      def has_key?(key)
        rkey = page_key(key)
        return @redis.exists?(rkey) if @redis.respond_to?(:exists?)

        found = @redis.exists(rkey)
        found == true || (found.is_a?(Integer) && found > 0)
      end

      # Ends the Redis connection.
      def close
        @redis.quit
      end

      private

      # Builds the Redis key under which the page for +key+ is stored.
      def page_key(key)
        "#{@key_prefix}:pages:#{key}"
      end

      # Unmarshals the marshalled fields of +hash+ in place (skipping nil
      # and empty values) and builds a Page from the result.
      # NOTE(review): Marshal.load is only safe on data this application
      # wrote itself.
      def load_value(hash)
        MARSHAL_FIELDS.each do |field|
          unless hash[field].nil? || hash[field] == ''
            hash[field] = Marshal.load(hash[field])
          end
        end
        Page.from_hash(hash)
      end

      # Loads the page stored at the raw Redis key +rkey+. A missing key is
      # reported by hgetall as an empty hash, which maps to nil here.
      def rget(rkey)
        hash = @redis.hgetall(rkey)
        return nil if hash.nil? || hash.empty?

        load_value(hash)
      end

    end
  end
end
@@ -0,0 +1,90 @@
1
+ begin
2
+ require 'sqlite3'
3
+ rescue LoadError
4
+ puts "You need the sqlite3 gem to use Anemone::Storage::SQLite3"
5
+ exit
6
+ end
7
+
8
module Anemone
  module Storage
    # Storage adapter that keeps marshalled values in the anemone_storage
    # table of an SQLite3 database, keyed by url string.
    class SQLite3

      # Opens the database at +file+ and creates the table and index if
      # they do not exist yet. Existing rows are left in place.
      #
      # @param file [String] path of the SQLite3 database file
      def initialize(file)
        @db = ::SQLite3::Database.new(file)
        create_schema
      end

      # Returns the unmarshalled value stored for +url+, or nil when there
      # is no row for it.
      # NOTE(review): Marshal.load is only safe on database files this
      # application wrote itself.
      def [](url)
        value = @db.get_first_value('SELECT data FROM anemone_storage WHERE key = ?', url.to_s)
        Marshal.load(value) if value
      end

      # Stores +value+ for +url+, updating the existing row when there is
      # one and inserting a new row otherwise.
      def []=(url, value)
        data = Marshal.dump(value)
        if has_key?(url)
          @db.execute('UPDATE anemone_storage SET data = ? WHERE key = ?', data, url.to_s)
        else
          @db.execute('INSERT INTO anemone_storage (data, key) VALUES(?, ?)', data, url.to_s)
        end
      end

      # Removes the row for +url+ and returns the value it held.
      def delete(url)
        page = self[url]
        @db.execute('DELETE FROM anemone_storage WHERE key = ?', url.to_s)
        page
      end

      # Yields each key with its unmarshalled value, ordered by row id.
      def each
        @db.execute("SELECT key, data FROM anemone_storage ORDER BY id") do |row|
          value = Marshal.load(row[1])
          yield row[0], value
        end
      end

      # Stores every pair of +hash+ and returns self.
      def merge!(hash)
        hash.each { |key, value| self[key] = value }
        self
      end

      # Number of rows in the table.
      def size
        @db.get_first_value('SELECT COUNT(id) FROM anemone_storage')
      end

      # All keys, ordered by row id.
      def keys
        @db.execute("SELECT key FROM anemone_storage ORDER BY id").map{|t| t[0]}
      end

      # True when a row exists for +url+.
      def has_key?(url)
        !!@db.get_first_value('SELECT id FROM anemone_storage WHERE key = ?', url.to_s)
      end

      # Closes the database handle.
      def close
        @db.close
      end

      private

      # Creates the storage table and its key index when they are missing.
      def create_schema
        @db.execute_batch <<-SQL
          create table if not exists anemone_storage (
            id INTEGER PRIMARY KEY ASC,
            key TEXT,
            data BLOB
          );
          create index if not exists anemone_key_idx on anemone_storage (key);
        SQL
      end

    end
  end
end
90
+