perobs 3.0.2 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -17
- data/lib/perobs/BTree.rb +9 -44
- data/lib/perobs/BTreeNode.rb +116 -88
- data/lib/perobs/BTreeNodeCache.rb +10 -8
- data/lib/perobs/BTreeNodeLink.rb +1 -1
- data/lib/perobs/Cache.rb +14 -14
- data/lib/perobs/DynamoDB.rb +1 -1
- data/lib/perobs/EquiBlobsFile.rb +7 -2
- data/lib/perobs/FlatFile.rb +28 -49
- data/lib/perobs/FlatFileBlobHeader.rb +1 -19
- data/lib/perobs/FlatFileDB.rb +5 -0
- data/lib/perobs/LockFile.rb +3 -0
- data/lib/perobs/Object.rb +8 -3
- data/lib/perobs/ObjectBase.rb +6 -4
- data/lib/perobs/PersistentObjectCache.rb +153 -0
- data/lib/perobs/PersistentObjectCacheLine.rb +87 -0
- data/lib/perobs/SpaceTree.rb +5 -3
- data/lib/perobs/SpaceTreeNode.rb +15 -8
- data/lib/perobs/Store.rb +41 -13
- data/lib/perobs/version.rb +1 -1
- data/test/Array_spec.rb +38 -38
- data/test/BTree_spec.rb +45 -0
- data/test/EquiBlobsFile_spec.rb +0 -4
- data/test/FlatFileDB_spec.rb +1 -1
- data/test/Hash_spec.rb +14 -13
- data/test/Object_spec.rb +5 -5
- data/test/Store_spec.rb +62 -19
- data/test/perobs_spec.rb +7 -3
- metadata +4 -3
- data/lib/perobs/SpaceTreeNodeCache.rb +0 -149
@@ -0,0 +1,153 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = PersistentObjectCache.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/PersistentObjectCacheLine'
|
29
|
+
|
30
|
+
module PEROBS
|
31
|
+
|
32
|
+
# This cache class manages the presence of objects that primarily live in
# a backing store but temporarily exist in memory as well. To work with
# these objects, direct references must be only very short lived. Indirect
# references can be done via a unique ID that the object must provide. Due
# to the indirect references the Ruby garbage collector can collect these
# objects and the cache is notified via a finalizer that the objects must
# provide. The finalizer must call the _collect() method. To reduce the
# read and write latencies of the backing store this class keeps a subset
# of the objects in memory which prevents them from being collected. All
# references to the objects must be resolved via the get() method to
# prevent duplicate instances in memory of the same object.
class PersistentObjectCache

  # A flush() call without the 'now' flag only does real work on every
  # FLUSH_WATERMARK-th invocation. All other calls just count down.
  FLUSH_WATERMARK = 500

  # Create a new, empty cache.
  # @param size [Integer] Number of cache lines. Objects are distributed
  #        over the lines by 'uid % size', so size must be larger than 0.
  # @param klass [Class] The class of the objects to be cached. Instances
  #        must provide a uid() method that returns a unique Integer ID for
  #        every object. The class itself must provide a
  #        load(collection, uid) class method that get() uses to fetch
  #        objects that are not in memory.
  # @param collection [Object] The object collection the objects belong to.
  #        It is passed unchanged as first argument to klass.load.
  def initialize(size, klass, collection)
    @size = size
    @klass = klass
    @collection = collection
    @flush_counter = FLUSH_WATERMARK
    clear
  end

  # Insert an object into the cache.
  # @param object [Object] Object to cache
  # @param modified [Boolean] True if the object was modified, false otherwise
  def insert(object, modified = true)
    # Store the object via its Ruby object ID instead of a direct reference.
    # This allows the object to be collected by the garbage collector.
    @in_memory_objects[object.uid] = object.object_id

    @lines[object.uid % @size].insert(object, modified)
  end

  # Retrieve an object reference from the cache. The lookup order is: the
  # cache line, then the list of objects that are still in memory, then
  # klass.load.
  # @param uid [Integer] uid of the object to retrieve.
  # @return [Object] the cached object or whatever klass.load returns
  def get(uid)
    if (entry = @lines[uid % @size].get(uid))
      return entry.obj
    end

    if (ruby_object_id = @in_memory_objects[uid])
      # We have the object in memory so we can just return it.
      begin
        object = ObjectSpace._id2ref(ruby_object_id)
        # Let's make sure the object is really the object we are looking
        # for. The GC might have recycled it already and the Ruby object ID
        # could now be used for another object.
        if object.is_a?(@klass) && object.uid == uid
          # Let's put the object in the cache. We might need it soon again.
          insert(object, false)
          return object
        end
      rescue RangeError
        # Due to a race condition the object can still be in the
        # @in_memory_objects list but has been collected already by the Ruby
        # GC. In that case we need to load it again. In this case the
        # _collect() call will happen much later, potentially after we have
        # registered a new object with the same ID.
        @in_memory_objects.delete(uid)
      end
    end

    # Not in memory. get() does not register the loaded object itself;
    # presumably klass.load (or the object's constructor) calls insert().
    @klass::load(@collection, uid)
  end

  # Remove an object from the cache.
  # @param uid [Integer] unique ID of object to remove.
  def delete(uid)
    # The object is likely still in memory, but we really don't want to
    # access it anymore.
    @in_memory_objects.delete(uid)

    @lines[uid % @size].delete(uid)
  end

  # Remove an object from the in-memory list. This is an internal method
  # and should never be called from user code. It will be called from a
  # finalizer, so many restrictions apply!
  # @param address [Integer] Unique ID (uid) of the object to remove from
  #        the list
  # @param ruby_object_id [Integer] The Ruby object ID of the collected
  #        object
  def _collect(address, ruby_object_id)
    # Only drop the entry if it still belongs to the collected object. A
    # newer object with the same uid may have been registered in the
    # meantime and must stay in the list.
    if @in_memory_objects[address] == ruby_object_id
      @in_memory_objects.delete(address)
    end
  end

  # Write all excess modified objects into the backing store. If now is true
  # all modified objects will be written. Without now, the cache lines are
  # only flushed on every FLUSH_WATERMARK-th call.
  # @param now [Boolean]
  def flush(now = false)
    if now || (@flush_counter -= 1) <= 0
      @lines.each { |line| line.flush(now) }
      @flush_counter = FLUSH_WATERMARK
    end
  end

  # Remove all entries from the cache. Modified objects are NOT saved by
  # this method.
  def clear
    # A hash that tracks all objects that are currently in memory. Objects
    # are added via insert() and will be removed via delete() or _collect()
    # called from an object finalizer. It only stores the Ruby object ID of
    # each object, hashed by the object's uid. This allows the objects to
    # be collected by the Ruby garbage collector.
    @in_memory_objects = {}
    # This is the actual cache. Each line stores objects as Entry objects to
    # also store the modified/not-modified state.
    @lines = ::Array.new(@size) { PersistentObjectCacheLine.new }
  end

end
|
151
|
+
|
152
|
+
end
|
153
|
+
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = PersistentObjectCacheLine.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
module PEROBS
|
29
|
+
|
30
|
+
# A single line (bucket) of a PersistentObjectCache. It keeps a short list
# of entries ordered by recency of insertion; the most recently inserted
# entry is at the front of the list.
class PersistentObjectCacheLine

  # Utility class to store persistent objects and their
  # modified/not-modified state.
  Entry = Struct.new(:obj, :modified)

  # Maximum number of entries that remain in the line after a flush().
  WATERMARK = 4

  # Create an empty cache line.
  def initialize
    @entries = []
  end

  # Add an object to the line or refresh its existing entry. In both cases
  # the entry ends up at the front of the line so that recently touched
  # objects are the last ones to be dropped by flush().
  # @param object [Object] Object to store. It must provide a uid() method.
  # @param modified [Boolean] True if the object has unsaved changes. An
  #        entry that is already marked as modified stays modified even if
  #        false is passed here.
  def insert(object, modified)
    uid = object.uid
    if (index = @entries.index { |e| e.obj.uid == uid })
      # Reuse the existing entry but move it to the front of the line.
      entry = @entries.delete_at(index)
      entry.modified = true if modified
    else
      entry = Entry.new(object, modified)
    end

    # Insert the entry at the beginning of the line.
    @entries.unshift(entry)
  end

  # Find the entry for the given UID.
  # @param uid [Integer]
  # @return [Entry, nil] the matching entry or nil if there is none
  def get(uid)
    @entries.find { |e| e.obj.uid == uid }
  end

  # Delete the entry that matches the given UID
  # @param uid [Integer]
  def delete(uid)
    @entries.delete_if { |e| e.obj.uid == uid }
  end

  # Save all modified entries and delete all but the WATERMARK most recently
  # inserted ones. Nothing happens if the line holds at most WATERMARK
  # entries and now is false.
  # @param now [Boolean] True to save modified entries regardless of the
  #        current size of the line
  def flush(now)
    return unless now || @entries.length > WATERMARK

    @entries.each do |e|
      next unless e.modified

      e.obj.save
      e.modified = false
    end

    # Delete all but the first WATERMARK entries. They have all been saved
    # above, so no modification is lost.
    @entries = @entries.first(WATERMARK) if @entries.length > WATERMARK
  end

end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
data/lib/perobs/SpaceTree.rb
CHANGED
@@ -27,7 +27,7 @@
|
|
27
27
|
|
28
28
|
require 'perobs/Log'
|
29
29
|
require 'perobs/EquiBlobsFile'
|
30
|
-
require 'perobs/
|
30
|
+
require 'perobs/PersistentObjectCache'
|
31
31
|
require 'perobs/SpaceTreeNode'
|
32
32
|
require 'perobs/FlatFile'
|
33
33
|
|
@@ -51,7 +51,9 @@ module PEROBS
|
|
51
51
|
@nodes = EquiBlobsFile.new(@dir, 'database_spaces',
|
52
52
|
SpaceTreeNode::NODE_BYTES, 1)
|
53
53
|
|
54
|
-
|
54
|
+
# Benchmark runs showed a cache size of 128 to be a good compromise
|
55
|
+
# between read and write performance trade-offs and memory consumption.
|
56
|
+
@cache = PersistentObjectCache.new(128, SpaceTreeNode, self)
|
55
57
|
end
|
56
58
|
|
57
59
|
# Open the SpaceTree file.
|
@@ -66,7 +68,7 @@ module PEROBS
|
|
66
68
|
|
67
69
|
# Close the SpaceTree file.
|
68
70
|
def close
|
69
|
-
@cache.flush
|
71
|
+
@cache.flush(true)
|
70
72
|
@nodes.close
|
71
73
|
@root_address = nil
|
72
74
|
@cache.clear
|
data/lib/perobs/SpaceTreeNode.rb
CHANGED
@@ -76,16 +76,16 @@ module PEROBS
|
|
76
76
|
@larger = larger
|
77
77
|
|
78
78
|
ObjectSpace.define_finalizer(
|
79
|
-
self, SpaceTreeNode._finalize(@tree, @node_address))
|
80
|
-
@tree.cache.
|
79
|
+
self, SpaceTreeNode._finalize(@tree, @node_address, object_id))
|
80
|
+
@tree.cache.insert(self, false)
|
81
81
|
end
|
82
82
|
|
83
83
|
# This method generates the destructor for the objects of this class. It
|
84
84
|
# is done this way to prevent the Proc object hanging on to a reference to
|
85
85
|
# self which would prevent the object from being collected. This internal
|
86
86
|
# method is not intended for users to call.
|
87
|
-
def SpaceTreeNode._finalize(tree, node_address)
|
88
|
-
proc { tree.cache._collect(node_address) }
|
87
|
+
def SpaceTreeNode._finalize(tree, node_address, ruby_object_id)
|
88
|
+
proc { tree.cache._collect(node_address, ruby_object_id) }
|
89
89
|
end
|
90
90
|
|
91
91
|
# Create a new SpaceTreeNode. This method should be used for the creation
|
@@ -135,6 +135,7 @@ module PEROBS
|
|
135
135
|
node
|
136
136
|
end
|
137
137
|
|
138
|
+
# Save the node into the blob file.
|
138
139
|
def save
|
139
140
|
bytes = [ @blob_address, @size,
|
140
141
|
@parent ? @parent.node_address : 0,
|
@@ -296,7 +297,13 @@ module PEROBS
|
|
296
297
|
PEROBS.log.fatal "Cannot unlink unknown child node with address " +
|
297
298
|
"#{child_node.node_address} from #{to_s}"
|
298
299
|
end
|
299
|
-
@tree.cache.
|
300
|
+
@tree.cache.insert(self)
|
301
|
+
end
|
302
|
+
|
303
|
+
# @return [Integer] The node address since it uniquely identifies the
|
304
|
+
# Node.
|
305
|
+
def uid
|
306
|
+
@node_address
|
300
307
|
end
|
301
308
|
|
302
309
|
# Depth-first iterator for all nodes. The iterator yields the given block
|
@@ -436,7 +443,7 @@ module PEROBS
|
|
436
443
|
def set_size_and_address(size, address)
|
437
444
|
@size = size
|
438
445
|
@blob_address = address
|
439
|
-
@tree.cache.
|
446
|
+
@tree.cache.insert(self)
|
440
447
|
end
|
441
448
|
|
442
449
|
def set_link(name, node_or_address)
|
@@ -452,12 +459,12 @@ module PEROBS
|
|
452
459
|
# Clear the node link.
|
453
460
|
instance_variable_set(name, nil)
|
454
461
|
end
|
455
|
-
@tree.cache.
|
462
|
+
@tree.cache.insert(self)
|
456
463
|
end
|
457
464
|
|
458
465
|
def parent=(p)
|
459
466
|
@parent = p ? SpaceTreeNodeLink.new(@tree, p) : nil
|
460
|
-
@tree.cache.
|
467
|
+
@tree.cache.insert(self)
|
461
468
|
end
|
462
469
|
# Compare this node to another node.
|
463
470
|
# @return [Boolean] true if node address is identical, false otherwise
|
data/lib/perobs/Store.rb
CHANGED
@@ -67,12 +67,20 @@ module PEROBS
|
|
67
67
|
#
|
68
68
|
# class Person < PEROBS::Object
|
69
69
|
#
|
70
|
-
#
|
70
|
+
# attr_persist :name, :mother, :father, :kids
|
71
71
|
#
|
72
|
+
# # The constructor is only called for the creation of a new object. It is
|
73
|
+
# # not called when the object is restored from the database. In that case
|
74
|
+
# # only restore() is called.
|
72
75
|
# def initialize(cf, name)
|
73
76
|
# super(cf)
|
74
|
-
#
|
75
|
-
#
|
77
|
+
# self.name = name
|
78
|
+
# self.kids = @store.new(PEROBS::Array)
|
79
|
+
# end
|
80
|
+
#
|
81
|
+
# def restore
|
82
|
+
# # In case you need to do any checks or massaging (e. g. for additional
|
83
|
+
# # attributes) you can provide this method.
|
76
84
|
# end
|
77
85
|
#
|
78
86
|
# def to_s
|
@@ -90,7 +98,7 @@ module PEROBS
|
|
90
98
|
# joe.kids << jim
|
91
99
|
# jim.mother = jane
|
92
100
|
# jane.kids << jim
|
93
|
-
# store.
|
101
|
+
# store.exit
|
94
102
|
#
|
95
103
|
class Store
|
96
104
|
|
@@ -126,7 +134,7 @@ module PEROBS
|
|
126
134
|
# Unfortunately, it is 10x slower than marshal.
|
127
135
|
def initialize(data_base, options = {})
|
128
136
|
# Create a backing store handler
|
129
|
-
@db = (options[:engine] ||
|
137
|
+
@db = (options[:engine] || FlatFileDB).new(data_base, options)
|
130
138
|
@db.open
|
131
139
|
# Create a map that can translate classes to numerical IDs and vice
|
132
140
|
# versa.
|
@@ -228,10 +236,12 @@ module PEROBS
|
|
228
236
|
end
|
229
237
|
|
230
238
|
# Delete the entire store. The store is no longer usable after this
|
231
|
-
# method was called.
|
239
|
+
# method was called. This is an alternative to exit() that additionally
|
240
|
+
# deletes the entire database.
|
232
241
|
def delete_store
|
233
242
|
@db.delete_database
|
234
|
-
@db = @class_map = @cache = @root_objects =
|
243
|
+
@db = @class_map = @in_memory_objects = @stats = @cache = @root_objects =
|
244
|
+
nil
|
235
245
|
end
|
236
246
|
|
237
247
|
# Store the provided object under the given name. Use this to make the
|
@@ -310,18 +320,30 @@ module PEROBS
|
|
310
320
|
# public API and should never be called by outside users. It's purely
|
311
321
|
# intended for internal use.
|
312
322
|
def object_by_id(id)
|
313
|
-
if (
|
323
|
+
if (ruby_object_id = @in_memory_objects[id])
|
314
324
|
# We have the object in memory so we can just return it.
|
315
325
|
begin
|
316
|
-
|
317
|
-
|
326
|
+
object = ObjectSpace._id2ref(ruby_object_id)
|
327
|
+
# Let's make sure the object is really the object we are looking
|
328
|
+
# for. The GC might have recycled it already and the Ruby object ID
|
329
|
+
# could now be used for another object.
|
330
|
+
if object.is_a?(ObjectBase) && object._id == id
|
331
|
+
return object
|
332
|
+
end
|
333
|
+
rescue RangeError => e
|
318
334
|
# Due to a race condition the object can still be in the
|
319
335
|
# @in_memory_objects list but has been collected already by the Ruby
|
320
|
-
# GC. In that case we need to load it again.
|
336
|
+
# GC. In that case we need to load it again. In this case the
|
337
|
+
# _collect() call will happen much later, potentially after we have
|
338
|
+
# registered a new object with the same ID.
|
321
339
|
@in_memory_objects.delete(id)
|
322
340
|
end
|
323
341
|
end
|
324
342
|
|
343
|
+
if (obj = @cache.object_by_id(id))
|
344
|
+
PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in cache but not in_memory"
|
345
|
+
end
|
346
|
+
|
325
347
|
# We don't have the object in memory. Let's find it in the storage.
|
326
348
|
if @db.include?(id)
|
327
349
|
# Great, object found. Read it into memory and return it.
|
@@ -332,6 +354,10 @@ module PEROBS
|
|
332
354
|
return obj
|
333
355
|
end
|
334
356
|
|
357
|
+
#if (obj = @db.search_object(id))
|
358
|
+
# PEROBS.log.fatal "Object was not in index but in DB"
|
359
|
+
#end
|
360
|
+
|
335
361
|
# The requested object does not exist. Return nil.
|
336
362
|
nil
|
337
363
|
end
|
@@ -463,8 +489,10 @@ module PEROBS
|
|
463
489
|
# and should never be called from user code. It will be called from a
|
464
490
|
# finalizer, so many restrictions apply!
|
465
491
|
# @param id [Integer] Object ID of object to remove from the list
|
466
|
-
def _collect(id,
|
467
|
-
@in_memory_objects
|
492
|
+
def _collect(id, ruby_object_id)
|
493
|
+
if @in_memory_objects[id] == ruby_object_id
|
494
|
+
@in_memory_objects.delete(id)
|
495
|
+
end
|
468
496
|
end
|
469
497
|
|
470
498
|
# This method returns a Hash with some statistics about this store.
|