perobs 3.0.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,153 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = PersistentObjectCache.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/PersistentObjectCacheLine'
29
+
30
+ module PEROBS
31
+
32
# This cache class manages the presence of objects that primarily live in
# a backing store but temporarily exist in memory as well. To work with
# these objects, direct references must be only very short lived. Indirect
# references can be done via a unique ID that the object must provide. Due
# to the indirect references the Ruby garbage collector can collect these
# objects and the cache is notified via a finalizer that the objects must
# provide. The finalizer must call the _collect() method. To reduce the
# read and write latencies of the backing store this class keeps a subset
# of the objects in memory which prevents them from being collected. All
# references to the objects must be resolved via the get() method to
# prevent duplicate instances in memory of the same object.
class PersistentObjectCache

  # A flush() call without the 'now' flag only writes data on every
  # FLUSH_WATERMARK-th invocation.
  FLUSH_WATERMARK = 500

  # Create a new, empty cache.
  # @param size [Integer] Number of cache lines. Objects are distributed
  #        over the lines by their uid modulo this value.
  # @param klass [Class] The class of the objects to be cached. Objects must
  #        provide a uid() method that returns a unique ID for every object.
  #        The class must provide a ::load method.
  # @param collection [] The object collection the objects belong to. It
  #        is passed as first argument to klass::load.
  def initialize(size, klass, collection)
    @size = size
    @klass = klass
    @collection = collection
    @flush_counter = FLUSH_WATERMARK
    clear
  end

  # Insert an object into the cache.
  # @param object [Object] Object to cache
  # @param modified [Boolean] True if the object was modified, false otherwise
  def insert(object, modified = true)
    # Store the object via its Ruby object ID instead of a direct reference.
    # This allows the object to be collected by the garbage collector.
    @in_memory_objects[object.uid] = object.object_id

    @lines[object.uid % @size].insert(object, modified)
  end

  # Retrieve an object reference from the cache. If the object is neither
  # in a cache line nor otherwise still in memory it is loaded via the
  # ::load method of the cached class.
  # @param uid [Integer] uid of the object to retrieve.
  # @return [Object] the cached object or whatever klass::load returns
  def get(uid)
    if (entry = @lines[uid % @size].get(uid))
      return entry.obj
    end

    if (ruby_object_id = @in_memory_objects[uid])
      # We have the object in memory so we can just return it.
      begin
        object = ObjectSpace._id2ref(ruby_object_id)
        # Let's make sure the object is really the object we are looking
        # for. The GC might have recycled it already and the Ruby object ID
        # could now be used for another object.
        if object.is_a?(@klass) && object.uid == uid
          # Let's put the object in the cache. We might need it soon again.
          insert(object, false)
          return object
        end
      rescue RangeError
        # Due to a race condition the object can still be in the
        # @in_memory_objects list but has been collected already by the Ruby
        # GC. In that case we need to load it again. In this case the
        # _collect() call will happen much later, potentially after we have
        # registered a new object with the same ID.
        @in_memory_objects.delete(uid)
      end
    end

    @klass.load(@collection, uid)
  end

  # Remove an object from the cache.
  # @param uid [Integer] unique ID of object to remove.
  def delete(uid)
    # The object is likely still in memory, but we really don't want to
    # access it anymore.
    @in_memory_objects.delete(uid)

    @lines[uid % @size].delete(uid)
  end

  # Remove an object from the in-memory list. This is an internal method
  # and should never be called from user code. It will be called from a
  # finalizer, so many restrictions apply!
  # @param address [Integer] Unique ID (uid) of the object to remove from
  #        the list
  # @param ruby_object_id [Integer] The Ruby object ID of the collected
  #        object
  def _collect(address, ruby_object_id)
    # Only drop the mapping if it still belongs to the collected object. A
    # new object with the same unique ID may have been registered in the
    # meantime and its entry must be preserved.
    if @in_memory_objects[address] == ruby_object_id
      @in_memory_objects.delete(address)
    end
  end

  # Write all excess modified objects into the backing store. If now is true
  # all modified objects will be written. Without the now flag the cache
  # lines are only flushed on every FLUSH_WATERMARK-th call.
  # @param now [Boolean]
  def flush(now = false)
    if now || (@flush_counter -= 1) <= 0
      @lines.each { |line| line.flush(now) }
      @flush_counter = FLUSH_WATERMARK
    end
  end

  # Remove all entries from the cache.
  def clear
    # A hash that maps the unique ID of every object that is currently in
    # memory to its Ruby object ID. Entries are added via insert() and will
    # be removed via delete() or _collect() called from an object
    # finalizer. Since only the Ruby object ID is stored and not a
    # reference, the objects can still be collected by the Ruby garbage
    # collector.
    @in_memory_objects = {}
    # This is the actual cache. Each line stores objects as Entry objects to
    # also store the modified/not-modified state.
    @lines = ::Array.new(@size) { PersistentObjectCacheLine.new }
  end

end
151
+
152
+ end
153
+
@@ -0,0 +1,87 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = PersistentObjectCacheLine.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ module PEROBS
29
+
30
# A single line of the PersistentObjectCache. It keeps a short list of
# entries with the most recently added entry first. Each entry pairs a
# cached object with its modified/not-modified state.
class PersistentObjectCacheLine

  # Utility class to store persistent objects and their
  # modified/not-modified state.
  Entry = Struct.new(:obj, :modified)

  # Number of entries that remain in the line after a flush. A flush
  # without the 'now' flag only acts once the line holds more entries than
  # this.
  WATERMARK = 4

  # Create a new, empty cache line.
  def initialize
    @entries = []
  end

  # Add the object to the line. If the line already has an entry with the
  # same uid, that entry is kept and only its modified flag is raised when
  # requested; the flag is never cleared by this method.
  # @param object [Object] Object to store. Must respond to uid().
  # @param modified [Boolean] True if the object must be saved on flush
  def insert(object, modified)
    if (entry = get(object.uid))
      entry.modified = true if modified
      return
    end

    # Insert the new entry at the beginning of the line.
    @entries.unshift(Entry.new(object, modified))
  end

  # Find the entry for the given UID.
  # @param uid [Integer]
  # @return [Entry, nil] the matching entry or nil if there is none
  def get(uid)
    @entries.find { |e| e.obj.uid == uid }
  end

  # Delete the entry that matches the given UID
  # @param uid [Integer]
  def delete(uid)
    @entries.delete_if { |e| e.obj.uid == uid }
  end

  # Save all modified entries and delete all but the most recently added.
  # Nothing happens unless now is true or the line holds more than
  # WATERMARK entries.
  # @param now [Boolean] True to save modified entries regardless of the
  #        number of entries in the line
  def flush(now)
    return unless now || @entries.length > WATERMARK

    @entries.each do |e|
      next unless e.modified

      e.obj.save
      e.modified = false
    end

    # Delete all but the first WATERMARK entries.
    @entries = @entries.first(WATERMARK) if @entries.length > WATERMARK
  end

end
85
+
86
+ end
87
+
@@ -27,7 +27,7 @@
27
27
 
28
28
  require 'perobs/Log'
29
29
  require 'perobs/EquiBlobsFile'
30
- require 'perobs/SpaceTreeNodeCache'
30
+ require 'perobs/PersistentObjectCache'
31
31
  require 'perobs/SpaceTreeNode'
32
32
  require 'perobs/FlatFile'
33
33
 
@@ -51,7 +51,9 @@ module PEROBS
51
51
  @nodes = EquiBlobsFile.new(@dir, 'database_spaces',
52
52
  SpaceTreeNode::NODE_BYTES, 1)
53
53
 
54
- @cache = SpaceTreeNodeCache.new(self, 128)
54
+ # Benchmark runs showed a cache size of 128 to be a good compromise
55
+ # between read and write performance trade-offs and memory consumption.
56
+ @cache = PersistentObjectCache.new(128, SpaceTreeNode, self)
55
57
  end
56
58
 
57
59
  # Open the SpaceTree file.
@@ -66,7 +68,7 @@ module PEROBS
66
68
 
67
69
  # Close the SpaceTree file.
68
70
  def close
69
- @cache.flush
71
+ @cache.flush(true)
70
72
  @nodes.close
71
73
  @root_address = nil
72
74
  @cache.clear
@@ -76,16 +76,16 @@ module PEROBS
76
76
  @larger = larger
77
77
 
78
78
  ObjectSpace.define_finalizer(
79
- self, SpaceTreeNode._finalize(@tree, @node_address))
80
- @tree.cache.insert_unmodified(self)
79
+ self, SpaceTreeNode._finalize(@tree, @node_address, object_id))
80
+ @tree.cache.insert(self, false)
81
81
  end
82
82
 
83
83
  # This method generates the destructor for the objects of this class. It
84
84
  # is done this way to prevent the Proc object hanging on to a reference to
85
85
  # self which would prevent the object from being collected. This internal
86
86
  # method is not intended for users to call.
87
- def SpaceTreeNode._finalize(tree, node_address)
88
- proc { tree.cache._collect(node_address) }
87
+ def SpaceTreeNode._finalize(tree, node_address, ruby_object_id)
88
+ proc { tree.cache._collect(node_address, ruby_object_id) }
89
89
  end
90
90
 
91
91
  # Create a new SpaceTreeNode. This method should be used for the creation
@@ -135,6 +135,7 @@ module PEROBS
135
135
  node
136
136
  end
137
137
 
138
+ # Save the node into the blob file.
138
139
  def save
139
140
  bytes = [ @blob_address, @size,
140
141
  @parent ? @parent.node_address : 0,
@@ -296,7 +297,13 @@ module PEROBS
296
297
  PEROBS.log.fatal "Cannot unlink unknown child node with address " +
297
298
  "#{child_node.node_address} from #{to_s}"
298
299
  end
299
- @tree.cache.insert_modified(self)
300
+ @tree.cache.insert(self)
301
+ end
302
+
303
+ # @return [Integer] The node address since it uniquely identifies the
304
+ # Node.
305
+ def uid
306
+ @node_address
300
307
  end
301
308
 
302
309
  # Depth-first iterator for all nodes. The iterator yields the given block
@@ -436,7 +443,7 @@ module PEROBS
436
443
  def set_size_and_address(size, address)
437
444
  @size = size
438
445
  @blob_address = address
439
- @tree.cache.insert_modified(self)
446
+ @tree.cache.insert(self)
440
447
  end
441
448
 
442
449
  def set_link(name, node_or_address)
@@ -452,12 +459,12 @@ module PEROBS
452
459
  # Clear the node link.
453
460
  instance_variable_set(name, nil)
454
461
  end
455
- @tree.cache.insert_modified(self)
462
+ @tree.cache.insert(self)
456
463
  end
457
464
 
458
465
  def parent=(p)
459
466
  @parent = p ? SpaceTreeNodeLink.new(@tree, p) : nil
460
- @tree.cache.insert_modified(self)
467
+ @tree.cache.insert(self)
461
468
  end
462
469
  # Compare this node to another node.
463
470
  # @return [Boolean] true if node address is identical, false otherwise
data/lib/perobs/Store.rb CHANGED
@@ -67,12 +67,20 @@ module PEROBS
67
67
  #
68
68
  # class Person < PEROBS::Object
69
69
  #
70
- # po_attr :name, :mother, :father, :kids
70
+ # attr_persist :name, :mother, :father, :kids
71
71
  #
72
+ # # The constructor is only called for the creation of a new object. It is
73
+ # # not called when the object is restored from the database. In that case
74
+ # # only restore() is called.
72
75
  # def initialize(cf, name)
73
76
  # super(cf)
74
- # attr_init(:name, name)
75
- # attr_init(:kids, @store.new(PEROBS::Array))
77
+ # self.name = name
78
+ # self.kids = @store.new(PEROBS::Array)
79
+ # end
80
+ #
81
+ # def restore
82
+ # # In case you need to do any checks or massaging (e. g. for additional
83
+ # # attributes) you can provide this method.
76
84
  # end
77
85
  #
78
86
  # def to_s
@@ -90,7 +98,7 @@ module PEROBS
90
98
  # joe.kids << jim
91
99
  # jim.mother = jane
92
100
  # jane.kids << jim
93
- # store.sync
101
+ # store.exit
94
102
  #
95
103
  class Store
96
104
 
@@ -126,7 +134,7 @@ module PEROBS
126
134
  # Unfortunately, it is 10x slower than marshal.
127
135
  def initialize(data_base, options = {})
128
136
  # Create a backing store handler
129
- @db = (options[:engine] || BTreeDB).new(data_base, options)
137
+ @db = (options[:engine] || FlatFileDB).new(data_base, options)
130
138
  @db.open
131
139
  # Create a map that can translate classes to numerical IDs and vice
132
140
  # versa.
@@ -228,10 +236,12 @@ module PEROBS
228
236
  end
229
237
 
230
238
  # Delete the entire store. The store is no longer usable after this
231
- # method was called.
239
+ # method was called. This is an alternative to exit() that additionally
240
+ # deletes the entire database.
232
241
  def delete_store
233
242
  @db.delete_database
234
- @db = @class_map = @cache = @root_objects = nil
243
+ @db = @class_map = @in_memory_objects = @stats = @cache = @root_objects =
244
+ nil
235
245
  end
236
246
 
237
247
  # Store the provided object under the given name. Use this to make the
@@ -310,18 +320,30 @@ module PEROBS
310
320
  # public API and should never be called by outside users. It's purely
311
321
  # intended for internal use.
312
322
  def object_by_id(id)
313
- if (obj = @in_memory_objects[id])
323
+ if (ruby_object_id = @in_memory_objects[id])
314
324
  # We have the object in memory so we can just return it.
315
325
  begin
316
- return ObjectSpace._id2ref(obj)
317
- rescue RangeError
326
+ object = ObjectSpace._id2ref(ruby_object_id)
327
+ # Let's make sure the object is really the object we are looking
328
+ # for. The GC might have recycled it already and the Ruby object ID
329
+ # could now be used for another object.
330
+ if object.is_a?(ObjectBase) && object._id == id
331
+ return object
332
+ end
333
+ rescue RangeError => e
318
334
  # Due to a race condition the object can still be in the
319
335
  # @in_memory_objects list but has been collected already by the Ruby
320
- # GC. In that case we need to load it again.
336
+ # GC. In that case we need to load it again. In this case the
337
+ # _collect() call will happen much later, potentially after we have
338
+ # registered a new object with the same ID.
321
339
  @in_memory_objects.delete(id)
322
340
  end
323
341
  end
324
342
 
343
+ if (obj = @cache.object_by_id(id))
344
+ PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in cache but not in_memory"
345
+ end
346
+
325
347
  # We don't have the object in memory. Let's find it in the storage.
326
348
  if @db.include?(id)
327
349
  # Great, object found. Read it into memory and return it.
@@ -332,6 +354,10 @@ module PEROBS
332
354
  return obj
333
355
  end
334
356
 
357
+ #if (obj = @db.search_object(id))
358
+ # PEROBS.log.fatal "Object was not in index but in DB"
359
+ #end
360
+
335
361
  # The requested object does not exist. Return nil.
336
362
  nil
337
363
  end
@@ -463,8 +489,10 @@ module PEROBS
463
489
  # and should never be called from user code. It will be called from a
464
490
  # finalizer, so many restrictions apply!
465
491
  # @param id [Integer] Object ID of object to remove from the list
466
- def _collect(id, ignore_errors = false)
467
- @in_memory_objects.delete(id)
492
+ def _collect(id, ruby_object_id)
493
+ if @in_memory_objects[id] == ruby_object_id
494
+ @in_memory_objects.delete(id)
495
+ end
468
496
  end
469
497
 
470
498
  # This method returns a Hash with some statistics about this store.