perobs 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Object.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -212,17 +212,7 @@ module PEROBS
212
212
  end
213
213
 
214
214
  def _set(attr, val)
215
- if val.respond_to?(:is_poxreference?)
216
- # References to other PEROBS::Objects must be handled somewhat
217
- # special.
218
- if @store != val.store
219
- PEROBS.log.fatal 'The referenced object is not part of this store'
220
- end
221
- elsif val.is_a?(ObjectBase)
222
- PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
223
- 'Have you used self() instead of myself() to get the reference ' +
224
- 'of the PEROBS object that you are trying to assign here?'
225
- end
215
+ _check_assignment_value(val)
226
216
  instance_variable_set(('@' + attr.to_s).to_sym, val)
227
217
  # Let the store know that we have a modified object. If we restored the
228
218
  # object from the DB, we don't mark it as modified.
@@ -236,13 +226,26 @@ module PEROBS
236
226
  end
237
227
 
238
228
  def _all_attributes
229
+ # Collect all persistent attributes from this class and all
230
+ # super classes into a single Array.
231
+ attributes = []
232
+ klass = self.class
233
+ while klass && klass.respond_to?(:attributes)
234
+ if (attrs = klass.attributes)
235
+ attributes += attrs
236
+ end
237
+ klass = klass.superclass
238
+ end
239
+
239
240
  # PEROBS objects that don't have persistent attributes declared don't
240
241
  # really make sense.
241
- unless self.class.attributes
242
+ if attributes.empty?
242
243
  PEROBS.log.fatal "No persistent attributes have been declared for " +
243
- "class #{self.class}. Use 'po_attr' to declare them."
244
+ "class #{self.class} or any parent class. Use 'attr_persist' " +
245
+ "to declare them."
244
246
  end
245
- self.class.attributes
247
+
248
+ attributes
246
249
  end
247
250
 
248
251
  end
@@ -86,6 +86,10 @@ module PEROBS
86
86
  _referenced_object == obj
87
87
  end
88
88
 
89
+ def eql?(obj)
90
+ _referenced_object._id == obj._id
91
+ end
92
+
89
93
  # BasicObject provides a equal?() method that prevents method_missing from
90
94
  # being called. So we have to pass the call manually to the referenced
91
95
  # object.
@@ -114,6 +118,20 @@ module PEROBS
114
118
  # common to all classes of persistent objects.
115
119
  class ObjectBase
116
120
 
121
+ # This is a list of the native Ruby classes that are supported for
122
+ # instance variable assignments in addition to other PEROBS objects.
123
+ if RUBY_VERSION < '2.2'
124
+ NATIVE_CLASSES = [
125
+ NilClass, Integer, Bignum, Fixnum, Float, String, Time,
126
+ TrueClass, FalseClass
127
+ ]
128
+ else
129
+ NATIVE_CLASSES = [
130
+ NilClass, Integer, Float, String, Time,
131
+ TrueClass, FalseClass
132
+ ]
133
+ end
134
+
117
135
  attr_reader :_id, :store, :myself
118
136
 
119
137
  # New PEROBS objects must always be created by calling Store.new().
@@ -192,6 +210,25 @@ module PEROBS
192
210
  @store.db.put_object(db_obj, @_id)
193
211
  end
194
212
 
213
+ #
214
+ def _check_assignment_value(val)
215
+ if val.respond_to?(:is_poxreference?)
216
+ # References to other PEROBS::Objects must be handled somewhat
217
+ # special.
218
+ if @store != val.store
219
+ PEROBS.log.fatal 'The referenced object is not part of this store'
220
+ end
221
+ elsif val.is_a?(ObjectBase)
222
+ PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
223
+ 'Have you used self() instead of myself() to get the reference ' +
224
+ 'of the PEROBS object that you are trying to assign here?'
225
+ elsif !NATIVE_CLASSES.include?(val.class)
226
+ PEROBS.log.fatal "Assigning objects of class #{val.class} is not " +
227
+ "supported. Only PEROBS objects or one of the following classes " +
228
+ "are supported: #{NATIVE_CLASSES.join(', ')}"
229
+ end
230
+ end
231
+
195
232
  # Read a raw object with the specified ID from the backing store and
196
233
  # instantiate a new object of the specific type.
197
234
  def ObjectBase.read(store, id)
@@ -218,10 +255,7 @@ module PEROBS
218
255
  data = nil
219
256
  if @_stash_map
220
257
  (level - 1).downto(0) do |lvl|
221
- if @_stash_map[lvl]
222
- data = @_stash_map[lvl]
223
- break
224
- end
258
+ break if (data = @_stash_map[lvl])
225
259
  end
226
260
  end
227
261
  if data
@@ -31,29 +31,31 @@ module PEROBS
31
31
 
32
32
  class PersistentObjectCache
33
33
 
34
- FLUSH_WATERMARK = 500
35
-
36
34
  # This cache class manages the presence of objects that primarily live in
37
35
  # a backing store but temporarily exist in memory as well. To work with
38
36
  # these objects, direct references must be only very short lived. Indirect
39
37
  # references can be done via a unique ID that the object must provide. Due
40
38
  # to the indirect references the Ruby garbage collector can collect these
41
- # objects and the cache is notified via a finalizer that the objects must
42
- # provide. The finalize must call the _collect() method. To reduce the
43
- # read and write latencies of the backing store this class keeps a subset
44
- # of the object in memory which prevents them from being collected. All
45
- # references to the objects must be resolved via the get() method to
46
- # prevent duplicate instances in memory of the same object.
47
- # @param size [Integer] Maximum number of objects to be cached at a time
39
+ # objects. To reduce the read and write latencies of the backing store
40
+ # this class keeps a subset of the objects in memory which prevents them
41
+ # from being collected. All references to the objects must be resolved via
42
+ # the get() method to prevent duplicate instances in memory of the same
43
+ # in-store object. The cache uses a least-recently-used (LRU) scheme to
44
+ # cache objects.
45
+ # @param size [Integer] Minimum number of objects to be cached at a time
46
+ # @param flush_delay [Integer] Determines how often non-forced flushes are
47
+ # ignored in a row before the flush is really done.
48
48
  # @param klass [Class] The class of the objects to be cached. Objects must
49
49
  # provide a uid() method that returns a unique ID for every object.
50
50
  # @param collection [] The object collection the objects belong to. It
51
51
  # must provide a ::load method.
52
- def initialize(size, klass, collection)
52
+ def initialize(size, flush_delay, klass, collection)
53
53
  @size = size
54
54
  @klass = klass
55
55
  @collection = collection
56
- @flush_counter = FLUSH_WATERMARK
56
+ @flush_delay = @flush_counter = flush_delay
57
+ @flush_times = 0
58
+
57
59
  clear
58
60
  end
59
61
 
@@ -61,65 +63,47 @@ module PEROBS
61
63
  # @param object [Object] Object to cache
62
64
  # @param modified [Boolean] True if the object was modified, false otherwise
63
65
  def insert(object, modified = true)
64
- # Store the object via its Ruby object ID instead of a direct reference.
65
- # This allows the object to be collected by the garbage collector.
66
- @in_memory_objects[object.uid] = object.object_id
66
+ unless object.is_a?(@klass)
67
+ raise ArgumentError, "You can insert only #{@klass} objects in this " +
68
+ "cache. You have tried to insert a #{object.class} instead."
69
+ end
70
+
71
+ if modified
72
+ @modified_entries[object.uid] = object
73
+ else
74
+ index = object.uid % @size
75
+ @unmodified_entries[index] = object
76
+ end
67
77
 
68
- @lines[object.uid % @size].insert(object, modified)
78
+ nil
69
79
  end
70
80
 
71
81
  # Retrieve an object reference from the cache.
72
82
  # @param uid [Integer] uid of the object to retrieve.
73
- def get(uid)
74
- if (entry = @lines[uid % @size].get(uid))
75
- return entry.obj
83
+ # @param ref [Object] optional reference to be used by the load method
84
+ def get(uid, ref = nil)
85
+ # First check if it's a modified object.
86
+ if (object = @modified_entries[uid])
87
+ return object
76
88
  end
77
89
 
78
- if (ruby_object_id = @in_memory_objects[uid])
79
- # We have the object in memory so we can just return it.
80
- begin
81
- object = ObjectSpace._id2ref(ruby_object_id)
82
- # Let's make sure the object is really the object we are looking
83
- # for. The GC might have recycled it already and the Ruby object ID
84
- # could now be used for another object.
85
- if object.is_a?(@klass) && object.uid == uid
86
- # Let's put the object in the cache. We might need it soon again.
87
- insert(object, false)
88
- return object
89
- end
90
- rescue RangeError
91
- # Due to a race condition the object can still be in the
92
- # @in_memory_objects list but has been collected already by the Ruby
93
- # GC. In that case we need to load it again. In this case the
94
- # _collect() call will happen much later, potentially after we have
95
- # registered a new object with the same ID.
96
- @in_memory_objects.delete(uid)
97
- end
90
+ # Then check the unmodified object list.
91
+ if (object = @unmodified_entries[uid % @size]) && object.uid == uid
92
+ return object
98
93
  end
99
94
 
100
- @klass::load(@collection, uid)
95
+ # If we don't have it in memory we need to load it.
96
+ @klass::load(@collection, uid, ref)
101
97
  end
102
98
 
103
99
  # Remove an object from the cache.
104
100
  # @param uid [Integer] unique ID of object to remove.
105
101
  def delete(uid)
106
- # The object is likely still in memory, but we really don't want to
107
- # access it anymore.
108
- @in_memory_objects.delete(uid)
109
-
110
- @lines[uid % @size].delete(uid)
111
- end
102
+ @modified_entries.delete(uid)
112
103
 
113
- # Remove a object from the in-memory list. This is an internal method
114
- # and should never be called from user code. It will be called from a
115
- # finalizer, so many restrictions apply!
116
- # @param uid [Integer] Object address of the object to remove from
117
- # the list
118
- # @param ruby_object_id [Integer] The Ruby object ID of the collected
119
- # object
120
- def _collect(address, ruby_object_id)
121
- if @in_memory_objects[id] == ruby_object_id
122
- @in_memory_objects.delete(address)
104
+ index = uid % @size
105
+ if (object = @unmodified_entries[index]) && object.uid == uid
106
+ @unmodified_entries[index] = nil
123
107
  end
124
108
  end
125
109
 
@@ -128,23 +112,25 @@ module PEROBS
128
112
  # @param now [Boolean]
129
113
  def flush(now = false)
130
114
  if now || (@flush_counter -= 1) <= 0
131
- @lines.each { |line| line.flush(now) }
132
- @flush_counter = FLUSH_WATERMARK
115
+ @modified_entries.each do |id, object|
116
+ object.save
117
+ end
118
+ @modified_entries = ::Hash.new
119
+ @flush_counter = @flush_delay
133
120
  end
121
+ @flush_times += 1
134
122
  end
135
123
 
136
124
  # Remove all entries from the cache.
137
125
  def clear
138
- # A hash that stores all objects by the Ruby object ID that are
139
- # currently in memory. Objects are added via insert() and will be
140
- # removed via delete() or _collect() called from a Object
141
- # finalizer. It only stores the object Ruby object ID hashed by their
142
- # address in the file. This enables them from being collected by the
143
- # Ruby garbage collector.
144
- @in_memory_objects = {}
145
- # This is the actual cache. The Array stores objects as Entry objects to
146
- # also store the modified/not-modified state.
147
- @lines = ::Array.new(@size) { |i| PersistentObjectCacheLine.new }
126
+ # This Array stores all unmodified entries. It has a fixed size and uses
127
+ # a % operation to compute the index from the object ID.
128
+ @unmodified_entries = ::Array.new(@size)
129
+
130
+ # This Hash stores all modified entries. It can grow and shrink as
131
+ # needed. A flush operation writes all modified objects into the backing
132
+ # store.
133
+ @modified_entries = ::Hash.new
148
134
  end
149
135
 
150
136
  end
@@ -34,30 +34,42 @@ module PEROBS
34
34
  class Entry < Struct.new(:obj, :modified)
35
35
  end
36
36
 
37
- WATERMARK = 4
37
+ # This defines the minimum size of the cache line. If it is too large, the
38
+ # time to find an entry will grow too much. If it is too small the number
39
+ # of cache lines will be too large and create more store overhead. By
40
+ # running benchmarks it turned out that 8 is a pretty good compromise.
41
+ WATERMARK = 8
38
42
 
39
43
  def initialize
40
44
  @entries = []
41
45
  end
42
46
 
43
47
  def insert(object, modified)
44
- @entries.each do |e|
45
- if e.obj.uid == object.uid
46
- e.modified = true if modified
47
- return
48
- end
48
+ if (index = @entries.find_index{ |e| e.obj.uid == object.uid })
49
+ # We have found and removed an existing entry for this particular
50
+ # object. If the modified flag is set, ensure that the entry has it
51
+ # set as well.
52
+ entry = @entries.delete_at(index)
53
+ entry.modified = true if modified && !entry.modified
54
+ else
55
+ # There is no existing entry for this object. Create a new one.
56
+ entry = Entry.new(object, modified)
49
57
  end
50
58
 
51
- # Insert the new entry at the beginning of the line.
52
- @entries.unshift(Entry.new(object, modified))
59
+ # Insert the entry at the beginning of the line.
60
+ @entries.unshift(entry)
53
61
  end
54
62
 
55
63
  def get(uid)
56
- @entries.each do |e|
57
- return e if e.obj.uid == uid
64
+ if (index = @entries.find_index{ |e| e.obj.uid == uid })
65
+ if index > 0
66
+ # Move the entry to the front.
67
+ @entries.unshift(@entries.delete_at(index))
68
+ end
69
+ @entries.first
70
+ else
71
+ nil
58
72
  end
59
-
60
- nil
61
73
  end
62
74
 
63
75
  # Delete the entry that matches the given UID
@@ -0,0 +1,97 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = ProgressMeter.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'time'
29
+
30
+ module PEROBS
31
+
32
+ # This is the base class for all ProgressMeter classes. It only logs into
33
+ # the PEROBS log. You need to create a derived class that overrides
34
+ # print_bar() and print_time() to provide more fancy outputs.
35
+ class ProgressMeter
36
+
37
+ def initialize
38
+ @name = nil
39
+ @max_value = nil
40
+ @current_value = nil
41
+ @start_time = nil
42
+ @end_time = nil
43
+ end
44
+
45
+ def start(name, max_value)
46
+ @name = name
47
+ unless max_value >= 0
48
+ raise ArgumentError, "Maximum value (#{max_value}) must be larger " +
49
+ "or equal to 0"
50
+ end
51
+ @max_value = max_value
52
+ @current_value = 0
53
+ @start_time = Time.now
54
+ @end_time = nil
55
+ print_bar
56
+
57
+ if block_given?
58
+ yield(self)
59
+ done
60
+ end
61
+ end
62
+
63
+ def update(value)
64
+ return unless (value_i = value.to_i) > @current_value
65
+
66
+ @current_value = value_i
67
+ print_bar
68
+ end
69
+
70
+ def done
71
+ @end_time = Time.now
72
+ print_time
73
+ PEROBS.log.info "#{@name} completed in " +
74
+ secsToHMS(@end_time - @start_time)
75
+ end
76
+
77
+ private
78
+
79
+ def print_bar
80
+ end
81
+
82
+ def print_time
83
+ end
84
+
85
+ def secsToHMS(secs)
86
+ secs = secs.to_i
87
+ s = secs % 60
88
+ mins = secs / 60
89
+ m = mins % 60
90
+ h = mins / 60
91
+ "#{h}:#{'%02d' % m}:#{'%02d' % s}"
92
+ end
93
+
94
+ end
95
+
96
+ end
97
+