perobs 4.0.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/lib/perobs.rb +1 -0
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +83 -12
  5. data/lib/perobs/BTreeBlob.rb +1 -1
  6. data/lib/perobs/BTreeDB.rb +2 -2
  7. data/lib/perobs/BTreeNode.rb +365 -85
  8. data/lib/perobs/BigArray.rb +267 -0
  9. data/lib/perobs/BigArrayNode.rb +998 -0
  10. data/lib/perobs/BigHash.rb +262 -0
  11. data/lib/perobs/BigTree.rb +184 -0
  12. data/lib/perobs/BigTreeNode.rb +873 -0
  13. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  14. data/lib/perobs/DataBase.rb +4 -3
  15. data/lib/perobs/DynamoDB.rb +57 -15
  16. data/lib/perobs/EquiBlobsFile.rb +143 -51
  17. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  18. data/lib/perobs/FlatFile.rb +363 -203
  19. data/lib/perobs/FlatFileBlobHeader.rb +98 -54
  20. data/lib/perobs/FlatFileDB.rb +42 -20
  21. data/lib/perobs/Hash.rb +58 -13
  22. data/lib/perobs/IDList.rb +144 -0
  23. data/lib/perobs/IDListPage.rb +107 -0
  24. data/lib/perobs/IDListPageFile.rb +180 -0
  25. data/lib/perobs/IDListPageRecord.rb +142 -0
  26. data/lib/perobs/Object.rb +18 -15
  27. data/lib/perobs/ObjectBase.rb +38 -4
  28. data/lib/perobs/PersistentObjectCache.rb +53 -67
  29. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  30. data/lib/perobs/ProgressMeter.rb +97 -0
  31. data/lib/perobs/SpaceTree.rb +21 -12
  32. data/lib/perobs/SpaceTreeNode.rb +53 -61
  33. data/lib/perobs/Store.rb +71 -32
  34. data/lib/perobs/version.rb +1 -1
  35. data/perobs.gemspec +4 -4
  36. data/test/Array_spec.rb +15 -6
  37. data/test/BTree_spec.rb +5 -2
  38. data/test/BigArray_spec.rb +214 -0
  39. data/test/BigHash_spec.rb +144 -0
  40. data/test/BigTreeNode_spec.rb +153 -0
  41. data/test/BigTree_spec.rb +259 -0
  42. data/test/EquiBlobsFile_spec.rb +105 -1
  43. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  44. data/test/FlatFileDB_spec.rb +63 -14
  45. data/test/Hash_spec.rb +1 -2
  46. data/test/IDList_spec.rb +77 -0
  47. data/test/LegacyDBs/LegacyDB.rb +151 -0
  48. data/test/LegacyDBs/version_3/class_map.json +1 -0
  49. data/test/LegacyDBs/version_3/config.json +1 -0
  50. data/test/LegacyDBs/version_3/database.blobs +0 -0
  51. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  52. data/test/LegacyDBs/version_3/index.blobs +0 -0
  53. data/test/LegacyDBs/version_3/version +1 -0
  54. data/test/LockFile_spec.rb +9 -6
  55. data/test/SpaceTree_spec.rb +4 -1
  56. data/test/Store_spec.rb +290 -199
  57. data/test/spec_helper.rb +9 -4
  58. metadata +47 -10
  59. data/lib/perobs/TreeDB.rb +0 -277
@@ -2,7 +2,7 @@
2
2
  #
3
3
  # = Object.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
6
  #
7
7
  # MIT License
8
8
  #
@@ -212,17 +212,7 @@ module PEROBS
212
212
  end
213
213
 
214
214
  def _set(attr, val)
215
- if val.respond_to?(:is_poxreference?)
216
- # References to other PEROBS::Objects must be handled somewhat
217
- # special.
218
- if @store != val.store
219
- PEROBS.log.fatal 'The referenced object is not part of this store'
220
- end
221
- elsif val.is_a?(ObjectBase)
222
- PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
223
- 'Have you used self() instead of myself() to get the reference ' +
224
- 'of the PEROBS object that you are trying to assign here?'
225
- end
215
+ _check_assignment_value(val)
226
216
  instance_variable_set(('@' + attr.to_s).to_sym, val)
227
217
  # Let the store know that we have a modified object. If we restored the
228
218
  # object from the DB, we don't mark it as modified.
@@ -236,13 +226,26 @@ module PEROBS
236
226
  end
237
227
 
238
228
  def _all_attributes
229
+ # Collect all persistent attributes from this class and all
230
+ # super classes into a single Array.
231
+ attributes = []
232
+ klass = self.class
233
+ while klass && klass.respond_to?(:attributes)
234
+ if (attrs = klass.attributes)
235
+ attributes += attrs
236
+ end
237
+ klass = klass.superclass
238
+ end
239
+
239
240
  # PEROBS objects that don't have persistent attributes declared don't
240
241
  # really make sense.
241
- unless self.class.attributes
242
+ if attributes.empty?
242
243
  PEROBS.log.fatal "No persistent attributes have been declared for " +
243
- "class #{self.class}. Use 'po_attr' to declare them."
244
+ "class #{self.class} or any parent class. Use 'attr_persist' " +
245
+ "to declare them."
244
246
  end
245
- self.class.attributes
247
+
248
+ attributes
246
249
  end
247
250
 
248
251
  end
@@ -86,6 +86,10 @@ module PEROBS
86
86
  _referenced_object == obj
87
87
  end
88
88
 
89
+ def eql?(obj)
90
+ _referenced_object._id == obj._id
91
+ end
92
+
89
93
  # BasicObject provides a equal?() method that prevents method_missing from
90
94
  # being called. So we have to pass the call manually to the referenced
91
95
  # object.
@@ -114,6 +118,20 @@ module PEROBS
114
118
  # common to all classes of persistent objects.
115
119
  class ObjectBase
116
120
 
121
+ # This is a list of the native Ruby classes that are supported for
122
+ # instance variable assignments in addition to other PEROBS objects.
123
+ if RUBY_VERSION < '2.2'
124
+ NATIVE_CLASSES = [
125
+ NilClass, Integer, Bignum, Fixnum, Float, String, Time,
126
+ TrueClass, FalseClass
127
+ ]
128
+ else
129
+ NATIVE_CLASSES = [
130
+ NilClass, Integer, Float, String, Time,
131
+ TrueClass, FalseClass
132
+ ]
133
+ end
134
+
117
135
  attr_reader :_id, :store, :myself
118
136
 
119
137
  # New PEROBS objects must always be created by calling Store.new().
@@ -192,6 +210,25 @@ module PEROBS
192
210
  @store.db.put_object(db_obj, @_id)
193
211
  end
194
212
 
213
+ #
214
+ def _check_assignment_value(val)
215
+ if val.respond_to?(:is_poxreference?)
216
+ # References to other PEROBS::Objects must be handled somewhat
217
+ # special.
218
+ if @store != val.store
219
+ PEROBS.log.fatal 'The referenced object is not part of this store'
220
+ end
221
+ elsif val.is_a?(ObjectBase)
222
+ PEROBS.log.fatal 'A PEROBS::ObjectBase object escaped! ' +
223
+ 'Have you used self() instead of myself() to get the reference ' +
224
+ 'of the PEROBS object that you are trying to assign here?'
225
+ elsif !NATIVE_CLASSES.include?(val.class)
226
+ PEROBS.log.fatal "Assigning objects of class #{val.class} is not " +
227
+ "supported. Only PEROBS objects or one of the following classes " +
228
+ "are supported: #{NATIVE_CLASSES.join(', ')}"
229
+ end
230
+ end
231
+
195
232
  # Read a raw object with the specified ID from the backing store and
196
233
  # instantiate a new object of the specific type.
197
234
  def ObjectBase.read(store, id)
@@ -218,10 +255,7 @@ module PEROBS
218
255
  data = nil
219
256
  if @_stash_map
220
257
  (level - 1).downto(0) do |lvl|
221
- if @_stash_map[lvl]
222
- data = @_stash_map[lvl]
223
- break
224
- end
258
+ break if (data = @_stash_map[lvl])
225
259
  end
226
260
  end
227
261
  if data
@@ -31,29 +31,31 @@ module PEROBS
31
31
 
32
32
  class PersistentObjectCache
33
33
 
34
- FLUSH_WATERMARK = 500
35
-
36
34
  # This cache class manages the presence of objects that primarily live in
37
35
  # a backing store but temporarily exist in memory as well. To work with
38
36
  # these objects, direct references must be only very short lived. Indirect
39
37
  # references can be done via a unique ID that the object must provide. Due
40
38
  # to the indirect references the Ruby garbage collector can collect these
41
- # objects and the cache is notified via a finalizer that the objects must
42
- # provide. The finalize must call the _collect() method. To reduce the
43
- # read and write latencies of the backing store this class keeps a subset
44
- # of the object in memory which prevents them from being collected. All
45
- # references to the objects must be resolved via the get() method to
46
- # prevent duplicate instances in memory of the same object.
47
- # @param size [Integer] Maximum number of objects to be cached at a time
39
+ # objects. To reduce the read and write latencies of the backing store
40
+ # this class keeps a subset of the objects in memory which prevents them
41
+ # from being collected. All references to the objects must be resolved via
42
+ # the get() method to prevent duplicate instances in memory of the same
43
+ # in-store object. The cache uses a least-recently-used (LRU) scheme to
44
+ # cache objects.
45
+ # @param size [Integer] Minimum number of objects to be cached at a time
46
+ # @param flush_delay [Integer] Determines how often non-forced flushes are
47
+ # ignored in a row before the flush is really done.
48
48
  # @param klass [Class] The class of the objects to be cached. Objects must
49
49
  # provide a uid() method that returns a unique ID for every object.
50
50
  # @param collection [] The object collection the objects belong to. It
51
51
  # must provide a ::load method.
52
- def initialize(size, klass, collection)
52
+ def initialize(size, flush_delay, klass, collection)
53
53
  @size = size
54
54
  @klass = klass
55
55
  @collection = collection
56
- @flush_counter = FLUSH_WATERMARK
56
+ @flush_delay = @flush_counter = flush_delay
57
+ @flush_times = 0
58
+
57
59
  clear
58
60
  end
59
61
 
@@ -61,65 +63,47 @@ module PEROBS
61
63
  # @param object [Object] Object to cache
62
64
  # @param modified [Boolean] True if the object was modified, false otherwise
63
65
  def insert(object, modified = true)
64
- # Store the object via its Ruby object ID instead of a direct reference.
65
- # This allows the object to be collected by the garbage collector.
66
- @in_memory_objects[object.uid] = object.object_id
66
+ unless object.is_a?(@klass)
67
+ raise ArgumentError, "You can insert only #{@klass} objects in this " +
68
+ "cache. You have tried to insert a #{object.class} instead."
69
+ end
70
+
71
+ if modified
72
+ @modified_entries[object.uid] = object
73
+ else
74
+ index = object.uid % @size
75
+ @unmodified_entries[index] = object
76
+ end
67
77
 
68
- @lines[object.uid % @size].insert(object, modified)
78
+ nil
69
79
  end
70
80
 
71
81
  # Retrieve an object reference from the cache.
72
82
  # @param uid [Integer] uid of the object to retrieve.
73
- def get(uid)
74
- if (entry = @lines[uid % @size].get(uid))
75
- return entry.obj
83
+ # @param ref [Object] optional reference to be used by the load method
84
+ def get(uid, ref = nil)
85
+ # First check if it's a modified object.
86
+ if (object = @modified_entries[uid])
87
+ return object
76
88
  end
77
89
 
78
- if (ruby_object_id = @in_memory_objects[uid])
79
- # We have the object in memory so we can just return it.
80
- begin
81
- object = ObjectSpace._id2ref(ruby_object_id)
82
- # Let's make sure the object is really the object we are looking
83
- # for. The GC might have recycled it already and the Ruby object ID
84
- # could now be used for another object.
85
- if object.is_a?(@klass) && object.uid == uid
86
- # Let's put the object in the cache. We might need it soon again.
87
- insert(object, false)
88
- return object
89
- end
90
- rescue RangeError
91
- # Due to a race condition the object can still be in the
92
- # @in_memory_objects list but has been collected already by the Ruby
93
- # GC. In that case we need to load it again. In this case the
94
- # _collect() call will happen much later, potentially after we have
95
- # registered a new object with the same ID.
96
- @in_memory_objects.delete(uid)
97
- end
90
+ # Then check the unmodified object list.
91
+ if (object = @unmodified_entries[uid % @size]) && object.uid == uid
92
+ return object
98
93
  end
99
94
 
100
- @klass::load(@collection, uid)
95
+ # If we don't have it in memory we need to load it.
96
+ @klass::load(@collection, uid, ref)
101
97
  end
102
98
 
103
99
  # Remove an object from the cache.
104
100
  # @param uid [Integer] unique ID of object to remove.
105
101
  def delete(uid)
106
- # The object is likely still in memory, but we really don't want to
107
- # access it anymore.
108
- @in_memory_objects.delete(uid)
109
-
110
- @lines[uid % @size].delete(uid)
111
- end
102
+ @modified_entries.delete(uid)
112
103
 
113
- # Remove a object from the in-memory list. This is an internal method
114
- # and should never be called from user code. It will be called from a
115
- # finalizer, so many restrictions apply!
116
- # @param uid [Integer] Object address of the object to remove from
117
- # the list
118
- # @param ruby_object_id [Integer] The Ruby object ID of the collected
119
- # object
120
- def _collect(address, ruby_object_id)
121
- if @in_memory_objects[id] == ruby_object_id
122
- @in_memory_objects.delete(address)
104
+ index = uid % @size
105
+ if (object = @unmodified_entries[index]) && object.uid == uid
106
+ @unmodified_entries[index] = nil
123
107
  end
124
108
  end
125
109
 
@@ -128,23 +112,25 @@ module PEROBS
128
112
  # @param now [Boolean]
129
113
  def flush(now = false)
130
114
  if now || (@flush_counter -= 1) <= 0
131
- @lines.each { |line| line.flush(now) }
132
- @flush_counter = FLUSH_WATERMARK
115
+ @modified_entries.each do |id, object|
116
+ object.save
117
+ end
118
+ @modified_entries = ::Hash.new
119
+ @flush_counter = @flush_delay
133
120
  end
121
+ @flush_times += 1
134
122
  end
135
123
 
136
124
  # Remove all entries from the cache.
137
125
  def clear
138
- # A hash that stores all objects by the Ruby object ID that are
139
- # currently in memory. Objects are added via insert() and will be
140
- # removed via delete() or _collect() called from a Object
141
- # finalizer. It only stores the object Ruby object ID hashed by their
142
- # address in the file. This enables them from being collected by the
143
- # Ruby garbage collector.
144
- @in_memory_objects = {}
145
- # This is the actual cache. The Array stores objects as Entry objects to
146
- # also store the modified/not-modified state.
147
- @lines = ::Array.new(@size) { |i| PersistentObjectCacheLine.new }
126
+ # This Array stores all unmodified entries. It has a fixed size and uses
127
+ # a % operation to compute the index from the object ID.
128
+ @unmodified_entries = ::Array.new(@size)
129
+
130
+ # This Hash stores all modified entries. It can grow and shrink as
131
+ # needed. A flush operation writes all modified objects into the backing
132
+ # store.
133
+ @modified_entries = ::Hash.new
148
134
  end
149
135
 
150
136
  end
@@ -34,30 +34,42 @@ module PEROBS
34
34
  class Entry < Struct.new(:obj, :modified)
35
35
  end
36
36
 
37
- WATERMARK = 4
37
+ # This defines the minimum size of the cache line. If it is too large, the
38
+ # time to find an entry will grow too much. If it is too small the number
39
+ # of cache lines will be too large and create more store overhead. By
40
+ # running benchmarks it turned out that 8 is a pretty good compromise.
41
+ WATERMARK = 8
38
42
 
39
43
  def initialize
40
44
  @entries = []
41
45
  end
42
46
 
43
47
  def insert(object, modified)
44
- @entries.each do |e|
45
- if e.obj.uid == object.uid
46
- e.modified = true if modified
47
- return
48
- end
48
+ if (index = @entries.find_index{ |e| e.obj.uid == object.uid })
49
+ # We have found and removed an existing entry for this particular
50
+ # object. If the modified flag is set, ensure that the entry has it
51
+ # set as well.
52
+ entry = @entries.delete_at(index)
53
+ entry.modified = true if modified && !entry.modified
54
+ else
55
+ # There is no existing entry for this object. Create a new one.
56
+ entry = Entry.new(object, modified)
49
57
  end
50
58
 
51
- # Insert the new entry at the beginning of the line.
52
- @entries.unshift(Entry.new(object, modified))
59
+ # Insert the entry at the beginning of the line.
60
+ @entries.unshift(entry)
53
61
  end
54
62
 
55
63
  def get(uid)
56
- @entries.each do |e|
57
- return e if e.obj.uid == uid
64
+ if (index = @entries.find_index{ |e| e.obj.uid == uid })
65
+ if index > 0
66
+ # Move the entry to the front.
67
+ @entries.unshift(@entries.delete_at(index))
68
+ end
69
+ @entries.first
70
+ else
71
+ nil
58
72
  end
59
-
60
- nil
61
73
  end
62
74
 
63
75
  # Delete the entry that matches the given UID
@@ -0,0 +1,97 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = ProgressMeter.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2018 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'time'
29
+
30
+ module PEROBS
31
+
32
+ # This is the base class for all ProgressMeter classes. It only logs into
33
+ # the PEROBS log. You need to create a derived class that overrides
34
+ # print_bar() and print_time() to provide more fancy outputs.
35
+ class ProgressMeter
36
+
37
+ def initialize
38
+ @name = nil
39
+ @max_value = nil
40
+ @current_value = nil
41
+ @start_time = nil
42
+ @end_time = nil
43
+ end
44
+
45
+ def start(name, max_value)
46
+ @name = name
47
+ unless max_value >= 0
48
+ raise ArgumentError, "Maximum value (#{max_value}) must be larger " +
49
+ "or equal to 0"
50
+ end
51
+ @max_value = max_value
52
+ @current_value = 0
53
+ @start_time = Time.now
54
+ @end_time = nil
55
+ print_bar
56
+
57
+ if block_given?
58
+ yield(self)
59
+ done
60
+ end
61
+ end
62
+
63
+ def update(value)
64
+ return unless (value_i = value.to_i) > @current_value
65
+
66
+ @current_value = value_i
67
+ print_bar
68
+ end
69
+
70
+ def done
71
+ @end_time = Time.now
72
+ print_time
73
+ PEROBS.log.info "#{@name} completed in " +
74
+ secsToHMS(@end_time - @start_time)
75
+ end
76
+
77
+ private
78
+
79
+ def print_bar
80
+ end
81
+
82
+ def print_time
83
+ end
84
+
85
+ def secsToHMS(secs)
86
+ secs = secs.to_i
87
+ s = secs % 60
88
+ mins = secs / 60
89
+ m = mins % 60
90
+ h = mins / 60
91
+ "#{h}:#{'%02d' % m}:#{'%02d' % s}"
92
+ end
93
+
94
+ end
95
+
96
+ end
97
+