perobs 3.0.1 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +19 -18
  3. data/lib/perobs.rb +2 -0
  4. data/lib/perobs/Array.rb +68 -21
  5. data/lib/perobs/BTree.rb +110 -54
  6. data/lib/perobs/BTreeBlob.rb +14 -13
  7. data/lib/perobs/BTreeDB.rb +11 -10
  8. data/lib/perobs/BTreeNode.rb +551 -197
  9. data/lib/perobs/BTreeNodeCache.rb +10 -8
  10. data/lib/perobs/BTreeNodeLink.rb +11 -1
  11. data/lib/perobs/BigArray.rb +285 -0
  12. data/lib/perobs/BigArrayNode.rb +1002 -0
  13. data/lib/perobs/BigHash.rb +246 -0
  14. data/lib/perobs/BigTree.rb +197 -0
  15. data/lib/perobs/BigTreeNode.rb +873 -0
  16. data/lib/perobs/Cache.rb +47 -22
  17. data/lib/perobs/ClassMap.rb +2 -2
  18. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  19. data/lib/perobs/DataBase.rb +4 -3
  20. data/lib/perobs/DynamoDB.rb +62 -20
  21. data/lib/perobs/EquiBlobsFile.rb +174 -59
  22. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  23. data/lib/perobs/FlatFile.rb +536 -242
  24. data/lib/perobs/FlatFileBlobHeader.rb +120 -84
  25. data/lib/perobs/FlatFileDB.rb +58 -27
  26. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  27. data/lib/perobs/Hash.rb +129 -35
  28. data/lib/perobs/IDList.rb +144 -0
  29. data/lib/perobs/IDListPage.rb +107 -0
  30. data/lib/perobs/IDListPageFile.rb +180 -0
  31. data/lib/perobs/IDListPageRecord.rb +142 -0
  32. data/lib/perobs/LockFile.rb +3 -0
  33. data/lib/perobs/Object.rb +28 -20
  34. data/lib/perobs/ObjectBase.rb +53 -10
  35. data/lib/perobs/PersistentObjectCache.rb +142 -0
  36. data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
  37. data/lib/perobs/ProgressMeter.rb +97 -0
  38. data/lib/perobs/SpaceManager.rb +273 -0
  39. data/lib/perobs/SpaceTree.rb +63 -47
  40. data/lib/perobs/SpaceTreeNode.rb +134 -115
  41. data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
  42. data/lib/perobs/StackFile.rb +1 -1
  43. data/lib/perobs/Store.rb +180 -70
  44. data/lib/perobs/version.rb +1 -1
  45. data/perobs.gemspec +4 -4
  46. data/test/Array_spec.rb +48 -39
  47. data/test/BTreeDB_spec.rb +2 -2
  48. data/test/BTree_spec.rb +50 -1
  49. data/test/BigArray_spec.rb +261 -0
  50. data/test/BigHash_spec.rb +152 -0
  51. data/test/BigTreeNode_spec.rb +153 -0
  52. data/test/BigTree_spec.rb +259 -0
  53. data/test/EquiBlobsFile_spec.rb +105 -5
  54. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  55. data/test/FlatFileDB_spec.rb +199 -15
  56. data/test/FuzzyStringMatcher_spec.rb +261 -0
  57. data/test/Hash_spec.rb +27 -16
  58. data/test/IDList_spec.rb +77 -0
  59. data/test/LegacyDBs/LegacyDB.rb +155 -0
  60. data/test/LegacyDBs/version_3/class_map.json +1 -0
  61. data/test/LegacyDBs/version_3/config.json +1 -0
  62. data/test/LegacyDBs/version_3/database.blobs +0 -0
  63. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  64. data/test/LegacyDBs/version_3/index.blobs +0 -0
  65. data/test/LegacyDBs/version_3/version +1 -0
  66. data/test/LockFile_spec.rb +9 -6
  67. data/test/Object_spec.rb +5 -5
  68. data/test/SpaceManager_spec.rb +176 -0
  69. data/test/SpaceTree_spec.rb +27 -9
  70. data/test/Store_spec.rb +353 -206
  71. data/test/perobs_spec.rb +7 -3
  72. data/test/spec_helper.rb +9 -4
  73. metadata +59 -16
  74. data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
  75. data/lib/perobs/TreeDB.rb +0 -277
@@ -51,43 +51,87 @@ module PEROBS
51
51
  # Create a new SpaceTreeNode object. If node_address is not nil, the data
52
52
  # will be read from the SpaceTree file at the given node_address.
53
53
  # @param tree [SpaceTree] Tree that the object should belong to
54
- # @param parent [SpaceTreeNode] Parent node in the tree
55
54
  # @param node_address [Integer] Address of the node in the file
56
55
  # @param blob_address [Integer] Address of the free space blob
57
56
  # @param size [Integer] Size of the free space blob
58
- def initialize(tree, parent = nil, node_address = nil, blob_address = 0,
59
- size = 0)
57
+ # @param parent [SpaceTreeNode] Parent node in the tree
58
+ # @param smaller [SpaceTreeNode] smaller node in the tree
59
+ # @param equal [SpaceTreeNode] equal node in the tree
60
+ # @param larger [SpaceTreeNode] larger node in the tree
61
+ def initialize(tree, node_address, blob_address = 0, size = 0,
62
+ parent = nil, smaller = nil, equal = nil, larger = nil)
60
63
  @tree = tree
61
- if blob_address < 0
64
+ if node_address <= 0
62
65
  PEROBS.log.fatal "Node address (#{node_address}) must be larger than 0"
63
66
  end
67
+ @node_address = node_address
68
+ if blob_address < 0
69
+ PEROBS.log.fatal "Blob address (#{node_address}) must be larger than 0"
70
+ end
64
71
  @blob_address = blob_address
65
72
  @size = size
66
- @smaller = @equal = @larger = nil
67
- @node_address = node_address
73
+ @parent = parent
74
+ @smaller = smaller
75
+ @equal = equal
76
+ @larger = larger
77
+ end
68
78
 
69
- unless node_address.nil? || node_address.is_a?(Integer)
70
- PEROBS.log.fatal "node_address is not Integer: #{node_address.class}"
71
- end
79
+ # Create a new SpaceTreeNode. This method should be used for the creation
80
+ # of new nodes instead of calling the constructor directly.
81
+ # @param tree [SpaceTree] The tree the node should belong to
82
+ # @param blob_address [Integer] Address of the free space blob
83
+ # @param size [Integer] Size of the free space blob
84
+ # @param parent [SpaceTreeNode] Parent node in the tree
85
+ def SpaceTreeNode::create(tree, blob_address = 0, size = 0, parent = nil)
86
+ node_address = tree.nodes.free_address
72
87
 
73
- if node_address
74
- # This must be an existing node. Try to read it and fill the instance
75
- # variables.
76
- if size != 0
77
- PEROBS.log.fatal "If node_address is not nil size must be 0"
78
- end
79
- if blob_address != 0
80
- PEROBS.log.fatal "If node_address is not nil blob_address must be 0"
81
- end
82
- unless read_node
83
- PEROBS.log.fatal "SpaceTree node at address #{node_address} " +
84
- "does not exist"
85
- end
86
- else
87
- # This is a new node. Make sure the data is written to the file.
88
- @node_address = @tree.nodes.free_address
89
- self.parent = parent
88
+ node = SpaceTreeNode.new(tree, node_address, blob_address, size, parent)
89
+ tree.cache.insert(node)
90
+
91
+ node
92
+ end
93
+
94
+ # Restore a node from the backing store at the given address and tree.
95
+ # @param tree [SpaceTree] The tree the node belongs to
96
+ # @param node_address [Integer] The address in the file.
97
+ def SpaceTreeNode::load(tree, node_address, unused = nil)
98
+ unless node_address > 0
99
+ PEROBS.log.fatal "node_address (#{node_address}) must be larger than 0"
90
100
  end
101
+ unless (bytes = tree.nodes.retrieve_blob(node_address))
102
+ PEROBS.log.fatal "SpaceTreeNode at address #{node_address} does " +
103
+ "not exist"
104
+ end
105
+
106
+ blob_address, size, parent_node_address,
107
+ smaller_node_address, equal_node_address,
108
+ larger_node_address = bytes.unpack(NODE_BYTES_FORMAT)
109
+
110
+ parent = parent_node_address != 0 ?
111
+ SpaceTreeNodeLink.new(tree, parent_node_address) : nil
112
+ smaller = smaller_node_address != 0 ?
113
+ SpaceTreeNodeLink.new(tree, smaller_node_address) : nil
114
+ equal = equal_node_address != 0 ?
115
+ SpaceTreeNodeLink.new(tree, equal_node_address) : nil
116
+ larger = larger_node_address != 0 ?
117
+ SpaceTreeNodeLink.new(tree, larger_node_address) : nil
118
+
119
+ node = SpaceTreeNode.new(tree, node_address, blob_address, size,
120
+ parent, smaller, equal, larger)
121
+
122
+ tree.cache.insert(node, false)
123
+
124
+ node
125
+ end
126
+
127
+ # Save the node into the blob file.
128
+ def save
129
+ bytes = [ @blob_address, @size,
130
+ @parent ? @parent.node_address : 0,
131
+ @smaller ? @smaller.node_address : 0,
132
+ @equal ? @equal.node_address : 0,
133
+ @larger ? @larger.node_address : 0].pack(NODE_BYTES_FORMAT)
134
+ @tree.nodes.store_blob(@node_address, bytes)
91
135
  end
92
136
 
93
137
  # Add a new node for the given address and size to the tree.
@@ -110,7 +154,7 @@ module PEROBS
110
154
  # There is no smaller node yet, so we create a new one as a
111
155
  # smaller child of the current node.
112
156
  node.set_link('@smaller',
113
- @tree.new_node(node, address, size))
157
+ SpaceTreeNode::create(@tree, address, size, node))
114
158
  break
115
159
  end
116
160
  elsif size > node.size
@@ -122,13 +166,13 @@ module PEROBS
122
166
  # There is no larger node yet, so we create a new one as a larger
123
167
  # child of the current node.
124
168
  node.set_link('@larger',
125
- @tree.new_node(node, address, size))
169
+ SpaceTreeNode::create(@tree, address, size, node))
126
170
  break
127
171
  end
128
172
  else
129
173
  # Same size as current node. Insert new node as equal child at top of
130
174
  # equal list.
131
- new_node = @tree.new_node(node, address, size)
175
+ new_node = SpaceTreeNode::create(@tree, address, size, node)
132
176
  new_node.set_link('@equal', node.equal)
133
177
 
134
178
  node.set_link('@equal', new_node)
@@ -147,7 +191,7 @@ module PEROBS
147
191
  node = self
148
192
  loop do
149
193
  if node.blob_address == address
150
- return true
194
+ return size == node.size
151
195
  elsif size < node.size && node.smaller
152
196
  node = node.smaller
153
197
  elsif size > node.size && node.larger
@@ -242,7 +286,13 @@ module PEROBS
242
286
  PEROBS.log.fatal "Cannot unlink unknown child node with address " +
243
287
  "#{child_node.node_address} from #{to_s}"
244
288
  end
245
- write_node
289
+ @tree.cache.insert(self)
290
+ end
291
+
292
+ # @return [Integer] The node address since it uniquely identifies the
293
+ # Node.
294
+ def uid
295
+ @node_address
246
296
  end
247
297
 
248
298
  # Depth-first iterator for all nodes. The iterator yields the given block
@@ -339,7 +389,7 @@ module PEROBS
339
389
  @parent.set_link('@larger', node)
340
390
  else
341
391
  PEROBS.log.fatal "Cannot relink unknown child node with address " +
342
- "#{node.node_address} from #{to_s}"
392
+ "#{node.node_address} from #{parent.to_s}"
343
393
  end
344
394
  else
345
395
  if node
@@ -382,7 +432,7 @@ module PEROBS
382
432
  def set_size_and_address(size, address)
383
433
  @size = size
384
434
  @blob_address = address
385
- write_node
435
+ @tree.cache.insert(self)
386
436
  end
387
437
 
388
438
  def set_link(name, node_or_address)
@@ -398,12 +448,12 @@ module PEROBS
398
448
  # Clear the node link.
399
449
  instance_variable_set(name, nil)
400
450
  end
401
- write_node
451
+ @tree.cache.insert(self)
402
452
  end
403
453
 
404
454
  def parent=(p)
405
455
  @parent = p ? SpaceTreeNodeLink.new(@tree, p) : nil
406
- write_node
456
+ @tree.cache.insert(self)
407
457
  end
408
458
  # Compare this node to another node.
409
459
  # @return [Boolean] true if node address is identical, false otherwise
@@ -468,62 +518,65 @@ module PEROBS
468
518
  # errors.
469
519
  # @param flat_file [FlatFile] If given, check that the space is also
470
520
  # present in the given flat file.
521
+ # @param count [Integer] The total number of entries in the tree
471
522
  # @return [false,true] True if OK, false otherwise
472
- def check(flat_file)
523
+ def check(flat_file, count)
473
524
  node_counter = 0
474
525
  max_depth = 0
475
526
 
476
- each do |node, mode, stack|
477
- max_depth = stack.size if stack.size > max_depth
478
-
479
- case mode
480
- when :smaller
481
- if node.smaller
482
- return false unless node.check_node_link('smaller', stack)
483
- smaller_node = node.smaller
484
- if smaller_node.size >= node.size
485
- PEROBS.log.error "Smaller SpaceTreeNode size " +
486
- "(#{smaller_node}) is not smaller than #{node}"
487
- return false
527
+ @tree.progressmeter.start('Checking space list entries', count) do |pm|
528
+ each do |node, mode, stack|
529
+ max_depth = stack.size if stack.size > max_depth
530
+
531
+ case mode
532
+ when :smaller
533
+ if node.smaller
534
+ return false unless node.check_node_link('smaller', stack)
535
+ smaller_node = node.smaller
536
+ if smaller_node.size >= node.size
537
+ PEROBS.log.error "Smaller SpaceTreeNode size " +
538
+ "(#{smaller_node}) is not smaller than #{node}"
539
+ return false
540
+ end
488
541
  end
489
- end
490
- when :equal
491
- if node.equal
492
- return false unless node.check_node_link('equal', stack)
493
- equal_node = node.equal
494
-
495
- if equal_node.smaller || equal_node.larger
496
- PEROBS.log.error "Equal node #{equal_node} must not have " +
497
- "smaller/larger childs"
498
- return false
542
+ when :equal
543
+ if node.equal
544
+ return false unless node.check_node_link('equal', stack)
545
+ equal_node = node.equal
546
+
547
+ if equal_node.smaller || equal_node.larger
548
+ PEROBS.log.error "Equal node #{equal_node} must not have " +
549
+ "smaller/larger childs"
550
+ return false
551
+ end
552
+
553
+ if node.size != equal_node.size
554
+ PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) " +
555
+ "is not equal parent node #{node}"
556
+ return false
557
+ end
499
558
  end
500
-
501
- if node.size != equal_node.size
502
- PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) is " +
503
- "not equal parent node #{node}"
504
- return false
559
+ when :larger
560
+ if node.larger
561
+ return false unless node.check_node_link('larger', stack)
562
+ larger_node = node.larger
563
+ if larger_node.size <= node.size
564
+ PEROBS.log.error "Larger SpaceTreeNode size " +
565
+ "(#{larger_node}) is not larger than #{node}"
566
+ return false
567
+ end
505
568
  end
506
- end
507
- when :larger
508
- if node.larger
509
- return false unless node.check_node_link('larger', stack)
510
- larger_node = node.larger
511
- if larger_node.size <= node.size
512
- PEROBS.log.error "Larger SpaceTreeNode size " +
513
- "(#{larger_node}) is not larger than #{node}"
569
+ when :on_exit
570
+ if flat_file &&
571
+ !flat_file.has_space?(node.blob_address, node.size)
572
+ PEROBS.log.error "SpaceTreeNode has space at offset " +
573
+ "#{node.blob_address} of size #{node.size} that isn't " +
574
+ "available in the FlatFile."
514
575
  return false
515
576
  end
516
- end
517
- when :on_exit
518
- if flat_file &&
519
- !flat_file.has_space?(node.blob_address, node.size)
520
- PEROBS.log.error "SpaceTreeNode has space at offset " +
521
- "#{node.blob_address} of size #{node.size} that isn't " +
522
- "available in the FlatFile."
523
- return false
524
- end
525
577
 
526
- node_counter += 1
578
+ pm.update(node_counter += 1)
579
+ end
527
580
  end
528
581
  end
529
582
  PEROBS.log.debug "#{node_counter} SpaceTree nodes checked"
@@ -633,40 +686,6 @@ module PEROBS
633
686
  str
634
687
  end
635
688
 
636
- private
637
-
638
- def write_node
639
- bytes = [ @blob_address, @size,
640
- @parent ? @parent.node_address : 0,
641
- @smaller ? @smaller.node_address : 0,
642
- @equal ? @equal.node_address : 0,
643
- @larger ? @larger.node_address : 0].pack(NODE_BYTES_FORMAT)
644
- @tree.nodes.store_blob(@node_address, bytes)
645
- end
646
-
647
- def read_node
648
- unless @node_address > 0
649
- PEROBS.log.fatal "@node_address must be larger than 0"
650
- end
651
- return false unless (bytes = @tree.nodes.retrieve_blob(@node_address))
652
-
653
- @blob_address, @size, parent_node_address,
654
- smaller_node_address, equal_node_address,
655
- larger_node_address = bytes.unpack(NODE_BYTES_FORMAT)
656
- # The parent address can also be 0 as the parent can rightly point back
657
- # to the root node which always has the address 0.
658
- @parent = parent_node_address != 0 ?
659
- SpaceTreeNodeLink.new(@tree, parent_node_address) : nil
660
- @smaller = smaller_node_address != 0 ?
661
- SpaceTreeNodeLink.new(@tree, smaller_node_address) : nil
662
- @equal = equal_node_address != 0 ?
663
- SpaceTreeNodeLink.new(@tree, equal_node_address) : nil
664
- @larger = larger_node_address != 0 ?
665
- SpaceTreeNodeLink.new(@tree, larger_node_address) : nil
666
-
667
- true
668
- end
669
-
670
689
  end
671
690
 
672
691
  end
@@ -94,7 +94,7 @@ module PEROBS
94
94
  private
95
95
 
96
96
  def get_node
97
- @tree.get_node(@node_address)
97
+ @tree.cache.get(@node_address)
98
98
  end
99
99
 
100
100
  end
@@ -36,7 +36,7 @@ module PEROBS
36
36
  # Create a new stack file in the given directory with the given file name.
37
37
  # @param dir [String] Directory
38
38
  # @param name [String] File name
39
- # @param entry_bytes [Fixnum] Number of bytes each entry must have
39
+ # @param entry_bytes [Integer] Number of bytes each entry must have
40
40
  def initialize(dir, name, entry_bytes)
41
41
  @file_name = File.join(dir, name + '.stack')
42
42
  @entry_bytes = entry_bytes
data/lib/perobs/Store.rb CHANGED
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = Store.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -26,7 +27,6 @@
26
27
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28
 
28
29
  require 'set'
29
- require 'weakref'
30
30
 
31
31
  require 'perobs/Log'
32
32
  require 'perobs/Handle'
@@ -37,12 +37,18 @@ require 'perobs/FlatFileDB'
37
37
  require 'perobs/Object'
38
38
  require 'perobs/Hash'
39
39
  require 'perobs/Array'
40
+ require 'perobs/BigTree'
41
+ require 'perobs/BigHash'
42
+ require 'perobs/BigArray'
43
+ require 'perobs/ProgressMeter'
44
+ require 'perobs/ConsoleProgressMeter'
40
45
 
41
46
  # PErsistent Ruby OBject Store
42
47
  module PEROBS
43
48
 
44
- Statistics = Struct.new(:in_memory_objects, :root_objects,
45
- :marked_objects, :swept_objects)
49
+ Statistics = Struct.new(:in_memory_objects, :root_objects, :zombie_objects,
50
+ :marked_objects, :swept_objects,
51
+ :created_objects, :collected_objects)
46
52
 
47
53
  # PEROBS::Store is a persistent storage system for Ruby objects. Regular
48
54
  # Ruby objects are transparently stored in a back-end storage and retrieved
@@ -68,12 +74,20 @@ module PEROBS
68
74
  #
69
75
  # class Person < PEROBS::Object
70
76
  #
71
- # po_attr :name, :mother, :father, :kids
77
+ # attr_persist :name, :mother, :father, :kids
72
78
  #
79
+ # # The constructor is only called for the creation of a new object. It is
80
+ # # not called when the object is restored from the database. In that case
81
+ # # only restore() is called.
73
82
  # def initialize(cf, name)
74
83
  # super(cf)
75
- # attr_init(:name, name)
76
- # attr_init(:kids, @store.new(PEROBS::Array))
84
+ # self.name = name
85
+ # self.kids = @store.new(PEROBS::Array)
86
+ # end
87
+ #
88
+ # def restore
89
+ # # In case you need to do any checks or massaging (e. g. for additional
90
+ # # attributes) you can provide this method.
77
91
  # end
78
92
  #
79
93
  # def to_s
@@ -91,11 +105,12 @@ module PEROBS
91
105
  # joe.kids << jim
92
106
  # jim.mother = jane
93
107
  # jane.kids << jim
94
- # store.sync
108
+ # store.exit
95
109
  #
96
110
  class Store
97
111
 
98
112
  attr_reader :db, :cache, :class_map
113
+ attr_writer :root_objects
99
114
 
100
115
  # Create a new Store.
101
116
  # @param data_base [String] the name of the database
@@ -120,14 +135,23 @@ module PEROBS
120
135
  # :json : About half as fast as marshal, but the
121
136
  # format is rock solid and portable between
122
137
  # languages. It only supports basic Ruby data types
123
- # like String, Fixnum, Float, Array, Hash. This is
138
+ # like String, Integer, Float, Array, Hash. This is
124
139
  # the default option.
125
140
  # :yaml : Can also handle most Ruby data types and is
126
141
  # portable between Ruby versions (1.9 and later).
127
142
  # Unfortunately, it is 10x slower than marshal.
143
+ # :progressmeter : reference to a ProgressMeter object that receives
144
+ # progress information during longer running tasks.
145
+ # It defaults to ProgressMeter which only logs into
146
+ # the log. Use ConsoleProgressMeter or a derived
147
+ # class for more fancy progress reporting.
148
+ # :no_root_objects : Create a new store without root objects. This only
149
+ # makes sense if you want to copy the objects of
150
+ # another store into this store.
128
151
  def initialize(data_base, options = {})
129
152
  # Create a backing store handler
130
- @db = (options[:engine] || BTreeDB).new(data_base, options)
153
+ @progressmeter = (options[:progressmeter] ||= ProgressMeter.new)
154
+ @db = (options[:engine] || FlatFileDB).new(data_base, options)
131
155
  @db.open
132
156
  # Create a map that can translate classes to numerical IDs and vice
133
157
  # versa.
@@ -136,25 +160,32 @@ module PEROBS
136
160
  # List of PEROBS objects that are currently available as Ruby objects
137
161
  # hashed by their ID.
138
162
  @in_memory_objects = {}
163
+ # List of objects that were destroyed already but were still found in
164
+ # the in_memory_objects list. _collect has not yet been called for them.
165
+ @zombie_objects = {}
139
166
 
140
167
  # This objects keeps some counters of interest.
141
168
  @stats = Statistics.new
169
+ @stats[:created_objects] = 0
170
+ @stats[:collected_objects] = 0
142
171
 
143
172
  # The Cache reduces read and write latencies by keeping a subset of the
144
173
  # objects in memory.
145
174
  @cache = Cache.new(options[:cache_bits] || 16)
146
175
 
147
176
  # The named (global) objects IDs hashed by their name
148
- unless (@root_objects = object_by_id(0))
149
- PEROBS.log.debug "Initializing the PEROBS store"
150
- # The root object hash always has the object ID 0.
151
- @root_objects = _construct_po(Hash, 0)
152
- # Mark the root_objects object as modified.
153
- @cache.cache_write(@root_objects)
154
- end
155
- unless @root_objects.is_a?(Hash)
156
- PEROBS.log.fatal "Database corrupted: Root objects must be a Hash " +
157
- "but is a #{@root_objects.class}"
177
+ unless options[:no_root_objects]
178
+ unless (@root_objects = object_by_id(0))
179
+ PEROBS.log.debug "Initializing the PEROBS store"
180
+ # The root object hash always has the object ID 0.
181
+ @root_objects = _construct_po(Hash, 0)
182
+ # Mark the root_objects object as modified.
183
+ @cache.cache_write(@root_objects)
184
+ end
185
+ unless @root_objects.is_a?(Hash)
186
+ PEROBS.log.fatal "Database corrupted: Root objects must be a Hash " +
187
+ "but is a #{@root_objects.class}"
188
+ end
158
189
  end
159
190
  end
160
191
 
@@ -166,7 +197,9 @@ module PEROBS
166
197
  sync
167
198
 
168
199
  # Create a new store with the specified directory and options.
169
- new_db = Store.new(dir, options)
200
+ new_options = options.clone
201
+ new_options[:no_root_objects] = true
202
+ new_db = Store.new(dir, new_options)
170
203
  # Clear the cache.
171
204
  new_db.sync
172
205
  # Copy all objects of the existing store to the new store.
@@ -177,6 +210,7 @@ module PEROBS
177
210
  obj._sync
178
211
  i += 1
179
212
  end
213
+ new_db.root_objects = new_db.object_by_id(0)
180
214
  PEROBS.log.debug "Copied #{i} objects into new database at #{dir}"
181
215
  # Flush the new store and close it.
182
216
  new_db.exit
@@ -184,20 +218,34 @@ module PEROBS
184
218
  true
185
219
  end
186
220
 
187
-
188
221
  # Close the store and ensure that all in-memory objects are written out to
189
222
  # the storage backend. The Store object is no longer usable after this
190
223
  # method was called.
191
224
  def exit
192
225
  if @cache && @cache.in_transaction?
193
- PEROBS.log.fatal 'You cannot call exit() during a transaction'
226
+ @cache.abort_transaction
227
+ @cache.flush
228
+ @db.close if @db
229
+ PEROBS.log.fatal "You cannot call exit() during a transaction: #{Kernel.caller}"
194
230
  end
195
231
  @cache.flush if @cache
196
232
  @db.close if @db
197
- @db = @class_map = @in_memory_objects = @stats = @cache = @root_objects =
198
- nil
199
- end
200
233
 
234
+ GC.start
235
+ if @stats
236
+ unless @stats[:created_objects] == @stats[:collected_objects] +
237
+ @in_memory_objects.length
238
+ PEROGS.log.fatal "Created objects count " +
239
+ "(#{@stats[:created_objects]})" +
240
+ " is not equal to the collected count " +
241
+ "(#{@stats[:collected_objects]}) + in_memory_objects count " +
242
+ "(#{@in_memory_objects.length})"
243
+ end
244
+ end
245
+
246
+ @db = @class_map = @in_memory_objects = @zombie_objects =
247
+ @stats = @cache = @root_objects = nil
248
+ end
201
249
 
202
250
  # You need to call this method to create new PEROBS objects that belong to
203
251
  # this Store.
@@ -221,7 +269,7 @@ module PEROBS
221
269
  # For library internal use only!
222
270
  # This method will create a new PEROBS object.
223
271
  # @param klass [BasicObject] Class of the object to create
224
- # @param id [Fixnum, Bignum] Requested object ID
272
+ # @param id [Integer] Requested object ID
225
273
  # @param args [Array] Arguments to pass to the object constructor.
226
274
  # @return [BasicObject] Newly constructed PEROBS object
227
275
  def _construct_po(klass, id, *args)
@@ -229,10 +277,12 @@ module PEROBS
229
277
  end
230
278
 
231
279
  # Delete the entire store. The store is no longer usable after this
232
- # method was called.
280
+ # method was called. This is an alternative to exit() that additionally
281
+ # deletes the entire database.
233
282
  def delete_store
234
283
  @db.delete_database
235
- @db = @class_map = @cache = @root_objects = nil
284
+ @db = @class_map = @in_memory_objects = @zombie_objects =
285
+ @stats = @cache = @root_objects = nil
236
286
  end
237
287
 
238
288
  # Store the provided object under the given name. Use this to make the
@@ -288,20 +338,30 @@ module PEROBS
288
338
  # needed.
289
339
  def sync
290
340
  if @cache.in_transaction?
291
- PEROBS.log.fatal 'You cannot call sync() during a transaction'
341
+ @cache.abort_transaction
342
+ @cache.flush
343
+ PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
344
+ Kernel.caller.join("\n")
292
345
  end
293
346
  @cache.flush
294
347
  end
295
348
 
349
+ # Return the number of objects stored in the store. CAVEAT: This method
350
+ # will only return correct values when it is separated from any mutating
351
+ # call by a call to sync().
352
+ # @return [Integer] Number of persistently stored objects in the Store.
353
+ def size
354
+ # We don't include the Hash that stores the root objects into the object
355
+ # count.
356
+ @db.item_counter - 1
357
+ end
358
+
296
359
  # Discard all objects that are not somehow connected to the root objects
297
360
  # from the back-end storage. The garbage collector is not invoked
298
361
  # automatically. Depending on your usage pattern, you need to call this
299
362
  # method periodically.
300
- # @return [Fixnum] The number of collected objects
363
+ # @return [Integer] The number of collected objects
301
364
  def gc
302
- if @cache.in_transaction?
303
- PEROBS.log.fatal 'You cannot call gc() during a transaction'
304
- end
305
365
  sync
306
366
  mark
307
367
  sweep
@@ -311,17 +371,30 @@ module PEROBS
311
371
  # public API and should never be called by outside users. It's purely
312
372
  # intended for internal use.
313
373
  def object_by_id(id)
314
- if (obj = @in_memory_objects[id])
374
+ if (ruby_object_id = @in_memory_objects[id])
315
375
  # We have the object in memory so we can just return it.
316
376
  begin
317
- return obj.__getobj__
318
- rescue WeakRef::RefError
377
+ object = ObjectSpace._id2ref(ruby_object_id)
378
+ # Let's make sure the object is really the object we are looking
379
+ # for. The GC might have recycled it already and the Ruby object ID
380
+ # could now be used for another object.
381
+ if object.is_a?(ObjectBase) && object._id == id
382
+ return object
383
+ end
384
+ rescue RangeError => e
319
385
  # Due to a race condition the object can still be in the
320
386
  # @in_memory_objects list but has been collected already by the Ruby
321
- # GC. In that case we need to load it again.
387
+ # GC. In that case we need to load it again. The _collect() call
388
+ # will happen much later, potentially after we have registered a new
389
+ # object with the same ID.
390
+ @zombie_objects[id] = @in_memory_objects.delete(id)
322
391
  end
323
392
  end
324
393
 
394
+ if (obj = @cache.object_by_id(id))
395
+ PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in cache but not in_memory"
396
+ end
397
+
325
398
  # We don't have the object in memory. Let's find it in the storage.
326
399
  if @db.include?(id)
327
400
  # Great, object found. Read it into memory and return it.
@@ -342,40 +415,44 @@ module PEROBS
342
415
  # unreadable object is found, the reference will simply be deleted.
343
416
  # @param repair [TrueClass/FalseClass] true if a repair attempt should be
344
417
  # made.
345
- # @return [Fixnum] The number of references to bad objects found.
418
+ # @return [Integer] The number of references to bad objects found.
346
419
  def check(repair = false)
420
+ stats = { :errors => 0, :object_cnt => 0 }
421
+
347
422
  # All objects must have in-db version.
348
423
  sync
349
424
  # Run basic consistency checks first.
350
- errors = @db.check_db(repair)
425
+ stats[:errors] += @db.check_db(repair)
351
426
 
352
427
  # We will use the mark to mark all objects that we have checked already.
353
428
  # Before we start, we need to clear all marks.
354
429
  @db.clear_marks
355
430
 
356
- objects = 0
357
- @root_objects.each do |name, id|
358
- objects += 1
359
- errors += check_object(id, repair)
431
+ @progressmeter.start("Checking object link structure",
432
+ @db.item_counter) do
433
+ @root_objects.each do |name, id|
434
+ check_object(id, repair, stats)
435
+ end
360
436
  end
361
437
 
362
438
  # Delete all broken root objects.
363
439
  if repair
364
440
  @root_objects.delete_if do |name, id|
365
- unless (res = @db.check(id, repair))
441
+ unless @db.check(id, repair)
366
442
  PEROBS.log.error "Discarding broken root object '#{name}' " +
367
443
  "with ID #{id}"
368
- errors += 1
444
+ stats[:errors] += 1
369
445
  end
370
- !res
371
446
  end
372
447
  end
373
448
 
374
- if errors > 0
449
+ if stats[:errors] > 0
375
450
  if repair
376
- PEROBS.log.error "#{errors} errors found in #{objects} objects"
451
+ PEROBS.log.error "#{stats[:errors]} errors found in " +
452
+ "#{stats[:object_cnt]} objects"
377
453
  else
378
- PEROBS.log.fatal "#{errors} errors found in #{objects} objects"
454
+ PEROBS.log.fatal "#{stats[:errors]} errors found in " +
455
+ "#{stats[:object_cnt]} objects"
379
456
  end
380
457
  else
381
458
  PEROBS.log.debug "No errors found"
@@ -384,7 +461,7 @@ module PEROBS
384
461
  # Ensure that any fixes are written into the DB.
385
462
  sync if repair
386
463
 
387
- errors
464
+ stats[:errors]
388
465
  end
389
466
 
390
467
  # This method will execute the provided block as an atomic transaction
@@ -413,7 +490,10 @@ module PEROBS
413
490
  stack = [ 0 ] + @root_objects.values
414
491
  while !stack.empty?
415
492
  # Get an object index from the stack.
416
- unless (obj = object_by_id(id = stack.pop))
493
+ id = stack.pop
494
+ next if @db.is_marked?(id)
495
+
496
+ unless (obj = object_by_id(id))
417
497
  PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
418
498
  "not found."
419
499
  end
@@ -436,7 +516,7 @@ module PEROBS
436
516
  # Internal method. Don't use this outside of this library!
437
517
  # Generate a new unique ID that is not used by any other object. It uses
438
518
  # random numbers between 0 and 2**64 - 1.
439
- # @return [Fixnum or Bignum]
519
+ # @return [Integer]
440
520
  def _new_id
441
521
  begin
442
522
  # Generate a random number. It's recommended to not store more than
@@ -454,23 +534,39 @@ module PEROBS
454
534
  # happens the object finalizer is triggered and calls _forget() to
455
535
  # remove the object from this hash again.
456
536
  # @param obj [BasicObject] Object to register
457
- # @param id [Fixnum or Bignum] object ID
537
+ # @param id [Integer] object ID
458
538
  def _register_in_memory(obj, id)
459
- @in_memory_objects[id] = WeakRef.new(obj)
539
+ unless obj.is_a?(ObjectBase)
540
+ PEROBS.log.fatal "You can only register ObjectBase objects"
541
+ end
542
+ if @in_memory_objects.include?(id)
543
+ PEROBS.log.fatal "The Store::_in_memory_objects list already " +
544
+ "contains an object for ID #{id}"
545
+ end
546
+
547
+ @in_memory_objects[id] = obj.object_id
548
+ @stats[:created_objects] += 1
460
549
  end
461
550
 
462
551
  # Remove the object from the in-memory list. This is an internal method
463
552
  # and should never be called from user code. It will be called from a
464
553
  # finalizer, so many restrictions apply!
465
- # @param id [Fixnum or Bignum] Object ID of object to remove from the list
466
- def _collect(id, ignore_errors = false)
467
- @in_memory_objects.delete(id)
554
+ # @param id [Integer] Object ID of object to remove from the list
555
+ def _collect(id, ruby_object_id)
556
+ if @in_memory_objects[id] == ruby_object_id
557
+ @in_memory_objects.delete(id)
558
+ @stats[:collected_objects] += 1
559
+ elsif @zombie_objects[id] == ruby_object_id
560
+ @zombie_objects.delete(id)
561
+ @stats[:collected_objects] += 1
562
+ end
468
563
  end
469
564
 
470
565
  # This method returns a Hash with some statistics about this store.
471
566
  def statistics
472
567
  @stats.in_memory_objects = @in_memory_objects.length
473
568
  @stats.root_objects = @root_objects.length
569
+ @stats.zombie_objects = @zombie_objects.length
474
570
 
475
571
  @stats
476
572
  end
@@ -482,32 +578,40 @@ module PEROBS
482
578
  def mark
483
579
  classes = Set.new
484
580
  marked_objects = 0
485
- each { |obj| classes.add(obj.class); marked_objects += 1 }
581
+ @progressmeter.start("Marking linked objects", @db.item_counter) do
582
+ each do |obj|
583
+ classes.add(obj.class)
584
+ @progressmeter.update(marked_objects += 1)
585
+ end
586
+ end
486
587
  @class_map.keep(classes.map { |c| c.to_s })
487
588
 
488
589
  # The root_objects object is included in the count, but we only want to
489
590
  # count user objects here.
490
- PEROBS.log.debug "#{marked_objects - 1} objects marked"
591
+ PEROBS.log.debug "#{marked_objects - 1} of #{@db.item_counter} " +
592
+ "objects marked"
491
593
  @stats.marked_objects = marked_objects - 1
492
594
  end
493
595
 
494
596
  # Sweep phase of a mark-and-sweep garbage collector. It will remove all
495
597
  # unmarked objects from the store.
496
598
  def sweep
497
- @stats.swept_objects = @db.delete_unmarked_objects.length
498
- @cache.reset
599
+ @stats.swept_objects = @db.delete_unmarked_objects do |id|
600
+ @cache.evict(id)
601
+ end
602
+ @db.clear_marks
603
+ GC.start
499
604
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
500
605
  @stats.swept_objects
501
606
  end
502
607
 
503
608
  # Check the object with the given start_id and all other objects that are
504
609
  # somehow reachable from the start object.
505
- # @param start_id [Fixnum or Bignum] ID of the top-level object to start
610
+ # @param start_id [Integer] ID of the top-level object to start
506
611
  # with
507
612
  # @param repair [Boolean] Delete references to broken objects if true
508
- # @return [Fixnum] The number of references to bad objects.
509
- def check_object(start_id, repair)
510
- errors = 0
613
+ # @return [Integer] The number of references to bad objects.
614
+ def check_object(start_id, repair, stats)
511
615
  @db.mark(start_id)
512
616
  # The todo list holds a tuple for each object that still needs to be
513
617
  # checked. The first item is the referring object and the second is the
@@ -518,7 +622,13 @@ module PEROBS
518
622
  # Get the next PEROBS object to check
519
623
  ref_obj, id = todo_list.pop
520
624
 
521
- if (obj = object_by_id(id)) && (obj_ok = @db.check(id, repair))
625
+ begin
626
+ obj = object_by_id(id)
627
+ rescue PEROBS::FatalError
628
+ obj = nil
629
+ end
630
+
631
+ if obj
522
632
  # The object exists and is OK. Mark it as checked.
523
633
  @db.mark(id)
524
634
  # Now look at all other objects referenced by this object.
@@ -541,11 +651,11 @@ module PEROBS
541
651
  ref_obj.inspect
542
652
  end
543
653
  end
544
- errors += 1
654
+ stats[:errors] += 1
545
655
  end
546
- end
547
656
 
548
- errors
657
+ @progressmeter.update(stats[:object_cnt] += 1)
658
+ end
549
659
  end
550
660
 
551
661
  end