perobs 3.0.1 → 4.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +19 -18
  3. data/lib/perobs.rb +2 -0
  4. data/lib/perobs/Array.rb +68 -21
  5. data/lib/perobs/BTree.rb +110 -54
  6. data/lib/perobs/BTreeBlob.rb +14 -13
  7. data/lib/perobs/BTreeDB.rb +11 -10
  8. data/lib/perobs/BTreeNode.rb +551 -197
  9. data/lib/perobs/BTreeNodeCache.rb +10 -8
  10. data/lib/perobs/BTreeNodeLink.rb +11 -1
  11. data/lib/perobs/BigArray.rb +285 -0
  12. data/lib/perobs/BigArrayNode.rb +1002 -0
  13. data/lib/perobs/BigHash.rb +246 -0
  14. data/lib/perobs/BigTree.rb +197 -0
  15. data/lib/perobs/BigTreeNode.rb +873 -0
  16. data/lib/perobs/Cache.rb +47 -22
  17. data/lib/perobs/ClassMap.rb +2 -2
  18. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  19. data/lib/perobs/DataBase.rb +4 -3
  20. data/lib/perobs/DynamoDB.rb +62 -20
  21. data/lib/perobs/EquiBlobsFile.rb +174 -59
  22. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  23. data/lib/perobs/FlatFile.rb +536 -242
  24. data/lib/perobs/FlatFileBlobHeader.rb +120 -84
  25. data/lib/perobs/FlatFileDB.rb +58 -27
  26. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  27. data/lib/perobs/Hash.rb +129 -35
  28. data/lib/perobs/IDList.rb +144 -0
  29. data/lib/perobs/IDListPage.rb +107 -0
  30. data/lib/perobs/IDListPageFile.rb +180 -0
  31. data/lib/perobs/IDListPageRecord.rb +142 -0
  32. data/lib/perobs/LockFile.rb +3 -0
  33. data/lib/perobs/Object.rb +28 -20
  34. data/lib/perobs/ObjectBase.rb +53 -10
  35. data/lib/perobs/PersistentObjectCache.rb +142 -0
  36. data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
  37. data/lib/perobs/ProgressMeter.rb +97 -0
  38. data/lib/perobs/SpaceManager.rb +273 -0
  39. data/lib/perobs/SpaceTree.rb +63 -47
  40. data/lib/perobs/SpaceTreeNode.rb +134 -115
  41. data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
  42. data/lib/perobs/StackFile.rb +1 -1
  43. data/lib/perobs/Store.rb +180 -70
  44. data/lib/perobs/version.rb +1 -1
  45. data/perobs.gemspec +4 -4
  46. data/test/Array_spec.rb +48 -39
  47. data/test/BTreeDB_spec.rb +2 -2
  48. data/test/BTree_spec.rb +50 -1
  49. data/test/BigArray_spec.rb +261 -0
  50. data/test/BigHash_spec.rb +152 -0
  51. data/test/BigTreeNode_spec.rb +153 -0
  52. data/test/BigTree_spec.rb +259 -0
  53. data/test/EquiBlobsFile_spec.rb +105 -5
  54. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  55. data/test/FlatFileDB_spec.rb +199 -15
  56. data/test/FuzzyStringMatcher_spec.rb +261 -0
  57. data/test/Hash_spec.rb +27 -16
  58. data/test/IDList_spec.rb +77 -0
  59. data/test/LegacyDBs/LegacyDB.rb +155 -0
  60. data/test/LegacyDBs/version_3/class_map.json +1 -0
  61. data/test/LegacyDBs/version_3/config.json +1 -0
  62. data/test/LegacyDBs/version_3/database.blobs +0 -0
  63. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  64. data/test/LegacyDBs/version_3/index.blobs +0 -0
  65. data/test/LegacyDBs/version_3/version +1 -0
  66. data/test/LockFile_spec.rb +9 -6
  67. data/test/Object_spec.rb +5 -5
  68. data/test/SpaceManager_spec.rb +176 -0
  69. data/test/SpaceTree_spec.rb +27 -9
  70. data/test/Store_spec.rb +353 -206
  71. data/test/perobs_spec.rb +7 -3
  72. data/test/spec_helper.rb +9 -4
  73. metadata +59 -16
  74. data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
  75. data/lib/perobs/TreeDB.rb +0 -277
@@ -51,43 +51,87 @@ module PEROBS
51
51
  # Create a new SpaceTreeNode object. If node_address is not nil, the data
52
52
  # will be read from the SpaceTree file at the given node_address.
53
53
  # @param tree [SpaceTree] Tree that the object should belong to
54
- # @param parent [SpaceTreeNode] Parent node in the tree
55
54
  # @param node_address [Integer] Address of the node in the file
56
55
  # @param blob_address [Integer] Address of the free space blob
57
56
  # @param size [Integer] Size of the free space blob
58
- def initialize(tree, parent = nil, node_address = nil, blob_address = 0,
59
- size = 0)
57
+ # @param parent [SpaceTreeNode] Parent node in the tree
58
+ # @param smaller [SpaceTreeNode] smaller node in the tree
59
+ # @param equal [SpaceTreeNode] equal node in the tree
60
+ # @param larger [SpaceTreeNode] larger node in the tree
61
+ def initialize(tree, node_address, blob_address = 0, size = 0,
62
+ parent = nil, smaller = nil, equal = nil, larger = nil)
60
63
  @tree = tree
61
- if blob_address < 0
64
+ if node_address <= 0
62
65
  PEROBS.log.fatal "Node address (#{node_address}) must be larger than 0"
63
66
  end
67
+ @node_address = node_address
68
+ if blob_address < 0
69
+ PEROBS.log.fatal "Blob address (#{node_address}) must be larger than 0"
70
+ end
64
71
  @blob_address = blob_address
65
72
  @size = size
66
- @smaller = @equal = @larger = nil
67
- @node_address = node_address
73
+ @parent = parent
74
+ @smaller = smaller
75
+ @equal = equal
76
+ @larger = larger
77
+ end
68
78
 
69
- unless node_address.nil? || node_address.is_a?(Integer)
70
- PEROBS.log.fatal "node_address is not Integer: #{node_address.class}"
71
- end
79
+ # Create a new SpaceTreeNode. This method should be used for the creation
80
+ # of new nodes instead of calling the constructor directly.
81
+ # @param tree [SpaceTree] The tree the node should belong to
82
+ # @param blob_address [Integer] Address of the free space blob
83
+ # @param size [Integer] Size of the free space blob
84
+ # @param parent [SpaceTreeNode] Parent node in the tree
85
+ def SpaceTreeNode::create(tree, blob_address = 0, size = 0, parent = nil)
86
+ node_address = tree.nodes.free_address
72
87
 
73
- if node_address
74
- # This must be an existing node. Try to read it and fill the instance
75
- # variables.
76
- if size != 0
77
- PEROBS.log.fatal "If node_address is not nil size must be 0"
78
- end
79
- if blob_address != 0
80
- PEROBS.log.fatal "If node_address is not nil blob_address must be 0"
81
- end
82
- unless read_node
83
- PEROBS.log.fatal "SpaceTree node at address #{node_address} " +
84
- "does not exist"
85
- end
86
- else
87
- # This is a new node. Make sure the data is written to the file.
88
- @node_address = @tree.nodes.free_address
89
- self.parent = parent
88
+ node = SpaceTreeNode.new(tree, node_address, blob_address, size, parent)
89
+ tree.cache.insert(node)
90
+
91
+ node
92
+ end
93
+
94
+ # Restore a node from the backing store at the given address and tree.
95
+ # @param tree [SpaceTree] The tree the node belongs to
96
+ # @param node_address [Integer] The address in the file.
97
+ def SpaceTreeNode::load(tree, node_address, unused = nil)
98
+ unless node_address > 0
99
+ PEROBS.log.fatal "node_address (#{node_address}) must be larger than 0"
90
100
  end
101
+ unless (bytes = tree.nodes.retrieve_blob(node_address))
102
+ PEROBS.log.fatal "SpaceTreeNode at address #{node_address} does " +
103
+ "not exist"
104
+ end
105
+
106
+ blob_address, size, parent_node_address,
107
+ smaller_node_address, equal_node_address,
108
+ larger_node_address = bytes.unpack(NODE_BYTES_FORMAT)
109
+
110
+ parent = parent_node_address != 0 ?
111
+ SpaceTreeNodeLink.new(tree, parent_node_address) : nil
112
+ smaller = smaller_node_address != 0 ?
113
+ SpaceTreeNodeLink.new(tree, smaller_node_address) : nil
114
+ equal = equal_node_address != 0 ?
115
+ SpaceTreeNodeLink.new(tree, equal_node_address) : nil
116
+ larger = larger_node_address != 0 ?
117
+ SpaceTreeNodeLink.new(tree, larger_node_address) : nil
118
+
119
+ node = SpaceTreeNode.new(tree, node_address, blob_address, size,
120
+ parent, smaller, equal, larger)
121
+
122
+ tree.cache.insert(node, false)
123
+
124
+ node
125
+ end
126
+
127
+ # Save the node into the blob file.
128
+ def save
129
+ bytes = [ @blob_address, @size,
130
+ @parent ? @parent.node_address : 0,
131
+ @smaller ? @smaller.node_address : 0,
132
+ @equal ? @equal.node_address : 0,
133
+ @larger ? @larger.node_address : 0].pack(NODE_BYTES_FORMAT)
134
+ @tree.nodes.store_blob(@node_address, bytes)
91
135
  end
92
136
 
93
137
  # Add a new node for the given address and size to the tree.
@@ -110,7 +154,7 @@ module PEROBS
110
154
  # There is no smaller node yet, so we create a new one as a
111
155
  # smaller child of the current node.
112
156
  node.set_link('@smaller',
113
- @tree.new_node(node, address, size))
157
+ SpaceTreeNode::create(@tree, address, size, node))
114
158
  break
115
159
  end
116
160
  elsif size > node.size
@@ -122,13 +166,13 @@ module PEROBS
122
166
  # There is no larger node yet, so we create a new one as a larger
123
167
  # child of the current node.
124
168
  node.set_link('@larger',
125
- @tree.new_node(node, address, size))
169
+ SpaceTreeNode::create(@tree, address, size, node))
126
170
  break
127
171
  end
128
172
  else
129
173
  # Same size as current node. Insert new node as equal child at top of
130
174
  # equal list.
131
- new_node = @tree.new_node(node, address, size)
175
+ new_node = SpaceTreeNode::create(@tree, address, size, node)
132
176
  new_node.set_link('@equal', node.equal)
133
177
 
134
178
  node.set_link('@equal', new_node)
@@ -147,7 +191,7 @@ module PEROBS
147
191
  node = self
148
192
  loop do
149
193
  if node.blob_address == address
150
- return true
194
+ return size == node.size
151
195
  elsif size < node.size && node.smaller
152
196
  node = node.smaller
153
197
  elsif size > node.size && node.larger
@@ -242,7 +286,13 @@ module PEROBS
242
286
  PEROBS.log.fatal "Cannot unlink unknown child node with address " +
243
287
  "#{child_node.node_address} from #{to_s}"
244
288
  end
245
- write_node
289
+ @tree.cache.insert(self)
290
+ end
291
+
292
+ # @return [Integer] The node address since it uniquely identifies the
293
+ # Node.
294
+ def uid
295
+ @node_address
246
296
  end
247
297
 
248
298
  # Depth-first iterator for all nodes. The iterator yields the given block
@@ -339,7 +389,7 @@ module PEROBS
339
389
  @parent.set_link('@larger', node)
340
390
  else
341
391
  PEROBS.log.fatal "Cannot relink unknown child node with address " +
342
- "#{node.node_address} from #{to_s}"
392
+ "#{node.node_address} from #{parent.to_s}"
343
393
  end
344
394
  else
345
395
  if node
@@ -382,7 +432,7 @@ module PEROBS
382
432
  def set_size_and_address(size, address)
383
433
  @size = size
384
434
  @blob_address = address
385
- write_node
435
+ @tree.cache.insert(self)
386
436
  end
387
437
 
388
438
  def set_link(name, node_or_address)
@@ -398,12 +448,12 @@ module PEROBS
398
448
  # Clear the node link.
399
449
  instance_variable_set(name, nil)
400
450
  end
401
- write_node
451
+ @tree.cache.insert(self)
402
452
  end
403
453
 
404
454
  def parent=(p)
405
455
  @parent = p ? SpaceTreeNodeLink.new(@tree, p) : nil
406
- write_node
456
+ @tree.cache.insert(self)
407
457
  end
408
458
  # Compare this node to another node.
409
459
  # @return [Boolean] true if node address is identical, false otherwise
@@ -468,62 +518,65 @@ module PEROBS
468
518
  # errors.
469
519
  # @param flat_file [FlatFile] If given, check that the space is also
470
520
  # present in the given flat file.
521
+ # @param count [Integer] The total number of entries in the tree
471
522
  # @return [false,true] True if OK, false otherwise
472
- def check(flat_file)
523
+ def check(flat_file, count)
473
524
  node_counter = 0
474
525
  max_depth = 0
475
526
 
476
- each do |node, mode, stack|
477
- max_depth = stack.size if stack.size > max_depth
478
-
479
- case mode
480
- when :smaller
481
- if node.smaller
482
- return false unless node.check_node_link('smaller', stack)
483
- smaller_node = node.smaller
484
- if smaller_node.size >= node.size
485
- PEROBS.log.error "Smaller SpaceTreeNode size " +
486
- "(#{smaller_node}) is not smaller than #{node}"
487
- return false
527
+ @tree.progressmeter.start('Checking space list entries', count) do |pm|
528
+ each do |node, mode, stack|
529
+ max_depth = stack.size if stack.size > max_depth
530
+
531
+ case mode
532
+ when :smaller
533
+ if node.smaller
534
+ return false unless node.check_node_link('smaller', stack)
535
+ smaller_node = node.smaller
536
+ if smaller_node.size >= node.size
537
+ PEROBS.log.error "Smaller SpaceTreeNode size " +
538
+ "(#{smaller_node}) is not smaller than #{node}"
539
+ return false
540
+ end
488
541
  end
489
- end
490
- when :equal
491
- if node.equal
492
- return false unless node.check_node_link('equal', stack)
493
- equal_node = node.equal
494
-
495
- if equal_node.smaller || equal_node.larger
496
- PEROBS.log.error "Equal node #{equal_node} must not have " +
497
- "smaller/larger childs"
498
- return false
542
+ when :equal
543
+ if node.equal
544
+ return false unless node.check_node_link('equal', stack)
545
+ equal_node = node.equal
546
+
547
+ if equal_node.smaller || equal_node.larger
548
+ PEROBS.log.error "Equal node #{equal_node} must not have " +
549
+ "smaller/larger childs"
550
+ return false
551
+ end
552
+
553
+ if node.size != equal_node.size
554
+ PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) " +
555
+ "is not equal parent node #{node}"
556
+ return false
557
+ end
499
558
  end
500
-
501
- if node.size != equal_node.size
502
- PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) is " +
503
- "not equal parent node #{node}"
504
- return false
559
+ when :larger
560
+ if node.larger
561
+ return false unless node.check_node_link('larger', stack)
562
+ larger_node = node.larger
563
+ if larger_node.size <= node.size
564
+ PEROBS.log.error "Larger SpaceTreeNode size " +
565
+ "(#{larger_node}) is not larger than #{node}"
566
+ return false
567
+ end
505
568
  end
506
- end
507
- when :larger
508
- if node.larger
509
- return false unless node.check_node_link('larger', stack)
510
- larger_node = node.larger
511
- if larger_node.size <= node.size
512
- PEROBS.log.error "Larger SpaceTreeNode size " +
513
- "(#{larger_node}) is not larger than #{node}"
569
+ when :on_exit
570
+ if flat_file &&
571
+ !flat_file.has_space?(node.blob_address, node.size)
572
+ PEROBS.log.error "SpaceTreeNode has space at offset " +
573
+ "#{node.blob_address} of size #{node.size} that isn't " +
574
+ "available in the FlatFile."
514
575
  return false
515
576
  end
516
- end
517
- when :on_exit
518
- if flat_file &&
519
- !flat_file.has_space?(node.blob_address, node.size)
520
- PEROBS.log.error "SpaceTreeNode has space at offset " +
521
- "#{node.blob_address} of size #{node.size} that isn't " +
522
- "available in the FlatFile."
523
- return false
524
- end
525
577
 
526
- node_counter += 1
578
+ pm.update(node_counter += 1)
579
+ end
527
580
  end
528
581
  end
529
582
  PEROBS.log.debug "#{node_counter} SpaceTree nodes checked"
@@ -633,40 +686,6 @@ module PEROBS
633
686
  str
634
687
  end
635
688
 
636
- private
637
-
638
- def write_node
639
- bytes = [ @blob_address, @size,
640
- @parent ? @parent.node_address : 0,
641
- @smaller ? @smaller.node_address : 0,
642
- @equal ? @equal.node_address : 0,
643
- @larger ? @larger.node_address : 0].pack(NODE_BYTES_FORMAT)
644
- @tree.nodes.store_blob(@node_address, bytes)
645
- end
646
-
647
- def read_node
648
- unless @node_address > 0
649
- PEROBS.log.fatal "@node_address must be larger than 0"
650
- end
651
- return false unless (bytes = @tree.nodes.retrieve_blob(@node_address))
652
-
653
- @blob_address, @size, parent_node_address,
654
- smaller_node_address, equal_node_address,
655
- larger_node_address = bytes.unpack(NODE_BYTES_FORMAT)
656
- # The parent address can also be 0 as the parent can rightly point back
657
- # to the root node which always has the address 0.
658
- @parent = parent_node_address != 0 ?
659
- SpaceTreeNodeLink.new(@tree, parent_node_address) : nil
660
- @smaller = smaller_node_address != 0 ?
661
- SpaceTreeNodeLink.new(@tree, smaller_node_address) : nil
662
- @equal = equal_node_address != 0 ?
663
- SpaceTreeNodeLink.new(@tree, equal_node_address) : nil
664
- @larger = larger_node_address != 0 ?
665
- SpaceTreeNodeLink.new(@tree, larger_node_address) : nil
666
-
667
- true
668
- end
669
-
670
689
  end
671
690
 
672
691
  end
@@ -94,7 +94,7 @@ module PEROBS
94
94
  private
95
95
 
96
96
  def get_node
97
- @tree.get_node(@node_address)
97
+ @tree.cache.get(@node_address)
98
98
  end
99
99
 
100
100
  end
@@ -36,7 +36,7 @@ module PEROBS
36
36
  # Create a new stack file in the given directory with the given file name.
37
37
  # @param dir [String] Directory
38
38
  # @param name [String] File name
39
- # @param entry_bytes [Fixnum] Number of bytes each entry must have
39
+ # @param entry_bytes [Integer] Number of bytes each entry must have
40
40
  def initialize(dir, name, entry_bytes)
41
41
  @file_name = File.join(dir, name + '.stack')
42
42
  @entry_bytes = entry_bytes
data/lib/perobs/Store.rb CHANGED
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # = Store.rb -- Persistent Ruby Object Store
4
4
  #
5
- # Copyright (c) 2015, 2016 by Chris Schlaeger <chris@taskjuggler.org>
5
+ # Copyright (c) 2015, 2016, 2017, 2018, 2019
6
+ # by Chris Schlaeger <chris@taskjuggler.org>
6
7
  #
7
8
  # MIT License
8
9
  #
@@ -26,7 +27,6 @@
26
27
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28
 
28
29
  require 'set'
29
- require 'weakref'
30
30
 
31
31
  require 'perobs/Log'
32
32
  require 'perobs/Handle'
@@ -37,12 +37,18 @@ require 'perobs/FlatFileDB'
37
37
  require 'perobs/Object'
38
38
  require 'perobs/Hash'
39
39
  require 'perobs/Array'
40
+ require 'perobs/BigTree'
41
+ require 'perobs/BigHash'
42
+ require 'perobs/BigArray'
43
+ require 'perobs/ProgressMeter'
44
+ require 'perobs/ConsoleProgressMeter'
40
45
 
41
46
  # PErsistent Ruby OBject Store
42
47
  module PEROBS
43
48
 
44
- Statistics = Struct.new(:in_memory_objects, :root_objects,
45
- :marked_objects, :swept_objects)
49
+ Statistics = Struct.new(:in_memory_objects, :root_objects, :zombie_objects,
50
+ :marked_objects, :swept_objects,
51
+ :created_objects, :collected_objects)
46
52
 
47
53
  # PEROBS::Store is a persistent storage system for Ruby objects. Regular
48
54
  # Ruby objects are transparently stored in a back-end storage and retrieved
@@ -68,12 +74,20 @@ module PEROBS
68
74
  #
69
75
  # class Person < PEROBS::Object
70
76
  #
71
- # po_attr :name, :mother, :father, :kids
77
+ # attr_persist :name, :mother, :father, :kids
72
78
  #
79
+ # The constructor is only called for the creation of a new object. It is
80
+ # # not called when the object is restored from the database. In that case
81
+ # # only restore() is called.
73
82
  # def initialize(cf, name)
74
83
  # super(cf)
75
- # attr_init(:name, name)
76
- # attr_init(:kids, @store.new(PEROBS::Array))
84
+ # self.name = name
85
+ # self.kids = @store.new(PEROBS::Array)
86
+ # end
87
+ #
88
+ # def restore
89
+ # # In case you need to do any checks or massaging (e. g. for additional
90
+ # # attributes) you can provide this method.
77
91
  # end
78
92
  #
79
93
  # def to_s
@@ -91,11 +105,12 @@ module PEROBS
91
105
  # joe.kids << jim
92
106
  # jim.mother = jane
93
107
  # jane.kids << jim
94
- # store.sync
108
+ # store.exit
95
109
  #
96
110
  class Store
97
111
 
98
112
  attr_reader :db, :cache, :class_map
113
+ attr_writer :root_objects
99
114
 
100
115
  # Create a new Store.
101
116
  # @param data_base [String] the name of the database
@@ -120,14 +135,23 @@ module PEROBS
120
135
  # :json : About half as fast as marshal, but the
121
136
  # format is rock solid and portable between
122
137
  # languages. It only supports basic Ruby data types
123
- # like String, Fixnum, Float, Array, Hash. This is
138
+ # like String, Integer, Float, Array, Hash. This is
124
139
  # the default option.
125
140
  # :yaml : Can also handle most Ruby data types and is
126
141
  # portable between Ruby versions (1.9 and later).
127
142
  # Unfortunately, it is 10x slower than marshal.
143
+ # :progressmeter : reference to a ProgressMeter object that receives
144
+ # progress information during longer running tasks.
145
+ # It defaults to ProgressMeter which only logs into
146
+ # the log. Use ConsoleProgressMeter or a derived
147
+ # class for more fancy progress reporting.
148
+ # :no_root_objects : Create a new store without root objects. This only
149
+ # makes sense if you want to copy the objects of
150
+ # another store into this store.
128
151
  def initialize(data_base, options = {})
129
152
  # Create a backing store handler
130
- @db = (options[:engine] || BTreeDB).new(data_base, options)
153
+ @progressmeter = (options[:progressmeter] ||= ProgressMeter.new)
154
+ @db = (options[:engine] || FlatFileDB).new(data_base, options)
131
155
  @db.open
132
156
  # Create a map that can translate classes to numerical IDs and vice
133
157
  # versa.
@@ -136,25 +160,32 @@ module PEROBS
136
160
  # List of PEROBS objects that are currently available as Ruby objects
137
161
  # hashed by their ID.
138
162
  @in_memory_objects = {}
163
+ # List of objects that were destroyed already but were still found in
164
+ # the in_memory_objects list. _collect has not yet been called for them.
165
+ @zombie_objects = {}
139
166
 
140
167
  # This object keeps some counters of interest.
141
168
  @stats = Statistics.new
169
+ @stats[:created_objects] = 0
170
+ @stats[:collected_objects] = 0
142
171
 
143
172
  # The Cache reduces read and write latencies by keeping a subset of the
144
173
  # objects in memory.
145
174
  @cache = Cache.new(options[:cache_bits] || 16)
146
175
 
147
176
  # The named (global) objects IDs hashed by their name
148
- unless (@root_objects = object_by_id(0))
149
- PEROBS.log.debug "Initializing the PEROBS store"
150
- # The root object hash always has the object ID 0.
151
- @root_objects = _construct_po(Hash, 0)
152
- # Mark the root_objects object as modified.
153
- @cache.cache_write(@root_objects)
154
- end
155
- unless @root_objects.is_a?(Hash)
156
- PEROBS.log.fatal "Database corrupted: Root objects must be a Hash " +
157
- "but is a #{@root_objects.class}"
177
+ unless options[:no_root_objects]
178
+ unless (@root_objects = object_by_id(0))
179
+ PEROBS.log.debug "Initializing the PEROBS store"
180
+ # The root object hash always has the object ID 0.
181
+ @root_objects = _construct_po(Hash, 0)
182
+ # Mark the root_objects object as modified.
183
+ @cache.cache_write(@root_objects)
184
+ end
185
+ unless @root_objects.is_a?(Hash)
186
+ PEROBS.log.fatal "Database corrupted: Root objects must be a Hash " +
187
+ "but is a #{@root_objects.class}"
188
+ end
158
189
  end
159
190
  end
160
191
 
@@ -166,7 +197,9 @@ module PEROBS
166
197
  sync
167
198
 
168
199
  # Create a new store with the specified directory and options.
169
- new_db = Store.new(dir, options)
200
+ new_options = options.clone
201
+ new_options[:no_root_objects] = true
202
+ new_db = Store.new(dir, new_options)
170
203
  # Clear the cache.
171
204
  new_db.sync
172
205
  # Copy all objects of the existing store to the new store.
@@ -177,6 +210,7 @@ module PEROBS
177
210
  obj._sync
178
211
  i += 1
179
212
  end
213
+ new_db.root_objects = new_db.object_by_id(0)
180
214
  PEROBS.log.debug "Copied #{i} objects into new database at #{dir}"
181
215
  # Flush the new store and close it.
182
216
  new_db.exit
@@ -184,20 +218,34 @@ module PEROBS
184
218
  true
185
219
  end
186
220
 
187
-
188
221
  # Close the store and ensure that all in-memory objects are written out to
189
222
  # the storage backend. The Store object is no longer usable after this
190
223
  # method was called.
191
224
  def exit
192
225
  if @cache && @cache.in_transaction?
193
- PEROBS.log.fatal 'You cannot call exit() during a transaction'
226
+ @cache.abort_transaction
227
+ @cache.flush
228
+ @db.close if @db
229
+ PEROBS.log.fatal "You cannot call exit() during a transaction: #{Kernel.caller}"
194
230
  end
195
231
  @cache.flush if @cache
196
232
  @db.close if @db
197
- @db = @class_map = @in_memory_objects = @stats = @cache = @root_objects =
198
- nil
199
- end
200
233
 
234
+ GC.start
235
+ if @stats
236
+ unless @stats[:created_objects] == @stats[:collected_objects] +
237
+ @in_memory_objects.length
238
+ PEROGS.log.fatal "Created objects count " +
239
+ "(#{@stats[:created_objects]})" +
240
+ " is not equal to the collected count " +
241
+ "(#{@stats[:collected_objects]}) + in_memory_objects count " +
242
+ "(#{@in_memory_objects.length})"
243
+ end
244
+ end
245
+
246
+ @db = @class_map = @in_memory_objects = @zombie_objects =
247
+ @stats = @cache = @root_objects = nil
248
+ end
201
249
 
202
250
  # You need to call this method to create new PEROBS objects that belong to
203
251
  # this Store.
@@ -221,7 +269,7 @@ module PEROBS
221
269
  # For library internal use only!
222
270
  # This method will create a new PEROBS object.
223
271
  # @param klass [BasicObject] Class of the object to create
224
- # @param id [Fixnum, Bignum] Requested object ID
272
+ # @param id [Integer] Requested object ID
225
273
  # @param args [Array] Arguments to pass to the object constructor.
226
274
  # @return [BasicObject] Newly constructed PEROBS object
227
275
  def _construct_po(klass, id, *args)
@@ -229,10 +277,12 @@ module PEROBS
229
277
  end
230
278
 
231
279
  # Delete the entire store. The store is no longer usable after this
232
- # method was called.
280
+ # method was called. This is an alternative to exit() that additionally
281
+ # deletes the entire database.
233
282
  def delete_store
234
283
  @db.delete_database
235
- @db = @class_map = @cache = @root_objects = nil
284
+ @db = @class_map = @in_memory_objects = @zombie_objects =
285
+ @stats = @cache = @root_objects = nil
236
286
  end
237
287
 
238
288
  # Store the provided object under the given name. Use this to make the
@@ -288,20 +338,30 @@ module PEROBS
288
338
  # needed.
289
339
  def sync
290
340
  if @cache.in_transaction?
291
- PEROBS.log.fatal 'You cannot call sync() during a transaction'
341
+ @cache.abort_transaction
342
+ @cache.flush
343
+ PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
344
+ Kernel.caller.join("\n")
292
345
  end
293
346
  @cache.flush
294
347
  end
295
348
 
349
+ # Return the number of objects stored in the store. CAVEAT: This method
350
+ # will only return correct values when it is separated from any mutating
351
+ # call by a call to sync().
352
+ # @return [Integer] Number of persistently stored objects in the Store.
353
+ def size
354
+ # We don't include the Hash that stores the root objects into the object
355
+ # count.
356
+ @db.item_counter - 1
357
+ end
358
+
296
359
  # Discard all objects that are not somehow connected to the root objects
297
360
  # from the back-end storage. The garbage collector is not invoked
298
361
  # automatically. Depending on your usage pattern, you need to call this
299
362
  # method periodically.
300
- # @return [Fixnum] The number of collected objects
363
+ # @return [Integer] The number of collected objects
301
364
  def gc
302
- if @cache.in_transaction?
303
- PEROBS.log.fatal 'You cannot call gc() during a transaction'
304
- end
305
365
  sync
306
366
  mark
307
367
  sweep
@@ -311,17 +371,30 @@ module PEROBS
311
371
  # public API and should never be called by outside users. It's purely
312
372
  # intended for internal use.
313
373
  def object_by_id(id)
314
- if (obj = @in_memory_objects[id])
374
+ if (ruby_object_id = @in_memory_objects[id])
315
375
  # We have the object in memory so we can just return it.
316
376
  begin
317
- return obj.__getobj__
318
- rescue WeakRef::RefError
377
+ object = ObjectSpace._id2ref(ruby_object_id)
378
+ # Let's make sure the object is really the object we are looking
379
+ # for. The GC might have recycled it already and the Ruby object ID
380
+ # could now be used for another object.
381
+ if object.is_a?(ObjectBase) && object._id == id
382
+ return object
383
+ end
384
+ rescue RangeError => e
319
385
  # Due to a race condition the object can still be in the
320
386
  # @in_memory_objects list but has been collected already by the Ruby
321
- # GC. In that case we need to load it again.
387
+ # GC. In that case we need to load it again. The _collect() call
388
+ # will happen much later, potentially after we have registered a new
389
+ # object with the same ID.
390
+ @zombie_objects[id] = @in_memory_objects.delete(id)
322
391
  end
323
392
  end
324
393
 
394
+ if (obj = @cache.object_by_id(id))
395
+ PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in cache but not in_memory"
396
+ end
397
+
325
398
  # We don't have the object in memory. Let's find it in the storage.
326
399
  if @db.include?(id)
327
400
  # Great, object found. Read it into memory and return it.
@@ -342,40 +415,44 @@ module PEROBS
342
415
  # unreadable object is found, the reference will simply be deleted.
343
416
  # @param repair [TrueClass/FalseClass] true if a repair attempt should be
344
417
  # made.
345
- # @return [Fixnum] The number of references to bad objects found.
418
+ # @return [Integer] The number of references to bad objects found.
346
419
  def check(repair = false)
420
+ stats = { :errors => 0, :object_cnt => 0 }
421
+
347
422
  # All objects must have in-db version.
348
423
  sync
349
424
  # Run basic consistency checks first.
350
- errors = @db.check_db(repair)
425
+ stats[:errors] += @db.check_db(repair)
351
426
 
352
427
  # We will use the mark to mark all objects that we have checked already.
353
428
  # Before we start, we need to clear all marks.
354
429
  @db.clear_marks
355
430
 
356
- objects = 0
357
- @root_objects.each do |name, id|
358
- objects += 1
359
- errors += check_object(id, repair)
431
+ @progressmeter.start("Checking object link structure",
432
+ @db.item_counter) do
433
+ @root_objects.each do |name, id|
434
+ check_object(id, repair, stats)
435
+ end
360
436
  end
361
437
 
362
438
  # Delete all broken root objects.
363
439
  if repair
364
440
  @root_objects.delete_if do |name, id|
365
- unless (res = @db.check(id, repair))
441
+ unless @db.check(id, repair)
366
442
  PEROBS.log.error "Discarding broken root object '#{name}' " +
367
443
  "with ID #{id}"
368
- errors += 1
444
+ stats[:errors] += 1
369
445
  end
370
- !res
371
446
  end
372
447
  end
373
448
 
374
- if errors > 0
449
+ if stats[:errors] > 0
375
450
  if repair
376
- PEROBS.log.error "#{errors} errors found in #{objects} objects"
451
+ PEROBS.log.error "#{stats[:errors]} errors found in " +
452
+ "#{stats[:object_cnt]} objects"
377
453
  else
378
- PEROBS.log.fatal "#{errors} errors found in #{objects} objects"
454
+ PEROBS.log.fatal "#{stats[:errors]} errors found in " +
455
+ "#{stats[:object_cnt]} objects"
379
456
  end
380
457
  else
381
458
  PEROBS.log.debug "No errors found"
@@ -384,7 +461,7 @@ module PEROBS
384
461
  # Ensure that any fixes are written into the DB.
385
462
  sync if repair
386
463
 
387
- errors
464
+ stats[:errors]
388
465
  end
389
466
 
390
467
  # This method will execute the provided block as an atomic transaction
@@ -413,7 +490,10 @@ module PEROBS
413
490
  stack = [ 0 ] + @root_objects.values
414
491
  while !stack.empty?
415
492
  # Get an object index from the stack.
416
- unless (obj = object_by_id(id = stack.pop))
493
+ id = stack.pop
494
+ next if @db.is_marked?(id)
495
+
496
+ unless (obj = object_by_id(id))
417
497
  PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
418
498
  "not found."
419
499
  end
@@ -436,7 +516,7 @@ module PEROBS
436
516
  # Internal method. Don't use this outside of this library!
437
517
  # Generate a new unique ID that is not used by any other object. It uses
438
518
  # random numbers between 0 and 2**64 - 1.
439
- # @return [Fixnum or Bignum]
519
+ # @return [Integer]
440
520
  def _new_id
441
521
  begin
442
522
  # Generate a random number. It's recommended to not store more than
@@ -454,23 +534,39 @@ module PEROBS
454
534
  # happens the object finalizer is triggered and calls _forget() to
455
535
  # remove the object from this hash again.
456
536
  # @param obj [BasicObject] Object to register
457
- # @param id [Fixnum or Bignum] object ID
537
+ # @param id [Integer] object ID
458
538
  def _register_in_memory(obj, id)
459
- @in_memory_objects[id] = WeakRef.new(obj)
539
+ unless obj.is_a?(ObjectBase)
540
+ PEROBS.log.fatal "You can only register ObjectBase objects"
541
+ end
542
+ if @in_memory_objects.include?(id)
543
+ PEROBS.log.fatal "The Store::_in_memory_objects list already " +
544
+ "contains an object for ID #{id}"
545
+ end
546
+
547
+ @in_memory_objects[id] = obj.object_id
548
+ @stats[:created_objects] += 1
460
549
  end
461
550
 
462
551
  # Remove the object from the in-memory list. This is an internal method
463
552
  # and should never be called from user code. It will be called from a
464
553
  # finalizer, so many restrictions apply!
465
- # @param id [Fixnum or Bignum] Object ID of object to remove from the list
466
- def _collect(id, ignore_errors = false)
467
- @in_memory_objects.delete(id)
554
+ # @param id [Integer] Object ID of object to remove from the list
555
+ def _collect(id, ruby_object_id)
556
+ if @in_memory_objects[id] == ruby_object_id
557
+ @in_memory_objects.delete(id)
558
+ @stats[:collected_objects] += 1
559
+ elsif @zombie_objects[id] == ruby_object_id
560
+ @zombie_objects.delete(id)
561
+ @stats[:collected_objects] += 1
562
+ end
468
563
  end
469
564
 
470
565
  # This method returns a Hash with some statistics about this store.
471
566
  def statistics
472
567
  @stats.in_memory_objects = @in_memory_objects.length
473
568
  @stats.root_objects = @root_objects.length
569
+ @stats.zombie_objects = @zombie_objects.length
474
570
 
475
571
  @stats
476
572
  end
@@ -482,32 +578,40 @@ module PEROBS
482
578
  def mark
483
579
  classes = Set.new
484
580
  marked_objects = 0
485
- each { |obj| classes.add(obj.class); marked_objects += 1 }
581
+ @progressmeter.start("Marking linked objects", @db.item_counter) do
582
+ each do |obj|
583
+ classes.add(obj.class)
584
+ @progressmeter.update(marked_objects += 1)
585
+ end
586
+ end
486
587
  @class_map.keep(classes.map { |c| c.to_s })
487
588
 
488
589
  # The root_objects object is included in the count, but we only want to
489
590
  # count user objects here.
490
- PEROBS.log.debug "#{marked_objects - 1} objects marked"
591
+ PEROBS.log.debug "#{marked_objects - 1} of #{@db.item_counter} " +
592
+ "objects marked"
491
593
  @stats.marked_objects = marked_objects - 1
492
594
  end
493
595
 
494
596
  # Sweep phase of a mark-and-sweep garbage collector. It will remove all
495
597
  # unmarked objects from the store.
496
598
  def sweep
497
- @stats.swept_objects = @db.delete_unmarked_objects.length
498
- @cache.reset
599
+ @stats.swept_objects = @db.delete_unmarked_objects do |id|
600
+ @cache.evict(id)
601
+ end
602
+ @db.clear_marks
603
+ GC.start
499
604
  PEROBS.log.debug "#{@stats.swept_objects} objects collected"
500
605
  @stats.swept_objects
501
606
  end
502
607
 
503
608
  # Check the object with the given start_id and all other objects that are
504
609
  # somehow reachable from the start object.
505
- # @param start_id [Fixnum or Bignum] ID of the top-level object to start
610
+ # @param start_id [Integer] ID of the top-level object to start
506
611
  # with
507
612
  # @param repair [Boolean] Delete references to broken objects if true
508
- # @return [Fixnum] The number of references to bad objects.
509
- def check_object(start_id, repair)
510
- errors = 0
613
+ # @return [Integer] The number of references to bad objects.
614
+ def check_object(start_id, repair, stats)
511
615
  @db.mark(start_id)
512
616
  # The todo list holds a tuple for each object that still needs to be
513
617
  # checked. The first item is the referring object and the second is the
@@ -518,7 +622,13 @@ module PEROBS
518
622
  # Get the next PEROBS object to check
519
623
  ref_obj, id = todo_list.pop
520
624
 
521
- if (obj = object_by_id(id)) && (obj_ok = @db.check(id, repair))
625
+ begin
626
+ obj = object_by_id(id)
627
+ rescue PEROBS::FatalError
628
+ obj = nil
629
+ end
630
+
631
+ if obj
522
632
  # The object exists and is OK. Mark it as checked.
523
633
  @db.mark(id)
524
634
  # Now look at all other objects referenced by this object.
@@ -541,11 +651,11 @@ module PEROBS
541
651
  ref_obj.inspect
542
652
  end
543
653
  end
544
- errors += 1
654
+ stats[:errors] += 1
545
655
  end
546
- end
547
656
 
548
- errors
657
+ @progressmeter.update(stats[:object_cnt] += 1)
658
+ end
549
659
  end
550
660
 
551
661
  end