perobs 3.0.1 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +19 -18
- data/lib/perobs.rb +2 -0
- data/lib/perobs/Array.rb +68 -21
- data/lib/perobs/BTree.rb +110 -54
- data/lib/perobs/BTreeBlob.rb +14 -13
- data/lib/perobs/BTreeDB.rb +11 -10
- data/lib/perobs/BTreeNode.rb +551 -197
- data/lib/perobs/BTreeNodeCache.rb +10 -8
- data/lib/perobs/BTreeNodeLink.rb +11 -1
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +47 -22
- data/lib/perobs/ClassMap.rb +2 -2
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +62 -20
- data/lib/perobs/EquiBlobsFile.rb +174 -59
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +536 -242
- data/lib/perobs/FlatFileBlobHeader.rb +120 -84
- data/lib/perobs/FlatFileDB.rb +58 -27
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +129 -35
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/LockFile.rb +3 -0
- data/lib/perobs/Object.rb +28 -20
- data/lib/perobs/ObjectBase.rb +53 -10
- data/lib/perobs/PersistentObjectCache.rb +142 -0
- data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +63 -47
- data/lib/perobs/SpaceTreeNode.rb +134 -115
- data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
- data/lib/perobs/StackFile.rb +1 -1
- data/lib/perobs/Store.rb +180 -70
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +48 -39
- data/test/BTreeDB_spec.rb +2 -2
- data/test/BTree_spec.rb +50 -1
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -5
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +199 -15
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +27 -16
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/Object_spec.rb +5 -5
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +27 -9
- data/test/Store_spec.rb +353 -206
- data/test/perobs_spec.rb +7 -3
- data/test/spec_helper.rb +9 -4
- metadata +59 -16
- data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
- data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/SpaceTreeNode.rb
CHANGED
@@ -51,43 +51,87 @@ module PEROBS
|
|
51
51
|
# Create a new SpaceTreeNode object. If node_address is not nil, the data
|
52
52
|
# will be read from the SpaceTree file at the given node_address.
|
53
53
|
# @param tree [SpaceTree] Tree that the object should belong to
|
54
|
-
# @param parent [SpaceTreeNode] Parent node in the tree
|
55
54
|
# @param node_address [Integer] Address of the node in the file
|
56
55
|
# @param blob_address [Integer] Address of the free space blob
|
57
56
|
# @param size [Integer] Size of the free space blob
|
58
|
-
|
59
|
-
|
57
|
+
# @param parent [SpaceTreeNode] Parent node in the tree
|
58
|
+
# @param smaller [SpaceTreeNode] smaller node in the tree
|
59
|
+
# @param equal [SpaceTreeNode] equal node in the tree
|
60
|
+
# @param larger [SpaceTreeNode] larger node in the tree
|
61
|
+
def initialize(tree, node_address, blob_address = 0, size = 0,
|
62
|
+
parent = nil, smaller = nil, equal = nil, larger = nil)
|
60
63
|
@tree = tree
|
61
|
-
if
|
64
|
+
if node_address <= 0
|
62
65
|
PEROBS.log.fatal "Node address (#{node_address}) must be larger than 0"
|
63
66
|
end
|
67
|
+
@node_address = node_address
|
68
|
+
if blob_address < 0
|
69
|
+
PEROBS.log.fatal "Blob address (#{node_address}) must be larger than 0"
|
70
|
+
end
|
64
71
|
@blob_address = blob_address
|
65
72
|
@size = size
|
66
|
-
@
|
67
|
-
@
|
73
|
+
@parent = parent
|
74
|
+
@smaller = smaller
|
75
|
+
@equal = equal
|
76
|
+
@larger = larger
|
77
|
+
end
|
68
78
|
|
69
|
-
|
70
|
-
|
71
|
-
|
79
|
+
# Create a new SpaceTreeNode. This method should be used for the creation
|
80
|
+
# of new nodes instead of calling the constructor directly.
|
81
|
+
# @param tree [SpaceTree] The tree the node should belong to
|
82
|
+
# @param blob_address [Integer] Address of the free space blob
|
83
|
+
# @param size [Integer] Size of the free space blob
|
84
|
+
# @param parent [SpaceTreeNode] Parent node in the tree
|
85
|
+
def SpaceTreeNode::create(tree, blob_address = 0, size = 0, parent = nil)
|
86
|
+
node_address = tree.nodes.free_address
|
72
87
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
else
|
87
|
-
# This is a new node. Make sure the data is written to the file.
|
88
|
-
@node_address = @tree.nodes.free_address
|
89
|
-
self.parent = parent
|
88
|
+
node = SpaceTreeNode.new(tree, node_address, blob_address, size, parent)
|
89
|
+
tree.cache.insert(node)
|
90
|
+
|
91
|
+
node
|
92
|
+
end
|
93
|
+
|
94
|
+
# Restore a node from the backing store at the given address and tree.
|
95
|
+
# @param tree [SpaceTree] The tree the node belongs to
|
96
|
+
# @param node_address [Integer] The address in the file.
|
97
|
+
def SpaceTreeNode::load(tree, node_address, unused = nil)
|
98
|
+
unless node_address > 0
|
99
|
+
PEROBS.log.fatal "node_address (#{node_address}) must be larger than 0"
|
90
100
|
end
|
101
|
+
unless (bytes = tree.nodes.retrieve_blob(node_address))
|
102
|
+
PEROBS.log.fatal "SpaceTreeNode at address #{node_address} does " +
|
103
|
+
"not exist"
|
104
|
+
end
|
105
|
+
|
106
|
+
blob_address, size, parent_node_address,
|
107
|
+
smaller_node_address, equal_node_address,
|
108
|
+
larger_node_address = bytes.unpack(NODE_BYTES_FORMAT)
|
109
|
+
|
110
|
+
parent = parent_node_address != 0 ?
|
111
|
+
SpaceTreeNodeLink.new(tree, parent_node_address) : nil
|
112
|
+
smaller = smaller_node_address != 0 ?
|
113
|
+
SpaceTreeNodeLink.new(tree, smaller_node_address) : nil
|
114
|
+
equal = equal_node_address != 0 ?
|
115
|
+
SpaceTreeNodeLink.new(tree, equal_node_address) : nil
|
116
|
+
larger = larger_node_address != 0 ?
|
117
|
+
SpaceTreeNodeLink.new(tree, larger_node_address) : nil
|
118
|
+
|
119
|
+
node = SpaceTreeNode.new(tree, node_address, blob_address, size,
|
120
|
+
parent, smaller, equal, larger)
|
121
|
+
|
122
|
+
tree.cache.insert(node, false)
|
123
|
+
|
124
|
+
node
|
125
|
+
end
|
126
|
+
|
127
|
+
# Save the node into the blob file.
|
128
|
+
def save
|
129
|
+
bytes = [ @blob_address, @size,
|
130
|
+
@parent ? @parent.node_address : 0,
|
131
|
+
@smaller ? @smaller.node_address : 0,
|
132
|
+
@equal ? @equal.node_address : 0,
|
133
|
+
@larger ? @larger.node_address : 0].pack(NODE_BYTES_FORMAT)
|
134
|
+
@tree.nodes.store_blob(@node_address, bytes)
|
91
135
|
end
|
92
136
|
|
93
137
|
# Add a new node for the given address and size to the tree.
|
@@ -110,7 +154,7 @@ module PEROBS
|
|
110
154
|
# There is no smaller node yet, so we create a new one as a
|
111
155
|
# smaller child of the current node.
|
112
156
|
node.set_link('@smaller',
|
113
|
-
@tree
|
157
|
+
SpaceTreeNode::create(@tree, address, size, node))
|
114
158
|
break
|
115
159
|
end
|
116
160
|
elsif size > node.size
|
@@ -122,13 +166,13 @@ module PEROBS
|
|
122
166
|
# There is no larger node yet, so we create a new one as a larger
|
123
167
|
# child of the current node.
|
124
168
|
node.set_link('@larger',
|
125
|
-
@tree
|
169
|
+
SpaceTreeNode::create(@tree, address, size, node))
|
126
170
|
break
|
127
171
|
end
|
128
172
|
else
|
129
173
|
# Same size as current node. Insert new node as equal child at top of
|
130
174
|
# equal list.
|
131
|
-
new_node = @tree
|
175
|
+
new_node = SpaceTreeNode::create(@tree, address, size, node)
|
132
176
|
new_node.set_link('@equal', node.equal)
|
133
177
|
|
134
178
|
node.set_link('@equal', new_node)
|
@@ -147,7 +191,7 @@ module PEROBS
|
|
147
191
|
node = self
|
148
192
|
loop do
|
149
193
|
if node.blob_address == address
|
150
|
-
return
|
194
|
+
return size == node.size
|
151
195
|
elsif size < node.size && node.smaller
|
152
196
|
node = node.smaller
|
153
197
|
elsif size > node.size && node.larger
|
@@ -242,7 +286,13 @@ module PEROBS
|
|
242
286
|
PEROBS.log.fatal "Cannot unlink unknown child node with address " +
|
243
287
|
"#{child_node.node_address} from #{to_s}"
|
244
288
|
end
|
245
|
-
|
289
|
+
@tree.cache.insert(self)
|
290
|
+
end
|
291
|
+
|
292
|
+
# @return [Integer] The node address since it uniquely identifies the
|
293
|
+
# Node.
|
294
|
+
def uid
|
295
|
+
@node_address
|
246
296
|
end
|
247
297
|
|
248
298
|
# Depth-first iterator for all nodes. The iterator yields the given block
|
@@ -339,7 +389,7 @@ module PEROBS
|
|
339
389
|
@parent.set_link('@larger', node)
|
340
390
|
else
|
341
391
|
PEROBS.log.fatal "Cannot relink unknown child node with address " +
|
342
|
-
"#{node.node_address} from #{to_s}"
|
392
|
+
"#{node.node_address} from #{parent.to_s}"
|
343
393
|
end
|
344
394
|
else
|
345
395
|
if node
|
@@ -382,7 +432,7 @@ module PEROBS
|
|
382
432
|
def set_size_and_address(size, address)
|
383
433
|
@size = size
|
384
434
|
@blob_address = address
|
385
|
-
|
435
|
+
@tree.cache.insert(self)
|
386
436
|
end
|
387
437
|
|
388
438
|
def set_link(name, node_or_address)
|
@@ -398,12 +448,12 @@ module PEROBS
|
|
398
448
|
# Clear the node link.
|
399
449
|
instance_variable_set(name, nil)
|
400
450
|
end
|
401
|
-
|
451
|
+
@tree.cache.insert(self)
|
402
452
|
end
|
403
453
|
|
404
454
|
def parent=(p)
|
405
455
|
@parent = p ? SpaceTreeNodeLink.new(@tree, p) : nil
|
406
|
-
|
456
|
+
@tree.cache.insert(self)
|
407
457
|
end
|
408
458
|
# Compare this node to another node.
|
409
459
|
# @return [Boolean] true if node address is identical, false otherwise
|
@@ -468,62 +518,65 @@ module PEROBS
|
|
468
518
|
# errors.
|
469
519
|
# @param flat_file [FlatFile] If given, check that the space is also
|
470
520
|
# present in the given flat file.
|
521
|
+
# @param count [Integer] The total number of entries in the tree
|
471
522
|
# @return [false,true] True if OK, false otherwise
|
472
|
-
def check(flat_file)
|
523
|
+
def check(flat_file, count)
|
473
524
|
node_counter = 0
|
474
525
|
max_depth = 0
|
475
526
|
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
"
|
487
|
-
|
527
|
+
@tree.progressmeter.start('Checking space list entries', count) do |pm|
|
528
|
+
each do |node, mode, stack|
|
529
|
+
max_depth = stack.size if stack.size > max_depth
|
530
|
+
|
531
|
+
case mode
|
532
|
+
when :smaller
|
533
|
+
if node.smaller
|
534
|
+
return false unless node.check_node_link('smaller', stack)
|
535
|
+
smaller_node = node.smaller
|
536
|
+
if smaller_node.size >= node.size
|
537
|
+
PEROBS.log.error "Smaller SpaceTreeNode size " +
|
538
|
+
"(#{smaller_node}) is not smaller than #{node}"
|
539
|
+
return false
|
540
|
+
end
|
488
541
|
end
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
542
|
+
when :equal
|
543
|
+
if node.equal
|
544
|
+
return false unless node.check_node_link('equal', stack)
|
545
|
+
equal_node = node.equal
|
546
|
+
|
547
|
+
if equal_node.smaller || equal_node.larger
|
548
|
+
PEROBS.log.error "Equal node #{equal_node} must not have " +
|
549
|
+
"smaller/larger childs"
|
550
|
+
return false
|
551
|
+
end
|
552
|
+
|
553
|
+
if node.size != equal_node.size
|
554
|
+
PEROBS.log.error "Equal SpaceTreeNode size (#{equal_node}) " +
|
555
|
+
"is not equal parent node #{node}"
|
556
|
+
return false
|
557
|
+
end
|
499
558
|
end
|
500
|
-
|
501
|
-
if node.
|
502
|
-
|
503
|
-
|
504
|
-
|
559
|
+
when :larger
|
560
|
+
if node.larger
|
561
|
+
return false unless node.check_node_link('larger', stack)
|
562
|
+
larger_node = node.larger
|
563
|
+
if larger_node.size <= node.size
|
564
|
+
PEROBS.log.error "Larger SpaceTreeNode size " +
|
565
|
+
"(#{larger_node}) is not larger than #{node}"
|
566
|
+
return false
|
567
|
+
end
|
505
568
|
end
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
PEROBS.log.error "Larger SpaceTreeNode size " +
|
513
|
-
"(#{larger_node}) is not larger than #{node}"
|
569
|
+
when :on_exit
|
570
|
+
if flat_file &&
|
571
|
+
!flat_file.has_space?(node.blob_address, node.size)
|
572
|
+
PEROBS.log.error "SpaceTreeNode has space at offset " +
|
573
|
+
"#{node.blob_address} of size #{node.size} that isn't " +
|
574
|
+
"available in the FlatFile."
|
514
575
|
return false
|
515
576
|
end
|
516
|
-
end
|
517
|
-
when :on_exit
|
518
|
-
if flat_file &&
|
519
|
-
!flat_file.has_space?(node.blob_address, node.size)
|
520
|
-
PEROBS.log.error "SpaceTreeNode has space at offset " +
|
521
|
-
"#{node.blob_address} of size #{node.size} that isn't " +
|
522
|
-
"available in the FlatFile."
|
523
|
-
return false
|
524
|
-
end
|
525
577
|
|
526
|
-
|
578
|
+
pm.update(node_counter += 1)
|
579
|
+
end
|
527
580
|
end
|
528
581
|
end
|
529
582
|
PEROBS.log.debug "#{node_counter} SpaceTree nodes checked"
|
@@ -633,40 +686,6 @@ module PEROBS
|
|
633
686
|
str
|
634
687
|
end
|
635
688
|
|
636
|
-
private
|
637
|
-
|
638
|
-
def write_node
|
639
|
-
bytes = [ @blob_address, @size,
|
640
|
-
@parent ? @parent.node_address : 0,
|
641
|
-
@smaller ? @smaller.node_address : 0,
|
642
|
-
@equal ? @equal.node_address : 0,
|
643
|
-
@larger ? @larger.node_address : 0].pack(NODE_BYTES_FORMAT)
|
644
|
-
@tree.nodes.store_blob(@node_address, bytes)
|
645
|
-
end
|
646
|
-
|
647
|
-
def read_node
|
648
|
-
unless @node_address > 0
|
649
|
-
PEROBS.log.fatal "@node_address must be larger than 0"
|
650
|
-
end
|
651
|
-
return false unless (bytes = @tree.nodes.retrieve_blob(@node_address))
|
652
|
-
|
653
|
-
@blob_address, @size, parent_node_address,
|
654
|
-
smaller_node_address, equal_node_address,
|
655
|
-
larger_node_address = bytes.unpack(NODE_BYTES_FORMAT)
|
656
|
-
# The parent address can also be 0 as the parent can rightly point back
|
657
|
-
# to the root node which always has the address 0.
|
658
|
-
@parent = parent_node_address != 0 ?
|
659
|
-
SpaceTreeNodeLink.new(@tree, parent_node_address) : nil
|
660
|
-
@smaller = smaller_node_address != 0 ?
|
661
|
-
SpaceTreeNodeLink.new(@tree, smaller_node_address) : nil
|
662
|
-
@equal = equal_node_address != 0 ?
|
663
|
-
SpaceTreeNodeLink.new(@tree, equal_node_address) : nil
|
664
|
-
@larger = larger_node_address != 0 ?
|
665
|
-
SpaceTreeNodeLink.new(@tree, larger_node_address) : nil
|
666
|
-
|
667
|
-
true
|
668
|
-
end
|
669
|
-
|
670
689
|
end
|
671
690
|
|
672
691
|
end
|
data/lib/perobs/StackFile.rb
CHANGED
@@ -36,7 +36,7 @@ module PEROBS
|
|
36
36
|
# Create a new stack file in the given directory with the given file name.
|
37
37
|
# @param dir [String] Directory
|
38
38
|
# @param name [String] File name
|
39
|
-
# @param entry_bytes [
|
39
|
+
# @param entry_bytes [Integer] Number of bytes each entry must have
|
40
40
|
def initialize(dir, name, entry_bytes)
|
41
41
|
@file_name = File.join(dir, name + '.stack')
|
42
42
|
@entry_bytes = entry_bytes
|
data/lib/perobs/Store.rb
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
#
|
3
3
|
# = Store.rb -- Persistent Ruby Object Store
|
4
4
|
#
|
5
|
-
# Copyright (c) 2015, 2016
|
5
|
+
# Copyright (c) 2015, 2016, 2017, 2018, 2019
|
6
|
+
# by Chris Schlaeger <chris@taskjuggler.org>
|
6
7
|
#
|
7
8
|
# MIT License
|
8
9
|
#
|
@@ -26,7 +27,6 @@
|
|
26
27
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
28
|
|
28
29
|
require 'set'
|
29
|
-
require 'weakref'
|
30
30
|
|
31
31
|
require 'perobs/Log'
|
32
32
|
require 'perobs/Handle'
|
@@ -37,12 +37,18 @@ require 'perobs/FlatFileDB'
|
|
37
37
|
require 'perobs/Object'
|
38
38
|
require 'perobs/Hash'
|
39
39
|
require 'perobs/Array'
|
40
|
+
require 'perobs/BigTree'
|
41
|
+
require 'perobs/BigHash'
|
42
|
+
require 'perobs/BigArray'
|
43
|
+
require 'perobs/ProgressMeter'
|
44
|
+
require 'perobs/ConsoleProgressMeter'
|
40
45
|
|
41
46
|
# PErsistent Ruby OBject Store
|
42
47
|
module PEROBS
|
43
48
|
|
44
|
-
Statistics = Struct.new(:in_memory_objects, :root_objects,
|
45
|
-
:marked_objects, :swept_objects
|
49
|
+
Statistics = Struct.new(:in_memory_objects, :root_objects, :zombie_objects,
|
50
|
+
:marked_objects, :swept_objects,
|
51
|
+
:created_objects, :collected_objects)
|
46
52
|
|
47
53
|
# PEROBS::Store is a persistent storage system for Ruby objects. Regular
|
48
54
|
# Ruby objects are transparently stored in a back-end storage and retrieved
|
@@ -68,12 +74,20 @@ module PEROBS
|
|
68
74
|
#
|
69
75
|
# class Person < PEROBS::Object
|
70
76
|
#
|
71
|
-
#
|
77
|
+
# attr_persist :name, :mother, :father, :kids
|
72
78
|
#
|
79
|
+
# # The constructor is only called for the creation of a new object. It is
|
80
|
+
# # not called when the object is restored from the database. In that case
|
81
|
+
# # only restore() is called.
|
73
82
|
# def initialize(cf, name)
|
74
83
|
# super(cf)
|
75
|
-
#
|
76
|
-
#
|
84
|
+
# self.name = name
|
85
|
+
# self.kids = @store.new(PEROBS::Array)
|
86
|
+
# end
|
87
|
+
#
|
88
|
+
# def restore
|
89
|
+
# # In case you need to do any checks or massaging (e. g. for additional
|
90
|
+
# # attributes) you can provide this method.
|
77
91
|
# end
|
78
92
|
#
|
79
93
|
# def to_s
|
@@ -91,11 +105,12 @@ module PEROBS
|
|
91
105
|
# joe.kids << jim
|
92
106
|
# jim.mother = jane
|
93
107
|
# jane.kids << jim
|
94
|
-
# store.
|
108
|
+
# store.exit
|
95
109
|
#
|
96
110
|
class Store
|
97
111
|
|
98
112
|
attr_reader :db, :cache, :class_map
|
113
|
+
attr_writer :root_objects
|
99
114
|
|
100
115
|
# Create a new Store.
|
101
116
|
# @param data_base [String] the name of the database
|
@@ -120,14 +135,23 @@ module PEROBS
|
|
120
135
|
# :json : About half as fast as marshal, but the
|
121
136
|
# format is rock solid and portable between
|
122
137
|
# languages. It only supports basic Ruby data types
|
123
|
-
# like String,
|
138
|
+
# like String, Integer, Float, Array, Hash. This is
|
124
139
|
# the default option.
|
125
140
|
# :yaml : Can also handle most Ruby data types and is
|
126
141
|
# portable between Ruby versions (1.9 and later).
|
127
142
|
# Unfortunately, it is 10x slower than marshal.
|
143
|
+
# :progressmeter : reference to a ProgressMeter object that receives
|
144
|
+
# progress information during longer running tasks.
|
145
|
+
# It defaults to ProgressMeter which only logs into
|
146
|
+
# the log. Use ConsoleProgressMeter or a derived
|
147
|
+
# class for more fancy progress reporting.
|
148
|
+
# :no_root_objects : Create a new store without root objects. This only
|
149
|
+
# makes sense if you want to copy the objects of
|
150
|
+
# another store into this store.
|
128
151
|
def initialize(data_base, options = {})
|
129
152
|
# Create a backing store handler
|
130
|
-
@
|
153
|
+
@progressmeter = (options[:progressmeter] ||= ProgressMeter.new)
|
154
|
+
@db = (options[:engine] || FlatFileDB).new(data_base, options)
|
131
155
|
@db.open
|
132
156
|
# Create a map that can translate classes to numerical IDs and vice
|
133
157
|
# versa.
|
@@ -136,25 +160,32 @@ module PEROBS
|
|
136
160
|
# List of PEROBS objects that are currently available as Ruby objects
|
137
161
|
# hashed by their ID.
|
138
162
|
@in_memory_objects = {}
|
163
|
+
# List of objects that were destroyed already but were still found in
|
164
|
+
# the in_memory_objects list. _collect has not yet been called for them.
|
165
|
+
@zombie_objects = {}
|
139
166
|
|
140
167
|
# This object keeps some counters of interest.
|
141
168
|
@stats = Statistics.new
|
169
|
+
@stats[:created_objects] = 0
|
170
|
+
@stats[:collected_objects] = 0
|
142
171
|
|
143
172
|
# The Cache reduces read and write latencies by keeping a subset of the
|
144
173
|
# objects in memory.
|
145
174
|
@cache = Cache.new(options[:cache_bits] || 16)
|
146
175
|
|
147
176
|
# The named (global) objects IDs hashed by their name
|
148
|
-
unless
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
"
|
177
|
+
unless options[:no_root_objects]
|
178
|
+
unless (@root_objects = object_by_id(0))
|
179
|
+
PEROBS.log.debug "Initializing the PEROBS store"
|
180
|
+
# The root object hash always has the object ID 0.
|
181
|
+
@root_objects = _construct_po(Hash, 0)
|
182
|
+
# Mark the root_objects object as modified.
|
183
|
+
@cache.cache_write(@root_objects)
|
184
|
+
end
|
185
|
+
unless @root_objects.is_a?(Hash)
|
186
|
+
PEROBS.log.fatal "Database corrupted: Root objects must be a Hash " +
|
187
|
+
"but is a #{@root_objects.class}"
|
188
|
+
end
|
158
189
|
end
|
159
190
|
end
|
160
191
|
|
@@ -166,7 +197,9 @@ module PEROBS
|
|
166
197
|
sync
|
167
198
|
|
168
199
|
# Create a new store with the specified directory and options.
|
169
|
-
|
200
|
+
new_options = options.clone
|
201
|
+
new_options[:no_root_objects] = true
|
202
|
+
new_db = Store.new(dir, new_options)
|
170
203
|
# Clear the cache.
|
171
204
|
new_db.sync
|
172
205
|
# Copy all objects of the existing store to the new store.
|
@@ -177,6 +210,7 @@ module PEROBS
|
|
177
210
|
obj._sync
|
178
211
|
i += 1
|
179
212
|
end
|
213
|
+
new_db.root_objects = new_db.object_by_id(0)
|
180
214
|
PEROBS.log.debug "Copied #{i} objects into new database at #{dir}"
|
181
215
|
# Flush the new store and close it.
|
182
216
|
new_db.exit
|
@@ -184,20 +218,34 @@ module PEROBS
|
|
184
218
|
true
|
185
219
|
end
|
186
220
|
|
187
|
-
|
188
221
|
# Close the store and ensure that all in-memory objects are written out to
|
189
222
|
# the storage backend. The Store object is no longer usable after this
|
190
223
|
# method was called.
|
191
224
|
def exit
|
192
225
|
if @cache && @cache.in_transaction?
|
193
|
-
|
226
|
+
@cache.abort_transaction
|
227
|
+
@cache.flush
|
228
|
+
@db.close if @db
|
229
|
+
PEROBS.log.fatal "You cannot call exit() during a transaction: #{Kernel.caller}"
|
194
230
|
end
|
195
231
|
@cache.flush if @cache
|
196
232
|
@db.close if @db
|
197
|
-
@db = @class_map = @in_memory_objects = @stats = @cache = @root_objects =
|
198
|
-
nil
|
199
|
-
end
|
200
233
|
|
234
|
+
GC.start
|
235
|
+
if @stats
|
236
|
+
unless @stats[:created_objects] == @stats[:collected_objects] +
|
237
|
+
@in_memory_objects.length
|
238
|
+
PEROGS.log.fatal "Created objects count " +
|
239
|
+
"(#{@stats[:created_objects]})" +
|
240
|
+
" is not equal to the collected count " +
|
241
|
+
"(#{@stats[:collected_objects]}) + in_memory_objects count " +
|
242
|
+
"(#{@in_memory_objects.length})"
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
@db = @class_map = @in_memory_objects = @zombie_objects =
|
247
|
+
@stats = @cache = @root_objects = nil
|
248
|
+
end
|
201
249
|
|
202
250
|
# You need to call this method to create new PEROBS objects that belong to
|
203
251
|
# this Store.
|
@@ -221,7 +269,7 @@ module PEROBS
|
|
221
269
|
# For library internal use only!
|
222
270
|
# This method will create a new PEROBS object.
|
223
271
|
# @param klass [BasicObject] Class of the object to create
|
224
|
-
# @param id [
|
272
|
+
# @param id [Integer] Requested object ID
|
225
273
|
# @param args [Array] Arguments to pass to the object constructor.
|
226
274
|
# @return [BasicObject] Newly constructed PEROBS object
|
227
275
|
def _construct_po(klass, id, *args)
|
@@ -229,10 +277,12 @@ module PEROBS
|
|
229
277
|
end
|
230
278
|
|
231
279
|
# Delete the entire store. The store is no longer usable after this
|
232
|
-
# method was called.
|
280
|
+
# method was called. This is an alternative to exit() that additionally
|
281
|
+
# deletes the entire database.
|
233
282
|
def delete_store
|
234
283
|
@db.delete_database
|
235
|
-
@db = @class_map = @
|
284
|
+
@db = @class_map = @in_memory_objects = @zombie_objects =
|
285
|
+
@stats = @cache = @root_objects = nil
|
236
286
|
end
|
237
287
|
|
238
288
|
# Store the provided object under the given name. Use this to make the
|
@@ -288,20 +338,30 @@ module PEROBS
|
|
288
338
|
# needed.
|
289
339
|
def sync
|
290
340
|
if @cache.in_transaction?
|
291
|
-
|
341
|
+
@cache.abort_transaction
|
342
|
+
@cache.flush
|
343
|
+
PEROBS.log.fatal "You cannot call sync() during a transaction: \n" +
|
344
|
+
Kernel.caller.join("\n")
|
292
345
|
end
|
293
346
|
@cache.flush
|
294
347
|
end
|
295
348
|
|
349
|
+
# Return the number of objects stored in the store. CAVEAT: This method
|
350
|
+
# will only return correct values when it is separated from any mutating
|
351
|
+
# call by a call to sync().
|
352
|
+
# @return [Integer] Number of persistently stored objects in the Store.
|
353
|
+
def size
|
354
|
+
# We don't include the Hash that stores the root objects into the object
|
355
|
+
# count.
|
356
|
+
@db.item_counter - 1
|
357
|
+
end
|
358
|
+
|
296
359
|
# Discard all objects that are not somehow connected to the root objects
|
297
360
|
# from the back-end storage. The garbage collector is not invoked
|
298
361
|
# automatically. Depending on your usage pattern, you need to call this
|
299
362
|
# method periodically.
|
300
|
-
# @return [
|
363
|
+
# @return [Integer] The number of collected objects
|
301
364
|
def gc
|
302
|
-
if @cache.in_transaction?
|
303
|
-
PEROBS.log.fatal 'You cannot call gc() during a transaction'
|
304
|
-
end
|
305
365
|
sync
|
306
366
|
mark
|
307
367
|
sweep
|
@@ -311,17 +371,30 @@ module PEROBS
|
|
311
371
|
# public API and should never be called by outside users. It's purely
|
312
372
|
# intended for internal use.
|
313
373
|
def object_by_id(id)
|
314
|
-
if (
|
374
|
+
if (ruby_object_id = @in_memory_objects[id])
|
315
375
|
# We have the object in memory so we can just return it.
|
316
376
|
begin
|
317
|
-
|
318
|
-
|
377
|
+
object = ObjectSpace._id2ref(ruby_object_id)
|
378
|
+
# Let's make sure the object is really the object we are looking
|
379
|
+
# for. The GC might have recycled it already and the Ruby object ID
|
380
|
+
# could now be used for another object.
|
381
|
+
if object.is_a?(ObjectBase) && object._id == id
|
382
|
+
return object
|
383
|
+
end
|
384
|
+
rescue RangeError => e
|
319
385
|
# Due to a race condition the object can still be in the
|
320
386
|
# @in_memory_objects list but has been collected already by the Ruby
|
321
|
-
# GC. In that case we need to load it again.
|
387
|
+
# GC. In that case we need to load it again. The _collect() call
|
388
|
+
# will happen much later, potentially after we have registered a new
|
389
|
+
# object with the same ID.
|
390
|
+
@zombie_objects[id] = @in_memory_objects.delete(id)
|
322
391
|
end
|
323
392
|
end
|
324
393
|
|
394
|
+
if (obj = @cache.object_by_id(id))
|
395
|
+
PEROBS.log.fatal "Object #{id} with Ruby #{obj.object_id} is in cache but not in_memory"
|
396
|
+
end
|
397
|
+
|
325
398
|
# We don't have the object in memory. Let's find it in the storage.
|
326
399
|
if @db.include?(id)
|
327
400
|
# Great, object found. Read it into memory and return it.
|
@@ -342,40 +415,44 @@ module PEROBS
|
|
342
415
|
# unreadable object is found, the reference will simply be deleted.
|
343
416
|
# @param repair [TrueClass/FalseClass] true if a repair attempt should be
|
344
417
|
# made.
|
345
|
-
# @return [
|
418
|
+
# @return [Integer] The number of references to bad objects found.
|
346
419
|
def check(repair = false)
|
420
|
+
stats = { :errors => 0, :object_cnt => 0 }
|
421
|
+
|
347
422
|
# All objects must have in-db version.
|
348
423
|
sync
|
349
424
|
# Run basic consistency checks first.
|
350
|
-
errors
|
425
|
+
stats[:errors] += @db.check_db(repair)
|
351
426
|
|
352
427
|
# We will use the mark to mark all objects that we have checked already.
|
353
428
|
# Before we start, we need to clear all marks.
|
354
429
|
@db.clear_marks
|
355
430
|
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
431
|
+
@progressmeter.start("Checking object link structure",
|
432
|
+
@db.item_counter) do
|
433
|
+
@root_objects.each do |name, id|
|
434
|
+
check_object(id, repair, stats)
|
435
|
+
end
|
360
436
|
end
|
361
437
|
|
362
438
|
# Delete all broken root objects.
|
363
439
|
if repair
|
364
440
|
@root_objects.delete_if do |name, id|
|
365
|
-
unless
|
441
|
+
unless @db.check(id, repair)
|
366
442
|
PEROBS.log.error "Discarding broken root object '#{name}' " +
|
367
443
|
"with ID #{id}"
|
368
|
-
errors += 1
|
444
|
+
stats[:errors] += 1
|
369
445
|
end
|
370
|
-
!res
|
371
446
|
end
|
372
447
|
end
|
373
448
|
|
374
|
-
if errors > 0
|
449
|
+
if stats[:errors] > 0
|
375
450
|
if repair
|
376
|
-
PEROBS.log.error "#{errors} errors found in
|
451
|
+
PEROBS.log.error "#{stats[:errors]} errors found in " +
|
452
|
+
"#{stats[:object_cnt]} objects"
|
377
453
|
else
|
378
|
-
PEROBS.log.fatal "#{errors} errors found in
|
454
|
+
PEROBS.log.fatal "#{stats[:errors]} errors found in " +
|
455
|
+
"#{stats[:object_cnt]} objects"
|
379
456
|
end
|
380
457
|
else
|
381
458
|
PEROBS.log.debug "No errors found"
|
@@ -384,7 +461,7 @@ module PEROBS
|
|
384
461
|
# Ensure that any fixes are written into the DB.
|
385
462
|
sync if repair
|
386
463
|
|
387
|
-
errors
|
464
|
+
stats[:errors]
|
388
465
|
end
|
389
466
|
|
390
467
|
# This method will execute the provided block as an atomic transaction
|
@@ -413,7 +490,10 @@ module PEROBS
|
|
413
490
|
stack = [ 0 ] + @root_objects.values
|
414
491
|
while !stack.empty?
|
415
492
|
# Get an object index from the stack.
|
416
|
-
|
493
|
+
id = stack.pop
|
494
|
+
next if @db.is_marked?(id)
|
495
|
+
|
496
|
+
unless (obj = object_by_id(id))
|
417
497
|
PEROBS.log.fatal "Database is corrupted. Object with ID #{id} " +
|
418
498
|
"not found."
|
419
499
|
end
|
@@ -436,7 +516,7 @@ module PEROBS
|
|
436
516
|
# Internal method. Don't use this outside of this library!
|
437
517
|
# Generate a new unique ID that is not used by any other object. It uses
|
438
518
|
# random numbers between 0 and 2**64 - 1.
|
439
|
-
# @return [
|
519
|
+
# @return [Integer]
|
440
520
|
def _new_id
|
441
521
|
begin
|
442
522
|
# Generate a random number. It's recommended to not store more than
|
@@ -454,23 +534,39 @@ module PEROBS
|
|
454
534
|
# happens the object finalizer is triggered and calls _forget() to
|
455
535
|
# remove the object from this hash again.
|
456
536
|
# @param obj [BasicObject] Object to register
|
457
|
-
# @param id [
|
537
|
+
# @param id [Integer] object ID
|
458
538
|
def _register_in_memory(obj, id)
|
459
|
-
|
539
|
+
unless obj.is_a?(ObjectBase)
|
540
|
+
PEROBS.log.fatal "You can only register ObjectBase objects"
|
541
|
+
end
|
542
|
+
if @in_memory_objects.include?(id)
|
543
|
+
PEROBS.log.fatal "The Store::_in_memory_objects list already " +
|
544
|
+
"contains an object for ID #{id}"
|
545
|
+
end
|
546
|
+
|
547
|
+
@in_memory_objects[id] = obj.object_id
|
548
|
+
@stats[:created_objects] += 1
|
460
549
|
end
|
461
550
|
|
462
551
|
# Remove the object from the in-memory list. This is an internal method
|
463
552
|
# and should never be called from user code. It will be called from a
|
464
553
|
# finalizer, so many restrictions apply!
|
465
|
-
# @param id [
|
466
|
-
def _collect(id,
|
467
|
-
@in_memory_objects
|
554
|
+
# @param id [Integer] Object ID of object to remove from the list
|
555
|
+
def _collect(id, ruby_object_id)
|
556
|
+
if @in_memory_objects[id] == ruby_object_id
|
557
|
+
@in_memory_objects.delete(id)
|
558
|
+
@stats[:collected_objects] += 1
|
559
|
+
elsif @zombie_objects[id] == ruby_object_id
|
560
|
+
@zombie_objects.delete(id)
|
561
|
+
@stats[:collected_objects] += 1
|
562
|
+
end
|
468
563
|
end
|
469
564
|
|
470
565
|
# This method returns a Hash with some statistics about this store.
|
471
566
|
def statistics
|
472
567
|
@stats.in_memory_objects = @in_memory_objects.length
|
473
568
|
@stats.root_objects = @root_objects.length
|
569
|
+
@stats.zombie_objects = @zombie_objects.length
|
474
570
|
|
475
571
|
@stats
|
476
572
|
end
|
@@ -482,32 +578,40 @@ module PEROBS
|
|
482
578
|
def mark
|
483
579
|
classes = Set.new
|
484
580
|
marked_objects = 0
|
485
|
-
|
581
|
+
@progressmeter.start("Marking linked objects", @db.item_counter) do
|
582
|
+
each do |obj|
|
583
|
+
classes.add(obj.class)
|
584
|
+
@progressmeter.update(marked_objects += 1)
|
585
|
+
end
|
586
|
+
end
|
486
587
|
@class_map.keep(classes.map { |c| c.to_s })
|
487
588
|
|
488
589
|
# The root_objects object is included in the count, but we only want to
|
489
590
|
# count user objects here.
|
490
|
-
PEROBS.log.debug "#{marked_objects - 1}
|
591
|
+
PEROBS.log.debug "#{marked_objects - 1} of #{@db.item_counter} " +
|
592
|
+
"objects marked"
|
491
593
|
@stats.marked_objects = marked_objects - 1
|
492
594
|
end
|
493
595
|
|
494
596
|
# Sweep phase of a mark-and-sweep garbage collector. It will remove all
|
495
597
|
# unmarked objects from the store.
|
496
598
|
def sweep
|
497
|
-
@stats.swept_objects = @db.delete_unmarked_objects
|
498
|
-
|
599
|
+
@stats.swept_objects = @db.delete_unmarked_objects do |id|
|
600
|
+
@cache.evict(id)
|
601
|
+
end
|
602
|
+
@db.clear_marks
|
603
|
+
GC.start
|
499
604
|
PEROBS.log.debug "#{@stats.swept_objects} objects collected"
|
500
605
|
@stats.swept_objects
|
501
606
|
end
|
502
607
|
|
503
608
|
# Check the object with the given start_id and all other objects that are
|
504
609
|
# somehow reachable from the start object.
|
505
|
-
# @param start_id [
|
610
|
+
# @param start_id [Integer] ID of the top-level object to start
|
506
611
|
# with
|
507
612
|
# @param repair [Boolean] Delete references to broken objects if true
|
508
|
-
# @return [
|
509
|
-
def check_object(start_id, repair)
|
510
|
-
errors = 0
|
613
|
+
# @return [Integer] The number of references to bad objects.
|
614
|
+
def check_object(start_id, repair, stats)
|
511
615
|
@db.mark(start_id)
|
512
616
|
# The todo list holds a tuple for each object that still needs to be
|
513
617
|
# checked. The first item is the referring object and the second is the
|
@@ -518,7 +622,13 @@ module PEROBS
|
|
518
622
|
# Get the next PEROBS object to check
|
519
623
|
ref_obj, id = todo_list.pop
|
520
624
|
|
521
|
-
|
625
|
+
begin
|
626
|
+
obj = object_by_id(id)
|
627
|
+
rescue PEROBS::FatalError
|
628
|
+
obj = nil
|
629
|
+
end
|
630
|
+
|
631
|
+
if obj
|
522
632
|
# The object exists and is OK. Mark it as checked.
|
523
633
|
@db.mark(id)
|
524
634
|
# Now look at all other objects referenced by this object.
|
@@ -541,11 +651,11 @@ module PEROBS
|
|
541
651
|
ref_obj.inspect
|
542
652
|
end
|
543
653
|
end
|
544
|
-
errors += 1
|
654
|
+
stats[:errors] += 1
|
545
655
|
end
|
546
|
-
end
|
547
656
|
|
548
|
-
|
657
|
+
@progressmeter.update(stats[:object_cnt] += 1)
|
658
|
+
end
|
549
659
|
end
|
550
660
|
|
551
661
|
end
|