perobs 3.0.1 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +19 -18
- data/lib/perobs.rb +2 -0
- data/lib/perobs/Array.rb +68 -21
- data/lib/perobs/BTree.rb +110 -54
- data/lib/perobs/BTreeBlob.rb +14 -13
- data/lib/perobs/BTreeDB.rb +11 -10
- data/lib/perobs/BTreeNode.rb +551 -197
- data/lib/perobs/BTreeNodeCache.rb +10 -8
- data/lib/perobs/BTreeNodeLink.rb +11 -1
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +47 -22
- data/lib/perobs/ClassMap.rb +2 -2
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +62 -20
- data/lib/perobs/EquiBlobsFile.rb +174 -59
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +536 -242
- data/lib/perobs/FlatFileBlobHeader.rb +120 -84
- data/lib/perobs/FlatFileDB.rb +58 -27
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +129 -35
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/LockFile.rb +3 -0
- data/lib/perobs/Object.rb +28 -20
- data/lib/perobs/ObjectBase.rb +53 -10
- data/lib/perobs/PersistentObjectCache.rb +142 -0
- data/lib/perobs/PersistentObjectCacheLine.rb +99 -0
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +63 -47
- data/lib/perobs/SpaceTreeNode.rb +134 -115
- data/lib/perobs/SpaceTreeNodeLink.rb +1 -1
- data/lib/perobs/StackFile.rb +1 -1
- data/lib/perobs/Store.rb +180 -70
- data/lib/perobs/version.rb +1 -1
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +48 -39
- data/test/BTreeDB_spec.rb +2 -2
- data/test/BTree_spec.rb +50 -1
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -5
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +199 -15
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +27 -16
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/Object_spec.rb +5 -5
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +27 -9
- data/test/Store_spec.rb +353 -206
- data/test/perobs_spec.rb +7 -3
- data/test/spec_helper.rb +9 -4
- metadata +59 -16
- data/lib/perobs/SpaceTreeNodeCache.rb +0 -76
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,246 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigHash.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/BigTree'
|
30
|
+
require 'perobs/Array'
|
31
|
+
require 'perobs/FNV_Hash_1a_64'
|
32
|
+
|
33
|
+
module PEROBS
|
34
|
+
|
35
|
+
# The BigHash is similar to the Hash object in that it provides a simple
|
36
|
+
# hash functionality. The difference is that this class scales to much
|
37
|
+
# larger data sets essentially limited to the amount of space available on
|
38
|
+
# your backing store. The data is persisted immediately and uses
|
39
|
+
# transactions to ensure the data is consistent. It only provides a small
|
40
|
+
# subset of the methods provided by the native Hash class that make sense
|
41
|
+
# for giant data sets.
|
42
|
+
class BigHash < PEROBS::Object

  # Internally this class uses BigTree to store the values by the hashed
  # key. We are using a 64 bit hash space so collisions are fairly unlikely
  # but not impossible. Therefore we have to store the original key with
  # the value to ensure that we got the right value. The key and value are
  # stored in an Entry object.
  #
  # In case we have a collision we need to store multiple values for the
  # same hashed key. In that case we store the Entry objects for the same
  # hashed key in a Collisions object instead of storing the Entry
  # directly in the BigTree.
  class Entry < PEROBS::Object

    attr_persist :key, :value

    # @param p [Handle] Store handle
    # @param key [Integer or String] The original (unhashed) key
    # @param value [Any PEROBS storable object]
    def initialize(p, key, value)
      super(p)
      self.key = key
      self.value = value
    end

  end

  # Since the BigHash can also store PEROBS::Array values we need to
  # introduce a new class so we can tell apart collisions from Arrays.
  class Collisions < PEROBS::Array
  end

  attr_persist :btree

  # Create a new BigHash object.
  # @param p [Handle] Store handle
  def initialize(p)
    super(p)
    restore
    self.btree = @store.new(PEROBS::BigTree)
  end

  # Intentionally empty; called from the constructor before the BigTree is
  # created.
  def restore
  end

  # Insert a value that is associated with the given key. If a value for
  # this key already exists, the value will be overwritten with the newly
  # provided value.
  # @param key [Integer or String]
  # @param value [Any PEROBS storable object]
  def []=(key, value)
    hashed_key = hash_key(key)
    @store.transaction do
      entry = @store.new(Entry, key, value)

      if (existing_entry = @btree.get(hashed_key))
        # There is already an existing entry for this hashed key.
        if existing_entry.is_a?(Collisions)
          # Find the right index to insert the new entry. If there is
          # already an entry with the same key we stop at its index so it
          # gets overwritten. Otherwise the index ends up one past the last
          # element and the new entry is appended.
          index_to_insert = 0
          existing_entry.each do |ae|
            break if ae.key == key

            index_to_insert += 1
          end
          existing_entry[index_to_insert] = entry
        elsif existing_entry.key == key
          # The existing value is for the identical key. We can safely
          # overwrite
          @btree.insert(hashed_key, entry)
        else
          # There is a single existing entry, but for a different key.
          # Create a new Collisions list and store both entries.
          array_entry = @store.new(Collisions)
          array_entry << existing_entry
          array_entry << entry
          @btree.insert(hashed_key, array_entry)
        end
      else
        # No existing entry. Insert the new entry.
        @btree.insert(hashed_key, entry)
      end
    end
  end

  # Retrieve the value for the given key. If no value for the key is found
  # nil is returned.
  # @param key [Integer or String]
  # @return [Any PEROBS storable object]
  def [](key)
    hashed_key = hash_key(key)
    return nil unless (entry = @btree.get(hashed_key))

    if entry.is_a?(Collisions)
      entry.each do |ae|
        return ae.value if ae.key == key
      end
    elsif entry.key == key
      return entry.value
    end

    nil
  end

  # Check if there is a value stored for the given key.
  # @param key [Integer or String]
  # @return [TrueClass or FalseClass]
  def has_key?(key)
    hashed_key = hash_key(key)
    return false unless (entry = @btree.get(hashed_key))

    if entry.is_a?(Collisions)
      entry.each do |ae|
        return true if ae.key == key
      end
    elsif entry.key == key
      return true
    end

    false
  end

  alias include? has_key?

  # Delete the entry for the given key and return its value. Return nil if
  # no matching entry exists.
  # @param key [Integer or String]
  # @return [Object] Value of the deleted entry or nil
  def delete(key)
    hashed_key = hash_key(key)
    return nil unless (entry = @btree.get(hashed_key))

    if entry.is_a?(Collisions)
      entry.each_with_index do |ae, i|
        next unless ae.key == key

        deleted = entry.delete_at(i)
        # Don't leave an empty collision list behind in the tree.
        @btree.remove(hashed_key) if entry.size == 0
        return deleted.value
      end
    elsif entry.key == key
      # A single entry is stored directly in the BigTree. It must be
      # removed from the tree, otherwise the key would remain visible to
      # [], has_key?, each and length.
      value = entry.value
      @btree.remove(hashed_key)
      return value
    end

    nil
  end

  # Return the number of entries stored in the hash.
  # NOTE(review): this is the entry counter of the underlying BigTree, so
  # keys that share one Collisions list are presumably counted only once.
  # @return [Integer]
  def length
    @btree.entry_counter
  end

  alias size length

  # Return true if hash is empty. False otherwise.
  # @return [TrueClass, FalseClass]
  def empty?
    @btree.entry_counter == 0
  end

  # Calls the given block for each key/value pair.
  # @yield(key, value)
  def each(&block)
    @btree.each do |_hashed_key, entry|
      if entry.is_a?(Collisions)
        # Stop the outer iteration as well if the inner one was aborted.
        break if entry.each do |c_entry|
          yield(c_entry.key, c_entry.value)
        end.nil?
      else
        yield(entry.key, entry.value)
      end
    end
  end

  # This is mostly intended for debugging as the result can be very big.
  # It returns an Array of keys stored in the hash.
  # @return [Array] A list of all keys
  def keys
    ks = []
    each { |k, _v| ks << k }
    ks
  end

  # Check if the data structure contains any errors.
  # @return [Boolean] true if no errors were found, false otherwise
  def check
    @btree.check
  end

  private

  # Map the user provided key to the 64 bit Integer key used in the BigTree.
  # @param key [Integer or String]
  # @return [Integer]
  def hash_key(key)
    FNV_Hash_1a_64::digest(key)
  end

end
|
244
|
+
|
245
|
+
end
|
246
|
+
|
@@ -0,0 +1,197 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigTree.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/BigTreeNode'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The BigTree class implements a BTree as a PEROBS object. It allows to
|
34
|
+
# manage huge amounts of data in a reasonably efficient way. The number of
|
35
|
+
# entries is limited by the space on the backing store, not the main
|
36
|
+
# memory. Entries are addressed by an Integer key.
|
37
|
+
class BigTree < PEROBS::Object

  # Container for the numbers collected by BigTree#statistics.
  class Stats < Struct.new(:leaf_nodes, :branch_nodes, :min_depth,
                           :max_depth)
  end

  attr_persist :node_size, :root, :first_leaf, :last_leaf, :entry_counter

  # Internal constructor. Use Store.new() instead.
  # @param p [Handle]
  # @param node_size [Integer] The size of the tree nodes. This determines
  #        how many entries must be read/written for each operation.
  def initialize(p, node_size = 127)
    super(p)
    unless node_size > 2
      PEROBS.log.fatal "Node size (#{node_size}) must be larger than 2"
    end
    attr_init(:node_size, node_size)
    clear unless instance_variable_defined?('@root')
  end

  # Remove all entries from the BigTree. The tree is reset to a single,
  # empty leaf node that is root, first leaf and last leaf at once.
  def clear
    self.root = self.first_leaf = self.last_leaf =
      @store.new(BigTreeNode, myself, true)
    self.entry_counter = 0
  end

  # Insert a new value into the tree using the key as a unique index. If the
  # key already exists the old value will be overwritten.
  # @param key [Integer] Unique key
  # @param value [Integer] value
  def insert(key, value)
    @store.transaction do
      @root.insert(key, value)
    end
  end

  # Retrieve the value associated with the given key. If no entry was found,
  # return nil.
  # @param key [Integer] Unique key
  # @return [Integer or nil] found value or nil
  def get(key)
    @root.get(key)
  end

  # Return the node chain from the root to the leaf node storing the
  # key/value pair.
  # @param key [Integer] key to search for
  # @return [Array of BigTreeNode] node list (may be empty)
  def node_chain(key)
    @root.node_chain(key)
  end

  # Check if there is an entry for the given key.
  # @param key [Integer] Unique key
  # @return [Boolean] True if key is present, false otherwise.
  def has_key?(key)
    @root.has_key?(key)
  end

  # Find and remove the value associated with the given key. If no entry was
  # found, return nil, otherwise the found value.
  # @param key [Integer] Unique key
  # @return [Integer or nil] found value or nil
  def remove(key)
    removed_value = nil

    @store.transaction do
      removed_value = @root.remove(key)
    end

    removed_value
  end

  # Delete all entries for which the passed block yields true. The
  # implementation is optimized for large bulk deletes. It rebuilds a new
  # BTree for the elements to keep. If only few elements are deleted the
  # overhead of rebuilding the BTree is rather high.
  # @yield [key, value]
  def delete_if
    # Keep a handle on the old node hierarchy, start over with an empty
    # tree and re-insert everything the block does not reject.
    old_root = @root
    clear
    old_root.each do |k, v|
      insert(k, v) unless yield(k, v)
    end
  end

  # @return [Integer] The number of entries stored in the tree.
  def length
    @entry_counter
  end

  # Return true if the BigTree has no stored entries.
  def empty?
    @entry_counter == 0
  end

  # Iterate over all entries in the tree. Entries are always sorted by the
  # key.
  # @yield [key, value]
  def each(&block)
    # Walk the linked list of leaf nodes from the first leaf onwards.
    node = @first_leaf
    while node
      break if node.each_element(&block).nil?
      node = node.next_sibling
    end
  end

  # Iterate over all entries in the tree in reverse order. Entries are
  # always sorted by the key.
  # @yield [key, value]
  def reverse_each(&block)
    # Walk the linked list of leaf nodes from the last leaf backwards.
    node = @last_leaf
    while node
      node.reverse_each_element(&block)
      node = node.prev_sibling
    end
  end

  # @return [String] Human readable form of the tree.
  def to_s
    @root.to_s
  end

  # Check if the tree contains any errors. Both the node structure and the
  # entry counter are verified.
  # @yield [key, value] Optional block that is passed on to the node check
  # @return [Boolean] true if no errors were found, false otherwise
  def check(&block)
    # A failed structural check must fail the whole check. Don't let a
    # matching entry counter mask it.
    return false unless @root.check(&block)

    i = 0
    each { |_k, _v| i += 1 }

    unless @entry_counter == i
      PEROBS.log.error "BigTree contains #{i} values but entry counter " +
        "is #{@entry_counter}"
      return false
    end

    true
  end

  # Gather some statistics regarding the tree structure.
  # @return [Stats] Structs with gathered data
  def statistics
    stats = Stats.new(0, 0, nil, nil)
    @root.statistics(stats)
    stats
  end

end
|
195
|
+
|
196
|
+
end
|
197
|
+
|
@@ -0,0 +1,873 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigTreeNode.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/Array'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The BigTreeNode class provides the BTree nodes for the BigTree objects.
|
34
|
+
# A node can either be a branch node or a leaf node. Branch nodes don't
|
35
|
+
# store values, only references to child nodes. Leaf nodes don't have child
|
36
|
+
# nodes but store the actual values. All nodes store a list of keys that are
|
37
|
+
# used to navigate the tree and find the values. A key is either directly
|
38
|
+
# associated with a value or determines the lower key boundary for the
|
39
|
+
# following child node.
|
40
|
+
class BigTreeNode < PEROBS::Object
|
41
|
+
|
42
|
+
attr_persist :tree, :parent, :keys, :values, :children,
|
43
|
+
:prev_sibling, :next_sibling
|
44
|
+
|
45
|
+
# Internal constructor. Use Store.new(BigTreeNode, ...) instead.
|
46
|
+
# @param p [Handle]
|
47
|
+
# @param tree [BigTree] The tree this node should belong to
|
48
|
+
# @param is_leaf [Boolean] True if a leaf node should be created, false
|
49
|
+
# for a branch node.
|
50
|
+
# @param parent [BigTreeNode] Parent node
|
51
|
+
# @param prev_sibling [BigTreeNode] Previous sibling
|
52
|
+
# @param next_sibling [BigTreeNode] Next sibling
|
53
|
+
def initialize(p, tree, is_leaf, parent = nil, prev_sibling = nil,
               next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent
  self.keys = @store.new(PEROBS::Array)

  if is_leaf
    # Leaf nodes carry the values and never have children.
    self.values = @store.new(PEROBS::Array)
    self.children = nil
  else
    # Branch nodes only reference child nodes and carry no values.
    self.children = @store.new(PEROBS::Array)
    self.values = nil
  end

  # Hook the new node into the sibling chain on the left side. A leaf
  # without a left neighbor becomes the first leaf of the tree.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  elsif is_leaf?
    @tree.first_leaf = myself
  end

  # Same for the right side. A leaf without a right neighbor becomes the
  # last leaf of the tree.
  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  elsif is_leaf?
    @tree.last_leaf = myself
  end
end
|
84
|
+
|
85
|
+
# @return [Boolean] True if this is a leaf node, false otherwise.
|
86
|
+
def is_leaf?
  # Only branch nodes own a children list; for leaf nodes it is nil.
  @children.nil?
end
|
89
|
+
|
90
|
+
# Insert or replace the given value by using the key as unique address.
|
91
|
+
# @param key [Integer] Unique key to retrieve the value
|
92
|
+
# @param value [Integer] value to insert
|
93
|
+
def insert(key, value)
  current = myself

  # Walk down from this node until a leaf is reached.
  while current
    # Full nodes encountered on the way are split first. The walk then
    # continues from the node returned by split_node.
    current = current.split_node if current.keys.size >= @tree.node_size

    # A leaf node is the place to add or replace the value.
    return current.insert_element(key, value) if current.is_leaf?

    # Otherwise pick the child node responsible for the key.
    current = current.children[current.search_key_index(key)]
  end

  PEROBS.log.fatal "Could not find proper node to insert into"
end
|
115
|
+
|
116
|
+
# Return the value that matches the given key or return nil if the key is
|
117
|
+
# unknown.
|
118
|
+
# @param key [Integer] key to search for
|
119
|
+
# @return [Integer or nil] value that matches the key
|
120
|
+
def get(key)
  current = self

  while current
    # Index of the entry that best fits the key.
    idx = current.search_key_index(key)

    unless current.is_leaf?
      # Branch node: continue the search in the matching child node.
      current = current.children[idx]
      next
    end

    # Leaf node: only an exact key match yields a value.
    return nil unless current.keys[idx] == key

    return current.values[idx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for key #{key}"
end
|
139
|
+
|
140
|
+
# Return the node chain from the root to the leaf node storing the
|
141
|
+
# key/value pair.
|
142
|
+
# @param key [Integer] key to search for
|
143
|
+
# @return [Array of BigTreeNode] node list (may be empty)
|
144
|
+
def node_chain(key)
  node = myself
  list = []

  while node do
    # Record every node that is visited on the way down exactly once,
    # starting with this node and ending with the leaf node.
    list << node

    # Find index of the entry that best fits the key.
    i = node.search_key_index(key)
    if node.is_leaf?
      # This is a leaf node. If there is an exact match for the given key
      # the collected chain is the result, otherwise the chain is empty.
      return node.keys[i] == key ? list : []
    end

    # Descend into the right child node to continue the search.
    node = node.children[i]
  end

  PEROBS.log.fatal "Could not find node chain for key #{key}"
end
|
165
|
+
|
166
|
+
# Return if given key is stored in the node.
|
167
|
+
# @param key [Integer] key to search for
|
168
|
+
# @return [Boolean] True if key was found, false otherwise
|
169
|
+
def has_key?(key)
  current = self

  while current
    # Index of the entry that best fits the key.
    idx = current.search_key_index(key)

    # In a leaf node the key is present only if there is an exact match.
    return current.keys[idx] == key if current.is_leaf?

    # Branch node: continue the search in the matching child node.
    current = current.children[idx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for key #{key}"
end
|
188
|
+
|
189
|
+
# Return the value that matches the given key and remove the value from
|
190
|
+
# the tree. Return nil if the key is unknown.
|
191
|
+
# @param key [Integer] key to search for
|
192
|
+
# @return [Object] value that matches the key
|
193
|
+
def remove(key)
  current = self

  while current
    # Index of the entry that best fits the key.
    idx = current.search_key_index(key)

    if current.is_leaf?
      # Only an exact match in the leaf node can be removed.
      return nil unless current.keys[idx] == key

      @tree.entry_counter -= 1
      return current.remove_element(idx)
    end

    # Branch node: continue the search in the matching child node.
    current = current.children[idx]
  end

  PEROBS.log.fatal 'Could not find proper node to remove from'
end
|
216
|
+
|
217
|
+
# Iterate over all the key/value pairs in this node and all sub-nodes.
|
218
|
+
# @yield [key, value]
|
219
|
+
def each
  traverse do |node, position, _stack|
    # Only leaf nodes hold key/value pairs, and only positions that map to
    # an existing key are reported.
    next unless node.is_leaf? && position < node.keys.size

    yield(node.keys[position], node.values[position])
  end
end
|
226
|
+
|
227
|
+
# Iterate over all the key/value pairs of the node.
|
228
|
+
# @yield [key, value]
|
229
|
+
def each_element
  # Branch nodes have no key/value pairs of their own.
  return self unless is_leaf?

  last_index = @keys.length - 1
  0.upto(last_index) { |idx| yield(@keys[idx], @values[idx]) }
end
|
236
|
+
|
237
|
+
# Iterate over all the key/value pairs of the node in reverse order.
|
238
|
+
# @yield [key, value]
|
239
|
+
def reverse_each_element
  # Branch nodes have no key/value pairs of their own.
  return self unless is_leaf?

  last_index = @keys.length - 1
  last_index.downto(0) { |idx| yield(@keys[idx], @values[idx]) }
end
|
246
|
+
|
247
|
+
# Check consistency of the node and all subsequent nodes. In case an error
|
248
|
+
# is found, a message is logged and false is returned.
|
249
|
+
# @yield [key, value]
|
250
|
+
# @return [Boolean] true if tree has no errors
|
251
|
+
def check
  # Depth of the first leaf node found. All other leaf nodes must have the
  # same depth.
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # These checks are done once per node.
      if node.parent
        # After a split the nodes will only have half the maximum keys.
        # For branch nodes one of the split nodes will have even 1 key
        # less as this will become the branch key in a parent node.
        if node.keys.size < min_keys - (node.is_leaf? ? 0 : 1)
          node.error "BigTree node #{node._id} has too few keys"
          return false
        end
      end

      if node.keys.size > @tree.node_size
        node.error "BigTree node must not have more then " +
          "#{@tree.node_size} keys, but has #{node.keys.size} keys"
        return false
      end

      last_key = nil
      node.keys.each do |key|
        if last_key && key < last_key
          node.error "Keys are not increasing monotoneously: " +
            "#{node.keys.inspect}"
          return false
        end
        last_key = key
      end

      if node.is_leaf?
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end
        if node.prev_sibling.nil?
          if @tree.first_leaf != node
            node.error "Leaf node #{node._id} has no previous sibling " +
              "but is not the first leaf of the tree"
            return false
          end
        elsif node.prev_sibling.next_sibling != node
          node.error "next_sibling of previous sibling does not point to " +
            "this node"
          return false
        end
        if node.next_sibling.nil?
          if @tree.last_leaf != node
            node.error "Leaf node #{node._id} has no next sibling " +
              "but is not the last leaf of the tree"
            return false
          end
        elsif node.next_sibling.prev_sibling != node
          node.error "previous_sibling of next sibling does not point to " +
            "this node"
          return false
        end
        unless node.keys.size == node.values.size
          node.error "Key count (#{node.keys.size}) and value " +
            "count (#{node.values.size}) don't match"
          return false
        end
        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        if node.values
          node.error "values must be nil for a branch node"
          return false
        end
        unless node.children.size == node.keys.size + 1
          node.error "Key count (#{node.keys.size}) must be one " +
            "less than children count (#{node.children.size})"
          return false
        end
        node.children.each_with_index do |child, i|
          unless child.is_a?(BigTreeNode)
            node.error "Child #{i} is of class #{child.class} " +
              "instead of BigTreeNode"
            return false
          end
          unless child.parent.is_a?(BigTreeNode)
            # Report the class of the offending parent reference, not the
            # class of the child itself.
            node.error "Parent reference of child #{i} is of class " +
              "#{child.parent.class} instead of BigTreeNode"
            return false
          end
          if child == node
            node.error "Child #{i} point to self"
            return false
          end
          if stack.include?(child)
            node.error "Child #{i} points to ancester node"
            return false
          end
          unless child.parent == node
            node.error "Child #{i} does not have parent pointing " +
              "to this node"
            return false
          end
          if i > 0
            unless node.children[i - 1].next_sibling == child
              node.error "next_sibling of node " +
                "#{node.children[i - 1]._id} " +
                "must point to node #{child._id}"
              return false
            end
          end
          if i < node.children.length - 1
            unless child == node.children[i + 1].prev_sibling
              node.error "prev_sibling of node " +
                "#{node.children[i + 1]._id} " +
                "must point to node #{child._id}"
              return false
            end
          end
        end
      end
    elsif position <= node.keys.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf?
        if block_given?
          # If a block was given, call this block with the key and value.
          return false unless yield(node.keys[index], node.values[index])
        end
      else
        unless node.children[index].keys.last < node.keys[index]
          node.error "Child #{node.children[index]._id} " +
            "has too large key #{node.children[index].keys.last}. " +
            "Must be smaller than #{node.keys[index]}."
          return false
        end
        unless node.children[position].keys.first >= node.keys[index]
          node.error "Child #{node.children[position]._id} " +
            "has too small key #{node.children[position].keys.first}. " +
            "Must be larger than or equal to #{node.keys[index]}."
          return false
        end
      end
    end
  end

  true
end
|
402
|
+
|
403
|
+
# @return [String] Human readable form of the sub-tree.
|
404
|
+
def to_s
|
405
|
+
str = ''
|
406
|
+
|
407
|
+
traverse do |node, position, stack|
|
408
|
+
if position == 0
|
409
|
+
begin
|
410
|
+
str += "#{node.parent ? node.parent.tree_prefix + ' +' : 'o'}" +
|
411
|
+
"#{node.tree_branch_mark}-" +
|
412
|
+
"#{node.keys.first.nil? ? '--' : 'v-'}#{node.tree_summary}\n"
|
413
|
+
rescue => e
|
414
|
+
str += "@@@@@@@@@@: #{e.message}\n"
|
415
|
+
end
|
416
|
+
else
|
417
|
+
begin
|
418
|
+
if node.is_leaf?
|
419
|
+
if node.keys[position - 1]
|
420
|
+
str += "#{node.tree_prefix} |" +
|
421
|
+
"[#{node.keys[position - 1]}, " +
|
422
|
+
"#{node.values[position - 1]}]\n"
|
423
|
+
end
|
424
|
+
else
|
425
|
+
if node.keys[position - 1]
|
426
|
+
str += "#{node.tree_prefix} #{node.keys[position - 1]}\n"
|
427
|
+
end
|
428
|
+
end
|
429
|
+
rescue => e
|
430
|
+
str += "@@@@@@@@@@: #{e.message}\n"
|
431
|
+
end
|
432
|
+
end
|
433
|
+
end
|
434
|
+
|
435
|
+
str
|
436
|
+
end
|
437
|
+
|
438
|
+
# Split this node in two. A new sibling node is created right after this
# node and receives the upper half of the entries; this node keeps the
# lower half. The key at the middle index is registered with the parent
# as branch key for the new sibling. If this node has no parent yet, a
# new branch node is created first and installed as root of the tree.
# @return [BigTreeNode] common parent of the two nodes
def split_node
  unless @parent
    # Splitting the root: create a branch node above it that becomes the
    # new root of the tree.
    self.parent = @store.new(BigTreeNode, @tree, false)
    @parent.children[0] = myself
    @tree.root = @parent
  end

  leaf = is_leaf?
  # The sibling is of the same kind (leaf/branch) as this node and is
  # linked between this node and the current next sibling.
  sibling = @store.new(BigTreeNode, @tree, leaf, @parent, myself,
                       @next_sibling)

  # Index of the middle key. The node size must be an uneven number.
  pivot = @keys.size / 2
  @parent.insert_element(@keys[pivot], sibling)

  # Leaf nodes hand over the middle key as well. Branch nodes only hand
  # over what follows the middle key.
  moved_from = leaf ? pivot : pivot + 1
  sibling.keys += @keys[moved_from..-1]

  if leaf
    sibling.values += @values[moved_from..-1]
    @values.slice!(moved_from..-1)
  else
    sibling.children += @children[moved_from..-1]
    # The moved children now belong to the new sibling.
    sibling.children.each { |child| child.parent = sibling }
    @children.slice!(moved_from..-1)
  end

  # In both cases this node drops everything from the middle key onwards.
  @keys.slice!(pivot..-1)

  @parent
end
|
482
|
+
|
483
|
+
# Insert the given value or child into the current node using the key as
# index. A leaf node stores the value at the position of the key. A branch
# node stores the child one slot to the right of the key. Inserting a new
# entry into a leaf node also increments the entry counter of the tree.
# A node that already holds @tree.node_size keys is reported via
# PEROBS.log.fatal.
# @param key [Integer] key to address the value or child
# @param child_or_value [Integer or BigTreeNode] value or BigTreeNode
# @return [Boolean] true if new element, false if override existing
#         element
def insert_element(key, child_or_value)
  if @keys.size >= @tree.node_size
    PEROBS.log.fatal "Cannot insert into a full BigTreeNode: #{@keys.size}"
  end

  i = search_key_index(key)
  if @keys[i] == key
    # Overwrite existing entries
    @keys[i] = key
    if is_leaf?
      @values[i] = child_or_value
    else
      @children[i + 1] = child_or_value
    end

    # Report the override explicitly instead of leaking the result of the
    # last assignment.
    false
  else
    # Create a new entry
    @keys.insert(i, key)
    if is_leaf?
      @values.insert(i, child_or_value)
      @tree.entry_counter += 1
    else
      @children.insert(i + 1, child_or_value)
    end

    true
  end
end
|
514
|
+
|
515
|
+
# Remove the key and its value at the given index from this leaf node.
# If the first key was removed, the matching branch key further up is
# updated. If the node ends up with fewer than min_keys keys it borrows
# an entry from a sibling with the same parent or is merged with one.
# The merge has potentially invalidated this node. After this method has
# been called this copy of the node should no longer be used.
# @param index [Integer] The index of the entry to be removed
# @return [Object] The removed value
def remove_element(index)
  key = @keys.delete_at(index)
  unless key
    PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " \
      "@#{@_id}"
  end
  # The lead key of this node has changed.
  update_branch_key(key) if index == 0

  removed_value = @values.delete_at(index)
  # Nothing more to do while the node is still sufficiently filled.
  return removed_value unless @keys.length < min_keys

  if @prev_sibling && @prev_sibling.parent == @parent
    unless borrow_from_previous_sibling(@prev_sibling)
      @prev_sibling.merge_with_leaf_node(myself)
    end
  elsif @next_sibling && @next_sibling.parent == @parent
    unless borrow_from_next_sibling(@next_sibling)
      merge_with_leaf_node(@next_sibling)
    end
  elsif @parent
    PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
  end

  removed_value
end
|
544
|
+
|
545
|
+
# Remove the specified node from this branch node. The key that belongs
# to the child is dropped, the child is unlinked from its neighbouring
# siblings and the first/last leaf references of the tree are adjusted
# if they pointed to the child. If this node becomes too small it borrows
# from or merges with a sibling that has the same parent. A parent-less
# node that is left with at most one child hands the root role to that
# child.
# @param node [BigTreeNode] The child to remove
def remove_child(node)
  index = search_node_index(node)
  unless index
    PEROBS.log.fatal "Cannot remove child #{node._id} from node #{@_id}"
  end

  if index == 0
    # The first child has no key of its own in this node. Drop the lead
    # key and fix up the branch key that is stored in an ancestor.
    update_branch_key(@keys.shift)
  else
    # Every other child is preceded by its key.
    @keys.delete_at(index - 1)
  end

  # Remove the child node link.
  child = @children.delete_at(index)
  left = child.prev_sibling
  right = child.next_sibling

  # Keep the first and last leaf references of the tree valid.
  @tree.first_leaf = right if child == @tree.first_leaf
  @tree.last_leaf = left if child == @tree.last_leaf
  # Close the gap in the sibling chain.
  left.next_sibling = right if left
  right.prev_sibling = left if right

  if @keys.length < min_keys
    # The node has become too small. Try borrowing from an adjacent
    # sibling or merge with it.
    if @prev_sibling && @prev_sibling.parent == @parent
      unless borrow_from_previous_sibling(@prev_sibling)
        @prev_sibling.merge_with_branch_node(myself)
      end
    elsif @next_sibling && @next_sibling.parent == @parent
      unless borrow_from_next_sibling(@next_sibling)
        merge_with_branch_node(@next_sibling)
      end
    end
  end

  if @parent.nil? && @children.length <= 1
    # This node has no parent and at most one child left. That child
    # becomes the new root node.
    new_root = @children.first
    new_root.parent = nil
    @tree.root = new_root
  end
end
|
592
|
+
|
593
|
+
# Append all keys and values of the given leaf node to this leaf node and
# then remove the given node from its parent. A combined key count above
# @tree.node_size is reported via PEROBS.log.fatal.
# @param node [BigTreeNode] the leaf node to absorb
# @return whatever remove_child of the absorbed node's parent returns
def merge_with_leaf_node(node)
  combined = @keys.length + node.keys.length
  PEROBS.log.fatal "Leaf nodes are too big to merge" if combined > @tree.node_size

  self.keys = self.keys + node.keys
  self.values = self.values + node.values

  # The absorbed node is no longer needed.
  node.parent.remove_child(node)
end
|
603
|
+
|
604
|
+
# Absorb the given branch node into this branch node. The key that the
# parent holds for the given node is appended first, followed by the keys
# and children of the given node. The moved children are re-parented to
# this node and the given node is then removed from its parent. A
# combined key count above @tree.node_size is reported via
# PEROBS.log.fatal.
# @param node [BigTreeNode] the branch node to absorb
# @return whatever remove_child of the absorbed node's parent returns
def merge_with_branch_node(node)
  combined = @keys.length + 1 + node.keys.length
  PEROBS.log.fatal "Branch nodes are too big to merge" if combined > @tree.node_size

  # The key in the parent sits one slot before the child link of the node.
  separator_index = @parent.search_node_index(node) - 1
  self.keys << @parent.keys[separator_index]
  self.keys += node.keys

  node.children.each { |child| child.parent = myself }
  self.children += node.children

  node.parent.remove_child(node)
end
|
617
|
+
|
618
|
+
# Locate the given key in the sorted, duplicate-free key list of this
# node with a binary search. For an exact match a leaf node yields the
# index of the key while a branch node yields the index plus one, since
# the child to the right of a branch key holds the keys that are equal or
# larger. Without an exact match the insert position is returned, which
# is the index of the first key that is larger than the given key (or the
# number of keys if there is no such key).
# @param key [Integer] key to search for
# @return [Integer] Index of the matching key or the insert position.
def search_key_index(key)
  lower = 0
  upper = @keys.size - 1

  # An empty key list skips the loop and yields position 0.
  until lower > upper
    # Probe the element in the middle of the remaining range.
    pivot = lower + (upper - lower) / 2
    candidate = @keys[pivot]

    if key < candidate
      # The key can only be located before the probed element.
      upper = pivot - 1
    elsif key > candidate
      # The key can only be located after the probed element.
      lower = pivot + 1
    else
      # Exact match.
      return is_leaf? ? pivot : pivot + 1
    end
  end

  # All keys before 'lower' are smaller than the given key and all keys
  # from 'lower' onwards are larger. This is the insert position.
  lower
end
|
655
|
+
|
656
|
+
# Determine the index of the given child node in the children list of this
# node. The position is looked up via the first key of the child.
# @param node [BigTreeNode] the child node to look for
# @return [Integer] index of the node in the children list
# @raise [RuntimeError] if the child at the computed index is not the
#        given node
def search_node_index(node)
  position = search_key_index(node.keys.first)
  return position if @children[position] == node

  raise RuntimeError, "Child at index #{position} is not the requested node"
end
|
664
|
+
|
665
|
+
# Generic, non-recursive iterator over this node and all nodes below it.
# The block is called for every node once per position. Position 0 means
# the node was just entered and the first child is about to be visited.
# Position N means that the N-th child has been processed. The last
# position for a node is one larger than its number of keys.
# The third block argument is the stack of still pending [node, position]
# pairs that the iterator works on.
# @yield [node, position, stack]
def traverse
  pending = [ [ self, 0 ] ]

  until pending.empty?
    node, position = pending.pop

    yield(node, position, pending)

    # All positions of this node have been reported.
    next if position > node.keys.size

    # Come back to this node with the next position later on.
    pending.push([ node, position + 1 ])

    # Leaf nodes have no children to descend into.
    next if node.is_leaf?

    child = node.children[position]
    # Visit the child for this position before continuing with the node.
    pending.push([ child, 0 ]) if child
  end
end
|
699
|
+
|
700
|
+
# Gather some statistics about the node and all sub nodes. Every node is
# counted once as leaf or branch node. For leaf nodes the depth (number
# of pending ancestors on the traversal stack plus one) is used to track
# the smallest and largest depth seen.
# @param stats [Stats] Data structure that stores the gathered data. The
#        leaf_nodes, branch_nodes, min_depth and max_depth fields are
#        updated. min_depth and max_depth may be nil initially.
def statistics(stats)
  traverse do |node, position, stack|
    # Count each node only when it is entered.
    next unless position == 0

    if node.is_leaf?
      stats.leaf_nodes += 1
      depth = stack.size + 1
      # The minimum must only shrink and the maximum must only grow.
      if stats.min_depth.nil? || depth < stats.min_depth
        stats.min_depth = depth
      end
      if stats.max_depth.nil? || depth > stats.max_depth
        stats.max_depth = depth
      end
    else
      stats.branch_nodes += 1
    end
  end
end
|
720
|
+
|
721
|
+
# Return the decoration that marks the tree structure of this node for the
# inspection method. One segment is generated for this node and for each
# ancestor that has a parent. The segment shows a vertical bar unless the
# node is the last child of its parent. The node without a parent
# contributes nothing.
# @return [String] the prefix for the lines of this node
def tree_prefix
  prefix = ''
  node = self

  # Walk upwards until the node without a parent is reached.
  while (parent = node.parent)
    marker = parent.children.last == node ? ' ' : ' |'
    prefix = marker + prefix
    node = parent
  end

  prefix
end
|
742
|
+
|
743
|
+
# Branch node decoration for the inspection method.
# @return [String] '-' for a node that has a parent, an empty String
#         otherwise
def tree_branch_mark
  @parent ? '-' : ''
end
|
748
|
+
|
749
|
+
# Text for the node line for the inspection method. It lists the ID of
# this node followed by the IDs of the parent ('^'), the previous sibling
# ('<') and the next sibling ('>') as far as they are set. If the ID of a
# linked node cannot be determined, '@' is shown in its place.
# @return [String] summary of the node and its links
def tree_summary
  links = [ [ '^', @parent ], [ '<', @prev_sibling ], [ '>', @next_sibling ] ]

  links.reduce(" @#{@_id}") do |summary, (mark, linked)|
    # Unset links are not mentioned at all.
    next summary unless linked

    reference =
      begin
        " #{mark}#{linked._id}"
      rescue
        # The linked node could not be accessed.
        " #{mark}@"
      end

    summary + reference
  end
end
|
776
|
+
|
777
|
+
# Print and log an error message for the node. The message is prefixed
# with the ID of this node and followed by the text form of the whole
# tree. It is written to $stderr and handed to PEROBS.log.error.
# @param msg [String] description of the problem
def error(msg)
  report = "Error in BigTree node @#{@_id}: #{msg}\n" + @tree.to_s

  $stderr.puts report
  PEROBS.log.error report
end
|
783
|
+
|
784
|
+
private
|
785
|
+
|
786
|
+
# Threshold used by the remove operations: a node with fewer keys than
# this is refilled from or merged with a sibling.
# @return [Integer] half of the node size of the tree
def min_keys
  capacity = @tree.node_size
  capacity / 2
end
|
789
|
+
|
790
|
+
# Try to borrow an element from the preceding sibling. Nothing is moved
# unless the sibling has more than min_keys keys left after giving one
# away.
# @param prev_node [BigTreeNode] the preceding sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_previous_sibling(prev_node)
  return false unless prev_node.keys.length - 1 > min_keys

  # Position of the branch key for this node in the parent.
  branch_index = @parent.search_node_index(self) - 1

  if is_leaf?
    # The last entry of the previous node becomes the first entry of this
    # node.
    @keys.unshift(prev_node.keys.pop)
    # This node has a new lead key that the parent must know about.
    @parent.keys[branch_index] = @keys.first
    @values.unshift(prev_node.values.pop)
  else
    # The current branch key moves down into this node ...
    @keys.unshift(@parent.keys[branch_index])
    # ... and the last key of the previous node moves up to replace it.
    @parent.keys[branch_index] = prev_node.keys.pop
    # The last child of the previous node becomes the first child of this
    # node.
    moved_child = prev_node.children.pop
    @children.unshift(moved_child)
    moved_child.parent = myself
  end

  true
end
|
820
|
+
|
821
|
+
# Try to borrow an element from the next sibling. Nothing is moved unless
# the sibling has more than min_keys keys left after giving one away.
# @param next_node [BigTreeNode] the following sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_next_sibling(next_node)
  return false unless next_node.keys.length - 1 > min_keys

  lender_parent = next_node.parent
  # Position of the branch key for next_node in its parent. The key must
  # be updated since next_node loses its lead element.
  branch_index = lender_parent.search_node_index(next_node) - 1

  if is_leaf?
    # The first entry of the next node becomes the last entry of this
    # node.
    self.keys << next_node.keys.shift
    # Register the new lead key of next_node with its parent.
    lender_parent.keys[branch_index] = next_node.keys.first
    self.values << next_node.values.shift
  else
    # The current branch key moves down to the end of this node ...
    self.keys << lender_parent.keys[branch_index]
    # ... and the first key of next_node moves up to replace it.
    lender_parent.keys[branch_index] = next_node.keys.shift
    # The first child of the next node becomes the last child of this
    # node.
    moved_child = next_node.children.shift
    self.children << moved_child
    moved_child.parent = myself
  end

  true
end
|
854
|
+
|
855
|
+
# Replace the given old key with the current first key of this node in
# the closest ancestor that holds the old key. Nothing happens if this
# node has no parent or if no ancestor holds the old key, which is the
# case for the smallest element since it has no branch key.
# @param old_key [Integer] the key that used to lead this node
# @return [nil]
def update_branch_key(old_key)
  replacement = @keys.first
  ancestor = @parent

  while ancestor
    slot = ancestor.keys.index(old_key)
    if slot
      # Only the first ancestor that holds the key is updated.
      ancestor.keys[slot] = replacement
      break
    end

    ancestor = ancestor.parent
  end

  nil
end
|
869
|
+
|
870
|
+
end
|
871
|
+
|
872
|
+
end
|
873
|
+
|