perobs 4.0.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,873 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# = BigTreeNode.rb -- Persistent Ruby Object Store
|
4
|
+
#
|
5
|
+
# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
|
6
|
+
#
|
7
|
+
# MIT License
|
8
|
+
#
|
9
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
+
# a copy of this software and associated documentation files (the
|
11
|
+
# "Software"), to deal in the Software without restriction, including
|
12
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
+
# the following conditions:
|
16
|
+
#
|
17
|
+
# The above copyright notice and this permission notice shall be
|
18
|
+
# included in all copies or substantial portions of the Software.
|
19
|
+
#
|
20
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
+
|
28
|
+
require 'perobs/Object'
|
29
|
+
require 'perobs/Array'
|
30
|
+
|
31
|
+
module PEROBS
|
32
|
+
|
33
|
+
# The BigTreeNode class provides the BTree nodes for the BigTree objects.
|
34
|
+
# A node can either be a branch node or a leaf node. Branch nodes don't
|
35
|
+
# store values, only references to child nodes. Leaf nodes don't have child
|
36
|
+
# nodes but store the actual values. All nodes store a list of keys that are
|
37
|
+
# used to navigate the tree and find the values. A key is either directly
|
38
|
+
# associated with a value or determines the lower key boundary for the
|
39
|
+
# following child node.
|
40
|
+
class BigTreeNode < PEROBS::Object
|
41
|
+
|
42
|
+
attr_persist :tree, :parent, :keys, :values, :children,
|
43
|
+
:prev_sibling, :next_sibling
|
44
|
+
|
45
|
+
# Internal constructor. Use Store.new(BigTreeNode, ...) instead.
|
46
|
+
# @param p [Handle]
|
47
|
+
# @param tree [BigTree] The tree this node should belong to
|
48
|
+
# @param is_leaf [Boolean] True if a leaf node should be created, false
|
49
|
+
# for a branch node.
|
50
|
+
# @param parent [BigTreeNode] Parent node
|
51
|
+
# @param prev_sibling [BigTreeNode] Previous sibling
|
52
|
+
# @param next_sibling [BigTreeNode] Next sibling
|
53
|
+
def initialize(p, tree, is_leaf, parent = nil, prev_sibling = nil,
               next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent
  self.keys = @store.new(PEROBS::Array)

  # A node holds either values (leaf) or children (branch), never both.
  if is_leaf
    self.values = @store.new(PEROBS::Array)
    self.children = nil
  else
    self.children = @store.new(PEROBS::Array)
    self.values = nil
  end

  # Hook the new node into the sibling chain. A leaf without a neighbour
  # on one side becomes the first respectively last leaf of the tree.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  elsif is_leaf?
    @tree.first_leaf = myself
  end

  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  elsif is_leaf?
    @tree.last_leaf = myself
  end
end
|
84
|
+
|
85
|
+
# @return [Boolean] True if this is a leaf node, false otherwise.
|
86
|
+
def is_leaf?
  # Only branch nodes own a children list; for leaf nodes it stays nil.
  @children.nil? ? true : false
end
|
89
|
+
|
90
|
+
# Insert or replace the given value by using the key as unique address.
|
91
|
+
# @param key [Integer] Unique key to retrieve the value
|
92
|
+
# @param value [Integer] value to insert
|
93
|
+
def insert(key, value)
  current = myself

  # Walk down from this node until a leaf is reached.
  while current
    # Full nodes met on the way are split into two half-full nodes; the
    # walk then continues from the common parent returned by the split.
    current = current.split_node if current.keys.size >= @tree.node_size

    # A leaf is where the value gets added or replaced.
    return current.insert_element(key, value) if current.is_leaf?

    # Otherwise continue with the child responsible for the key.
    current = current.children[current.search_key_index(key)]
  end

  PEROBS.log.fatal "Could not find proper node to insert into"
end
|
115
|
+
|
116
|
+
# Return the value that matches the given key or return nil if the key is
|
117
|
+
# unknown.
|
118
|
+
# @param key [Integer] key to search for
|
119
|
+
# @return [Integer or nil] value that matches the key
|
120
|
+
def get(key)
  current = self

  while current
    # Index of the entry (leaf) or child (branch) that fits the key best.
    slot = current.search_key_index(key)

    if current.is_leaf?
      # Only an exact key match yields a value; anything else means the key
      # is not stored in the tree.
      return nil unless current.keys[slot] == key

      return current.values[slot]
    end

    # Branch node: continue the search in the matching child.
    current = current.children[slot]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
    "looking for key #{key}"
end
|
139
|
+
|
140
|
+
# Return the node chain from the root to the leaf node storing the
|
141
|
+
# key/value pair.
|
142
|
+
# @param key [Integer] key to search for
|
143
|
+
# @return [Array of BigTreeNode] node list (may be empty)
|
144
|
+
def node_chain(key)
  node = myself
  list = []

  while node do
    # Every node visited on the way down is part of the chain, the leaf
    # included. Each node is recorded exactly once. (The previous
    # implementation added the root twice when it was a branch node and
    # never added the leaf that holds the key.)
    list << node

    # Find index of the entry that best fits the key.
    i = node.search_key_index(key)
    if node.is_leaf?
      # This is a leaf node. Only an exact key match yields the chain;
      # an unknown key yields an empty list.
      return node.keys[i] == key ? list : []
    end

    # Descend into the right child node to continue the search.
    node = node.children[i]
  end

  PEROBS.log.fatal "Could not find node chain for key #{key}"
end
|
165
|
+
|
166
|
+
# Return if given key is stored in the node.
|
167
|
+
# @param key [Integer] key to search for
|
168
|
+
# @return [Boolean] True if key was found, false otherwise
|
169
|
+
def has_key?(key)
  current = self

  while current
    # Index of the entry (leaf) or child (branch) that fits the key best.
    slot = current.search_key_index(key)

    # In a leaf node the key is present only on an exact match.
    return current.keys[slot] == key if current.is_leaf?

    # Branch node: continue the search in the matching child.
    current = current.children[slot]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
    "looking for key #{key}"
end
|
188
|
+
|
189
|
+
# Return the value that matches the given key and remove the value from
|
190
|
+
# the tree. Return nil if the key is unknown.
|
191
|
+
# @param key [Integer] key to search for
|
192
|
+
# @return [Object] value that matches the key
|
193
|
+
def remove(key)
  current = self

  while current
    # Index of the entry (leaf) or child (branch) that fits the key best.
    slot = current.search_key_index(key)

    if current.is_leaf?
      # Unknown keys leave the tree untouched.
      return nil unless current.keys[slot] == key

      # Exact match: account for the removal, then drop the entry.
      @tree.entry_counter -= 1
      return current.remove_element(slot)
    end

    # Branch node: continue the search in the matching child.
    current = current.children[slot]
  end

  PEROBS.log.fatal 'Could not find proper node to remove from'
end
|
216
|
+
|
217
|
+
# Iterate over all the key/value pairs in this node and all sub-nodes.
|
218
|
+
# @yield [key, value]
|
219
|
+
def each
  traverse do |node, position, _stack|
    # Only leaf nodes carry key/value pairs, and only positions below the
    # key count address a stored pair.
    next unless node.is_leaf?
    next unless position < node.keys.size

    yield(node.keys[position], node.values[position])
  end
end
|
226
|
+
|
227
|
+
# Iterate over all the key/value pairs of the node.
|
228
|
+
# @yield [key, value]
|
229
|
+
def each_element
  if is_leaf?
    # Visit the pairs of this leaf in ascending index order.
    final_index = @keys.length - 1
    0.upto(final_index) { |idx| yield(@keys[idx], @values[idx]) }
  else
    # Branch nodes store no key/value pairs.
    self
  end
end
|
236
|
+
|
237
|
+
# Iterate over all the key/value pairs of the node in reverse order.
|
238
|
+
# @yield [key, value]
|
239
|
+
def reverse_each_element
  if is_leaf?
    # Visit the pairs of this leaf in descending index order.
    final_index = @keys.length - 1
    final_index.downto(0) { |idx| yield(@keys[idx], @values[idx]) }
  else
    # Branch nodes store no key/value pairs.
    self
  end
end
|
246
|
+
|
247
|
+
# Check consistency of the node and all subsequent nodes. In case an error
|
248
|
+
# is found, a message is logged and false is returned.
|
249
|
+
# @yield [key, value]
|
250
|
+
# @return [Boolean] true if tree has no errors
|
251
|
+
def check
  # Depth (number of ancestors) of the first leaf seen. All other leaves
  # must be found at the same depth.
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      # Checks that are done once when a node is entered.
      if node.parent
        # After a split the nodes will only have half the maximum keys.
        # For branch nodes one of the split nodes will have even 1 key
        # less as this will become the branch key in a parent node.
        if node.keys.size < min_keys - (node.is_leaf? ? 0 : 1)
          node.error "BigTree node #{node._id} has too few keys"
          return false
        end
      end

      if node.keys.size > @tree.node_size
        node.error "BigTree node must not have more then " +
          "#{@tree.node_size} keys, but has #{node.keys.size} keys"
        return false
      end

      last_key = nil
      node.keys.each do |key|
        if last_key && key < last_key
          node.error "Keys are not increasing monotoneously: " +
            "#{node.keys.inspect}"
          return false
        end
        last_key = key
      end

      if node.is_leaf?
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end
        if node.prev_sibling.nil?
          if @tree.first_leaf != node
            node.error "Leaf node #{node._id} has no previous sibling " +
              "but is not the first leaf of the tree"
            return false
          end
        elsif node.prev_sibling.next_sibling != node
          node.error "next_sibling of previous sibling does not point to " +
            "this node"
          return false
        end
        if node.next_sibling.nil?
          if @tree.last_leaf != node
            node.error "Leaf node #{node._id} has no next sibling " +
              "but is not the last leaf of the tree"
            return false
          end
        elsif node.next_sibling.prev_sibling != node
          node.error "previous_sibling of next sibling does not point to " +
            "this node"
          return false
        end
        unless node.keys.size == node.values.size
          node.error "Key count (#{node.keys.size}) and value " +
            "count (#{node.values.size}) don't match"
          return false
        end
        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        if node.values
          node.error "values must be nil for a branch node"
          return false
        end
        unless node.children.size == node.keys.size + 1
          node.error "Key count (#{node.keys.size}) must be one " +
            "less than children count (#{node.children.size})"
          return false
        end
        node.children.each_with_index do |child, i|
          unless child.is_a?(BigTreeNode)
            node.error "Child #{i} is of class #{child.class} " +
              "instead of BigTreeNode"
            return false
          end
          unless child.parent.is_a?(BigTreeNode)
            # Report the class of the offending parent reference, not the
            # class of the child itself.
            node.error "Parent reference of child #{i} is of class " +
              "#{child.parent.class} instead of BigTreeNode"
            return false
          end
          if child == node
            node.error "Child #{i} point to self"
            return false
          end
          # The traversal stack holds [node, position] pairs for all
          # ancestors of the current node, so each pair has to be unpacked
          # before it is compared with the child.
          if stack.any? { |ancestor, _| ancestor == child }
            node.error "Child #{i} points to ancester node"
            return false
          end
          unless child.parent == node
            node.error "Child #{i} does not have parent pointing " +
              "to this node"
            return false
          end
          if i > 0
            unless node.children[i - 1].next_sibling == child
              node.error "next_sibling of node " +
                "#{node.children[i - 1]._id} " +
                "must point to node #{child._id}"
              return false
            end
          end
          if i < node.children.length - 1
            unless child == node.children[i + 1].prev_sibling
              node.error "prev_sibling of node " +
                "#{node.children[i + 1]._id} " +
                "must point to node #{child._id}"
              return false
            end
          end
        end
      end
    elsif position <= node.keys.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf?
        if block_given?
          # If a block was given, call this block with the key and value.
          # A falsy block result aborts the check.
          return false unless yield(node.keys[index], node.values[index])
        end
      else
        # The branch key separates the two adjacent children: everything
        # in the left child is smaller, everything in the right child is
        # larger or equal.
        unless node.children[index].keys.last < node.keys[index]
          node.error "Child #{node.children[index]._id} " +
            "has too large key #{node.children[index].keys.last}. " +
            "Must be smaller than #{node.keys[index]}."
          return false
        end
        unless node.children[position].keys.first >= node.keys[index]
          node.error "Child #{node.children[position]._id} " +
            "has too small key #{node.children[position].keys.first}. " +
            "Must be larger than or equal to #{node.keys[index]}."
          return false
        end
      end
    end
  end

  true
end
|
402
|
+
|
403
|
+
# @return [String] Human readable form of the sub-tree.
|
404
|
+
def to_s
  str = ''

  traverse do |node, position, _stack|
    begin
      if position == 0
        # Header line of a node: tree decoration, branch mark and summary.
        lead = node.parent ? node.parent.tree_prefix + ' +' : 'o'
        mark = node.tree_branch_mark
        arrow = node.keys.first.nil? ? '--' : 'v-'
        str += "#{lead}#{mark}-#{arrow}#{node.tree_summary}\n"
      else
        slot = position - 1
        if node.is_leaf?
          # Leaf entries are printed as [key, value] pairs.
          if node.keys[slot]
            str += "#{node.tree_prefix} |[#{node.keys[slot]}, " +
              "#{node.values[slot]}]\n"
          end
        elsif node.keys[slot]
          # Branch entries only show the branch key.
          str += "#{node.tree_prefix} #{node.keys[slot]}\n"
        end
      end
    rescue => e
      # Keep going on broken nodes but make the problem visible.
      str += "@@@@@@@@@@: #{e.message}\n"
    end
  end

  str
end
|
437
|
+
|
438
|
+
# Split the current node into two nodes. The upper half of the elements
|
439
|
+
# will be moved into a newly created node. This node will retain the lower
|
440
|
+
# half.
|
441
|
+
# @return [BigTreeNode] common parent of the two nodes
|
442
|
+
def split_node
  unless @parent
    # Splitting the root: a new branch node becomes the root and this node
    # its first child.
    self.parent = @store.new(BigTreeNode, @tree, false)
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The new right-hand sibling receives the upper half of this node.
  sibling = @store.new(BigTreeNode, @tree, is_leaf?, @parent, myself,
                       @next_sibling)

  # Index of the middle key. That key is registered with the parent as the
  # branch key of the new sibling.
  mid = @keys.size / 2
  @parent.insert_element(@keys[mid], sibling)

  if is_leaf?
    # Leaf nodes keep the middle key: it moves to the sibling together with
    # its value and everything above it.
    upper = mid..-1
    sibling.keys += @keys[upper]
    sibling.values += @values[upper]
    @values.slice!(upper)
  else
    # For branch nodes the middle key lives only in the parent. The sibling
    # gets the keys and children above it.
    above_mid = (mid + 1)..-1
    sibling.keys += @keys[above_mid]
    sibling.children += @children[above_mid]
    # The moved children now belong to the sibling.
    sibling.children.each { |moved| moved.parent = sibling }
    @children.slice!(above_mid)
  end

  # In both cases this node only retains the keys below the middle key.
  @keys.slice!(mid..-1)

  @parent
end
|
482
|
+
|
483
|
+
# Insert the given value or child into the current node using the key as
|
484
|
+
# index.
|
485
|
+
# @param key [Integer] key to address the value or child
|
486
|
+
# @param child_or_value [Integer or BigTreeNode] value or BigTreeNode
|
487
|
+
# @return [Boolean] true if new element, false if override existing
|
488
|
+
# element
|
489
|
+
def insert_element(key, child_or_value)
  # A full node must be split by the caller before anything is inserted.
  if @keys.size >= @tree.node_size
    PEROBS.log.fatal "Cannot insert into a full BigTreeNode: #{@keys.size}"
  end

  i = search_key_index(key)
  if @keys[i] == key
    # Overwrite existing entries
    @keys[i] = key
    if is_leaf?
      @values[i] = child_or_value
    else
      @children[i + 1] = child_or_value
    end

    # An existing element was replaced. Return the documented Boolean
    # instead of whatever the last assignment evaluated to.
    false
  else
    # Create a new entry
    @keys.insert(i, key)
    if is_leaf?
      @values.insert(i, child_or_value)
      # Only leaf entries count as tree entries.
      @tree.entry_counter += 1
    else
      # The child belonging to a branch key sits one slot to the right of
      # the key.
      @children.insert(i + 1, child_or_value)
    end

    # A new element was added.
    true
  end
end
|
514
|
+
|
515
|
+
# Remove the element from a leaf node at the given index.
|
516
|
+
# @param index [Integer] The index of the entry to be removed
|
517
|
+
# @return [Object] The removed value
|
518
|
+
def remove_element(index)
  # Drop the key first; a missing key means the index was invalid.
  key = @keys.delete_at(index)
  unless key
    PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " +
      "@#{@_id}"
  end
  # Removing the lead key changes the branch key stored in an ancestor.
  update_branch_key(key) if index == 0

  # Now drop the value that belonged to the key.
  removed_value = @values.delete_at(index)

  if @keys.length < min_keys
    # The leaf is under-filled. Prefer a sibling below the same parent:
    # borrow an element if possible, otherwise merge the two leaves.
    if @prev_sibling && @prev_sibling.parent == @parent
      unless borrow_from_previous_sibling(@prev_sibling)
        @prev_sibling.merge_with_leaf_node(myself)
      end
    elsif @next_sibling && @next_sibling.parent == @parent
      unless borrow_from_next_sibling(@next_sibling)
        merge_with_leaf_node(@next_sibling)
      end
    elsif @parent
      PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
    end
  end

  # The merge has potentially invalidated this node. After this method has
  # been called this copy of the node should no longer be used.
  removed_value
end
|
544
|
+
|
545
|
+
# Remove the specified node from this branch node.
|
546
|
+
# @param node [BigTreeNode] The child to remove
|
547
|
+
def remove_child(node)
  index = search_node_index(node)
  unless index
    PEROBS.log.fatal "Cannot remove child #{node._id} from node #{@_id}"
  end

  if index == 0
    # The branch key of the first child is stored in an ancestor, so that
    # ancestor has to learn about the new lead key of this node.
    update_branch_key(@keys.shift)
  else
    # Any other child has its branch key right here, one slot to the left.
    @keys.delete_at(index - 1)
  end

  # Detach the child itself.
  child = @children.delete_at(index)
  # Keep the tree's first/last leaf references valid.
  @tree.first_leaf = child.next_sibling if child == @tree.first_leaf
  @tree.last_leaf = child.prev_sibling if child == @tree.last_leaf
  # Close the gap in the sibling chain.
  child.prev_sibling.next_sibling = child.next_sibling if child.prev_sibling
  child.next_sibling.prev_sibling = child.prev_sibling if child.next_sibling

  if @keys.length < min_keys
    # This branch node is under-filled now. Borrow from a sibling below the
    # same parent or, if that is not possible, merge with it.
    if @prev_sibling && @prev_sibling.parent == @parent
      unless borrow_from_previous_sibling(@prev_sibling)
        @prev_sibling.merge_with_branch_node(myself)
      end
    elsif @next_sibling && @next_sibling.parent == @parent
      unless borrow_from_next_sibling(@next_sibling)
        merge_with_branch_node(@next_sibling)
      end
    end
  end

  # A root with a single remaining child is obsolete. That child becomes
  # the new root of the tree.
  return unless @parent.nil? && @children.length <= 1

  new_root = @children.first
  new_root.parent = nil
  @tree.root = new_root
end
|
592
|
+
|
593
|
+
def merge_with_leaf_node(node)
  # Both leaves together must fit into a single node.
  combined_size = @keys.length + node.keys.length
  if combined_size > @tree.node_size
    PEROBS.log.fatal "Leaf nodes are too big to merge"
  end

  # Append the content of the other leaf to this one ...
  self.keys += node.keys
  self.values += node.values

  # ... and unhook the now obsolete leaf from its parent.
  node.parent.remove_child(node)
end
|
603
|
+
|
604
|
+
def merge_with_branch_node(node)
  # Both key lists plus the branch key pulled down from the parent must
  # fit into a single node.
  combined_size = @keys.length + 1 + node.keys.length
  if combined_size > @tree.node_size
    PEROBS.log.fatal "Branch nodes are too big to merge"
  end

  # The parent's branch key for the other node becomes a regular key
  # between the two merged key lists.
  branch_key_index = @parent.search_node_index(node) - 1
  self.keys << @parent.keys[branch_key_index]
  self.keys += node.keys

  # Adopt the children of the other node.
  node.children.each { |adopted| adopted.parent = myself }
  self.children += node.children

  # Finally unhook the now obsolete node from its parent.
  node.parent.remove_child(node)
end
|
617
|
+
|
618
|
+
# Search the keys of the node that fits the given key. The result is
|
619
|
+
# either the index of an exact match or the index of the position where
|
620
|
+
# the given key would have to be inserted.
|
621
|
+
# @param key [Integer] key to search for
|
622
|
+
# @return [Integer] Index of the matching key or the insert position.
|
623
|
+
def search_key_index(key)
  # Nothing to search in an empty key list.
  return 0 if @keys.empty?

  # Keys are unique and sorted, so a binary search over the inclusive
  # index range [low, high] finds the position.
  low = 0
  high = @keys.size - 1
  pivot = 0
  until low > high
    # The pivot always sits in the middle of the remaining range.
    pivot = low + (high - low) / 2
    candidate = @keys[pivot]

    if key < candidate
      # The key is smaller than the pivot element. Continue in the lower
      # part of the range.
      high = pivot - 1
    elsif key > candidate
      # The key is larger than the pivot element. Continue in the upper
      # part of the range.
      low = pivot + 1
    else
      # Exact match. Leaf nodes use the index as is. Branch nodes need the
      # child to the right of the key, hence the index is one larger.
      return pivot + (is_leaf? ? 0 : 1)
    end
  end

  # No exact match. The result is the insert position, i.e. the index of
  # the first key that is larger than the given key.
  @keys[pivot] < key ? pivot + 1 : pivot
end
|
655
|
+
|
656
|
+
def search_node_index(node)
  # The lead key of the node determines the slot it has to occupy in the
  # children list.
  position = search_key_index(node.keys.first)
  return position if @children[position] == node

  raise RuntimeError, "Child at index #{position} is not the requested node"
end
|
664
|
+
|
665
|
+
# This is a generic tree iterator. It yields before it descends into the
|
666
|
+
# child node and after (which is identical to before the next child
|
667
|
+
# descend). It yields the node, the position and the stack of parent
|
668
|
+
# nodes.
|
669
|
+
# @yield [node, position, stack]
|
670
|
+
def traverse
  # Iterative depth-first walk. Each entry on the list of pending work is
  # a [node, position] pair that still has to be visited.
  pending = [ [ self, 0 ] ]

  until pending.empty?
    node, position = pending.pop

    # Hand the current state to the caller. Position 0 means the node was
    # just entered and its first child comes next. Position N means the
    # first N children have been processed. The pending list is passed
    # along as the stack of nodes above the current one.
    yield(node, position, pending)

    # Once the position has moved past the key count, the node is done.
    next unless position <= node.keys.size

    # Come back to this node at the next position later ...
    pending.push([ node, position + 1 ])

    # ... but first visit the child at the current position, if any.
    if !node.is_leaf? && node.children[position]
      pending.push([ node.children[position], 0 ])
    end
  end
end
|
699
|
+
|
700
|
+
# Gather some statistics about the node and all sub nodes.
|
701
|
+
# @param stats [Stats] Data structure that stores the gathered data
|
702
|
+
def statistics(stats)
  traverse do |node, position, stack|
    # Every node is counted exactly once, when it is first entered.
    next unless position == 0

    if node.is_leaf?
      stats.leaf_nodes += 1
      # The stack holds one entry per ancestor, so the leaf itself sits
      # one level below that.
      depth = stack.size + 1
      # Track the smallest and the largest leaf depth seen so far. The
      # comparisons used to be inverted, which recorded the largest depth
      # as minimum and the smallest as maximum.
      if stats.min_depth.nil? || depth < stats.min_depth
        stats.min_depth = depth
      end
      if stats.max_depth.nil? || depth > stats.max_depth
        stats.max_depth = depth
      end
    else
      stats.branch_nodes += 1
    end
  end
end
|
720
|
+
|
721
|
+
# Return the decoration that marks the tree structure of this node for the
|
722
|
+
# inspection method.
|
723
|
+
def tree_prefix
  prefix = ''
  current = self

  # Walk up towards the root and prepend one segment per level. The
  # top-level node itself contributes nothing.
  while current && (upper = current.parent)
    # A last child has no sibling below it and needs no connecting line.
    segment = upper.children.last == current ? ' ' : ' |'
    prefix = segment + prefix
    current = upper
  end

  prefix
end
|
742
|
+
|
743
|
+
# Branch node decoration for the inspection method.
|
744
|
+
def tree_branch_mark
  # Only nodes below the top level get a branch mark.
  @parent ? '-' : ''
end
|
748
|
+
|
749
|
+
# Text for the node line for the inspection method.
|
750
|
+
def tree_summary
  s = " @#{@_id}"

  # Append the IDs of the parent and both siblings, each with its own
  # marker. A reference whose ID cannot be read is shown as '@'.
  [ [ '^', @parent ], [ '<', @prev_sibling ], [ '>', @next_sibling ] ].
    each do |marker, reference|
    next unless reference

    begin
      s += " #{marker}#{reference._id}"
    rescue
      s += " #{marker}@"
    end
  end

  s
end
|
776
|
+
|
777
|
+
# Print and log an error message for the node.
|
778
|
+
def error(msg)
  # Prefix the message with the node ID and append a dump of the whole
  # tree to help with the diagnosis.
  report = "Error in BigTree node @#{@_id}: #{msg}\n" + @tree.to_s
  # The report goes to both the standard error stream and the log.
  $stderr.puts(report)
  PEROBS.log.error(report)
end
|
783
|
+
|
784
|
+
private
|
785
|
+
|
786
|
+
def min_keys
  # Half of the configured node size (integer division).
  capacity = @tree.node_size
  capacity / 2
end
|
789
|
+
|
790
|
+
# Try to borrow an element from the preceding sibling.
|
791
|
+
# @return [True or False] True if an element was borrowed, false
|
792
|
+
# otherwise.
|
793
|
+
def borrow_from_previous_sibling(prev_node)
  # Only lend an element if the sibling stays above the minimum size.
  return false unless prev_node.keys.length - 1 > min_keys

  # Slot of this node's branch key in the parent.
  index = @parent.search_node_index(self) - 1

  if is_leaf?
    # The last key of the previous node becomes the first key of this node.
    @keys.unshift(prev_node.keys.pop)
    # That key is the new lead key and must be registered with the parent.
    @parent.keys[index] = @keys.first
    # The value travels along with the key.
    @values.unshift(prev_node.values.pop)
  else
    # For branch nodes the current branch key moves down into this node.
    @keys.unshift(@parent.keys[index])
    # The last key of the previous node takes its place in the parent.
    @parent.keys[index] = prev_node.keys.pop
    # The last child of the previous node becomes the first child here.
    moved = prev_node.children.pop
    @children.unshift(moved)
    moved.parent = myself
  end

  true
end
|
820
|
+
|
821
|
+
# Try to borrow an element from the next sibling.
|
822
|
+
# @return [True or False] True if an element was borrowed, false
|
823
|
+
# otherwise.
|
824
|
+
def borrow_from_next_sibling(next_node)
  # Only lend an element if the sibling stays above the minimum size.
  return false unless next_node.keys.length - 1 > min_keys

  # Borrowing changes the lead key of the next sibling, so its branch key
  # in the parent has to be updated. This is the slot of that branch key.
  index = next_node.parent.search_node_index(next_node) - 1

  if is_leaf?
    # The first key of the next node becomes the last key of this node.
    self.keys << next_node.keys.shift
    # The new lead key of next_node must be registered with its parent.
    next_node.parent.keys[index] = next_node.keys.first
    # The value travels along with the key.
    self.values << next_node.values.shift
  else
    # For branch nodes the branch key of next_node moves down into this
    # node.
    self.keys << next_node.parent.keys[index]
    # The old lead key of next_node takes its place in the parent.
    next_node.parent.keys[index] = next_node.keys.shift
    # The first child of the next node becomes the last child here.
    moved = next_node.children.shift
    self.children << moved
    moved.parent = myself
  end

  true
end
|
854
|
+
|
855
|
+
def update_branch_key(old_key)
  replacement = @keys.first
  ancestor = @parent

  # Search upwards for the ancestor that holds the old key as branch key
  # and replace it there. Only the first hit is updated.
  while ancestor
    slot = ancestor.keys.index(old_key)
    unless slot.nil?
      ancestor.keys[slot] = replacement
      break
    end
    ancestor = ancestor.parent
  end

  # Reaching the root without a hit is fine: the smallest element of the
  # tree has no branch key.
  nil
end
|
869
|
+
|
870
|
+
end
|
871
|
+
|
872
|
+
end
|
873
|
+
|