perobs 4.0.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +27 -16
- data/lib/perobs/Array.rb +66 -19
- data/lib/perobs/BTree.rb +106 -15
- data/lib/perobs/BTreeBlob.rb +4 -3
- data/lib/perobs/BTreeDB.rb +5 -4
- data/lib/perobs/BTreeNode.rb +482 -156
- data/lib/perobs/BTreeNodeLink.rb +10 -0
- data/lib/perobs/BigArray.rb +285 -0
- data/lib/perobs/BigArrayNode.rb +1002 -0
- data/lib/perobs/BigHash.rb +246 -0
- data/lib/perobs/BigTree.rb +197 -0
- data/lib/perobs/BigTreeNode.rb +873 -0
- data/lib/perobs/Cache.rb +48 -10
- data/lib/perobs/ConsoleProgressMeter.rb +61 -0
- data/lib/perobs/DataBase.rb +4 -3
- data/lib/perobs/DynamoDB.rb +57 -15
- data/lib/perobs/EquiBlobsFile.rb +155 -50
- data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
- data/lib/perobs/FlatFile.rb +519 -227
- data/lib/perobs/FlatFileBlobHeader.rb +113 -54
- data/lib/perobs/FlatFileDB.rb +49 -23
- data/lib/perobs/FuzzyStringMatcher.rb +175 -0
- data/lib/perobs/Hash.rb +127 -33
- data/lib/perobs/IDList.rb +144 -0
- data/lib/perobs/IDListPage.rb +107 -0
- data/lib/perobs/IDListPageFile.rb +180 -0
- data/lib/perobs/IDListPageRecord.rb +142 -0
- data/lib/perobs/Object.rb +18 -15
- data/lib/perobs/ObjectBase.rb +46 -5
- data/lib/perobs/PersistentObjectCache.rb +57 -68
- data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
- data/lib/perobs/ProgressMeter.rb +97 -0
- data/lib/perobs/SpaceManager.rb +273 -0
- data/lib/perobs/SpaceTree.rb +21 -12
- data/lib/perobs/SpaceTreeNode.rb +53 -61
- data/lib/perobs/Store.rb +264 -145
- data/lib/perobs/version.rb +1 -1
- data/lib/perobs.rb +2 -0
- data/perobs.gemspec +4 -4
- data/test/Array_spec.rb +15 -6
- data/test/BTree_spec.rb +6 -2
- data/test/BigArray_spec.rb +261 -0
- data/test/BigHash_spec.rb +152 -0
- data/test/BigTreeNode_spec.rb +153 -0
- data/test/BigTree_spec.rb +259 -0
- data/test/EquiBlobsFile_spec.rb +105 -1
- data/test/FNV_Hash_1a_64_spec.rb +59 -0
- data/test/FlatFileDB_spec.rb +198 -14
- data/test/FuzzyStringMatcher_spec.rb +261 -0
- data/test/Hash_spec.rb +13 -3
- data/test/IDList_spec.rb +77 -0
- data/test/LegacyDBs/LegacyDB.rb +155 -0
- data/test/LegacyDBs/version_3/class_map.json +1 -0
- data/test/LegacyDBs/version_3/config.json +1 -0
- data/test/LegacyDBs/version_3/database.blobs +0 -0
- data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
- data/test/LegacyDBs/version_3/index.blobs +0 -0
- data/test/LegacyDBs/version_3/version +1 -0
- data/test/LockFile_spec.rb +9 -6
- data/test/SpaceManager_spec.rb +176 -0
- data/test/SpaceTree_spec.rb +4 -1
- data/test/Store_spec.rb +305 -203
- data/test/spec_helper.rb +9 -4
- metadata +57 -16
- data/lib/perobs/BTreeNodeCache.rb +0 -109
- data/lib/perobs/TreeDB.rb +0 -277
data/lib/perobs/BTreeNode.rb
CHANGED
@@ -39,7 +39,10 @@ module PEROBS
|
|
39
39
|
# mark a node as leaf or branch node.
|
40
40
|
class BTreeNode
|
41
41
|
|
42
|
-
|
42
|
+
Stats = Struct.new(:branch_depth, :nodes_count, :leave_nodes, :leaves)
|
43
|
+
|
44
|
+
attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling,
|
45
|
+
:keys, :values, :children
|
43
46
|
|
44
47
|
# Create a new BTreeNode object for the given tree with the given parent
|
45
48
|
# or recreate the node with the given node_address from the backing store.
|
@@ -48,47 +51,43 @@ module PEROBS
|
|
48
51
|
# restore the node.
|
49
52
|
# @param tree [BTree] The tree this node is part of
|
50
53
|
# @param parent [BTreeNode] reference to parent node
|
54
|
+
# @param prev_sibling [BTreeNode] reference to previous sibling node
|
55
|
+
# @param next_sibling [BTreeNode] reference to next sibling node
|
51
56
|
# @param node_address [Integer] the address of the node to read from the
|
52
57
|
# backing store
|
53
58
|
# @param is_leaf [Boolean] true if the node should be a leaf node, false
|
54
59
|
# if not
|
55
60
|
def initialize(tree, node_address = nil, parent = nil, is_leaf = true,
|
56
|
-
|
61
|
+
prev_sibling = nil, next_sibling = nil,
|
62
|
+
keys = nil, values = nil, children = nil)
|
57
63
|
@tree = tree
|
58
64
|
if node_address == 0
|
59
65
|
PEROBS.log.fatal "Node address may not be 0"
|
60
66
|
end
|
61
67
|
@node_address = node_address
|
62
|
-
@parent =
|
63
|
-
@
|
68
|
+
@parent = link(parent)
|
69
|
+
@prev_sibling = link(prev_sibling)
|
70
|
+
@next_sibling = link(next_sibling)
|
71
|
+
@keys = keys || []
|
64
72
|
if (@is_leaf = is_leaf)
|
65
|
-
@values = values
|
66
|
-
@children =
|
73
|
+
@values = values || []
|
74
|
+
@children = nil
|
67
75
|
else
|
68
|
-
@children = children
|
69
|
-
@values =
|
76
|
+
@children = children || []
|
77
|
+
@values = nil
|
70
78
|
end
|
71
|
-
|
72
|
-
ObjectSpace.define_finalizer(
|
73
|
-
self, BTreeNode._finalize(@tree, @node_address, object_id))
|
74
|
-
@tree.node_cache.insert(self, false)
|
75
|
-
end
|
76
|
-
|
77
|
-
# This method generates the destructor for the objects of this class. It
|
78
|
-
# is done this way to prevent the Proc object hanging on to a reference to
|
79
|
-
# self which would prevent the object from being collected. This internal
|
80
|
-
# method is not intended for users to call.
|
81
|
-
def BTreeNode::_finalize(tree, node_address, ruby_object_id)
|
82
|
-
proc { tree.node_cache._collect(node_address, ruby_object_id) }
|
83
79
|
end
|
84
80
|
|
85
|
-
# Create a new
|
81
|
+
# Create a new BTreeNode. This method should be used for the creation
|
86
82
|
# of new nodes instead of calling the constructor directly.
|
87
83
|
# @param tree [BTree] The tree the new node should belong to
|
88
84
|
# @param parent [BTreeNode] The parent node
|
89
85
|
# @param is_leaf [Boolean] True if the node has no children, false
|
90
86
|
# otherwise
|
91
|
-
|
87
|
+
# @param prev_sibling [BTreeNode] reference to previous sibling node
|
88
|
+
# @param next_sibling [BTreeNode] reference to next sibling node
|
89
|
+
def BTreeNode::create(tree, parent = nil, is_leaf = true,
|
90
|
+
prev_sibling = nil, next_sibling = nil)
|
92
91
|
unless parent.nil? || parent.is_a?(BTreeNode) ||
|
93
92
|
parent.is_a?(BTreeNodeLink)
|
94
93
|
PEROBS.log.fatal "Parent node must be a BTreeNode but is of class " +
|
@@ -96,17 +95,26 @@ module PEROBS
|
|
96
95
|
end
|
97
96
|
|
98
97
|
address = tree.nodes.free_address
|
99
|
-
node = BTreeNode.new(tree, address, parent, is_leaf
|
98
|
+
node = BTreeNode.new(tree, address, parent, is_leaf, prev_sibling,
|
99
|
+
next_sibling)
|
100
100
|
# This is a new node. Make sure the data is written to the file.
|
101
101
|
tree.node_cache.insert(node)
|
102
102
|
|
103
|
-
node
|
103
|
+
# Insert the newly created node into the existing node chain.
|
104
|
+
if (node.prev_sibling = prev_sibling)
|
105
|
+
node.prev_sibling.next_sibling = BTreeNodeLink.new(tree, node)
|
106
|
+
end
|
107
|
+
if (node.next_sibling = next_sibling)
|
108
|
+
node.next_sibling.prev_sibling = BTreeNodeLink.new(tree, node)
|
109
|
+
end
|
110
|
+
|
111
|
+
BTreeNodeLink.new(tree, node)
|
104
112
|
end
|
105
113
|
|
106
114
|
# Restore a node from the backing store at the given address and tree.
|
107
115
|
# @param tree [BTree] The tree the node belongs to
|
108
|
-
# @param
|
109
|
-
def BTreeNode::load(tree, address)
|
116
|
+
# @param address [Integer] The address in the blob file.
|
117
|
+
def BTreeNode::load(tree, address, unused = nil)
|
110
118
|
unless address.is_a?(Integer)
|
111
119
|
PEROBS.log.fatal "address is not Integer: #{address.class}"
|
112
120
|
end
|
@@ -122,7 +130,8 @@ module PEROBS
|
|
122
130
|
ary = bytes.unpack(BTreeNode::node_bytes_format(tree))
|
123
131
|
# Read is_leaf
|
124
132
|
if ary[0] != 0 && ary[0] != 1
|
125
|
-
PEROBS.log.fatal "First byte of a BTreeNode entry
|
133
|
+
PEROBS.log.fatal "First byte of a BTreeNode entry at address " +
|
134
|
+
"#{address} must be 0 or 1 but is #{ary[0]}"
|
126
135
|
end
|
127
136
|
is_leaf = ary[0] == 0 ? false : true
|
128
137
|
# This is the number of keys this node has.
|
@@ -130,19 +139,21 @@ module PEROBS
|
|
130
139
|
data_count = ary[2]
|
131
140
|
# Read the parent node address
|
132
141
|
parent = ary[3] == 0 ? nil : BTreeNodeLink.new(tree, ary[3])
|
142
|
+
prev_sibling = ary[4] == 0 ? nil : BTreeNodeLink.new(tree, ary[4])
|
143
|
+
next_sibling = ary[5] == 0 ? nil : BTreeNodeLink.new(tree, ary[5])
|
133
144
|
# Read the keys
|
134
|
-
keys = ary[
|
145
|
+
keys = ary[6, key_count]
|
135
146
|
|
136
147
|
children = nil
|
137
148
|
values = nil
|
138
149
|
if is_leaf
|
139
150
|
# Read the values
|
140
|
-
values = ary[
|
151
|
+
values = ary[6 + tree.order, data_count]
|
141
152
|
else
|
142
153
|
# Read the child addresses
|
143
154
|
children = []
|
144
155
|
data_count.times do |i|
|
145
|
-
child_address = ary[
|
156
|
+
child_address = ary[6 + tree.order + i]
|
146
157
|
unless child_address > 0
|
147
158
|
PEROBS.log.fatal "Child address must be larger than 0"
|
148
159
|
end
|
@@ -150,17 +161,28 @@ module PEROBS
|
|
150
161
|
end
|
151
162
|
end
|
152
163
|
|
153
|
-
node = BTreeNode.new(tree, address, parent, is_leaf,
|
164
|
+
node = BTreeNode.new(tree, address, parent, is_leaf,
|
165
|
+
prev_sibling, next_sibling, keys, values,
|
154
166
|
children)
|
155
167
|
tree.node_cache.insert(node, false)
|
156
168
|
|
157
169
|
node
|
158
170
|
end
|
159
171
|
|
172
|
+
# This is a wrapper around BTreeNode::load() that returns a BTreeNodeLink
|
173
|
+
# instead of the actual node.
|
174
|
+
# @param tree [BTree] The tree the node belongs to
|
175
|
+
# @param address [Integer] The address in the blob file.
|
176
|
+
# @return [BTreeNodeLink] Link to loaded node
|
177
|
+
def BTreeNode::load_and_link(tree, address)
|
178
|
+
BTreeNodeLink.new(tree, BTreeNode::load(tree, address))
|
179
|
+
end
|
180
|
+
|
181
|
+
|
160
182
|
# @return [String] The format used for String.pack.
|
161
183
|
def BTreeNode::node_bytes_format(tree)
|
162
184
|
# This does not include the 4 bytes for the CRC32 checksum
|
163
|
-
"
|
185
|
+
"CSSQQQQ#{tree.order}Q#{tree.order + 1}"
|
164
186
|
end
|
165
187
|
|
166
188
|
# @return [Integer] The number of bytes needed to store a node.
|
@@ -169,6 +191,8 @@ module PEROBS
|
|
169
191
|
2 + # actual key count
|
170
192
|
2 + # actual value or children count (aka data count)
|
171
193
|
8 + # parent address
|
194
|
+
8 + # previous sibling address
|
195
|
+
8 + # next sibling address
|
172
196
|
8 * order + # keys
|
173
197
|
8 * (order + 1) + # values or child addresses
|
174
198
|
4 # CRC32 checksum
|
@@ -200,11 +224,11 @@ module PEROBS
|
|
200
224
|
|
201
225
|
# Once we have reached a leaf node we can insert or replace the value.
|
202
226
|
if node.is_leaf
|
203
|
-
node.insert_element(key, value)
|
204
|
-
return
|
227
|
+
return node.insert_element(key, value)
|
205
228
|
else
|
206
229
|
# Descend into the right child node to add the value to.
|
207
230
|
node = node.children[node.search_key_index(key)]
|
231
|
+
node = node.get_node if node
|
208
232
|
end
|
209
233
|
end
|
210
234
|
|
@@ -229,6 +253,62 @@ module PEROBS
|
|
229
253
|
|
230
254
|
# Descend into the right child node to continue the search.
|
231
255
|
node = node.children[i]
|
256
|
+
node = node.get_node if node
|
257
|
+
end
|
258
|
+
|
259
|
+
PEROBS.log.fatal "Could not find proper node to get from while " +
|
260
|
+
"looking for key #{key}"
|
261
|
+
end
|
262
|
+
|
263
|
+
# Return the key/value pair that matches the given key or the next larger
|
264
|
+
# key/value pair with a key that is at least as large as key +
|
265
|
+
# min_miss_increment.
|
266
|
+
# @param key [Integer] key to search for
|
267
|
+
# @param min_miss_increment [Integer] minimum required key increment in
|
268
|
+
# case an exact key match could not be found
|
269
|
+
# @return [Array or nil] the matching [key, value] pair, or nil if none is found
|
270
|
+
def get_best_match(key, min_miss_increment)
|
271
|
+
node = self
|
272
|
+
|
273
|
+
while node do
|
274
|
+
# Find index of the entry that best fits the key.
|
275
|
+
i = node.search_key_index(key)
|
276
|
+
if node.is_leaf
|
277
|
+
# This is a leaf node. Check if there is an exact match for the
|
278
|
+
# given key.
|
279
|
+
if node.keys[i] == key
|
280
|
+
# Return the corresponding key/value pair.
|
281
|
+
return [ key, node.values[i] ]
|
282
|
+
else
|
283
|
+
# No exact key match. Now search the larger keys for the first
|
284
|
+
# that is at least key + min_miss_increment large.
|
285
|
+
keys = node.keys
|
286
|
+
keys_length = keys.length
|
287
|
+
while node
|
288
|
+
if i >= keys_length
|
289
|
+
# We've reached the end of a node. Continue search in next
|
290
|
+
# sibling.
|
291
|
+
return nil unless (node = node.next_sibling)
|
292
|
+
node = node.get_node
|
293
|
+
keys = node.keys
|
294
|
+
keys_length = keys.length
|
295
|
+
i = -1
|
296
|
+
elsif keys[i] >= key + min_miss_increment
|
297
|
+
# We've found a key that fits the criteria. Return the
|
298
|
+
# corresponding key/value pair.
|
299
|
+
return [ keys[i], node.values[i] ]
|
300
|
+
end
|
301
|
+
|
302
|
+
i += 1
|
303
|
+
end
|
304
|
+
|
305
|
+
return nil
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
309
|
+
# Descend into the right child node to continue the search.
|
310
|
+
node = node.children[i]
|
311
|
+
node = node.get_node if node
|
232
312
|
end
|
233
313
|
|
234
314
|
PEROBS.log.fatal "Could not find proper node to get from while " +
|
@@ -257,6 +337,7 @@ module PEROBS
|
|
257
337
|
|
258
338
|
# Descend into the right child node to continue the search.
|
259
339
|
node = node.children[i]
|
340
|
+
node = node.get_node if node
|
260
341
|
end
|
261
342
|
|
262
343
|
PEROBS.log.fatal 'Could not find proper node to remove from'
|
@@ -269,18 +350,20 @@ module PEROBS
|
|
269
350
|
def split_node
|
270
351
|
unless @parent
|
271
352
|
# The node is the root node. We need to create a parent node first.
|
272
|
-
self.parent = BTreeNode::create(@tree, nil, false)
|
353
|
+
self.parent = link(BTreeNode::create(@tree, nil, false))
|
354
|
+
@tree.node_cache.insert(self)
|
273
355
|
@parent.set_child(0, self)
|
274
356
|
@tree.set_root(@parent)
|
275
357
|
end
|
276
358
|
|
277
359
|
# Create the new sibling that will take the 2nd half of the
|
278
360
|
# node content.
|
279
|
-
sibling = BTreeNode::create(@tree, @parent, @is_leaf)
|
361
|
+
sibling = BTreeNode::create(@tree, @parent, @is_leaf, link(self),
|
362
|
+
@next_sibling)
|
280
363
|
# Determine the index of the middle element that gets moved to the
|
281
364
|
# parent. The order must be an uneven number, so adding 1 will get us
|
282
365
|
# the middle element.
|
283
|
-
mid = @tree.order / 2
|
366
|
+
mid = @tree.order / 2
|
284
367
|
# Insert the middle element key into the parent node
|
285
368
|
@parent.insert_element(@keys[mid], sibling)
|
286
369
|
copy_elements(mid + (@is_leaf ? 0 : 1), sibling)
|
@@ -289,89 +372,149 @@ module PEROBS
|
|
289
372
|
@parent
|
290
373
|
end
|
291
374
|
|
292
|
-
def merge_node(upper_sibling, parent_index)
|
293
|
-
if upper_sibling == self
|
294
|
-
PEROBS.log.fatal "Cannot merge node @#{@node_address} with self"
|
295
|
-
end
|
296
|
-
unless upper_sibling.is_leaf
|
297
|
-
insert_element(@parent.keys[parent_index], upper_sibling.children[0])
|
298
|
-
end
|
299
|
-
upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size)
|
300
|
-
@tree.delete_node(upper_sibling.node_address)
|
301
|
-
|
302
|
-
@parent.remove_element(parent_index)
|
303
|
-
end
|
304
|
-
|
305
375
|
# Insert the given value or child into the current node using the key as
|
306
376
|
# index.
|
307
377
|
# @param key [Integer] key to address the value or child
|
308
378
|
# @param value_or_child [Integer or BTreeNode] value or BTreeNode
|
309
379
|
# reference
|
380
|
+
# @return true for insert, false for overwrite
|
310
381
|
def insert_element(key, value_or_child)
|
311
382
|
if @keys.size >= @tree.order
|
312
383
|
PEROBS.log.fatal "Cannot insert into a full BTreeNode"
|
313
384
|
end
|
314
385
|
|
315
386
|
i = search_key_index(key)
|
387
|
+
@tree.node_cache.insert(self)
|
316
388
|
if @keys[i] == key
|
317
389
|
# Overwrite existing entries
|
318
390
|
@keys[i] = key
|
319
391
|
if is_leaf
|
320
392
|
@values[i] = value_or_child
|
321
393
|
else
|
322
|
-
@children[i + 1] =
|
394
|
+
@children[i + 1] = link(value_or_child)
|
323
395
|
end
|
396
|
+
|
397
|
+
return false
|
324
398
|
else
|
325
399
|
# Create a new entry
|
326
400
|
@keys.insert(i, key)
|
327
401
|
if is_leaf
|
328
402
|
@values.insert(i, value_or_child)
|
329
403
|
else
|
330
|
-
@children.insert(i + 1,
|
404
|
+
@children.insert(i + 1, link(value_or_child))
|
331
405
|
end
|
406
|
+
|
407
|
+
return true
|
332
408
|
end
|
333
|
-
@tree.node_cache.insert(self)
|
334
409
|
end
|
335
410
|
|
336
411
|
# Remove the element at the given index.
|
337
412
|
def remove_element(index)
|
338
|
-
# We need this key to find the link in the parent node.
|
339
|
-
first_key = @keys[0]
|
340
|
-
removed_value = nil
|
341
|
-
|
342
413
|
# Delete the key at the specified index.
|
343
|
-
unless @keys.delete_at(index)
|
344
|
-
PEROBS.log.fatal "Could not remove element #{index} from
|
414
|
+
unless (key = @keys.delete_at(index))
|
415
|
+
PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " +
|
345
416
|
"@#{@node_address}"
|
346
417
|
end
|
347
|
-
if
|
348
|
-
|
349
|
-
|
418
|
+
update_branch_key(key) if index == 0
|
419
|
+
|
420
|
+
# Delete the corresponding value.
|
421
|
+
@tree.node_cache.insert(self)
|
422
|
+
removed_value = @values.delete_at(index)
|
423
|
+
|
424
|
+
if @keys.length < min_keys
|
425
|
+
if @prev_sibling && @prev_sibling.parent == @parent
|
426
|
+
borrow_from_previous_sibling(@prev_sibling) ||
|
427
|
+
@prev_sibling.merge_with_leaf_node(self)
|
428
|
+
elsif @next_sibling && @next_sibling.parent == @parent
|
429
|
+
borrow_from_next_sibling(@next_sibling) ||
|
430
|
+
merge_with_leaf_node(@next_sibling)
|
431
|
+
elsif @parent
|
432
|
+
PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
|
433
|
+
end
|
434
|
+
end
|
435
|
+
|
436
|
+
# The merge has potentially invalidated this node. After this method has
|
437
|
+
# been called this copy of the node should no longer be used.
|
438
|
+
removed_value
|
439
|
+
end
|
440
|
+
|
441
|
+
def remove_child(node)
|
442
|
+
unless (index = search_node_index(node))
|
443
|
+
PEROBS.log.fatal "Cannot remove child #{node.node_address} " +
|
444
|
+
"from node #{@node_address}"
|
445
|
+
end
|
446
|
+
|
447
|
+
@tree.node_cache.insert(self)
|
448
|
+
if index == 0
|
449
|
+
# Removing the first child is a bit more complicated as the
|
450
|
+
# corresponding branch key is in a parent node.
|
451
|
+
key = @keys.shift
|
452
|
+
update_branch_key(key)
|
350
453
|
else
|
351
|
-
#
|
352
|
-
@
|
454
|
+
# For all other children we can just remove the corresponding key.
|
455
|
+
@keys.delete_at(index - 1)
|
456
|
+
end
|
457
|
+
|
458
|
+
# Remove the child node link.
|
459
|
+
child = @children.delete_at(index)
|
460
|
+
# Unlink the neighbouring siblings from the child
|
461
|
+
child.prev_sibling.next_sibling = child.next_sibling if child.prev_sibling
|
462
|
+
child.next_sibling.prev_sibling = child.prev_sibling if child.next_sibling
|
463
|
+
|
464
|
+
if @keys.length < min_keys
|
465
|
+
# The node has become too small. Try borrowing a node from an adjacent
|
466
|
+
# sibling or merge with an adjacent node.
|
467
|
+
if @prev_sibling && @prev_sibling.parent == @parent
|
468
|
+
borrow_from_previous_sibling(@prev_sibling) ||
|
469
|
+
@prev_sibling.merge_with_branch_node(self)
|
470
|
+
elsif @next_sibling && @next_sibling.parent == @parent
|
471
|
+
borrow_from_next_sibling(@next_sibling) ||
|
472
|
+
merge_with_branch_node(@next_sibling)
|
473
|
+
end
|
474
|
+
end
|
475
|
+
|
476
|
+
# Delete the node from the cache and backing store.
|
477
|
+
@tree.delete_node(node.node_address)
|
478
|
+
end
|
479
|
+
|
480
|
+
def merge_with_leaf_node(node)
|
481
|
+
if @keys.length + node.keys.length > @tree.order
|
482
|
+
PEROBS.log.fatal "Leaf nodes are too big to merge"
|
353
483
|
end
|
484
|
+
|
354
485
|
@tree.node_cache.insert(self)
|
486
|
+
@keys += node.keys
|
487
|
+
@values += node.values
|
355
488
|
|
356
|
-
|
357
|
-
|
358
|
-
lower_sibling, upper_sibling, parent_index =
|
359
|
-
find_closest_siblings(first_key)
|
489
|
+
node.parent.remove_child(node)
|
490
|
+
end
|
360
491
|
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
elsif upper_sibling &&
|
365
|
-
@keys.size + upper_sibling.keys.size < @tree.order
|
366
|
-
merge_node(upper_sibling, parent_index)
|
492
|
+
def merge_with_branch_node(node)
|
493
|
+
if @keys.length + 1 + node.keys.length > @tree.order
|
494
|
+
PEROBS.log.fatal "Branch nodes are too big to merge"
|
367
495
|
end
|
368
496
|
|
369
|
-
|
370
|
-
|
371
|
-
|
497
|
+
index = @parent.search_node_index(node) - 1
|
498
|
+
@tree.node_cache.insert(self)
|
499
|
+
@keys << @parent.keys[index]
|
500
|
+
@keys += node.keys
|
501
|
+
node.children.each { |c| c.parent = link(self) }
|
502
|
+
@children += node.children
|
503
|
+
|
504
|
+
node.parent.remove_child(node)
|
505
|
+
end
|
506
|
+
|
507
|
+
def search_node_index(node)
|
508
|
+
index = search_key_index(node.keys.first)
|
509
|
+
unless @children[index] == node
|
510
|
+
raise RuntimeError, "Child at index #{index} is not the requested node"
|
511
|
+
end
|
512
|
+
|
513
|
+
index
|
372
514
|
end
|
373
515
|
|
374
516
|
def copy_elements(src_idx, dest_node, dst_idx = 0, count = nil)
|
517
|
+
dest_node = dest_node.get_node
|
375
518
|
unless count
|
376
519
|
count = @tree.order - src_idx
|
377
520
|
end
|
@@ -383,6 +526,7 @@ module PEROBS
|
|
383
526
|
"#{dest_node.is_leaf} node must be of same kind"
|
384
527
|
end
|
385
528
|
|
529
|
+
@tree.node_cache.insert(dest_node)
|
386
530
|
dest_node.keys[dst_idx, count] = @keys[src_idx, count]
|
387
531
|
if @is_leaf
|
388
532
|
# For leaves we copy the keys and corresponding values.
|
@@ -395,32 +539,59 @@ module PEROBS
|
|
395
539
|
dest_node.set_child(dst_idx + i, @children[src_idx + i])
|
396
540
|
end
|
397
541
|
end
|
398
|
-
@tree.node_cache.insert(dest_node)
|
399
542
|
end
|
400
543
|
|
401
544
|
def parent=(p)
|
402
|
-
@parent = p ? BTreeNodeLink.new(@tree, p) : nil
|
403
545
|
@tree.node_cache.insert(self)
|
546
|
+
@parent = p
|
547
|
+
|
548
|
+
p
|
549
|
+
end
|
550
|
+
|
551
|
+
def prev_sibling=(node)
|
552
|
+
@tree.node_cache.insert(self)
|
553
|
+
@prev_sibling = node
|
554
|
+
if node.nil? && @is_leaf
|
555
|
+
# If this node is a leaf node without a previous sibling we need to
|
556
|
+
# register it as the first leaf node.
|
557
|
+
@tree.set_first_leaf(BTreeNodeLink.new(@tree, self))
|
558
|
+
end
|
559
|
+
|
560
|
+
node
|
561
|
+
end
|
562
|
+
|
563
|
+
def next_sibling=(node)
|
564
|
+
@tree.node_cache.insert(self)
|
565
|
+
@next_sibling = node
|
566
|
+
if node.nil? && @is_leaf
|
567
|
+
# If this node is a leaf node without a next sibling we need to
|
568
|
+
# register it as the last leaf node.
|
569
|
+
@tree.set_last_leaf(BTreeNodeLink.new(@tree, self))
|
570
|
+
end
|
571
|
+
|
572
|
+
node
|
404
573
|
end
|
405
574
|
|
406
575
|
def set_child(index, child)
|
576
|
+
@tree.node_cache.insert(self)
|
407
577
|
if child
|
408
|
-
@children[index] =
|
409
|
-
@children[index].parent = self
|
578
|
+
@children[index] = link(child)
|
579
|
+
@children[index].parent = link(self)
|
410
580
|
else
|
411
581
|
@children[index] = nil
|
412
582
|
end
|
413
|
-
|
583
|
+
|
584
|
+
child
|
414
585
|
end
|
415
586
|
|
416
587
|
def trim(idx)
|
417
|
-
@
|
588
|
+
@tree.node_cache.insert(self)
|
589
|
+
@keys.slice!(idx, @keys.length - idx)
|
418
590
|
if @is_leaf
|
419
|
-
@values
|
591
|
+
@values.slice!(idx, @values.length - idx)
|
420
592
|
else
|
421
|
-
@children
|
593
|
+
@children.slice!(idx + 1, @children.length - idx - 1)
|
422
594
|
end
|
423
|
-
@tree.node_cache.insert(self)
|
424
595
|
end
|
425
596
|
|
426
597
|
# Search the keys of the node that fits the given key. The result is
|
@@ -429,36 +600,8 @@ module PEROBS
|
|
429
600
|
# @param key [Integer] key to search for
|
430
601
|
# @return [Integer] Index of the matching key or the insert position.
|
431
602
|
def search_key_index(key)
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
# Keys are unique and always sorted. Use a binary search to find the
|
436
|
-
# index that fits the given key.
|
437
|
-
li = pi = 0
|
438
|
-
ui = @keys.size - 1
|
439
|
-
while li <= ui
|
440
|
-
# The pivot element is always in the middle between the lower and upper
|
441
|
-
# index.
|
442
|
-
pi = li + (ui - li) / 2
|
443
|
-
|
444
|
-
if key < @keys[pi]
|
445
|
-
# The pivot element is smaller than the key. Set the upper index to
|
446
|
-
# the pivot index.
|
447
|
-
ui = pi - 1
|
448
|
-
elsif key > @keys[pi]
|
449
|
-
# The pivot element is larger than the key. Set the lower index to
|
450
|
-
# the pivot index.
|
451
|
-
li = pi + 1
|
452
|
-
else
|
453
|
-
# We've found an exact match. For leaf nodes return the found index.
|
454
|
-
# For branch nodes we have to add one to the index since the larger
|
455
|
-
# child is the right one.
|
456
|
-
return @is_leaf ? pi : pi + 1
|
457
|
-
end
|
458
|
-
end
|
459
|
-
# No exact match was found. For the insert operaton we need to return
|
460
|
-
# the index of the first key that is larger than the given key.
|
461
|
-
@keys[pi] < key ? pi + 1 : pi
|
603
|
+
(@is_leaf ? @keys.bsearch_index { |x| x >= key } :
|
604
|
+
@keys.bsearch_index { |x| x > key }) || @keys.length
|
462
605
|
end
|
463
606
|
|
464
607
|
# Iterate over all the key/value pairs in this node and all sub-nodes.
|
@@ -509,17 +652,33 @@ module PEROBS
|
|
509
652
|
# Check consistency of the node and all subsequent nodes. In case an error
|
510
653
|
# is found, a message is logged and false is returned.
|
511
654
|
# @yield [key, value]
|
512
|
-
# @return [
|
513
|
-
|
655
|
+
# @return [nil or Stats] nil in case of errors or a Stats struct with some
|
656
|
+
# statistical information about the tree
|
657
|
+
def check(&block)
|
658
|
+
stats = Stats.new(nil, 0, 0, 0)
|
659
|
+
|
514
660
|
traverse do |node, position, stack|
|
515
661
|
if position == 0
|
516
|
-
|
517
|
-
|
518
|
-
|
662
|
+
stats.nodes_count += 1
|
663
|
+
if node.parent
|
664
|
+
unless node.parent.is_a?(BTreeNodeLink)
|
665
|
+
node.error "parent is a #{node.parent.class} instead of a " +
|
666
|
+
"BTreeNodeLink"
|
667
|
+
return nil
|
668
|
+
end
|
669
|
+
# After a split the nodes will only have half the maximum keys.
|
670
|
+
# For branch nodes one of the split nodes will have even 1 key
|
671
|
+
# less as this will become the branch key in a parent node.
|
672
|
+
if node.keys.size < min_keys - (node.is_leaf ? 0 : 1)
|
673
|
+
node.error "BTreeNode #{node.node_address} has too few keys"
|
674
|
+
return nil
|
675
|
+
end
|
519
676
|
end
|
677
|
+
|
520
678
|
if node.keys.size > @tree.order
|
521
679
|
node.error "BTreeNode must not have more then #{@tree.order} " +
|
522
680
|
"keys, but has #{node.keys.size} keys"
|
681
|
+
return nil
|
523
682
|
end
|
524
683
|
|
525
684
|
last_key = nil
|
@@ -527,45 +686,101 @@ module PEROBS
|
|
527
686
|
if last_key && key < last_key
|
528
687
|
node.error "Keys are not increasing monotoneously: " +
|
529
688
|
"#{node.keys.inspect}"
|
530
|
-
return
|
689
|
+
return nil
|
531
690
|
end
|
691
|
+
last_key = key
|
532
692
|
end
|
533
693
|
|
534
694
|
if node.is_leaf
|
695
|
+
if stats.branch_depth
|
696
|
+
unless stats.branch_depth == node.tree_level
|
697
|
+
node.error "All leaf nodes must have same distance from root "
|
698
|
+
return nil
|
699
|
+
end
|
700
|
+
else
|
701
|
+
stats.branch_depth = node.tree_level
|
702
|
+
end
|
703
|
+
if node.prev_sibling && !node.prev_sibling.is_a?(BTreeNodeLink)
|
704
|
+
node.error "prev_sibling is a #{node.prev_sibling.class} " +
|
705
|
+
"instead of a BTreeNodeLink"
|
706
|
+
return nil
|
707
|
+
end
|
708
|
+
if node.next_sibling && !node.next_sibling.is_a?(BTreeNodeLink)
|
709
|
+
node.error "next_sibling is a #{node.next_sibling.class} " +
|
710
|
+
"instead of a BTreeNodeLink"
|
711
|
+
return nil
|
712
|
+
end
|
713
|
+
if node.prev_sibling.nil? && @tree.first_leaf != node
|
714
|
+
node.error "Leaf node #{node.node_address} has no previous " +
|
715
|
+
"sibling but is not the first leaf of the tree"
|
716
|
+
return nil
|
717
|
+
end
|
718
|
+
if node.next_sibling.nil? && @tree.last_leaf != node
|
719
|
+
node.error "Leaf node #{node.node_address} has no next sibling " +
|
720
|
+
"but is not the last leaf of the tree"
|
721
|
+
return nil
|
722
|
+
end
|
535
723
|
unless node.keys.size == node.values.size
|
536
724
|
node.error "Key count (#{node.keys.size}) and value " +
|
537
725
|
"count (#{node.values.size}) don't match"
|
538
|
-
|
726
|
+
return nil
|
539
727
|
end
|
728
|
+
unless node.children.nil?
|
729
|
+
node.error "@children must be nil for a leaf node"
|
730
|
+
return nil
|
731
|
+
end
|
732
|
+
|
733
|
+
stats.leave_nodes += 1
|
734
|
+
stats.leaves += node.keys.length
|
540
735
|
else
|
541
|
-
unless node.
|
736
|
+
unless node.values.nil?
|
737
|
+
node.error "@values must be nil for a branch node"
|
738
|
+
return nil
|
739
|
+
end
|
740
|
+
unless node.children.size == node.keys.size + 1
|
542
741
|
node.error "Key count (#{node.keys.size}) must be one " +
|
543
742
|
"less than children count (#{node.children.size})"
|
544
|
-
|
743
|
+
return nil
|
545
744
|
end
|
546
745
|
node.children.each_with_index do |child, i|
|
547
746
|
unless child.is_a?(BTreeNodeLink)
|
548
747
|
node.error "Child #{i} is of class #{child.class} " +
|
549
748
|
"instead of BTreeNodeLink"
|
550
|
-
return
|
749
|
+
return nil
|
551
750
|
end
|
552
751
|
unless child.parent.is_a?(BTreeNodeLink)
|
553
752
|
node.error "Parent reference of child #{i} is of class " +
|
554
|
-
"#{child.class} instead of BTreeNodeLink"
|
555
|
-
return
|
753
|
+
"#{child.parent.class} instead of BTreeNodeLink"
|
754
|
+
return nil
|
556
755
|
end
|
557
|
-
if child
|
756
|
+
if child == node
|
558
757
|
node.error "Child #{i} points to self"
|
559
|
-
return
|
758
|
+
return nil
|
560
759
|
end
|
561
760
|
if stack.include?(child)
|
562
761
|
node.error "Child #{i} points to ancester node"
|
563
|
-
return
|
762
|
+
return nil
|
564
763
|
end
|
565
764
|
unless child.parent == node
|
566
765
|
node.error "Child #{i} does not have parent pointing " +
|
567
766
|
"to this node"
|
568
|
-
return
|
767
|
+
return nil
|
768
|
+
end
|
769
|
+
if i > 0
|
770
|
+
unless node.children[i - 1].next_sibling == child
|
771
|
+
node.error "next_sibling of node " +
|
772
|
+
"#{node.children[i - 1].node_address} " +
|
773
|
+
"must point to node #{child.node_address}"
|
774
|
+
return nil
|
775
|
+
end
|
776
|
+
end
|
777
|
+
if i < node.children.length - 1
|
778
|
+
unless child == node.children[i + 1].prev_sibling
|
779
|
+
node.error "prev_sibling of node " +
|
780
|
+
"#{node.children[i + 1].node_address} " +
|
781
|
+
"must point to node #{child.node_address}"
|
782
|
+
return nil
|
783
|
+
end
|
569
784
|
end
|
570
785
|
end
|
571
786
|
end
|
@@ -578,25 +793,26 @@ module PEROBS
|
|
578
793
|
node.error "Child #{node.children[index].node_address} " +
|
579
794
|
"has too large key #{node.children[index].keys.last}. " +
|
580
795
|
"Must be smaller than #{node.keys[index]}."
|
581
|
-
return
|
796
|
+
return nil
|
582
797
|
end
|
583
|
-
unless node.children[position].keys.first >=
|
584
|
-
node.keys[index]
|
798
|
+
unless node.children[position].keys.first >= node.keys[index]
|
585
799
|
node.error "Child #{node.children[position].node_address} " +
|
586
800
|
"has too small key #{node.children[position].keys.first}. " +
|
587
801
|
"Must be larger than or equal to #{node.keys[index]}."
|
588
|
-
return
|
802
|
+
return nil
|
589
803
|
end
|
590
804
|
else
|
591
805
|
if block_given?
|
592
806
|
# If a block was given, call this block with the key and value.
|
593
|
-
|
807
|
+
unless yield(node.keys[index], node.values[index])
|
808
|
+
return nil
|
809
|
+
end
|
594
810
|
end
|
595
811
|
end
|
596
812
|
end
|
597
813
|
end
|
598
814
|
|
599
|
-
|
815
|
+
stats
|
600
816
|
end
|
601
817
|
|
602
818
|
def is_top?
|
@@ -652,6 +868,7 @@ module PEROBS
|
|
652
868
|
|
653
869
|
str = (is_last_child ? ' ' : ' |') + str
|
654
870
|
node = node.parent
|
871
|
+
node = node.get_node if node
|
655
872
|
end
|
656
873
|
|
657
874
|
str
|
@@ -671,13 +888,37 @@ module PEROBS
|
|
671
888
|
s += ' ^@'
|
672
889
|
end
|
673
890
|
end
|
891
|
+
if @prev_sibling
|
892
|
+
begin
|
893
|
+
s += " <#{@prev_sibling.node_address}"
|
894
|
+
rescue
|
895
|
+
s += ' <@'
|
896
|
+
end
|
897
|
+
end
|
898
|
+
if @next_sibling
|
899
|
+
begin
|
900
|
+
s += " >#{@next_sibling.node_address}"
|
901
|
+
rescue
|
902
|
+
s += ' >@'
|
903
|
+
end
|
904
|
+
end
|
674
905
|
|
675
906
|
s
|
676
907
|
end
|
677
908
|
|
909
|
+
# Determine the level of this node by walking the parent chain upwards
# until a node without a parent is reached.
# @return [Integer] 1 for a node without a parent, increased by one for
#         every ancestor found above this node
def tree_level
  depth = 1
  ancestor = parent
  while ancestor
    depth += 1
    ancestor = ancestor.parent
  end

  depth
end
|
918
|
+
|
919
|
+
|
678
920
|
def error(msg)
|
679
|
-
PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}
|
680
|
-
@tree.to_s
|
921
|
+
PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}"
|
681
922
|
end
|
682
923
|
|
683
924
|
def write_node
|
@@ -685,7 +926,9 @@ module PEROBS
|
|
685
926
|
@is_leaf ? 1 : 0,
|
686
927
|
@keys.size,
|
687
928
|
@is_leaf ? @values.size : @children.size,
|
688
|
-
@parent ? @parent.node_address : 0
|
929
|
+
@parent ? @parent.node_address : 0,
|
930
|
+
@prev_sibling ? @prev_sibling.node_address : 0,
|
931
|
+
@next_sibling ? @next_sibling.node_address : 0
|
689
932
|
] + @keys + ::Array.new(@tree.order - @keys.size, 0)
|
690
933
|
|
691
934
|
if @is_leaf
|
@@ -708,24 +951,107 @@ module PEROBS
|
|
708
951
|
|
709
952
|
private
|
710
953
|
|
711
|
-
def
|
712
|
-
|
713
|
-
|
954
|
+
# Key-count threshold derived from the order of the tree. The borrow_*
# helpers compare a sibling's remaining key count against this value
# before taking an element from it.
# @return [Integer] the tree order divided by 2
def min_keys
  order = @tree.order
  order / 2
end
|
957
|
+
|
958
|
+
# Turn a node reference into a BTreeNodeLink for this tree.
# @param node [BTreeNodeLink, BTreeNode, Integer, nil] reference to wrap;
#        an Integer is presumably a node address (confirm against
#        BTreeNodeLink#initialize)
# @return [BTreeNodeLink, nil] nil for a nil argument, the argument itself
#         if it already is a BTreeNodeLink, otherwise a new link. Any other
#         argument type is reported via PEROBS.log.fatal.
def link(node)
  # Nothing to link to.
  return nil if node.nil?
  # Already a link; hand it back untouched.
  return node if node.is_a?(BTreeNodeLink)

  if node.is_a?(BTreeNode) || node.is_a?(Integer)
    BTreeNodeLink.new(@tree, node)
  else
    PEROBS.log.fatal "Node link must be a BTreeNode, not a #{node.class}"
  end
end
|
969
|
+
|
970
|
+
# Try to borrow an element from the preceding sibling. The last element
# of prev_node is moved to the front of this node and the branch key in
# the parent is adjusted accordingly.
# @param prev_node [BTreeNode, BTreeNodeLink] the preceding sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_previous_sibling(prev_node)
  # The donor must still hold more than min_keys keys after giving one up.
  return false unless prev_node.keys.length - 1 > min_keys

  # Index of the parent's branch key that belongs to this node.
  branch_index = @parent.search_node_index(self) - 1

  # This node, the donor and the parent are all changed below, so all
  # three are handed to the node cache first.
  cache = @tree.node_cache
  cache.insert(self)
  cache.insert(prev_node.get_node)
  cache.insert(@parent.get_node)

  if @is_leaf
    # The last key of the previous node becomes the first key of this node.
    borrowed_key = prev_node.keys.pop
    @keys.unshift(borrowed_key)
    # The new lead key of this node is registered with the parent.
    @parent.keys[branch_index] = @keys.first
    # The matching value moves along with the key.
    borrowed_value = prev_node.values.pop
    @values.unshift(borrowed_value)
  else
    # For branch nodes the current branch key is pulled down into this node.
    @keys.unshift(@parent.keys[branch_index])
    # The last key of the previous node becomes the new branch key for
    # this node.
    @parent.keys[branch_index] = prev_node.keys.pop
    # The last child of the previous node becomes the first child of this
    # node and must point to its new parent.
    moved_child = prev_node.children.pop
    @children.unshift(moved_child)
    moved_child.parent = link(self)
  end

  true
end
|
1003
|
+
|
1004
|
+
# Try to borrow an element from the next sibling. The first element of
# next_node is moved to the end of this node and the branch key in the
# parent of next_node is adjusted accordingly.
# @param next_node [BTreeNode, BTreeNodeLink] the following sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_next_sibling(next_node)
  # The donor must still hold more than min_keys keys after giving one up.
  return false unless next_node.keys.length - 1 > min_keys

  # The next sibling gets a new lead key, so the branch key that belongs
  # to it in its parent node has to be updated.
  donor_parent = next_node.parent
  branch_index = donor_parent.search_node_index(next_node) - 1

  # This node, the donor and the donor's parent are all changed below, so
  # all three are handed to the node cache first.
  cache = @tree.node_cache
  cache.insert(self)
  cache.insert(next_node.get_node)
  cache.insert(donor_parent.get_node)

  if @is_leaf
    # The first key of the next node becomes the last key of this node.
    borrowed_key = next_node.keys.shift
    @keys.push(borrowed_key)
    # The new lead key of next_node is registered with its parent.
    donor_parent.keys[branch_index] = next_node.keys.first
    # The matching value moves along with the key.
    borrowed_value = next_node.values.shift
    @values.push(borrowed_value)
  else
    # For branch nodes the lead key comes from the parent of next_node.
    @keys.push(donor_parent.keys[branch_index])
    # The old lead key of next_node becomes the branch key in the parent
    # of next_node, and the keys of next_node are shifted.
    donor_parent.keys[branch_index] = next_node.keys.shift
    # The first child of the next node becomes the last child of this
    # node and must point to its new parent.
    moved_child = next_node.children.shift
    @children.push(moved_child)
    moved_child.parent = link(self)
  end

  true
end
|
1040
|
+
|
1041
|
+
def update_branch_key(old_key)
|
1042
|
+
new_key = @keys.first
|
1043
|
+
return unless (node = @parent)
|
714
1044
|
|
715
|
-
|
716
|
-
|
717
|
-
|
1045
|
+
while node
|
1046
|
+
if (index = node.keys.index(old_key))
|
1047
|
+
node.keys[index] = new_key
|
1048
|
+
@tree.node_cache.insert(node.get_node)
|
1049
|
+
return
|
1050
|
+
end
|
1051
|
+
node = node.parent
|
718
1052
|
end
|
719
|
-
# The child that corresponds to the key at parent_index has an index of
|
720
|
-
# parent_index + 1! The lower_sibling has an child index of
|
721
|
-
# parent_index and the upper sibling has a child index of parent_index +
|
722
|
-
# 2.
|
723
|
-
lower_sibling = parent_index < 1 ?
|
724
|
-
nil : @parent.children[parent_index - 1]
|
725
|
-
upper_sibling = parent_index >= (@parent.children.size - 1) ?
|
726
|
-
nil : @parent.children[parent_index + 1]
|
727
1053
|
|
728
|
-
|
1054
|
+
# The smallest element has no branch key.
|
729
1055
|
end
|
730
1056
|
|
731
1057
|
end
|