perobs 4.0.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -0,0 +1,873 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # = BigTreeNode.rb -- Persistent Ruby Object Store
4
+ #
5
+ # Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
6
+ #
7
+ # MIT License
8
+ #
9
+ # Permission is hereby granted, free of charge, to any person obtaining
10
+ # a copy of this software and associated documentation files (the
11
+ # "Software"), to deal in the Software without restriction, including
12
+ # without limitation the rights to use, copy, modify, merge, publish,
13
+ # distribute, sublicense, and/or sell copies of the Software, and to
14
+ # permit persons to whom the Software is furnished to do so, subject to
15
+ # the following conditions:
16
+ #
17
+ # The above copyright notice and this permission notice shall be
18
+ # included in all copies or substantial portions of the Software.
19
+ #
20
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
+
28
+ require 'perobs/Object'
29
+ require 'perobs/Array'
30
+
31
+ module PEROBS
32
+
33
+ # The BigTreeNode class provides the BTree nodes for the BigTree objects.
34
+ # A node can either be a branch node or a leaf node. Branch nodes don't
35
+ # store values, only references to child nodes. Leaf nodes don't have child
36
+ # nodes but store the actual values. All nodes store a list of keys that are
37
+ # used to navigate the tree and find the values. A key is either directly
38
+ # associated with a value or determines the lower key boundary for the
39
+ # following child node.
40
+ class BigTreeNode < PEROBS::Object
41
+
42
+ attr_persist :tree, :parent, :keys, :values, :children,
43
+ :prev_sibling, :next_sibling
44
+
45
# Internal constructor. Use Store.new(BigTreeNode, ...) instead.
# @param p [Handle] handle that is passed through to the base class
# @param tree [BigTree] The tree this node should belong to
# @param is_leaf [Boolean] True creates a leaf node, false a branch node
# @param parent [BigTreeNode] Parent node
# @param prev_sibling [BigTreeNode] Previous sibling
# @param next_sibling [BigTreeNode] Next sibling
def initialize(p, tree, is_leaf, parent = nil, prev_sibling = nil,
               next_sibling = nil)
  super(p)
  self.tree = tree
  self.parent = parent
  self.keys = @store.new(PEROBS::Array)

  if is_leaf
    # Leaf nodes carry values and never have children.
    self.values = @store.new(PEROBS::Array)
    self.children = nil
  else
    # Branch nodes reference child nodes and never carry values.
    self.children = @store.new(PEROBS::Array)
    self.values = nil
  end

  # Hook the new node into the sibling chain. A leaf node without a
  # neighbour on one side is registered with the tree as the first or
  # last leaf respectively.
  self.prev_sibling = prev_sibling
  if prev_sibling
    @prev_sibling.next_sibling = myself
  elsif is_leaf?
    @tree.first_leaf = myself
  end

  self.next_sibling = next_sibling
  if next_sibling
    @next_sibling.prev_sibling = myself
  elsif is_leaf?
    @tree.last_leaf = myself
  end
end
84
+
85
# A node is a leaf exactly when it has no children list.
# @return [Boolean] True if this is a leaf node, false otherwise.
def is_leaf?
  @children.nil?
end
89
+
90
# Insert or replace the given value by using the key as unique address.
# The search starts at this node and walks down to the matching leaf.
# @param key [Integer] Unique key to retrieve the value
# @param value [Integer] value to insert
# @return whatever insert_element of the target leaf returns
def insert(key, value)
  node = myself

  while node
    # Full nodes met on the way down are split; the walk then continues
    # from the node that split_node returns.
    node = node.split_node if node.keys.size >= @tree.node_size

    # A leaf is the final destination for the key/value pair.
    return node.insert_element(key, value) if node.is_leaf?

    # Otherwise continue with the child responsible for the key.
    node = node.children[node.search_key_index(key)]
  end

  PEROBS.log.fatal "Could not find proper node to insert into"
end
115
+
116
# Look up the value stored for the given key, starting at this node.
# @param key [Integer] key to search for
# @return [Integer or nil] value that matches the key, nil if the key is
#         unknown
def get(key)
  node = self

  while node
    # Index of the entry that best fits the key.
    idx = node.search_key_index(key)

    unless node.is_leaf?
      # Branch node: continue the search in the matching child.
      node = node.children[idx]
      next
    end

    # Leaf node: only an exact key match yields a value.
    return nil unless node.keys[idx] == key

    return node.values[idx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for key #{key}"
end
139
+
140
# Return the node chain from this node down to the leaf node storing the
# key/value pair. Every visited node appears exactly once, in top-down
# order, and the leaf itself is the last element.
# @param key [Integer] key to search for
# @return [Array of BigTreeNode] node list (empty if the key is unknown)
def node_chain(key)
  node = myself
  list = []

  while node
    # Record the node we are currently visiting (branch or leaf).
    list << node

    # Find index of the entry that best fits the key.
    i = node.search_key_index(key)
    if node.is_leaf?
      # Only an exact match in the leaf makes the chain valid.
      return node.keys[i] == key ? list : []
    end

    # Descend into the right child node to continue the search.
    node = node.children[i]
  end

  PEROBS.log.fatal "Could not find node chain for key #{key}"
end
165
+
166
# Check whether the given key is stored in this node or any sub-node.
# @param key [Integer] key to search for
# @return [Boolean] True if key was found, false otherwise
def has_key?(key)
  node = self

  while node
    # Index of the entry that best fits the key.
    idx = node.search_key_index(key)

    # In a leaf node the key exists only on an exact match.
    return node.keys[idx] == key if node.is_leaf?

    # Branch node: continue the search in the matching child.
    node = node.children[idx]
  end

  PEROBS.log.fatal "Could not find proper node to get from while " +
                   "looking for key #{key}"
end
188
+
189
# Remove the entry with the given key from the tree and hand back its
# value. The entry counter of the tree is decremented on success.
# @param key [Integer] key to search for
# @return [Object] value that matched the key, nil if the key is unknown
def remove(key)
  node = self

  while node
    # Index of the entry that best fits the key.
    idx = node.search_key_index(key)

    if node.is_leaf?
      # Nothing to remove unless the leaf holds exactly this key.
      return nil unless node.keys[idx] == key

      @tree.entry_counter -= 1
      return node.remove_element(idx)
    end

    # Branch node: continue the search in the matching child.
    node = node.children[idx]
  end

  PEROBS.log.fatal 'Could not find proper node to remove from'
end
216
+
217
# Walk this node and all sub-nodes and yield every stored key/value pair.
# Only leaf nodes contribute pairs; traversal positions beyond the last
# key of a leaf are skipped.
# @yield [key, value]
def each
  traverse do |node, position, _stack|
    next unless node.is_leaf? && position < node.keys.size

    yield(node.keys[position], node.values[position])
  end
end
226
+
227
# Yield the key/value pairs stored directly in this node, in ascending
# index order. Branch nodes hold no values; for them the node itself is
# returned without yielding.
# @yield [key, value]
def each_element
  return self unless is_leaf?

  last_index = @keys.length - 1
  0.upto(last_index) { |i| yield(@keys[i], @values[i]) }
end
236
+
237
# Yield the key/value pairs stored directly in this node, in descending
# index order. Branch nodes hold no values; for them the node itself is
# returned without yielding.
# @yield [key, value]
def reverse_each_element
  return self unless is_leaf?

  last_index = @keys.length - 1
  last_index.downto(0) { |i| yield(@keys[i], @values[i]) }
end
246
+
247
# Check consistency of the node and all subsequent nodes. In case an error
# is found, a message is logged via BigTreeNode#error and false is
# returned immediately.
#
# Structural checks run when a node is entered (position 0). Checks that
# relate a branch key to its neighbouring children, and the optional
# per-entry block, run at the later positions.
# @yield [key, value] optional; a falsy block result aborts the check
# @return [Boolean] true if tree has no errors
def check
  branch_depth = nil

  traverse do |node, position, stack|
    if position == 0
      if node.parent
        # After a split the nodes will only have half the maximum keys.
        # For branch nodes one of the split nodes will have even 1 key
        # less as this will become the branch key in a parent node.
        if node.keys.size < min_keys - (node.is_leaf? ? 0 : 1)
          node.error "BigTree node #{node._id} has too few keys"
          return false
        end
      end

      if node.keys.size > @tree.node_size
        node.error "BigTree node must not have more then " +
                   "#{@tree.node_size} keys, but has #{node.keys.size} keys"
        return false
      end

      # Keys must never decrease from one slot to the next.
      last_key = nil
      node.keys.each do |key|
        if last_key && key < last_key
          node.error "Keys are not increasing monotoneously: " +
                     "#{node.keys.inspect}"
          return false
        end
        last_key = key
      end

      if node.is_leaf?
        # The first leaf seen defines the depth all other leaves must have.
        if branch_depth
          unless branch_depth == stack.size
            node.error "All leaf nodes must have same distance from root"
            return false
          end
        else
          branch_depth = stack.size
        end
        if node.prev_sibling.nil?
          if @tree.first_leaf != node
            node.error "Leaf node #{node._id} has no previous sibling " +
                       "but is not the first leaf of the tree"
            return false
          end
        elsif node.prev_sibling.next_sibling != node
          node.error "next_sibling of previous sibling does not point to " +
                     "this node"
          return false
        end
        if node.next_sibling.nil?
          if @tree.last_leaf != node
            node.error "Leaf node #{node._id} has no next sibling " +
                       "but is not the last leaf of the tree"
            return false
          end
        elsif node.next_sibling.prev_sibling != node
          node.error "previous_sibling of next sibling does not point to " +
                     "this node"
          return false
        end
        unless node.keys.size == node.values.size
          node.error "Key count (#{node.keys.size}) and value " +
                     "count (#{node.values.size}) don't match"
          return false
        end
        if node.children
          node.error "children must be nil for a leaf node"
          return false
        end
      else
        if node.values
          node.error "values must be nil for a branch node"
          return false
        end
        unless node.children.size == node.keys.size + 1
          node.error "Key count (#{node.keys.size}) must be one " +
                     "less than children count (#{node.children.size})"
          return false
        end
        node.children.each_with_index do |child, i|
          unless child.is_a?(BigTreeNode)
            node.error "Child #{i} is of class #{child.class} " +
                       "instead of BigTreeNode"
            return false
          end
          unless child.parent.is_a?(BigTreeNode)
            # Report the class of the offending parent reference, not the
            # class of the child itself.
            node.error "Parent reference of child #{i} is of class " +
                       "#{child.parent.class} instead of BigTreeNode"
            return false
          end
          if child == node
            node.error "Child #{i} point to self"
            return false
          end
          # The traversal stack holds [node, position] pairs, so the node
          # part of every entry has to be compared.
          if stack.any? { |entry| entry.first == child }
            node.error "Child #{i} points to ancester node"
            return false
          end
          unless child.parent == node
            node.error "Child #{i} does not have parent pointing " +
                       "to this node"
            return false
          end
          if i > 0
            unless node.children[i - 1].next_sibling == child
              node.error "next_sibling of node " +
                         "#{node.children[i - 1]._id} " +
                         "must point to node #{child._id}"
              return false
            end
          end
          if i < node.children.length - 1
            unless child == node.children[i + 1].prev_sibling
              node.error "prev_sibling of node " +
                         "#{node.children[i + 1]._id} " +
                         "must point to node #{child._id}"
              return false
            end
          end
        end
      end
    elsif position <= node.keys.size
      # These checks are done after we have completed the respective child
      # node with index 'position - 1'.
      index = position - 1
      if node.is_leaf?
        if block_given?
          # If a block was given, call this block with the key and value.
          return false unless yield(node.keys[index], node.values[index])
        end
      else
        # All keys of the child left of the branch key must be smaller...
        unless node.children[index].keys.last < node.keys[index]
          node.error "Child #{node.children[index]._id} " +
                     "has too large key #{node.children[index].keys.last}. " +
                     "Must be smaller than #{node.keys[index]}."
          return false
        end
        # ...and all keys of the child right of it larger or equal.
        unless node.children[position].keys.first >= node.keys[index]
          node.error "Child #{node.children[position]._id} " +
                     "has too small key #{node.children[position].keys.first}. " +
                     "Must be larger than or equal to #{node.keys[index]}."
          return false
        end
      end
    end
  end

  true
end
402
+
403
# Render this node and all sub-nodes as text, one line per node and one
# line per key (branch) or key/value pair (leaf). Errors raised while
# rendering a line are caught and replaced by a marker line that carries
# the exception message.
# @return [String] Human readable form of the sub-tree.
def to_s
  str = ''

  traverse do |node, position, _stack|
    begin
      if position == 0
        # Header line of the node.
        lead = node.parent ? node.parent.tree_prefix + ' +' : 'o'
        mark = node.tree_branch_mark
        fill = node.keys.first.nil? ? '--' : 'v-'
        str += "#{lead}" + "#{mark}-" + "#{fill}#{node.tree_summary}\n"
      elsif node.is_leaf?
        # Leaf entry line; positions without a key print nothing.
        if node.keys[position - 1]
          str += "#{node.tree_prefix} |" +
                 "[#{node.keys[position - 1]}, " +
                 "#{node.values[position - 1]}]\n"
        end
      elsif node.keys[position - 1]
        # Branch key line.
        str += "#{node.tree_prefix} #{node.keys[position - 1]}\n"
      end
    rescue => e
      str += "@@@@@@@@@@: #{e.message}\n"
    end
  end

  str
end
437
+
438
# Split the current node into two nodes. A new sibling is created right
# after this node and receives the upper half of the entries; this node
# keeps the lower half. If this node has no parent yet, a new branch node
# is created, made the tree root and used as parent.
# @return [BigTreeNode] common parent of the two nodes
def split_node
  unless @parent
    # Splitting the root: put a fresh branch node on top of it first.
    self.parent = @store.new(BigTreeNode, @tree, false)
    @parent.children[0] = myself
    @tree.root = @parent
  end

  # The sibling is linked in between this node and the old next sibling.
  sibling = @store.new(BigTreeNode, @tree, is_leaf?, @parent, myself,
                       @next_sibling)
  # Index of the middle key. Its key is registered with the parent as the
  # branch key for the new sibling.
  mid = @keys.size / 2
  @parent.insert_element(@keys[mid], sibling)

  if is_leaf?
    # Leaves hand over keys and values from the middle entry onwards.
    upper_half = mid..-1
    sibling.keys += @keys[upper_half]
    sibling.values += @values[upper_half]
    @values.slice!(upper_half)
  else
    # Branches hand over the keys and children after the middle key and
    # re-parent the moved children.
    above_mid = (mid + 1)..-1
    sibling.keys += @keys[above_mid]
    sibling.children += @children[above_mid]
    sibling.children.each { |c| c.parent = sibling }
    @children.slice!(above_mid)
  end
  # In both cases this node drops its keys from the middle one onwards.
  @keys.slice!(mid..-1)

  @parent
end
482
+
483
# Insert the given value or child into the current node using the key as
# index. For leaf nodes a newly created entry also increments the entry
# counter of the tree.
# @param key [Integer] key to address the value or child
# @param child_or_value [Integer or BigTreeNode] value or BigTreeNode
# @return [Boolean] true if new element, false if override existing
#         element
def insert_element(key, child_or_value)
  if @keys.size >= @tree.node_size
    PEROBS.log.fatal "Cannot insert into a full BigTreeNode: #{@keys.size}"
  end

  i = search_key_index(key)
  if @keys[i] == key
    # Overwrite existing entries
    @keys[i] = key
    if is_leaf?
      @values[i] = child_or_value
    else
      @children[i + 1] = child_or_value
    end

    # Explicit result so the documented contract holds regardless of
    # what the assignments above evaluate to.
    false
  else
    # Create a new entry
    @keys.insert(i, key)
    if is_leaf?
      @values.insert(i, child_or_value)
      @tree.entry_counter += 1
    else
      @children.insert(i + 1, child_or_value)
    end

    true
  end
end
514
+
515
# Remove the element from a leaf node at the given index. If the node
# drops below the minimum key count it borrows an entry from, or is merged
# with, an adjacent sibling that shares the same parent.
# @param index [Integer] The index of the entry to be removed
# @return [Object] The removed value
def remove_element(index)
  # Take the key out first.
  key = @keys.delete_at(index)
  unless key
    PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " +
                     "@#{@_id}"
  end
  # Removing the lead key requires the branch key further up to follow.
  update_branch_key(key) if index == 0

  # Then take out the value that belonged to it.
  removed_value = @values.delete_at(index)

  if @keys.length < min_keys
    if @prev_sibling && @prev_sibling.parent == @parent
      unless borrow_from_previous_sibling(@prev_sibling)
        @prev_sibling.merge_with_leaf_node(myself)
      end
    elsif @next_sibling && @next_sibling.parent == @parent
      unless borrow_from_next_sibling(@next_sibling)
        merge_with_leaf_node(@next_sibling)
      end
    elsif @parent
      PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
    end
  end

  # The merge has potentially invalidated this node. After this method has
  # been called this copy of the node should no longer be used.
  removed_value
end
544
+
545
# Remove the specified node from this branch node. The child is unlinked
# from its siblings and from the first/last leaf references of the tree.
# If this node becomes too small it borrows from or merges with an
# adjacent sibling; a parentless node left with at most one child hands
# the root role to that child.
# @param node [BigTreeNode] The child to remove
def remove_child(node)
  index = search_node_index(node)
  unless index
    PEROBS.log.fatal "Cannot remove child #{node._id} from node #{@_id}"
  end

  if index == 0
    # The branch key of the first child lives in a node further up, so
    # that key has to be updated after dropping our own first key.
    dropped_key = @keys.shift
    update_branch_key(dropped_key)
  else
    # Any other child has its branch key right here, one slot to the left.
    @keys.delete_at(index - 1)
  end

  # Drop the link to the child node.
  child = @children.delete_at(index)
  # Keep the first/last leaf references of the tree up to date.
  @tree.first_leaf = child.next_sibling if child == @tree.first_leaf
  @tree.last_leaf = child.prev_sibling if child == @tree.last_leaf
  # Close the gap in the sibling chain.
  child.prev_sibling.next_sibling = child.next_sibling if child.prev_sibling
  child.next_sibling.prev_sibling = child.prev_sibling if child.next_sibling

  if @keys.length < min_keys
    # Too few keys left: borrow from an adjacent sibling with the same
    # parent or, failing that, merge with it.
    if @prev_sibling && @prev_sibling.parent == @parent
      unless borrow_from_previous_sibling(@prev_sibling)
        @prev_sibling.merge_with_branch_node(myself)
      end
    elsif @next_sibling && @next_sibling.parent == @parent
      unless borrow_from_next_sibling(@next_sibling)
        merge_with_branch_node(@next_sibling)
      end
    end
  end

  if @parent.nil? && @children.length <= 1
    # A parentless node with a single child is replaced by that child as
    # the root of the tree.
    new_root = @children.first
    new_root.parent = nil
    @tree.root = new_root
  end
end
592
+
593
# Append all keys and values of the given leaf node to this leaf node and
# then remove the given node from its parent.
# @param node [BigTreeNode] leaf node whose entries are taken over
def merge_with_leaf_node(node)
  combined_size = @keys.length + node.keys.length
  if combined_size > @tree.node_size
    PEROBS.log.fatal "Leaf nodes are too big to merge"
  end

  self.keys += node.keys
  self.values += node.values

  node.parent.remove_child(node)
end
603
+
604
# Append the given branch node to this branch node. The branch key that
# the parent holds for the given node is pulled down between the two key
# lists, the children of the given node are re-parented to this node, and
# the given node is then removed from its parent.
# @param node [BigTreeNode] branch node whose content is taken over
def merge_with_branch_node(node)
  combined_size = @keys.length + 1 + node.keys.length
  if combined_size > @tree.node_size
    PEROBS.log.fatal "Branch nodes are too big to merge"
  end

  # Slot of the branch key for 'node' in the parent.
  index = @parent.search_node_index(node) - 1
  self.keys << @parent.keys[index]
  self.keys += node.keys
  node.children.each { |c| c.parent = myself }
  self.children += node.children

  node.parent.remove_child(node)
end
617
+
618
# Binary search over the sorted keys of this node. The result is either
# the index of an exact match (plus one for branch nodes, where it is used
# as child index) or the index at which the given key would have to be
# inserted.
# @param key [Integer] key to search for
# @return [Integer] Index of the matching key or the insert position.
def search_key_index(key)
  # Nothing to search in an empty key list.
  return 0 if @keys.empty?

  lower = pivot = 0
  upper = @keys.size - 1
  until lower > upper
    # Probe the middle of the remaining window.
    pivot = lower + (upper - lower) / 2

    if key < @keys[pivot]
      # The key sorts before the pivot element; continue in the lower
      # part of the window.
      upper = pivot - 1
    elsif key > @keys[pivot]
      # The key sorts after the pivot element; continue in the upper
      # part of the window.
      lower = pivot + 1
    else
      # Exact match. Leaf nodes use the index as is. Branch nodes add
      # one since the child to the right of the key is the one to use.
      return is_leaf? ? pivot : pivot + 1
    end
  end

  # No exact match. The insert position is the index of the first key
  # that is larger than the given key.
  @keys[pivot] < key ? pivot + 1 : pivot
end
655
+
656
# Determine the position of the given child node in the children list by
# looking up the first key of that node.
# @param node [BigTreeNode] child node to locate
# @return [Integer] index of the node in the children list
# @raise [RuntimeError] if the child found at that index is another node
def search_node_index(node)
  index = search_key_index(node.keys.first)
  return index if @children[index] == node

  raise RuntimeError, "Child at index #{index} is not the requested node"
end
664
+
665
# Generic, non-recursive tree iterator. Each node is yielded once per
# position: position 0 when the node is entered, position N after the
# N-th child (or entry) has been passed. The third block argument is the
# work stack, which holds [node, position] pairs of the nodes that still
# have to be continued.
# @yield [node, position, stack]
def traverse
  stack = [ [ self, 0 ] ]

  until stack.empty?
    node, position = stack.pop

    yield(node, position, stack)

    # Beyond the last position there is nothing more to schedule.
    next if position > node.keys.size

    # Come back to this node at the next position...
    stack.push([ node, position + 1 ])

    # ...after the child at the current position, if any, has been done.
    child = node.is_leaf? ? nil : node.children[position]
    stack.push([ child, 0 ]) if child
  end
end
699
+
700
# Gather some statistics about the node and all sub nodes. Every node is
# counted once (when it is entered). For leaf nodes the depth is the
# number of entries on the traversal stack plus one; the smallest and
# largest depth seen are recorded.
# @param stats [Stats] Data structure that stores the gathered data. It
#        must provide leaf_nodes, branch_nodes, min_depth and max_depth
#        accessors.
def statistics(stats)
  traverse do |node, position, stack|
    # Count each node only when it is first entered.
    next unless position == 0

    unless node.is_leaf?
      stats.branch_nodes += 1
      next
    end

    stats.leaf_nodes += 1
    depth = stack.size + 1
    # min_depth only ever shrinks, max_depth only ever grows.
    if stats.min_depth.nil? || depth < stats.min_depth
      stats.min_depth = depth
    end
    if stats.max_depth.nil? || depth > stats.max_depth
      stats.max_depth = depth
    end
  end
end
720
+
721
# Build the decoration that marks the tree structure of this node for the
# inspection method. Walking from this node up to the top, one segment is
# prepended per level, depending on whether the node on that level is the
# last child of its parent. The top-level node contributes nothing.
# @return [String] prefix string
def tree_prefix
  node = self
  str = ''

  while node
    # Don't add lines for the top-level.
    break unless node.parent

    is_last_child = node.parent.children.last == node
    segment = is_last_child ? ' ' : ' |'
    str = segment + str
    node = node.parent
  end

  str
end
742
+
743
# Branch node decoration for the inspection method.
# @return [String] '-' for nodes that have a parent, '' otherwise
def tree_branch_mark
  @parent ? '-' : ''
end
748
+
749
# Text for the node line for the inspection method. It lists the ID of
# this node followed by the IDs of the parent (^), the previous sibling
# (<) and the next sibling (>) where those references are set. If reading
# an ID raises an error, '@' is printed in place of the ID.
# @return [String] summary text
def tree_summary
  s = " @#{@_id}"

  links = [
    [ @parent, ' ^' ],
    [ @prev_sibling, ' <' ],
    [ @next_sibling, ' >' ]
  ]
  links.each do |ref, prefix|
    next unless ref

    s += begin
           "#{prefix}#{ref._id}"
         rescue
           "#{prefix}@"
         end
  end

  s
end
776
+
777
# Print and log an error message for the node. The message is prefixed
# with the node ID and followed by the string form of the whole tree. It
# is written to $stderr and to the PEROBS error log.
# @param msg [String] description of the problem
def error(msg)
  report = "Error in BigTree node @#{@_id}: #{msg}\n" + @tree.to_s
  $stderr.puts report
  PEROBS.log.error report
end
783
+
784
+ private
785
+
786
# Key count threshold: half of the node size configured for the tree.
# Removal code rebalances a node whose key count falls below this value.
# @return [Integer] @tree.node_size / 2
def min_keys
  @tree.node_size / 2
end
789
+
790
# Try to borrow an element from the preceding sibling. Nothing is moved
# unless the sibling holds more than min_keys + 1 keys.
# @param prev_node [BigTreeNode] the preceding sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_previous_sibling(prev_node)
  return false unless prev_node.keys.length - 1 > min_keys

  # Slot of the branch key for this node in the parent.
  index = @parent.search_node_index(self) - 1

  if is_leaf?
    # The last key of the previous node becomes the first key here...
    @keys.unshift(prev_node.keys.pop)
    # ...which makes it the new branch key in the parent...
    @parent.keys[index] = @keys.first
    # ...and its value moves along.
    @values.unshift(prev_node.values.pop)
  else
    # The current branch key moves down into this node.
    @keys.unshift(@parent.keys[index])
    # The last key of the previous node replaces it in the parent.
    @parent.keys[index] = prev_node.keys.pop
    # The last child of the previous node becomes the first child here.
    moved_child = prev_node.children.pop
    @children.unshift(moved_child)
    moved_child.parent = myself
  end

  true
end
820
+
821
# Try to borrow an element from the next sibling. Nothing is moved unless
# the sibling holds more than min_keys + 1 keys.
# @param next_node [BigTreeNode] the following sibling
# @return [True or False] True if an element was borrowed, false
#         otherwise.
def borrow_from_next_sibling(next_node)
  return false unless next_node.keys.length - 1 > min_keys

  # Slot of the branch key for next_node in its parent. That key changes
  # because next_node gets a new lead key.
  next_parent = next_node.parent
  index = next_parent.search_node_index(next_node) - 1

  if is_leaf?
    # The first key of the next node becomes the last key here.
    self.keys << next_node.keys.shift
    # The new lead key of next_node is registered with its parent.
    next_node.parent.keys[index] = next_node.keys.first
    # The value moves along with the key.
    self.values << next_node.values.shift
  else
    # The branch key of next_node moves down into this node.
    self.keys << next_node.parent.keys[index]
    # The old lead key of next_node replaces it in the parent.
    next_node.parent.keys[index] = next_node.keys.shift
    # The first child of the next node becomes the last child here.
    moved_child = next_node.children.shift
    self.children << moved_child
    moved_child.parent = myself
  end

  true
end
854
+
855
# Replace the branch key old_key with the current first key of this node.
# The ancestors are searched bottom-up; the first one that holds old_key
# gets updated and the search stops. If no ancestor holds the key nothing
# is changed (the smallest element has no branch key).
# @param old_key [Integer] branch key to be replaced
# @return [nil]
def update_branch_key(old_key)
  new_key = @keys.first
  node = @parent

  while node
    index = node.keys.index(old_key)
    if index
      node.keys[index] = new_key
      return
    end
    node = node.parent
  end

  nil
end
869
+
870
+ end
871
+
872
+ end
873
+