perobs 4.0.0 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +27 -16
  3. data/lib/perobs/Array.rb +66 -19
  4. data/lib/perobs/BTree.rb +106 -15
  5. data/lib/perobs/BTreeBlob.rb +4 -3
  6. data/lib/perobs/BTreeDB.rb +5 -4
  7. data/lib/perobs/BTreeNode.rb +482 -156
  8. data/lib/perobs/BTreeNodeLink.rb +10 -0
  9. data/lib/perobs/BigArray.rb +285 -0
  10. data/lib/perobs/BigArrayNode.rb +1002 -0
  11. data/lib/perobs/BigHash.rb +246 -0
  12. data/lib/perobs/BigTree.rb +197 -0
  13. data/lib/perobs/BigTreeNode.rb +873 -0
  14. data/lib/perobs/Cache.rb +48 -10
  15. data/lib/perobs/ConsoleProgressMeter.rb +61 -0
  16. data/lib/perobs/DataBase.rb +4 -3
  17. data/lib/perobs/DynamoDB.rb +57 -15
  18. data/lib/perobs/EquiBlobsFile.rb +155 -50
  19. data/lib/perobs/FNV_Hash_1a_64.rb +54 -0
  20. data/lib/perobs/FlatFile.rb +519 -227
  21. data/lib/perobs/FlatFileBlobHeader.rb +113 -54
  22. data/lib/perobs/FlatFileDB.rb +49 -23
  23. data/lib/perobs/FuzzyStringMatcher.rb +175 -0
  24. data/lib/perobs/Hash.rb +127 -33
  25. data/lib/perobs/IDList.rb +144 -0
  26. data/lib/perobs/IDListPage.rb +107 -0
  27. data/lib/perobs/IDListPageFile.rb +180 -0
  28. data/lib/perobs/IDListPageRecord.rb +142 -0
  29. data/lib/perobs/Object.rb +18 -15
  30. data/lib/perobs/ObjectBase.rb +46 -5
  31. data/lib/perobs/PersistentObjectCache.rb +57 -68
  32. data/lib/perobs/PersistentObjectCacheLine.rb +24 -12
  33. data/lib/perobs/ProgressMeter.rb +97 -0
  34. data/lib/perobs/SpaceManager.rb +273 -0
  35. data/lib/perobs/SpaceTree.rb +21 -12
  36. data/lib/perobs/SpaceTreeNode.rb +53 -61
  37. data/lib/perobs/Store.rb +264 -145
  38. data/lib/perobs/version.rb +1 -1
  39. data/lib/perobs.rb +2 -0
  40. data/perobs.gemspec +4 -4
  41. data/test/Array_spec.rb +15 -6
  42. data/test/BTree_spec.rb +6 -2
  43. data/test/BigArray_spec.rb +261 -0
  44. data/test/BigHash_spec.rb +152 -0
  45. data/test/BigTreeNode_spec.rb +153 -0
  46. data/test/BigTree_spec.rb +259 -0
  47. data/test/EquiBlobsFile_spec.rb +105 -1
  48. data/test/FNV_Hash_1a_64_spec.rb +59 -0
  49. data/test/FlatFileDB_spec.rb +198 -14
  50. data/test/FuzzyStringMatcher_spec.rb +261 -0
  51. data/test/Hash_spec.rb +13 -3
  52. data/test/IDList_spec.rb +77 -0
  53. data/test/LegacyDBs/LegacyDB.rb +155 -0
  54. data/test/LegacyDBs/version_3/class_map.json +1 -0
  55. data/test/LegacyDBs/version_3/config.json +1 -0
  56. data/test/LegacyDBs/version_3/database.blobs +0 -0
  57. data/test/LegacyDBs/version_3/database_spaces.blobs +0 -0
  58. data/test/LegacyDBs/version_3/index.blobs +0 -0
  59. data/test/LegacyDBs/version_3/version +1 -0
  60. data/test/LockFile_spec.rb +9 -6
  61. data/test/SpaceManager_spec.rb +176 -0
  62. data/test/SpaceTree_spec.rb +4 -1
  63. data/test/Store_spec.rb +305 -203
  64. data/test/spec_helper.rb +9 -4
  65. metadata +57 -16
  66. data/lib/perobs/BTreeNodeCache.rb +0 -109
  67. data/lib/perobs/TreeDB.rb +0 -277
@@ -39,7 +39,10 @@ module PEROBS
39
39
  # mark a node as leaf or branch node.
40
40
  class BTreeNode
41
41
 
42
- attr_reader :node_address, :parent, :is_leaf, :keys, :values, :children
42
+ Stats = Struct.new(:branch_depth, :nodes_count, :leave_nodes, :leaves)
43
+
44
+ attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling,
45
+ :keys, :values, :children
43
46
 
44
47
  # Create a new BTreeNode object for the given tree with the given parent
45
48
  # or recreate the node with the given node_address from the backing store.
@@ -48,47 +51,43 @@ module PEROBS
48
51
  # restore the node.
49
52
  # @param tree [BTree] The tree this node is part of
50
53
  # @param parent [BTreeNode] reference to parent node
54
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
55
+ # @param next_sibling [BTreeNode] reference to next sibling node
51
56
  # @param node_address [Integer] the address of the node to read from the
52
57
  # backing store
53
58
  # @param is_leaf [Boolean] true if the node should be a leaf node, false
54
59
  # if not
55
60
  def initialize(tree, node_address = nil, parent = nil, is_leaf = true,
56
- keys = [], values = [], children = [])
61
+ prev_sibling = nil, next_sibling = nil,
62
+ keys = nil, values = nil, children = nil)
57
63
  @tree = tree
58
64
  if node_address == 0
59
65
  PEROBS.log.fatal "Node address may not be 0"
60
66
  end
61
67
  @node_address = node_address
62
- @parent = parent ? BTreeNodeLink.new(tree, parent) : nil
63
- @keys = keys
68
+ @parent = link(parent)
69
+ @prev_sibling = link(prev_sibling)
70
+ @next_sibling = link(next_sibling)
71
+ @keys = keys || []
64
72
  if (@is_leaf = is_leaf)
65
- @values = values
66
- @children = []
73
+ @values = values || []
74
+ @children = nil
67
75
  else
68
- @children = children
69
- @values = []
76
+ @children = children || []
77
+ @values = nil
70
78
  end
71
-
72
- ObjectSpace.define_finalizer(
73
- self, BTreeNode._finalize(@tree, @node_address, object_id))
74
- @tree.node_cache.insert(self, false)
75
- end
76
-
77
- # This method generates the destructor for the objects of this class. It
78
- # is done this way to prevent the Proc object hanging on to a reference to
79
- # self which would prevent the object from being collected. This internal
80
- # method is not intended for users to call.
81
- def BTreeNode::_finalize(tree, node_address, ruby_object_id)
82
- proc { tree.node_cache._collect(node_address, ruby_object_id) }
83
79
  end
84
80
 
85
- # Create a new SpaceTreeNode. This method should be used for the creation
81
+ # Create a new BTreeNode. This method should be used for the creation
86
82
  # of new nodes instead of calling the constructor directly.
87
83
  # @param tree [BTree] The tree the new node should belong to
88
84
  # @param parent [BTreeNode] The parent node
89
85
  # @param is_leaf [Boolean] True if the node has no children, false
90
86
  # otherwise
91
- def BTreeNode::create(tree, parent = nil, is_leaf = true)
87
+ # @param prev_sibling [BTreeNode] reference to previous sibling node
88
+ # @param next_sibling [BTreeNode] reference to next sibling node
89
+ def BTreeNode::create(tree, parent = nil, is_leaf = true,
90
+ prev_sibling = nil, next_sibling = nil)
92
91
  unless parent.nil? || parent.is_a?(BTreeNode) ||
93
92
  parent.is_a?(BTreeNodeLink)
94
93
  PEROBS.log.fatal "Parent node must be a BTreeNode but is of class " +
@@ -96,17 +95,26 @@ module PEROBS
96
95
  end
97
96
 
98
97
  address = tree.nodes.free_address
99
- node = BTreeNode.new(tree, address, parent, is_leaf)
98
+ node = BTreeNode.new(tree, address, parent, is_leaf, prev_sibling,
99
+ next_sibling)
100
100
  # This is a new node. Make sure the data is written to the file.
101
101
  tree.node_cache.insert(node)
102
102
 
103
- node
103
+ # Insert the newly created node into the existing node chain.
104
+ if (node.prev_sibling = prev_sibling)
105
+ node.prev_sibling.next_sibling = BTreeNodeLink.new(tree, node)
106
+ end
107
+ if (node.next_sibling = next_sibling)
108
+ node.next_sibling.prev_sibling = BTreeNodeLink.new(tree, node)
109
+ end
110
+
111
+ BTreeNodeLink.new(tree, node)
104
112
  end
105
113
 
106
114
  # Restore a node from the backing store at the given address and tree.
107
115
  # @param tree [BTree] The tree the node belongs to
108
- # @param node_address [Integer] The address in the blob file.
109
- def BTreeNode::load(tree, address)
116
+ # @param address [Integer] The address in the blob file.
117
+ def BTreeNode::load(tree, address, unused = nil)
110
118
  unless address.is_a?(Integer)
111
119
  PEROBS.log.fatal "address is not Integer: #{address.class}"
112
120
  end
@@ -122,7 +130,8 @@ module PEROBS
122
130
  ary = bytes.unpack(BTreeNode::node_bytes_format(tree))
123
131
  # Read is_leaf
124
132
  if ary[0] != 0 && ary[0] != 1
125
- PEROBS.log.fatal "First byte of a BTreeNode entry must be 0 or 1"
133
+ PEROBS.log.fatal "First byte of a BTreeNode entry at address " +
134
+ "#{address} must be 0 or 1 but is #{ary[0]}"
126
135
  end
127
136
  is_leaf = ary[0] == 0 ? false : true
128
137
  # This is the number of keys this node has.
@@ -130,19 +139,21 @@ module PEROBS
130
139
  data_count = ary[2]
131
140
  # Read the parent node address
132
141
  parent = ary[3] == 0 ? nil : BTreeNodeLink.new(tree, ary[3])
142
+ prev_sibling = ary[4] == 0 ? nil : BTreeNodeLink.new(tree, ary[4])
143
+ next_sibling = ary[5] == 0 ? nil : BTreeNodeLink.new(tree, ary[5])
133
144
  # Read the keys
134
- keys = ary[4, key_count]
145
+ keys = ary[6, key_count]
135
146
 
136
147
  children = nil
137
148
  values = nil
138
149
  if is_leaf
139
150
  # Read the values
140
- values = ary[4 + tree.order, data_count]
151
+ values = ary[6 + tree.order, data_count]
141
152
  else
142
153
  # Read the child addresses
143
154
  children = []
144
155
  data_count.times do |i|
145
- child_address = ary[4 + tree.order + i]
156
+ child_address = ary[6 + tree.order + i]
146
157
  unless child_address > 0
147
158
  PEROBS.log.fatal "Child address must be larger than 0"
148
159
  end
@@ -150,17 +161,28 @@ module PEROBS
150
161
  end
151
162
  end
152
163
 
153
- node = BTreeNode.new(tree, address, parent, is_leaf, keys, values,
164
+ node = BTreeNode.new(tree, address, parent, is_leaf,
165
+ prev_sibling, next_sibling, keys, values,
154
166
  children)
155
167
  tree.node_cache.insert(node, false)
156
168
 
157
169
  node
158
170
  end
159
171
 
172
+ # This is a wrapper around BTreeNode::load() that returns a BTreeNodeLink
173
+ # instead of the actual node.
174
+ # @param tree [BTree] The tree the node belongs to
175
+ # @param address [Integer] The address in the blob file.
176
+ # @return [BTreeNodeLink] Link to loaded node
177
+ def BTreeNode::load_and_link(tree, address)
178
+ BTreeNodeLink.new(tree, BTreeNode::load(tree, address))
179
+ end
180
+
181
+
160
182
  # @return [String] The format used for String.pack.
161
183
  def BTreeNode::node_bytes_format(tree)
162
184
  # This does not include the 4 bytes for the CRC32 checksum
163
- "CSSQQ#{tree.order}Q#{tree.order + 1}"
185
+ "CSSQQQQ#{tree.order}Q#{tree.order + 1}"
164
186
  end
165
187
 
166
188
  # @return [Integer] The number of bytes needed to store a node.
@@ -169,6 +191,8 @@ module PEROBS
169
191
  2 + # actual key count
170
192
  2 + # actual value or children count (aka data count)
171
193
  8 + # parent address
194
+ 8 + # previous sibling address
195
+ 8 + # next sibling address
172
196
  8 * order + # keys
173
197
  8 * (order + 1) + # values or child addresses
174
198
  4 # CRC32 checksum
@@ -200,11 +224,11 @@ module PEROBS
200
224
 
201
225
  # Once we have reached a leaf node we can insert or replace the value.
202
226
  if node.is_leaf
203
- node.insert_element(key, value)
204
- return
227
+ return node.insert_element(key, value)
205
228
  else
206
229
  # Descend into the right child node to add the value to.
207
230
  node = node.children[node.search_key_index(key)]
231
+ node = node.get_node if node
208
232
  end
209
233
  end
210
234
 
@@ -229,6 +253,62 @@ module PEROBS
229
253
 
230
254
  # Descend into the right child node to continue the search.
231
255
  node = node.children[i]
256
+ node = node.get_node if node
257
+ end
258
+
259
+ PEROBS.log.fatal "Could not find proper node to get from while " +
260
+ "looking for key #{key}"
261
+ end
262
+
263
+ # Return the key/value pair that matches the given key or the next larger
264
+ # key/value pair with a key that is at least as large as key +
265
+ # min_miss_increment.
266
+ # @param key [Integer] key to search for
267
+ # @param min_miss_increment [Integer] minimum required key increment in
268
+ # case an exact key match could not be found
269
+ # @return [Array or nil] matching [ key, value ] pair or nil if none found
270
+ def get_best_match(key, min_miss_increment)
271
+ node = self
272
+
273
+ while node do
274
+ # Find index of the entry that best fits the key.
275
+ i = node.search_key_index(key)
276
+ if node.is_leaf
277
+ # This is a leaf node. Check if there is an exact match for the
278
+ # given key.
279
+ if node.keys[i] == key
280
+ # Return the corresponding key/value pair.
281
+ return [ key, node.values[i] ]
282
+ else
283
+ # No exact key match. Now search the larger keys for the first
284
+ # that is at least key + min_miss_increment large.
285
+ keys = node.keys
286
+ keys_length = keys.length
287
+ while node
288
+ if i >= keys_length
289
+ # We've reached the end of a node. Continue search in next
290
+ # sibling.
291
+ return nil unless (node = node.next_sibling)
292
+ node = node.get_node
293
+ keys = node.keys
294
+ keys_length = keys.length
295
+ i = -1
296
+ elsif keys[i] >= key + min_miss_increment
297
+ # We've found a key that fits the criteria. Return the
298
+ # corresponding key/value pair.
299
+ return [ keys[i], node.values[i] ]
300
+ end
301
+
302
+ i += 1
303
+ end
304
+
305
+ return nil
306
+ end
307
+ end
308
+
309
+ # Descend into the right child node to continue the search.
310
+ node = node.children[i]
311
+ node = node.get_node if node
232
312
  end
233
313
 
234
314
  PEROBS.log.fatal "Could not find proper node to get from while " +
@@ -257,6 +337,7 @@ module PEROBS
257
337
 
258
338
  # Descend into the right child node to continue the search.
259
339
  node = node.children[i]
340
+ node = node.get_node if node
260
341
  end
261
342
 
262
343
  PEROBS.log.fatal 'Could not find proper node to remove from'
@@ -269,18 +350,20 @@ module PEROBS
269
350
  def split_node
270
351
  unless @parent
271
352
  # The node is the root node. We need to create a parent node first.
272
- self.parent = BTreeNode::create(@tree, nil, false)
353
+ self.parent = link(BTreeNode::create(@tree, nil, false))
354
+ @tree.node_cache.insert(self)
273
355
  @parent.set_child(0, self)
274
356
  @tree.set_root(@parent)
275
357
  end
276
358
 
277
359
  # Create the new sibling that will take the 2nd half of the
278
360
  # node content.
279
- sibling = BTreeNode::create(@tree, @parent, @is_leaf)
361
+ sibling = BTreeNode::create(@tree, @parent, @is_leaf, link(self),
362
+ @next_sibling)
280
363
  # Determine the index of the middle element that gets moved to the
281
364
  # parent. The order must be an uneven number, so adding 1 will get us
282
365
  # the middle element.
283
- mid = @tree.order / 2 + 1
366
+ mid = @tree.order / 2
284
367
  # Insert the middle element key into the parent node
285
368
  @parent.insert_element(@keys[mid], sibling)
286
369
  copy_elements(mid + (@is_leaf ? 0 : 1), sibling)
@@ -289,89 +372,149 @@ module PEROBS
289
372
  @parent
290
373
  end
291
374
 
292
- def merge_node(upper_sibling, parent_index)
293
- if upper_sibling == self
294
- PEROBS.log.fatal "Cannot merge node @#{@node_address} with self"
295
- end
296
- unless upper_sibling.is_leaf
297
- insert_element(@parent.keys[parent_index], upper_sibling.children[0])
298
- end
299
- upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size)
300
- @tree.delete_node(upper_sibling.node_address)
301
-
302
- @parent.remove_element(parent_index)
303
- end
304
-
305
375
  # Insert the given value or child into the current node using the key as
306
376
  # index.
307
377
  # @param key [Integer] key to address the value or child
308
378
  # @param value_or_child [Integer or BTreeNode] value or BTreeNode
309
379
  # reference
380
+ # @return true for insert, false for overwrite
310
381
  def insert_element(key, value_or_child)
311
382
  if @keys.size >= @tree.order
312
383
  PEROBS.log.fatal "Cannot insert into a full BTreeNode"
313
384
  end
314
385
 
315
386
  i = search_key_index(key)
387
+ @tree.node_cache.insert(self)
316
388
  if @keys[i] == key
317
389
  # Overwrite existing entries
318
390
  @keys[i] = key
319
391
  if is_leaf
320
392
  @values[i] = value_or_child
321
393
  else
322
- @children[i + 1] = BTreeNodeLink.new(@tree, value_or_child)
394
+ @children[i + 1] = link(value_or_child)
323
395
  end
396
+
397
+ return false
324
398
  else
325
399
  # Create a new entry
326
400
  @keys.insert(i, key)
327
401
  if is_leaf
328
402
  @values.insert(i, value_or_child)
329
403
  else
330
- @children.insert(i + 1, BTreeNodeLink.new(@tree, value_or_child))
404
+ @children.insert(i + 1, link(value_or_child))
331
405
  end
406
+
407
+ return true
332
408
  end
333
- @tree.node_cache.insert(self)
334
409
  end
335
410
 
336
411
  # Remove the element at the given index.
337
412
  def remove_element(index)
338
- # We need this key to find the link in the parent node.
339
- first_key = @keys[0]
340
- removed_value = nil
341
-
342
413
  # Delete the key at the specified index.
343
- unless @keys.delete_at(index)
344
- PEROBS.log.fatal "Could not remove element #{index} from BTreeNode " +
414
+ unless (key = @keys.delete_at(index))
415
+ PEROBS.log.fatal "Could not remove element #{index} from BigTreeNode " +
345
416
  "@#{@node_address}"
346
417
  end
347
- if @is_leaf
348
- # For leaf nodes, also delete the corresponding value.
349
- removed_value = @values.delete_at(index)
418
+ update_branch_key(key) if index == 0
419
+
420
+ # Delete the corresponding value.
421
+ @tree.node_cache.insert(self)
422
+ removed_value = @values.delete_at(index)
423
+
424
+ if @keys.length < min_keys
425
+ if @prev_sibling && @prev_sibling.parent == @parent
426
+ borrow_from_previous_sibling(@prev_sibling) ||
427
+ @prev_sibling.merge_with_leaf_node(self)
428
+ elsif @next_sibling && @next_sibling.parent == @parent
429
+ borrow_from_next_sibling(@next_sibling) ||
430
+ merge_with_leaf_node(@next_sibling)
431
+ elsif @parent
432
+ PEROBS.log.fatal "Cannot not find adjecent leaf siblings"
433
+ end
434
+ end
435
+
436
+ # The merge has potentially invalidated this node. After this method has
437
+ # been called this copy of the node should no longer be used.
438
+ removed_value
439
+ end
440
+
441
+ def remove_child(node)
442
+ unless (index = search_node_index(node))
443
+ PEROBS.log.fatal "Cannot remove child #{node.node_address} " +
444
+ "from node #{@node_address}"
445
+ end
446
+
447
+ @tree.node_cache.insert(self)
448
+ if index == 0
449
+ # Removing the first child is a bit more complicated as the
450
+ # corresponding branch key is in a parent node.
451
+ key = @keys.shift
452
+ update_branch_key(key)
350
453
  else
351
- # The corresponding child has can be found at 1 index higher.
352
- @children.delete_at(index + 1)
454
+ # For all other children we can just remove the corresponding key.
455
+ @keys.delete_at(index - 1)
456
+ end
457
+
458
+ # Remove the child node link.
459
+ child = @children.delete_at(index)
460
+ # Unlink the neighbouring siblings from the child
461
+ child.prev_sibling.next_sibling = child.next_sibling if child.prev_sibling
462
+ child.next_sibling.prev_sibling = child.prev_sibling if child.next_sibling
463
+
464
+ if @keys.length < min_keys
465
+ # The node has become too small. Try borrowing a node from an adjacent
466
+ # sibling or merge with an adjacent node.
467
+ if @prev_sibling && @prev_sibling.parent == @parent
468
+ borrow_from_previous_sibling(@prev_sibling) ||
469
+ @prev_sibling.merge_with_branch_node(self)
470
+ elsif @next_sibling && @next_sibling.parent == @parent
471
+ borrow_from_next_sibling(@next_sibling) ||
472
+ merge_with_branch_node(@next_sibling)
473
+ end
474
+ end
475
+
476
+ # Delete the node from the cache and backing store.
477
+ @tree.delete_node(node.node_address)
478
+ end
479
+
480
+ def merge_with_leaf_node(node)
481
+ if @keys.length + node.keys.length > @tree.order
482
+ PEROBS.log.fatal "Leaf nodes are too big to merge"
353
483
  end
484
+
354
485
  @tree.node_cache.insert(self)
486
+ @keys += node.keys
487
+ @values += node.values
355
488
 
356
- # Find the lower and upper siblings and the index of the key for this
357
- # node in the parent node.
358
- lower_sibling, upper_sibling, parent_index =
359
- find_closest_siblings(first_key)
489
+ node.parent.remove_child(node)
490
+ end
360
491
 
361
- if lower_sibling &&
362
- lower_sibling.keys.size + @keys.size < @tree.order
363
- lower_sibling.merge_node(self, parent_index - 1)
364
- elsif upper_sibling &&
365
- @keys.size + upper_sibling.keys.size < @tree.order
366
- merge_node(upper_sibling, parent_index)
492
+ def merge_with_branch_node(node)
493
+ if @keys.length + 1 + node.keys.length > @tree.order
494
+ PEROBS.log.fatal "Branch nodes are too big to merge"
367
495
  end
368
496
 
369
- # The merge has potentially invalidated this node. After this method has
370
- # been called this copy of the node should no longer be used.
371
- removed_value
497
+ index = @parent.search_node_index(node) - 1
498
+ @tree.node_cache.insert(self)
499
+ @keys << @parent.keys[index]
500
+ @keys += node.keys
501
+ node.children.each { |c| c.parent = link(self) }
502
+ @children += node.children
503
+
504
+ node.parent.remove_child(node)
505
+ end
506
+
507
+ def search_node_index(node)
508
+ index = search_key_index(node.keys.first)
509
+ unless @children[index] == node
510
+ raise RuntimeError, "Child at index #{index} is not the requested node"
511
+ end
512
+
513
+ index
372
514
  end
373
515
 
374
516
  def copy_elements(src_idx, dest_node, dst_idx = 0, count = nil)
517
+ dest_node = dest_node.get_node
375
518
  unless count
376
519
  count = @tree.order - src_idx
377
520
  end
@@ -383,6 +526,7 @@ module PEROBS
383
526
  "#{dest_node.is_leaf} node must be of same kind"
384
527
  end
385
528
 
529
+ @tree.node_cache.insert(dest_node)
386
530
  dest_node.keys[dst_idx, count] = @keys[src_idx, count]
387
531
  if @is_leaf
388
532
  # For leaves we copy the keys and corresponding values.
@@ -395,32 +539,59 @@ module PEROBS
395
539
  dest_node.set_child(dst_idx + i, @children[src_idx + i])
396
540
  end
397
541
  end
398
- @tree.node_cache.insert(dest_node)
399
542
  end
400
543
 
401
544
  def parent=(p)
402
- @parent = p ? BTreeNodeLink.new(@tree, p) : nil
403
545
  @tree.node_cache.insert(self)
546
+ @parent = p
547
+
548
+ p
549
+ end
550
+
551
+ def prev_sibling=(node)
552
+ @tree.node_cache.insert(self)
553
+ @prev_sibling = node
554
+ if node.nil? && @is_leaf
555
+ # If this node is a leaf node without a previous sibling we need to
556
+ # register it as the first leaf node.
557
+ @tree.set_first_leaf(BTreeNodeLink.new(@tree, self))
558
+ end
559
+
560
+ node
561
+ end
562
+
563
+ def next_sibling=(node)
564
+ @tree.node_cache.insert(self)
565
+ @next_sibling = node
566
+ if node.nil? && @is_leaf
567
+ # If this node is a leaf node without a next sibling we need to
568
+ # register it as the last leaf node.
569
+ @tree.set_last_leaf(BTreeNodeLink.new(@tree, self))
570
+ end
571
+
572
+ node
404
573
  end
405
574
 
406
575
  def set_child(index, child)
576
+ @tree.node_cache.insert(self)
407
577
  if child
408
- @children[index] = BTreeNodeLink.new(@tree, child)
409
- @children[index].parent = self
578
+ @children[index] = link(child)
579
+ @children[index].parent = link(self)
410
580
  else
411
581
  @children[index] = nil
412
582
  end
413
- @tree.node_cache.insert(self)
583
+
584
+ child
414
585
  end
415
586
 
416
587
  def trim(idx)
417
- @keys = @keys[0..idx - 1]
588
+ @tree.node_cache.insert(self)
589
+ @keys.slice!(idx, @keys.length - idx)
418
590
  if @is_leaf
419
- @values = @values[0..idx - 1]
591
+ @values.slice!(idx, @values.length - idx)
420
592
  else
421
- @children = @children[0..idx]
593
+ @children.slice!(idx + 1, @children.length - idx - 1)
422
594
  end
423
- @tree.node_cache.insert(self)
424
595
  end
425
596
 
426
597
  # Search the keys of the node that fits the given key. The result is
@@ -429,36 +600,8 @@ module PEROBS
429
600
  # @param key [Integer] key to search for
430
601
  # @return [Integer] Index of the matching key or the insert position.
431
602
  def search_key_index(key)
432
- # Handle special case for empty keys list.
433
- return 0 if @keys.empty?
434
-
435
- # Keys are unique and always sorted. Use a binary search to find the
436
- # index that fits the given key.
437
- li = pi = 0
438
- ui = @keys.size - 1
439
- while li <= ui
440
- # The pivot element is always in the middle between the lower and upper
441
- # index.
442
- pi = li + (ui - li) / 2
443
-
444
- if key < @keys[pi]
445
- # The pivot element is smaller than the key. Set the upper index to
446
- # the pivot index.
447
- ui = pi - 1
448
- elsif key > @keys[pi]
449
- # The pivot element is larger than the key. Set the lower index to
450
- # the pivot index.
451
- li = pi + 1
452
- else
453
- # We've found an exact match. For leaf nodes return the found index.
454
- # For branch nodes we have to add one to the index since the larger
455
- # child is the right one.
456
- return @is_leaf ? pi : pi + 1
457
- end
458
- end
459
- # No exact match was found. For the insert operaton we need to return
460
- # the index of the first key that is larger than the given key.
461
- @keys[pi] < key ? pi + 1 : pi
603
+ (@is_leaf ? @keys.bsearch_index { |x| x >= key } :
604
+ @keys.bsearch_index { |x| x > key }) || @keys.length
462
605
  end
463
606
 
464
607
  # Iterate over all the key/value pairs in this node and all sub-nodes.
@@ -509,17 +652,33 @@ module PEROBS
509
652
  # Check consistency of the node and all subsequent nodes. In case an error
510
653
  # is found, a message is logged and false is returned.
511
654
  # @yield [key, value]
512
- # @return [Boolean] true if tree has no errors
513
- def check
655
+ # @return [nil or Stats] nil in case of errors or a Stats struct with some
656
+ # statistical information about the tree
657
+ def check(&block)
658
+ stats = Stats.new(nil, 0, 0, 0)
659
+
514
660
  traverse do |node, position, stack|
515
661
  if position == 0
516
- if node.parent && node.keys.size < 1
517
- node.error "BTreeNode must have at least one entry"
518
- return false
662
+ stats.nodes_count += 1
663
+ if node.parent
664
+ unless node.parent.is_a?(BTreeNodeLink)
665
+ node.error "parent is a #{node.parent.class} instead of a " +
666
+ "BTreeNodeLink"
667
+ return nil
668
+ end
669
+ # After a split the nodes will only have half the maximum keys.
670
+ # For branch nodes one of the split nodes will have even 1 key
671
+ # less as this will become the branch key in a parent node.
672
+ if node.keys.size < min_keys - (node.is_leaf ? 0 : 1)
673
+ node.error "BTreeNode #{node.node_address} has too few keys"
674
+ return nil
675
+ end
519
676
  end
677
+
520
678
  if node.keys.size > @tree.order
521
679
  node.error "BTreeNode must not have more then #{@tree.order} " +
522
680
  "keys, but has #{node.keys.size} keys"
681
+ return nil
523
682
  end
524
683
 
525
684
  last_key = nil
@@ -527,45 +686,101 @@ module PEROBS
527
686
  if last_key && key < last_key
528
687
  node.error "Keys are not increasing monotoneously: " +
529
688
  "#{node.keys.inspect}"
530
- return false
689
+ return nil
531
690
  end
691
+ last_key = key
532
692
  end
533
693
 
534
694
  if node.is_leaf
695
+ if stats.branch_depth
696
+ unless stats.branch_depth == node.tree_level
697
+ node.error "All leaf nodes must have same distance from root "
698
+ return nil
699
+ end
700
+ else
701
+ stats.branch_depth = node.tree_level
702
+ end
703
+ if node.prev_sibling && !node.prev_sibling.is_a?(BTreeNodeLink)
704
+ node.error "prev_sibling is a #{node.prev_sibling.class} " +
705
+ "instead of a BTreeNodeLink"
706
+ return nil
707
+ end
708
+ if node.next_sibling && !node.next_sibling.is_a?(BTreeNodeLink)
709
+ node.error "next_sibling is a #{node.next_sibling.class} " +
710
+ "instead of a BTreeNodeLink"
711
+ return nil
712
+ end
713
+ if node.prev_sibling.nil? && @tree.first_leaf != node
714
+ node.error "Leaf node #{node.node_address} has no previous " +
715
+ "sibling but is not the first leaf of the tree"
716
+ return nil
717
+ end
718
+ if node.next_sibling.nil? && @tree.last_leaf != node
719
+ node.error "Leaf node #{node.node_address} has no next sibling " +
720
+ "but is not the last leaf of the tree"
721
+ return nil
722
+ end
535
723
  unless node.keys.size == node.values.size
536
724
  node.error "Key count (#{node.keys.size}) and value " +
537
725
  "count (#{node.values.size}) don't match"
538
- return false
726
+ return nil
539
727
  end
728
+ unless node.children.nil?
729
+ node.error "@children must be nil for a leaf node"
730
+ return nil
731
+ end
732
+
733
+ stats.leave_nodes += 1
734
+ stats.leaves += node.keys.length
540
735
  else
541
- unless node.keys.size == node.children.size - 1
736
+ unless node.values.nil?
737
+ node.error "@values must be nil for a branch node"
738
+ return nil
739
+ end
740
+ unless node.children.size == node.keys.size + 1
542
741
  node.error "Key count (#{node.keys.size}) must be one " +
543
742
  "less than children count (#{node.children.size})"
544
- return false
743
+ return nil
545
744
  end
546
745
  node.children.each_with_index do |child, i|
547
746
  unless child.is_a?(BTreeNodeLink)
548
747
  node.error "Child #{i} is of class #{child.class} " +
549
748
  "instead of BTreeNodeLink"
550
- return false
749
+ return nil
551
750
  end
552
751
  unless child.parent.is_a?(BTreeNodeLink)
553
752
  node.error "Parent reference of child #{i} is of class " +
554
- "#{child.class} instead of BTreeNodeLink"
555
- return false
753
+ "#{child.parent.class} instead of BTreeNodeLink"
754
+ return nil
556
755
  end
557
- if child.node_address == node.node_address
756
+ if child == node
558
757
  node.error "Child #{i} points to self"
559
- return false
758
+ return nil
560
759
  end
561
760
  if stack.include?(child)
562
761
  node.error "Child #{i} points to ancester node"
563
- return false
762
+ return nil
564
763
  end
565
764
  unless child.parent == node
566
765
  node.error "Child #{i} does not have parent pointing " +
567
766
  "to this node"
568
- return false
767
+ return nil
768
+ end
769
+ if i > 0
770
+ unless node.children[i - 1].next_sibling == child
771
+ node.error "next_sibling of node " +
772
+ "#{node.children[i - 1].node_address} " +
773
+ "must point to node #{child.node_address}"
774
+ return nil
775
+ end
776
+ end
777
+ if i < node.children.length - 1
778
+ unless child == node.children[i + 1].prev_sibling
779
+ node.error "prev_sibling of node " +
780
+ "#{node.children[i + 1].node_address} " +
781
+ "must point to node #{child.node_address}"
782
+ return nil
783
+ end
569
784
  end
570
785
  end
571
786
  end
@@ -578,25 +793,26 @@ module PEROBS
578
793
  node.error "Child #{node.children[index].node_address} " +
579
794
  "has too large key #{node.children[index].keys.last}. " +
580
795
  "Must be smaller than #{node.keys[index]}."
581
- return false
796
+ return nil
582
797
  end
583
- unless node.children[position].keys.first >=
584
- node.keys[index]
798
+ unless node.children[position].keys.first >= node.keys[index]
585
799
  node.error "Child #{node.children[position].node_address} " +
586
800
  "has too small key #{node.children[position].keys.first}. " +
587
801
  "Must be larger than or equal to #{node.keys[index]}."
588
- return false
802
+ return nil
589
803
  end
590
804
  else
591
805
  if block_given?
592
806
  # If a block was given, call this block with the key and value.
593
- return false unless yield(node.keys[index], node.values[index])
807
+ unless yield(node.keys[index], node.values[index])
808
+ return nil
809
+ end
594
810
  end
595
811
  end
596
812
  end
597
813
  end
598
814
 
599
- true
815
+ stats
600
816
  end
601
817
 
602
818
  def is_top?
@@ -652,6 +868,7 @@ module PEROBS
652
868
 
653
869
  str = (is_last_child ? ' ' : ' |') + str
654
870
  node = node.parent
871
+ node = node.get_node if node
655
872
  end
656
873
 
657
874
  str
@@ -671,13 +888,37 @@ module PEROBS
671
888
  s += ' ^@'
672
889
  end
673
890
  end
891
+ if @prev_sibling
892
+ begin
893
+ s += " <#{@prev_sibling.node_address}"
894
+ rescue
895
+ s += ' <@'
896
+ end
897
+ end
898
+ if @next_sibling
899
+ begin
900
+ s += " >#{@next_sibling.node_address}"
901
+ rescue
902
+ s += ' >@'
903
+ end
904
+ end
674
905
 
675
906
  s
676
907
  end
677
908
 
909
+ def tree_level
910
+ level = 1
911
+ node = self
912
+ while (node = node.parent)
913
+ level += 1
914
+ end
915
+
916
+ level
917
+ end
918
+
919
+
678
920
  def error(msg)
679
- PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}\n" +
680
- @tree.to_s
921
+ PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}"
681
922
  end
682
923
 
683
924
  def write_node
@@ -685,7 +926,9 @@ module PEROBS
685
926
  @is_leaf ? 1 : 0,
686
927
  @keys.size,
687
928
  @is_leaf ? @values.size : @children.size,
688
- @parent ? @parent.node_address : 0
929
+ @parent ? @parent.node_address : 0,
930
+ @prev_sibling ? @prev_sibling.node_address : 0,
931
+ @next_sibling ? @next_sibling.node_address : 0
689
932
  ] + @keys + ::Array.new(@tree.order - @keys.size, 0)
690
933
 
691
934
  if @is_leaf
@@ -708,24 +951,107 @@ module PEROBS
708
951
 
709
952
  private
710
953
 
711
- def find_closest_siblings(key)
712
- # The root node has no siblings.
713
- return [ nil, nil, nil ] unless @parent
954
+ def min_keys
955
+ @tree.order / 2
956
+ end
957
+
958
+ def link(node)
959
+ return nil if node.nil?
960
+
961
+ if node.is_a?(BTreeNodeLink)
962
+ return node
963
+ elsif node.is_a?(BTreeNode) || node.is_a?(Integer)
964
+ return BTreeNodeLink.new(@tree, node)
965
+ else
966
+ PEROBS.log.fatal "Node link must be a BTreeNode, not a #{node.class}"
967
+ end
968
+ end
969
+
970
+ # Try to borrow an element from the preceding sibling.
971
+ # @return [Boolean] true if an element was borrowed, false
972
+ # otherwise.
973
+ def borrow_from_previous_sibling(prev_node)
974
+ if prev_node.keys.length - 1 > min_keys
975
+ index = @parent.search_node_index(self) - 1
976
+
977
+ @tree.node_cache.insert(self)
978
+ @tree.node_cache.insert(prev_node.get_node)
979
+ @tree.node_cache.insert(@parent.get_node)
980
+ if @is_leaf
981
+ # Move the last key of the previous node to the front of this node
982
+ @keys.unshift(prev_node.keys.pop)
983
+ # Register the new lead key of this node with its parent
984
+ @parent.keys[index] = @keys.first
985
+ # Move the last value of the previous node to the front of this node
986
+ @values.unshift(prev_node.values.pop)
987
+ else
988
+ # For branch nodes the branch key will be the borrowed key.
989
+ @keys.unshift(@parent.keys[index])
990
+ # And the last key of the previous node will become the new branch
991
+ # key for this node.
992
+ @parent.keys[index] = prev_node.keys.pop
993
+ # Move the last child of the previous node to the front of this node
994
+ @children.unshift(node = prev_node.children.pop)
995
+ node.parent = link(self)
996
+ end
997
+
998
+ return true
999
+ end
1000
+
1001
+ false
1002
+ end
1003
+
1004
+ # Try to borrow an element from the next sibling.
1005
+ # @return [Boolean] true if an element was borrowed, false
1006
+ # otherwise.
1007
+ def borrow_from_next_sibling(next_node)
1008
+ if next_node.keys.length - 1 > min_keys
1009
+ # The next sibling now has a new lead key that requires the branch key
1010
+ # to be updated in the parent node.
1011
+ index = next_node.parent.search_node_index(next_node) - 1
1012
+
1013
+ @tree.node_cache.insert(self)
1014
+ @tree.node_cache.insert(next_node.get_node)
1015
+ @tree.node_cache.insert(next_node.parent.get_node)
1016
+ if @is_leaf
1017
+ # Move the first key of the next node to the end of this node
1018
+ @keys << next_node.keys.shift
1019
+ # Register the new lead key of next_node with its parent
1020
+ next_node.parent.keys[index] = next_node.keys.first
1021
+ # Move the first value of the next node to the end of this node
1022
+ @values << next_node.values.shift
1023
+ else
1024
+ # For branch nodes we need to get the lead key from the parent of
1025
+ # next_node.
1026
+ @keys << next_node.parent.keys[index]
1027
+ # The old lead key of next_node becomes the branch key in the parent
1028
+ # of next_node. And the keys of next_node are shifted.
1029
+ next_node.parent.keys[index] = next_node.keys.shift
1030
+ # Move the first child of the next node to the end of this node
1031
+ @children << (node = next_node.children.shift)
1032
+ node.parent = link(self)
1033
+ end
1034
+
1035
+ return true
1036
+ end
1037
+
1038
+ false
1039
+ end
1040
+
1041
+ def update_branch_key(old_key)
1042
+ new_key = @keys.first
1043
+ return unless (node = @parent)
714
1044
 
715
- parent_index = @parent.search_key_index(key)
716
- unless @parent.children[parent_index] == self
717
- PEROBS.log.fatal "Failed to find self in parent"
1045
+ while node
1046
+ if (index = node.keys.index(old_key))
1047
+ node.keys[index] = new_key
1048
+ @tree.node_cache.insert(node.get_node)
1049
+ return
1050
+ end
1051
+ node = node.parent
718
1052
  end
719
- # The child that corresponds to the key at parent_index has an index of
720
- # parent_index + 1! The lower_sibling has an child index of
721
- # parent_index and the upper sibling has a child index of parent_index +
722
- # 2.
723
- lower_sibling = parent_index < 1 ?
724
- nil : @parent.children[parent_index - 1]
725
- upper_sibling = parent_index >= (@parent.children.size - 1) ?
726
- nil : @parent.children[parent_index + 1]
727
1053
 
728
- [ lower_sibling, upper_sibling, parent_index ]
1054
+ # The smallest element has no branch key.
729
1055
  end
730
1056
 
731
1057
  end